This commit is contained in:
@@ -32,6 +32,7 @@ type SearchService struct {
|
||||
WebEngine string
|
||||
Client *http.Client
|
||||
collectors []searchCollector
|
||||
Debug func(message string, data any)
|
||||
}
|
||||
|
||||
func NewSearchService(baseURL, googleVideoEngine, webEngine string) *SearchService {
|
||||
@@ -58,6 +59,10 @@ func (s *SearchService) SearchMedia(queries []string, enabledPlatforms map[strin
|
||||
if s.BaseURL == "" {
|
||||
return nil, fmt.Errorf("searxng base url is not configured")
|
||||
}
|
||||
s.debug("search_service:start", map[string]any{
|
||||
"queries": queries,
|
||||
"enabledPlatforms": enabledPlatforms,
|
||||
})
|
||||
|
||||
seen := map[string]bool{}
|
||||
sourceCounts := map[string]int{}
|
||||
@@ -85,15 +90,32 @@ func (s *SearchService) SearchMedia(queries []string, enabledPlatforms map[strin
|
||||
}
|
||||
searchQueries := collector.BuildQueries(base)
|
||||
shuffleStrings(searchQueries)
|
||||
s.debug("search_service:collector_queries", map[string]any{
|
||||
"collector": collector.Name(),
|
||||
"base": base,
|
||||
"onlyMissing": onlyMissing,
|
||||
"searchQueries": searchQueries,
|
||||
})
|
||||
for _, searchQuery := range searchQueries {
|
||||
if sourceCounts[collector.Name()] >= collector.MaxResults() {
|
||||
break
|
||||
}
|
||||
items, err := collector.Collect(s, searchQuery)
|
||||
if err != nil {
|
||||
s.debug("search_service:collector_error", map[string]any{
|
||||
"collector": collector.Name(),
|
||||
"query": searchQuery,
|
||||
"error": err.Error(),
|
||||
})
|
||||
lastErr = err
|
||||
continue
|
||||
}
|
||||
s.debug("search_service:collector_results", map[string]any{
|
||||
"collector": collector.Name(),
|
||||
"query": searchQuery,
|
||||
"rawCount": len(items),
|
||||
"sourceCount": sourceCounts[collector.Name()],
|
||||
})
|
||||
for _, item := range items {
|
||||
item = normalizeResultForCollector(collector.Name(), item)
|
||||
if item.Link == "" || seen[item.Link] || !collector.Accept(item) {
|
||||
@@ -123,6 +145,11 @@ func (s *SearchService) SearchMedia(queries []string, enabledPlatforms map[strin
|
||||
sort.SliceStable(results, func(i, j int) bool {
|
||||
return sourceWeight(results[i].Source) > sourceWeight(results[j].Source)
|
||||
})
|
||||
s.debug("search_service:complete", map[string]any{
|
||||
"resultCount": len(results),
|
||||
"sourceCounts": sourceCounts,
|
||||
"hadError": lastErr != nil,
|
||||
})
|
||||
return s.EnrichResults(results), nil
|
||||
}
|
||||
|
||||
@@ -131,6 +158,10 @@ func (s *SearchService) EnrichResults(results []SearchResult) []SearchResult {
|
||||
if limit == 0 {
|
||||
return results
|
||||
}
|
||||
s.debug("search_service:enrich_start", map[string]any{
|
||||
"total": len(results),
|
||||
"limit": limit,
|
||||
})
|
||||
|
||||
enriched := make([]SearchResult, len(results))
|
||||
copy(enriched, results)
|
||||
@@ -143,10 +174,24 @@ func (s *SearchService) EnrichResults(results []SearchResult) []SearchResult {
|
||||
defer wg.Done()
|
||||
sem <- struct{}{}
|
||||
defer func() { <-sem }()
|
||||
s.debug("search_service:enrich_item_start", map[string]any{
|
||||
"index": i,
|
||||
"link": enriched[i].Link,
|
||||
"source": enriched[i].Source,
|
||||
})
|
||||
enriched[i] = s.enrichResult(enriched[i])
|
||||
s.debug("search_service:enrich_item_done", map[string]any{
|
||||
"index": i,
|
||||
"link": enriched[i].Link,
|
||||
"source": enriched[i].Source,
|
||||
"thumbnail": strings.TrimSpace(enriched[i].ThumbnailURL) != "",
|
||||
"preview": strings.TrimSpace(enriched[i].PreviewVideoURL) != "",
|
||||
"title": truncateForDebug(enriched[i].Title, 120),
|
||||
})
|
||||
}(idx)
|
||||
}
|
||||
wg.Wait()
|
||||
s.debug("search_service:enrich_complete", map[string]any{"limit": limit})
|
||||
return enriched
|
||||
}
|
||||
|
||||
@@ -163,10 +208,21 @@ func (s *SearchService) enrichResult(result SearchResult) SearchResult {
|
||||
}
|
||||
|
||||
func (s *SearchService) searchWithFallback(query, categories, engine, source string) ([]SearchResult, error) {
|
||||
s.debug("search_service:search_with_fallback", map[string]any{
|
||||
"query": query,
|
||||
"categories": categories,
|
||||
"engine": engine,
|
||||
"source": source,
|
||||
})
|
||||
items, err := s.search(query, categories, engine, source)
|
||||
if err == nil {
|
||||
return items, nil
|
||||
}
|
||||
s.debug("search_service:search_with_fallback_primary_error", map[string]any{
|
||||
"query": query,
|
||||
"engine": engine,
|
||||
"error": err.Error(),
|
||||
})
|
||||
if strings.TrimSpace(engine) == "" {
|
||||
return nil, err
|
||||
}
|
||||
@@ -174,8 +230,10 @@ func (s *SearchService) searchWithFallback(query, categories, engine, source str
|
||||
}
|
||||
|
||||
func (s *SearchService) enrichEnvato(result SearchResult) SearchResult {
|
||||
s.debug("search_service:enrich_envato_start", map[string]any{"link": result.Link})
|
||||
html, err := s.fetchText(result.Link)
|
||||
if err != nil {
|
||||
s.debug("search_service:enrich_envato_fetch_error", map[string]any{"link": result.Link, "error": err.Error()})
|
||||
return result
|
||||
}
|
||||
videoMeta := extractVideoObjectJSONLD(html)
|
||||
@@ -224,14 +282,21 @@ func (s *SearchService) enrichEnvato(result SearchResult) SearchResult {
|
||||
)
|
||||
}
|
||||
}
|
||||
s.debug("search_service:enrich_envato_done", map[string]any{
|
||||
"link": result.Link,
|
||||
"thumbnail": strings.TrimSpace(result.ThumbnailURL) != "",
|
||||
"preview": strings.TrimSpace(result.PreviewVideoURL) != "",
|
||||
})
|
||||
return result
|
||||
}
|
||||
|
||||
func (s *SearchService) enrichArtgrid(result SearchResult) SearchResult {
|
||||
clipID := extractArtgridClipID(result.Link)
|
||||
if clipID == "" {
|
||||
s.debug("search_service:enrich_artgrid_skip", map[string]any{"link": result.Link, "reason": "missing clip id"})
|
||||
return result
|
||||
}
|
||||
s.debug("search_service:enrich_artgrid_start", map[string]any{"link": result.Link, "clipId": clipID})
|
||||
|
||||
apiURL := "https://artgrid.io/api/clip/details?clipId=" + clipID
|
||||
body, err := s.fetchJSONText(apiURL)
|
||||
@@ -244,11 +309,15 @@ func (s *SearchService) enrichArtgrid(result SearchResult) SearchResult {
|
||||
result.PreviewVideoURL = pickVideoURL(urls)
|
||||
}
|
||||
}
|
||||
if err != nil {
|
||||
s.debug("search_service:enrich_artgrid_api_error", map[string]any{"link": result.Link, "clipId": clipID, "error": err.Error()})
|
||||
}
|
||||
|
||||
if result.ThumbnailURL == "" || result.PreviewVideoURL == "" {
|
||||
html, err := s.fetchText(result.Link)
|
||||
if err == nil {
|
||||
if !isMatchingArtgridClipPage(html, clipID) {
|
||||
s.debug("search_service:enrich_artgrid_html_mismatch", map[string]any{"link": result.Link, "clipId": clipID})
|
||||
return result
|
||||
}
|
||||
result.Title = firstNonEmpty(
|
||||
@@ -289,6 +358,12 @@ func (s *SearchService) enrichArtgrid(result SearchResult) SearchResult {
|
||||
}
|
||||
}
|
||||
}
|
||||
s.debug("search_service:enrich_artgrid_done", map[string]any{
|
||||
"link": result.Link,
|
||||
"clipId": clipID,
|
||||
"thumbnail": strings.TrimSpace(result.ThumbnailURL) != "",
|
||||
"preview": strings.TrimSpace(result.PreviewVideoURL) != "",
|
||||
})
|
||||
|
||||
return result
|
||||
}
|
||||
@@ -307,6 +382,13 @@ func (s *SearchService) search(query, categories, engine, source string) ([]Sear
|
||||
}
|
||||
|
||||
endpoint := s.BaseURL + "/search?" + values.Encode()
|
||||
s.debug("search_service:searx_request", map[string]any{
|
||||
"endpoint": endpoint,
|
||||
"query": query,
|
||||
"categories": categories,
|
||||
"engine": engine,
|
||||
"source": source,
|
||||
})
|
||||
resp, err := s.Client.Get(endpoint)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -332,6 +414,11 @@ func (s *SearchService) search(query, categories, engine, source string) ([]Sear
|
||||
if err := json.NewDecoder(resp.Body).Decode(&payload); err != nil {
|
||||
return nil, fmt.Errorf("searxng JSON decode failed for query %q: %w", query, err)
|
||||
}
|
||||
s.debug("search_service:searx_response", map[string]any{
|
||||
"query": query,
|
||||
"source": source,
|
||||
"rawCount": len(payload.Results),
|
||||
})
|
||||
|
||||
results := make([]SearchResult, 0, len(payload.Results))
|
||||
for _, item := range payload.Results {
|
||||
@@ -351,6 +438,20 @@ func (s *SearchService) search(query, categories, engine, source string) ([]Sear
|
||||
return results, nil
|
||||
}
|
||||
|
||||
func (s *SearchService) debug(message string, data any) {
|
||||
if s != nil && s.Debug != nil {
|
||||
s.Debug(message, data)
|
||||
}
|
||||
}
|
||||
|
||||
// truncateForDebug trims surrounding whitespace from text and shortens it for
// debug output.
//
// limit is a byte budget: when the trimmed text fits within limit bytes it is
// returned unchanged; otherwise at most limit bytes are kept and "..." is
// appended. The cut is moved back to the nearest rune boundary so that a
// multi-byte UTF-8 character is never split in half. A negative limit is
// treated as zero instead of panicking on an invalid slice bound.
func truncateForDebug(text string, limit int) string {
	trimmed := strings.TrimSpace(text)
	if limit < 0 {
		limit = 0
	}
	if len(trimmed) <= limit {
		return trimmed
	}

	// Ranging over a string yields the byte offset at which each rune starts,
	// so the last offset not exceeding limit is the safe place to cut.
	cut := 0
	for offset := range trimmed {
		if offset > limit {
			break
		}
		cut = offset
	}
	return trimmed[:cut] + "..."
}
|
||||
|
||||
func buildGoogleVideoQueries(base string) []string {
|
||||
return []string{
|
||||
fmt.Sprintf(`"%s" ("stock footage" OR "b-roll" OR cinematic OR "establishing shot" OR editorial) -tutorial -"how to" -review -reaction -course -podcast -vlog -interview -breakdown -edit -editing`, base),
|
||||
|
||||
Reference in New Issue
Block a user