diff --git a/TODO.md b/TODO.md index c082178..6e90576 100644 --- a/TODO.md +++ b/TODO.md @@ -255,6 +255,20 @@ - backend debug broadcasts ## Recent Change Log +- Date: `2026-03-16` +- What changed: + - Hardened search result enrichment and recommendation metadata for preview recovery work. + - Added provider-aware fetch strategy for source HTML/JSON requests, broader Envato preview parsing, looser Artgrid HTML acceptance, and stronger thumbnail preservation rules. + - Added low-value thumbnail detection, ranking penalties for weak visuals, capped filler backfill, and response metadata fields for modal rendering (`mediaMode`, `embedUrl`, `previewBlockedReason`). + - Expanded debug summaries with usable-thumbnail and embed counts, and added unit coverage for the new parsing/ranking helpers. +- Why it changed: + - The latest production log showed Envato enrichment frequently failing, Artgrid enrichment collapsing on `403` plus HTML mismatch, and Gemini seeing too few usable visuals to do meaningful review. +- How it was verified: + - `go test ./...` +- What is still risky or incomplete: + - Frontend modal fallback behavior is not updated yet in this batch, so the new response metadata is not fully consumed until the UI patch lands. + - Envato source fetches may still fail on some pages if the provider changes challenge behavior again. + - Date: `2026-03-16` - What changed: - Added in-process query translation / expansion cache inside `GeminiService` so repeated identical searches can reuse the same English query and variant list without re-calling Gemini or Google Translate. 
diff --git a/backend/handlers/api.go b/backend/handlers/api.go index 99d30ca..2691d8a 100644 --- a/backend/handlers/api.go +++ b/backend/handlers/api.go @@ -81,14 +81,17 @@ type PreviewResponse struct { } type searchDebugSummary struct { - Total int `json:"total"` - BySource map[string]int `json:"bySource"` - WithPreview int `json:"withPreview"` - WithThumbnail int `json:"withThumbnail"` - Top []map[string]any `json:"top"` - Warning string `json:"warning,omitempty"` - DurationMS int64 `json:"durationMs,omitempty"` - GeminiCandidateCap int `json:"geminiCandidateCap,omitempty"` + Total int `json:"total"` + BySource map[string]int `json:"bySource"` + WithPreview int `json:"withPreview"` + WithThumbnail int `json:"withThumbnail"` + WithUsableThumbnail int `json:"withUsableThumbnail,omitempty"` + WithLowValueThumbnail int `json:"withLowValueThumbnail,omitempty"` + WithEmbedURL int `json:"withEmbedUrl,omitempty"` + Top []map[string]any `json:"top"` + Warning string `json:"warning,omitempty"` + DurationMS int64 `json:"durationMs,omitempty"` + GeminiCandidateCap int `json:"geminiCandidateCap,omitempty"` } type debugResponseWriter struct { @@ -484,6 +487,9 @@ func (a *App) searchMedia(c *gin.Context) { ) } merged = services.RandomizeTopRecommendations(merged, 8) + for idx := range merged { + merged[idx] = services.DecorateRecommendationMedia(merged[idx]) + } warning := "" if geminiErr != nil { warning = geminiErr.Error() @@ -628,6 +634,8 @@ func summarizeSearchResults(results []services.SearchResult, duration time.Durat bySource := map[string]int{} withPreview := 0 withThumbnail := 0 + withUsableThumbnail := 0 + withLowValueThumbnail := 0 top := make([]map[string]any, 0, min(6, len(results))) for idx, item := range results { bySource[item.Source]++ @@ -636,6 +644,12 @@ func summarizeSearchResults(results []services.SearchResult, duration time.Durat } if strings.TrimSpace(item.ThumbnailURL) != "" { withThumbnail++ + if services.HasUsableThumbnail(item.ThumbnailURL) { + 
withUsableThumbnail++ + } + if services.IsLowValueThumbnail(item.ThumbnailURL) { + withLowValueThumbnail++ + } } if idx < 6 { top = append(top, map[string]any{ @@ -643,20 +657,23 @@ func summarizeSearchResults(results []services.SearchResult, duration time.Durat "source": item.Source, "hasPreview": item.PreviewVideoURL != "", "hasThumbnail": item.ThumbnailURL != "", + "usableThumb": services.HasUsableThumbnail(item.ThumbnailURL), "displayLink": item.DisplayLink, "snippetSample": truncateText(item.Snippet, 160), }) } } return searchDebugSummary{ - Total: len(results), - BySource: bySource, - WithPreview: withPreview, - WithThumbnail: withThumbnail, - Top: top, - Warning: warning, - DurationMS: duration.Milliseconds(), - GeminiCandidateCap: geminiCap, + Total: len(results), + BySource: bySource, + WithPreview: withPreview, + WithThumbnail: withThumbnail, + WithUsableThumbnail: withUsableThumbnail, + WithLowValueThumbnail: withLowValueThumbnail, + Top: top, + Warning: warning, + DurationMS: duration.Milliseconds(), + GeminiCandidateCap: geminiCap, } } @@ -664,6 +681,9 @@ func summarizeRecommendationResults(results []services.AIRecommendation, duratio bySource := map[string]int{} withPreview := 0 withThumbnail := 0 + withUsableThumbnail := 0 + withLowValueThumbnail := 0 + withEmbedURL := 0 top := make([]map[string]any, 0, min(6, len(results))) for idx, item := range results { bySource[item.Source]++ @@ -672,6 +692,15 @@ func summarizeRecommendationResults(results []services.AIRecommendation, duratio } if strings.TrimSpace(item.ThumbnailURL) != "" { withThumbnail++ + if services.HasUsableThumbnail(item.ThumbnailURL) { + withUsableThumbnail++ + } + if services.IsLowValueThumbnail(item.ThumbnailURL) { + withLowValueThumbnail++ + } + } + if strings.TrimSpace(item.EmbedURL) != "" { + withEmbedURL++ } if idx < 6 { top = append(top, map[string]any{ @@ -679,19 +708,24 @@ func summarizeRecommendationResults(results []services.AIRecommendation, duratio "source": item.Source, 
"hasPreview": item.PreviewVideoURL != "", "hasThumbnail": item.ThumbnailURL != "", + "hasEmbed": item.EmbedURL != "", + "mediaMode": item.MediaMode, "reasonSample": truncateText(item.Reason, 120), "snippetSample": truncateText(item.Snippet, 160), }) } } return searchDebugSummary{ - Total: len(results), - BySource: bySource, - WithPreview: withPreview, - WithThumbnail: withThumbnail, - Top: top, - Warning: warning, - DurationMS: duration.Milliseconds(), + Total: len(results), + BySource: bySource, + WithPreview: withPreview, + WithThumbnail: withThumbnail, + WithUsableThumbnail: withUsableThumbnail, + WithLowValueThumbnail: withLowValueThumbnail, + WithEmbedURL: withEmbedURL, + Top: top, + Warning: warning, + DurationMS: duration.Milliseconds(), } } diff --git a/backend/services/cse.go b/backend/services/cse.go index 97fa891..33863cd 100644 --- a/backend/services/cse.go +++ b/backend/services/cse.go @@ -294,31 +294,16 @@ func (s *SearchService) enrichEnvato(result SearchResult) SearchResult { extractMetaContent(html, "twitter:image"), extractJSONLDValue(html, "thumbnailUrl"), ) - if shouldPreferPageThumbnail(result.ThumbnailURL, result.Link) { + if hasUsableThumbnail(pageThumbnail) && shouldPreferPageThumbnail(result.ThumbnailURL, result.Link) { result.ThumbnailURL = pageThumbnail } if result.PreviewVideoURL == "" { - result.PreviewVideoURL = firstNonEmpty( - videoMeta.ContentURL, - extractJSONLDValue(html, "contentUrl"), - extractMetaContent(html, "twitter:player:stream"), - extractVideoPreviewURL(html), - extractEnvatoPreviewFromHydration(html), - deriveEnvatoPreviewFromThumbnail(pageThumbnail), - deriveEnvatoPreviewFromThumbnail(result.ThumbnailURL), - ) + result.PreviewVideoURL = collectEnvatoPreviewURL(html, pageThumbnail, result.ThumbnailURL, videoMeta.ContentURL) } if result.PreviewVideoURL == "" { time.Sleep(1200 * time.Millisecond) if retryHTML, retryErr := s.fetchText(result.Link); retryErr == nil { - result.PreviewVideoURL = firstNonEmpty( - 
extractJSONLDValue(retryHTML, "contentUrl"), - extractMetaContent(retryHTML, "twitter:player:stream"), - extractVideoPreviewURL(retryHTML), - extractEnvatoPreviewFromHydration(retryHTML), - deriveEnvatoPreviewFromThumbnail(pageThumbnail), - deriveEnvatoPreviewFromThumbnail(result.ThumbnailURL), - ) + result.PreviewVideoURL = collectEnvatoPreviewURL(retryHTML, pageThumbnail, result.ThumbnailURL, "") } } s.debug("search_service:enrich_envato_done", map[string]any{ @@ -341,8 +326,8 @@ func (s *SearchService) enrichArtgrid(result SearchResult) SearchResult { body, err := s.fetchJSONText(apiURL) if err == nil { urls := collectURLs(body) - if result.ThumbnailURL == "" { - result.ThumbnailURL = pickImageURL(urls) + if !hasUsableThumbnail(result.ThumbnailURL) { + result.ThumbnailURL = pickArtgridImageURL(urls, clipID) } if result.PreviewVideoURL == "" { result.PreviewVideoURL = pickVideoURL(urls) @@ -356,12 +341,16 @@ func (s *SearchService) enrichArtgrid(result SearchResult) SearchResult { html, err := s.fetchText(result.Link) if err == nil { if !isMatchingArtgridClipPage(html, clipID) { - s.debug("search_service:enrich_artgrid_html_mismatch", map[string]any{"link": result.Link, "clipId": clipID}) + s.debug("search_service:enrich_artgrid_html_mismatch", map[string]any{ + "link": result.Link, + "clipId": clipID, + "signals": artgridHTMLSignals(html, clipID), + }) return result } result.Title = firstNonEmpty( cleanArtgridTitle(extractMetaContent(html, "og:title")), - cleanArtgridTitle(extractMetaContent(html, "title")), + cleanArtgridTitle(extractHTMLTitle(html)), result.Title, ) result.Snippet = firstNonEmpty( @@ -374,15 +363,20 @@ func (s *SearchService) enrichArtgrid(result SearchResult) SearchResult { extractMetaContent(html, "twitter:image"), extractArtgridBackgroundThumbnail(html, clipID), extractJSONLDValue(html, "image"), + pickArtgridImageURL(collectURLs(html), clipID), ) - if shouldPreferPageThumbnail(result.ThumbnailURL, result.Link) { + if 
hasUsableThumbnail(pageThumbnail) && shouldPreferPageThumbnail(result.ThumbnailURL, result.Link) { result.ThumbnailURL = pageThumbnail } if result.PreviewVideoURL == "" { result.PreviewVideoURL = firstNonEmpty( extractJSONLDValue(html, "contentUrl"), extractMetaContent(html, "twitter:player:stream"), + extractMetaContent(html, "og:video"), + extractMetaContent(html, "og:video:url"), + extractMetaContent(html, "og:video:secure_url"), extractVideoPreviewURL(html), + pickVideoURL(collectURLs(html)), ) } if result.PreviewVideoURL == "" { @@ -677,6 +671,112 @@ func deriveThumbnail(link string) string { return "" } +func isLowValueThumbnail(raw string) bool { + lower := strings.ToLower(strings.TrimSpace(raw)) + if lower == "" { + return true + } + for _, token := range []string{ + "favicon", "apple-touch-icon", "/logo", "/icon", "icon.", "logo.", "placehold.co", + } { + if strings.Contains(lower, token) { + return true + } + } + for _, host := range []string{ + "googleusercontent.com", "gstatic.com", "bing.com", "duckduckgo.com", "icons.duckduckgo.com", + } { + if strings.Contains(lower, host) && !strings.Contains(lower, "ytimg.com") { + return true + } + } + return false +} + +func hasUsableThumbnail(raw string) bool { + return strings.TrimSpace(raw) != "" && !isLowValueThumbnail(raw) +} + +func HasUsableThumbnail(raw string) bool { + return hasUsableThumbnail(raw) +} + +func IsLowValueThumbnail(raw string) bool { + return isLowValueThumbnail(raw) +} + +func buildEmbedURL(source, link string) string { + trimmed := strings.TrimSpace(link) + if trimmed == "" { + return "" + } + if strings.EqualFold(strings.TrimSpace(source), "Google Video") { + if videoID := extractYouTubeID(trimmed); videoID != "" { + return "https://www.youtube-nocookie.com/embed/" + videoID + "?autoplay=1&rel=0&playsinline=1&modestbranding=1&enablejsapi=1" + } + } + return trimmed +} + +func defaultMediaMode(source, link, previewURL, thumbnailURL string) (string, string, string) { + embedURL := 
buildEmbedURL(source, link) + switch source { + case "Google Video": + if embedURL != "" { + return "embed", embedURL, "" + } + if hasUsableThumbnail(thumbnailURL) { + return "thumbnail", "", "missing_google_embed" + } + return "none", "", "missing_google_embed" + case "Envato": + if strings.TrimSpace(previewURL) != "" { + return "preview_video", embedURL, "provider_embed_blocked" + } + if hasUsableThumbnail(thumbnailURL) { + return "thumbnail", embedURL, "provider_embed_blocked" + } + if embedURL != "" { + return "embed", embedURL, "" + } + return "none", "", "provider_embed_blocked" + case "Artgrid": + if hasUsableThumbnail(thumbnailURL) { + return "thumbnail", embedURL, "provider_preview_unavailable" + } + if strings.TrimSpace(previewURL) != "" { + return "preview_video", embedURL, "provider_preview_unavailable" + } + if embedURL != "" { + return "embed", embedURL, "" + } + return "none", "", "provider_preview_unavailable" + default: + if strings.TrimSpace(previewURL) != "" { + return "preview_video", embedURL, "" + } + if hasUsableThumbnail(thumbnailURL) { + return "thumbnail", embedURL, "" + } + if embedURL != "" { + return "embed", embedURL, "" + } + return "none", "", "" + } +} + +func DecorateRecommendationMedia(item AIRecommendation) AIRecommendation { + item.EmbedURL = buildEmbedURL(item.Source, item.Link) + item.MediaMode, _, item.PreviewBlockedReason = defaultMediaMode(item.Source, item.Link, item.PreviewVideoURL, item.ThumbnailURL) + if item.MediaMode == "embed" && item.EmbedURL == "" { + item.MediaMode = "none" + } + if item.MediaMode == "thumbnail" && !hasUsableThumbnail(item.ThumbnailURL) && strings.TrimSpace(item.PreviewVideoURL) != "" { + item.MediaMode = "preview_video" + } + return item +} + func extractYouTubeID(link string) string { patterns := []*regexp.Regexp{ regexp.MustCompile(`(?:v=|\/shorts\/|\/embed\/)([A-Za-z0-9_-]{11})`), @@ -705,6 +805,15 @@ func extractMetaContent(html, property string) string { return "" } +func 
extractHTMLTitle(html string) string { + pattern := regexp.MustCompile(`(?is)<title[^>]*>(.*?)</title>`) + matches := pattern.FindStringSubmatch(html) + if len(matches) == 2 { + return htmlUnescape(strings.TrimSpace(matches[1])) + } + return "" +} + func extractVideoPreviewURL(html string) string { normalizedHTML := strings.ReplaceAll(html, `\\\/`, `/`) normalizedHTML = strings.ReplaceAll(normalizedHTML, `\/`, `/`) @@ -737,6 +846,19 @@ func extractArtgridBackgroundThumbnail(html, clipID string) string { return "" } +func pickArtgridImageURL(urls []string, clipID string) string { + for _, item := range urls { + lower := strings.ToLower(item) + if !(strings.Contains(lower, ".jpg") || strings.Contains(lower, ".jpeg") || strings.Contains(lower, ".png") || strings.Contains(lower, ".webp")) { + continue + } + if strings.Contains(item, clipID) || strings.Contains(lower, "graded-thumbnail") || strings.Contains(lower, "imgix") { + return item + } + } + return pickImageURL(urls) +} + func extractArtgridClipID(link string) string { patterns := []*regexp.Regexp{ regexp.MustCompile(`/clip/([0-9]+)/`), @@ -840,30 +962,38 @@ func (s *SearchService) fetchText(target string) (string, error) { s.debug("search_service:fetch_cache_hit", map[string]any{"type": "html", "target": target, "bytes": len(cached)}) return cached, nil } - - req, err := newBrowserRequest(http.MethodGet, target, "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8") + for _, strategy := range []string{"default", "provider"} { + req, err := newBrowserRequest(http.MethodGet, target, "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", strategy) + if err != nil { + return "", err + } + s.debug("search_service:envato_fetch_strategy", map[string]any{"target": target, "strategy": strategy}) + resp, err := s.Client.Do(req) + if err != nil { + continue + } + data, readErr := io.ReadAll(io.LimitReader(resp.Body, 1024*1024)) + _ = resp.Body.Close() + if readErr != nil { + continue + } + if resp.StatusCode == 
http.StatusForbidden || resp.StatusCode == http.StatusServiceUnavailable { + continue + } + if resp.StatusCode >= 300 { + continue + } + if looksLikeCloudflareChallenge(string(data)) { + continue + } + body := string(data) + s.setCachedFetchResult(cacheKey, body, 3*time.Minute) + return body, nil + } + body, err := fetchTextViaPython(target) if err != nil { return "", err } - resp, err := s.Client.Do(req) - if err != nil { - return "", err - } - defer resp.Body.Close() - if resp.StatusCode == http.StatusForbidden || resp.StatusCode == http.StatusServiceUnavailable { - return fetchTextViaPython(target) - } - if resp.StatusCode >= 300 { - return "", fmt.Errorf("fetch returned status %d", resp.StatusCode) - } - data, err := io.ReadAll(io.LimitReader(resp.Body, 1024*1024)) - if err != nil { - return "", err - } - if looksLikeCloudflareChallenge(string(data)) { - return fetchTextViaPython(target) - } - body := string(data) s.setCachedFetchResult(cacheKey, body, 3*time.Minute) return body, nil } @@ -875,7 +1005,7 @@ func (s *SearchService) fetchJSONText(target string) (string, error) { return cached, nil } - req, err := newBrowserRequest(http.MethodGet, target, "application/json, text/json, */*") + req, err := newBrowserRequest(http.MethodGet, target, "application/json, text/json, */*", "provider") if err != nil { return "", err } @@ -1034,19 +1164,35 @@ func cleanArtgridDescription(description string) string { return strings.TrimSpace(description) } +func artgridHTMLSignals(html, clipID string) map[string]bool { + ogURL := extractMetaContent(html, "og:url") + canonical := extractCanonicalURL(html) + alWebURL := extractMetaContent(html, "al:web:url") + lowerHTML := strings.ToLower(html) + title := strings.ToLower(extractHTMLTitle(html)) + ogImage := strings.ToLower(extractMetaContent(html, "og:image")) + twitterImage := strings.ToLower(extractMetaContent(html, "twitter:image")) + return map[string]bool{ + "og_url_clip": strings.Contains(ogURL, clipID), + 
"canonical_clip": strings.Contains(canonical, clipID), + "al_web_clip": strings.Contains(alWebURL, clipID), + "body_main_clipvideo": strings.Contains(lowerHTML, "main-clipvideo_"+clipID), + "body_clip_path": strings.Contains(lowerHTML, "/clip/"+clipID+"/"), + "body_clip_id": strings.Contains(lowerHTML, clipID), + "title_mentions_clip": strings.Contains(title, "artgrid") || strings.Contains(title, "artlist"), + "image_clip": strings.Contains(ogImage, strings.ToLower(clipID)) || strings.Contains(twitterImage, strings.ToLower(clipID)), + } +} + func isMatchingArtgridClipPage(html, clipID string) bool { if clipID == "" { return false } - ogURL := extractMetaContent(html, "og:url") - canonical := extractCanonicalURL(html) - lowerHTML := strings.ToLower(html) - for _, candidate := range []string{ogURL, canonical} { - if strings.Contains(candidate, clipID) { - return true - } + signals := artgridHTMLSignals(html, clipID) + if signals["og_url_clip"] || signals["canonical_clip"] || signals["al_web_clip"] || signals["body_main_clipvideo"] || signals["body_clip_path"] || signals["image_clip"] { + return true } - if strings.Contains(lowerHTML, "main-clipvideo_"+clipID) || strings.Contains(lowerHTML, "/clip/"+clipID+"/") { + if signals["body_clip_id"] && signals["title_mentions_clip"] { return true } return false @@ -1090,6 +1236,23 @@ func extractEnvatoPreviewFromHydration(html string) string { return firstNonEmpty(pickBestEnvatoPreviewURL(urls), extractVideoPreviewURL(string(decoded))) } +func collectEnvatoPreviewURL(html, pageThumbnail, currentThumbnail, contentURL string) string { + urls := collectURLs(html) + return firstNonEmpty( + contentURL, + extractJSONLDValue(html, "contentUrl"), + extractMetaContent(html, "twitter:player:stream"), + extractMetaContent(html, "og:video"), + extractMetaContent(html, "og:video:url"), + extractMetaContent(html, "og:video:secure_url"), + extractEnvatoPreviewFromHydration(html), + pickBestEnvatoPreviewURL(urls), + 
extractVideoPreviewURL(html), + deriveEnvatoPreviewFromThumbnail(pageThumbnail), + deriveEnvatoPreviewFromThumbnail(currentThumbnail), + ) +} + func extractWindowAssignedValue(html, variable string) string { pattern := regexp.MustCompile(`window\.` + regexp.QuoteMeta(variable) + `\s*=\s*"([^"]+)"`) matches := pattern.FindStringSubmatch(html) @@ -1121,7 +1284,19 @@ func pickBestEnvatoPreviewURL(urls []string) string { return "" } -func newBrowserRequest(method, target, accept string) (*http.Request, error) { +func inferFetchReferer(target string) string { + lower := strings.ToLower(target) + switch { + case strings.Contains(lower, "envatousercontent.com"), strings.Contains(lower, "elements.envato.com"): + return "https://elements.envato.com/" + case strings.Contains(lower, "artgrid"), strings.Contains(lower, "artlist"): + return "https://artgrid.io/" + default: + return "" + } +} + +func newBrowserRequest(method, target, accept, strategy string) (*http.Request, error) { req, err := http.NewRequest(method, target, nil) if err != nil { return nil, err @@ -1131,6 +1306,14 @@ func newBrowserRequest(method, target, accept string) (*http.Request, error) { if accept != "" { req.Header.Set("Accept", accept) } + if strategy == "provider" { + req.Header.Set("Referer", inferFetchReferer(target)) + req.Header.Set("Upgrade-Insecure-Requests", "1") + req.Header.Set("Sec-Fetch-Dest", "document") + req.Header.Set("Sec-Fetch-Mode", "navigate") + req.Header.Set("Sec-Fetch-Site", "none") + req.Header.Set("Sec-Fetch-User", "?1") + } return req, nil } @@ -1142,11 +1325,17 @@ req = Request(sys.argv[1], headers={ "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36", "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", "Accept-Language": "en-US,en;q=0.9", + "Referer": sys.argv[2], + "Upgrade-Insecure-Requests": "1", + "Sec-Fetch-Dest": "document", + "Sec-Fetch-Mode": "navigate", + "Sec-Fetch-Site": 
"none", + "Sec-Fetch-User": "?1", }) with urlopen(req, timeout=20) as resp: sys.stdout.buffer.write(resp.read(1024 * 1024)) ` - output, err := exec.Command("python3", "-c", script, target).CombinedOutput() + output, err := exec.Command("python3", "-c", script, target, inferFetchReferer(target)).CombinedOutput() if err != nil { return "", fmt.Errorf("python fallback failed: %v: %s", err, truncateBytes(output, 300)) } diff --git a/backend/services/cse_test.go b/backend/services/cse_test.go index fbb96cd..99b96d8 100644 --- a/backend/services/cse_test.go +++ b/backend/services/cse_test.go @@ -44,6 +44,15 @@ func TestExtractEnvatoPreviewFromHydration(t *testing.T) { } } +func TestCollectEnvatoPreviewURLFindsOgVideo(t *testing.T) { + html := `<meta property="og:video" content="https://video-previews.elements.envatousercontent.com/example/watermarked_preview/watermarked_preview.mp4">` + got := collectEnvatoPreviewURL(html, "", "", "") + want := "https://video-previews.elements.envatousercontent.com/example/watermarked_preview/watermarked_preview.mp4" + if got != want { + t.Fatalf("expected %q, got %q", want, got) + } +} + func TestIsUsefulGoogleVideoResultRejectsMusicResults(t *testing.T) { result := SearchResult{ Title: "Couple Friendly Sad Bgm Movie Best Bgm", @@ -109,6 +118,22 @@ func TestIsMatchingArtgridClipPageRejectsHomepage(t *testing.T) { } } +func TestIsMatchingArtgridClipPageAcceptsBodySignals(t *testing.T) { + html := `<html><head><title>Night City | Stock Video Footage - Artgrid.io</title></head><body><div data-clip-id="6600269"></div></body></html>` + if !isMatchingArtgridClipPage(html, "6600269") { + t.Fatal("expected body/title signal Artgrid HTML to be accepted") + } +} + +func TestLowValueThumbnailDetection(t *testing.T) { + if !IsLowValueThumbnail("https://example.com/favicon.ico") { + t.Fatal("expected favicon to be low-value thumbnail") + } + if IsLowValueThumbnail("https://i.ytimg.com/vi/abcd1234xyz/hqdefault.jpg") { + t.Fatal("expected youtube thumbnail to be usable") + } +} + func TestGeminiCandidateLimitNeverExceedsCandidates(t *testing.T) { if got := GeminiCandidateLimit(9); got != 9 { t.Fatalf("expected Gemini limit to stay within candidate count, got %d", got) diff 
--git a/backend/services/gemini.go b/backend/services/gemini.go index dd43149..770fbc6 100644 --- a/backend/services/gemini.go +++ b/backend/services/gemini.go @@ -47,14 +47,17 @@ type cachedExpansionValue struct { } type AIRecommendation struct { - Title string `json:"title"` - Link string `json:"link"` - Snippet string `json:"snippet"` - ThumbnailURL string `json:"thumbnailUrl"` - PreviewVideoURL string `json:"previewVideoUrl"` - Source string `json:"source"` - Reason string `json:"reason"` - Recommended bool `json:"recommended"` + Title string `json:"title"` + Link string `json:"link"` + Snippet string `json:"snippet"` + ThumbnailURL string `json:"thumbnailUrl"` + PreviewVideoURL string `json:"previewVideoUrl"` + Source string `json:"source"` + Reason string `json:"reason"` + Recommended bool `json:"recommended"` + MediaMode string `json:"mediaMode,omitempty"` + EmbedURL string `json:"embedUrl,omitempty"` + PreviewBlockedReason string `json:"previewBlockedReason,omitempty"` } type QueryExpansion struct { @@ -480,6 +483,14 @@ func (g *GeminiService) fetchCandidateVisualInlineData(candidate SearchResult) ( } } if candidate.ThumbnailURL != "" { + if isLowValueThumbnail(candidate.ThumbnailURL) { + g.debug("gemini:vision_candidate_rejected_low_value", map[string]any{ + "link": candidate.Link, + "source": candidate.Source, + "thumbnailUrl": candidate.ThumbnailURL, + }) + return "", "", fmt.Errorf("candidate thumbnail is low value") + } cacheKey := "image\n" + candidate.ThumbnailURL if data, mimeType, ok := g.getCachedVisual(cacheKey); ok { return data, mimeType, nil diff --git a/backend/services/gemini_test.go b/backend/services/gemini_test.go index a3c59d8..17d91ef 100644 --- a/backend/services/gemini_test.go +++ b/backend/services/gemini_test.go @@ -3,6 +3,7 @@ package services import ( "net/http" "net/http/httptest" + "strings" "testing" "time" ) @@ -114,3 +115,27 @@ func TestGeminiExpansionCacheRoundTrip(t *testing.T) { t.Fatalf("unexpected expansion cache value: 
%#v", value) } } + +func TestDecorateRecommendationMediaUsesEmbedForGoogleVideo(t *testing.T) { + item := DecorateRecommendationMedia(AIRecommendation{ + Source: "Google Video", + Link: "https://www.youtube.com/watch?v=dQw4w9WgXcQ", + }) + if item.MediaMode != "embed" { + t.Fatalf("expected embed media mode, got %q", item.MediaMode) + } + if item.EmbedURL == "" || !strings.Contains(item.EmbedURL, "youtube-nocookie.com/embed/") { + t.Fatalf("unexpected embed url: %q", item.EmbedURL) + } +} + +func TestRankSearchResultsPrefersUsableVisuals(t *testing.T) { + results := []SearchResult{ + {Title: "cyberpunk city", Link: "https://example.com/a", ThumbnailURL: "https://example.com/favicon.ico"}, + {Title: "cyberpunk city", Link: "https://example.com/b", ThumbnailURL: "https://example.com/frame.jpg"}, + } + ranked := RankSearchResults("cyberpunk city", results) + if ranked[0].Link != "https://example.com/b" { + t.Fatalf("expected usable thumbnail result first, got %#v", ranked) + } +} diff --git a/backend/services/ranker.go b/backend/services/ranker.go index 8811b34..5cb52b4 100644 --- a/backend/services/ranker.go +++ b/backend/services/ranker.go @@ -10,6 +10,8 @@ import ( ) const GeminiFallbackReason = "Gemini Vision 응답이 부족해 키워드 기준으로 보강된 결과입니다." +const FallbackPreviewReason = "Fallback due to missing provider preview." +const PendingVisualReason = "Ranked candidate pending stronger visual evidence." 
type GeminiBatchStats struct { CandidateCap int `json:"candidateCap"` @@ -19,6 +21,7 @@ type GeminiBatchStats struct { Failed int `json:"failed"` SequentialRetried int `json:"sequentialRetried"` RecommendedCount int `json:"recommendedCount"` + VisualRejectCount int `json:"visualRejectCount"` Errors []string `json:"errors,omitempty"` } @@ -58,19 +61,25 @@ func RankSearchResults(query string, results []SearchResult) []SearchResult { score -= 4 } } - if result.ThumbnailURL != "" { - score += 2 - } if result.PreviewVideoURL != "" { - score += 3 + score += 10 + } + if hasUsableThumbnail(result.ThumbnailURL) { + score += 5 + } + if isLowValueThumbnail(result.ThumbnailURL) { + score -= 8 + } + if strings.TrimSpace(result.PreviewVideoURL) == "" && !hasUsableThumbnail(result.ThumbnailURL) { + score -= 10 } switch result.Source { case "Google Video": - score -= 1 + score -= 2 case "Envato": - score += 7 + score += 5 case "Artgrid": - score += 7 + score += 4 } scored = append(scored, scoredResult{item: result, score: score}) } @@ -106,6 +115,11 @@ func EvaluateAllCandidatesWithGeminiWithDeadline(service *GeminiService, query s CandidateCap: limit, Requested: min(limit, len(ranked)), } + for _, item := range ranked[:min(limit, len(ranked))] { + if strings.TrimSpace(item.PreviewVideoURL) == "" && !hasUsableThumbnail(item.ThumbnailURL) { + stats.VisualRejectCount++ + } + } type batchResult struct { index int recommendations []AIRecommendation @@ -231,7 +245,7 @@ func BuildFallbackRecommendations(ranked []SearchResult, limit int, reason strin fallback := make([]AIRecommendation, 0, min(limit, len(ranked))) for _, item := range ranked[:min(limit, len(ranked))] { - fallback = append(fallback, AIRecommendation{ + fallback = append(fallback, DecorateRecommendationMedia(AIRecommendation{ Title: item.Title, Link: item.Link, Snippet: item.Snippet, @@ -240,7 +254,7 @@ func BuildFallbackRecommendations(ranked []SearchResult, limit int, reason strin Source: item.Source, Reason: reason, 
Recommended: false, - }) + })) } return fallback } @@ -385,6 +399,8 @@ func looksNegativeReason(reason string) bool { func MergeRecommendations(recommended []AIRecommendation, ranked []SearchResult, limit int) []AIRecommendation { merged := make([]AIRecommendation, 0, min(limit, len(ranked))) seen := map[string]bool{} + fillerCount := 0 + maxFiller := min(4, limit) for _, item := range recommended { if !item.Recommended { @@ -394,7 +410,7 @@ func MergeRecommendations(recommended []AIRecommendation, ranked []SearchResult, continue } seen[item.Link] = true - merged = append(merged, item) + merged = append(merged, DecorateRecommendationMedia(item)) } for _, item := range recommended { @@ -404,8 +420,11 @@ func MergeRecommendations(recommended []AIRecommendation, ranked []SearchResult, if looksNegativeReason(item.Reason) || strings.Contains(item.Reason, GeminiFallbackReason) { continue } + if strings.TrimSpace(item.PreviewVideoURL) == "" && !hasUsableThumbnail(item.ThumbnailURL) { + continue + } seen[item.Link] = true - merged = append(merged, item) + merged = append(merged, DecorateRecommendationMedia(item)) } if len(merged) < min(12, limit) { @@ -413,20 +432,24 @@ func MergeRecommendations(recommended []AIRecommendation, ranked []SearchResult, if len(merged) >= min(12, limit) || item.Link == "" || seen[item.Link] { continue } - if strings.TrimSpace(item.ThumbnailURL) == "" && strings.TrimSpace(item.PreviewVideoURL) == "" { + if fillerCount >= maxFiller { + break + } + if strings.TrimSpace(item.PreviewVideoURL) == "" && !hasUsableThumbnail(item.ThumbnailURL) { continue } seen[item.Link] = true - merged = append(merged, AIRecommendation{ + merged = append(merged, DecorateRecommendationMedia(AIRecommendation{ Title: item.Title, Link: item.Link, Snippet: item.Snippet, ThumbnailURL: item.ThumbnailURL, PreviewVideoURL: item.PreviewVideoURL, Source: item.Source, - Reason: "Gemini 검토가 부족해 편집용 후보로 추가된 결과입니다.", + Reason: PendingVisualReason, Recommended: false, - }) + })) + 
fillerCount++ } } return merged @@ -435,31 +458,37 @@ func MergeRecommendations(recommended []AIRecommendation, ranked []SearchResult, func BackfillRecommendations(existing []AIRecommendation, ranked []SearchResult, limit int, reason string) []AIRecommendation { merged := make([]AIRecommendation, 0, min(limit, len(ranked))) seen := map[string]bool{} + fillerCount := 0 + maxFiller := min(4, limit) for _, item := range existing { if item.Link == "" || seen[item.Link] { continue } seen[item.Link] = true - merged = append(merged, item) + merged = append(merged, DecorateRecommendationMedia(item)) } for _, item := range ranked { if len(merged) >= limit || item.Link == "" || seen[item.Link] { continue } - if strings.TrimSpace(item.ThumbnailURL) == "" && strings.TrimSpace(item.PreviewVideoURL) == "" { + if fillerCount >= maxFiller { + break + } + if strings.TrimSpace(item.PreviewVideoURL) == "" && !hasUsableThumbnail(item.ThumbnailURL) { continue } seen[item.Link] = true - merged = append(merged, AIRecommendation{ + merged = append(merged, DecorateRecommendationMedia(AIRecommendation{ Title: item.Title, Link: item.Link, Snippet: item.Snippet, ThumbnailURL: item.ThumbnailURL, PreviewVideoURL: item.PreviewVideoURL, Source: item.Source, - Reason: reason, + Reason: firstNonEmpty(strings.TrimSpace(reason), FallbackPreviewReason), Recommended: false, - }) + })) + fillerCount++ } return merged }