Harden preview enrichment and recommendation metadata
build-push / docker (push) Has been cancelled

This commit is contained in:
AI Assistant
2026-03-16 16:39:09 +09:00
parent 93b9f571ab
commit 2064825d29
7 changed files with 433 additions and 106 deletions
+14
View File
@@ -255,6 +255,20 @@
- backend debug broadcasts
## Recent Change Log
- Date: `2026-03-16`
- What changed:
- Hardened search result enrichment and recommendation metadata for preview recovery work.
- Added provider-aware fetch strategy for source HTML/JSON requests, broader Envato preview parsing, looser Artgrid HTML acceptance, and stronger thumbnail preservation rules.
- Added low-value thumbnail detection, ranking penalties for weak visuals, capped filler backfill, and response metadata fields for modal rendering (`mediaMode`, `embedUrl`, `previewBlockedReason`).
- Expanded debug summaries with usable-thumbnail and embed counts, and added unit coverage for the new parsing/ranking helpers.
- Why it changed:
- The latest production log showed Envato enrichment frequently failing, Artgrid enrichment collapsing on `403` plus HTML mismatch, and Gemini seeing too few usable visuals to do meaningful review.
- How it was verified:
- `go test ./...`
- What is still risky or incomplete:
- Frontend modal fallback behavior is not updated yet in this batch, so the new response metadata is not fully consumed until the UI patch lands.
- Envato source fetches may still fail on some pages if the provider changes challenge behavior again.
- Date: `2026-03-16`
- What changed:
- Added in-process query translation / expansion cache inside `GeminiService` so repeated identical searches can reuse the same English query and variant list without re-calling Gemini or Google Translate.
+57 -23
View File
@@ -81,14 +81,17 @@ type PreviewResponse struct {
}
type searchDebugSummary struct {
Total int `json:"total"`
BySource map[string]int `json:"bySource"`
WithPreview int `json:"withPreview"`
WithThumbnail int `json:"withThumbnail"`
Top []map[string]any `json:"top"`
Warning string `json:"warning,omitempty"`
DurationMS int64 `json:"durationMs,omitempty"`
GeminiCandidateCap int `json:"geminiCandidateCap,omitempty"`
Total int `json:"total"`
BySource map[string]int `json:"bySource"`
WithPreview int `json:"withPreview"`
WithThumbnail int `json:"withThumbnail"`
WithUsableThumbnail int `json:"withUsableThumbnail,omitempty"`
WithLowValueThumbnail int `json:"withLowValueThumbnail,omitempty"`
WithEmbedURL int `json:"withEmbedUrl,omitempty"`
Top []map[string]any `json:"top"`
Warning string `json:"warning,omitempty"`
DurationMS int64 `json:"durationMs,omitempty"`
GeminiCandidateCap int `json:"geminiCandidateCap,omitempty"`
}
type debugResponseWriter struct {
@@ -484,6 +487,9 @@ func (a *App) searchMedia(c *gin.Context) {
)
}
merged = services.RandomizeTopRecommendations(merged, 8)
for idx := range merged {
merged[idx] = services.DecorateRecommendationMedia(merged[idx])
}
warning := ""
if geminiErr != nil {
warning = geminiErr.Error()
@@ -628,6 +634,8 @@ func summarizeSearchResults(results []services.SearchResult, duration time.Durat
bySource := map[string]int{}
withPreview := 0
withThumbnail := 0
withUsableThumbnail := 0
withLowValueThumbnail := 0
top := make([]map[string]any, 0, min(6, len(results)))
for idx, item := range results {
bySource[item.Source]++
@@ -636,6 +644,12 @@ func summarizeSearchResults(results []services.SearchResult, duration time.Durat
}
if strings.TrimSpace(item.ThumbnailURL) != "" {
withThumbnail++
if services.HasUsableThumbnail(item.ThumbnailURL) {
withUsableThumbnail++
}
if services.IsLowValueThumbnail(item.ThumbnailURL) {
withLowValueThumbnail++
}
}
if idx < 6 {
top = append(top, map[string]any{
@@ -643,20 +657,23 @@ func summarizeSearchResults(results []services.SearchResult, duration time.Durat
"source": item.Source,
"hasPreview": item.PreviewVideoURL != "",
"hasThumbnail": item.ThumbnailURL != "",
"usableThumb": services.HasUsableThumbnail(item.ThumbnailURL),
"displayLink": item.DisplayLink,
"snippetSample": truncateText(item.Snippet, 160),
})
}
}
return searchDebugSummary{
Total: len(results),
BySource: bySource,
WithPreview: withPreview,
WithThumbnail: withThumbnail,
Top: top,
Warning: warning,
DurationMS: duration.Milliseconds(),
GeminiCandidateCap: geminiCap,
Total: len(results),
BySource: bySource,
WithPreview: withPreview,
WithThumbnail: withThumbnail,
WithUsableThumbnail: withUsableThumbnail,
WithLowValueThumbnail: withLowValueThumbnail,
Top: top,
Warning: warning,
DurationMS: duration.Milliseconds(),
GeminiCandidateCap: geminiCap,
}
}
@@ -664,6 +681,9 @@ func summarizeRecommendationResults(results []services.AIRecommendation, duratio
bySource := map[string]int{}
withPreview := 0
withThumbnail := 0
withUsableThumbnail := 0
withLowValueThumbnail := 0
withEmbedURL := 0
top := make([]map[string]any, 0, min(6, len(results)))
for idx, item := range results {
bySource[item.Source]++
@@ -672,6 +692,15 @@ func summarizeRecommendationResults(results []services.AIRecommendation, duratio
}
if strings.TrimSpace(item.ThumbnailURL) != "" {
withThumbnail++
if services.HasUsableThumbnail(item.ThumbnailURL) {
withUsableThumbnail++
}
if services.IsLowValueThumbnail(item.ThumbnailURL) {
withLowValueThumbnail++
}
}
if strings.TrimSpace(item.EmbedURL) != "" {
withEmbedURL++
}
if idx < 6 {
top = append(top, map[string]any{
@@ -679,19 +708,24 @@ func summarizeRecommendationResults(results []services.AIRecommendation, duratio
"source": item.Source,
"hasPreview": item.PreviewVideoURL != "",
"hasThumbnail": item.ThumbnailURL != "",
"hasEmbed": item.EmbedURL != "",
"mediaMode": item.MediaMode,
"reasonSample": truncateText(item.Reason, 120),
"snippetSample": truncateText(item.Snippet, 160),
})
}
}
return searchDebugSummary{
Total: len(results),
BySource: bySource,
WithPreview: withPreview,
WithThumbnail: withThumbnail,
Top: top,
Warning: warning,
DurationMS: duration.Milliseconds(),
Total: len(results),
BySource: bySource,
WithPreview: withPreview,
WithThumbnail: withThumbnail,
WithUsableThumbnail: withUsableThumbnail,
WithLowValueThumbnail: withLowValueThumbnail,
WithEmbedURL: withEmbedURL,
Top: top,
Warning: warning,
DurationMS: duration.Milliseconds(),
}
}
+244 -55
View File
@@ -294,31 +294,16 @@ func (s *SearchService) enrichEnvato(result SearchResult) SearchResult {
extractMetaContent(html, "twitter:image"),
extractJSONLDValue(html, "thumbnailUrl"),
)
if shouldPreferPageThumbnail(result.ThumbnailURL, result.Link) {
if hasUsableThumbnail(pageThumbnail) && shouldPreferPageThumbnail(result.ThumbnailURL, result.Link) {
result.ThumbnailURL = pageThumbnail
}
if result.PreviewVideoURL == "" {
result.PreviewVideoURL = firstNonEmpty(
videoMeta.ContentURL,
extractJSONLDValue(html, "contentUrl"),
extractMetaContent(html, "twitter:player:stream"),
extractVideoPreviewURL(html),
extractEnvatoPreviewFromHydration(html),
deriveEnvatoPreviewFromThumbnail(pageThumbnail),
deriveEnvatoPreviewFromThumbnail(result.ThumbnailURL),
)
result.PreviewVideoURL = collectEnvatoPreviewURL(html, pageThumbnail, result.ThumbnailURL, videoMeta.ContentURL)
}
if result.PreviewVideoURL == "" {
time.Sleep(1200 * time.Millisecond)
if retryHTML, retryErr := s.fetchText(result.Link); retryErr == nil {
result.PreviewVideoURL = firstNonEmpty(
extractJSONLDValue(retryHTML, "contentUrl"),
extractMetaContent(retryHTML, "twitter:player:stream"),
extractVideoPreviewURL(retryHTML),
extractEnvatoPreviewFromHydration(retryHTML),
deriveEnvatoPreviewFromThumbnail(pageThumbnail),
deriveEnvatoPreviewFromThumbnail(result.ThumbnailURL),
)
result.PreviewVideoURL = collectEnvatoPreviewURL(retryHTML, pageThumbnail, result.ThumbnailURL, "")
}
}
s.debug("search_service:enrich_envato_done", map[string]any{
@@ -341,8 +326,8 @@ func (s *SearchService) enrichArtgrid(result SearchResult) SearchResult {
body, err := s.fetchJSONText(apiURL)
if err == nil {
urls := collectURLs(body)
if result.ThumbnailURL == "" {
result.ThumbnailURL = pickImageURL(urls)
if !hasUsableThumbnail(result.ThumbnailURL) {
result.ThumbnailURL = pickArtgridImageURL(urls, clipID)
}
if result.PreviewVideoURL == "" {
result.PreviewVideoURL = pickVideoURL(urls)
@@ -356,12 +341,16 @@ func (s *SearchService) enrichArtgrid(result SearchResult) SearchResult {
html, err := s.fetchText(result.Link)
if err == nil {
if !isMatchingArtgridClipPage(html, clipID) {
s.debug("search_service:enrich_artgrid_html_mismatch", map[string]any{"link": result.Link, "clipId": clipID})
s.debug("search_service:enrich_artgrid_html_mismatch", map[string]any{
"link": result.Link,
"clipId": clipID,
"signals": artgridHTMLSignals(html, clipID),
})
return result
}
result.Title = firstNonEmpty(
cleanArtgridTitle(extractMetaContent(html, "og:title")),
cleanArtgridTitle(extractMetaContent(html, "title")),
cleanArtgridTitle(extractHTMLTitle(html)),
result.Title,
)
result.Snippet = firstNonEmpty(
@@ -374,15 +363,20 @@ func (s *SearchService) enrichArtgrid(result SearchResult) SearchResult {
extractMetaContent(html, "twitter:image"),
extractArtgridBackgroundThumbnail(html, clipID),
extractJSONLDValue(html, "image"),
pickArtgridImageURL(collectURLs(html), clipID),
)
if shouldPreferPageThumbnail(result.ThumbnailURL, result.Link) {
if hasUsableThumbnail(pageThumbnail) && shouldPreferPageThumbnail(result.ThumbnailURL, result.Link) {
result.ThumbnailURL = pageThumbnail
}
if result.PreviewVideoURL == "" {
result.PreviewVideoURL = firstNonEmpty(
extractJSONLDValue(html, "contentUrl"),
extractMetaContent(html, "twitter:player:stream"),
extractMetaContent(html, "og:video"),
extractMetaContent(html, "og:video:url"),
extractMetaContent(html, "og:video:secure_url"),
extractVideoPreviewURL(html),
pickVideoURL(collectURLs(html)),
)
}
if result.PreviewVideoURL == "" {
@@ -677,6 +671,112 @@ func deriveThumbnail(link string) string {
return ""
}
// isLowValueThumbnail reports whether raw is a thumbnail URL that is unlikely
// to show real footage: an empty value, a favicon/logo/icon/placeholder asset,
// or an image served from a search-engine host.
//
// Matching is case-insensitive. Asset tokens are matched as plain substrings.
// Host names are matched only at a domain-label boundary so that unrelated
// domains which merely end in the same letters (for example "climbing.com"
// versus "bing.com") are not misclassified. Any URL that mentions "ytimg.com"
// is exempt from the host check.
func isLowValueThumbnail(raw string) bool {
	lower := strings.ToLower(strings.TrimSpace(raw))
	if lower == "" {
		return true
	}
	for _, token := range []string{
		"favicon", "apple-touch-icon", "/logo", "/icon", "icon.", "logo.", "placehold.co",
	} {
		if strings.Contains(lower, token) {
			return true
		}
	}
	// YouTube thumbnails stay usable even when a search-engine host also
	// appears somewhere in the URL.
	if strings.Contains(lower, "ytimg.com") {
		return false
	}
	// "icons.duckduckgo.com" is covered by the "duckduckgo.com" entry.
	for _, host := range []string{
		"googleusercontent.com", "gstatic.com", "bing.com", "duckduckgo.com",
	} {
		if containsHostAtLabelBoundary(lower, host) {
			return true
		}
	}
	return false
}

// containsHostAtLabelBoundary reports whether host occurs in lower at the
// start of a domain label: at the very beginning of the string, directly after
// a character that cannot be part of a host label (such as '.', '/', '=' or
// '@'), or directly after a percent-encoded slash ("%2f"). Both arguments are
// expected to be lowercase already.
func containsHostAtLabelBoundary(lower, host string) bool {
	if host == "" {
		return false
	}
	for offset := 0; offset < len(lower); {
		idx := strings.Index(lower[offset:], host)
		if idx < 0 {
			return false
		}
		start := offset + idx
		if start == 0 {
			return true
		}
		prev := lower[start-1]
		isLabelChar := prev == '-' || (prev >= 'a' && prev <= 'z') || (prev >= '0' && prev <= '9')
		if !isLabelChar {
			return true
		}
		// The 'f' of an encoded slash looks like a label character, so it is
		// checked explicitly to keep proxied/embedded URLs detectable.
		if start >= 3 && lower[start-3:start] == "%2f" {
			return true
		}
		offset = start + 1
	}
	return false
}
// hasUsableThumbnail reports whether raw is a non-blank thumbnail URL that
// isLowValueThumbnail does not classify as low value.
func hasUsableThumbnail(raw string) bool {
	if strings.TrimSpace(raw) == "" {
		return false
	}
	return !isLowValueThumbnail(raw)
}
// HasUsableThumbnail is the exported form of hasUsableThumbnail. It returns
// that function's result for raw without any additional processing.
func HasUsableThumbnail(raw string) bool {
return hasUsableThumbnail(raw)
}
// IsLowValueThumbnail is the exported form of isLowValueThumbnail. It returns
// that function's result for raw without any additional processing.
func IsLowValueThumbnail(raw string) bool {
return isLowValueThumbnail(raw)
}
// buildEmbedURL returns the URL to use when showing an item inline.
//
// A blank link yields "". When source is "Google Video" (compared
// case-insensitively after trimming) and extractYouTubeID finds an ID in the
// link, the result is a youtube-nocookie.com embed URL for that ID. In every
// other case the trimmed link itself is returned.
func buildEmbedURL(source, link string) string {
	target := strings.TrimSpace(link)
	if target == "" {
		return ""
	}
	if !strings.EqualFold(strings.TrimSpace(source), "Google Video") {
		return target
	}
	id := extractYouTubeID(target)
	if id == "" {
		return target
	}
	return "https://www.youtube-nocookie.com/embed/" + id + "?autoplay=1&rel=0&playsinline=1&modestbranding=1&enablejsapi=1"
}
// defaultMediaMode chooses how an item should be rendered and returns, in
// order: the media mode ("embed", "preview_video", "thumbnail" or "none"),
// the embed URL to expose alongside it, and a reason code explaining why a
// richer mode is not offered (empty when there is nothing to report).
//
// The source is compared by exact string match:
//   - "Google Video": embed if an embed URL exists, otherwise a usable
//     thumbnail, otherwise none; both fallbacks report "missing_google_embed"
//     and carry no embed URL.
//   - "Envato": preview video, then usable thumbnail, then embed; reason
//     "provider_embed_blocked" except when falling through to embed.
//   - "Artgrid": usable thumbnail, then preview video, then embed; reason
//     "provider_preview_unavailable" except when falling through to embed.
//   - anything else: preview video, then usable thumbnail, then embed, with
//     no reason code.
func defaultMediaMode(source, link, previewURL, thumbnailURL string) (string, string, string) {
	embedURL := buildEmbedURL(source, link)
	hasEmbed := embedURL != ""
	hasPreview := strings.TrimSpace(previewURL) != ""
	hasThumb := hasUsableThumbnail(thumbnailURL)

	if source == "Google Video" {
		const reason = "missing_google_embed"
		switch {
		case hasEmbed:
			return "embed", embedURL, ""
		case hasThumb:
			return "thumbnail", "", reason
		}
		return "none", "", reason
	}

	// The remaining sources share one priority ladder and differ only in the
	// reason code and in whether the thumbnail outranks the preview video.
	reason := ""
	thumbnailFirst := false
	switch source {
	case "Envato":
		reason = "provider_embed_blocked"
	case "Artgrid":
		reason = "provider_preview_unavailable"
		thumbnailFirst = true
	}

	switch {
	case thumbnailFirst && hasThumb:
		return "thumbnail", embedURL, reason
	case hasPreview:
		return "preview_video", embedURL, reason
	case hasThumb:
		return "thumbnail", embedURL, reason
	case hasEmbed:
		return "embed", embedURL, ""
	}
	return "none", "", reason
}
// DecorateRecommendationMedia fills in the rendering metadata of a
// recommendation and returns the updated copy.
//
// EmbedURL comes from buildEmbedURL; MediaMode and PreviewBlockedReason come
// from defaultMediaMode (its embed URL result is ignored). Two guards then
// adjust the mode: "embed" without an embed URL becomes "none", and
// "thumbnail" without a usable thumbnail becomes "preview_video" when a
// preview video URL is present.
func DecorateRecommendationMedia(item AIRecommendation) AIRecommendation {
	item.EmbedURL = buildEmbedURL(item.Source, item.Link)

	mode, _, blockedReason := defaultMediaMode(item.Source, item.Link, item.PreviewVideoURL, item.ThumbnailURL)
	item.PreviewBlockedReason = blockedReason

	switch mode {
	case "embed":
		if item.EmbedURL == "" {
			mode = "none"
		}
	case "thumbnail":
		if !hasUsableThumbnail(item.ThumbnailURL) && strings.TrimSpace(item.PreviewVideoURL) != "" {
			mode = "preview_video"
		}
	}
	item.MediaMode = mode
	return item
}
func extractYouTubeID(link string) string {
patterns := []*regexp.Regexp{
regexp.MustCompile(`(?:v=|\/shorts\/|\/embed\/)([A-Za-z0-9_-]{11})`),
@@ -705,6 +805,15 @@ func extractMetaContent(html, property string) string {
return ""
}
// htmlTitleTagPattern captures the contents of the first <title> element,
// case-insensitively and across line breaks. It is compiled once at package
// initialisation instead of on every extractHTMLTitle call.
var htmlTitleTagPattern = regexp.MustCompile(`(?is)<title[^>]*>(.*?)</title>`)

// extractHTMLTitle returns the text of the first <title> element in html,
// trimmed of surrounding whitespace and passed through htmlUnescape. It
// returns "" when the document has no <title> element.
func extractHTMLTitle(html string) string {
	matches := htmlTitleTagPattern.FindStringSubmatch(html)
	if len(matches) != 2 {
		return ""
	}
	return htmlUnescape(strings.TrimSpace(matches[1]))
}
func extractVideoPreviewURL(html string) string {
normalizedHTML := strings.ReplaceAll(html, `\\\/`, `/`)
normalizedHTML = strings.ReplaceAll(normalizedHTML, `\/`, `/`)
@@ -737,6 +846,19 @@ func extractArtgridBackgroundThumbnail(html, clipID string) string {
return ""
}
// pickArtgridImageURL selects a thumbnail candidate for an Artgrid clip.
//
// It returns the first URL that looks like an image (contains .jpg, .jpeg,
// .png or .webp, case-insensitively) and is tied to the clip: it contains
// clipID, or mentions "graded-thumbnail" or "imgix". When no URL qualifies it
// falls back to pickImageURL(urls).
//
// An empty clipID never counts as a match. strings.Contains reports true for
// an empty substring, which would otherwise make the first image-like URL win
// regardless of its relevance.
func pickArtgridImageURL(urls []string, clipID string) string {
	for _, candidate := range urls {
		lower := strings.ToLower(candidate)
		looksLikeImage := strings.Contains(lower, ".jpg") ||
			strings.Contains(lower, ".jpeg") ||
			strings.Contains(lower, ".png") ||
			strings.Contains(lower, ".webp")
		if !looksLikeImage {
			continue
		}
		matchesClip := clipID != "" && strings.Contains(candidate, clipID)
		if matchesClip || strings.Contains(lower, "graded-thumbnail") || strings.Contains(lower, "imgix") {
			return candidate
		}
	}
	return pickImageURL(urls)
}
func extractArtgridClipID(link string) string {
patterns := []*regexp.Regexp{
regexp.MustCompile(`/clip/([0-9]+)/`),
@@ -840,30 +962,38 @@ func (s *SearchService) fetchText(target string) (string, error) {
s.debug("search_service:fetch_cache_hit", map[string]any{"type": "html", "target": target, "bytes": len(cached)})
return cached, nil
}
req, err := newBrowserRequest(http.MethodGet, target, "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
for _, strategy := range []string{"default", "provider"} {
req, err := newBrowserRequest(http.MethodGet, target, "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", strategy)
if err != nil {
return "", err
}
s.debug("search_service:envato_fetch_strategy", map[string]any{"target": target, "strategy": strategy})
resp, err := s.Client.Do(req)
if err != nil {
continue
}
data, readErr := io.ReadAll(io.LimitReader(resp.Body, 1024*1024))
_ = resp.Body.Close()
if readErr != nil {
continue
}
if resp.StatusCode == http.StatusForbidden || resp.StatusCode == http.StatusServiceUnavailable {
continue
}
if resp.StatusCode >= 300 {
continue
}
if looksLikeCloudflareChallenge(string(data)) {
continue
}
body := string(data)
s.setCachedFetchResult(cacheKey, body, 3*time.Minute)
return body, nil
}
body, err := fetchTextViaPython(target)
if err != nil {
return "", err
}
resp, err := s.Client.Do(req)
if err != nil {
return "", err
}
defer resp.Body.Close()
if resp.StatusCode == http.StatusForbidden || resp.StatusCode == http.StatusServiceUnavailable {
return fetchTextViaPython(target)
}
if resp.StatusCode >= 300 {
return "", fmt.Errorf("fetch returned status %d", resp.StatusCode)
}
data, err := io.ReadAll(io.LimitReader(resp.Body, 1024*1024))
if err != nil {
return "", err
}
if looksLikeCloudflareChallenge(string(data)) {
return fetchTextViaPython(target)
}
body := string(data)
s.setCachedFetchResult(cacheKey, body, 3*time.Minute)
return body, nil
}
@@ -875,7 +1005,7 @@ func (s *SearchService) fetchJSONText(target string) (string, error) {
return cached, nil
}
req, err := newBrowserRequest(http.MethodGet, target, "application/json, text/json, */*")
req, err := newBrowserRequest(http.MethodGet, target, "application/json, text/json, */*", "provider")
if err != nil {
return "", err
}
@@ -1034,19 +1164,35 @@ func cleanArtgridDescription(description string) string {
return strings.TrimSpace(description)
}
func artgridHTMLSignals(html, clipID string) map[string]bool {
ogURL := extractMetaContent(html, "og:url")
canonical := extractCanonicalURL(html)
alWebURL := extractMetaContent(html, "al:web:url")
lowerHTML := strings.ToLower(html)
title := strings.ToLower(extractHTMLTitle(html))
ogImage := strings.ToLower(extractMetaContent(html, "og:image"))
twitterImage := strings.ToLower(extractMetaContent(html, "twitter:image"))
return map[string]bool{
"og_url_clip": strings.Contains(ogURL, clipID),
"canonical_clip": strings.Contains(canonical, clipID),
"al_web_clip": strings.Contains(alWebURL, clipID),
"body_main_clipvideo": strings.Contains(lowerHTML, "main-clipvideo_"+clipID),
"body_clip_path": strings.Contains(lowerHTML, "/clip/"+clipID+"/"),
"body_clip_id": strings.Contains(lowerHTML, clipID),
"title_mentions_clip": strings.Contains(title, "artgrid") || strings.Contains(title, "artlist"),
"image_clip": strings.Contains(ogImage, strings.ToLower(clipID)) || strings.Contains(twitterImage, strings.ToLower(clipID)),
}
}
func isMatchingArtgridClipPage(html, clipID string) bool {
if clipID == "" {
return false
}
ogURL := extractMetaContent(html, "og:url")
canonical := extractCanonicalURL(html)
lowerHTML := strings.ToLower(html)
for _, candidate := range []string{ogURL, canonical} {
if strings.Contains(candidate, clipID) {
return true
}
signals := artgridHTMLSignals(html, clipID)
if signals["og_url_clip"] || signals["canonical_clip"] || signals["al_web_clip"] || signals["body_main_clipvideo"] || signals["body_clip_path"] || signals["image_clip"] {
return true
}
if strings.Contains(lowerHTML, "main-clipvideo_"+clipID) || strings.Contains(lowerHTML, "/clip/"+clipID+"/") {
if signals["body_clip_id"] && signals["title_mentions_clip"] {
return true
}
return false
@@ -1090,6 +1236,23 @@ func extractEnvatoPreviewFromHydration(html string) string {
return firstNonEmpty(pickBestEnvatoPreviewURL(urls), extractVideoPreviewURL(string(decoded)))
}
// collectEnvatoPreviewURL gathers every preview-video candidate for an Envato
// page and passes them to firstNonEmpty in priority order: the supplied
// contentURL, the JSON-LD contentUrl, the twitter:player:stream and og:video
// meta tags, the hydration payload, the best URL among all URLs found in the
// page, the generic video preview extractor, and finally URLs derived from the
// page thumbnail and the current thumbnail.
//
// NOTE(review): firstNonEmpty is defined elsewhere; presumably it returns the
// first argument that is not empty. Confirm against its definition.
func collectEnvatoPreviewURL(html, pageThumbnail, currentThumbnail, contentURL string) string {
	pageURLs := collectURLs(html)

	jsonLDContent := extractJSONLDValue(html, "contentUrl")
	twitterStream := extractMetaContent(html, "twitter:player:stream")
	ogVideo := extractMetaContent(html, "og:video")
	ogVideoURL := extractMetaContent(html, "og:video:url")
	ogVideoSecure := extractMetaContent(html, "og:video:secure_url")
	fromHydration := extractEnvatoPreviewFromHydration(html)
	bestPageURL := pickBestEnvatoPreviewURL(pageURLs)
	genericPreview := extractVideoPreviewURL(html)
	fromPageThumbnail := deriveEnvatoPreviewFromThumbnail(pageThumbnail)
	fromCurrentThumbnail := deriveEnvatoPreviewFromThumbnail(currentThumbnail)

	return firstNonEmpty(
		contentURL,
		jsonLDContent,
		twitterStream,
		ogVideo,
		ogVideoURL,
		ogVideoSecure,
		fromHydration,
		bestPageURL,
		genericPreview,
		fromPageThumbnail,
		fromCurrentThumbnail,
	)
}
func extractWindowAssignedValue(html, variable string) string {
pattern := regexp.MustCompile(`window\.` + regexp.QuoteMeta(variable) + `\s*=\s*"([^"]+)"`)
matches := pattern.FindStringSubmatch(html)
@@ -1121,7 +1284,19 @@ func pickBestEnvatoPreviewURL(urls []string) string {
return ""
}
func newBrowserRequest(method, target, accept string) (*http.Request, error) {
// inferFetchReferer returns the Referer value to send when fetching target.
// The match is a case-insensitive substring test on the whole target string:
// Envato hosts map to the Envato Elements home page, anything mentioning
// "artgrid" or "artlist" maps to the Artgrid home page, and every other
// target yields "".
func inferFetchReferer(target string) string {
	lower := strings.ToLower(target)
	mentionsAny := func(tokens ...string) bool {
		for _, token := range tokens {
			if strings.Contains(lower, token) {
				return true
			}
		}
		return false
	}

	if mentionsAny("envatousercontent.com", "elements.envato.com") {
		return "https://elements.envato.com/"
	}
	if mentionsAny("artgrid", "artlist") {
		return "https://artgrid.io/"
	}
	return ""
}
func newBrowserRequest(method, target, accept, strategy string) (*http.Request, error) {
req, err := http.NewRequest(method, target, nil)
if err != nil {
return nil, err
@@ -1131,6 +1306,14 @@ func newBrowserRequest(method, target, accept string) (*http.Request, error) {
if accept != "" {
req.Header.Set("Accept", accept)
}
if strategy == "provider" {
req.Header.Set("Referer", inferFetchReferer(target))
req.Header.Set("Upgrade-Insecure-Requests", "1")
req.Header.Set("Sec-Fetch-Dest", "document")
req.Header.Set("Sec-Fetch-Mode", "navigate")
req.Header.Set("Sec-Fetch-Site", "none")
req.Header.Set("Sec-Fetch-User", "?1")
}
return req, nil
}
@@ -1142,11 +1325,17 @@ req = Request(sys.argv[1], headers={
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
"Accept-Language": "en-US,en;q=0.9",
"Referer": sys.argv[2],
"Upgrade-Insecure-Requests": "1",
"Sec-Fetch-Dest": "document",
"Sec-Fetch-Mode": "navigate",
"Sec-Fetch-Site": "none",
"Sec-Fetch-User": "?1",
})
with urlopen(req, timeout=20) as resp:
sys.stdout.buffer.write(resp.read(1024 * 1024))
`
output, err := exec.Command("python3", "-c", script, target).CombinedOutput()
output, err := exec.Command("python3", "-c", script, target, inferFetchReferer(target)).CombinedOutput()
if err != nil {
return "", fmt.Errorf("python fallback failed: %v: %s", err, truncateBytes(output, 300))
}
+25
View File
@@ -44,6 +44,15 @@ func TestExtractEnvatoPreviewFromHydration(t *testing.T) {
}
}
// TestCollectEnvatoPreviewURLFindsOgVideo checks that a page exposing its
// preview only through an og:video meta tag still yields that URL.
func TestCollectEnvatoPreviewURLFindsOgVideo(t *testing.T) {
	const want = "https://video-previews.elements.envatousercontent.com/example/watermarked_preview/watermarked_preview.mp4"
	page := `<meta property="og:video" content="https://video-previews.elements.envatousercontent.com/example/watermarked_preview/watermarked_preview.mp4">`

	if got := collectEnvatoPreviewURL(page, "", "", ""); got != want {
		t.Fatalf("expected %q, got %q", want, got)
	}
}
func TestIsUsefulGoogleVideoResultRejectsMusicResults(t *testing.T) {
result := SearchResult{
Title: "Couple Friendly Sad Bgm Movie Best Bgm",
@@ -109,6 +118,22 @@ func TestIsMatchingArtgridClipPageRejectsHomepage(t *testing.T) {
}
}
// TestIsMatchingArtgridClipPageAcceptsBodySignals checks that a page with an
// Artgrid title and the clip ID somewhere in its body is accepted even though
// it carries no og:url or canonical link.
func TestIsMatchingArtgridClipPageAcceptsBodySignals(t *testing.T) {
	const clipID = "6600269"
	page := `<html><head><title>Night City | Stock Video Footage - Artgrid.io</title></head><body><script>window.__clip="6600269";</script></body></html>`

	matched := isMatchingArtgridClipPage(page, clipID)
	if matched {
		return
	}
	t.Fatal("expected body/title signal Artgrid HTML to be accepted")
}
// TestLowValueThumbnailDetection checks both sides of the classifier: a
// favicon is low value and a YouTube still image is not.
func TestLowValueThumbnailDetection(t *testing.T) {
	cases := []struct {
		url      string
		lowValue bool
		failure  string
	}{
		{"https://example.com/favicon.ico", true, "expected favicon to be low-value thumbnail"},
		{"https://i.ytimg.com/vi/abcd1234xyz/hqdefault.jpg", false, "expected youtube thumbnail to be usable"},
	}
	for _, tc := range cases {
		if IsLowValueThumbnail(tc.url) != tc.lowValue {
			t.Fatal(tc.failure)
		}
	}
}
func TestGeminiCandidateLimitNeverExceedsCandidates(t *testing.T) {
if got := GeminiCandidateLimit(9); got != 9 {
t.Fatalf("expected Gemini limit to stay within candidate count, got %d", got)
+19 -8
View File
@@ -47,14 +47,17 @@ type cachedExpansionValue struct {
}
type AIRecommendation struct {
Title string `json:"title"`
Link string `json:"link"`
Snippet string `json:"snippet"`
ThumbnailURL string `json:"thumbnailUrl"`
PreviewVideoURL string `json:"previewVideoUrl"`
Source string `json:"source"`
Reason string `json:"reason"`
Recommended bool `json:"recommended"`
Title string `json:"title"`
Link string `json:"link"`
Snippet string `json:"snippet"`
ThumbnailURL string `json:"thumbnailUrl"`
PreviewVideoURL string `json:"previewVideoUrl"`
Source string `json:"source"`
Reason string `json:"reason"`
Recommended bool `json:"recommended"`
MediaMode string `json:"mediaMode,omitempty"`
EmbedURL string `json:"embedUrl,omitempty"`
PreviewBlockedReason string `json:"previewBlockedReason,omitempty"`
}
type QueryExpansion struct {
@@ -480,6 +483,14 @@ func (g *GeminiService) fetchCandidateVisualInlineData(candidate SearchResult) (
}
}
if candidate.ThumbnailURL != "" {
if isLowValueThumbnail(candidate.ThumbnailURL) {
g.debug("gemini:vision_candidate_rejected_low_value", map[string]any{
"link": candidate.Link,
"source": candidate.Source,
"thumbnailUrl": candidate.ThumbnailURL,
})
return "", "", fmt.Errorf("candidate thumbnail is low value")
}
cacheKey := "image\n" + candidate.ThumbnailURL
if data, mimeType, ok := g.getCachedVisual(cacheKey); ok {
return data, mimeType, nil
+25
View File
@@ -3,6 +3,7 @@ package services
import (
"net/http"
"net/http/httptest"
"strings"
"testing"
"time"
)
@@ -114,3 +115,27 @@ func TestGeminiExpansionCacheRoundTrip(t *testing.T) {
t.Fatalf("unexpected expansion cache value: %#v", value)
}
}
func TestDecorateRecommendationMediaUsesEmbedForGoogleVideo(t *testing.T) {
item := DecorateRecommendationMedia(AIRecommendation{
Source: "Google Video",
Link: "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
})
if item.MediaMode != "embed" {
t.Fatalf("expected embed media mode, got %q", item.MediaMode)
}
if item.EmbedURL == "" || !strings.Contains(item.EmbedURL, "youtube-nocookie.com/embed/") {
t.Fatalf("unexpected embed url: %q", item.EmbedURL)
}
}
// TestRankSearchResultsPrefersUsableVisuals checks that, between two results
// with the same title, the one with a real frame thumbnail outranks the one
// whose thumbnail is only a favicon.
func TestRankSearchResultsPrefersUsableVisuals(t *testing.T) {
	faviconOnly := SearchResult{
		Title:        "cyberpunk city",
		Link:         "https://example.com/a",
		ThumbnailURL: "https://example.com/favicon.ico",
	}
	realFrame := SearchResult{
		Title:        "cyberpunk city",
		Link:         "https://example.com/b",
		ThumbnailURL: "https://example.com/frame.jpg",
	}

	ranked := RankSearchResults("cyberpunk city", []SearchResult{faviconOnly, realFrame})

	if first := ranked[0]; first.Link != "https://example.com/b" {
		t.Fatalf("expected usable thumbnail result first, got %#v", ranked)
	}
}
+49 -20
View File
@@ -10,6 +10,8 @@ import (
)
const GeminiFallbackReason = "Gemini Vision 응답이 부족해 키워드 기준으로 보강된 결과입니다."
const FallbackPreviewReason = "Fallback due to missing provider preview."
const PendingVisualReason = "Ranked candidate pending stronger visual evidence."
type GeminiBatchStats struct {
CandidateCap int `json:"candidateCap"`
@@ -19,6 +21,7 @@ type GeminiBatchStats struct {
Failed int `json:"failed"`
SequentialRetried int `json:"sequentialRetried"`
RecommendedCount int `json:"recommendedCount"`
VisualRejectCount int `json:"visualRejectCount"`
Errors []string `json:"errors,omitempty"`
}
@@ -58,19 +61,25 @@ func RankSearchResults(query string, results []SearchResult) []SearchResult {
score -= 4
}
}
if result.ThumbnailURL != "" {
score += 2
}
if result.PreviewVideoURL != "" {
score += 3
score += 10
}
if hasUsableThumbnail(result.ThumbnailURL) {
score += 5
}
if isLowValueThumbnail(result.ThumbnailURL) {
score -= 8
}
if strings.TrimSpace(result.PreviewVideoURL) == "" && !hasUsableThumbnail(result.ThumbnailURL) {
score -= 10
}
switch result.Source {
case "Google Video":
score -= 1
score -= 2
case "Envato":
score += 7
score += 5
case "Artgrid":
score += 7
score += 4
}
scored = append(scored, scoredResult{item: result, score: score})
}
@@ -106,6 +115,11 @@ func EvaluateAllCandidatesWithGeminiWithDeadline(service *GeminiService, query s
CandidateCap: limit,
Requested: min(limit, len(ranked)),
}
for _, item := range ranked[:min(limit, len(ranked))] {
if strings.TrimSpace(item.PreviewVideoURL) == "" && !hasUsableThumbnail(item.ThumbnailURL) {
stats.VisualRejectCount++
}
}
type batchResult struct {
index int
recommendations []AIRecommendation
@@ -231,7 +245,7 @@ func BuildFallbackRecommendations(ranked []SearchResult, limit int, reason strin
fallback := make([]AIRecommendation, 0, min(limit, len(ranked)))
for _, item := range ranked[:min(limit, len(ranked))] {
fallback = append(fallback, AIRecommendation{
fallback = append(fallback, DecorateRecommendationMedia(AIRecommendation{
Title: item.Title,
Link: item.Link,
Snippet: item.Snippet,
@@ -240,7 +254,7 @@ func BuildFallbackRecommendations(ranked []SearchResult, limit int, reason strin
Source: item.Source,
Reason: reason,
Recommended: false,
})
}))
}
return fallback
}
@@ -385,6 +399,8 @@ func looksNegativeReason(reason string) bool {
func MergeRecommendations(recommended []AIRecommendation, ranked []SearchResult, limit int) []AIRecommendation {
merged := make([]AIRecommendation, 0, min(limit, len(ranked)))
seen := map[string]bool{}
fillerCount := 0
maxFiller := min(4, limit)
for _, item := range recommended {
if !item.Recommended {
@@ -394,7 +410,7 @@ func MergeRecommendations(recommended []AIRecommendation, ranked []SearchResult,
continue
}
seen[item.Link] = true
merged = append(merged, item)
merged = append(merged, DecorateRecommendationMedia(item))
}
for _, item := range recommended {
@@ -404,8 +420,11 @@ func MergeRecommendations(recommended []AIRecommendation, ranked []SearchResult,
if looksNegativeReason(item.Reason) || strings.Contains(item.Reason, GeminiFallbackReason) {
continue
}
if strings.TrimSpace(item.PreviewVideoURL) == "" && !hasUsableThumbnail(item.ThumbnailURL) {
continue
}
seen[item.Link] = true
merged = append(merged, item)
merged = append(merged, DecorateRecommendationMedia(item))
}
if len(merged) < min(12, limit) {
@@ -413,20 +432,24 @@ func MergeRecommendations(recommended []AIRecommendation, ranked []SearchResult,
if len(merged) >= min(12, limit) || item.Link == "" || seen[item.Link] {
continue
}
if strings.TrimSpace(item.ThumbnailURL) == "" && strings.TrimSpace(item.PreviewVideoURL) == "" {
if fillerCount >= maxFiller {
break
}
if strings.TrimSpace(item.PreviewVideoURL) == "" && !hasUsableThumbnail(item.ThumbnailURL) {
continue
}
seen[item.Link] = true
merged = append(merged, AIRecommendation{
merged = append(merged, DecorateRecommendationMedia(AIRecommendation{
Title: item.Title,
Link: item.Link,
Snippet: item.Snippet,
ThumbnailURL: item.ThumbnailURL,
PreviewVideoURL: item.PreviewVideoURL,
Source: item.Source,
Reason: "Gemini 검토가 부족해 편집용 후보로 추가된 결과입니다.",
Reason: PendingVisualReason,
Recommended: false,
})
}))
fillerCount++
}
}
return merged
@@ -435,31 +458,37 @@ func MergeRecommendations(recommended []AIRecommendation, ranked []SearchResult,
func BackfillRecommendations(existing []AIRecommendation, ranked []SearchResult, limit int, reason string) []AIRecommendation {
merged := make([]AIRecommendation, 0, min(limit, len(ranked)))
seen := map[string]bool{}
fillerCount := 0
maxFiller := min(4, limit)
for _, item := range existing {
if item.Link == "" || seen[item.Link] {
continue
}
seen[item.Link] = true
merged = append(merged, item)
merged = append(merged, DecorateRecommendationMedia(item))
}
for _, item := range ranked {
if len(merged) >= limit || item.Link == "" || seen[item.Link] {
continue
}
if strings.TrimSpace(item.ThumbnailURL) == "" && strings.TrimSpace(item.PreviewVideoURL) == "" {
if fillerCount >= maxFiller {
break
}
if strings.TrimSpace(item.PreviewVideoURL) == "" && !hasUsableThumbnail(item.ThumbnailURL) {
continue
}
seen[item.Link] = true
merged = append(merged, AIRecommendation{
merged = append(merged, DecorateRecommendationMedia(AIRecommendation{
Title: item.Title,
Link: item.Link,
Snippet: item.Snippet,
ThumbnailURL: item.ThumbnailURL,
PreviewVideoURL: item.PreviewVideoURL,
Source: item.Source,
Reason: reason,
Reason: firstNonEmpty(strings.TrimSpace(reason), FallbackPreviewReason),
Recommended: false,
})
}))
fillerCount++
}
return merged
}