This commit is contained in:
@@ -255,6 +255,20 @@
|
|||||||
- backend debug broadcasts
|
- backend debug broadcasts
|
||||||
|
|
||||||
## Recent Change Log
|
## Recent Change Log
|
||||||
|
- Date: `2026-03-16`
|
||||||
|
- What changed:
|
||||||
|
- Hardened search result enrichment and recommendation metadata for preview recovery work.
|
||||||
|
- Added provider-aware fetch strategy for source HTML/JSON requests, broader Envato preview parsing, looser Artgrid HTML acceptance, and stronger thumbnail preservation rules.
|
||||||
|
- Added low-value thumbnail detection, ranking penalties for weak visuals, capped filler backfill, and response metadata fields for modal rendering (`mediaMode`, `embedUrl`, `previewBlockedReason`).
|
||||||
|
- Expanded debug summaries with usable-thumbnail and embed counts, and added unit coverage for the new parsing/ranking helpers.
|
||||||
|
- Why it changed:
|
||||||
|
- The latest production log showed Envato enrichment frequently failing, Artgrid enrichment collapsing on `403` plus HTML mismatch, and Gemini seeing too few usable visuals to do meaningful review.
|
||||||
|
- How it was verified:
|
||||||
|
- `go test ./...`
|
||||||
|
- What is still risky or incomplete:
|
||||||
|
- Frontend modal fallback behavior is not updated yet in this batch, so the new response metadata is not fully consumed until the UI patch lands.
|
||||||
|
- Envato source fetches may still fail on some pages if the provider changes challenge behavior again.
|
||||||
|
|
||||||
- Date: `2026-03-16`
|
- Date: `2026-03-16`
|
||||||
- What changed:
|
- What changed:
|
||||||
- Added in-process query translation / expansion cache inside `GeminiService` so repeated identical searches can reuse the same English query and variant list without re-calling Gemini or Google Translate.
|
- Added in-process query translation / expansion cache inside `GeminiService` so repeated identical searches can reuse the same English query and variant list without re-calling Gemini or Google Translate.
|
||||||
|
|||||||
@@ -85,6 +85,9 @@ type searchDebugSummary struct {
|
|||||||
BySource map[string]int `json:"bySource"`
|
BySource map[string]int `json:"bySource"`
|
||||||
WithPreview int `json:"withPreview"`
|
WithPreview int `json:"withPreview"`
|
||||||
WithThumbnail int `json:"withThumbnail"`
|
WithThumbnail int `json:"withThumbnail"`
|
||||||
|
WithUsableThumbnail int `json:"withUsableThumbnail,omitempty"`
|
||||||
|
WithLowValueThumbnail int `json:"withLowValueThumbnail,omitempty"`
|
||||||
|
WithEmbedURL int `json:"withEmbedUrl,omitempty"`
|
||||||
Top []map[string]any `json:"top"`
|
Top []map[string]any `json:"top"`
|
||||||
Warning string `json:"warning,omitempty"`
|
Warning string `json:"warning,omitempty"`
|
||||||
DurationMS int64 `json:"durationMs,omitempty"`
|
DurationMS int64 `json:"durationMs,omitempty"`
|
||||||
@@ -484,6 +487,9 @@ func (a *App) searchMedia(c *gin.Context) {
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
merged = services.RandomizeTopRecommendations(merged, 8)
|
merged = services.RandomizeTopRecommendations(merged, 8)
|
||||||
|
for idx := range merged {
|
||||||
|
merged[idx] = services.DecorateRecommendationMedia(merged[idx])
|
||||||
|
}
|
||||||
warning := ""
|
warning := ""
|
||||||
if geminiErr != nil {
|
if geminiErr != nil {
|
||||||
warning = geminiErr.Error()
|
warning = geminiErr.Error()
|
||||||
@@ -628,6 +634,8 @@ func summarizeSearchResults(results []services.SearchResult, duration time.Durat
|
|||||||
bySource := map[string]int{}
|
bySource := map[string]int{}
|
||||||
withPreview := 0
|
withPreview := 0
|
||||||
withThumbnail := 0
|
withThumbnail := 0
|
||||||
|
withUsableThumbnail := 0
|
||||||
|
withLowValueThumbnail := 0
|
||||||
top := make([]map[string]any, 0, min(6, len(results)))
|
top := make([]map[string]any, 0, min(6, len(results)))
|
||||||
for idx, item := range results {
|
for idx, item := range results {
|
||||||
bySource[item.Source]++
|
bySource[item.Source]++
|
||||||
@@ -636,6 +644,12 @@ func summarizeSearchResults(results []services.SearchResult, duration time.Durat
|
|||||||
}
|
}
|
||||||
if strings.TrimSpace(item.ThumbnailURL) != "" {
|
if strings.TrimSpace(item.ThumbnailURL) != "" {
|
||||||
withThumbnail++
|
withThumbnail++
|
||||||
|
if services.HasUsableThumbnail(item.ThumbnailURL) {
|
||||||
|
withUsableThumbnail++
|
||||||
|
}
|
||||||
|
if services.IsLowValueThumbnail(item.ThumbnailURL) {
|
||||||
|
withLowValueThumbnail++
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if idx < 6 {
|
if idx < 6 {
|
||||||
top = append(top, map[string]any{
|
top = append(top, map[string]any{
|
||||||
@@ -643,6 +657,7 @@ func summarizeSearchResults(results []services.SearchResult, duration time.Durat
|
|||||||
"source": item.Source,
|
"source": item.Source,
|
||||||
"hasPreview": item.PreviewVideoURL != "",
|
"hasPreview": item.PreviewVideoURL != "",
|
||||||
"hasThumbnail": item.ThumbnailURL != "",
|
"hasThumbnail": item.ThumbnailURL != "",
|
||||||
|
"usableThumb": services.HasUsableThumbnail(item.ThumbnailURL),
|
||||||
"displayLink": item.DisplayLink,
|
"displayLink": item.DisplayLink,
|
||||||
"snippetSample": truncateText(item.Snippet, 160),
|
"snippetSample": truncateText(item.Snippet, 160),
|
||||||
})
|
})
|
||||||
@@ -653,6 +668,8 @@ func summarizeSearchResults(results []services.SearchResult, duration time.Durat
|
|||||||
BySource: bySource,
|
BySource: bySource,
|
||||||
WithPreview: withPreview,
|
WithPreview: withPreview,
|
||||||
WithThumbnail: withThumbnail,
|
WithThumbnail: withThumbnail,
|
||||||
|
WithUsableThumbnail: withUsableThumbnail,
|
||||||
|
WithLowValueThumbnail: withLowValueThumbnail,
|
||||||
Top: top,
|
Top: top,
|
||||||
Warning: warning,
|
Warning: warning,
|
||||||
DurationMS: duration.Milliseconds(),
|
DurationMS: duration.Milliseconds(),
|
||||||
@@ -664,6 +681,9 @@ func summarizeRecommendationResults(results []services.AIRecommendation, duratio
|
|||||||
bySource := map[string]int{}
|
bySource := map[string]int{}
|
||||||
withPreview := 0
|
withPreview := 0
|
||||||
withThumbnail := 0
|
withThumbnail := 0
|
||||||
|
withUsableThumbnail := 0
|
||||||
|
withLowValueThumbnail := 0
|
||||||
|
withEmbedURL := 0
|
||||||
top := make([]map[string]any, 0, min(6, len(results)))
|
top := make([]map[string]any, 0, min(6, len(results)))
|
||||||
for idx, item := range results {
|
for idx, item := range results {
|
||||||
bySource[item.Source]++
|
bySource[item.Source]++
|
||||||
@@ -672,6 +692,15 @@ func summarizeRecommendationResults(results []services.AIRecommendation, duratio
|
|||||||
}
|
}
|
||||||
if strings.TrimSpace(item.ThumbnailURL) != "" {
|
if strings.TrimSpace(item.ThumbnailURL) != "" {
|
||||||
withThumbnail++
|
withThumbnail++
|
||||||
|
if services.HasUsableThumbnail(item.ThumbnailURL) {
|
||||||
|
withUsableThumbnail++
|
||||||
|
}
|
||||||
|
if services.IsLowValueThumbnail(item.ThumbnailURL) {
|
||||||
|
withLowValueThumbnail++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if strings.TrimSpace(item.EmbedURL) != "" {
|
||||||
|
withEmbedURL++
|
||||||
}
|
}
|
||||||
if idx < 6 {
|
if idx < 6 {
|
||||||
top = append(top, map[string]any{
|
top = append(top, map[string]any{
|
||||||
@@ -679,6 +708,8 @@ func summarizeRecommendationResults(results []services.AIRecommendation, duratio
|
|||||||
"source": item.Source,
|
"source": item.Source,
|
||||||
"hasPreview": item.PreviewVideoURL != "",
|
"hasPreview": item.PreviewVideoURL != "",
|
||||||
"hasThumbnail": item.ThumbnailURL != "",
|
"hasThumbnail": item.ThumbnailURL != "",
|
||||||
|
"hasEmbed": item.EmbedURL != "",
|
||||||
|
"mediaMode": item.MediaMode,
|
||||||
"reasonSample": truncateText(item.Reason, 120),
|
"reasonSample": truncateText(item.Reason, 120),
|
||||||
"snippetSample": truncateText(item.Snippet, 160),
|
"snippetSample": truncateText(item.Snippet, 160),
|
||||||
})
|
})
|
||||||
@@ -689,6 +720,9 @@ func summarizeRecommendationResults(results []services.AIRecommendation, duratio
|
|||||||
BySource: bySource,
|
BySource: bySource,
|
||||||
WithPreview: withPreview,
|
WithPreview: withPreview,
|
||||||
WithThumbnail: withThumbnail,
|
WithThumbnail: withThumbnail,
|
||||||
|
WithUsableThumbnail: withUsableThumbnail,
|
||||||
|
WithLowValueThumbnail: withLowValueThumbnail,
|
||||||
|
WithEmbedURL: withEmbedURL,
|
||||||
Top: top,
|
Top: top,
|
||||||
Warning: warning,
|
Warning: warning,
|
||||||
DurationMS: duration.Milliseconds(),
|
DurationMS: duration.Milliseconds(),
|
||||||
|
|||||||
+233
-44
@@ -294,31 +294,16 @@ func (s *SearchService) enrichEnvato(result SearchResult) SearchResult {
|
|||||||
extractMetaContent(html, "twitter:image"),
|
extractMetaContent(html, "twitter:image"),
|
||||||
extractJSONLDValue(html, "thumbnailUrl"),
|
extractJSONLDValue(html, "thumbnailUrl"),
|
||||||
)
|
)
|
||||||
if shouldPreferPageThumbnail(result.ThumbnailURL, result.Link) {
|
if hasUsableThumbnail(pageThumbnail) && shouldPreferPageThumbnail(result.ThumbnailURL, result.Link) {
|
||||||
result.ThumbnailURL = pageThumbnail
|
result.ThumbnailURL = pageThumbnail
|
||||||
}
|
}
|
||||||
if result.PreviewVideoURL == "" {
|
if result.PreviewVideoURL == "" {
|
||||||
result.PreviewVideoURL = firstNonEmpty(
|
result.PreviewVideoURL = collectEnvatoPreviewURL(html, pageThumbnail, result.ThumbnailURL, videoMeta.ContentURL)
|
||||||
videoMeta.ContentURL,
|
|
||||||
extractJSONLDValue(html, "contentUrl"),
|
|
||||||
extractMetaContent(html, "twitter:player:stream"),
|
|
||||||
extractVideoPreviewURL(html),
|
|
||||||
extractEnvatoPreviewFromHydration(html),
|
|
||||||
deriveEnvatoPreviewFromThumbnail(pageThumbnail),
|
|
||||||
deriveEnvatoPreviewFromThumbnail(result.ThumbnailURL),
|
|
||||||
)
|
|
||||||
}
|
}
|
||||||
if result.PreviewVideoURL == "" {
|
if result.PreviewVideoURL == "" {
|
||||||
time.Sleep(1200 * time.Millisecond)
|
time.Sleep(1200 * time.Millisecond)
|
||||||
if retryHTML, retryErr := s.fetchText(result.Link); retryErr == nil {
|
if retryHTML, retryErr := s.fetchText(result.Link); retryErr == nil {
|
||||||
result.PreviewVideoURL = firstNonEmpty(
|
result.PreviewVideoURL = collectEnvatoPreviewURL(retryHTML, pageThumbnail, result.ThumbnailURL, "")
|
||||||
extractJSONLDValue(retryHTML, "contentUrl"),
|
|
||||||
extractMetaContent(retryHTML, "twitter:player:stream"),
|
|
||||||
extractVideoPreviewURL(retryHTML),
|
|
||||||
extractEnvatoPreviewFromHydration(retryHTML),
|
|
||||||
deriveEnvatoPreviewFromThumbnail(pageThumbnail),
|
|
||||||
deriveEnvatoPreviewFromThumbnail(result.ThumbnailURL),
|
|
||||||
)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
s.debug("search_service:enrich_envato_done", map[string]any{
|
s.debug("search_service:enrich_envato_done", map[string]any{
|
||||||
@@ -341,8 +326,8 @@ func (s *SearchService) enrichArtgrid(result SearchResult) SearchResult {
|
|||||||
body, err := s.fetchJSONText(apiURL)
|
body, err := s.fetchJSONText(apiURL)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
urls := collectURLs(body)
|
urls := collectURLs(body)
|
||||||
if result.ThumbnailURL == "" {
|
if !hasUsableThumbnail(result.ThumbnailURL) {
|
||||||
result.ThumbnailURL = pickImageURL(urls)
|
result.ThumbnailURL = pickArtgridImageURL(urls, clipID)
|
||||||
}
|
}
|
||||||
if result.PreviewVideoURL == "" {
|
if result.PreviewVideoURL == "" {
|
||||||
result.PreviewVideoURL = pickVideoURL(urls)
|
result.PreviewVideoURL = pickVideoURL(urls)
|
||||||
@@ -356,12 +341,16 @@ func (s *SearchService) enrichArtgrid(result SearchResult) SearchResult {
|
|||||||
html, err := s.fetchText(result.Link)
|
html, err := s.fetchText(result.Link)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
if !isMatchingArtgridClipPage(html, clipID) {
|
if !isMatchingArtgridClipPage(html, clipID) {
|
||||||
s.debug("search_service:enrich_artgrid_html_mismatch", map[string]any{"link": result.Link, "clipId": clipID})
|
s.debug("search_service:enrich_artgrid_html_mismatch", map[string]any{
|
||||||
|
"link": result.Link,
|
||||||
|
"clipId": clipID,
|
||||||
|
"signals": artgridHTMLSignals(html, clipID),
|
||||||
|
})
|
||||||
return result
|
return result
|
||||||
}
|
}
|
||||||
result.Title = firstNonEmpty(
|
result.Title = firstNonEmpty(
|
||||||
cleanArtgridTitle(extractMetaContent(html, "og:title")),
|
cleanArtgridTitle(extractMetaContent(html, "og:title")),
|
||||||
cleanArtgridTitle(extractMetaContent(html, "title")),
|
cleanArtgridTitle(extractHTMLTitle(html)),
|
||||||
result.Title,
|
result.Title,
|
||||||
)
|
)
|
||||||
result.Snippet = firstNonEmpty(
|
result.Snippet = firstNonEmpty(
|
||||||
@@ -374,15 +363,20 @@ func (s *SearchService) enrichArtgrid(result SearchResult) SearchResult {
|
|||||||
extractMetaContent(html, "twitter:image"),
|
extractMetaContent(html, "twitter:image"),
|
||||||
extractArtgridBackgroundThumbnail(html, clipID),
|
extractArtgridBackgroundThumbnail(html, clipID),
|
||||||
extractJSONLDValue(html, "image"),
|
extractJSONLDValue(html, "image"),
|
||||||
|
pickArtgridImageURL(collectURLs(html), clipID),
|
||||||
)
|
)
|
||||||
if shouldPreferPageThumbnail(result.ThumbnailURL, result.Link) {
|
if hasUsableThumbnail(pageThumbnail) && shouldPreferPageThumbnail(result.ThumbnailURL, result.Link) {
|
||||||
result.ThumbnailURL = pageThumbnail
|
result.ThumbnailURL = pageThumbnail
|
||||||
}
|
}
|
||||||
if result.PreviewVideoURL == "" {
|
if result.PreviewVideoURL == "" {
|
||||||
result.PreviewVideoURL = firstNonEmpty(
|
result.PreviewVideoURL = firstNonEmpty(
|
||||||
extractJSONLDValue(html, "contentUrl"),
|
extractJSONLDValue(html, "contentUrl"),
|
||||||
extractMetaContent(html, "twitter:player:stream"),
|
extractMetaContent(html, "twitter:player:stream"),
|
||||||
|
extractMetaContent(html, "og:video"),
|
||||||
|
extractMetaContent(html, "og:video:url"),
|
||||||
|
extractMetaContent(html, "og:video:secure_url"),
|
||||||
extractVideoPreviewURL(html),
|
extractVideoPreviewURL(html),
|
||||||
|
pickVideoURL(collectURLs(html)),
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
if result.PreviewVideoURL == "" {
|
if result.PreviewVideoURL == "" {
|
||||||
@@ -677,6 +671,112 @@ func deriveThumbnail(link string) string {
|
|||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// isLowValueThumbnail reports whether raw is blank or looks like a site icon,
// logo, placeholder, or search-engine-hosted image rather than a real preview
// frame. Matching is a case-insensitive substring test against the whole URL,
// so it is a heuristic, not a strict host or path comparison.
func isLowValueThumbnail(raw string) bool {
	candidate := strings.ToLower(strings.TrimSpace(raw))
	if candidate == "" {
		return true
	}

	containsAny := func(needles ...string) bool {
		for _, needle := range needles {
			if strings.Contains(candidate, needle) {
				return true
			}
		}
		return false
	}

	// Icon, logo, and placeholder markers are rejected wherever they appear.
	if containsAny("favicon", "apple-touch-icon", "/logo", "/icon", "icon.", "logo.", "placehold.co") {
		return true
	}

	// A URL mentioning ytimg.com is exempt from the host check below.
	if strings.Contains(candidate, "ytimg.com") {
		return false
	}
	return containsAny("googleusercontent.com", "gstatic.com", "bing.com", "duckduckgo.com", "icons.duckduckgo.com")
}
|
||||||
|
|
||||||
|
func hasUsableThumbnail(raw string) bool {
|
||||||
|
return strings.TrimSpace(raw) != "" && !isLowValueThumbnail(raw)
|
||||||
|
}
|
||||||
|
|
||||||
|
func HasUsableThumbnail(raw string) bool {
|
||||||
|
return hasUsableThumbnail(raw)
|
||||||
|
}
|
||||||
|
|
||||||
|
func IsLowValueThumbnail(raw string) bool {
|
||||||
|
return isLowValueThumbnail(raw)
|
||||||
|
}
|
||||||
|
|
||||||
|
func buildEmbedURL(source, link string) string {
|
||||||
|
trimmed := strings.TrimSpace(link)
|
||||||
|
if trimmed == "" {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
if strings.EqualFold(strings.TrimSpace(source), "Google Video") {
|
||||||
|
if videoID := extractYouTubeID(trimmed); videoID != "" {
|
||||||
|
return "https://www.youtube-nocookie.com/embed/" + videoID + "?autoplay=1&rel=0&playsinline=1&modestbranding=1&enablejsapi=1"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return trimmed
|
||||||
|
}
|
||||||
|
|
||||||
|
func defaultMediaMode(source, link, previewURL, thumbnailURL string) (string, string, string) {
|
||||||
|
embedURL := buildEmbedURL(source, link)
|
||||||
|
switch source {
|
||||||
|
case "Google Video":
|
||||||
|
if embedURL != "" {
|
||||||
|
return "embed", embedURL, ""
|
||||||
|
}
|
||||||
|
if hasUsableThumbnail(thumbnailURL) {
|
||||||
|
return "thumbnail", "", "missing_google_embed"
|
||||||
|
}
|
||||||
|
return "none", "", "missing_google_embed"
|
||||||
|
case "Envato":
|
||||||
|
if strings.TrimSpace(previewURL) != "" {
|
||||||
|
return "preview_video", embedURL, "provider_embed_blocked"
|
||||||
|
}
|
||||||
|
if hasUsableThumbnail(thumbnailURL) {
|
||||||
|
return "thumbnail", embedURL, "provider_embed_blocked"
|
||||||
|
}
|
||||||
|
if embedURL != "" {
|
||||||
|
return "embed", embedURL, ""
|
||||||
|
}
|
||||||
|
return "none", "", "provider_embed_blocked"
|
||||||
|
case "Artgrid":
|
||||||
|
if hasUsableThumbnail(thumbnailURL) {
|
||||||
|
return "thumbnail", embedURL, "provider_preview_unavailable"
|
||||||
|
}
|
||||||
|
if strings.TrimSpace(previewURL) != "" {
|
||||||
|
return "preview_video", embedURL, "provider_preview_unavailable"
|
||||||
|
}
|
||||||
|
if embedURL != "" {
|
||||||
|
return "embed", embedURL, ""
|
||||||
|
}
|
||||||
|
return "none", "", "provider_preview_unavailable"
|
||||||
|
default:
|
||||||
|
if strings.TrimSpace(previewURL) != "" {
|
||||||
|
return "preview_video", embedURL, ""
|
||||||
|
}
|
||||||
|
if hasUsableThumbnail(thumbnailURL) {
|
||||||
|
return "thumbnail", embedURL, ""
|
||||||
|
}
|
||||||
|
if embedURL != "" {
|
||||||
|
return "embed", embedURL, ""
|
||||||
|
}
|
||||||
|
return "none", "", ""
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func DecorateRecommendationMedia(item AIRecommendation) AIRecommendation {
|
||||||
|
item.EmbedURL = buildEmbedURL(item.Source, item.Link)
|
||||||
|
item.MediaMode, _, item.PreviewBlockedReason = defaultMediaMode(item.Source, item.Link, item.PreviewVideoURL, item.ThumbnailURL)
|
||||||
|
if item.MediaMode == "embed" && item.EmbedURL == "" {
|
||||||
|
item.MediaMode = "none"
|
||||||
|
}
|
||||||
|
if item.MediaMode == "thumbnail" && !hasUsableThumbnail(item.ThumbnailURL) && strings.TrimSpace(item.PreviewVideoURL) != "" {
|
||||||
|
item.MediaMode = "preview_video"
|
||||||
|
}
|
||||||
|
return item
|
||||||
|
}
|
||||||
|
|
||||||
func extractYouTubeID(link string) string {
|
func extractYouTubeID(link string) string {
|
||||||
patterns := []*regexp.Regexp{
|
patterns := []*regexp.Regexp{
|
||||||
regexp.MustCompile(`(?:v=|\/shorts\/|\/embed\/)([A-Za-z0-9_-]{11})`),
|
regexp.MustCompile(`(?:v=|\/shorts\/|\/embed\/)([A-Za-z0-9_-]{11})`),
|
||||||
@@ -705,6 +805,15 @@ func extractMetaContent(html, property string) string {
|
|||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func extractHTMLTitle(html string) string {
|
||||||
|
pattern := regexp.MustCompile(`(?is)<title[^>]*>(.*?)</title>`)
|
||||||
|
matches := pattern.FindStringSubmatch(html)
|
||||||
|
if len(matches) == 2 {
|
||||||
|
return htmlUnescape(strings.TrimSpace(matches[1]))
|
||||||
|
}
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
func extractVideoPreviewURL(html string) string {
|
func extractVideoPreviewURL(html string) string {
|
||||||
normalizedHTML := strings.ReplaceAll(html, `\\\/`, `/`)
|
normalizedHTML := strings.ReplaceAll(html, `\\\/`, `/`)
|
||||||
normalizedHTML = strings.ReplaceAll(normalizedHTML, `\/`, `/`)
|
normalizedHTML = strings.ReplaceAll(normalizedHTML, `\/`, `/`)
|
||||||
@@ -737,6 +846,19 @@ func extractArtgridBackgroundThumbnail(html, clipID string) string {
|
|||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func pickArtgridImageURL(urls []string, clipID string) string {
|
||||||
|
for _, item := range urls {
|
||||||
|
lower := strings.ToLower(item)
|
||||||
|
if !(strings.Contains(lower, ".jpg") || strings.Contains(lower, ".jpeg") || strings.Contains(lower, ".png") || strings.Contains(lower, ".webp")) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if strings.Contains(item, clipID) || strings.Contains(lower, "graded-thumbnail") || strings.Contains(lower, "imgix") {
|
||||||
|
return item
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return pickImageURL(urls)
|
||||||
|
}
|
||||||
|
|
||||||
func extractArtgridClipID(link string) string {
|
func extractArtgridClipID(link string) string {
|
||||||
patterns := []*regexp.Regexp{
|
patterns := []*regexp.Regexp{
|
||||||
regexp.MustCompile(`/clip/([0-9]+)/`),
|
regexp.MustCompile(`/clip/([0-9]+)/`),
|
||||||
@@ -840,30 +962,38 @@ func (s *SearchService) fetchText(target string) (string, error) {
|
|||||||
s.debug("search_service:fetch_cache_hit", map[string]any{"type": "html", "target": target, "bytes": len(cached)})
|
s.debug("search_service:fetch_cache_hit", map[string]any{"type": "html", "target": target, "bytes": len(cached)})
|
||||||
return cached, nil
|
return cached, nil
|
||||||
}
|
}
|
||||||
|
for _, strategy := range []string{"default", "provider"} {
|
||||||
req, err := newBrowserRequest(http.MethodGet, target, "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
|
req, err := newBrowserRequest(http.MethodGet, target, "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", strategy)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
|
s.debug("search_service:envato_fetch_strategy", map[string]any{"target": target, "strategy": strategy})
|
||||||
resp, err := s.Client.Do(req)
|
resp, err := s.Client.Do(req)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", err
|
continue
|
||||||
|
}
|
||||||
|
data, readErr := io.ReadAll(io.LimitReader(resp.Body, 1024*1024))
|
||||||
|
_ = resp.Body.Close()
|
||||||
|
if readErr != nil {
|
||||||
|
continue
|
||||||
}
|
}
|
||||||
defer resp.Body.Close()
|
|
||||||
if resp.StatusCode == http.StatusForbidden || resp.StatusCode == http.StatusServiceUnavailable {
|
if resp.StatusCode == http.StatusForbidden || resp.StatusCode == http.StatusServiceUnavailable {
|
||||||
return fetchTextViaPython(target)
|
continue
|
||||||
}
|
}
|
||||||
if resp.StatusCode >= 300 {
|
if resp.StatusCode >= 300 {
|
||||||
return "", fmt.Errorf("fetch returned status %d", resp.StatusCode)
|
continue
|
||||||
}
|
}
|
||||||
data, err := io.ReadAll(io.LimitReader(resp.Body, 1024*1024))
|
if looksLikeCloudflareChallenge(string(data)) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
body := string(data)
|
||||||
|
s.setCachedFetchResult(cacheKey, body, 3*time.Minute)
|
||||||
|
return body, nil
|
||||||
|
}
|
||||||
|
body, err := fetchTextViaPython(target)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
if looksLikeCloudflareChallenge(string(data)) {
|
|
||||||
return fetchTextViaPython(target)
|
|
||||||
}
|
|
||||||
body := string(data)
|
|
||||||
s.setCachedFetchResult(cacheKey, body, 3*time.Minute)
|
s.setCachedFetchResult(cacheKey, body, 3*time.Minute)
|
||||||
return body, nil
|
return body, nil
|
||||||
}
|
}
|
||||||
@@ -875,7 +1005,7 @@ func (s *SearchService) fetchJSONText(target string) (string, error) {
|
|||||||
return cached, nil
|
return cached, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
req, err := newBrowserRequest(http.MethodGet, target, "application/json, text/json, */*")
|
req, err := newBrowserRequest(http.MethodGet, target, "application/json, text/json, */*", "provider")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", err
|
return "", err
|
||||||
}
|
}
|
||||||
@@ -1034,19 +1164,35 @@ func cleanArtgridDescription(description string) string {
|
|||||||
return strings.TrimSpace(description)
|
return strings.TrimSpace(description)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func artgridHTMLSignals(html, clipID string) map[string]bool {
|
||||||
|
ogURL := extractMetaContent(html, "og:url")
|
||||||
|
canonical := extractCanonicalURL(html)
|
||||||
|
alWebURL := extractMetaContent(html, "al:web:url")
|
||||||
|
lowerHTML := strings.ToLower(html)
|
||||||
|
title := strings.ToLower(extractHTMLTitle(html))
|
||||||
|
ogImage := strings.ToLower(extractMetaContent(html, "og:image"))
|
||||||
|
twitterImage := strings.ToLower(extractMetaContent(html, "twitter:image"))
|
||||||
|
return map[string]bool{
|
||||||
|
"og_url_clip": strings.Contains(ogURL, clipID),
|
||||||
|
"canonical_clip": strings.Contains(canonical, clipID),
|
||||||
|
"al_web_clip": strings.Contains(alWebURL, clipID),
|
||||||
|
"body_main_clipvideo": strings.Contains(lowerHTML, "main-clipvideo_"+clipID),
|
||||||
|
"body_clip_path": strings.Contains(lowerHTML, "/clip/"+clipID+"/"),
|
||||||
|
"body_clip_id": strings.Contains(lowerHTML, clipID),
|
||||||
|
"title_mentions_clip": strings.Contains(title, "artgrid") || strings.Contains(title, "artlist"),
|
||||||
|
"image_clip": strings.Contains(ogImage, strings.ToLower(clipID)) || strings.Contains(twitterImage, strings.ToLower(clipID)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func isMatchingArtgridClipPage(html, clipID string) bool {
|
func isMatchingArtgridClipPage(html, clipID string) bool {
|
||||||
if clipID == "" {
|
if clipID == "" {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
ogURL := extractMetaContent(html, "og:url")
|
signals := artgridHTMLSignals(html, clipID)
|
||||||
canonical := extractCanonicalURL(html)
|
if signals["og_url_clip"] || signals["canonical_clip"] || signals["al_web_clip"] || signals["body_main_clipvideo"] || signals["body_clip_path"] || signals["image_clip"] {
|
||||||
lowerHTML := strings.ToLower(html)
|
|
||||||
for _, candidate := range []string{ogURL, canonical} {
|
|
||||||
if strings.Contains(candidate, clipID) {
|
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
}
|
if signals["body_clip_id"] && signals["title_mentions_clip"] {
|
||||||
if strings.Contains(lowerHTML, "main-clipvideo_"+clipID) || strings.Contains(lowerHTML, "/clip/"+clipID+"/") {
|
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
return false
|
return false
|
||||||
@@ -1090,6 +1236,23 @@ func extractEnvatoPreviewFromHydration(html string) string {
|
|||||||
return firstNonEmpty(pickBestEnvatoPreviewURL(urls), extractVideoPreviewURL(string(decoded)))
|
return firstNonEmpty(pickBestEnvatoPreviewURL(urls), extractVideoPreviewURL(string(decoded)))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func collectEnvatoPreviewURL(html, pageThumbnail, currentThumbnail, contentURL string) string {
|
||||||
|
urls := collectURLs(html)
|
||||||
|
return firstNonEmpty(
|
||||||
|
contentURL,
|
||||||
|
extractJSONLDValue(html, "contentUrl"),
|
||||||
|
extractMetaContent(html, "twitter:player:stream"),
|
||||||
|
extractMetaContent(html, "og:video"),
|
||||||
|
extractMetaContent(html, "og:video:url"),
|
||||||
|
extractMetaContent(html, "og:video:secure_url"),
|
||||||
|
extractEnvatoPreviewFromHydration(html),
|
||||||
|
pickBestEnvatoPreviewURL(urls),
|
||||||
|
extractVideoPreviewURL(html),
|
||||||
|
deriveEnvatoPreviewFromThumbnail(pageThumbnail),
|
||||||
|
deriveEnvatoPreviewFromThumbnail(currentThumbnail),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
func extractWindowAssignedValue(html, variable string) string {
|
func extractWindowAssignedValue(html, variable string) string {
|
||||||
pattern := regexp.MustCompile(`window\.` + regexp.QuoteMeta(variable) + `\s*=\s*"([^"]+)"`)
|
pattern := regexp.MustCompile(`window\.` + regexp.QuoteMeta(variable) + `\s*=\s*"([^"]+)"`)
|
||||||
matches := pattern.FindStringSubmatch(html)
|
matches := pattern.FindStringSubmatch(html)
|
||||||
@@ -1121,7 +1284,19 @@ func pickBestEnvatoPreviewURL(urls []string) string {
|
|||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
|
|
||||||
func newBrowserRequest(method, target, accept string) (*http.Request, error) {
|
// inferFetchReferer returns the Referer value to send when fetching target:
// the Envato Elements home page for Envato URLs, the Artgrid home page for
// URLs mentioning "artgrid" or "artlist", and "" otherwise. Matching is a
// case-insensitive substring test on the whole target, with Envato checked
// first.
func inferFetchReferer(target string) string {
	lowered := strings.ToLower(target)
	rules := []struct {
		referer string
		markers []string
	}{
		{"https://elements.envato.com/", []string{"envatousercontent.com", "elements.envato.com"}},
		{"https://artgrid.io/", []string{"artgrid", "artlist"}},
	}
	for _, rule := range rules {
		for _, marker := range rule.markers {
			if strings.Contains(lowered, marker) {
				return rule.referer
			}
		}
	}
	return ""
}
|
||||||
|
|
||||||
|
func newBrowserRequest(method, target, accept, strategy string) (*http.Request, error) {
|
||||||
req, err := http.NewRequest(method, target, nil)
|
req, err := http.NewRequest(method, target, nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
@@ -1131,6 +1306,14 @@ func newBrowserRequest(method, target, accept string) (*http.Request, error) {
|
|||||||
if accept != "" {
|
if accept != "" {
|
||||||
req.Header.Set("Accept", accept)
|
req.Header.Set("Accept", accept)
|
||||||
}
|
}
|
||||||
|
if strategy == "provider" {
|
||||||
|
req.Header.Set("Referer", inferFetchReferer(target))
|
||||||
|
req.Header.Set("Upgrade-Insecure-Requests", "1")
|
||||||
|
req.Header.Set("Sec-Fetch-Dest", "document")
|
||||||
|
req.Header.Set("Sec-Fetch-Mode", "navigate")
|
||||||
|
req.Header.Set("Sec-Fetch-Site", "none")
|
||||||
|
req.Header.Set("Sec-Fetch-User", "?1")
|
||||||
|
}
|
||||||
return req, nil
|
return req, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1142,11 +1325,17 @@ req = Request(sys.argv[1], headers={
|
|||||||
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
|
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
|
||||||
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
||||||
"Accept-Language": "en-US,en;q=0.9",
|
"Accept-Language": "en-US,en;q=0.9",
|
||||||
|
"Referer": sys.argv[2],
|
||||||
|
"Upgrade-Insecure-Requests": "1",
|
||||||
|
"Sec-Fetch-Dest": "document",
|
||||||
|
"Sec-Fetch-Mode": "navigate",
|
||||||
|
"Sec-Fetch-Site": "none",
|
||||||
|
"Sec-Fetch-User": "?1",
|
||||||
})
|
})
|
||||||
with urlopen(req, timeout=20) as resp:
|
with urlopen(req, timeout=20) as resp:
|
||||||
sys.stdout.buffer.write(resp.read(1024 * 1024))
|
sys.stdout.buffer.write(resp.read(1024 * 1024))
|
||||||
`
|
`
|
||||||
output, err := exec.Command("python3", "-c", script, target).CombinedOutput()
|
output, err := exec.Command("python3", "-c", script, target, inferFetchReferer(target)).CombinedOutput()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", fmt.Errorf("python fallback failed: %v: %s", err, truncateBytes(output, 300))
|
return "", fmt.Errorf("python fallback failed: %v: %s", err, truncateBytes(output, 300))
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -44,6 +44,15 @@ func TestExtractEnvatoPreviewFromHydration(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestCollectEnvatoPreviewURLFindsOgVideo(t *testing.T) {
|
||||||
|
html := `<meta property="og:video" content="https://video-previews.elements.envatousercontent.com/example/watermarked_preview/watermarked_preview.mp4">`
|
||||||
|
got := collectEnvatoPreviewURL(html, "", "", "")
|
||||||
|
want := "https://video-previews.elements.envatousercontent.com/example/watermarked_preview/watermarked_preview.mp4"
|
||||||
|
if got != want {
|
||||||
|
t.Fatalf("expected %q, got %q", want, got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestIsUsefulGoogleVideoResultRejectsMusicResults(t *testing.T) {
|
func TestIsUsefulGoogleVideoResultRejectsMusicResults(t *testing.T) {
|
||||||
result := SearchResult{
|
result := SearchResult{
|
||||||
Title: "Couple Friendly Sad Bgm Movie Best Bgm",
|
Title: "Couple Friendly Sad Bgm Movie Best Bgm",
|
||||||
@@ -109,6 +118,22 @@ func TestIsMatchingArtgridClipPageRejectsHomepage(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestIsMatchingArtgridClipPageAcceptsBodySignals(t *testing.T) {
|
||||||
|
html := `<html><head><title>Night City | Stock Video Footage - Artgrid.io</title></head><body><script>window.__clip="6600269";</script></body></html>`
|
||||||
|
if !isMatchingArtgridClipPage(html, "6600269") {
|
||||||
|
t.Fatal("expected body/title signal Artgrid HTML to be accepted")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestLowValueThumbnailDetection(t *testing.T) {
|
||||||
|
if !IsLowValueThumbnail("https://example.com/favicon.ico") {
|
||||||
|
t.Fatal("expected favicon to be low-value thumbnail")
|
||||||
|
}
|
||||||
|
if IsLowValueThumbnail("https://i.ytimg.com/vi/abcd1234xyz/hqdefault.jpg") {
|
||||||
|
t.Fatal("expected youtube thumbnail to be usable")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestGeminiCandidateLimitNeverExceedsCandidates(t *testing.T) {
|
func TestGeminiCandidateLimitNeverExceedsCandidates(t *testing.T) {
|
||||||
if got := GeminiCandidateLimit(9); got != 9 {
|
if got := GeminiCandidateLimit(9); got != 9 {
|
||||||
t.Fatalf("expected Gemini limit to stay within candidate count, got %d", got)
|
t.Fatalf("expected Gemini limit to stay within candidate count, got %d", got)
|
||||||
|
|||||||
@@ -55,6 +55,9 @@ type AIRecommendation struct {
|
|||||||
Source string `json:"source"`
|
Source string `json:"source"`
|
||||||
Reason string `json:"reason"`
|
Reason string `json:"reason"`
|
||||||
Recommended bool `json:"recommended"`
|
Recommended bool `json:"recommended"`
|
||||||
|
MediaMode string `json:"mediaMode,omitempty"`
|
||||||
|
EmbedURL string `json:"embedUrl,omitempty"`
|
||||||
|
PreviewBlockedReason string `json:"previewBlockedReason,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type QueryExpansion struct {
|
type QueryExpansion struct {
|
||||||
@@ -480,6 +483,14 @@ func (g *GeminiService) fetchCandidateVisualInlineData(candidate SearchResult) (
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
if candidate.ThumbnailURL != "" {
|
if candidate.ThumbnailURL != "" {
|
||||||
|
if isLowValueThumbnail(candidate.ThumbnailURL) {
|
||||||
|
g.debug("gemini:vision_candidate_rejected_low_value", map[string]any{
|
||||||
|
"link": candidate.Link,
|
||||||
|
"source": candidate.Source,
|
||||||
|
"thumbnailUrl": candidate.ThumbnailURL,
|
||||||
|
})
|
||||||
|
return "", "", fmt.Errorf("candidate thumbnail is low value")
|
||||||
|
}
|
||||||
cacheKey := "image\n" + candidate.ThumbnailURL
|
cacheKey := "image\n" + candidate.ThumbnailURL
|
||||||
if data, mimeType, ok := g.getCachedVisual(cacheKey); ok {
|
if data, mimeType, ok := g.getCachedVisual(cacheKey); ok {
|
||||||
return data, mimeType, nil
|
return data, mimeType, nil
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ package services
|
|||||||
import (
|
import (
|
||||||
"net/http"
|
"net/http"
|
||||||
"net/http/httptest"
|
"net/http/httptest"
|
||||||
|
"strings"
|
||||||
"testing"
|
"testing"
|
||||||
"time"
|
"time"
|
||||||
)
|
)
|
||||||
@@ -114,3 +115,27 @@ func TestGeminiExpansionCacheRoundTrip(t *testing.T) {
|
|||||||
t.Fatalf("unexpected expansion cache value: %#v", value)
|
t.Fatalf("unexpected expansion cache value: %#v", value)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestDecorateRecommendationMediaUsesEmbedForGoogleVideo(t *testing.T) {
|
||||||
|
item := DecorateRecommendationMedia(AIRecommendation{
|
||||||
|
Source: "Google Video",
|
||||||
|
Link: "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
|
||||||
|
})
|
||||||
|
if item.MediaMode != "embed" {
|
||||||
|
t.Fatalf("expected embed media mode, got %q", item.MediaMode)
|
||||||
|
}
|
||||||
|
if item.EmbedURL == "" || !strings.Contains(item.EmbedURL, "youtube-nocookie.com/embed/") {
|
||||||
|
t.Fatalf("unexpected embed url: %q", item.EmbedURL)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRankSearchResultsPrefersUsableVisuals(t *testing.T) {
|
||||||
|
results := []SearchResult{
|
||||||
|
{Title: "cyberpunk city", Link: "https://example.com/a", ThumbnailURL: "https://example.com/favicon.ico"},
|
||||||
|
{Title: "cyberpunk city", Link: "https://example.com/b", ThumbnailURL: "https://example.com/frame.jpg"},
|
||||||
|
}
|
||||||
|
ranked := RankSearchResults("cyberpunk city", results)
|
||||||
|
if ranked[0].Link != "https://example.com/b" {
|
||||||
|
t.Fatalf("expected usable thumbnail result first, got %#v", ranked)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
+49
-20
@@ -10,6 +10,8 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
const GeminiFallbackReason = "Gemini Vision 응답이 부족해 키워드 기준으로 보강된 결과입니다."
|
const GeminiFallbackReason = "Gemini Vision 응답이 부족해 키워드 기준으로 보강된 결과입니다."
|
||||||
|
const FallbackPreviewReason = "Fallback due to missing provider preview."
|
||||||
|
const PendingVisualReason = "Ranked candidate pending stronger visual evidence."
|
||||||
|
|
||||||
type GeminiBatchStats struct {
|
type GeminiBatchStats struct {
|
||||||
CandidateCap int `json:"candidateCap"`
|
CandidateCap int `json:"candidateCap"`
|
||||||
@@ -19,6 +21,7 @@ type GeminiBatchStats struct {
|
|||||||
Failed int `json:"failed"`
|
Failed int `json:"failed"`
|
||||||
SequentialRetried int `json:"sequentialRetried"`
|
SequentialRetried int `json:"sequentialRetried"`
|
||||||
RecommendedCount int `json:"recommendedCount"`
|
RecommendedCount int `json:"recommendedCount"`
|
||||||
|
VisualRejectCount int `json:"visualRejectCount"`
|
||||||
Errors []string `json:"errors,omitempty"`
|
Errors []string `json:"errors,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -58,19 +61,25 @@ func RankSearchResults(query string, results []SearchResult) []SearchResult {
|
|||||||
score -= 4
|
score -= 4
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if result.ThumbnailURL != "" {
|
|
||||||
score += 2
|
|
||||||
}
|
|
||||||
if result.PreviewVideoURL != "" {
|
if result.PreviewVideoURL != "" {
|
||||||
score += 3
|
score += 10
|
||||||
|
}
|
||||||
|
if hasUsableThumbnail(result.ThumbnailURL) {
|
||||||
|
score += 5
|
||||||
|
}
|
||||||
|
if isLowValueThumbnail(result.ThumbnailURL) {
|
||||||
|
score -= 8
|
||||||
|
}
|
||||||
|
if strings.TrimSpace(result.PreviewVideoURL) == "" && !hasUsableThumbnail(result.ThumbnailURL) {
|
||||||
|
score -= 10
|
||||||
}
|
}
|
||||||
switch result.Source {
|
switch result.Source {
|
||||||
case "Google Video":
|
case "Google Video":
|
||||||
score -= 1
|
score -= 2
|
||||||
case "Envato":
|
case "Envato":
|
||||||
score += 7
|
score += 5
|
||||||
case "Artgrid":
|
case "Artgrid":
|
||||||
score += 7
|
score += 4
|
||||||
}
|
}
|
||||||
scored = append(scored, scoredResult{item: result, score: score})
|
scored = append(scored, scoredResult{item: result, score: score})
|
||||||
}
|
}
|
||||||
@@ -106,6 +115,11 @@ func EvaluateAllCandidatesWithGeminiWithDeadline(service *GeminiService, query s
|
|||||||
CandidateCap: limit,
|
CandidateCap: limit,
|
||||||
Requested: min(limit, len(ranked)),
|
Requested: min(limit, len(ranked)),
|
||||||
}
|
}
|
||||||
|
for _, item := range ranked[:min(limit, len(ranked))] {
|
||||||
|
if strings.TrimSpace(item.PreviewVideoURL) == "" && !hasUsableThumbnail(item.ThumbnailURL) {
|
||||||
|
stats.VisualRejectCount++
|
||||||
|
}
|
||||||
|
}
|
||||||
type batchResult struct {
|
type batchResult struct {
|
||||||
index int
|
index int
|
||||||
recommendations []AIRecommendation
|
recommendations []AIRecommendation
|
||||||
@@ -231,7 +245,7 @@ func BuildFallbackRecommendations(ranked []SearchResult, limit int, reason strin
|
|||||||
|
|
||||||
fallback := make([]AIRecommendation, 0, min(limit, len(ranked)))
|
fallback := make([]AIRecommendation, 0, min(limit, len(ranked)))
|
||||||
for _, item := range ranked[:min(limit, len(ranked))] {
|
for _, item := range ranked[:min(limit, len(ranked))] {
|
||||||
fallback = append(fallback, AIRecommendation{
|
fallback = append(fallback, DecorateRecommendationMedia(AIRecommendation{
|
||||||
Title: item.Title,
|
Title: item.Title,
|
||||||
Link: item.Link,
|
Link: item.Link,
|
||||||
Snippet: item.Snippet,
|
Snippet: item.Snippet,
|
||||||
@@ -240,7 +254,7 @@ func BuildFallbackRecommendations(ranked []SearchResult, limit int, reason strin
|
|||||||
Source: item.Source,
|
Source: item.Source,
|
||||||
Reason: reason,
|
Reason: reason,
|
||||||
Recommended: false,
|
Recommended: false,
|
||||||
})
|
}))
|
||||||
}
|
}
|
||||||
return fallback
|
return fallback
|
||||||
}
|
}
|
||||||
@@ -385,6 +399,8 @@ func looksNegativeReason(reason string) bool {
|
|||||||
func MergeRecommendations(recommended []AIRecommendation, ranked []SearchResult, limit int) []AIRecommendation {
|
func MergeRecommendations(recommended []AIRecommendation, ranked []SearchResult, limit int) []AIRecommendation {
|
||||||
merged := make([]AIRecommendation, 0, min(limit, len(ranked)))
|
merged := make([]AIRecommendation, 0, min(limit, len(ranked)))
|
||||||
seen := map[string]bool{}
|
seen := map[string]bool{}
|
||||||
|
fillerCount := 0
|
||||||
|
maxFiller := min(4, limit)
|
||||||
|
|
||||||
for _, item := range recommended {
|
for _, item := range recommended {
|
||||||
if !item.Recommended {
|
if !item.Recommended {
|
||||||
@@ -394,7 +410,7 @@ func MergeRecommendations(recommended []AIRecommendation, ranked []SearchResult,
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
seen[item.Link] = true
|
seen[item.Link] = true
|
||||||
merged = append(merged, item)
|
merged = append(merged, DecorateRecommendationMedia(item))
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, item := range recommended {
|
for _, item := range recommended {
|
||||||
@@ -404,8 +420,11 @@ func MergeRecommendations(recommended []AIRecommendation, ranked []SearchResult,
|
|||||||
if looksNegativeReason(item.Reason) || strings.Contains(item.Reason, GeminiFallbackReason) {
|
if looksNegativeReason(item.Reason) || strings.Contains(item.Reason, GeminiFallbackReason) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
if strings.TrimSpace(item.PreviewVideoURL) == "" && !hasUsableThumbnail(item.ThumbnailURL) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
seen[item.Link] = true
|
seen[item.Link] = true
|
||||||
merged = append(merged, item)
|
merged = append(merged, DecorateRecommendationMedia(item))
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(merged) < min(12, limit) {
|
if len(merged) < min(12, limit) {
|
||||||
@@ -413,20 +432,24 @@ func MergeRecommendations(recommended []AIRecommendation, ranked []SearchResult,
|
|||||||
if len(merged) >= min(12, limit) || item.Link == "" || seen[item.Link] {
|
if len(merged) >= min(12, limit) || item.Link == "" || seen[item.Link] {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if strings.TrimSpace(item.ThumbnailURL) == "" && strings.TrimSpace(item.PreviewVideoURL) == "" {
|
if fillerCount >= maxFiller {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
if strings.TrimSpace(item.PreviewVideoURL) == "" && !hasUsableThumbnail(item.ThumbnailURL) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
seen[item.Link] = true
|
seen[item.Link] = true
|
||||||
merged = append(merged, AIRecommendation{
|
merged = append(merged, DecorateRecommendationMedia(AIRecommendation{
|
||||||
Title: item.Title,
|
Title: item.Title,
|
||||||
Link: item.Link,
|
Link: item.Link,
|
||||||
Snippet: item.Snippet,
|
Snippet: item.Snippet,
|
||||||
ThumbnailURL: item.ThumbnailURL,
|
ThumbnailURL: item.ThumbnailURL,
|
||||||
PreviewVideoURL: item.PreviewVideoURL,
|
PreviewVideoURL: item.PreviewVideoURL,
|
||||||
Source: item.Source,
|
Source: item.Source,
|
||||||
Reason: "Gemini 검토가 부족해 편집용 후보로 추가된 결과입니다.",
|
Reason: PendingVisualReason,
|
||||||
Recommended: false,
|
Recommended: false,
|
||||||
})
|
}))
|
||||||
|
fillerCount++
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return merged
|
return merged
|
||||||
@@ -435,31 +458,37 @@ func MergeRecommendations(recommended []AIRecommendation, ranked []SearchResult,
|
|||||||
func BackfillRecommendations(existing []AIRecommendation, ranked []SearchResult, limit int, reason string) []AIRecommendation {
|
func BackfillRecommendations(existing []AIRecommendation, ranked []SearchResult, limit int, reason string) []AIRecommendation {
|
||||||
merged := make([]AIRecommendation, 0, min(limit, len(ranked)))
|
merged := make([]AIRecommendation, 0, min(limit, len(ranked)))
|
||||||
seen := map[string]bool{}
|
seen := map[string]bool{}
|
||||||
|
fillerCount := 0
|
||||||
|
maxFiller := min(4, limit)
|
||||||
for _, item := range existing {
|
for _, item := range existing {
|
||||||
if item.Link == "" || seen[item.Link] {
|
if item.Link == "" || seen[item.Link] {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
seen[item.Link] = true
|
seen[item.Link] = true
|
||||||
merged = append(merged, item)
|
merged = append(merged, DecorateRecommendationMedia(item))
|
||||||
}
|
}
|
||||||
for _, item := range ranked {
|
for _, item := range ranked {
|
||||||
if len(merged) >= limit || item.Link == "" || seen[item.Link] {
|
if len(merged) >= limit || item.Link == "" || seen[item.Link] {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if strings.TrimSpace(item.ThumbnailURL) == "" && strings.TrimSpace(item.PreviewVideoURL) == "" {
|
if fillerCount >= maxFiller {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
if strings.TrimSpace(item.PreviewVideoURL) == "" && !hasUsableThumbnail(item.ThumbnailURL) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
seen[item.Link] = true
|
seen[item.Link] = true
|
||||||
merged = append(merged, AIRecommendation{
|
merged = append(merged, DecorateRecommendationMedia(AIRecommendation{
|
||||||
Title: item.Title,
|
Title: item.Title,
|
||||||
Link: item.Link,
|
Link: item.Link,
|
||||||
Snippet: item.Snippet,
|
Snippet: item.Snippet,
|
||||||
ThumbnailURL: item.ThumbnailURL,
|
ThumbnailURL: item.ThumbnailURL,
|
||||||
PreviewVideoURL: item.PreviewVideoURL,
|
PreviewVideoURL: item.PreviewVideoURL,
|
||||||
Source: item.Source,
|
Source: item.Source,
|
||||||
Reason: reason,
|
Reason: firstNonEmpty(strings.TrimSpace(reason), FallbackPreviewReason),
|
||||||
Recommended: false,
|
Recommended: false,
|
||||||
})
|
}))
|
||||||
|
fillerCount++
|
||||||
}
|
}
|
||||||
return merged
|
return merged
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user