This commit is contained in:
@@ -255,6 +255,20 @@
|
||||
- backend debug broadcasts
|
||||
|
||||
## Recent Change Log
|
||||
- Date: `2026-03-16`
|
||||
- What changed:
|
||||
- Hardened search result enrichment and recommendation metadata for preview recovery work.
|
||||
- Added provider-aware fetch strategy for source HTML/JSON requests, broader Envato preview parsing, looser Artgrid HTML acceptance, and stronger thumbnail preservation rules.
|
||||
- Added low-value thumbnail detection, ranking penalties for weak visuals, capped filler backfill, and response metadata fields for modal rendering (`mediaMode`, `embedUrl`, `previewBlockedReason`).
|
||||
- Expanded debug summaries with usable-thumbnail and embed counts, and added unit coverage for the new parsing/ranking helpers.
|
||||
- Why it changed:
|
||||
- The latest production log showed Envato enrichment frequently failing, Artgrid enrichment collapsing on `403` plus HTML mismatch, and Gemini seeing too few usable visuals to do meaningful review.
|
||||
- How it was verified:
|
||||
- `go test ./...`
|
||||
- What is still risky or incomplete:
|
||||
- Frontend modal fallback behavior is not updated yet in this batch, so the new response metadata is not fully consumed until the UI patch lands.
|
||||
- Envato source fetches may still fail on some pages if the provider changes challenge behavior again.
|
||||
|
||||
- Date: `2026-03-16`
|
||||
- What changed:
|
||||
- Added in-process query translation / expansion cache inside `GeminiService` so repeated identical searches can reuse the same English query and variant list without re-calling Gemini or Google Translate.
|
||||
|
||||
+57
-23
@@ -81,14 +81,17 @@ type PreviewResponse struct {
|
||||
}
|
||||
|
||||
// searchDebugSummary is the compact digest of a result set that is attached
// to debug output. The thumbnail-quality and embed counters are optional and
// are omitted from the JSON when zero.
type searchDebugSummary struct {
	// Total is the number of results summarised.
	Total int `json:"total"`
	// BySource counts results per provider name.
	BySource map[string]int `json:"bySource"`
	// WithPreview counts results that carry a preview video URL.
	WithPreview int `json:"withPreview"`
	// WithThumbnail counts results that carry any non-blank thumbnail URL.
	WithThumbnail int `json:"withThumbnail"`
	// WithUsableThumbnail counts thumbnails that pass the usable-thumbnail check.
	WithUsableThumbnail int `json:"withUsableThumbnail,omitempty"`
	// WithLowValueThumbnail counts thumbnails flagged as low value (icons, logos, ...).
	WithLowValueThumbnail int `json:"withLowValueThumbnail,omitempty"`
	// WithEmbedURL counts recommendations that carry an embed URL.
	WithEmbedURL int `json:"withEmbedUrl,omitempty"`
	// Top holds a short sample of the leading results.
	Top []map[string]any `json:"top"`
	// Warning carries a non-fatal error message, if any.
	Warning string `json:"warning,omitempty"`
	// DurationMS is the elapsed time of the summarised operation in milliseconds.
	DurationMS int64 `json:"durationMs,omitempty"`
	// GeminiCandidateCap is the candidate limit that was applied for Gemini review.
	GeminiCandidateCap int `json:"geminiCandidateCap,omitempty"`
}
|
||||
|
||||
type debugResponseWriter struct {
|
||||
@@ -484,6 +487,9 @@ func (a *App) searchMedia(c *gin.Context) {
|
||||
)
|
||||
}
|
||||
merged = services.RandomizeTopRecommendations(merged, 8)
|
||||
for idx := range merged {
|
||||
merged[idx] = services.DecorateRecommendationMedia(merged[idx])
|
||||
}
|
||||
warning := ""
|
||||
if geminiErr != nil {
|
||||
warning = geminiErr.Error()
|
||||
@@ -628,6 +634,8 @@ func summarizeSearchResults(results []services.SearchResult, duration time.Durat
|
||||
bySource := map[string]int{}
|
||||
withPreview := 0
|
||||
withThumbnail := 0
|
||||
withUsableThumbnail := 0
|
||||
withLowValueThumbnail := 0
|
||||
top := make([]map[string]any, 0, min(6, len(results)))
|
||||
for idx, item := range results {
|
||||
bySource[item.Source]++
|
||||
@@ -636,6 +644,12 @@ func summarizeSearchResults(results []services.SearchResult, duration time.Durat
|
||||
}
|
||||
if strings.TrimSpace(item.ThumbnailURL) != "" {
|
||||
withThumbnail++
|
||||
if services.HasUsableThumbnail(item.ThumbnailURL) {
|
||||
withUsableThumbnail++
|
||||
}
|
||||
if services.IsLowValueThumbnail(item.ThumbnailURL) {
|
||||
withLowValueThumbnail++
|
||||
}
|
||||
}
|
||||
if idx < 6 {
|
||||
top = append(top, map[string]any{
|
||||
@@ -643,20 +657,23 @@ func summarizeSearchResults(results []services.SearchResult, duration time.Durat
|
||||
"source": item.Source,
|
||||
"hasPreview": item.PreviewVideoURL != "",
|
||||
"hasThumbnail": item.ThumbnailURL != "",
|
||||
"usableThumb": services.HasUsableThumbnail(item.ThumbnailURL),
|
||||
"displayLink": item.DisplayLink,
|
||||
"snippetSample": truncateText(item.Snippet, 160),
|
||||
})
|
||||
}
|
||||
}
|
||||
return searchDebugSummary{
|
||||
Total: len(results),
|
||||
BySource: bySource,
|
||||
WithPreview: withPreview,
|
||||
WithThumbnail: withThumbnail,
|
||||
Top: top,
|
||||
Warning: warning,
|
||||
DurationMS: duration.Milliseconds(),
|
||||
GeminiCandidateCap: geminiCap,
|
||||
Total: len(results),
|
||||
BySource: bySource,
|
||||
WithPreview: withPreview,
|
||||
WithThumbnail: withThumbnail,
|
||||
WithUsableThumbnail: withUsableThumbnail,
|
||||
WithLowValueThumbnail: withLowValueThumbnail,
|
||||
Top: top,
|
||||
Warning: warning,
|
||||
DurationMS: duration.Milliseconds(),
|
||||
GeminiCandidateCap: geminiCap,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -664,6 +681,9 @@ func summarizeRecommendationResults(results []services.AIRecommendation, duratio
|
||||
bySource := map[string]int{}
|
||||
withPreview := 0
|
||||
withThumbnail := 0
|
||||
withUsableThumbnail := 0
|
||||
withLowValueThumbnail := 0
|
||||
withEmbedURL := 0
|
||||
top := make([]map[string]any, 0, min(6, len(results)))
|
||||
for idx, item := range results {
|
||||
bySource[item.Source]++
|
||||
@@ -672,6 +692,15 @@ func summarizeRecommendationResults(results []services.AIRecommendation, duratio
|
||||
}
|
||||
if strings.TrimSpace(item.ThumbnailURL) != "" {
|
||||
withThumbnail++
|
||||
if services.HasUsableThumbnail(item.ThumbnailURL) {
|
||||
withUsableThumbnail++
|
||||
}
|
||||
if services.IsLowValueThumbnail(item.ThumbnailURL) {
|
||||
withLowValueThumbnail++
|
||||
}
|
||||
}
|
||||
if strings.TrimSpace(item.EmbedURL) != "" {
|
||||
withEmbedURL++
|
||||
}
|
||||
if idx < 6 {
|
||||
top = append(top, map[string]any{
|
||||
@@ -679,19 +708,24 @@ func summarizeRecommendationResults(results []services.AIRecommendation, duratio
|
||||
"source": item.Source,
|
||||
"hasPreview": item.PreviewVideoURL != "",
|
||||
"hasThumbnail": item.ThumbnailURL != "",
|
||||
"hasEmbed": item.EmbedURL != "",
|
||||
"mediaMode": item.MediaMode,
|
||||
"reasonSample": truncateText(item.Reason, 120),
|
||||
"snippetSample": truncateText(item.Snippet, 160),
|
||||
})
|
||||
}
|
||||
}
|
||||
return searchDebugSummary{
|
||||
Total: len(results),
|
||||
BySource: bySource,
|
||||
WithPreview: withPreview,
|
||||
WithThumbnail: withThumbnail,
|
||||
Top: top,
|
||||
Warning: warning,
|
||||
DurationMS: duration.Milliseconds(),
|
||||
Total: len(results),
|
||||
BySource: bySource,
|
||||
WithPreview: withPreview,
|
||||
WithThumbnail: withThumbnail,
|
||||
WithUsableThumbnail: withUsableThumbnail,
|
||||
WithLowValueThumbnail: withLowValueThumbnail,
|
||||
WithEmbedURL: withEmbedURL,
|
||||
Top: top,
|
||||
Warning: warning,
|
||||
DurationMS: duration.Milliseconds(),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
+244
-55
@@ -294,31 +294,16 @@ func (s *SearchService) enrichEnvato(result SearchResult) SearchResult {
|
||||
extractMetaContent(html, "twitter:image"),
|
||||
extractJSONLDValue(html, "thumbnailUrl"),
|
||||
)
|
||||
if shouldPreferPageThumbnail(result.ThumbnailURL, result.Link) {
|
||||
if hasUsableThumbnail(pageThumbnail) && shouldPreferPageThumbnail(result.ThumbnailURL, result.Link) {
|
||||
result.ThumbnailURL = pageThumbnail
|
||||
}
|
||||
if result.PreviewVideoURL == "" {
|
||||
result.PreviewVideoURL = firstNonEmpty(
|
||||
videoMeta.ContentURL,
|
||||
extractJSONLDValue(html, "contentUrl"),
|
||||
extractMetaContent(html, "twitter:player:stream"),
|
||||
extractVideoPreviewURL(html),
|
||||
extractEnvatoPreviewFromHydration(html),
|
||||
deriveEnvatoPreviewFromThumbnail(pageThumbnail),
|
||||
deriveEnvatoPreviewFromThumbnail(result.ThumbnailURL),
|
||||
)
|
||||
result.PreviewVideoURL = collectEnvatoPreviewURL(html, pageThumbnail, result.ThumbnailURL, videoMeta.ContentURL)
|
||||
}
|
||||
if result.PreviewVideoURL == "" {
|
||||
time.Sleep(1200 * time.Millisecond)
|
||||
if retryHTML, retryErr := s.fetchText(result.Link); retryErr == nil {
|
||||
result.PreviewVideoURL = firstNonEmpty(
|
||||
extractJSONLDValue(retryHTML, "contentUrl"),
|
||||
extractMetaContent(retryHTML, "twitter:player:stream"),
|
||||
extractVideoPreviewURL(retryHTML),
|
||||
extractEnvatoPreviewFromHydration(retryHTML),
|
||||
deriveEnvatoPreviewFromThumbnail(pageThumbnail),
|
||||
deriveEnvatoPreviewFromThumbnail(result.ThumbnailURL),
|
||||
)
|
||||
result.PreviewVideoURL = collectEnvatoPreviewURL(retryHTML, pageThumbnail, result.ThumbnailURL, "")
|
||||
}
|
||||
}
|
||||
s.debug("search_service:enrich_envato_done", map[string]any{
|
||||
@@ -341,8 +326,8 @@ func (s *SearchService) enrichArtgrid(result SearchResult) SearchResult {
|
||||
body, err := s.fetchJSONText(apiURL)
|
||||
if err == nil {
|
||||
urls := collectURLs(body)
|
||||
if result.ThumbnailURL == "" {
|
||||
result.ThumbnailURL = pickImageURL(urls)
|
||||
if !hasUsableThumbnail(result.ThumbnailURL) {
|
||||
result.ThumbnailURL = pickArtgridImageURL(urls, clipID)
|
||||
}
|
||||
if result.PreviewVideoURL == "" {
|
||||
result.PreviewVideoURL = pickVideoURL(urls)
|
||||
@@ -356,12 +341,16 @@ func (s *SearchService) enrichArtgrid(result SearchResult) SearchResult {
|
||||
html, err := s.fetchText(result.Link)
|
||||
if err == nil {
|
||||
if !isMatchingArtgridClipPage(html, clipID) {
|
||||
s.debug("search_service:enrich_artgrid_html_mismatch", map[string]any{"link": result.Link, "clipId": clipID})
|
||||
s.debug("search_service:enrich_artgrid_html_mismatch", map[string]any{
|
||||
"link": result.Link,
|
||||
"clipId": clipID,
|
||||
"signals": artgridHTMLSignals(html, clipID),
|
||||
})
|
||||
return result
|
||||
}
|
||||
result.Title = firstNonEmpty(
|
||||
cleanArtgridTitle(extractMetaContent(html, "og:title")),
|
||||
cleanArtgridTitle(extractMetaContent(html, "title")),
|
||||
cleanArtgridTitle(extractHTMLTitle(html)),
|
||||
result.Title,
|
||||
)
|
||||
result.Snippet = firstNonEmpty(
|
||||
@@ -374,15 +363,20 @@ func (s *SearchService) enrichArtgrid(result SearchResult) SearchResult {
|
||||
extractMetaContent(html, "twitter:image"),
|
||||
extractArtgridBackgroundThumbnail(html, clipID),
|
||||
extractJSONLDValue(html, "image"),
|
||||
pickArtgridImageURL(collectURLs(html), clipID),
|
||||
)
|
||||
if shouldPreferPageThumbnail(result.ThumbnailURL, result.Link) {
|
||||
if hasUsableThumbnail(pageThumbnail) && shouldPreferPageThumbnail(result.ThumbnailURL, result.Link) {
|
||||
result.ThumbnailURL = pageThumbnail
|
||||
}
|
||||
if result.PreviewVideoURL == "" {
|
||||
result.PreviewVideoURL = firstNonEmpty(
|
||||
extractJSONLDValue(html, "contentUrl"),
|
||||
extractMetaContent(html, "twitter:player:stream"),
|
||||
extractMetaContent(html, "og:video"),
|
||||
extractMetaContent(html, "og:video:url"),
|
||||
extractMetaContent(html, "og:video:secure_url"),
|
||||
extractVideoPreviewURL(html),
|
||||
pickVideoURL(collectURLs(html)),
|
||||
)
|
||||
}
|
||||
if result.PreviewVideoURL == "" {
|
||||
@@ -677,6 +671,112 @@ func deriveThumbnail(link string) string {
|
||||
return ""
|
||||
}
|
||||
|
||||
// isLowValueThumbnail reports whether raw is a poor choice of preview image.
//
// A URL is low value when it is blank, when it contains one of the
// icon/logo/placeholder markers, or when it is served from a search-engine
// image proxy host. The host blocklist is compared against the URL's host
// (exact match or subdomain) rather than against the whole URL, so unrelated
// sites whose name merely contains a blocked domain (for example
// "climbing.com" vs "bing.com") are no longer rejected. Any URL that mentions
// "ytimg.com" is exempt from the host blocklist.
func isLowValueThumbnail(raw string) bool {
	lower := strings.ToLower(strings.TrimSpace(raw))
	if lower == "" {
		return true
	}
	for _, token := range []string{
		"favicon", "apple-touch-icon", "/logo", "/icon", "icon.", "logo.", "placehold.co",
	} {
		if strings.Contains(lower, token) {
			return true
		}
	}
	// YouTube thumbnails are always acceptable, even when proxied.
	if strings.Contains(lower, "ytimg.com") {
		return false
	}
	host := thumbnailHost(lower)
	for _, domain := range []string{
		"googleusercontent.com", "gstatic.com", "bing.com", "duckduckgo.com",
	} {
		if host == domain || strings.HasSuffix(host, "."+domain) {
			return true
		}
	}
	return false
}

// thumbnailHost extracts the host portion of an already lower-cased URL using
// string operations only. It tolerates a missing scheme, a protocol-relative
// "//" prefix, userinfo and a port.
func thumbnailHost(lower string) string {
	rest := lower
	// Only treat "://" as the scheme separator when it precedes the path/query.
	if idx := strings.Index(rest, "://"); idx >= 0 && !strings.ContainsAny(rest[:idx], "/?#") {
		rest = rest[idx+len("://"):]
	} else {
		rest = strings.TrimPrefix(rest, "//")
	}
	if idx := strings.IndexAny(rest, "/?#"); idx >= 0 {
		rest = rest[:idx]
	}
	if idx := strings.LastIndex(rest, "@"); idx >= 0 {
		rest = rest[idx+1:]
	}
	if host, _, ok := strings.Cut(rest, ":"); ok {
		rest = host
	}
	return strings.TrimSuffix(rest, ".")
}
|
||||
|
||||
func hasUsableThumbnail(raw string) bool {
|
||||
return strings.TrimSpace(raw) != "" && !isLowValueThumbnail(raw)
|
||||
}
|
||||
|
||||
// HasUsableThumbnail is the exported form of hasUsableThumbnail for callers
// outside this package. It returns that function's result unchanged: true when
// raw is a non-blank thumbnail URL that is not classified as low value.
func HasUsableThumbnail(raw string) bool {
|
||||
return hasUsableThumbnail(raw)
|
||||
}
|
||||
|
||||
// IsLowValueThumbnail is the exported form of isLowValueThumbnail for callers
// outside this package. It returns that function's result unchanged; note that
// a blank raw value is reported as low value.
func IsLowValueThumbnail(raw string) bool {
|
||||
return isLowValueThumbnail(raw)
|
||||
}
|
||||
|
||||
func buildEmbedURL(source, link string) string {
|
||||
trimmed := strings.TrimSpace(link)
|
||||
if trimmed == "" {
|
||||
return ""
|
||||
}
|
||||
if strings.EqualFold(strings.TrimSpace(source), "Google Video") {
|
||||
if videoID := extractYouTubeID(trimmed); videoID != "" {
|
||||
return "https://www.youtube-nocookie.com/embed/" + videoID + "?autoplay=1&rel=0&playsinline=1&modestbranding=1&enablejsapi=1"
|
||||
}
|
||||
}
|
||||
return trimmed
|
||||
}
|
||||
|
||||
func defaultMediaMode(source, link, previewURL, thumbnailURL string) (string, string, string) {
|
||||
embedURL := buildEmbedURL(source, link)
|
||||
switch source {
|
||||
case "Google Video":
|
||||
if embedURL != "" {
|
||||
return "embed", embedURL, ""
|
||||
}
|
||||
if hasUsableThumbnail(thumbnailURL) {
|
||||
return "thumbnail", "", "missing_google_embed"
|
||||
}
|
||||
return "none", "", "missing_google_embed"
|
||||
case "Envato":
|
||||
if strings.TrimSpace(previewURL) != "" {
|
||||
return "preview_video", embedURL, "provider_embed_blocked"
|
||||
}
|
||||
if hasUsableThumbnail(thumbnailURL) {
|
||||
return "thumbnail", embedURL, "provider_embed_blocked"
|
||||
}
|
||||
if embedURL != "" {
|
||||
return "embed", embedURL, ""
|
||||
}
|
||||
return "none", "", "provider_embed_blocked"
|
||||
case "Artgrid":
|
||||
if hasUsableThumbnail(thumbnailURL) {
|
||||
return "thumbnail", embedURL, "provider_preview_unavailable"
|
||||
}
|
||||
if strings.TrimSpace(previewURL) != "" {
|
||||
return "preview_video", embedURL, "provider_preview_unavailable"
|
||||
}
|
||||
if embedURL != "" {
|
||||
return "embed", embedURL, ""
|
||||
}
|
||||
return "none", "", "provider_preview_unavailable"
|
||||
default:
|
||||
if strings.TrimSpace(previewURL) != "" {
|
||||
return "preview_video", embedURL, ""
|
||||
}
|
||||
if hasUsableThumbnail(thumbnailURL) {
|
||||
return "thumbnail", embedURL, ""
|
||||
}
|
||||
if embedURL != "" {
|
||||
return "embed", embedURL, ""
|
||||
}
|
||||
return "none", "", ""
|
||||
}
|
||||
}
|
||||
|
||||
func DecorateRecommendationMedia(item AIRecommendation) AIRecommendation {
|
||||
item.EmbedURL = buildEmbedURL(item.Source, item.Link)
|
||||
item.MediaMode, _, item.PreviewBlockedReason = defaultMediaMode(item.Source, item.Link, item.PreviewVideoURL, item.ThumbnailURL)
|
||||
if item.MediaMode == "embed" && item.EmbedURL == "" {
|
||||
item.MediaMode = "none"
|
||||
}
|
||||
if item.MediaMode == "thumbnail" && !hasUsableThumbnail(item.ThumbnailURL) && strings.TrimSpace(item.PreviewVideoURL) != "" {
|
||||
item.MediaMode = "preview_video"
|
||||
}
|
||||
return item
|
||||
}
|
||||
|
||||
func extractYouTubeID(link string) string {
|
||||
patterns := []*regexp.Regexp{
|
||||
regexp.MustCompile(`(?:v=|\/shorts\/|\/embed\/)([A-Za-z0-9_-]{11})`),
|
||||
@@ -705,6 +805,15 @@ func extractMetaContent(html, property string) string {
|
||||
return ""
|
||||
}
|
||||
|
||||
func extractHTMLTitle(html string) string {
|
||||
pattern := regexp.MustCompile(`(?is)<title[^>]*>(.*?)</title>`)
|
||||
matches := pattern.FindStringSubmatch(html)
|
||||
if len(matches) == 2 {
|
||||
return htmlUnescape(strings.TrimSpace(matches[1]))
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func extractVideoPreviewURL(html string) string {
|
||||
normalizedHTML := strings.ReplaceAll(html, `\\\/`, `/`)
|
||||
normalizedHTML = strings.ReplaceAll(normalizedHTML, `\/`, `/`)
|
||||
@@ -737,6 +846,19 @@ func extractArtgridBackgroundThumbnail(html, clipID string) string {
|
||||
return ""
|
||||
}
|
||||
|
||||
func pickArtgridImageURL(urls []string, clipID string) string {
|
||||
for _, item := range urls {
|
||||
lower := strings.ToLower(item)
|
||||
if !(strings.Contains(lower, ".jpg") || strings.Contains(lower, ".jpeg") || strings.Contains(lower, ".png") || strings.Contains(lower, ".webp")) {
|
||||
continue
|
||||
}
|
||||
if strings.Contains(item, clipID) || strings.Contains(lower, "graded-thumbnail") || strings.Contains(lower, "imgix") {
|
||||
return item
|
||||
}
|
||||
}
|
||||
return pickImageURL(urls)
|
||||
}
|
||||
|
||||
func extractArtgridClipID(link string) string {
|
||||
patterns := []*regexp.Regexp{
|
||||
regexp.MustCompile(`/clip/([0-9]+)/`),
|
||||
@@ -840,30 +962,38 @@ func (s *SearchService) fetchText(target string) (string, error) {
|
||||
s.debug("search_service:fetch_cache_hit", map[string]any{"type": "html", "target": target, "bytes": len(cached)})
|
||||
return cached, nil
|
||||
}
|
||||
|
||||
req, err := newBrowserRequest(http.MethodGet, target, "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
|
||||
for _, strategy := range []string{"default", "provider"} {
|
||||
req, err := newBrowserRequest(http.MethodGet, target, "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", strategy)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
s.debug("search_service:envato_fetch_strategy", map[string]any{"target": target, "strategy": strategy})
|
||||
resp, err := s.Client.Do(req)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
data, readErr := io.ReadAll(io.LimitReader(resp.Body, 1024*1024))
|
||||
_ = resp.Body.Close()
|
||||
if readErr != nil {
|
||||
continue
|
||||
}
|
||||
if resp.StatusCode == http.StatusForbidden || resp.StatusCode == http.StatusServiceUnavailable {
|
||||
continue
|
||||
}
|
||||
if resp.StatusCode >= 300 {
|
||||
continue
|
||||
}
|
||||
if looksLikeCloudflareChallenge(string(data)) {
|
||||
continue
|
||||
}
|
||||
body := string(data)
|
||||
s.setCachedFetchResult(cacheKey, body, 3*time.Minute)
|
||||
return body, nil
|
||||
}
|
||||
body, err := fetchTextViaPython(target)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
resp, err := s.Client.Do(req)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode == http.StatusForbidden || resp.StatusCode == http.StatusServiceUnavailable {
|
||||
return fetchTextViaPython(target)
|
||||
}
|
||||
if resp.StatusCode >= 300 {
|
||||
return "", fmt.Errorf("fetch returned status %d", resp.StatusCode)
|
||||
}
|
||||
data, err := io.ReadAll(io.LimitReader(resp.Body, 1024*1024))
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
if looksLikeCloudflareChallenge(string(data)) {
|
||||
return fetchTextViaPython(target)
|
||||
}
|
||||
body := string(data)
|
||||
s.setCachedFetchResult(cacheKey, body, 3*time.Minute)
|
||||
return body, nil
|
||||
}
|
||||
@@ -875,7 +1005,7 @@ func (s *SearchService) fetchJSONText(target string) (string, error) {
|
||||
return cached, nil
|
||||
}
|
||||
|
||||
req, err := newBrowserRequest(http.MethodGet, target, "application/json, text/json, */*")
|
||||
req, err := newBrowserRequest(http.MethodGet, target, "application/json, text/json, */*", "provider")
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
@@ -1034,19 +1164,35 @@ func cleanArtgridDescription(description string) string {
|
||||
return strings.TrimSpace(description)
|
||||
}
|
||||
|
||||
func artgridHTMLSignals(html, clipID string) map[string]bool {
|
||||
ogURL := extractMetaContent(html, "og:url")
|
||||
canonical := extractCanonicalURL(html)
|
||||
alWebURL := extractMetaContent(html, "al:web:url")
|
||||
lowerHTML := strings.ToLower(html)
|
||||
title := strings.ToLower(extractHTMLTitle(html))
|
||||
ogImage := strings.ToLower(extractMetaContent(html, "og:image"))
|
||||
twitterImage := strings.ToLower(extractMetaContent(html, "twitter:image"))
|
||||
return map[string]bool{
|
||||
"og_url_clip": strings.Contains(ogURL, clipID),
|
||||
"canonical_clip": strings.Contains(canonical, clipID),
|
||||
"al_web_clip": strings.Contains(alWebURL, clipID),
|
||||
"body_main_clipvideo": strings.Contains(lowerHTML, "main-clipvideo_"+clipID),
|
||||
"body_clip_path": strings.Contains(lowerHTML, "/clip/"+clipID+"/"),
|
||||
"body_clip_id": strings.Contains(lowerHTML, clipID),
|
||||
"title_mentions_clip": strings.Contains(title, "artgrid") || strings.Contains(title, "artlist"),
|
||||
"image_clip": strings.Contains(ogImage, strings.ToLower(clipID)) || strings.Contains(twitterImage, strings.ToLower(clipID)),
|
||||
}
|
||||
}
|
||||
|
||||
func isMatchingArtgridClipPage(html, clipID string) bool {
|
||||
if clipID == "" {
|
||||
return false
|
||||
}
|
||||
ogURL := extractMetaContent(html, "og:url")
|
||||
canonical := extractCanonicalURL(html)
|
||||
lowerHTML := strings.ToLower(html)
|
||||
for _, candidate := range []string{ogURL, canonical} {
|
||||
if strings.Contains(candidate, clipID) {
|
||||
return true
|
||||
}
|
||||
signals := artgridHTMLSignals(html, clipID)
|
||||
if signals["og_url_clip"] || signals["canonical_clip"] || signals["al_web_clip"] || signals["body_main_clipvideo"] || signals["body_clip_path"] || signals["image_clip"] {
|
||||
return true
|
||||
}
|
||||
if strings.Contains(lowerHTML, "main-clipvideo_"+clipID) || strings.Contains(lowerHTML, "/clip/"+clipID+"/") {
|
||||
if signals["body_clip_id"] && signals["title_mentions_clip"] {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
@@ -1090,6 +1236,23 @@ func extractEnvatoPreviewFromHydration(html string) string {
|
||||
return firstNonEmpty(pickBestEnvatoPreviewURL(urls), extractVideoPreviewURL(string(decoded)))
|
||||
}
|
||||
|
||||
func collectEnvatoPreviewURL(html, pageThumbnail, currentThumbnail, contentURL string) string {
|
||||
urls := collectURLs(html)
|
||||
return firstNonEmpty(
|
||||
contentURL,
|
||||
extractJSONLDValue(html, "contentUrl"),
|
||||
extractMetaContent(html, "twitter:player:stream"),
|
||||
extractMetaContent(html, "og:video"),
|
||||
extractMetaContent(html, "og:video:url"),
|
||||
extractMetaContent(html, "og:video:secure_url"),
|
||||
extractEnvatoPreviewFromHydration(html),
|
||||
pickBestEnvatoPreviewURL(urls),
|
||||
extractVideoPreviewURL(html),
|
||||
deriveEnvatoPreviewFromThumbnail(pageThumbnail),
|
||||
deriveEnvatoPreviewFromThumbnail(currentThumbnail),
|
||||
)
|
||||
}
|
||||
|
||||
func extractWindowAssignedValue(html, variable string) string {
|
||||
pattern := regexp.MustCompile(`window\.` + regexp.QuoteMeta(variable) + `\s*=\s*"([^"]+)"`)
|
||||
matches := pattern.FindStringSubmatch(html)
|
||||
@@ -1121,7 +1284,19 @@ func pickBestEnvatoPreviewURL(urls []string) string {
|
||||
return ""
|
||||
}
|
||||
|
||||
func newBrowserRequest(method, target, accept string) (*http.Request, error) {
|
||||
// inferFetchReferer returns the Referer value to send when fetching target,
// chosen by case-insensitive substring match on the whole URL: Envato hosts
// map to the Envato Elements home page, anything mentioning "artgrid" or
// "artlist" maps to the Artgrid home page, and everything else yields "".
func inferFetchReferer(target string) string {
	needle := strings.ToLower(target)
	// Checked in order; the first marker found wins.
	rules := []struct {
		marker  string
		referer string
	}{
		{"envatousercontent.com", "https://elements.envato.com/"},
		{"elements.envato.com", "https://elements.envato.com/"},
		{"artgrid", "https://artgrid.io/"},
		{"artlist", "https://artgrid.io/"},
	}
	for _, rule := range rules {
		if strings.Contains(needle, rule.marker) {
			return rule.referer
		}
	}
	return ""
}
|
||||
|
||||
func newBrowserRequest(method, target, accept, strategy string) (*http.Request, error) {
|
||||
req, err := http.NewRequest(method, target, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -1131,6 +1306,14 @@ func newBrowserRequest(method, target, accept string) (*http.Request, error) {
|
||||
if accept != "" {
|
||||
req.Header.Set("Accept", accept)
|
||||
}
|
||||
if strategy == "provider" {
|
||||
req.Header.Set("Referer", inferFetchReferer(target))
|
||||
req.Header.Set("Upgrade-Insecure-Requests", "1")
|
||||
req.Header.Set("Sec-Fetch-Dest", "document")
|
||||
req.Header.Set("Sec-Fetch-Mode", "navigate")
|
||||
req.Header.Set("Sec-Fetch-Site", "none")
|
||||
req.Header.Set("Sec-Fetch-User", "?1")
|
||||
}
|
||||
return req, nil
|
||||
}
|
||||
|
||||
@@ -1142,11 +1325,17 @@ req = Request(sys.argv[1], headers={
|
||||
"User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
|
||||
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
|
||||
"Accept-Language": "en-US,en;q=0.9",
|
||||
"Referer": sys.argv[2],
|
||||
"Upgrade-Insecure-Requests": "1",
|
||||
"Sec-Fetch-Dest": "document",
|
||||
"Sec-Fetch-Mode": "navigate",
|
||||
"Sec-Fetch-Site": "none",
|
||||
"Sec-Fetch-User": "?1",
|
||||
})
|
||||
with urlopen(req, timeout=20) as resp:
|
||||
sys.stdout.buffer.write(resp.read(1024 * 1024))
|
||||
`
|
||||
output, err := exec.Command("python3", "-c", script, target).CombinedOutput()
|
||||
output, err := exec.Command("python3", "-c", script, target, inferFetchReferer(target)).CombinedOutput()
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("python fallback failed: %v: %s", err, truncateBytes(output, 300))
|
||||
}
|
||||
|
||||
@@ -44,6 +44,15 @@ func TestExtractEnvatoPreviewFromHydration(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestCollectEnvatoPreviewURLFindsOgVideo(t *testing.T) {
|
||||
html := `<meta property="og:video" content="https://video-previews.elements.envatousercontent.com/example/watermarked_preview/watermarked_preview.mp4">`
|
||||
got := collectEnvatoPreviewURL(html, "", "", "")
|
||||
want := "https://video-previews.elements.envatousercontent.com/example/watermarked_preview/watermarked_preview.mp4"
|
||||
if got != want {
|
||||
t.Fatalf("expected %q, got %q", want, got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsUsefulGoogleVideoResultRejectsMusicResults(t *testing.T) {
|
||||
result := SearchResult{
|
||||
Title: "Couple Friendly Sad Bgm Movie Best Bgm",
|
||||
@@ -109,6 +118,22 @@ func TestIsMatchingArtgridClipPageRejectsHomepage(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsMatchingArtgridClipPageAcceptsBodySignals(t *testing.T) {
|
||||
html := `<html><head><title>Night City | Stock Video Footage - Artgrid.io</title></head><body><script>window.__clip="6600269";</script></body></html>`
|
||||
if !isMatchingArtgridClipPage(html, "6600269") {
|
||||
t.Fatal("expected body/title signal Artgrid HTML to be accepted")
|
||||
}
|
||||
}
|
||||
|
||||
func TestLowValueThumbnailDetection(t *testing.T) {
|
||||
if !IsLowValueThumbnail("https://example.com/favicon.ico") {
|
||||
t.Fatal("expected favicon to be low-value thumbnail")
|
||||
}
|
||||
if IsLowValueThumbnail("https://i.ytimg.com/vi/abcd1234xyz/hqdefault.jpg") {
|
||||
t.Fatal("expected youtube thumbnail to be usable")
|
||||
}
|
||||
}
|
||||
|
||||
func TestGeminiCandidateLimitNeverExceedsCandidates(t *testing.T) {
|
||||
if got := GeminiCandidateLimit(9); got != 9 {
|
||||
t.Fatalf("expected Gemini limit to stay within candidate count, got %d", got)
|
||||
|
||||
@@ -47,14 +47,17 @@ type cachedExpansionValue struct {
|
||||
}
|
||||
|
||||
// AIRecommendation is a single search result as returned to the client after
// review, together with the presentation hints used for modal rendering.
type AIRecommendation struct {
	Title           string `json:"title"`
	Link            string `json:"link"`
	Snippet         string `json:"snippet"`
	ThumbnailURL    string `json:"thumbnailUrl"`
	PreviewVideoURL string `json:"previewVideoUrl"`
	// Source is the provider name, e.g. "Google Video", "Envato" or "Artgrid".
	Source string `json:"source"`
	// Reason is the explanation attached to the recommendation.
	Reason      string `json:"reason"`
	Recommended bool   `json:"recommended"`
	// MediaMode is the chosen presentation: "embed", "preview_video",
	// "thumbnail" or "none". Omitted from the JSON when empty.
	MediaMode string `json:"mediaMode,omitempty"`
	// EmbedURL is the URL to load when embedding. Omitted from the JSON when empty.
	EmbedURL string `json:"embedUrl,omitempty"`
	// PreviewBlockedReason is a short code explaining why a richer media
	// mode was not used. Omitted from the JSON when empty.
	PreviewBlockedReason string `json:"previewBlockedReason,omitempty"`
}
|
||||
|
||||
type QueryExpansion struct {
|
||||
@@ -480,6 +483,14 @@ func (g *GeminiService) fetchCandidateVisualInlineData(candidate SearchResult) (
|
||||
}
|
||||
}
|
||||
if candidate.ThumbnailURL != "" {
|
||||
if isLowValueThumbnail(candidate.ThumbnailURL) {
|
||||
g.debug("gemini:vision_candidate_rejected_low_value", map[string]any{
|
||||
"link": candidate.Link,
|
||||
"source": candidate.Source,
|
||||
"thumbnailUrl": candidate.ThumbnailURL,
|
||||
})
|
||||
return "", "", fmt.Errorf("candidate thumbnail is low value")
|
||||
}
|
||||
cacheKey := "image\n" + candidate.ThumbnailURL
|
||||
if data, mimeType, ok := g.getCachedVisual(cacheKey); ok {
|
||||
return data, mimeType, nil
|
||||
|
||||
@@ -3,6 +3,7 @@ package services
|
||||
import (
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
@@ -114,3 +115,27 @@ func TestGeminiExpansionCacheRoundTrip(t *testing.T) {
|
||||
t.Fatalf("unexpected expansion cache value: %#v", value)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDecorateRecommendationMediaUsesEmbedForGoogleVideo(t *testing.T) {
|
||||
item := DecorateRecommendationMedia(AIRecommendation{
|
||||
Source: "Google Video",
|
||||
Link: "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
|
||||
})
|
||||
if item.MediaMode != "embed" {
|
||||
t.Fatalf("expected embed media mode, got %q", item.MediaMode)
|
||||
}
|
||||
if item.EmbedURL == "" || !strings.Contains(item.EmbedURL, "youtube-nocookie.com/embed/") {
|
||||
t.Fatalf("unexpected embed url: %q", item.EmbedURL)
|
||||
}
|
||||
}
|
||||
|
||||
func TestRankSearchResultsPrefersUsableVisuals(t *testing.T) {
|
||||
results := []SearchResult{
|
||||
{Title: "cyberpunk city", Link: "https://example.com/a", ThumbnailURL: "https://example.com/favicon.ico"},
|
||||
{Title: "cyberpunk city", Link: "https://example.com/b", ThumbnailURL: "https://example.com/frame.jpg"},
|
||||
}
|
||||
ranked := RankSearchResults("cyberpunk city", results)
|
||||
if ranked[0].Link != "https://example.com/b" {
|
||||
t.Fatalf("expected usable thumbnail result first, got %#v", ranked)
|
||||
}
|
||||
}
|
||||
|
||||
+49
-20
@@ -10,6 +10,8 @@ import (
|
||||
)
|
||||
|
||||
// Canned reason strings attached to recommendations.
const (
	// GeminiFallbackReason is the user-facing (Korean) note saying that the
	// Gemini Vision response was insufficient and the results were
	// supplemented by keyword matching.
	GeminiFallbackReason = "Gemini Vision 응답이 부족해 키워드 기준으로 보강된 결과입니다."
	// FallbackPreviewReason notes a fallback caused by a missing provider preview.
	FallbackPreviewReason = "Fallback due to missing provider preview."
	// PendingVisualReason notes a ranked candidate that still lacks strong visual evidence.
	PendingVisualReason = "Ranked candidate pending stronger visual evidence."
)
|
||||
|
||||
type GeminiBatchStats struct {
|
||||
CandidateCap int `json:"candidateCap"`
|
||||
@@ -19,6 +21,7 @@ type GeminiBatchStats struct {
|
||||
Failed int `json:"failed"`
|
||||
SequentialRetried int `json:"sequentialRetried"`
|
||||
RecommendedCount int `json:"recommendedCount"`
|
||||
VisualRejectCount int `json:"visualRejectCount"`
|
||||
Errors []string `json:"errors,omitempty"`
|
||||
}
|
||||
|
||||
@@ -58,19 +61,25 @@ func RankSearchResults(query string, results []SearchResult) []SearchResult {
|
||||
score -= 4
|
||||
}
|
||||
}
|
||||
if result.ThumbnailURL != "" {
|
||||
score += 2
|
||||
}
|
||||
if result.PreviewVideoURL != "" {
|
||||
score += 3
|
||||
score += 10
|
||||
}
|
||||
if hasUsableThumbnail(result.ThumbnailURL) {
|
||||
score += 5
|
||||
}
|
||||
if isLowValueThumbnail(result.ThumbnailURL) {
|
||||
score -= 8
|
||||
}
|
||||
if strings.TrimSpace(result.PreviewVideoURL) == "" && !hasUsableThumbnail(result.ThumbnailURL) {
|
||||
score -= 10
|
||||
}
|
||||
switch result.Source {
|
||||
case "Google Video":
|
||||
score -= 1
|
||||
score -= 2
|
||||
case "Envato":
|
||||
score += 7
|
||||
score += 5
|
||||
case "Artgrid":
|
||||
score += 7
|
||||
score += 4
|
||||
}
|
||||
scored = append(scored, scoredResult{item: result, score: score})
|
||||
}
|
||||
@@ -106,6 +115,11 @@ func EvaluateAllCandidatesWithGeminiWithDeadline(service *GeminiService, query s
|
||||
CandidateCap: limit,
|
||||
Requested: min(limit, len(ranked)),
|
||||
}
|
||||
for _, item := range ranked[:min(limit, len(ranked))] {
|
||||
if strings.TrimSpace(item.PreviewVideoURL) == "" && !hasUsableThumbnail(item.ThumbnailURL) {
|
||||
stats.VisualRejectCount++
|
||||
}
|
||||
}
|
||||
type batchResult struct {
|
||||
index int
|
||||
recommendations []AIRecommendation
|
||||
@@ -231,7 +245,7 @@ func BuildFallbackRecommendations(ranked []SearchResult, limit int, reason strin
|
||||
|
||||
fallback := make([]AIRecommendation, 0, min(limit, len(ranked)))
|
||||
for _, item := range ranked[:min(limit, len(ranked))] {
|
||||
fallback = append(fallback, AIRecommendation{
|
||||
fallback = append(fallback, DecorateRecommendationMedia(AIRecommendation{
|
||||
Title: item.Title,
|
||||
Link: item.Link,
|
||||
Snippet: item.Snippet,
|
||||
@@ -240,7 +254,7 @@ func BuildFallbackRecommendations(ranked []SearchResult, limit int, reason strin
|
||||
Source: item.Source,
|
||||
Reason: reason,
|
||||
Recommended: false,
|
||||
})
|
||||
}))
|
||||
}
|
||||
return fallback
|
||||
}
|
||||
@@ -385,6 +399,8 @@ func looksNegativeReason(reason string) bool {
|
||||
func MergeRecommendations(recommended []AIRecommendation, ranked []SearchResult, limit int) []AIRecommendation {
|
||||
merged := make([]AIRecommendation, 0, min(limit, len(ranked)))
|
||||
seen := map[string]bool{}
|
||||
fillerCount := 0
|
||||
maxFiller := min(4, limit)
|
||||
|
||||
for _, item := range recommended {
|
||||
if !item.Recommended {
|
||||
@@ -394,7 +410,7 @@ func MergeRecommendations(recommended []AIRecommendation, ranked []SearchResult,
|
||||
continue
|
||||
}
|
||||
seen[item.Link] = true
|
||||
merged = append(merged, item)
|
||||
merged = append(merged, DecorateRecommendationMedia(item))
|
||||
}
|
||||
|
||||
for _, item := range recommended {
|
||||
@@ -404,8 +420,11 @@ func MergeRecommendations(recommended []AIRecommendation, ranked []SearchResult,
|
||||
if looksNegativeReason(item.Reason) || strings.Contains(item.Reason, GeminiFallbackReason) {
|
||||
continue
|
||||
}
|
||||
if strings.TrimSpace(item.PreviewVideoURL) == "" && !hasUsableThumbnail(item.ThumbnailURL) {
|
||||
continue
|
||||
}
|
||||
seen[item.Link] = true
|
||||
merged = append(merged, item)
|
||||
merged = append(merged, DecorateRecommendationMedia(item))
|
||||
}
|
||||
|
||||
if len(merged) < min(12, limit) {
|
||||
@@ -413,20 +432,24 @@ func MergeRecommendations(recommended []AIRecommendation, ranked []SearchResult,
|
||||
if len(merged) >= min(12, limit) || item.Link == "" || seen[item.Link] {
|
||||
continue
|
||||
}
|
||||
if strings.TrimSpace(item.ThumbnailURL) == "" && strings.TrimSpace(item.PreviewVideoURL) == "" {
|
||||
if fillerCount >= maxFiller {
|
||||
break
|
||||
}
|
||||
if strings.TrimSpace(item.PreviewVideoURL) == "" && !hasUsableThumbnail(item.ThumbnailURL) {
|
||||
continue
|
||||
}
|
||||
seen[item.Link] = true
|
||||
merged = append(merged, AIRecommendation{
|
||||
merged = append(merged, DecorateRecommendationMedia(AIRecommendation{
|
||||
Title: item.Title,
|
||||
Link: item.Link,
|
||||
Snippet: item.Snippet,
|
||||
ThumbnailURL: item.ThumbnailURL,
|
||||
PreviewVideoURL: item.PreviewVideoURL,
|
||||
Source: item.Source,
|
||||
Reason: "Gemini 검토가 부족해 편집용 후보로 추가된 결과입니다.",
|
||||
Reason: PendingVisualReason,
|
||||
Recommended: false,
|
||||
})
|
||||
}))
|
||||
fillerCount++
|
||||
}
|
||||
}
|
||||
return merged
|
||||
@@ -435,31 +458,37 @@ func MergeRecommendations(recommended []AIRecommendation, ranked []SearchResult,
|
||||
func BackfillRecommendations(existing []AIRecommendation, ranked []SearchResult, limit int, reason string) []AIRecommendation {
|
||||
merged := make([]AIRecommendation, 0, min(limit, len(ranked)))
|
||||
seen := map[string]bool{}
|
||||
fillerCount := 0
|
||||
maxFiller := min(4, limit)
|
||||
for _, item := range existing {
|
||||
if item.Link == "" || seen[item.Link] {
|
||||
continue
|
||||
}
|
||||
seen[item.Link] = true
|
||||
merged = append(merged, item)
|
||||
merged = append(merged, DecorateRecommendationMedia(item))
|
||||
}
|
||||
for _, item := range ranked {
|
||||
if len(merged) >= limit || item.Link == "" || seen[item.Link] {
|
||||
continue
|
||||
}
|
||||
if strings.TrimSpace(item.ThumbnailURL) == "" && strings.TrimSpace(item.PreviewVideoURL) == "" {
|
||||
if fillerCount >= maxFiller {
|
||||
break
|
||||
}
|
||||
if strings.TrimSpace(item.PreviewVideoURL) == "" && !hasUsableThumbnail(item.ThumbnailURL) {
|
||||
continue
|
||||
}
|
||||
seen[item.Link] = true
|
||||
merged = append(merged, AIRecommendation{
|
||||
merged = append(merged, DecorateRecommendationMedia(AIRecommendation{
|
||||
Title: item.Title,
|
||||
Link: item.Link,
|
||||
Snippet: item.Snippet,
|
||||
ThumbnailURL: item.ThumbnailURL,
|
||||
PreviewVideoURL: item.PreviewVideoURL,
|
||||
Source: item.Source,
|
||||
Reason: reason,
|
||||
Reason: firstNonEmpty(strings.TrimSpace(reason), FallbackPreviewReason),
|
||||
Recommended: false,
|
||||
})
|
||||
}))
|
||||
fillerCount++
|
||||
}
|
||||
return merged
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user