Stabilize search pipeline and improve preview diagnostics
build-push / docker (push) Successful in 4m14s

This commit is contained in:
AI Assistant
2026-03-13 18:32:54 +09:00
parent 6f3149a443
commit 7dfb1ad2de
8 changed files with 463 additions and 45 deletions
+148 -13
View File
@@ -76,6 +76,27 @@ type PreviewResponse struct {
Qualities []map[string]any `json:"qualities"`
}
// searchDebugSummary is a compact, JSON-serializable digest of a result set.
// It is what the search handler hands to its debug logger in place of the
// full result payload.
type searchDebugSummary struct {
	// Total is the number of results that were summarized.
	Total int `json:"total"`
	// BySource counts results per Source value.
	BySource map[string]int `json:"bySource"`
	// WithPreview counts results that carry a non-blank preview video URL.
	WithPreview int `json:"withPreview"`
	// WithThumbnail counts results that carry a non-blank thumbnail URL.
	WithThumbnail int `json:"withThumbnail"`
	// Top holds truncated samples of the first few results.
	Top []map[string]any `json:"top"`
	// Warning is an optional message attached to the summary; omitted when empty.
	Warning string `json:"warning,omitempty"`
	// DurationMS is the elapsed time in milliseconds; omitted when zero.
	DurationMS int64 `json:"durationMs,omitempty"`
	// GeminiCandidateCap is the candidate cap reported with the summary; omitted when zero.
	GeminiCandidateCap int `json:"geminiCandidateCap,omitempty"`
}
// geminiBatchStats records how a batched Gemini evaluation pass went so the
// outcome can be logged as a single structured value.
type geminiBatchStats struct {
	// CandidateCap is the maximum number of ranked candidates considered.
	CandidateCap int `json:"candidateCap"`
	// Requested is the number of candidates actually submitted (cap bounded by the input size).
	Requested int `json:"requested"`
	// Batches is the number of batches attempted.
	Batches int `json:"batches"`
	// Succeeded is the number of batches that returned without error.
	Succeeded int `json:"succeeded"`
	// Failed is the number of batches that returned an error.
	Failed int `json:"failed"`
	// RecommendedCount is the number of distinct recommendations collected.
	RecommendedCount int `json:"recommendedCount"`
	// Errors holds a bounded sample of batch error messages; omitted when empty.
	Errors []string `json:"errors,omitempty"`
}
func RegisterRoutes(router *gin.Engine, app *App) {
router.GET("/healthz", func(c *gin.Context) {
c.JSON(http.StatusOK, gin.H{"status": "ok"})
@@ -259,6 +280,7 @@ func (a *App) runDownload(recordID int64, url, start, end, quality, outputPath s
}
func (a *App) searchMedia(c *gin.Context) {
started := time.Now()
var req struct {
Query string `json:"query"`
Platforms []string `json:"platforms"`
@@ -277,18 +299,24 @@ func (a *App) searchMedia(c *gin.Context) {
if len(queryVariants) == 0 {
queryVariants = []string{req.Query}
}
a.debug("search query variants", gin.H{"query": req.Query, "variants": queryVariants, "platforms": req.Platforms})
a.debug("search query variants", gin.H{
"query": req.Query,
"platforms": req.Platforms,
"variants": queryVariants,
"variantCount": len(queryVariants),
"requestIdHint": time.Now().UnixNano(),
})
enabledPlatforms := normalizePlatforms(req.Platforms)
a.Hub.Broadcast("progress", gin.H{"type": "search", "status": "searching " + selectedPlatformLabel(enabledPlatforms), "progress": 35})
results, err := a.SearchService.SearchMedia(queryVariants, enabledPlatforms)
if err != nil {
a.debug("search backend failed", gin.H{"error": err.Error(), "variants": queryVariants})
a.debug("search backend failed", gin.H{"error": err.Error(), "variants": queryVariants, "durationMs": time.Since(started).Milliseconds()})
a.Hub.Broadcast("progress", gin.H{"type": "search", "status": "search failed", "progress": 100, "message": err.Error()})
c.JSON(http.StatusBadGateway, gin.H{"error": err.Error()})
return
}
a.debug("search backend results", gin.H{"count": len(results), "results": results})
a.debug("search backend summary", summarizeSearchResults(results, time.Since(started), 0, ""))
if len(results) == 0 {
warning := "SearXNG returned no renderable results."
a.Hub.Broadcast("progress", gin.H{"type": "search", "status": "no renderable search results", "progress": 100, "message": warning})
@@ -302,10 +330,10 @@ func (a *App) searchMedia(c *gin.Context) {
rankQuery = strings.Join(queryVariants[:min(len(queryVariants), 3)], " ")
}
scored := rankSearchResults(rankQuery, results)
a.debug("search ranked results", gin.H{"count": len(scored), "results": scored})
a.Hub.Broadcast("progress", gin.H{"type": "search", "status": "analyzing all candidate visuals with Gemini Vision", "progress": 75})
recommended := evaluateAllCandidatesWithGemini(a.GeminiService, req.Query, scored)
a.debug("search gemini recommendations", gin.H{"count": len(recommended), "results": recommended})
a.debug("search ranked summary", summarizeSearchResults(scored, time.Since(started), geminiCandidateLimit(len(scored)), ""))
a.Hub.Broadcast("progress", gin.H{"type": "search", "status": "analyzing top candidate visuals with Gemini Vision", "progress": 75})
recommended, geminiStats := evaluateAllCandidatesWithGemini(a.GeminiService, req.Query, scored)
a.debug("search gemini evaluation", geminiStats)
err = nil
if len(recommended) == 0 {
err = fmt.Errorf("gemini vision returned no recommended items across all candidate batches")
@@ -316,6 +344,7 @@ func (a *App) searchMedia(c *gin.Context) {
fallback = append(fallback, services.AIRecommendation{
Title: result.Title,
Link: result.Link,
Snippet: result.Snippet,
ThumbnailURL: result.ThumbnailURL,
PreviewVideoURL: result.PreviewVideoURL,
Source: result.Source,
@@ -324,12 +353,15 @@ func (a *App) searchMedia(c *gin.Context) {
})
}
warning := err.Error()
a.debug("search fallback summary", summarizeRecommendationResults(fallback, time.Since(started), warning))
a.Hub.Broadcast("progress", gin.H{"type": "search", "status": "Gemini Vision fallback to ranked results", "progress": 90, "message": warning})
c.JSON(http.StatusOK, gin.H{"results": fallback, "warning": warning, "queries": queryVariants})
return
}
response := gin.H{"results": mergeRecommendations(recommended, scored, 20), "queries": queryVariants}
merged := mergeRecommendations(recommended, scored, 20)
a.debug("search complete summary", summarizeRecommendationResults(merged, time.Since(started), ""))
response := gin.H{"results": merged, "queries": queryVariants}
a.Hub.Broadcast("progress", gin.H{"type": "search", "status": "search complete", "progress": 100})
c.JSON(http.StatusOK, response)
}
@@ -406,20 +438,31 @@ func selectedPlatformLabel(platforms map[string]bool) string {
return strings.Join(labels, ", ")
}
func evaluateAllCandidatesWithGemini(service *services.GeminiService, query string, ranked []services.SearchResult) []services.AIRecommendation {
func evaluateAllCandidatesWithGemini(service *services.GeminiService, query string, ranked []services.SearchResult) ([]services.AIRecommendation, geminiBatchStats) {
const chunkSize = 8
limit := geminiCandidateLimit(len(ranked))
stats := geminiBatchStats{
CandidateCap: limit,
Requested: min(limit, len(ranked)),
}
merged := make([]services.AIRecommendation, 0, len(ranked))
seen := map[string]bool{}
for start := 0; start < len(ranked); start += chunkSize {
for start := 0; start < limit; start += chunkSize {
end := start + chunkSize
if end > len(ranked) {
end = len(ranked)
if end > limit {
end = limit
}
batch := ranked[start:end]
stats.Batches++
recommended, err := service.Recommend(query, batch)
if err != nil {
stats.Failed++
if len(stats.Errors) < 5 {
stats.Errors = append(stats.Errors, err.Error())
}
continue
}
stats.Succeeded++
for _, item := range recommended {
if item.Link == "" || seen[item.Link] {
continue
@@ -428,7 +471,8 @@ func evaluateAllCandidatesWithGemini(service *services.GeminiService, query stri
merged = append(merged, item)
}
}
return merged
stats.RecommendedCount = len(merged)
return merged, stats
}
func rankSearchResults(query string, results []services.SearchResult) []services.SearchResult {
@@ -515,6 +559,7 @@ func mergeRecommendations(recommended []services.AIRecommendation, ranked []serv
merged = append(merged, services.AIRecommendation{
Title: item.Title,
Link: item.Link,
Snippet: item.Snippet,
ThumbnailURL: item.ThumbnailURL,
PreviewVideoURL: item.PreviewVideoURL,
Source: item.Source,
@@ -525,6 +570,96 @@ func mergeRecommendations(recommended []services.AIRecommendation, ranked []serv
return merged
}
// geminiCandidateLimit returns how many of the top-ranked results should be
// submitted for Gemini evaluation when total results are available.
//
// The cap grows in tiers: everything for up to 8 results, at most 12 for up
// to 16 results, and at most 16 beyond that. The result is always clamped to
// total, because callers use it as an upper slice bound on the ranked results;
// a cap larger than the number of results (previously 12 for totals 9..11)
// would slice past the end of the input.
func geminiCandidateLimit(total int) int {
	limit := 16
	switch {
	case total <= 8:
		return total
	case total <= 16:
		limit = 12
	}
	if limit > total {
		return total
	}
	return limit
}
// summarizeSearchResults condenses raw search results into a searchDebugSummary
// suitable for debug logging.
//
// It counts results per source, counts how many carry a preview video or a
// thumbnail, and samples the first six results with truncated title and
// snippet text. duration, geminiCap and warning are copied into the summary.
//
// A URL counts as present only when it is non-blank after trimming; the same
// test feeds both the aggregate counters and the per-item "hasPreview" /
// "hasThumbnail" flags so the two can never disagree.
func summarizeSearchResults(results []services.SearchResult, duration time.Duration, geminiCap int, warning string) searchDebugSummary {
	const topLimit = 6

	summary := searchDebugSummary{
		Total:              len(results),
		BySource:           map[string]int{},
		Top:                make([]map[string]any, 0, min(topLimit, len(results))),
		Warning:            warning,
		DurationMS:         duration.Milliseconds(),
		GeminiCandidateCap: geminiCap,
	}

	for idx, item := range results {
		hasPreview := strings.TrimSpace(item.PreviewVideoURL) != ""
		hasThumbnail := strings.TrimSpace(item.ThumbnailURL) != ""

		summary.BySource[item.Source]++
		if hasPreview {
			summary.WithPreview++
		}
		if hasThumbnail {
			summary.WithThumbnail++
		}

		// Only the leading entries are sampled to keep the log line small.
		if idx >= topLimit {
			continue
		}
		summary.Top = append(summary.Top, map[string]any{
			"title":         truncateText(item.Title, 120),
			"source":        item.Source,
			"hasPreview":    hasPreview,
			"hasThumbnail":  hasThumbnail,
			"displayLink":   item.DisplayLink,
			"snippetSample": truncateText(item.Snippet, 160),
		})
	}
	return summary
}
// summarizeRecommendationResults condenses AI recommendations into a
// searchDebugSummary suitable for debug logging.
//
// It counts recommendations per source, counts how many carry a preview video
// or a thumbnail, and samples the first six entries with truncated title,
// reason and snippet text. duration and warning are copied into the summary;
// GeminiCandidateCap is left at zero.
//
// A URL counts as present only when it is non-blank after trimming; the same
// test feeds both the aggregate counters and the per-item "hasPreview" /
// "hasThumbnail" flags so the two can never disagree.
func summarizeRecommendationResults(results []services.AIRecommendation, duration time.Duration, warning string) searchDebugSummary {
	const topLimit = 6

	summary := searchDebugSummary{
		Total:      len(results),
		BySource:   map[string]int{},
		Top:        make([]map[string]any, 0, min(topLimit, len(results))),
		Warning:    warning,
		DurationMS: duration.Milliseconds(),
	}

	for idx, item := range results {
		hasPreview := strings.TrimSpace(item.PreviewVideoURL) != ""
		hasThumbnail := strings.TrimSpace(item.ThumbnailURL) != ""

		summary.BySource[item.Source]++
		if hasPreview {
			summary.WithPreview++
		}
		if hasThumbnail {
			summary.WithThumbnail++
		}

		// Only the leading entries are sampled to keep the log line small.
		if idx >= topLimit {
			continue
		}
		summary.Top = append(summary.Top, map[string]any{
			"title":         truncateText(item.Title, 120),
			"source":        item.Source,
			"hasPreview":    hasPreview,
			"hasThumbnail":  hasThumbnail,
			"reasonSample":  truncateText(item.Reason, 120),
			"snippetSample": truncateText(item.Snippet, 160),
		})
	}
	return summary
}
// truncateText trims surrounding whitespace from text and, when the result is
// longer than limit bytes, shortens it and appends "...".
//
// The cut never lands inside a multi-byte UTF-8 sequence: if limit falls in
// the middle of a rune, the whole rune is dropped so the output stays valid
// UTF-8. A negative limit is treated as zero instead of panicking.
func truncateText(text string, limit int) string {
	trimmed := strings.TrimSpace(text)
	if limit < 0 {
		limit = 0
	}
	if len(trimmed) <= limit {
		return trimmed
	}
	// Ranging over a string yields the byte offset of each rune start; keep
	// the largest one that does not exceed limit.
	cut := 0
	for offset := range trimmed {
		if offset > limit {
			break
		}
		cut = offset
	}
	return trimmed[:cut] + "..."
}
func EnsurePaths(downloadsDir, workerScript string) error {
if err := os.MkdirAll(downloadsDir, 0o755); err != nil {
return err
+178 -21
View File
@@ -6,6 +6,7 @@ import (
"io"
"net/http"
"net/url"
"os/exec"
"regexp"
"sort"
"strings"
@@ -54,6 +55,7 @@ func (s *SearchService) SearchMedia(queries []string, enabledPlatforms map[strin
name string
categories string
engine string
maxResults int
build func(string) []string
accept func(SearchResult) bool
}
@@ -63,6 +65,7 @@ func (s *SearchService) SearchMedia(queries []string, enabledPlatforms map[strin
name: "Envato",
categories: "general",
engine: s.WebEngine,
maxResults: 8,
build: buildEnvatoQueries,
accept: isRenderableEnvatoResult,
},
@@ -70,6 +73,7 @@ func (s *SearchService) SearchMedia(queries []string, enabledPlatforms map[strin
name: "Artgrid",
categories: "general",
engine: s.WebEngine,
maxResults: 8,
build: buildArtgridQueries,
accept: isRenderableArtgridResult,
},
@@ -77,16 +81,18 @@ func (s *SearchService) SearchMedia(queries []string, enabledPlatforms map[strin
name: "Google Video",
categories: "videos",
engine: s.GoogleVideoEngine,
maxResults: 6,
build: buildGoogleVideoQueries,
accept: isUsefulGoogleVideoResult,
},
}
seen := map[string]bool{}
sourceCounts := map[string]int{}
results := make([]SearchResult, 0, 90)
var lastErr error
baseQueries := limitQueries(queries, 5)
baseQueries := limitQueries(queries, 3)
for _, base := range baseQueries {
base = strings.TrimSpace(base)
if base == "" {
@@ -96,7 +102,13 @@ func (s *SearchService) SearchMedia(queries []string, enabledPlatforms map[strin
if len(enabledPlatforms) > 0 && !enabledPlatforms[strings.ToLower(source.name)] {
continue
}
if sourceCounts[source.name] >= source.maxResults {
continue
}
for _, searchQuery := range source.build(base) {
if sourceCounts[source.name] >= source.maxResults {
break
}
items, err := s.search(searchQuery, source.categories, source.engine, source.name)
if err != nil {
lastErr = err
@@ -112,6 +124,10 @@ func (s *SearchService) SearchMedia(queries []string, enabledPlatforms map[strin
}
seen[item.Link] = true
results = append(results, item)
sourceCounts[source.name]++
if sourceCounts[source.name] >= source.maxResults {
break
}
}
}
}
@@ -128,7 +144,7 @@ func (s *SearchService) SearchMedia(queries []string, enabledPlatforms map[strin
}
func (s *SearchService) EnrichResults(results []SearchResult) []SearchResult {
limit := minInt(len(results), 24)
limit := minInt(len(results), 18)
if limit == 0 {
return results
}
@@ -170,14 +186,32 @@ func (s *SearchService) enrichEnvato(result SearchResult) SearchResult {
if err != nil {
return result
}
if result.ThumbnailURL == "" {
result.ThumbnailURL = firstNonEmpty(
extractMetaContent(html, "og:image"),
extractMetaContent(html, "twitter:image"),
)
result.Title = firstNonEmpty(
extractMetaContent(html, "og:title"),
result.Title,
)
result.Snippet = firstNonEmpty(
extractMetaContent(html, "og:description"),
extractMetaContent(html, "description"),
result.Snippet,
)
pageThumbnail := firstNonEmpty(
extractMetaContent(html, "og:image"),
extractMetaContent(html, "twitter:image"),
extractJSONLDValue(html, "thumbnailUrl"),
)
if shouldPreferPageThumbnail(result.ThumbnailURL, result.Link) {
result.ThumbnailURL = pageThumbnail
}
if result.PreviewVideoURL == "" {
result.PreviewVideoURL = extractVideoPreviewURL(html)
result.PreviewVideoURL = firstNonEmpty(
extractJSONLDValue(html, "contentUrl"),
extractMetaContent(html, "twitter:player:stream"),
extractVideoPreviewURL(html),
deriveEnvatoPreviewFromThumbnail(pageThumbnail),
deriveEnvatoPreviewFromThumbnail(result.ThumbnailURL),
)
}
return result
}
@@ -203,17 +237,30 @@ func (s *SearchService) enrichArtgrid(result SearchResult) SearchResult {
if result.ThumbnailURL == "" || result.PreviewVideoURL == "" {
html, err := s.fetchText(result.Link)
if err == nil {
if result.ThumbnailURL == "" {
result.ThumbnailURL = firstNonEmpty(
extractMetaContent(html, "og:image"),
extractMetaContent(html, "twitter:image"),
)
if result.ThumbnailURL == "" {
result.ThumbnailURL = extractArtgridBackgroundThumbnail(html, clipID)
}
result.Title = firstNonEmpty(
extractMetaContent(html, "og:title"),
result.Title,
)
result.Snippet = firstNonEmpty(
extractMetaContent(html, "og:description"),
extractMetaContent(html, "description"),
result.Snippet,
)
pageThumbnail := firstNonEmpty(
extractMetaContent(html, "og:image"),
extractMetaContent(html, "twitter:image"),
extractArtgridBackgroundThumbnail(html, clipID),
extractJSONLDValue(html, "image"),
)
if shouldPreferPageThumbnail(result.ThumbnailURL, result.Link) {
result.ThumbnailURL = pageThumbnail
}
if result.PreviewVideoURL == "" {
result.PreviewVideoURL = extractVideoPreviewURL(html)
result.PreviewVideoURL = firstNonEmpty(
extractJSONLDValue(html, "contentUrl"),
extractMetaContent(html, "twitter:player:stream"),
extractVideoPreviewURL(html),
)
}
}
}
@@ -282,7 +329,6 @@ func (s *SearchService) search(query, categories, engine, source string) ([]Sear
// buildGoogleVideoQueries returns the search query strings issued for the
// Google Video source for one base phrase. Each query quotes base, ORs in
// footage-related terms, and excludes tutorial/review style content with
// negated terms.
//
// NOTE(review): the enclosing diff hunk shrinks by one line, so one of the
// entries below may already have been removed by this commit — confirm
// against the checked-out file.
func buildGoogleVideoQueries(base string) []string {
	return []string{
		fmt.Sprintf(`"%s" ("stock footage" OR "b-roll" OR cinematic OR "establishing shot" OR editorial) -tutorial -"how to" -review -reaction -course -podcast -vlog -interview -breakdown -edit -editing`, base),
		fmt.Sprintf(`"%s" ("cinematic footage" OR "free stock footage" OR "4k footage") -tutorial -"how to" -review`, base),
	}
}
@@ -310,6 +356,8 @@ func isUsefulGoogleVideoResult(result SearchResult) bool {
"tutorial", "how to", "review", "reaction", "podcast", "interview", "walkthrough",
"course", "lesson", "edit tutorial", "editing tutorial", "premiere pro", "after effects",
"breakdown", "explained", "vlog", "tips", "guide", "learn", "free download",
"bgm", "music", "song", "lyrics", "audio", "soundtrack", "trailer", "teaser",
"full movie", "movie clip", "status", "whatsapp status", "fan cam", "fancam",
} {
if strings.Contains(text, banned) {
return false
@@ -477,11 +525,18 @@ func pickVideoURL(urls []string) string {
}
func (s *SearchService) fetchText(target string) (string, error) {
resp, err := s.Client.Get(target)
req, err := newBrowserRequest(http.MethodGet, target, "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
if err != nil {
return "", err
}
resp, err := s.Client.Do(req)
if err != nil {
return "", err
}
defer resp.Body.Close()
if resp.StatusCode == http.StatusForbidden || resp.StatusCode == http.StatusServiceUnavailable {
return fetchTextViaPython(target)
}
if resp.StatusCode >= 300 {
return "", fmt.Errorf("fetch returned status %d", resp.StatusCode)
}
@@ -489,15 +544,17 @@ func (s *SearchService) fetchText(target string) (string, error) {
if err != nil {
return "", err
}
if looksLikeCloudflareChallenge(string(data)) {
return fetchTextViaPython(target)
}
return string(data), nil
}
func (s *SearchService) fetchJSONText(target string) (string, error) {
req, err := http.NewRequest(http.MethodGet, target, nil)
req, err := newBrowserRequest(http.MethodGet, target, "application/json, text/json, */*")
if err != nil {
return "", err
}
req.Header.Set("Accept", "application/json, text/json")
resp, err := s.Client.Do(req)
if err != nil {
return "", err
@@ -522,6 +579,106 @@ func firstNonEmpty(values ...string) string {
return ""
}
// shouldPreferPageThumbnail reports whether a thumbnail scraped from the
// result page should replace the thumbnail the result currently has.
//
// It answers true when the current thumbnail is blank, when it points at one
// of a few known third-party image hosts (presumably search-engine image
// caches), when its host cannot be determined, or when its host differs from
// a known host of pageLink.
func shouldPreferPageThumbnail(current, pageLink string) bool {
	thumb := strings.TrimSpace(current)
	if thumb == "" {
		return true
	}

	lowered := strings.ToLower(thumb)
	for _, thirdParty := range []string{"imgs.search.brave.com", "googleusercontent.com", "bing.com"} {
		if strings.Contains(lowered, thirdParty) {
			return true
		}
	}

	thumbHost := hostOf(thumb)
	if thumbHost == "" {
		return true
	}
	pageHost := hostOf(pageLink)
	return pageHost != "" && pageHost != thumbHost
}
// hostOf returns the lower-cased host component (including any port) of raw,
// or the empty string when raw does not parse as a URL.
func hostOf(raw string) string {
	if parsed, err := url.Parse(raw); err == nil {
		return strings.ToLower(parsed.Host)
	}
	return ""
}
// extractJSONLDValue scans html for `"key": "value"` pairs and returns the
// first value that is non-blank after unescaping, or "" when there is none.
//
// The raw match is cleaned up in a fixed order: escaped slashes (`\/`) and
// `\u002F` become "/", any remaining doubled backslashes are removed, and the
// result is passed through htmlUnescape.
func extractJSONLDValue(html, key string) string {
	re := regexp.MustCompile(`"` + regexp.QuoteMeta(key) + `"\s*:\s*"(https?:\\?/\\?/[^"]+|[^"]+)"`)
	for _, groups := range re.FindAllStringSubmatch(html, -1) {
		if len(groups) != 2 {
			continue
		}
		decoded := groups[1]
		decoded = strings.ReplaceAll(decoded, `\/`, `/`)
		decoded = strings.ReplaceAll(decoded, `\u002F`, `/`)
		decoded = strings.ReplaceAll(decoded, `\\`, "")
		decoded = htmlUnescape(decoded)
		if strings.TrimSpace(decoded) == "" {
			continue
		}
		return decoded
	}
	return ""
}
// envatoCoverImagePattern matches the trailing "/video_preview/<file>.<ext>"
// segment of a cover-image URL, where <ext> is a common image extension.
var envatoCoverImagePattern = regexp.MustCompile(`/video_preview/[^/]+\.(?:jpg|jpeg|png|webp)$`)

// deriveEnvatoPreviewFromThumbnail rewrites an Envato cover-image URL into the
// URL of the matching watermarked preview video.
//
// The thumbnail is HTML-unescaped, its query string is dropped, and a trailing
// "/video_preview/<image file>" segment is replaced with
// "/watermarked_preview/watermarked_preview.mp4". It returns "" when the
// thumbnail is blank or does not end in such a segment; in particular a URL
// that merely contains "/video_preview/" somewhere is never echoed back
// unchanged as if it were a video URL.
func deriveEnvatoPreviewFromThumbnail(thumbnail string) string {
	candidate := htmlUnescape(strings.TrimSpace(thumbnail))
	if candidate == "" {
		return ""
	}
	candidate = strings.ReplaceAll(candidate, "&amp;", "&")
	if idx := strings.Index(candidate, "?"); idx >= 0 {
		candidate = candidate[:idx]
	}
	// ReplaceAllString returns its input untouched when nothing matches, so
	// the match has to be checked explicitly.
	if !envatoCoverImagePattern.MatchString(candidate) {
		return ""
	}
	return envatoCoverImagePattern.ReplaceAllString(candidate, `/watermarked_preview/watermarked_preview.mp4`)
}
// newBrowserRequest builds a body-less HTTP request for target that carries a
// desktop-Chrome User-Agent and an English Accept-Language header. The Accept
// header is set only when accept is non-empty. Any error from
// http.NewRequest is returned as-is with a nil request.
func newBrowserRequest(method, target, accept string) (*http.Request, error) {
	request, err := http.NewRequest(method, target, nil)
	if err != nil {
		return nil, err
	}

	headers := request.Header
	headers.Set("User-Agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36")
	headers.Set("Accept-Language", "en-US,en;q=0.9")
	if accept == "" {
		return request, nil
	}
	headers.Set("Accept", accept)
	return request, nil
}
// fetchTextViaPython downloads target by running a short python3 script
// (urllib with browser-like headers) and returns up to 1 MiB of the response
// body as a string.
//
// Only the script's standard output is returned as page content. Standard
// error is kept separate so interpreter warnings cannot leak into the HTML;
// it is used solely to describe a failure. When python3 cannot be started or
// exits non-zero, the returned error wraps the underlying error and includes
// a truncated sample of the diagnostic output.
func fetchTextViaPython(target string) (string, error) {
	// target is passed as an argument (sys.argv[1]) rather than interpolated
	// into the script text.
	const script = `
from urllib.request import Request, urlopen
import sys
req = Request(sys.argv[1], headers={
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language": "en-US,en;q=0.9",
})
with urlopen(req, timeout=20) as resp:
    sys.stdout.buffer.write(resp.read(1024 * 1024))
`
	output, err := exec.Command("python3", "-c", script, target).Output()
	if err != nil {
		detail := output
		// Output captures stderr into the ExitError when the command exits
		// with a failure status; prefer it because that is where the Python
		// traceback ends up.
		if exitErr, ok := err.(*exec.ExitError); ok && len(exitErr.Stderr) > 0 {
			detail = exitErr.Stderr
		}
		return "", fmt.Errorf("python fallback failed: %w: %s", err, truncateBytes(detail, 300))
	}
	return string(output), nil
}
// looksLikeCloudflareChallenge reports whether body contains, ignoring case,
// any of the marker phrases "cf-mitigated", "attention required" or
// "just a moment".
func looksLikeCloudflareChallenge(body string) bool {
	haystack := strings.ToLower(body)
	for _, marker := range []string{"cf-mitigated", "attention required", "just a moment"} {
		if strings.Contains(haystack, marker) {
			return true
		}
	}
	return false
}
// truncateBytes converts data to a string, trims surrounding whitespace and,
// when the result is longer than limit bytes, shortens it and appends "...".
//
// The cut never lands inside a multi-byte UTF-8 sequence: if limit falls in
// the middle of a rune, the whole rune is dropped so the output stays valid
// UTF-8. A negative limit is treated as zero instead of panicking.
func truncateBytes(data []byte, limit int) string {
	trimmed := strings.TrimSpace(string(data))
	if limit < 0 {
		limit = 0
	}
	if len(trimmed) <= limit {
		return trimmed
	}
	// Ranging over a string yields the byte offset of each rune start; keep
	// the largest one that does not exceed limit.
	cut := 0
	for offset := range trimmed {
		if offset > limit {
			break
		}
		cut = offset
	}
	return trimmed[:cut] + "..."
}
func limitQueries(queries []string, limit int) []string {
seen := map[string]bool{}
filtered := make([]string, 0, minInt(len(queries), limit))
+32
View File
@@ -0,0 +1,32 @@
package services
import "testing"
// TestExtractVideoPreviewURLFindsEnvatoPreview checks that a preview video URL
// embedded as "contentUrl" in a JSON-LD script tag is recovered from the page.
func TestExtractVideoPreviewURLFindsEnvatoPreview(t *testing.T) {
	const previewURL = "https://video-previews.elements.envatousercontent.com/ad0a3abc-7eb0-4075-8f68-8198f9a08777/watermarked_preview/watermarked_preview.mp4"
	page := `<script type="application/ld+json">{"contentUrl":"` + previewURL + `"}</script>`

	actual := firstNonEmpty(extractJSONLDValue(page, "contentUrl"), extractVideoPreviewURL(page))
	if actual != previewURL {
		t.Fatalf("expected %q, got %q", previewURL, actual)
	}
}
// TestDeriveEnvatoPreviewFromThumbnail checks that a cover-image URL with an
// HTML-escaped query string is rewritten to the watermarked preview video URL.
func TestDeriveEnvatoPreviewFromThumbnail(t *testing.T) {
	const (
		coverImage = "https://elements-resized.envatousercontent.com/elements-video-cover-images/ad0a3abc-7eb0-4075-8f68-8198f9a08777/video_preview/video_preview_0000.jpg?w=1200&amp;h=630"
		expected   = "https://elements-resized.envatousercontent.com/elements-video-cover-images/ad0a3abc-7eb0-4075-8f68-8198f9a08777/watermarked_preview/watermarked_preview.mp4"
	)

	if actual := deriveEnvatoPreviewFromThumbnail(coverImage); actual != expected {
		t.Fatalf("expected %q, got %q", expected, actual)
	}
}
// TestIsUsefulGoogleVideoResultRejectsMusicResults checks that a result whose
// title and snippet mention bgm/soundtrack terms is filtered out.
func TestIsUsefulGoogleVideoResultRejectsMusicResults(t *testing.T) {
	musicClip := SearchResult{
		Title:   "Couple Friendly Sad Bgm Movie Best Bgm",
		Link:    "https://www.youtube.com/watch?v=LGP4wiXSw8c",
		Snippet: "romantic bgm soundtrack",
	}

	accepted := isUsefulGoogleVideoResult(musicClip)
	if accepted {
		t.Fatal("expected bgm/music result to be rejected")
	}
}
+33 -3
View File
@@ -27,6 +27,7 @@ type GeminiService struct {
type AIRecommendation struct {
Title string `json:"title"`
Link string `json:"link"`
Snippet string `json:"snippet"`
ThumbnailURL string `json:"thumbnailUrl"`
PreviewVideoURL string `json:"previewVideoUrl"`
Source string `json:"source"`
@@ -232,6 +233,7 @@ User query: ` + query,
recommendations = append(recommendations, AIRecommendation{
Title: src.Title,
Link: src.Link,
Snippet: src.Snippet,
ThumbnailURL: src.ThumbnailURL,
PreviewVideoURL: src.PreviewVideoURL,
Source: src.Source,
@@ -245,6 +247,7 @@ User query: ` + query,
recommendations = append(recommendations, AIRecommendation{
Title: candidate.Title,
Link: candidate.Link,
Snippet: candidate.Snippet,
ThumbnailURL: candidate.ThumbnailURL,
PreviewVideoURL: candidate.PreviewVideoURL,
Source: candidate.Source,
@@ -262,10 +265,26 @@ func fetchImageAsInlineData(client *http.Client, imageURL string) (string, strin
return "", "", fmt.Errorf("image url is empty")
}
resp, err := client.Get(imageURL)
if err != nil {
return "", "", err
if err == nil {
defer resp.Body.Close()
}
if err != nil || resp.StatusCode >= 300 {
req, reqErr := newBrowserStyleImageRequest(imageURL)
if reqErr != nil {
if err != nil {
return "", "", err
}
return "", "", reqErr
}
if resp != nil {
resp.Body.Close()
}
resp, err = client.Do(req)
if err != nil {
return "", "", err
}
defer resp.Body.Close()
}
defer resp.Body.Close()
if resp.StatusCode >= 300 {
return "", "", fmt.Errorf("thumbnail fetch failed with %d", resp.StatusCode)
@@ -284,6 +303,17 @@ func fetchImageAsInlineData(client *http.Client, imageURL string) (string, strin
return base64.StdEncoding.EncodeToString(data), mimeType, nil
}
// newBrowserStyleImageRequest builds a GET request for imageURL that carries
// a desktop-Chrome User-Agent, an image-oriented Accept header and an English
// Accept-Language header. Any error from http.NewRequest is returned as-is
// with a nil request.
func newBrowserStyleImageRequest(imageURL string) (*http.Request, error) {
	request, err := http.NewRequest(http.MethodGet, imageURL, nil)
	if err != nil {
		return nil, err
	}

	browserHeaders := [][2]string{
		{"User-Agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36"},
		{"Accept", "image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8"},
		{"Accept-Language", "en-US,en;q=0.9"},
	}
	for _, header := range browserHeaders {
		request.Header.Set(header[0], header[1])
	}
	return request, nil
}
func fetchCandidateVisualInlineData(client *http.Client, candidate SearchResult) (string, string, error) {
if candidate.ThumbnailURL != "" {
data, mimeType, err := fetchImageAsInlineData(client, candidate.ThumbnailURL)