diff --git a/TODO.md b/TODO.md index 0c9d258..75bd53f 100644 --- a/TODO.md +++ b/TODO.md @@ -23,6 +23,33 @@ - `go build ./backend` from repo root conflicts with the existing `backend/` directory name - verified build command is now treated as `go build -o /tmp/... ./backend` +## Current Session Update (2026-03-13, Search/Preview Follow-up) +- Investigated a production search failure using downloaded frontend logs. +- Identified the main timeout cause: + - too many search results were being collected + - too many Gemini Vision batches were being evaluated sequentially + - backend debug messages were broadcasting oversized result payloads +- Applied search pipeline optimization: + - reduced per-source result caps + - reduced query fan-out for Google Video + - reduced enrichment cap + - limited Gemini Vision evaluation to top-ranked candidates only +- Improved Google Video filtering: + - added bans for music/BGM/trailer-style noise results +- Improved Envato enrichment fidelity: + - source page metadata is now preferred over search-engine proxy thumbnails + - source snippet/title are now taken from page metadata when available + - preview mp4 extraction now works via HTML/JSON-LD parsing + - added Python HTML fetch fallback for Cloudflare-challenged Envato pages because Go HTTP alone was receiving 403 challenge pages in testing +- Improved Artgrid fidelity: + - source page title/description/thumbnail are now preferred over search-engine snippets when available + - preview extraction is still not considered solved for all Artgrid clips because public HTML tested here did not expose a stable mp4/m3u8 URL +- Improved logging: + - backend search debug events now emit summaries, timings, source counts, preview counts, and Gemini batch stats instead of giant raw arrays + - frontend now logs raw non-JSON error bodies instead of collapsing them to `{}` on gateway/proxy failures +- Improved result rendering: + - search cards now show source snippet/description separately from AI reason to reduce confusion between asset metadata and Gemini commentary + ## Local Self-Test Workflow - Primary command: - `bash scripts/selftest.sh` @@ -145,7 +172,8 @@ - Gemini batch evaluation exists, but search quality can still degrade if upstream SearXNG results are noisy. - Frontend JavaScript was not linted with Node tooling in this environment because `node` is not installed here. - Full browser-level preview validation is still not covered by the local self-test script. -- Search cards still render recommendation reason text, not a robust asset description/snippet mapping. +- Search cards now separate source snippet from AI reason, but metadata fidelity still depends on source enrichment quality. +- Artgrid public pages inspected from this environment still did not expose a stable public preview video URL in HTML, so Artgrid hover-video support may remain partial until a browser-captured HTML/HAR sample reveals the real preview source pattern. ## Frontend Debug Logger - UI button: bottom-right `Logs` @@ -215,6 +243,7 @@ - [ ] Better matching between rendered description and actual linked asset - [ ] Add browser-level verification for preview/HLS behavior - [ ] Add more automated coverage for search ranking / filtering logic +- [ ] If Artgrid hover preview is still required, collect one real clip HTML/HAR from a browser session and derive a stable preview URL parser - [ ] Add proper frontend build/lint step if Node becomes available ## Verified Locally In This Environment diff --git a/backend/handlers/api.go b/backend/handlers/api.go index e71345c..d25b574 100644 --- a/backend/handlers/api.go +++ b/backend/handlers/api.go @@ -76,6 +76,27 @@ type PreviewResponse struct { Qualities []map[string]any `json:"qualities"` } +type searchDebugSummary struct { + Total int `json:"total"` + BySource map[string]int `json:"bySource"` + WithPreview int `json:"withPreview"` + WithThumbnail int `json:"withThumbnail"` + Top []map[string]any `json:"top"` + Warning string `json:"warning,omitempty"` + DurationMS int64 `json:"durationMs,omitempty"` + GeminiCandidateCap int `json:"geminiCandidateCap,omitempty"` +} + +type geminiBatchStats struct { + CandidateCap int `json:"candidateCap"` + Requested int `json:"requested"` + Batches int `json:"batches"` + Succeeded int `json:"succeeded"` + Failed int `json:"failed"` + RecommendedCount int `json:"recommendedCount"` + Errors []string `json:"errors,omitempty"` +} + func RegisterRoutes(router *gin.Engine, app *App) { router.GET("/healthz", func(c *gin.Context) { c.JSON(http.StatusOK, gin.H{"status": "ok"}) @@ -259,6 +280,7 @@ func (a *App) runDownload(recordID int64, url, start, end, quality, outputPath s } func (a *App) searchMedia(c *gin.Context) { + started := time.Now() var req struct { Query string `json:"query"` Platforms []string `json:"platforms"` @@ -277,18 +299,24 @@ func (a *App) searchMedia(c *gin.Context) { if len(queryVariants) == 0 { queryVariants = []string{req.Query} } - a.debug("search query variants", gin.H{"query": req.Query, "variants": queryVariants, "platforms": req.Platforms}) + a.debug("search query variants", gin.H{ + "query": req.Query, + "platforms": req.Platforms, + "variants": queryVariants, + "variantCount": len(queryVariants), + "requestIdHint": time.Now().UnixNano(), + }) enabledPlatforms := normalizePlatforms(req.Platforms) a.Hub.Broadcast("progress", gin.H{"type": "search", "status": "searching " + selectedPlatformLabel(enabledPlatforms), "progress": 35}) results, err := a.SearchService.SearchMedia(queryVariants, enabledPlatforms) if err != nil { - a.debug("search backend failed", gin.H{"error": err.Error(), "variants": queryVariants}) + a.debug("search backend failed", gin.H{"error": err.Error(), "variants": queryVariants, "durationMs": time.Since(started).Milliseconds()}) a.Hub.Broadcast("progress", gin.H{"type": "search", "status": "search failed", "progress": 100, "message": err.Error()}) c.JSON(http.StatusBadGateway, gin.H{"error": err.Error()}) return } - a.debug("search backend results", gin.H{"count": len(results), "results": results}) + a.debug("search backend summary", summarizeSearchResults(results, time.Since(started), 0, "")) if len(results) == 0 { warning := "SearXNG returned no renderable results." a.Hub.Broadcast("progress", gin.H{"type": "search", "status": "no renderable search results", "progress": 100, "message": warning}) @@ -302,10 +330,10 @@ func (a *App) searchMedia(c *gin.Context) { rankQuery = strings.Join(queryVariants[:min(len(queryVariants), 3)], " ") } scored := rankSearchResults(rankQuery, results) - a.debug("search ranked results", gin.H{"count": len(scored), "results": scored}) - a.Hub.Broadcast("progress", gin.H{"type": "search", "status": "analyzing all candidate visuals with Gemini Vision", "progress": 75}) - recommended := evaluateAllCandidatesWithGemini(a.GeminiService, req.Query, scored) - a.debug("search gemini recommendations", gin.H{"count": len(recommended), "results": recommended}) + a.debug("search ranked summary", summarizeSearchResults(scored, time.Since(started), geminiCandidateLimit(len(scored)), "")) + a.Hub.Broadcast("progress", gin.H{"type": "search", "status": "analyzing top candidate visuals with Gemini Vision", "progress": 75}) + recommended, geminiStats := evaluateAllCandidatesWithGemini(a.GeminiService, req.Query, scored) + a.debug("search gemini evaluation", geminiStats) err = nil if len(recommended) == 0 { err = fmt.Errorf("gemini vision returned no recommended items across all candidate batches") @@ -316,6 +344,7 @@ func (a *App) searchMedia(c *gin.Context) { fallback = append(fallback, services.AIRecommendation{ Title: result.Title, Link: result.Link, + Snippet: result.Snippet, ThumbnailURL: result.ThumbnailURL, PreviewVideoURL: result.PreviewVideoURL, Source: result.Source, @@ -324,12 +353,15 @@ func (a *App) searchMedia(c *gin.Context) { }) } warning := err.Error() + a.debug("search fallback summary", summarizeRecommendationResults(fallback, time.Since(started), warning)) a.Hub.Broadcast("progress", gin.H{"type": "search", "status": "Gemini Vision fallback to ranked results", "progress": 90, "message": warning}) c.JSON(http.StatusOK, gin.H{"results": fallback, "warning": warning, "queries": queryVariants}) return } - response := gin.H{"results": mergeRecommendations(recommended, scored, 20), "queries": queryVariants} + merged := mergeRecommendations(recommended, scored, 20) + a.debug("search complete summary", summarizeRecommendationResults(merged, time.Since(started), "")) + response := gin.H{"results": merged, "queries": queryVariants} a.Hub.Broadcast("progress", gin.H{"type": "search", "status": "search complete", "progress": 100}) c.JSON(http.StatusOK, response) } @@ -406,20 +438,31 @@ func selectedPlatformLabel(platforms map[string]bool) string { return strings.Join(labels, ", ") } -func evaluateAllCandidatesWithGemini(service *services.GeminiService, query string, ranked []services.SearchResult) []services.AIRecommendation { +func evaluateAllCandidatesWithGemini(service *services.GeminiService, query string, ranked []services.SearchResult) ([]services.AIRecommendation, geminiBatchStats) { const chunkSize = 8 + limit := geminiCandidateLimit(len(ranked)) + stats := geminiBatchStats{ + CandidateCap: limit, + Requested: min(limit, len(ranked)), + } merged := make([]services.AIRecommendation, 0, len(ranked)) seen := map[string]bool{} - for start := 0; start < len(ranked); start += chunkSize { + for start := 0; start < limit; start += chunkSize { end := start + chunkSize - if end > len(ranked) { - end = len(ranked) + if end > limit { + end = limit } batch := ranked[start:end] + stats.Batches++ recommended, err := service.Recommend(query, batch) if err != nil { + stats.Failed++ + if len(stats.Errors) < 5 { + stats.Errors = append(stats.Errors, err.Error()) + } continue } + stats.Succeeded++ for _, item := range recommended { if item.Link == "" || seen[item.Link] { continue @@ -428,7 +471,8 @@ func evaluateAllCandidatesWithGemini(service *services.GeminiService, query stri merged = append(merged, item) } } - return merged + stats.RecommendedCount = len(merged) + return merged, stats } func rankSearchResults(query string, results []services.SearchResult) []services.SearchResult { @@ -515,6 +559,7 @@ func mergeRecommendations(recommended []services.AIRecommendation, ranked []serv merged = append(merged, services.AIRecommendation{ Title: item.Title, Link: item.Link, + Snippet: item.Snippet, ThumbnailURL: item.ThumbnailURL, PreviewVideoURL: item.PreviewVideoURL, Source: item.Source, @@ -525,6 +570,96 @@ func mergeRecommendations(recommended []services.AIRecommendation, ranked []serv return merged } +func geminiCandidateLimit(total int) int { + switch { + case total <= 8: + return total + case total <= 16: + return 12 + default: + return 16 + } +} + +func summarizeSearchResults(results []services.SearchResult, duration time.Duration, geminiCap int, warning string) searchDebugSummary { + bySource := map[string]int{} + withPreview := 0 + withThumbnail := 0 + top := make([]map[string]any, 0, min(6, len(results))) + for idx, item := range results { + bySource[item.Source]++ + if strings.TrimSpace(item.PreviewVideoURL) != "" { + withPreview++ + } + if strings.TrimSpace(item.ThumbnailURL) != "" { + withThumbnail++ + } + if idx < 6 { + top = append(top, map[string]any{ + "title": truncateText(item.Title, 120), + "source": item.Source, + "hasPreview": item.PreviewVideoURL != "", + "hasThumbnail": item.ThumbnailURL != "", + "displayLink": item.DisplayLink, + "snippetSample": truncateText(item.Snippet, 160), + }) + } + } + return searchDebugSummary{ + Total: len(results), + BySource: bySource, + WithPreview: withPreview, + WithThumbnail: withThumbnail, + Top: top, + Warning: warning, + DurationMS: duration.Milliseconds(), + GeminiCandidateCap: geminiCap, + } +} + +func summarizeRecommendationResults(results []services.AIRecommendation, duration time.Duration, warning string) searchDebugSummary { + bySource := map[string]int{} + withPreview := 0 + withThumbnail := 0 + top := make([]map[string]any, 0, min(6, len(results))) + for idx, item := range results { + bySource[item.Source]++ + if strings.TrimSpace(item.PreviewVideoURL) != "" { + withPreview++ + } + if strings.TrimSpace(item.ThumbnailURL) != "" { + withThumbnail++ + } + if idx < 6 { + top = append(top, map[string]any{ + "title": truncateText(item.Title, 120), + "source": item.Source, + "hasPreview": item.PreviewVideoURL != "", + "hasThumbnail": item.ThumbnailURL != "", + "reasonSample": truncateText(item.Reason, 120), + "snippetSample": truncateText(item.Snippet, 160), + }) + } + } + return searchDebugSummary{ + Total: len(results), + BySource: bySource, + WithPreview: withPreview, + WithThumbnail: withThumbnail, + Top: top, + Warning: warning, + DurationMS: duration.Milliseconds(), + } +} + +func truncateText(text string, limit int) string { + trimmed := strings.TrimSpace(text) + if len(trimmed) <= limit { + return trimmed + } + return trimmed[:limit] + "..." +} + func EnsurePaths(downloadsDir, workerScript string) error { if err := os.MkdirAll(downloadsDir, 0o755); err != nil { return err diff --git a/backend/services/cse.go b/backend/services/cse.go index e43a4a4..dff937b 100644 --- a/backend/services/cse.go +++ b/backend/services/cse.go @@ -6,6 +6,7 @@ import ( "io" "net/http" "net/url" + "os/exec" "regexp" "sort" "strings" @@ -54,6 +55,7 @@ func (s *SearchService) SearchMedia(queries []string, enabledPlatforms map[strin name string categories string engine string + maxResults int build func(string) []string accept func(SearchResult) bool } @@ -63,6 +65,7 @@ func (s *SearchService) SearchMedia(queries []string, enabledPlatforms map[strin name: "Envato", categories: "general", engine: s.WebEngine, + maxResults: 8, build: buildEnvatoQueries, accept: isRenderableEnvatoResult, }, @@ -70,6 +73,7 @@ func (s *SearchService) SearchMedia(queries []string, enabledPlatforms map[strin name: "Artgrid", categories: "general", engine: s.WebEngine, + maxResults: 8, build: buildArtgridQueries, accept: isRenderableArtgridResult, }, @@ -77,16 +81,18 @@ func (s *SearchService) SearchMedia(queries []string, enabledPlatforms map[strin name: "Google Video", categories: "videos", engine: s.GoogleVideoEngine, + maxResults: 6, build: buildGoogleVideoQueries, accept: isUsefulGoogleVideoResult, }, } seen := map[string]bool{} + sourceCounts := map[string]int{} results := make([]SearchResult, 0, 90) var lastErr error - baseQueries := limitQueries(queries, 5) + baseQueries := limitQueries(queries, 3) for _, base := range baseQueries { base = strings.TrimSpace(base) if base == "" { @@ -96,7 +102,13 @@ func (s *SearchService) SearchMedia(queries []string, enabledPlatforms map[strin if len(enabledPlatforms) > 0 && !enabledPlatforms[strings.ToLower(source.name)] { continue } + if sourceCounts[source.name] >= source.maxResults { + continue + } for _, searchQuery := range source.build(base) { + if sourceCounts[source.name] >= source.maxResults { + break + } items, err := s.search(searchQuery, source.categories, source.engine, source.name) if err != nil { lastErr = err @@ -112,6 +124,10 @@ func (s *SearchService) SearchMedia(queries []string, enabledPlatforms map[strin } seen[item.Link] = true results = append(results, item) + sourceCounts[source.name]++ + if sourceCounts[source.name] >= source.maxResults { + break + } } } } @@ -128,7 +144,7 @@ func (s *SearchService) SearchMedia(queries []string, enabledPlatforms map[strin } func (s *SearchService) EnrichResults(results []SearchResult) []SearchResult { - limit := minInt(len(results), 24) + limit := minInt(len(results), 18) if limit == 0 { return results } @@ -170,14 +186,32 @@ func (s *SearchService) enrichEnvato(result SearchResult) SearchResult { if err != nil { return result } - if result.ThumbnailURL == "" { - result.ThumbnailURL = firstNonEmpty( - extractMetaContent(html, "og:image"), - extractMetaContent(html, "twitter:image"), - ) + result.Title = firstNonEmpty( + extractMetaContent(html, "og:title"), + result.Title, + ) + result.Snippet = firstNonEmpty( + extractMetaContent(html, "og:description"), + extractMetaContent(html, "description"), + result.Snippet, + ) + + pageThumbnail := firstNonEmpty( + extractMetaContent(html, "og:image"), + extractMetaContent(html, "twitter:image"), + extractJSONLDValue(html, "thumbnailUrl"), + ) + if shouldPreferPageThumbnail(result.ThumbnailURL, result.Link) { + result.ThumbnailURL = pageThumbnail } if result.PreviewVideoURL == "" { - result.PreviewVideoURL = extractVideoPreviewURL(html) + result.PreviewVideoURL = firstNonEmpty( + extractJSONLDValue(html, "contentUrl"), + extractMetaContent(html, "twitter:player:stream"), + extractVideoPreviewURL(html), + deriveEnvatoPreviewFromThumbnail(pageThumbnail), + deriveEnvatoPreviewFromThumbnail(result.ThumbnailURL), + ) } return result } @@ -203,17 +237,30 @@ func (s *SearchService) enrichArtgrid(result SearchResult) SearchResult { if result.ThumbnailURL == "" || result.PreviewVideoURL == "" { html, err := s.fetchText(result.Link) if err == nil { - if result.ThumbnailURL == "" { - result.ThumbnailURL = firstNonEmpty( - extractMetaContent(html, "og:image"), - extractMetaContent(html, "twitter:image"), - ) - if result.ThumbnailURL == "" { - result.ThumbnailURL = extractArtgridBackgroundThumbnail(html, clipID) - } + result.Title = firstNonEmpty( + extractMetaContent(html, "og:title"), + result.Title, + ) + result.Snippet = firstNonEmpty( + extractMetaContent(html, "og:description"), + extractMetaContent(html, "description"), + result.Snippet, + ) + pageThumbnail := firstNonEmpty( + extractMetaContent(html, "og:image"), + extractMetaContent(html, "twitter:image"), + extractArtgridBackgroundThumbnail(html, clipID), + extractJSONLDValue(html, "image"), + ) + if shouldPreferPageThumbnail(result.ThumbnailURL, result.Link) { + result.ThumbnailURL = pageThumbnail } if result.PreviewVideoURL == "" { - result.PreviewVideoURL = extractVideoPreviewURL(html) + result.PreviewVideoURL = firstNonEmpty( + extractJSONLDValue(html, "contentUrl"), + extractMetaContent(html, "twitter:player:stream"), + extractVideoPreviewURL(html), + ) } } } @@ -282,7 +329,6 @@ func (s *SearchService) search(query, categories, engine, source string) ([]Sear func buildGoogleVideoQueries(base string) []string { return []string{ fmt.Sprintf(`"%s" ("stock footage" OR "b-roll" OR cinematic OR "establishing shot" OR editorial) -tutorial -"how to" -review -reaction -course -podcast -vlog -interview -breakdown -edit -editing`, base), - fmt.Sprintf(`"%s" ("cinematic footage" OR "free stock footage" OR "4k footage") -tutorial -"how to" -review`, base), } } @@ -310,6 +356,8 @@ func isUsefulGoogleVideoResult(result SearchResult) bool { "tutorial", "how to", "review", "reaction", "podcast", "interview", "walkthrough", "course", "lesson", "edit tutorial", "editing tutorial", "premiere pro", "after effects", "breakdown", "explained", "vlog", "tips", "guide", "learn", "free download", + "bgm", "music", "song", "lyrics", "audio", "soundtrack", "trailer", "teaser", + "full movie", "movie clip", "status", "whatsapp status", "fan cam", "fancam", } { if strings.Contains(text, banned) { return false @@ -477,11 +525,18 @@ func pickVideoURL(urls []string) string { } func (s *SearchService) fetchText(target string) (string, error) { - resp, err := s.Client.Get(target) + req, err := newBrowserRequest(http.MethodGet, target, "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8") + if err != nil { + return "", err + } + resp, err := s.Client.Do(req) if err != nil { return "", err } defer resp.Body.Close() + if resp.StatusCode == http.StatusForbidden || resp.StatusCode == http.StatusServiceUnavailable { + return fetchTextViaPython(target) + } if resp.StatusCode >= 300 { return "", fmt.Errorf("fetch returned status %d", resp.StatusCode) } @@ -489,15 +544,17 @@ func (s *SearchService) fetchText(target string) (string, error) { if err != nil { return "", err } + if looksLikeCloudflareChallenge(string(data)) { + return fetchTextViaPython(target) + } return string(data), nil } func (s *SearchService) fetchJSONText(target string) (string, error) { - req, err := http.NewRequest(http.MethodGet, target, nil) + req, err := newBrowserRequest(http.MethodGet, target, "application/json, text/json, */*") if err != nil { return "", err } - req.Header.Set("Accept", "application/json, text/json") resp, err := s.Client.Do(req) if err != nil { return "", err @@ -522,6 +579,106 @@ func firstNonEmpty(values ...string) string { return "" } +func shouldPreferPageThumbnail(current, pageLink string) bool { + current = strings.TrimSpace(current) + if current == "" { + return true + } + lower := strings.ToLower(current) + if strings.Contains(lower, "imgs.search.brave.com") || strings.Contains(lower, "googleusercontent.com") || strings.Contains(lower, "bing.com") { + return true + } + currentHost := hostOf(current) + pageHost := hostOf(pageLink) + return currentHost == "" || (pageHost != "" && currentHost != pageHost) +} + +func hostOf(raw string) string { + parsed, err := url.Parse(raw) + if err != nil { + return "" + } + return strings.ToLower(parsed.Host) +} + +func extractJSONLDValue(html, key string) string { + pattern := regexp.MustCompile(`"` + regexp.QuoteMeta(key) + `"\s*:\s*"(https?:\\?/\\?/[^"]+|[^"]+)"`) + matches := pattern.FindAllStringSubmatch(html, -1) + for _, match := range matches { + if len(match) != 2 { + continue + } + value := strings.ReplaceAll(match[1], `\/`, `/`) + value = strings.ReplaceAll(value, `\u002F`, `/`) + value = strings.ReplaceAll(value, `\\`, "") + value = htmlUnescape(value) + if strings.TrimSpace(value) != "" { + return value + } + } + return "" +} + +func deriveEnvatoPreviewFromThumbnail(thumbnail string) string { + candidate := htmlUnescape(strings.TrimSpace(thumbnail)) + if candidate == "" { + return "" + } + candidate = strings.ReplaceAll(candidate, "&", "&") + if strings.Contains(candidate, "/video_preview/") { + if idx := strings.Index(candidate, "?"); idx >= 0 { + candidate = candidate[:idx] + } + return regexp.MustCompile(`/video_preview/[^/]+\.(?:jpg|jpeg|png|webp)$`).ReplaceAllString(candidate, `/watermarked_preview/watermarked_preview.mp4`) + } + return "" +} + +func newBrowserRequest(method, target, accept string) (*http.Request, error) { + req, err := http.NewRequest(method, target, nil) + if err != nil { + return nil, err + } + req.Header.Set("User-Agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36") + req.Header.Set("Accept-Language", "en-US,en;q=0.9") + if accept != "" { + req.Header.Set("Accept", accept) + } + return req, nil +} + +func fetchTextViaPython(target string) (string, error) { + script := ` +from urllib.request import Request, urlopen +import sys +req = Request(sys.argv[1], headers={ + "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36", + "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", + "Accept-Language": "en-US,en;q=0.9", +}) +with urlopen(req, timeout=20) as resp: + sys.stdout.buffer.write(resp.read(1024 * 1024)) +` + output, err := exec.Command("python3", "-c", script, target).CombinedOutput() + if err != nil { + return "", fmt.Errorf("python fallback failed: %v: %s", err, truncateBytes(output, 300)) + } + return string(output), nil +} + +func looksLikeCloudflareChallenge(body string) bool { + lower := strings.ToLower(body) + return strings.Contains(lower, "cf-mitigated") || strings.Contains(lower, "attention required") || strings.Contains(lower, "just a moment") +} + +func truncateBytes(data []byte, limit int) string { + trimmed := strings.TrimSpace(string(data)) + if len(trimmed) <= limit { + return trimmed + } + return trimmed[:limit] + "..." +} + func limitQueries(queries []string, limit int) []string { seen := map[string]bool{} filtered := make([]string, 0, minInt(len(queries), limit)) diff --git a/backend/services/cse_test.go b/backend/services/cse_test.go new file mode 100644 index 0000000..0e790c3 --- /dev/null +++ b/backend/services/cse_test.go @@ -0,0 +1,32 @@ +package services + +import "testing" + +func TestExtractVideoPreviewURLFindsEnvatoPreview(t *testing.T) { + html := `` + got := firstNonEmpty(extractJSONLDValue(html, "contentUrl"), extractVideoPreviewURL(html)) + want := "https://video-previews.elements.envatousercontent.com/ad0a3abc-7eb0-4075-8f68-8198f9a08777/watermarked_preview/watermarked_preview.mp4" + if got != want { + t.Fatalf("expected %q, got %q", want, got) + } +} + +func TestDeriveEnvatoPreviewFromThumbnail(t *testing.T) { + thumb := "https://elements-resized.envatousercontent.com/elements-video-cover-images/ad0a3abc-7eb0-4075-8f68-8198f9a08777/video_preview/video_preview_0000.jpg?w=1200&h=630" + got := deriveEnvatoPreviewFromThumbnail(thumb) + want := "https://elements-resized.envatousercontent.com/elements-video-cover-images/ad0a3abc-7eb0-4075-8f68-8198f9a08777/watermarked_preview/watermarked_preview.mp4" + if got != want { + t.Fatalf("expected %q, got %q", want, got) + } +} + +func TestIsUsefulGoogleVideoResultRejectsMusicResults(t *testing.T) { + result := SearchResult{ + Title: "Couple Friendly Sad Bgm Movie Best Bgm", + Link: "https://www.youtube.com/watch?v=LGP4wiXSw8c", + Snippet: "romantic bgm soundtrack", + } + if isUsefulGoogleVideoResult(result) { + t.Fatal("expected bgm/music result to be rejected") + } +} diff --git a/backend/services/gemini.go b/backend/services/gemini.go index 9a4b181..ec307f6 100644 --- a/backend/services/gemini.go +++ b/backend/services/gemini.go @@ -27,6 +27,7 @@ type GeminiService struct { type AIRecommendation struct { Title string `json:"title"` Link string `json:"link"` + Snippet string `json:"snippet"` ThumbnailURL string `json:"thumbnailUrl"` PreviewVideoURL string `json:"previewVideoUrl"` Source string `json:"source"` @@ -232,6 +233,7 @@ User query: ` + query, recommendations = append(recommendations, AIRecommendation{ Title: src.Title, Link: src.Link, + Snippet: src.Snippet, ThumbnailURL: src.ThumbnailURL, PreviewVideoURL: src.PreviewVideoURL, Source: src.Source, @@ -245,6 +247,7 @@ User query: ` + query, recommendations = append(recommendations, AIRecommendation{ Title: candidate.Title, Link: candidate.Link, + Snippet: candidate.Snippet, ThumbnailURL: candidate.ThumbnailURL, PreviewVideoURL: candidate.PreviewVideoURL, Source: candidate.Source, @@ -262,10 +265,26 @@ func fetchImageAsInlineData(client *http.Client, imageURL string) (string, strin return "", "", fmt.Errorf("image url is empty") } resp, err := client.Get(imageURL) - if err != nil { - return "", "", err + if err == nil { + defer resp.Body.Close() + } + if err != nil || resp.StatusCode >= 300 { + req, reqErr := newBrowserStyleImageRequest(imageURL) + if reqErr != nil { + if err != nil { + return "", "", err + } + return "", "", reqErr + } + if resp != nil { + resp.Body.Close() + } + resp, err = client.Do(req) + if err != nil { + return "", "", err + } + defer resp.Body.Close() } - defer resp.Body.Close() if resp.StatusCode >= 300 { return "", "", fmt.Errorf("thumbnail fetch failed with %d", resp.StatusCode) @@ -284,6 +303,17 @@ func fetchImageAsInlineData(client *http.Client, imageURL string) (string, strin return base64.StdEncoding.EncodeToString(data), mimeType, nil } +func newBrowserStyleImageRequest(imageURL string) (*http.Request, error) { + req, err := http.NewRequest(http.MethodGet, imageURL, nil) + if err != nil { + return nil, err + } + req.Header.Set("User-Agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36") + req.Header.Set("Accept", "image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8") + req.Header.Set("Accept-Language", "en-US,en;q=0.9") + return req, nil +} + func fetchCandidateVisualInlineData(client *http.Client, candidate SearchResult) (string, string, error) { if candidate.ThumbnailURL != "" { data, mimeType, err := fetchImageAsInlineData(client, candidate.ThumbnailURL) diff --git a/frontend/app.js b/frontend/app.js index 9b6fa4a..2059591 100644 --- a/frontend/app.js +++ b/frontend/app.js @@ -68,12 +68,36 @@ function logEvent(type, payload) { function safeStringify(value) { try { - return JSON.stringify(value, null, 2); + return JSON.stringify(compactPayload(value), null, 2); } catch { return String(value); } } +function compactPayload(value, depth = 0) { + if (depth > 3) { + return "[truncated]"; + } + if (Array.isArray(value)) { + if (value.length > 8) { + return { + type: "array", + length: value.length, + sample: value.slice(0, 5).map((item) => compactPayload(item, depth + 1)), + }; + } + return value.map((item) => compactPayload(item, depth + 1)); + } + if (value && typeof value === "object") { + const entries = Object.entries(value); + return Object.fromEntries(entries.map(([key, item]) => [key, compactPayload(item, depth + 1)])); + } + if (typeof value === "string" && value.length > 500) { + return `${value.slice(0, 500)}...`; + } + return value; +} + function renderLogs() { debugSummary.textContent = `${debugEntries.length} events captured`; debugLogList.innerHTML = ""; @@ -194,15 +218,24 @@ async function api(path, options = {}) { bodyPreview: typeof options.body === "string" ? options.body.slice(0, 800) : "[non-string body]", }); const response = await fetch(path, options); - const data = await response.json().catch(() => ({})); + const rawText = await response.text(); + let data = {}; + if (rawText) { + try { + data = JSON.parse(rawText); + } catch { + data = { rawText }; + } + } logEvent("api:response", { path, status: response.status, ok: response.ok, - body: data, + body: compactPayload(data), }); if (!response.ok) { - const error = new Error(data.error || "request failed"); + const message = data.error || data.rawText || `request failed (${response.status})`; + const error = new Error(message); error.status = response.status; error.data = data; throw error; @@ -263,7 +296,8 @@ function renderResults(results) { image.src = item.thumbnailUrl || "https://placehold.co/1280x720/0a0a0a/ffffff?text=Preview"; image.alt = item.title; node.querySelector("h3").textContent = item.title; - node.querySelector("p").textContent = item.reason; + node.querySelector(".result-snippet").textContent = item.snippet || item.reason || item.source || ""; + node.querySelector(".result-reason").textContent = item.reason ? `AI note: ${item.reason}` : ""; node.querySelector(".source-badge").textContent = item.source; if (item.previewVideoUrl) { attachVideoSource(previewVideo, item.previewVideoUrl); diff --git a/frontend/index.html b/frontend/index.html index d6016ad..31b7338 100644 --- a/frontend/index.html +++ b/frontend/index.html @@ -159,7 +159,8 @@