Stabilize search pipeline and improve preview diagnostics
build-push / docker (push) Successful in 4m14s

This commit is contained in:
AI Assistant
2026-03-13 18:32:54 +09:00
parent 6f3149a443
commit 7dfb1ad2de
8 changed files with 463 additions and 45 deletions
+30 -1
View File
@@ -23,6 +23,33 @@
- `go build ./backend` from repo root conflicts with the existing `backend/` directory name
- the verified build command is now `go build -o /tmp/... ./backend`
## Current Session Update (2026-03-13, Search/Preview Follow-up)
- Investigated a production search failure using downloaded frontend logs.
- Identified the main timeout cause:
- too many search results were being collected
- too many Gemini Vision batches were being evaluated sequentially
- backend debug messages were broadcasting oversized result payloads
- Applied search pipeline optimization:
- reduced per-source result caps
- reduced query fan-out for Google Video
- reduced enrichment cap
- limited Gemini Vision evaluation to top-ranked candidates only
- Improved Google Video filtering:
- added bans for music/BGM/trailer-style noise results
- Improved Envato enrichment fidelity:
- source page metadata is now preferred over search-engine proxy thumbnails
- source snippet/title are now taken from page metadata when available
- preview mp4 extraction now works via HTML/JSON-LD parsing
- added Python HTML fetch fallback for Cloudflare-challenged Envato pages because Go HTTP alone was receiving 403 challenge pages in testing
- Improved Artgrid fidelity:
- source page title/description/thumbnail are now preferred over search-engine snippets when available
- preview extraction is still not considered solved for all Artgrid clips because public HTML tested here did not expose a stable mp4/m3u8 URL
- Improved logging:
- backend search debug events now emit summaries, timings, source counts, preview counts, and Gemini batch stats instead of giant raw arrays
- frontend now logs raw non-JSON error bodies instead of collapsing them to `{}` on gateway/proxy failures
- Improved result rendering:
- search cards now show source snippet/description separately from AI reason to reduce confusion between asset metadata and Gemini commentary
## Local Self-Test Workflow
- Primary command:
- `bash scripts/selftest.sh`
@@ -145,7 +172,8 @@
- Gemini batch evaluation exists, but search quality can still degrade if upstream SearXNG results are noisy.
- Frontend JavaScript was not linted with Node tooling in this environment because `node` is not installed here.
- Full browser-level preview validation is still not covered by the local self-test script.
- Search cards still render recommendation reason text, not a robust asset description/snippet mapping.
- Search cards now separate source snippet from AI reason, but metadata fidelity still depends on source enrichment quality.
- Artgrid public pages inspected from this environment still did not expose a stable public preview video URL in HTML, so Artgrid hover-video support may remain partial until a browser-captured HTML/HAR sample reveals the real preview source pattern.
## Frontend Debug Logger
- UI button: bottom-right `Logs`
@@ -215,6 +243,7 @@
- [ ] Better matching between rendered description and actual linked asset
- [ ] Add browser-level verification for preview/HLS behavior
- [ ] Add more automated coverage for search ranking / filtering logic
- [ ] If Artgrid hover preview is still required, collect one real clip HTML/HAR from a browser session and derive a stable preview URL parser
- [ ] Add proper frontend build/lint step if Node becomes available
## Verified Locally In This Environment
+148 -13
View File
@@ -76,6 +76,27 @@ type PreviewResponse struct {
Qualities []map[string]any `json:"qualities"`
}
// searchDebugSummary is the compact payload attached to search debug events.
// It carries counts and a short sample of entries rather than the full result
// arrays.
type searchDebugSummary struct {
	// Total is the number of results that were summarized.
	Total int `json:"total"`
	// BySource maps each result's Source value to how many results carried it.
	BySource map[string]int `json:"bySource"`
	// WithPreview counts results whose preview video URL is not blank.
	WithPreview int `json:"withPreview"`
	// WithThumbnail counts results whose thumbnail URL is not blank.
	WithThumbnail int `json:"withThumbnail"`
	// Top holds a truncated sample of the leading results.
	Top []map[string]any `json:"top"`
	// Warning is an optional message; omitted from JSON when empty.
	Warning string `json:"warning,omitempty"`
	// DurationMS is the elapsed time in milliseconds; omitted from JSON when zero.
	DurationMS int64 `json:"durationMs,omitempty"`
	// GeminiCandidateCap is the Gemini candidate limit; omitted from JSON when zero.
	GeminiCandidateCap int `json:"geminiCandidateCap,omitempty"`
}
// geminiBatchStats records the outcome of one chunked Gemini evaluation run
// so it can be emitted as a debug event.
type geminiBatchStats struct {
	// CandidateCap is the candidate limit that was applied to the ranked list.
	CandidateCap int `json:"candidateCap"`
	// Requested is the number of candidates actually submitted.
	Requested int `json:"requested"`
	// Batches is the number of batches that were attempted.
	Batches int `json:"batches"`
	// Succeeded is the number of batches that returned without error.
	Succeeded int `json:"succeeded"`
	// Failed is the number of batches that returned an error.
	Failed int `json:"failed"`
	// RecommendedCount is the number of de-duplicated recommendations kept.
	RecommendedCount int `json:"recommendedCount"`
	// Errors holds a bounded sample of batch error messages; omitted when empty.
	Errors []string `json:"errors,omitempty"`
}
func RegisterRoutes(router *gin.Engine, app *App) {
router.GET("/healthz", func(c *gin.Context) {
c.JSON(http.StatusOK, gin.H{"status": "ok"})
@@ -259,6 +280,7 @@ func (a *App) runDownload(recordID int64, url, start, end, quality, outputPath s
}
func (a *App) searchMedia(c *gin.Context) {
started := time.Now()
var req struct {
Query string `json:"query"`
Platforms []string `json:"platforms"`
@@ -277,18 +299,24 @@ func (a *App) searchMedia(c *gin.Context) {
if len(queryVariants) == 0 {
queryVariants = []string{req.Query}
}
a.debug("search query variants", gin.H{"query": req.Query, "variants": queryVariants, "platforms": req.Platforms})
a.debug("search query variants", gin.H{
"query": req.Query,
"platforms": req.Platforms,
"variants": queryVariants,
"variantCount": len(queryVariants),
"requestIdHint": time.Now().UnixNano(),
})
enabledPlatforms := normalizePlatforms(req.Platforms)
a.Hub.Broadcast("progress", gin.H{"type": "search", "status": "searching " + selectedPlatformLabel(enabledPlatforms), "progress": 35})
results, err := a.SearchService.SearchMedia(queryVariants, enabledPlatforms)
if err != nil {
a.debug("search backend failed", gin.H{"error": err.Error(), "variants": queryVariants})
a.debug("search backend failed", gin.H{"error": err.Error(), "variants": queryVariants, "durationMs": time.Since(started).Milliseconds()})
a.Hub.Broadcast("progress", gin.H{"type": "search", "status": "search failed", "progress": 100, "message": err.Error()})
c.JSON(http.StatusBadGateway, gin.H{"error": err.Error()})
return
}
a.debug("search backend results", gin.H{"count": len(results), "results": results})
a.debug("search backend summary", summarizeSearchResults(results, time.Since(started), 0, ""))
if len(results) == 0 {
warning := "SearXNG returned no renderable results."
a.Hub.Broadcast("progress", gin.H{"type": "search", "status": "no renderable search results", "progress": 100, "message": warning})
@@ -302,10 +330,10 @@ func (a *App) searchMedia(c *gin.Context) {
rankQuery = strings.Join(queryVariants[:min(len(queryVariants), 3)], " ")
}
scored := rankSearchResults(rankQuery, results)
a.debug("search ranked results", gin.H{"count": len(scored), "results": scored})
a.Hub.Broadcast("progress", gin.H{"type": "search", "status": "analyzing all candidate visuals with Gemini Vision", "progress": 75})
recommended := evaluateAllCandidatesWithGemini(a.GeminiService, req.Query, scored)
a.debug("search gemini recommendations", gin.H{"count": len(recommended), "results": recommended})
a.debug("search ranked summary", summarizeSearchResults(scored, time.Since(started), geminiCandidateLimit(len(scored)), ""))
a.Hub.Broadcast("progress", gin.H{"type": "search", "status": "analyzing top candidate visuals with Gemini Vision", "progress": 75})
recommended, geminiStats := evaluateAllCandidatesWithGemini(a.GeminiService, req.Query, scored)
a.debug("search gemini evaluation", geminiStats)
err = nil
if len(recommended) == 0 {
err = fmt.Errorf("gemini vision returned no recommended items across all candidate batches")
@@ -316,6 +344,7 @@ func (a *App) searchMedia(c *gin.Context) {
fallback = append(fallback, services.AIRecommendation{
Title: result.Title,
Link: result.Link,
Snippet: result.Snippet,
ThumbnailURL: result.ThumbnailURL,
PreviewVideoURL: result.PreviewVideoURL,
Source: result.Source,
@@ -324,12 +353,15 @@ func (a *App) searchMedia(c *gin.Context) {
})
}
warning := err.Error()
a.debug("search fallback summary", summarizeRecommendationResults(fallback, time.Since(started), warning))
a.Hub.Broadcast("progress", gin.H{"type": "search", "status": "Gemini Vision fallback to ranked results", "progress": 90, "message": warning})
c.JSON(http.StatusOK, gin.H{"results": fallback, "warning": warning, "queries": queryVariants})
return
}
response := gin.H{"results": mergeRecommendations(recommended, scored, 20), "queries": queryVariants}
merged := mergeRecommendations(recommended, scored, 20)
a.debug("search complete summary", summarizeRecommendationResults(merged, time.Since(started), ""))
response := gin.H{"results": merged, "queries": queryVariants}
a.Hub.Broadcast("progress", gin.H{"type": "search", "status": "search complete", "progress": 100})
c.JSON(http.StatusOK, response)
}
@@ -406,20 +438,31 @@ func selectedPlatformLabel(platforms map[string]bool) string {
return strings.Join(labels, ", ")
}
func evaluateAllCandidatesWithGemini(service *services.GeminiService, query string, ranked []services.SearchResult) []services.AIRecommendation {
func evaluateAllCandidatesWithGemini(service *services.GeminiService, query string, ranked []services.SearchResult) ([]services.AIRecommendation, geminiBatchStats) {
const chunkSize = 8
limit := geminiCandidateLimit(len(ranked))
stats := geminiBatchStats{
CandidateCap: limit,
Requested: min(limit, len(ranked)),
}
merged := make([]services.AIRecommendation, 0, len(ranked))
seen := map[string]bool{}
for start := 0; start < len(ranked); start += chunkSize {
for start := 0; start < limit; start += chunkSize {
end := start + chunkSize
if end > len(ranked) {
end = len(ranked)
if end > limit {
end = limit
}
batch := ranked[start:end]
stats.Batches++
recommended, err := service.Recommend(query, batch)
if err != nil {
stats.Failed++
if len(stats.Errors) < 5 {
stats.Errors = append(stats.Errors, err.Error())
}
continue
}
stats.Succeeded++
for _, item := range recommended {
if item.Link == "" || seen[item.Link] {
continue
@@ -428,7 +471,8 @@ func evaluateAllCandidatesWithGemini(service *services.GeminiService, query stri
merged = append(merged, item)
}
}
return merged
stats.RecommendedCount = len(merged)
return merged, stats
}
func rankSearchResults(query string, results []services.SearchResult) []services.SearchResult {
@@ -515,6 +559,7 @@ func mergeRecommendations(recommended []services.AIRecommendation, ranked []serv
merged = append(merged, services.AIRecommendation{
Title: item.Title,
Link: item.Link,
Snippet: item.Snippet,
ThumbnailURL: item.ThumbnailURL,
PreviewVideoURL: item.PreviewVideoURL,
Source: item.Source,
@@ -525,6 +570,96 @@ func mergeRecommendations(recommended []services.AIRecommendation, ranked []serv
return merged
}
// geminiCandidateLimit returns how many of the top-ranked results should be
// sent to Gemini Vision, given that total results are available.
//
// The returned limit never exceeds total. Callers slice the ranked list up to
// this value, so returning more than total (as the previous 9..11 -> 12 mapping
// did) would slice past the end of the list.
//
//	total <= 0   -> 0
//	total <= 12  -> total
//	total <= 16  -> 12
//	otherwise    -> 16
func geminiCandidateLimit(total int) int {
	const (
		midCap = 12 // cap applied to medium-sized result sets (up to 16 results)
		maxCap = 16 // hard upper bound for large result sets
	)
	switch {
	case total <= 0:
		return 0
	case total <= midCap:
		return total
	case total <= 16:
		return midCap
	default:
		return maxCap
	}
}
// summarizeSearchResults condenses raw search results into a searchDebugSummary:
// per-source counts, how many entries have a non-blank preview/thumbnail URL,
// and a truncated sample of the first six entries. duration is reported in
// milliseconds; geminiCap and warning are copied through unchanged.
func summarizeSearchResults(results []services.SearchResult, duration time.Duration, geminiCap int, warning string) searchDebugSummary {
	const sampleSize = 6

	summary := searchDebugSummary{
		Total:              len(results),
		BySource:           map[string]int{},
		Top:                make([]map[string]any, 0, min(sampleSize, len(results))),
		Warning:            warning,
		DurationMS:         duration.Milliseconds(),
		GeminiCandidateCap: geminiCap,
	}

	for i := range results {
		entry := &results[i]
		summary.BySource[entry.Source]++
		if strings.TrimSpace(entry.PreviewVideoURL) != "" {
			summary.WithPreview++
		}
		if strings.TrimSpace(entry.ThumbnailURL) != "" {
			summary.WithThumbnail++
		}
		if i >= sampleSize {
			continue
		}
		sample := map[string]any{
			"title":         truncateText(entry.Title, 120),
			"source":        entry.Source,
			"hasPreview":    entry.PreviewVideoURL != "",
			"hasThumbnail":  entry.ThumbnailURL != "",
			"displayLink":   entry.DisplayLink,
			"snippetSample": truncateText(entry.Snippet, 160),
		}
		summary.Top = append(summary.Top, sample)
	}
	return summary
}
// summarizeRecommendationResults condenses AI recommendations into a
// searchDebugSummary: per-source counts, how many entries have a non-blank
// preview/thumbnail URL, and a truncated sample of the first six entries
// (including a sample of the recommendation reason). duration is reported in
// milliseconds; GeminiCandidateCap is left at its zero value.
func summarizeRecommendationResults(results []services.AIRecommendation, duration time.Duration, warning string) searchDebugSummary {
	const sampleSize = 6

	summary := searchDebugSummary{
		Total:      len(results),
		BySource:   map[string]int{},
		Top:        make([]map[string]any, 0, min(sampleSize, len(results))),
		Warning:    warning,
		DurationMS: duration.Milliseconds(),
	}

	for i := range results {
		rec := &results[i]
		summary.BySource[rec.Source]++
		if strings.TrimSpace(rec.PreviewVideoURL) != "" {
			summary.WithPreview++
		}
		if strings.TrimSpace(rec.ThumbnailURL) != "" {
			summary.WithThumbnail++
		}
		if i >= sampleSize {
			continue
		}
		sample := map[string]any{
			"title":         truncateText(rec.Title, 120),
			"source":        rec.Source,
			"hasPreview":    rec.PreviewVideoURL != "",
			"hasThumbnail":  rec.ThumbnailURL != "",
			"reasonSample":  truncateText(rec.Reason, 120),
			"snippetSample": truncateText(rec.Snippet, 160),
		}
		summary.Top = append(summary.Top, sample)
	}
	return summary
}
// truncateText trims surrounding whitespace from text and, when the result is
// longer than limit bytes, shortens it and appends "...".
//
// The cut is made on a UTF-8 rune boundary at or before limit bytes, so a
// multi-byte character (for example CJK titles) is never split into invalid
// UTF-8. A non-positive limit yields just "..." for non-empty input instead of
// panicking.
func truncateText(text string, limit int) string {
	trimmed := strings.TrimSpace(text)
	if len(trimmed) <= limit {
		return trimmed
	}
	// Ranging over a string yields the byte offset of each rune start; keep
	// the last offset that does not exceed limit.
	cut := 0
	for offset := range trimmed {
		if offset > limit {
			break
		}
		cut = offset
	}
	return trimmed[:cut] + "..."
}
func EnsurePaths(downloadsDir, workerScript string) error {
if err := os.MkdirAll(downloadsDir, 0o755); err != nil {
return err
+178 -21
View File
@@ -6,6 +6,7 @@ import (
"io"
"net/http"
"net/url"
"os/exec"
"regexp"
"sort"
"strings"
@@ -54,6 +55,7 @@ func (s *SearchService) SearchMedia(queries []string, enabledPlatforms map[strin
name string
categories string
engine string
maxResults int
build func(string) []string
accept func(SearchResult) bool
}
@@ -63,6 +65,7 @@ func (s *SearchService) SearchMedia(queries []string, enabledPlatforms map[strin
name: "Envato",
categories: "general",
engine: s.WebEngine,
maxResults: 8,
build: buildEnvatoQueries,
accept: isRenderableEnvatoResult,
},
@@ -70,6 +73,7 @@ func (s *SearchService) SearchMedia(queries []string, enabledPlatforms map[strin
name: "Artgrid",
categories: "general",
engine: s.WebEngine,
maxResults: 8,
build: buildArtgridQueries,
accept: isRenderableArtgridResult,
},
@@ -77,16 +81,18 @@ func (s *SearchService) SearchMedia(queries []string, enabledPlatforms map[strin
name: "Google Video",
categories: "videos",
engine: s.GoogleVideoEngine,
maxResults: 6,
build: buildGoogleVideoQueries,
accept: isUsefulGoogleVideoResult,
},
}
seen := map[string]bool{}
sourceCounts := map[string]int{}
results := make([]SearchResult, 0, 90)
var lastErr error
baseQueries := limitQueries(queries, 5)
baseQueries := limitQueries(queries, 3)
for _, base := range baseQueries {
base = strings.TrimSpace(base)
if base == "" {
@@ -96,7 +102,13 @@ func (s *SearchService) SearchMedia(queries []string, enabledPlatforms map[strin
if len(enabledPlatforms) > 0 && !enabledPlatforms[strings.ToLower(source.name)] {
continue
}
if sourceCounts[source.name] >= source.maxResults {
continue
}
for _, searchQuery := range source.build(base) {
if sourceCounts[source.name] >= source.maxResults {
break
}
items, err := s.search(searchQuery, source.categories, source.engine, source.name)
if err != nil {
lastErr = err
@@ -112,6 +124,10 @@ func (s *SearchService) SearchMedia(queries []string, enabledPlatforms map[strin
}
seen[item.Link] = true
results = append(results, item)
sourceCounts[source.name]++
if sourceCounts[source.name] >= source.maxResults {
break
}
}
}
}
@@ -128,7 +144,7 @@ func (s *SearchService) SearchMedia(queries []string, enabledPlatforms map[strin
}
func (s *SearchService) EnrichResults(results []SearchResult) []SearchResult {
limit := minInt(len(results), 24)
limit := minInt(len(results), 18)
if limit == 0 {
return results
}
@@ -170,14 +186,32 @@ func (s *SearchService) enrichEnvato(result SearchResult) SearchResult {
if err != nil {
return result
}
if result.ThumbnailURL == "" {
result.ThumbnailURL = firstNonEmpty(
extractMetaContent(html, "og:image"),
extractMetaContent(html, "twitter:image"),
)
result.Title = firstNonEmpty(
extractMetaContent(html, "og:title"),
result.Title,
)
result.Snippet = firstNonEmpty(
extractMetaContent(html, "og:description"),
extractMetaContent(html, "description"),
result.Snippet,
)
pageThumbnail := firstNonEmpty(
extractMetaContent(html, "og:image"),
extractMetaContent(html, "twitter:image"),
extractJSONLDValue(html, "thumbnailUrl"),
)
if shouldPreferPageThumbnail(result.ThumbnailURL, result.Link) {
result.ThumbnailURL = pageThumbnail
}
if result.PreviewVideoURL == "" {
result.PreviewVideoURL = extractVideoPreviewURL(html)
result.PreviewVideoURL = firstNonEmpty(
extractJSONLDValue(html, "contentUrl"),
extractMetaContent(html, "twitter:player:stream"),
extractVideoPreviewURL(html),
deriveEnvatoPreviewFromThumbnail(pageThumbnail),
deriveEnvatoPreviewFromThumbnail(result.ThumbnailURL),
)
}
return result
}
@@ -203,17 +237,30 @@ func (s *SearchService) enrichArtgrid(result SearchResult) SearchResult {
if result.ThumbnailURL == "" || result.PreviewVideoURL == "" {
html, err := s.fetchText(result.Link)
if err == nil {
if result.ThumbnailURL == "" {
result.ThumbnailURL = firstNonEmpty(
extractMetaContent(html, "og:image"),
extractMetaContent(html, "twitter:image"),
)
if result.ThumbnailURL == "" {
result.ThumbnailURL = extractArtgridBackgroundThumbnail(html, clipID)
}
result.Title = firstNonEmpty(
extractMetaContent(html, "og:title"),
result.Title,
)
result.Snippet = firstNonEmpty(
extractMetaContent(html, "og:description"),
extractMetaContent(html, "description"),
result.Snippet,
)
pageThumbnail := firstNonEmpty(
extractMetaContent(html, "og:image"),
extractMetaContent(html, "twitter:image"),
extractArtgridBackgroundThumbnail(html, clipID),
extractJSONLDValue(html, "image"),
)
if shouldPreferPageThumbnail(result.ThumbnailURL, result.Link) {
result.ThumbnailURL = pageThumbnail
}
if result.PreviewVideoURL == "" {
result.PreviewVideoURL = extractVideoPreviewURL(html)
result.PreviewVideoURL = firstNonEmpty(
extractJSONLDValue(html, "contentUrl"),
extractMetaContent(html, "twitter:player:stream"),
extractVideoPreviewURL(html),
)
}
}
}
@@ -282,7 +329,6 @@ func (s *SearchService) search(query, categories, engine, source string) ([]Sear
func buildGoogleVideoQueries(base string) []string {
return []string{
fmt.Sprintf(`"%s" ("stock footage" OR "b-roll" OR cinematic OR "establishing shot" OR editorial) -tutorial -"how to" -review -reaction -course -podcast -vlog -interview -breakdown -edit -editing`, base),
fmt.Sprintf(`"%s" ("cinematic footage" OR "free stock footage" OR "4k footage") -tutorial -"how to" -review`, base),
}
}
@@ -310,6 +356,8 @@ func isUsefulGoogleVideoResult(result SearchResult) bool {
"tutorial", "how to", "review", "reaction", "podcast", "interview", "walkthrough",
"course", "lesson", "edit tutorial", "editing tutorial", "premiere pro", "after effects",
"breakdown", "explained", "vlog", "tips", "guide", "learn", "free download",
"bgm", "music", "song", "lyrics", "audio", "soundtrack", "trailer", "teaser",
"full movie", "movie clip", "status", "whatsapp status", "fan cam", "fancam",
} {
if strings.Contains(text, banned) {
return false
@@ -477,11 +525,18 @@ func pickVideoURL(urls []string) string {
}
func (s *SearchService) fetchText(target string) (string, error) {
resp, err := s.Client.Get(target)
req, err := newBrowserRequest(http.MethodGet, target, "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
if err != nil {
return "", err
}
resp, err := s.Client.Do(req)
if err != nil {
return "", err
}
defer resp.Body.Close()
if resp.StatusCode == http.StatusForbidden || resp.StatusCode == http.StatusServiceUnavailable {
return fetchTextViaPython(target)
}
if resp.StatusCode >= 300 {
return "", fmt.Errorf("fetch returned status %d", resp.StatusCode)
}
@@ -489,15 +544,17 @@ func (s *SearchService) fetchText(target string) (string, error) {
if err != nil {
return "", err
}
if looksLikeCloudflareChallenge(string(data)) {
return fetchTextViaPython(target)
}
return string(data), nil
}
func (s *SearchService) fetchJSONText(target string) (string, error) {
req, err := http.NewRequest(http.MethodGet, target, nil)
req, err := newBrowserRequest(http.MethodGet, target, "application/json, text/json, */*")
if err != nil {
return "", err
}
req.Header.Set("Accept", "application/json, text/json")
resp, err := s.Client.Do(req)
if err != nil {
return "", err
@@ -522,6 +579,106 @@ func firstNonEmpty(values ...string) string {
return ""
}
// shouldPreferPageThumbnail reports whether a thumbnail taken from the source
// page should replace the current one. It returns true when current is blank,
// when current contains one of the listed search-engine image hosts, when
// current has no parseable host, or when the page link has a host that differs
// from current's host.
func shouldPreferPageThumbnail(current, pageLink string) bool {
	thumb := strings.TrimSpace(current)
	if thumb == "" {
		return true
	}

	thumbLower := strings.ToLower(thumb)
	for _, proxyHost := range []string{"imgs.search.brave.com", "googleusercontent.com", "bing.com"} {
		if strings.Contains(thumbLower, proxyHost) {
			return true
		}
	}

	thumbHost := hostOf(thumb)
	if thumbHost == "" {
		return true
	}
	pageHost := hostOf(pageLink)
	return pageHost != "" && thumbHost != pageHost
}
// hostOf returns the lower-cased host (including any port) of raw, or an
// empty string when raw cannot be parsed as a URL.
func hostOf(raw string) string {
	if u, err := url.Parse(raw); err == nil {
		return strings.ToLower(u.Host)
	}
	return ""
}
// extractJSONLDValue scans html for `"key": "value"` pairs and returns the
// first value that is non-blank after unescaping. Escaped slashes (`\/` and
// the literal text `\u002F`) are converted to "/", doubled backslashes are
// removed, and the result is passed through htmlUnescape. It returns "" when
// no usable value is found.
func extractJSONLDValue(html, key string) string {
	re := regexp.MustCompile(`"` + regexp.QuoteMeta(key) + `"\s*:\s*"(https?:\\?/\\?/[^"]+|[^"]+)"`)

	// Applied strictly in this order; each step sees the previous step's output.
	replacements := [][2]string{
		{`\/`, `/`},
		{`\u002F`, `/`},
		{`\\`, ""},
	}

	for _, groups := range re.FindAllStringSubmatch(html, -1) {
		if len(groups) != 2 {
			continue
		}
		candidate := groups[1]
		for _, pair := range replacements {
			candidate = strings.ReplaceAll(candidate, pair[0], pair[1])
		}
		candidate = htmlUnescape(candidate)
		if strings.TrimSpace(candidate) == "" {
			continue
		}
		return candidate
	}
	return ""
}
// envatoCoverImagePattern matches the trailing "/video_preview/<file>.<ext>"
// segment of a cover-image URL. Compiled once rather than on every call.
var envatoCoverImagePattern = regexp.MustCompile(`/video_preview/[^/]+\.(?:jpg|jpeg|png|webp)$`)

// deriveEnvatoPreviewFromThumbnail rewrites a cover-image URL of the form
// ".../video_preview/<file>.<jpg|jpeg|png|webp>" into the matching
// ".../watermarked_preview/watermarked_preview.mp4" URL. Any query string is
// dropped before matching.
//
// It returns "" when thumbnail is blank or does not end in the expected
// cover-image segment. Previously a URL that merely contained
// "/video_preview/" but did not match the suffix pattern was returned
// unchanged, i.e. an image URL was handed back as a preview video URL.
func deriveEnvatoPreviewFromThumbnail(thumbnail string) string {
	candidate := htmlUnescape(strings.TrimSpace(thumbnail))
	if candidate == "" {
		return ""
	}
	candidate = strings.ReplaceAll(candidate, "&amp;", "&")
	if !strings.Contains(candidate, "/video_preview/") {
		return ""
	}
	if idx := strings.Index(candidate, "?"); idx >= 0 {
		candidate = candidate[:idx]
	}
	if !envatoCoverImagePattern.MatchString(candidate) {
		// Not a recognized cover image; there is nothing safe to derive.
		return ""
	}
	return envatoCoverImagePattern.ReplaceAllString(candidate, `/watermarked_preview/watermarked_preview.mp4`)
}
// newBrowserRequest builds a body-less HTTP request for target that carries a
// desktop Chrome User-Agent and an English Accept-Language header. The Accept
// header is set only when accept is non-empty. Any error from http.NewRequest
// is returned as-is.
func newBrowserRequest(method, target, accept string) (*http.Request, error) {
	request, err := http.NewRequest(method, target, nil)
	if err != nil {
		return nil, err
	}

	headers := request.Header
	headers.Set("User-Agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36")
	headers.Set("Accept-Language", "en-US,en;q=0.9")
	if accept == "" {
		return request, nil
	}
	headers.Set("Accept", accept)
	return request, nil
}
// fetchTextViaPython fetches target by running a small python3 urllib script
// that sends browser-like headers, and returns what the script wrote to
// stdout. The script uses a 20-second urlopen timeout and reads at most 1 MiB
// of the response.
//
// Only stdout is returned as the page body. Previously stdout and stderr were
// combined, so anything the interpreter printed to stderr on a successful run
// ended up inside the returned HTML. On failure the error wraps the exec error
// and includes a truncated sample of stderr.
func fetchTextViaPython(target string) (string, error) {
	// The body of the `with` statement must stay indented for Python to parse it.
	script := `
from urllib.request import Request, urlopen
import sys
req = Request(sys.argv[1], headers={
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language": "en-US,en;q=0.9",
})
with urlopen(req, timeout=20) as resp:
    sys.stdout.buffer.write(resp.read(1024 * 1024))
`
	// target is passed as a separate argv entry, never interpolated into the script.
	output, err := exec.Command("python3", "-c", script, target).Output()
	if err != nil {
		detail := output
		// Output populates ExitError.Stderr when the command's Stderr is unset.
		if exitErr, ok := err.(*exec.ExitError); ok {
			detail = exitErr.Stderr
		}
		return "", fmt.Errorf("python fallback failed: %w: %s", err, truncateBytes(detail, 300))
	}
	return string(output), nil
}
// looksLikeCloudflareChallenge reports whether body contains, in any letter
// case, one of the marker phrases this code treats as a challenge page:
// "cf-mitigated", "attention required" or "just a moment".
func looksLikeCloudflareChallenge(body string) bool {
	haystack := strings.ToLower(body)
	for _, marker := range []string{"cf-mitigated", "attention required", "just a moment"} {
		if strings.Contains(haystack, marker) {
			return true
		}
	}
	return false
}
// truncateBytes converts data to a string, trims surrounding whitespace and,
// when the result is longer than limit bytes, shortens it and appends "...".
//
// The cut is made on a UTF-8 rune boundary at or before limit bytes so that
// multi-byte characters in the output are never split. A non-positive limit
// yields just "..." for non-empty input instead of panicking.
func truncateBytes(data []byte, limit int) string {
	trimmed := strings.TrimSpace(string(data))
	if len(trimmed) <= limit {
		return trimmed
	}
	// Ranging over a string yields the byte offset of each rune start; keep
	// the last offset that does not exceed limit.
	cut := 0
	for offset := range trimmed {
		if offset > limit {
			break
		}
		cut = offset
	}
	return trimmed[:cut] + "..."
}
func limitQueries(queries []string, limit int) []string {
seen := map[string]bool{}
filtered := make([]string, 0, minInt(len(queries), limit))
+32
View File
@@ -0,0 +1,32 @@
package services
import "testing"
func TestExtractVideoPreviewURLFindsEnvatoPreview(t *testing.T) {
html := `<script type="application/ld+json">{"contentUrl":"https://video-previews.elements.envatousercontent.com/ad0a3abc-7eb0-4075-8f68-8198f9a08777/watermarked_preview/watermarked_preview.mp4"}</script>`
got := firstNonEmpty(extractJSONLDValue(html, "contentUrl"), extractVideoPreviewURL(html))
want := "https://video-previews.elements.envatousercontent.com/ad0a3abc-7eb0-4075-8f68-8198f9a08777/watermarked_preview/watermarked_preview.mp4"
if got != want {
t.Fatalf("expected %q, got %q", want, got)
}
}
// TestDeriveEnvatoPreviewFromThumbnail checks that a cover-image URL with an
// HTML-escaped query string is rewritten to the watermarked preview mp4 URL
// with the query removed.
func TestDeriveEnvatoPreviewFromThumbnail(t *testing.T) {
	const (
		coverImage = "https://elements-resized.envatousercontent.com/elements-video-cover-images/ad0a3abc-7eb0-4075-8f68-8198f9a08777/video_preview/video_preview_0000.jpg?w=1200&amp;h=630"
		expected   = "https://elements-resized.envatousercontent.com/elements-video-cover-images/ad0a3abc-7eb0-4075-8f68-8198f9a08777/watermarked_preview/watermarked_preview.mp4"
	)
	if actual := deriveEnvatoPreviewFromThumbnail(coverImage); actual != expected {
		t.Fatalf("expected %q, got %q", expected, actual)
	}
}
// TestIsUsefulGoogleVideoResultRejectsMusicResults checks that a result whose
// title and snippet mention BGM/soundtrack terms is filtered out.
func TestIsUsefulGoogleVideoResultRejectsMusicResults(t *testing.T) {
	musicClip := SearchResult{
		Title:   "Couple Friendly Sad Bgm Movie Best Bgm",
		Link:    "https://www.youtube.com/watch?v=LGP4wiXSw8c",
		Snippet: "romantic bgm soundtrack",
	}
	accepted := isUsefulGoogleVideoResult(musicClip)
	if accepted {
		t.Fatal("expected bgm/music result to be rejected")
	}
}
+33 -3
View File
@@ -27,6 +27,7 @@ type GeminiService struct {
type AIRecommendation struct {
Title string `json:"title"`
Link string `json:"link"`
Snippet string `json:"snippet"`
ThumbnailURL string `json:"thumbnailUrl"`
PreviewVideoURL string `json:"previewVideoUrl"`
Source string `json:"source"`
@@ -232,6 +233,7 @@ User query: ` + query,
recommendations = append(recommendations, AIRecommendation{
Title: src.Title,
Link: src.Link,
Snippet: src.Snippet,
ThumbnailURL: src.ThumbnailURL,
PreviewVideoURL: src.PreviewVideoURL,
Source: src.Source,
@@ -245,6 +247,7 @@ User query: ` + query,
recommendations = append(recommendations, AIRecommendation{
Title: candidate.Title,
Link: candidate.Link,
Snippet: candidate.Snippet,
ThumbnailURL: candidate.ThumbnailURL,
PreviewVideoURL: candidate.PreviewVideoURL,
Source: candidate.Source,
@@ -262,10 +265,26 @@ func fetchImageAsInlineData(client *http.Client, imageURL string) (string, strin
return "", "", fmt.Errorf("image url is empty")
}
resp, err := client.Get(imageURL)
if err != nil {
return "", "", err
if err == nil {
defer resp.Body.Close()
}
if err != nil || resp.StatusCode >= 300 {
req, reqErr := newBrowserStyleImageRequest(imageURL)
if reqErr != nil {
if err != nil {
return "", "", err
}
return "", "", reqErr
}
if resp != nil {
resp.Body.Close()
}
resp, err = client.Do(req)
if err != nil {
return "", "", err
}
defer resp.Body.Close()
}
defer resp.Body.Close()
if resp.StatusCode >= 300 {
return "", "", fmt.Errorf("thumbnail fetch failed with %d", resp.StatusCode)
@@ -284,6 +303,17 @@ func fetchImageAsInlineData(client *http.Client, imageURL string) (string, strin
return base64.StdEncoding.EncodeToString(data), mimeType, nil
}
// newBrowserStyleImageRequest builds a GET request for imageURL carrying a
// desktop Chrome User-Agent, an image-oriented Accept header and an English
// Accept-Language header. Any error from http.NewRequest is returned as-is.
func newBrowserStyleImageRequest(imageURL string) (*http.Request, error) {
	request, err := http.NewRequest(http.MethodGet, imageURL, nil)
	if err != nil {
		return nil, err
	}

	headers := request.Header
	headers.Set("User-Agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36")
	headers.Set("Accept", "image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8")
	headers.Set("Accept-Language", "en-US,en;q=0.9")
	return request, nil
}
func fetchCandidateVisualInlineData(client *http.Client, candidate SearchResult) (string, string, error) {
if candidate.ThumbnailURL != "" {
data, mimeType, err := fetchImageAsInlineData(client, candidate.ThumbnailURL)
+39 -5
View File
@@ -68,12 +68,36 @@ function logEvent(type, payload) {
function safeStringify(value) {
try {
return JSON.stringify(value, null, 2);
return JSON.stringify(compactPayload(value), null, 2);
} catch {
return String(value);
}
}
/**
 * Produce a size-bounded copy of a payload for logging.
 *
 * - Anything nested deeper than three levels becomes the string "[truncated]".
 * - Arrays longer than 8 items become `{ type, length, sample }`, where sample
 *   holds the first 5 compacted items; shorter arrays are compacted item by item.
 * - Plain objects are compacted value by value.
 * - Strings longer than 500 characters are cut to 500 and suffixed with "...".
 * - Every other value is returned unchanged.
 *
 * @param {*} value - The payload (or nested part of it) to compact.
 * @param {number} [depth=0] - Current nesting depth; callers normally omit it.
 * @returns {*} The compacted value.
 */
function compactPayload(value, depth = 0) {
  if (depth > 3) {
    return "[truncated]";
  }

  const compactChild = (child) => compactPayload(child, depth + 1);

  if (Array.isArray(value)) {
    const isLong = value.length > 8;
    return isLong
      ? { type: "array", length: value.length, sample: value.slice(0, 5).map(compactChild) }
      : value.map(compactChild);
  }

  if (value && typeof value === "object") {
    const compactedPairs = Object.entries(value).map(([key, child]) => [key, compactChild(child)]);
    return Object.fromEntries(compactedPairs);
  }

  const isLongString = typeof value === "string" && value.length > 500;
  return isLongString ? value.slice(0, 500) + "..." : value;
}
function renderLogs() {
debugSummary.textContent = `${debugEntries.length} events captured`;
debugLogList.innerHTML = "";
@@ -194,15 +218,24 @@ async function api(path, options = {}) {
bodyPreview: typeof options.body === "string" ? options.body.slice(0, 800) : "[non-string body]",
});
const response = await fetch(path, options);
const data = await response.json().catch(() => ({}));
const rawText = await response.text();
let data = {};
if (rawText) {
try {
data = JSON.parse(rawText);
} catch {
data = { rawText };
}
}
logEvent("api:response", {
path,
status: response.status,
ok: response.ok,
body: data,
body: compactPayload(data),
});
if (!response.ok) {
const error = new Error(data.error || "request failed");
const message = data.error || data.rawText || `request failed (${response.status})`;
const error = new Error(message);
error.status = response.status;
error.data = data;
throw error;
@@ -263,7 +296,8 @@ function renderResults(results) {
image.src = item.thumbnailUrl || "https://placehold.co/1280x720/0a0a0a/ffffff?text=Preview";
image.alt = item.title;
node.querySelector("h3").textContent = item.title;
node.querySelector("p").textContent = item.reason;
node.querySelector(".result-snippet").textContent = item.snippet || item.reason || item.source || "";
node.querySelector(".result-reason").textContent = item.reason ? `AI note: ${item.reason}` : "";
node.querySelector(".source-badge").textContent = item.source;
if (item.previewVideoUrl) {
attachVideoSource(previewVideo, item.previewVideoUrl);
+2 -1
View File
@@ -159,7 +159,8 @@
</div>
<div class="space-y-2 p-5">
<h3 class="line-clamp-2 text-base font-medium text-white"></h3>
<p class="line-clamp-3 text-sm text-zinc-400"></p>
<p class="result-snippet line-clamp-3 text-sm text-zinc-400"></p>
<p class="result-reason line-clamp-2 text-xs uppercase tracking-[0.12em] text-zinc-500"></p>
</div>
</a>
</template>
+1 -1
View File
@@ -24,7 +24,7 @@ trap cleanup EXIT
cd "${ROOT_DIR}"
echo "[selftest] gofmt"
gofmt -w backend/main.go backend/handlers/api.go backend/models/db.go backend/services/cse.go backend/services/gemini.go backend/services/gemini_test.go
gofmt -w backend/main.go backend/handlers/api.go backend/models/db.go backend/services/cse.go backend/services/cse_test.go backend/services/gemini.go backend/services/gemini_test.go
echo "[selftest] python syntax"
python3 -m py_compile worker/downloader.py scripts/mock_searxng.py