From 9637b761bd549f17cd0151e752726b0b58f7485f Mon Sep 17 00:00:00 2001 From: AI Assistant Date: Mon, 16 Mar 2026 09:55:14 +0900 Subject: [PATCH] Improve query intent handling and preview playback --- TODO.md | 13 +++++++++ backend/services/cse.go | 30 ++++++++++++++++++++ backend/services/cse_test.go | 7 +++++ backend/services/gemini.go | 50 ++++++++++++++++++++++++++++++--- backend/services/gemini_test.go | 7 +++++ frontend/app.js | 34 +++++++++++++++++++--- scripts/selftest.sh | 13 ++++++--- 7 files changed, 142 insertions(+), 12 deletions(-) diff --git a/TODO.md b/TODO.md index fa2f693..2f90dc5 100644 --- a/TODO.md +++ b/TODO.md @@ -141,6 +141,19 @@ - Effect: - improves Artgrid recall in SearXNG result sets that favor canonical Artlist URLs over Artgrid URLs +## Current Session Update (2026-03-16, Query / Preview Follow-up) +- Search intent translation was updated to better preserve compound media phrases: + - added explicit normalization for terms like `사이버 펑크` -> `cyberpunk` + - added a guard that rejects over-compressed translations when the original query contains a richer multi-word intent +- Artgrid page parsing was tightened: + - generic Artgrid homepage / challenge HTML should no longer be mistaken for a real clip page during enrichment + - this prevents homepage thumbnails/descriptions from overwriting real search result metadata +- Hover preview playback was changed to lazy attach on hover: + - preview source is now attached on mouseenter + - playback waits for media readiness instead of trying to play immediately from the render path + - source is detached again on mouseleave +- Self-test script search step now retries to reduce flaky startup timing failures during local smoke tests + ## Local Self-Test Workflow - Primary command: - `bash scripts/selftest.sh` diff --git a/backend/services/cse.go b/backend/services/cse.go index 3fbc94d..888cbd1 100644 --- a/backend/services/cse.go +++ b/backend/services/cse.go @@ -229,6 +229,9 @@ func (s *SearchService) enrichArtgrid(result SearchResult) SearchResult { if result.ThumbnailURL == "" || result.PreviewVideoURL == "" { html, err := s.fetchText(result.Link) if err == nil { + if !isMatchingArtgridClipPage(html, clipID) { + return result + } result.Title = firstNonEmpty( cleanArtgridTitle(extractMetaContent(html, "og:title")), cleanArtgridTitle(extractMetaContent(html, "title")), @@ -756,6 +759,33 @@ func cleanArtgridDescription(description string) string { return strings.TrimSpace(description) } +func isMatchingArtgridClipPage(html, clipID string) bool { + if clipID == "" { + return false + } + ogURL := extractMetaContent(html, "og:url") + canonical := extractCanonicalURL(html) + lowerHTML := strings.ToLower(html) + for _, candidate := range []string{ogURL, canonical} { + if strings.Contains(candidate, clipID) { + return true + } + } + if strings.Contains(lowerHTML, "main-clipvideo_"+clipID) || strings.Contains(lowerHTML, "/clip/"+clipID+"/") { + return true + } + return false +} + +func extractCanonicalURL(html string) string { + pattern := regexp.MustCompile(`(?i)]+rel=["']canonical["'][^>]+href=["']([^"']+)`) + matches := pattern.FindStringSubmatch(html) + if len(matches) == 2 { + return htmlUnescape(matches[1]) + } + return "" +} + func deriveEnvatoPreviewFromThumbnail(thumbnail string) string { candidate := htmlUnescape(strings.TrimSpace(thumbnail)) if candidate == "" { diff --git a/backend/services/cse_test.go b/backend/services/cse_test.go index cc32177..fc06c52 100644 --- a/backend/services/cse_test.go +++ b/backend/services/cse_test.go @@ -81,6 +81,13 @@ func TestBuildArtgridQueriesIncludesArtlistCanonicalDomain(t *testing.T) { } } +func TestIsMatchingArtgridClipPageRejectsHomepage(t *testing.T) { + html := `` + if isMatchingArtgridClipPage(html, "114756") { + t.Fatal("expected generic Artgrid homepage HTML to be rejected as a clip page") + } +} + func TestGeminiCandidateLimitNeverExceedsCandidates(t *testing.T) { if got := GeminiCandidateLimit(9); got != 9 { t.Fatalf("expected Gemini limit to stay within candidate count, got %d", got) diff --git a/backend/services/gemini.go b/backend/services/gemini.go index ec307f6..06af951 100644 --- a/backend/services/gemini.go +++ b/backend/services/gemini.go @@ -58,6 +58,10 @@ func (g *GeminiService) TranslateQuery(query string) string { if trimmed == "" { return "" } + normalizedIntent := normalizeKnownMediaPhrases(trimmed) + if looksMostlyASCII(normalizedIntent) { + return strings.TrimSpace(normalizedIntent) + } if looksMostlyASCII(trimmed) { return trimmed } @@ -90,19 +94,19 @@ func (g *GeminiService) TranslateQuery(query string) string { rawText, err := g.generateText(body) if err == nil { translated := sanitizePlainEnglishLine(rawText) - if translated != "" && !strings.EqualFold(translated, trimmed) { + if translated != "" && !strings.EqualFold(translated, trimmed) && !isOvercompressedTranslation(trimmed, translated) { return translated } } } - if translated, err := g.translateViaGoogle(trimmed); err == nil && translated != "" && isLikelyEnglishQuery(translated) { + if translated, err := g.translateViaGoogle(trimmed); err == nil && translated != "" && isLikelyEnglishQuery(translated) && !isOvercompressedTranslation(trimmed, translated) { return translated } - if translated := translateKoreanMediaTerms(trimmed); translated != "" && !strings.EqualFold(translated, trimmed) { + if translated := translateKoreanMediaTerms(normalizedIntent); translated != "" && !strings.EqualFold(translated, trimmed) { return translated } - return trimmed + return strings.TrimSpace(normalizedIntent) } func (g *GeminiService) generateText(body map[string]any) (string, error) { @@ -493,6 +497,12 @@ func translateKoreanMediaTerms(query string) string { korean string english string }{ + {korean: "사이버 펑크 도시", english: "cyberpunk city"}, + {korean: "사이버펑크 도시", english: "cyberpunk city"}, + {korean: "사이버 펑크", english: "cyberpunk"}, + {korean: "사이버펑크", english: "cyberpunk"}, + {korean: "네온 도시", english: "neon city"}, + {korean: "미래 도시", english: "futuristic city"}, {korean: "숲속", english: "forest"}, {korean: "다정한", english: "affectionate"}, {korean: "항공샷", english: "aerial shot"}, @@ -528,6 +538,38 @@ func translateKoreanMediaTerms(query string) string { return strings.TrimSpace(translated) } +func normalizeKnownMediaPhrases(query string) string { + normalized := strings.TrimSpace(query) + replacements := []struct { + from string + to string + }{ + {from: "사이버 펑크 도시", to: "cyberpunk city"}, + {from: "사이버펑크 도시", to: "cyberpunk city"}, + {from: "사이버 펑크", to: "cyberpunk"}, + {from: "사이버펑크", to: "cyberpunk"}, + } + for _, replacement := range replacements { + normalized = strings.ReplaceAll(normalized, replacement.from, replacement.to) + } + return strings.Join(strings.Fields(normalized), " ") +} + +func isOvercompressedTranslation(original, translated string) bool { + originalWords := len(strings.Fields(strings.TrimSpace(original))) + translatedWords := len(strings.Fields(strings.TrimSpace(translated))) + if originalWords < 2 || translatedWords >= 2 { + return false + } + lower := strings.ToLower(strings.TrimSpace(translated)) + for _, allow := range []string{"cyberpunk", "nightlife", "cityscape"} { + if lower == allow { + return false + } + } + return true +} + func (g *GeminiService) translateViaGoogle(query string) (string, error) { baseURL := g.TranslateEndpoint if strings.TrimSpace(baseURL) == "" { diff --git a/backend/services/gemini_test.go b/backend/services/gemini_test.go index c55f5e0..789898f 100644 --- a/backend/services/gemini_test.go +++ b/backend/services/gemini_test.go @@ -39,3 +39,10 @@ func TestTranslateQueryFallsBackToDictionaryWhenTranslateFails(t *testing.T) { t.Fatalf("expected dictionary fallback translation, got %q", translated) } } + +func TestNormalizeKnownMediaPhrases(t *testing.T) { + translated := translateKoreanMediaTerms("사이버 펑크 도시") + if translated != "cyberpunk city" { + t.Fatalf("expected cyberpunk city, got %q", translated) + } +} diff --git a/frontend/app.js b/frontend/app.js index 2059591..f1fb8ea 100644 --- a/frontend/app.js +++ b/frontend/app.js @@ -270,6 +270,33 @@ function attachVideoSource(video, src) { logEvent("preview:attach:file", { src }); } +function startHoverPreview(video, src) { + if (!src) { + return; + } + attachVideoSource(video, src); + video.classList.remove("hidden"); + const attemptPlay = () => { + video.play().catch((error) => { + logEvent("preview:hover:play:error", { src, message: String(error) }); + }); + }; + if (video.readyState >= 2) { + attemptPlay(); + return; + } + const onReady = () => { + video.removeEventListener("loadeddata", onReady); + video.removeEventListener("canplay", onReady); + attemptPlay(); + }; + video.addEventListener("loadeddata", onReady, { once: true }); + video.addEventListener("canplay", onReady, { once: true }); + if (video.load) { + video.load(); + } +} + function detachVideoSource(video) { const existing = hlsInstances.get(video); if (existing) { @@ -299,21 +326,20 @@ function renderResults(results) { node.querySelector(".result-snippet").textContent = item.snippet || item.reason || item.source || ""; node.querySelector(".result-reason").textContent = item.reason ? `AI note: ${item.reason}` : ""; node.querySelector(".source-badge").textContent = item.source; + previewVideo.poster = item.thumbnailUrl || ""; if (item.previewVideoUrl) { - attachVideoSource(previewVideo, item.previewVideoUrl); - previewVideo.poster = item.thumbnailUrl || ""; const mediaArea = node.querySelector(".relative"); mediaArea.addEventListener("mouseenter", () => { logEvent("preview:hover:start", { title: item.title, source: item.source, previewVideoUrl: item.previewVideoUrl }); overlays.forEach((overlay) => overlay.classList.add("hidden")); - previewVideo.classList.remove("hidden"); - previewVideo.play().catch(() => {}); + startHoverPreview(previewVideo, item.previewVideoUrl); }); mediaArea.addEventListener("mouseleave", () => { logEvent("preview:hover:end", { title: item.title, source: item.source }); previewVideo.pause(); previewVideo.currentTime = 0; previewVideo.classList.add("hidden"); + detachVideoSource(previewVideo); overlays.forEach((overlay) => overlay.classList.remove("hidden")); }); } diff --git a/scripts/selftest.sh b/scripts/selftest.sh index b16b08d..556158d 100755 --- a/scripts/selftest.sh +++ b/scripts/selftest.sh @@ -70,10 +70,15 @@ if payload.get("status") != "ok": PY echo "[selftest] verify search" -curl -fsS \ - -H "Content-Type: application/json" \ - -d '{"query":"city rain","platforms":["envato","artgrid","google video"]}' \ - "http://127.0.0.1:${APP_PORT}/api/search" >"${TMP_DIR}/search.json" +for _ in $(seq 1 5); do + if curl -fsS \ + -H "Content-Type: application/json" \ + -d '{"query":"city rain","platforms":["envato","artgrid","google video"]}' \ + "http://127.0.0.1:${APP_PORT}/api/search" >"${TMP_DIR}/search.json"; then + break + fi + sleep 1 +done python3 - "${TMP_DIR}/search.json" <<'PY' import json import sys