diff --git a/TODO.md b/TODO.md index 2a85b88..2ab3fb1 100644 --- a/TODO.md +++ b/TODO.md @@ -31,6 +31,7 @@ - A local self-test workflow now exists and should be run before container builds or pushes. - A fresh-machine bootstrap was revalidated in a user-local toolchain setup on `2026-03-17`; `go test ./...` and `bash scripts/selftest.sh` now pass in that setup. - Result modal sizing is now being constrained to the viewport, and modal-only source-summary translation is now part of the active implementation path. +- Card summaries now also translate lazily to Korean, and Gemini negative-assessment handling now drives stronger follow-up search behavior than before. ## Current Architecture - `backend/main.go` @@ -227,6 +228,7 @@ - Search cards now separate source snippet from AI reason, but metadata fidelity still depends on source enrichment quality. - Gemini notes are now intended to be Korean, but final output quality still depends on Gemini response consistency. - Source Summary translation now depends on Google Translate HTTP availability; frontend silently falls back to original summary text if translation fails. +- The result modal should now stay within viewport height, but this still needs real browser confirmation on multiple short-height displays because CSS-only constraints were the source of the latest user-visible regression. - The local self-test script is better than before, but it is still a smoke test, not full integration coverage. 
## Current Risks Around Search Quality @@ -550,6 +552,7 @@ - [ ] Build a repeatable repo-local bootstrap script or documented setup command set for non-root machines so fresh PC setup does not depend on shell history - [ ] Improve Envato / Artgrid preview acquisition reliability so Gemini Vision sees real frames more often - [ ] Browser-verify the new result modal at multiple viewport heights and confirm translated Source Summary readability on real long descriptions +- [ ] Evaluate whether the new Gemini supplemental-query generation is reducing irrelevant results on a small fixed benchmark query set - [ ] Revisit Google Video UX: - current YouTube embed was abandoned due to error `153` - current in-app panel is more reliable but less rich than a true embedded watch page @@ -615,6 +618,25 @@ - If behavior in the browser does not match the latest backend/frontend code, the first assumption should be stale frontend assets until proven otherwise ## Recent Change Log +- Date: `2026-03-17` +- What changed: + - Removed the visible `AI Recommended` badge from search cards. + - Extended lazy summary translation so result cards, not just the modal, now request Korean `Source Summary` text as they come into view. + - Reworked Gemini vision parsing and recommendation metadata so candidate assessments now distinguish `positive`, `unclear`, `irrelevant`, and `inappropriate`, with short search hints for weak/negative results. + - Removed the direct `Preview evidence pending` filler path from normal merge behavior and moved fallback filling to a later, more neutral stage. + - Upgraded supplemental search behavior so follow-up queries can be generated from Gemini feedback and provider/source mix instead of relying only on the old fixed fallback list. + - Tightened modal height handling again so the overlay and internal panels can scroll without pushing the popup past the viewport. 
+- Why it changed: + - The user reported that the modal could still overflow the browser, that the `AI Recommended` chip was unnecessary noise, that card-level source summaries should also be translated, and that weak/negative Gemini evaluations should trigger smarter additional searching instead of surfacing low-confidence filler results. +- How it was verified: + - `go test ./...` + - `bash scripts/selftest.sh` + - `python3 -m py_compile worker/downloader.py scripts/mock_searxng.py` +- What is still risky or incomplete: + - The new supplemental-query generation depends on Gemini availability for the smartest path and falls back to deterministic query building when Gemini text generation is unavailable. + - Card-level lazy translation reduces request burst compared with translating everything during `/api/search`, but still adds client-side translation traffic during scrolling. + - Real browser validation is still needed to confirm the modal no longer exceeds the viewport on the exact user display conditions shown in the screenshot. + - Date: `2026-03-17` - What changed: - Added `POST /api/translate/summary` so the result modal can translate `Source Summary` text to Korean on demand with in-process caching. 
diff --git a/backend/handlers/api.go b/backend/handlers/api.go index 8e5ba69..d8044ad 100644 --- a/backend/handlers/api.go +++ b/backend/handlers/api.go @@ -467,7 +467,7 @@ func (a *App) searchMedia(c *gin.Context) { supplementalDeadlineLimited := false if services.NeedsSupplementalExploration(recommended) && time.Now().Before(deadline.Add(-10*time.Second)) { a.Hub.Broadcast("progress", gin.H{"type": "search", "status": "Gemini 평가가 약해 추가 후보를 탐색하는 중", "progress": 82}) - explorationQueries := buildSupplementalQueries(req.Query, queryVariants) + explorationQueries := buildSupplementalQueries(a.GeminiService, req.Query, queryVariants, recommended) extraResults, extraMeta, extraErr := a.SearchService.SearchMediaWithDeadline(explorationQueries, enabledPlatforms, deadline.Add(-10*time.Second)) supplementalDeadlineLimited = extraMeta.PartialDueToDeadline if extraErr == nil && len(extraResults) > 0 { @@ -613,15 +613,39 @@ func selectedPlatformLabel(platforms map[string]bool) string { return strings.Join(labels, ", ") } -func buildSupplementalQueries(query string, existing []string) []string { +func buildSupplementalQueries(service *services.GeminiService, query string, existing []string, reviewed []services.AIRecommendation) []string { + if service != nil { + if generated, err := service.BuildSupplementalQueries(query, existing, reviewed); err == nil && len(generated) > 0 { + return mergeSupplementalQuerySets(existing, generated) + } + } + return buildDeterministicSupplementalQueries(query, existing, reviewed) +} + +func buildDeterministicSupplementalQueries(query string, existing []string, reviewed []services.AIRecommendation) []string { candidates := append([]string{}, existing...) 
+ for _, item := range reviewed { + if item.Assessment == "positive" && item.SearchHint != "" { + candidates = append(candidates, item.SearchHint) + } + if (item.Assessment == "unclear" || services.IsExcludedAssessment(item.Assessment)) && item.SearchHint != "" { + candidates = append(candidates, query+" "+item.SearchHint) + } + } candidates = append(candidates, query+" cinematic stock footage", query+" editorial b-roll", query+" establishing shot", query+" drone footage", + query+" authentic candid couple", + query+" urban park lifestyle footage", ) + return mergeSupplementalQuerySets(nil, candidates) +} +func mergeSupplementalQuerySets(base, extra []string) []string { + candidates := append([]string{}, base...) + candidates = append(candidates, extra...) seen := map[string]bool{} result := make([]string, 0, len(candidates)) for _, item := range candidates { diff --git a/backend/services/gemini.go b/backend/services/gemini.go index ab7d360..c61894b 100644 --- a/backend/services/gemini.go +++ b/backend/services/gemini.go @@ -55,6 +55,8 @@ type AIRecommendation struct { Source string `json:"source"` Reason string `json:"reason"` Recommended bool `json:"recommended"` + Assessment string `json:"assessment,omitempty"` + SearchHint string `json:"searchHint,omitempty"` MediaMode string `json:"mediaMode,omitempty"` EmbedURL string `json:"embedUrl,omitempty"` PreviewBlockedReason string `json:"previewBlockedReason,omitempty"` @@ -252,10 +254,17 @@ func (g *GeminiService) Recommend(query string, candidates []SearchResult) ([]AI parts := []geminiPart{ { "text": `You are a professional video editor. Analyze whether each provided visual is suitable as a usable scene or shot for the user's requested keyword. 
Return JSON only in this shape: -{"recommendations":[{"index":0,"verdict":"Yes","reason":"short reason","recommended":true}]} +{"recommendations":[{"index":0,"verdict":"Yes","reason":"short reason","recommended":true,"assessment":"positive","searchHint":"short english hint"}]} Return one entry for every analyzed candidate. Use Korean for every reason. Keep reasons concise but specific enough to explain usefulness. Set verdict to "Yes" or "No" for every candidate. "Yes" means the scene is usable and relevant for editing against the user's keyword. "No" means it is not suitable or not relevant enough. Set recommended=true only when verdict is "Yes". Set recommended=false when verdict is "No". +Set assessment to one of: positive, unclear, irrelevant, inappropriate. +- positive: directly usable and relevant to the query +- unclear: visually ambiguous, weak, or not confident enough +- irrelevant: visibly unrelated to the query intent +- inappropriate: low-quality, spammy, misleading, meme-like, or otherwise unsuitable for professional editing +When assessment is not positive, provide searchHint as a short English stock-footage search phrase that could help find better candidates. Keep it under 8 words. +When assessment is positive, searchHint may be empty. Prefer cinematic b-roll, stock footage, editorial footage, clean composition, usable establishing shots, and professional media thumbnails. Avoid clickbait faces, exaggerated expressions, meme aesthetics, low-information thumbnails, sensational text overlays, or gossip-style imagery. Favor scenes that look directly useful for professional editing, sequencing, establishing, cutaway, or mood-building usage. 
@@ -340,6 +349,8 @@ User query: ` + query, Verdict string `json:"verdict"` Reason string `json:"reason"` Recommended bool `json:"recommended"` + Assessment string `json:"assessment"` + SearchHint string `json:"searchHint"` } `json:"recommendations"` } if err := json.Unmarshal([]byte(jsonText), &parsed); err != nil { @@ -353,6 +364,7 @@ User query: ` + query, } src := candidates[rec.Index] recommended := rec.Recommended || strings.EqualFold(strings.TrimSpace(rec.Verdict), "yes") + assessment := normalizeAssessment(rec.Assessment, recommended) recommendations = append(recommendations, AIRecommendation{ Title: src.Title, Link: src.Link, @@ -362,6 +374,8 @@ User query: ` + query, Source: src.Source, Reason: normalizeKoreanReason(rec.Reason), Recommended: recommended, + Assessment: assessment, + SearchHint: normalizeSearchHint(rec.SearchHint), }) } g.debug("gemini:vision_complete", map[string]any{ @@ -372,6 +386,72 @@ User query: ` + query, return recommendations, nil } +func (g *GeminiService) BuildSupplementalQueries(query string, existing []string, reviewed []AIRecommendation) ([]string, error) { + baseExisting := make([]string, 0, len(existing)) + for _, item := range existing { + trimmed := strings.TrimSpace(item) + if trimmed != "" { + baseExisting = append(baseExisting, trimmed) + } + } + if len(baseExisting) == 0 { + baseExisting = append(baseExisting, query) + } + + positive := make([]string, 0, 3) + negativeHints := make([]string, 0, 4) + sourceCounts := map[string]int{} + for _, item := range reviewed { + sourceCounts[item.Source]++ + if item.Assessment == "positive" && len(positive) < 3 { + positive = append(positive, truncateForError(strings.TrimSpace(item.Title), 80)) + } + if (item.Assessment == "irrelevant" || item.Assessment == "inappropriate" || item.Assessment == "unclear") && item.SearchHint != "" && len(negativeHints) < 4 { + negativeHints = append(negativeHints, item.SearchHint) + } + } + + if g.APIKey == "" { + return nil, fmt.Errorf("gemini api 
key is not configured") + } + + body := map[string]any{ + "systemInstruction": map[string]any{ + "parts": []map[string]string{{ + "text": "You generate improved stock-footage search phrases. Return 3 to 5 plain English search phrases only, one per line, no numbering, no quotes, no explanations.", + }}, + }, + "contents": []map[string]any{{ + "parts": []map[string]string{{ + "text": fmt.Sprintf("Original query: %s\nExisting search phrases: %s\nPositive candidate titles: %s\nNegative or weak search hints: %s\nSource distribution: Envato=%d, Artgrid=%d, Google Video=%d\nGenerate improved English search phrases that avoid weak or irrelevant results and increase provider diversity.", + query, + strings.Join(baseExisting, " | "), + strings.Join(positive, " | "), + strings.Join(negativeHints, " | "), + sourceCounts["Envato"], + sourceCounts["Artgrid"], + sourceCounts["Google Video"], + ), + }}, + }}, + "generationConfig": map[string]any{ + "responseMimeType": "text/plain", + "temperature": 0.3, + "maxOutputTokens": 120, + }, + } + + rawText, err := g.generateText(body) + if err != nil { + return nil, err + } + queries := parseSupplementalQueryLines(rawText) + if len(queries) == 0 { + return nil, fmt.Errorf("gemini returned no supplemental queries") + } + return queries, nil +} + func (g *GeminiService) debug(message string, data any) { if g != nil && g.Debug != nil { g.Debug(message, data) @@ -655,6 +735,50 @@ func normalizeKoreanReason(reason string) string { return trimmed } +func normalizeAssessment(assessment string, recommended bool) string { + switch strings.ToLower(strings.TrimSpace(assessment)) { + case "positive", "unclear", "irrelevant", "inappropriate": + return strings.ToLower(strings.TrimSpace(assessment)) + } + if recommended { + return "positive" + } + return "unclear" +} + +func normalizeSearchHint(text string) string { + trimmed := strings.Join(strings.Fields(strings.TrimSpace(strings.Trim(text, "\"'`"))), " ") + if trimmed == "" { + return "" + } + if 
len(trimmed) > 80 { + return trimmed[:80] + } + return trimmed +} + +func parseSupplementalQueryLines(text string) []string { + lines := strings.Split(text, "\n") + seen := map[string]bool{} + queries := make([]string, 0, 5) + for _, line := range lines { + trimmed := strings.TrimSpace(strings.Trim(line, "\"'`-0123456789. ")) + if trimmed == "" { + continue + } + key := strings.ToLower(trimmed) + if seen[key] { + continue + } + seen[key] = true + queries = append(queries, trimmed) + if len(queries) >= 5 { + break + } + } + return queries +} + func buildSearchQueries(originalQuery, englishQuery string) []string { base := strings.TrimSpace(englishQuery) if base == "" { diff --git a/backend/services/gemini_test.go b/backend/services/gemini_test.go index 77ab371..0004f73 100644 --- a/backend/services/gemini_test.go +++ b/backend/services/gemini_test.go @@ -77,6 +77,28 @@ func TestNormalizeKnownMediaPhrases(t *testing.T) { } } +func TestBuildSupplementalQueriesReturnsGeneratedLines(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + _, _ = w.Write([]byte(`{"candidates":[{"content":{"parts":[{"text":"authentic couple city walk\ncandid couple park footage\nnatural lifestyle b-roll"}]}}]}`)) + })) + defer server.Close() + + service := NewGeminiService("dummy-key") + service.Client = &http.Client{Timeout: 2 * time.Second} + service.GenerateEndpoint = server.URL + + queries, err := service.BuildSupplementalQueries("다정한 커플", []string{"friendly couple"}, []AIRecommendation{ + {Assessment: "irrelevant", SearchHint: "authentic lifestyle couple"}, + }) + if err != nil { + t.Fatalf("expected supplemental query generation to succeed, got %v", err) + } + if len(queries) < 3 || queries[0] != "authentic couple city walk" { + t.Fatalf("unexpected supplemental queries: %#v", queries) + } +} + func TestSelectUnevaluatedCandidatesSkipsReviewedLinks(t *testing.T) { ranked := 
[]SearchResult{ {Link: "https://a.example"}, @@ -172,3 +194,22 @@ func TestRankSearchResultsPrefersUsableVisuals(t *testing.T) { t.Fatalf("expected usable thumbnail result first, got %#v", ranked) } } + +func TestMergeRecommendationsExcludesIrrelevantAndPendingFiller(t *testing.T) { + recommended := []AIRecommendation{ + {Title: "keep", Link: "https://a.example", Recommended: true, Assessment: "positive", ThumbnailURL: "https://example.com/a.jpg"}, + {Title: "drop", Link: "https://b.example", Recommended: false, Assessment: "irrelevant", ThumbnailURL: "https://example.com/b.jpg", Reason: "관련이 없습니다."}, + } + ranked := []SearchResult{ + {Title: "keep", Link: "https://a.example", ThumbnailURL: "https://example.com/a.jpg"}, + {Title: "extra", Link: "https://c.example", ThumbnailURL: "https://example.com/c.jpg"}, + } + + merged := MergeRecommendations(recommended, ranked, 16) + if len(merged) != 1 { + t.Fatalf("expected only the positive recommendation without pending filler, got %#v", merged) + } + if merged[0].Link != "https://a.example" { + t.Fatalf("unexpected merged result: %#v", merged) + } +} diff --git a/backend/services/ranker.go b/backend/services/ranker.go index d3e2e20..919d0bd 100644 --- a/backend/services/ranker.go +++ b/backend/services/ranker.go @@ -11,7 +11,7 @@ import ( const GeminiFallbackReason = "Gemini Vision 응답이 부족해 키워드 기준으로 보강된 결과입니다." const FallbackPreviewReason = "Fallback due to missing provider preview." -const PendingVisualReason = "Ranked candidate pending stronger visual evidence." +const SupplementalFallbackReason = "추가 탐색 후에도 충분한 확신 후보가 부족해 시각 자산이 있는 후보를 제한적으로 보강했습니다." 
type GeminiBatchStats struct { CandidateCap int `json:"candidateCap"` @@ -258,6 +258,7 @@ func BuildFallbackRecommendations(ranked []SearchResult, limit int, reason strin Source: item.Source, Reason: reason, Recommended: false, + Assessment: "unclear", })) } return fallback @@ -370,18 +371,22 @@ func NeedsSupplementalExploration(items []AIRecommendation) bool { recommendedCount := 0 negativeCount := 0 + unclearCount := 0 for _, item := range items { - if item.Recommended { + if item.Recommended && item.Assessment == "positive" { recommendedCount++ } - if looksNegativeReason(item.Reason) { + if IsExcludedAssessment(item.Assessment) || looksNegativeReason(item.Reason) { negativeCount++ } + if item.Assessment == "unclear" { + unclearCount++ + } } - if recommendedCount >= 3 { + if recommendedCount >= 4 { return false } - return negativeCount >= max(2, len(items)/2) + return negativeCount >= max(2, len(items)/3) || unclearCount >= max(2, len(items)/2) } func looksNegativeReason(reason string) bool { @@ -403,11 +408,9 @@ func looksNegativeReason(reason string) bool { func MergeRecommendations(recommended []AIRecommendation, ranked []SearchResult, limit int) []AIRecommendation { merged := make([]AIRecommendation, 0, min(limit, len(ranked))) seen := map[string]bool{} - fillerCount := 0 - maxFiller := min(4, limit) for _, item := range recommended { - if !item.Recommended { + if !item.Recommended || item.Assessment != "positive" { continue } if item.Link == "" || seen[item.Link] { @@ -421,7 +424,10 @@ func MergeRecommendations(recommended []AIRecommendation, ranked []SearchResult, if item.Recommended || item.Link == "" || seen[item.Link] || len(merged) >= limit { continue } - if looksNegativeReason(item.Reason) || strings.Contains(item.Reason, GeminiFallbackReason) { + if IsExcludedAssessment(item.Assessment) || looksNegativeReason(item.Reason) || strings.Contains(item.Reason, GeminiFallbackReason) { + continue + } + if item.Assessment == "unclear" { continue } if 
strings.TrimSpace(item.PreviewVideoURL) == "" && !hasUsableThumbnail(item.ThumbnailURL) { @@ -430,32 +436,6 @@ func MergeRecommendations(recommended []AIRecommendation, ranked []SearchResult, seen[item.Link] = true merged = append(merged, DecorateRecommendationMedia(item)) } - - if len(merged) < min(16, limit) { - for _, item := range ranked { - if len(merged) >= min(16, limit) || item.Link == "" || seen[item.Link] { - continue - } - if fillerCount >= maxFiller { - break - } - if strings.TrimSpace(item.PreviewVideoURL) == "" && !hasUsableThumbnail(item.ThumbnailURL) { - continue - } - seen[item.Link] = true - merged = append(merged, DecorateRecommendationMedia(AIRecommendation{ - Title: item.Title, - Link: item.Link, - Snippet: item.Snippet, - ThumbnailURL: item.ThumbnailURL, - PreviewVideoURL: item.PreviewVideoURL, - Source: item.Source, - Reason: PendingVisualReason, - Recommended: false, - })) - fillerCount++ - } - } return merged } @@ -489,14 +469,24 @@ func BackfillRecommendations(existing []AIRecommendation, ranked []SearchResult, ThumbnailURL: item.ThumbnailURL, PreviewVideoURL: item.PreviewVideoURL, Source: item.Source, - Reason: firstNonEmpty(strings.TrimSpace(reason), FallbackPreviewReason), + Reason: firstNonEmpty(strings.TrimSpace(reason), SupplementalFallbackReason), Recommended: false, + Assessment: "unclear", })) fillerCount++ } return merged } +func IsExcludedAssessment(assessment string) bool { + switch strings.ToLower(strings.TrimSpace(assessment)) { + case "irrelevant", "inappropriate": + return true + default: + return false + } +} + func max(a, b int) int { if a > b { return a diff --git a/frontend/app.js b/frontend/app.js index 912c74e..0c64c53 100644 --- a/frontend/app.js +++ b/frontend/app.js @@ -85,6 +85,8 @@ const activePlatforms = new Set(["envato", "artgrid", "google video"]); const hlsInstances = new WeakMap(); const debugEntries = []; const summaryTranslationCache = new Map(); +const summaryTranslationInflight = new Map(); +let 
cardSummaryObserver = null; const PREVIEW_PLACEHOLDER = "https://placehold.co/1280x720/0a0a0a/ffffff?text=Preview"; function proxiedPreviewURL(src) { @@ -120,9 +122,6 @@ function summarizeReason(reason) { if (!text) { return ""; } - if (text === "Ranked candidate pending stronger visual evidence.") { - return "Preview evidence pending"; - } if (text === "Fallback due to missing provider preview.") { return "Provider preview missing"; } @@ -491,34 +490,84 @@ function showResultModalGooglePanel(item, message = "") { } async function translateSummaryForModal(item, originalText, requestId) { + const translated = await translateSummaryText(originalText); + if (!translated) { + return; + } + if (activeResultItem?.link === item.link && activeResultModalSummaryRequest === requestId) { + resultModalSnippet.textContent = translated; + logEvent("result:modal:summary_translated", { title: item.title, source: item.source }); + } +} + +async function translateSummaryText(originalText) { const trimmed = String(originalText || "").trim(); if (!trimmed) { - return; + return ""; } if (summaryTranslationCache.has(trimmed)) { - if (activeResultItem?.link === item.link && activeResultModalSummaryRequest === requestId) { - resultModalSnippet.textContent = summaryTranslationCache.get(trimmed); + return summaryTranslationCache.get(trimmed); + } + if (summaryTranslationInflight.has(trimmed)) { + return summaryTranslationInflight.get(trimmed); + } + + const request = (async () => { + try { + const data = await api("/api/translate/summary", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ text: trimmed }), + }); + const translated = String(data.translatedText || "").trim(); + if (translated) { + summaryTranslationCache.set(trimmed, translated); + } + return translated; + } catch { + return ""; + } finally { + summaryTranslationInflight.delete(trimmed); } + })(); + summaryTranslationInflight.set(trimmed, request); + + try { + return await 
request; + } catch { + return ""; + } +} + +async function translateCardSummary(node) { + if (!node || node.dataset.summaryTranslated === "true") { return; } - try { - const data = await api("/api/translate/summary", { - method: "POST", - headers: { "Content-Type": "application/json" }, - body: JSON.stringify({ text: trimmed }), - }); - const translated = String(data.translatedText || "").trim(); - if (!translated) { - return; - } - summaryTranslationCache.set(trimmed, translated); - if (activeResultItem?.link === item.link && activeResultModalSummaryRequest === requestId) { - resultModalSnippet.textContent = translated; - logEvent("result:modal:summary_translated", { title: item.title, source: item.source }); - } - } catch (error) { - logEvent("result:modal:summary_translate_failed", { title: item.title, source: item.source, message: error.message }); + node.dataset.summaryTranslated = "true"; + const originalText = node.dataset.summaryOriginal || ""; + const translated = await translateSummaryText(originalText); + if (!translated) { + return; } + const summaryNode = node.querySelector(".result-reason"); + if (summaryNode) { + summaryNode.textContent = translated; + } +} + +function ensureCardSummaryObserver() { + if (cardSummaryObserver || typeof IntersectionObserver === "undefined") { + return; + } + cardSummaryObserver = new IntersectionObserver((entries) => { + for (const entry of entries) { + if (!entry.isIntersecting) { + continue; + } + cardSummaryObserver.unobserve(entry.target); + void translateCardSummary(entry.target); + } + }, { rootMargin: "160px 0px" }); } function fallbackResultModalMedia(item, reason) { @@ -536,6 +585,7 @@ function fallbackResultModalMedia(item, reason) { function renderResults(results) { searchResults.innerHTML = ""; + ensureCardSummaryObserver(); if (!results.length) { searchResults.innerHTML = `