diff --git a/TODO.md b/TODO.md index a162e73..37303df 100644 --- a/TODO.md +++ b/TODO.md @@ -655,24 +655,6 @@ - If behavior in the browser does not match the latest backend/frontend code, the first assumption should be stale frontend assets until proven otherwise ## Recent Change Log -- Date: `2026-03-17` -- What changed: - - Added a dedicated single-candidate Gemini recovery path that no longer asks for JSON and instead parses a tiny plain-text key/value response. - - Kept multi-candidate Gemini Vision on compact JSON, but changed sequential recovery to use the shorter plain-text format automatically through the existing `Recommend(..., []SearchResult{item})` path. - - Added unit coverage for the single-candidate plain-text parser. -- Why it changed: - - The user-provided log `ai-media-hub-2026-03-17T08-20-31-074Z.log` showed even more severe truncation: - - `"{\"recommendations\":[{\"index\":"` - - `"{\"recommendations"` - - `"{\"recommend"` - - The same log showed `sequentialRetried: 0`, which means the old single-candidate recovery path was still too verbose and was not successfully rescuing failed batches. -- How it was verified: - - `pwsh -NoProfile -File scripts/selftest.ps1` - - added Go tests for single-candidate Gemini plain-text parsing -- What is still risky or incomplete: - - If Gemini returns malformed plain text that omits the required `verdict:` line, even the single-candidate recovery path can still fail. - - This improves recovery robustness, but total Gemini latency can still rise when many batch failures fall back to candidate-by-candidate evaluation. - - Date: `2026-03-17` - What changed: - Added adaptive Gemini Vision output-token sizing so smaller candidate batches, especially single-candidate sequential recovery calls, now request much shorter responses. diff --git a/backend/services/gemini.go b/backend/services/gemini.go index 136ec23..44110bc 100644 --- a/backend/services/gemini.go +++ b/backend/services/gemini.go @@ -245,9 +245,6 @@ func (g *GeminiService) Recommend(query string, candidates []SearchResult) ([]AI if len(candidates) == 0 { return []AIRecommendation{}, nil } - if len(candidates) == 1 { - return g.recommendSingleCandidate(query, candidates[0]) - } g.debug("gemini:vision_start", map[string]any{ "query": query, "candidateCount": len(candidates), @@ -371,92 +368,19 @@ func (g *GeminiService) Recommend(query string, candidates []SearchResult) ([]AI return recommendations, nil } -func (g *GeminiService) recommendSingleCandidate(query string, candidate SearchResult) ([]AIRecommendation, error) { - g.debug("gemini:vision_start", map[string]any{ - "query": query, - "candidateCount": 1, - "mode": "single_candidate_recovery", - }) - - img, mimeType, err := g.fetchCandidateVisualInlineData(candidate) - if err != nil { - g.debug("gemini:vision_candidate_visual_error", map[string]any{ - "index": 0, - "link": candidate.Link, - "source": candidate.Source, - "error": err.Error(), - }) - return nil, err +func buildGeminiVisionInstruction(query string, candidateCount int) string { + if candidateCount <= 1 { + return `You are a professional video editor. Analyze the single provided visual for the user's keyword. +Return compact JSON only in this exact shape: +{"recommendations":[{"index":0,"verdict":"Yes","reason":"짧은 한국어 문장","recommended":true,"assessment":"positive","searchHint":""}]} +Return exactly one item. +Use a very short Korean reason. +Use verdict "Yes" or "No". +Set assessment to one of: positive, unclear, irrelevant, inappropriate. +Keep searchHint empty unless the visual is weak or irrelevant, then use a very short English stock-footage phrase. +No markdown fences. No commentary. Compact JSON only. +User query: ` + query } - - g.debug("gemini:vision_visuals_prepared", map[string]any{ - "query": query, - "visualCount": 1, - "maxImages": 1, - "maxOutputTokens": 120, - "mode": "single_candidate_recovery", - }) - - body := map[string]any{ - "contents": []map[string]any{ - { - "parts": []map[string]any{ - { - "text": `You are a professional video editor. Analyze the single provided visual for the user's keyword. -Return plain text only with exactly these 5 lines: -verdict: Yes or No -assessment: positive or unclear or irrelevant or inappropriate -recommended: true or false -reason_ko: very short Korean reason -search_hint: short English stock-footage hint or empty -No JSON. No markdown. No extra text. -User query: ` + query, - }, - {"text": fmt.Sprintf("Candidate 0: title=%s source=%s link=%s", candidate.Title, candidate.Source, candidate.Link)}, - {"inlineData": map[string]string{"mimeType": mimeType, "data": img}}, - }, - }, - }, - "generationConfig": map[string]any{ - "responseMimeType": "text/plain", - "temperature": 0.1, - "maxOutputTokens": 120, - }, - } - - rawText, err := g.generateText(body) - if err != nil { - return nil, err - } - rec, err := parseSingleCandidateVisionText(rawText) - if err != nil { - return nil, fmt.Errorf("gemini single-candidate parse failed: %w; raw=%q", err, truncateForError(rawText, 200)) - } - - recommended := rec.Recommended || strings.EqualFold(strings.TrimSpace(rec.Verdict), "yes") - assessment := normalizeAssessment(rec.Assessment, recommended) - result := AIRecommendation{ - Title: candidate.Title, - Link: candidate.Link, - Snippet: candidate.Snippet, - ThumbnailURL: candidate.ThumbnailURL, - PreviewVideoURL: candidate.PreviewVideoURL, - Source: candidate.Source, - Reason: normalizeKoreanReason(rec.Reason), - Recommended: recommended, - Assessment: assessment, - SearchHint: normalizeSearchHint(rec.SearchHint), - } - - g.debug("gemini:vision_complete", map[string]any{ - "query": query, - "recommendationCount": 1, - "mode": "single_candidate_recovery", - }) - return []AIRecommendation{result}, nil -} - -func buildGeminiVisionInstruction(query string, _ int) string { return `You are a professional video editor. Analyze whether each provided visual is suitable as a usable scene or shot for the user's requested keyword. Return JSON only in this shape: {"recommendations":[{"index":0,"verdict":"Yes","reason":"short reason","recommended":true,"assessment":"positive","searchHint":"short english hint"}]} Return one entry for every analyzed candidate. Use Korean for every reason. Keep reasons concise but specific enough to explain usefulness. @@ -548,47 +472,6 @@ func parseGeminiVisionRecommendations(raw string) (geminiVisionParsedPayload, bo return parsed, true, nil } -type singleCandidateVisionResponse struct { - Verdict string - Assessment string - Recommended bool - Reason string - SearchHint string -} - -func parseSingleCandidateVisionText(raw string) (singleCandidateVisionResponse, error) { - lines := strings.Split(strings.ReplaceAll(strings.TrimSpace(raw), "\r\n", "\n"), "\n") - result := singleCandidateVisionResponse{} - for _, line := range lines { - trimmed := strings.TrimSpace(line) - if trimmed == "" { - continue - } - parts := strings.SplitN(trimmed, ":", 2) - if len(parts) != 2 { - continue - } - key := strings.ToLower(strings.TrimSpace(parts[0])) - value := strings.TrimSpace(parts[1]) - switch key { - case "verdict": - result.Verdict = value - case "assessment": - result.Assessment = value - case "recommended": - result.Recommended = strings.EqualFold(value, "true") || strings.EqualFold(value, "yes") - case "reason_ko": - result.Reason = value - case "search_hint": - result.SearchHint = value - } - } - if strings.TrimSpace(result.Verdict) == "" { - return singleCandidateVisionResponse{}, fmt.Errorf("missing verdict line") - } - return result, nil -} - func extractCompleteRecommendationObjects(text string) []string { cleaned := strings.TrimSpace(text) cleaned = strings.TrimPrefix(cleaned, "```json") diff --git a/backend/services/gemini_test.go b/backend/services/gemini_test.go index c62b61f..f97770d 100644 --- a/backend/services/gemini_test.go +++ b/backend/services/gemini_test.go @@ -265,17 +265,3 @@ func TestGeminiVisionMaxOutputTokensShrinksForSingleCandidate(t *testing.T) { t.Fatalf("expected 420 tokens for four candidates, got %d", got) } } - -func TestParseSingleCandidateVisionTextParsesKeyValueResponse(t *testing.T) { - raw := "verdict: Yes\nassessment: positive\nrecommended: true\nreason_ko: 적합한 도시 야경\nsearch_hint: " - parsed, err := parseSingleCandidateVisionText(raw) - if err != nil { - t.Fatalf("expected parse success, got %v", err) - } - if parsed.Verdict != "Yes" || parsed.Assessment != "positive" || !parsed.Recommended { - t.Fatalf("unexpected parsed result: %#v", parsed) - } - if parsed.Reason != "적합한 도시 야경" { - t.Fatalf("unexpected reason: %#v", parsed) - } -}