Revert "Harden single-candidate gemini recovery"

This reverts commit b6a217cab9.
This commit is contained in:
GHStaK
2026-03-18 13:00:40 +09:00
parent acfad750ab
commit 770aea0f57
3 changed files with 12 additions and 161 deletions
-18
View File
@@ -655,24 +655,6 @@
- If behavior in the browser does not match the latest backend/frontend code, the first assumption should be stale frontend assets until proven otherwise
## Recent Change Log
- Date: `2026-03-17`
- What changed:
- Added a dedicated single-candidate Gemini recovery path that no longer asks for JSON and instead parses a tiny plain-text key/value response.
- Kept multi-candidate Gemini Vision on compact JSON, but changed sequential recovery to use the shorter plain-text format automatically through the existing `Recommend(..., []SearchResult{item})` path.
- Added unit coverage for the single-candidate plain-text parser.
- Why it changed:
- The user-provided log `ai-media-hub-2026-03-17T08-20-31-074Z.log` showed even more severe truncation:
- `"{\"recommendations\":[{\"index\":"`
- `"{\"recommendations"`
- `"{\"recommend"`
- The same log showed `sequentialRetried: 0`, which means the old single-candidate recovery path was still too verbose and was not successfully rescuing failed batches.
- How it was verified:
- `pwsh -NoProfile -File scripts/selftest.ps1`
- added Go tests for single-candidate Gemini plain-text parsing
- What is still risky or incomplete:
- If Gemini returns malformed plain text that omits the required `verdict:` line, even the single-candidate recovery path can still fail.
- This improves recovery robustness, but total Gemini latency can still rise when many batch failures fall back to candidate-by-candidate evaluation.
- Date: `2026-03-17`
- What changed:
- Added adaptive Gemini Vision output-token sizing so smaller candidate batches, especially single-candidate sequential recovery calls, now request much shorter responses.
+12 -129
View File
@@ -245,9 +245,6 @@ func (g *GeminiService) Recommend(query string, candidates []SearchResult) ([]AI
if len(candidates) == 0 {
return []AIRecommendation{}, nil
}
if len(candidates) == 1 {
return g.recommendSingleCandidate(query, candidates[0])
}
g.debug("gemini:vision_start", map[string]any{
"query": query,
"candidateCount": len(candidates),
@@ -371,92 +368,19 @@ func (g *GeminiService) Recommend(query string, candidates []SearchResult) ([]AI
return recommendations, nil
}
func (g *GeminiService) recommendSingleCandidate(query string, candidate SearchResult) ([]AIRecommendation, error) {
g.debug("gemini:vision_start", map[string]any{
"query": query,
"candidateCount": 1,
"mode": "single_candidate_recovery",
})
img, mimeType, err := g.fetchCandidateVisualInlineData(candidate)
if err != nil {
g.debug("gemini:vision_candidate_visual_error", map[string]any{
"index": 0,
"link": candidate.Link,
"source": candidate.Source,
"error": err.Error(),
})
return nil, err
func buildGeminiVisionInstruction(query string, candidateCount int) string {
if candidateCount <= 1 {
return `You are a professional video editor. Analyze the single provided visual for the user's keyword.
Return compact JSON only in this exact shape:
{"recommendations":[{"index":0,"verdict":"Yes","reason":"짧은 한국어 문장","recommended":true,"assessment":"positive","searchHint":""}]}
Return exactly one item.
Use a very short Korean reason.
Use verdict "Yes" or "No".
Set assessment to one of: positive, unclear, irrelevant, inappropriate.
Keep searchHint empty unless the visual is weak or irrelevant, then use a very short English stock-footage phrase.
No markdown fences. No commentary. Compact JSON only.
User query: ` + query
}
g.debug("gemini:vision_visuals_prepared", map[string]any{
"query": query,
"visualCount": 1,
"maxImages": 1,
"maxOutputTokens": 120,
"mode": "single_candidate_recovery",
})
body := map[string]any{
"contents": []map[string]any{
{
"parts": []map[string]any{
{
"text": `You are a professional video editor. Analyze the single provided visual for the user's keyword.
Return plain text only with exactly these 5 lines:
verdict: Yes or No
assessment: positive or unclear or irrelevant or inappropriate
recommended: true or false
reason_ko: very short Korean reason
search_hint: short English stock-footage hint or empty
No JSON. No markdown. No extra text.
User query: ` + query,
},
{"text": fmt.Sprintf("Candidate 0: title=%s source=%s link=%s", candidate.Title, candidate.Source, candidate.Link)},
{"inlineData": map[string]string{"mimeType": mimeType, "data": img}},
},
},
},
"generationConfig": map[string]any{
"responseMimeType": "text/plain",
"temperature": 0.1,
"maxOutputTokens": 120,
},
}
rawText, err := g.generateText(body)
if err != nil {
return nil, err
}
rec, err := parseSingleCandidateVisionText(rawText)
if err != nil {
return nil, fmt.Errorf("gemini single-candidate parse failed: %w; raw=%q", err, truncateForError(rawText, 200))
}
recommended := rec.Recommended || strings.EqualFold(strings.TrimSpace(rec.Verdict), "yes")
assessment := normalizeAssessment(rec.Assessment, recommended)
result := AIRecommendation{
Title: candidate.Title,
Link: candidate.Link,
Snippet: candidate.Snippet,
ThumbnailURL: candidate.ThumbnailURL,
PreviewVideoURL: candidate.PreviewVideoURL,
Source: candidate.Source,
Reason: normalizeKoreanReason(rec.Reason),
Recommended: recommended,
Assessment: assessment,
SearchHint: normalizeSearchHint(rec.SearchHint),
}
g.debug("gemini:vision_complete", map[string]any{
"query": query,
"recommendationCount": 1,
"mode": "single_candidate_recovery",
})
return []AIRecommendation{result}, nil
}
func buildGeminiVisionInstruction(query string, _ int) string {
return `You are a professional video editor. Analyze whether each provided visual is suitable as a usable scene or shot for the user's requested keyword. Return JSON only in this shape:
{"recommendations":[{"index":0,"verdict":"Yes","reason":"short reason","recommended":true,"assessment":"positive","searchHint":"short english hint"}]}
Return one entry for every analyzed candidate. Use Korean for every reason. Keep reasons concise but specific enough to explain usefulness.
@@ -548,47 +472,6 @@ func parseGeminiVisionRecommendations(raw string) (geminiVisionParsedPayload, bo
return parsed, true, nil
}
// singleCandidateVisionResponse holds the values read from a single-candidate
// plain-text reply by parseSingleCandidateVisionText.
type singleCandidateVisionResponse struct {
	// Raw values of the "verdict" and "assessment" lines.
	Verdict, Assessment string

	// True when the "recommended" line reads "true" or "yes" in any letter case.
	Recommended bool

	// Raw values of the "reason_ko" and "search_hint" lines.
	Reason, SearchHint string
}
// parseSingleCandidateVisionText reads a plain-text reply made of "key: value"
// lines into a singleCandidateVisionResponse.
//
// Keys are trimmed and matched case-insensitively; the recognised keys are
// verdict, assessment, recommended, reason_ko and search_hint. Blank lines,
// lines without a colon and unrecognised keys are skipped, and when a key
// appears more than once the last occurrence wins. An error is returned when
// no non-empty verdict value was found.
func parseSingleCandidateVisionText(raw string) (singleCandidateVisionResponse, error) {
	var parsed singleCandidateVisionResponse

	normalized := strings.ReplaceAll(strings.TrimSpace(raw), "\r\n", "\n")
	for _, entry := range strings.Split(normalized, "\n") {
		// Only the first colon separates key from value, so a value may
		// itself contain colons. Blank lines have no colon and fall out here.
		name, val, found := strings.Cut(strings.TrimSpace(entry), ":")
		if !found {
			continue
		}
		val = strings.TrimSpace(val)

		switch strings.ToLower(strings.TrimSpace(name)) {
		case "verdict":
			parsed.Verdict = val
		case "assessment":
			parsed.Assessment = val
		case "recommended":
			parsed.Recommended = strings.EqualFold(val, "true") || strings.EqualFold(val, "yes")
		case "reason_ko":
			parsed.Reason = val
		case "search_hint":
			parsed.SearchHint = val
		}
	}

	// The verdict line is the only mandatory one.
	if strings.TrimSpace(parsed.Verdict) == "" {
		return singleCandidateVisionResponse{}, fmt.Errorf("missing verdict line")
	}
	return parsed, nil
}
func extractCompleteRecommendationObjects(text string) []string {
cleaned := strings.TrimSpace(text)
cleaned = strings.TrimPrefix(cleaned, "```json")
-14
View File
@@ -265,17 +265,3 @@ func TestGeminiVisionMaxOutputTokensShrinksForSingleCandidate(t *testing.T) {
t.Fatalf("expected 420 tokens for four candidates, got %d", got)
}
}
// TestParseSingleCandidateVisionTextParsesKeyValueResponse feeds a five-line
// key/value reply to parseSingleCandidateVisionText and checks the verdict,
// assessment, recommended flag and Korean reason it extracts.
func TestParseSingleCandidateVisionTextParsesKeyValueResponse(t *testing.T) {
	const input = "verdict: Yes\nassessment: positive\nrecommended: true\nreason_ko: 적합한 도시 야경\nsearch_hint: "

	got, parseErr := parseSingleCandidateVisionText(input)
	if parseErr != nil {
		t.Fatalf("expected parse success, got %v", parseErr)
	}

	headerOK := got.Verdict == "Yes" && got.Assessment == "positive" && got.Recommended
	if !headerOK {
		t.Fatalf("unexpected parsed result: %#v", got)
	}
	if want := "적합한 도시 야경"; got.Reason != want {
		t.Fatalf("unexpected reason: %#v", got)
	}
}