Revert "Harden single-candidate gemini recovery"

This reverts commit b6a217cab9.
This commit is contained in:
GHStaK
2026-03-18 13:00:40 +09:00
parent acfad750ab
commit 770aea0f57
3 changed files with 12 additions and 161 deletions
-18
View File
@@ -655,24 +655,6 @@
- If behavior in the browser does not match the latest backend/frontend code, the first assumption should be stale frontend assets until proven otherwise
## Recent Change Log
- Date: `2026-03-17`
- What changed:
- Added a dedicated single-candidate Gemini recovery path that no longer asks for JSON and instead parses a tiny plain-text key/value response.
- Kept multi-candidate Gemini Vision on compact JSON, but changed sequential recovery to use the shorter plain-text format automatically through the existing `Recommend(..., []SearchResult{item})` path.
- Added unit coverage for the single-candidate plain-text parser.
- Why it changed:
- The user-provided log `ai-media-hub-2026-03-17T08-20-31-074Z.log` showed even more severe truncation:
- `"{\"recommendations\":[{\"index\":"`
- `"{\"recommendations"`
- `"{\"recommend"`
- The same log showed `sequentialRetried: 0`, which means the old single-candidate recovery path was still too verbose and was not successfully rescuing failed batches.
- How it was verified:
- `pwsh -NoProfile -File scripts/selftest.ps1`
- added Go tests for single-candidate Gemini plain-text parsing
- What is still risky or incomplete:
- If Gemini returns malformed plain text that omits the required `verdict:` line, even the single-candidate recovery path can still fail.
- This improves recovery robustness, but total Gemini latency can still rise when many batch failures fall back to candidate-by-candidate evaluation.
- Date: `2026-03-17`
- What changed:
- Added adaptive Gemini Vision output-token sizing so smaller candidate batches, especially single-candidate sequential recovery calls, now request much shorter responses.
+12 -129
View File
@@ -245,9 +245,6 @@ func (g *GeminiService) Recommend(query string, candidates []SearchResult) ([]AI
if len(candidates) == 0 { if len(candidates) == 0 {
return []AIRecommendation{}, nil return []AIRecommendation{}, nil
} }
if len(candidates) == 1 {
return g.recommendSingleCandidate(query, candidates[0])
}
g.debug("gemini:vision_start", map[string]any{ g.debug("gemini:vision_start", map[string]any{
"query": query, "query": query,
"candidateCount": len(candidates), "candidateCount": len(candidates),
@@ -371,92 +368,19 @@ func (g *GeminiService) Recommend(query string, candidates []SearchResult) ([]AI
return recommendations, nil return recommendations, nil
} }
func (g *GeminiService) recommendSingleCandidate(query string, candidate SearchResult) ([]AIRecommendation, error) { func buildGeminiVisionInstruction(query string, candidateCount int) string {
g.debug("gemini:vision_start", map[string]any{ if candidateCount <= 1 {
"query": query, return `You are a professional video editor. Analyze the single provided visual for the user's keyword.
"candidateCount": 1, Return compact JSON only in this exact shape:
"mode": "single_candidate_recovery", {"recommendations":[{"index":0,"verdict":"Yes","reason":"짧은 한국어 문장","recommended":true,"assessment":"positive","searchHint":""}]}
}) Return exactly one item.
Use a very short Korean reason.
img, mimeType, err := g.fetchCandidateVisualInlineData(candidate) Use verdict "Yes" or "No".
if err != nil { Set assessment to one of: positive, unclear, irrelevant, inappropriate.
g.debug("gemini:vision_candidate_visual_error", map[string]any{ Keep searchHint empty unless the visual is weak or irrelevant, then use a very short English stock-footage phrase.
"index": 0, No markdown fences. No commentary. Compact JSON only.
"link": candidate.Link, User query: ` + query
"source": candidate.Source,
"error": err.Error(),
})
return nil, err
} }
g.debug("gemini:vision_visuals_prepared", map[string]any{
"query": query,
"visualCount": 1,
"maxImages": 1,
"maxOutputTokens": 120,
"mode": "single_candidate_recovery",
})
body := map[string]any{
"contents": []map[string]any{
{
"parts": []map[string]any{
{
"text": `You are a professional video editor. Analyze the single provided visual for the user's keyword.
Return plain text only with exactly these 5 lines:
verdict: Yes or No
assessment: positive or unclear or irrelevant or inappropriate
recommended: true or false
reason_ko: very short Korean reason
search_hint: short English stock-footage hint or empty
No JSON. No markdown. No extra text.
User query: ` + query,
},
{"text": fmt.Sprintf("Candidate 0: title=%s source=%s link=%s", candidate.Title, candidate.Source, candidate.Link)},
{"inlineData": map[string]string{"mimeType": mimeType, "data": img}},
},
},
},
"generationConfig": map[string]any{
"responseMimeType": "text/plain",
"temperature": 0.1,
"maxOutputTokens": 120,
},
}
rawText, err := g.generateText(body)
if err != nil {
return nil, err
}
rec, err := parseSingleCandidateVisionText(rawText)
if err != nil {
return nil, fmt.Errorf("gemini single-candidate parse failed: %w; raw=%q", err, truncateForError(rawText, 200))
}
recommended := rec.Recommended || strings.EqualFold(strings.TrimSpace(rec.Verdict), "yes")
assessment := normalizeAssessment(rec.Assessment, recommended)
result := AIRecommendation{
Title: candidate.Title,
Link: candidate.Link,
Snippet: candidate.Snippet,
ThumbnailURL: candidate.ThumbnailURL,
PreviewVideoURL: candidate.PreviewVideoURL,
Source: candidate.Source,
Reason: normalizeKoreanReason(rec.Reason),
Recommended: recommended,
Assessment: assessment,
SearchHint: normalizeSearchHint(rec.SearchHint),
}
g.debug("gemini:vision_complete", map[string]any{
"query": query,
"recommendationCount": 1,
"mode": "single_candidate_recovery",
})
return []AIRecommendation{result}, nil
}
func buildGeminiVisionInstruction(query string, _ int) string {
return `You are a professional video editor. Analyze whether each provided visual is suitable as a usable scene or shot for the user's requested keyword. Return JSON only in this shape: return `You are a professional video editor. Analyze whether each provided visual is suitable as a usable scene or shot for the user's requested keyword. Return JSON only in this shape:
{"recommendations":[{"index":0,"verdict":"Yes","reason":"short reason","recommended":true,"assessment":"positive","searchHint":"short english hint"}]} {"recommendations":[{"index":0,"verdict":"Yes","reason":"short reason","recommended":true,"assessment":"positive","searchHint":"short english hint"}]}
Return one entry for every analyzed candidate. Use Korean for every reason. Keep reasons concise but specific enough to explain usefulness. Return one entry for every analyzed candidate. Use Korean for every reason. Keep reasons concise but specific enough to explain usefulness.
@@ -548,47 +472,6 @@ func parseGeminiVisionRecommendations(raw string) (geminiVisionParsedPayload, bo
return parsed, true, nil return parsed, true, nil
} }
// singleCandidateVisionResponse holds the fields read from the plain-text
// "key: value" reply parsed by parseSingleCandidateVisionText.
type singleCandidateVisionResponse struct {
	Verdict     string // value of the "verdict" line; required
	Assessment  string // value of the "assessment" line
	Recommended bool   // true when the "recommended" line is "true" or "yes" (case-insensitive)
	Reason      string // value of the "reason_ko" (or "reason") line
	SearchHint  string // value of the "search_hint" (or "searchHint") line
}

// parseSingleCandidateVisionText parses a line-oriented "key: value" reply into
// a singleCandidateVisionResponse.
//
// Each line is split at its first colon. Keys are matched case-insensitively,
// with spaces and hyphens treated as underscores. Lines without a colon and
// lines with an unknown key are ignored. When a key appears more than once the
// last occurrence wins.
//
// The parser is deliberately lenient about decoration that tends to appear
// even when plain text is requested: list bullets, Markdown emphasis,
// backticks and quotes around keys or values are stripped, and CRLF, bare CR
// and LF are all accepted as line endings.
//
// It returns an error (and a zero-value response) when no non-empty verdict
// value is present.
func parseSingleCandidateVisionText(raw string) (singleCandidateVisionResponse, error) {
	// Characters commonly wrapped around keys and values ("- **verdict:** Yes").
	const decoration = " \t*-•`\"'"
	clean := func(s string) string {
		return strings.TrimSpace(strings.Trim(strings.TrimSpace(s), decoration))
	}
	keySeparators := strings.NewReplacer(" ", "_", "-", "_")

	// Treat CRLF and bare CR like LF so a stray carriage return cannot glue
	// two fields into a single value.
	text := strings.NewReplacer("\r\n", "\n", "\r", "\n").Replace(strings.TrimSpace(raw))

	var result singleCandidateVisionResponse
	for _, line := range strings.Split(text, "\n") {
		rawKey, rawValue, found := strings.Cut(line, ":")
		if !found {
			// Blank lines and free-form commentary carry no field.
			continue
		}
		key := keySeparators.Replace(strings.ToLower(clean(rawKey)))
		value := clean(rawValue)
		switch key {
		case "verdict":
			result.Verdict = value
		case "assessment":
			result.Assessment = value
		case "recommended":
			result.Recommended = strings.EqualFold(value, "true") || strings.EqualFold(value, "yes")
		case "reason_ko", "reason":
			result.Reason = value
		case "search_hint", "searchhint":
			result.SearchHint = value
		}
	}
	if result.Verdict == "" {
		return singleCandidateVisionResponse{}, fmt.Errorf("missing verdict line")
	}
	return result, nil
}
func extractCompleteRecommendationObjects(text string) []string { func extractCompleteRecommendationObjects(text string) []string {
cleaned := strings.TrimSpace(text) cleaned := strings.TrimSpace(text)
cleaned = strings.TrimPrefix(cleaned, "```json") cleaned = strings.TrimPrefix(cleaned, "```json")
-14
View File
@@ -265,17 +265,3 @@ func TestGeminiVisionMaxOutputTokensShrinksForSingleCandidate(t *testing.T) {
t.Fatalf("expected 420 tokens for four candidates, got %d", got) t.Fatalf("expected 420 tokens for four candidates, got %d", got)
} }
} }
// TestParseSingleCandidateVisionTextParsesKeyValueResponse feeds the parser a
// well-formed five-line key/value reply and checks that the verdict,
// assessment, recommended flag and Korean reason are extracted.
func TestParseSingleCandidateVisionTextParsesKeyValueResponse(t *testing.T) {
	const raw = "verdict: Yes\nassessment: positive\nrecommended: true\nreason_ko: 적합한 도시 야경\nsearch_hint: "

	parsed, err := parseSingleCandidateVisionText(raw)
	if err != nil {
		t.Fatalf("expected parse success, got %v", err)
	}

	// Verdict, assessment and the recommended flag are checked together so a
	// single failure message shows the whole parsed value.
	headerOK := parsed.Verdict == "Yes" && parsed.Assessment == "positive" && parsed.Recommended
	if !headerOK {
		t.Fatalf("unexpected parsed result: %#v", parsed)
	}

	if want := "적합한 도시 야경"; parsed.Reason != want {
		t.Fatalf("unexpected reason: %#v", parsed)
	}
}