Harden single-candidate gemini recovery
build-push / docker (push) Successful in 4m14s

This commit is contained in:
GHStaK
2026-03-17 17:23:05 +09:00
parent 3be797131a
commit b6a217cab9
3 changed files with 161 additions and 12 deletions
+129 -12
View File
@@ -245,6 +245,9 @@ func (g *GeminiService) Recommend(query string, candidates []SearchResult) ([]AI
if len(candidates) == 0 {
return []AIRecommendation{}, nil
}
if len(candidates) == 1 {
return g.recommendSingleCandidate(query, candidates[0])
}
g.debug("gemini:vision_start", map[string]any{
"query": query,
"candidateCount": len(candidates),
@@ -368,19 +371,92 @@ func (g *GeminiService) Recommend(query string, candidates []SearchResult) ([]AI
return recommendations, nil
}
func buildGeminiVisionInstruction(query string, candidateCount int) string {
if candidateCount <= 1 {
return `You are a professional video editor. Analyze the single provided visual for the user's keyword.
Return compact JSON only in this exact shape:
{"recommendations":[{"index":0,"verdict":"Yes","reason":"짧은 한국어 문장","recommended":true,"assessment":"positive","searchHint":""}]}
Return exactly one item.
Use a very short Korean reason.
Use verdict "Yes" or "No".
Set assessment to one of: positive, unclear, irrelevant, inappropriate.
Keep searchHint empty unless the visual is weak or irrelevant, then use a very short English stock-footage phrase.
No markdown fences. No commentary. Compact JSON only.
User query: ` + query
// recommendSingleCandidate analyzes exactly one candidate by sending its
// visual to Gemini with a plain-text (non-JSON) prompt and converting the
// five-line reply into a single AIRecommendation.
//
// It returns the error from fetching the visual or from generateText
// unchanged, and wraps parse failures together with a truncated copy of the
// raw reply. On success the returned slice always holds one element built
// from the candidate's own metadata plus the parsed verdict fields.
func (g *GeminiService) recommendSingleCandidate(query string, candidate SearchResult) ([]AIRecommendation, error) {
	const (
		recoveryMode             = "single_candidate_recovery"
		singleCandidateMaxTokens = 120
	)

	g.debug("gemini:vision_start", map[string]any{
		"query":          query,
		"candidateCount": 1,
		"mode":           recoveryMode,
	})

	imageData, imageMIME, fetchErr := g.fetchCandidateVisualInlineData(candidate)
	if fetchErr != nil {
		g.debug("gemini:vision_candidate_visual_error", map[string]any{
			"index":  0,
			"link":   candidate.Link,
			"source": candidate.Source,
			"error":  fetchErr.Error(),
		})
		return nil, fetchErr
	}

	g.debug("gemini:vision_visuals_prepared", map[string]any{
		"query":           query,
		"visualCount":     1,
		"maxImages":       1,
		"maxOutputTokens": singleCandidateMaxTokens,
		"mode":            recoveryMode,
	})

	// The prompt asks for fixed "key: value" lines instead of JSON; the reply
	// is decoded by parseSingleCandidateVisionText below.
	instruction := `You are a professional video editor. Analyze the single provided visual for the user's keyword.
Return plain text only with exactly these 5 lines:
verdict: Yes or No
assessment: positive or unclear or irrelevant or inappropriate
recommended: true or false
reason_ko: very short Korean reason
search_hint: short English stock-footage hint or empty
No JSON. No markdown. No extra text.
User query: ` + query
	candidateLabel := fmt.Sprintf("Candidate 0: title=%s source=%s link=%s", candidate.Title, candidate.Source, candidate.Link)

	requestParts := []map[string]any{
		{"text": instruction},
		{"text": candidateLabel},
		{"inlineData": map[string]string{"mimeType": imageMIME, "data": imageData}},
	}
	requestBody := map[string]any{
		"contents": []map[string]any{
			{"parts": requestParts},
		},
		"generationConfig": map[string]any{
			"responseMimeType": "text/plain",
			"temperature":      0.1,
			"maxOutputTokens":  singleCandidateMaxTokens,
		},
	}

	reply, genErr := g.generateText(requestBody)
	if genErr != nil {
		return nil, genErr
	}

	parsed, parseErr := parseSingleCandidateVisionText(reply)
	if parseErr != nil {
		return nil, fmt.Errorf("gemini single-candidate parse failed: %w; raw=%q", parseErr, truncateForError(reply, 200))
	}

	// A "Yes" verdict counts as a recommendation even when the explicit
	// recommended flag is absent or false.
	isRecommended := parsed.Recommended
	if !isRecommended {
		isRecommended = strings.EqualFold(strings.TrimSpace(parsed.Verdict), "yes")
	}
	normalizedAssessment := normalizeAssessment(parsed.Assessment, isRecommended)

	recommendation := AIRecommendation{
		Title:           candidate.Title,
		Link:            candidate.Link,
		Snippet:         candidate.Snippet,
		ThumbnailURL:    candidate.ThumbnailURL,
		PreviewVideoURL: candidate.PreviewVideoURL,
		Source:          candidate.Source,
		Reason:          normalizeKoreanReason(parsed.Reason),
		Recommended:     isRecommended,
		Assessment:      normalizedAssessment,
		SearchHint:      normalizeSearchHint(parsed.SearchHint),
	}

	g.debug("gemini:vision_complete", map[string]any{
		"query":               query,
		"recommendationCount": 1,
		"mode":                recoveryMode,
	})
	return []AIRecommendation{recommendation}, nil
}
func buildGeminiVisionInstruction(query string, _ int) string {
return `You are a professional video editor. Analyze whether each provided visual is suitable as a usable scene or shot for the user's requested keyword. Return JSON only in this shape:
{"recommendations":[{"index":0,"verdict":"Yes","reason":"short reason","recommended":true,"assessment":"positive","searchHint":"short english hint"}]}
Return one entry for every analyzed candidate. Use Korean for every reason. Keep reasons concise but specific enough to explain usefulness.
@@ -472,6 +548,47 @@ func parseGeminiVisionRecommendations(raw string) (geminiVisionParsedPayload, bo
return parsed, true, nil
}
// singleCandidateVisionResponse holds the fields read from the plain-text
// "key: value" reply requested for single-candidate analysis.
type singleCandidateVisionResponse struct {
	// Verdict is the value of the "verdict" line.
	Verdict string
	// Assessment is the value of the "assessment" line.
	Assessment string
	// Recommended is true when the "recommended" line says true or yes.
	Recommended bool
	// Reason is the value of the "reason_ko" (or "reason") line.
	Reason string
	// SearchHint is the value of the "search_hint" line, or "" when the
	// line is missing or holds a placeholder such as "empty".
	SearchHint string
}

// parseSingleCandidateVisionText decodes a reply made of "key: value" lines
// (verdict, assessment, recommended, reason_ko, search_hint).
//
// Keys are matched case-insensitively and unknown lines are ignored. The
// parser tolerates formatting the prompt forbids but that may still appear
// in a reply: list markers and markdown emphasis around keys, quotes or
// backticks around values, trailing punctuation on the enumerated fields,
// any of \n, \r\n or \r as line separator, and the literal words "empty" or
// "none" as a search hint. When a key appears more than once the last
// occurrence wins.
//
// It returns an error when no non-empty verdict line is present; in that
// case the returned struct is the zero value.
func parseSingleCandidateVisionText(raw string) (singleCandidateVisionResponse, error) {
	var result singleCandidateVisionResponse

	// FieldsFunc both splits on every line-ending style and drops blank lines.
	lines := strings.FieldsFunc(raw, func(r rune) bool { return r == '\n' || r == '\r' })
	for _, line := range lines {
		// Cut at the first colon only, so values may contain colons themselves.
		rawKey, rawValue, found := strings.Cut(line, ":")
		if !found {
			continue
		}
		value := cleanSingleCandidateValue(rawValue)
		switch normalizeSingleCandidateKey(rawKey) {
		case "verdict":
			result.Verdict = trimSingleCandidateEnum(value)
		case "assessment":
			result.Assessment = trimSingleCandidateEnum(value)
		case "recommended":
			flag := trimSingleCandidateEnum(value)
			result.Recommended = strings.EqualFold(flag, "true") || strings.EqualFold(flag, "yes")
		case "reason_ko", "reason":
			result.Reason = value
		case "search_hint", "searchhint":
			if isEmptySearchHintPlaceholder(value) {
				value = ""
			}
			result.SearchHint = value
		}
	}

	if result.Verdict == "" {
		return singleCandidateVisionResponse{}, fmt.Errorf("missing verdict line")
	}
	return result, nil
}

// normalizeSingleCandidateKey lowercases a key and strips list markers,
// numbering and markdown emphasis so "- **Verdict" and "verdict" compare equal.
func normalizeSingleCandidateKey(key string) string {
	key = strings.TrimSpace(key)
	key = strings.TrimLeft(key, "-*•#>`_ \t0123456789.)")
	key = strings.TrimRight(key, "*`_ \t")
	key = strings.ToLower(key)
	return strings.NewReplacer(" ", "_", "-", "_").Replace(key)
}

// cleanSingleCandidateValue trims whitespace, markdown emphasis/backticks and
// one pair of matching surrounding quotes from a value.
func cleanSingleCandidateValue(value string) string {
	value = strings.Trim(value, "*` \t")
	if len(value) >= 2 {
		first, last := value[0], value[len(value)-1]
		if first == last && (first == '"' || first == '\'') {
			value = strings.TrimSpace(value[1 : len(value)-1])
		}
	}
	return value
}

// trimSingleCandidateEnum drops trailing sentence punctuation from a value
// that is expected to be a single enumerated word (e.g. "Yes." -> "Yes").
func trimSingleCandidateEnum(value string) string {
	return strings.TrimSpace(strings.TrimRight(value, ".,;!"))
}

// isEmptySearchHintPlaceholder reports whether value is a word standing in
// for "no hint" rather than an actual search phrase.
func isEmptySearchHintPlaceholder(value string) bool {
	switch strings.ToLower(strings.Trim(value, "()[]<> ")) {
	case "", "empty", "none", "n/a", "null":
		return true
	}
	return false
}
func extractCompleteRecommendationObjects(text string) []string {
cleaned := strings.TrimSpace(text)
cleaned = strings.TrimPrefix(cleaned, "```json")