Harden gemini vision JSON recovery
build-push / docker (push) Successful in 4m13s

This commit is contained in:
GHStaK
2026-03-17 16:33:09 +09:00
parent 91ee37593c
commit 513199f426
4 changed files with 177 additions and 16 deletions
+16
View File
@@ -655,6 +655,22 @@
- If behavior in the browser does not match the latest backend/frontend code, the first assumption should be stale frontend assets until proven otherwise
## Recent Change Log
- Date: `2026-03-17`
- What changed:
- Reduced Gemini Vision batch size from `6` to `4` so each model response carries fewer recommendation objects and is less likely to be truncated mid-JSON.
- Tightened the Gemini Vision prompt to ask for shorter Korean reasons and compact JSON-only output.
- Lowered Gemini Vision `maxOutputTokens` and added partial JSON recovery so already-complete recommendation objects can still be salvaged when the model output is cut off before the final closing braces.
- Added unit coverage for truncated Gemini Vision JSON recovery.
- Why it changed:
- The user-provided log `ai-media-hub-2026-03-17T07-29-44-949Z.log` showed that visuals were prepared successfully, but every batch failed with `gemini vision JSON extraction failed: no complete JSON object found ...`.
- The failure pattern indicates output truncation rather than missing thumbnails or preview frames.
- How it was verified:
- `pwsh -NoProfile -File scripts/selftest.ps1`
- added Go tests for partial Gemini JSON recovery behavior
- What is still risky or incomplete:
- If Gemini returns severely malformed output before even one full recommendation object closes, the parser still cannot recover useful results from that batch.
- Smaller batch size improves reliability but can increase total Gemini round trips and latency on some searches.
- Date: `2026-03-17`
- What changed:
- Fixed a search-budget regression where source collection could consume the full `SearchService` deadline and leave no time for Envato / Artgrid enrichment, causing Gemini to see only missing or low-value visuals.
+132 -15
View File
@@ -256,6 +256,7 @@ func (g *GeminiService) Recommend(query string, candidates []SearchResult) ([]AI
"text": `You are a professional video editor. Analyze whether each provided visual is suitable as a usable scene or shot for the user's requested keyword. Return JSON only in this shape: "text": `You are a professional video editor. Analyze whether each provided visual is suitable as a usable scene or shot for the user's requested keyword. Return JSON only in this shape:
{"recommendations":[{"index":0,"verdict":"Yes","reason":"short reason","recommended":true,"assessment":"positive","searchHint":"short english hint"}]} {"recommendations":[{"index":0,"verdict":"Yes","reason":"short reason","recommended":true,"assessment":"positive","searchHint":"short english hint"}]}
Return one entry for every analyzed candidate. Use Korean for every reason. Keep reasons concise but specific enough to explain usefulness. Return one entry for every analyzed candidate. Use Korean for every reason. Keep reasons concise but specific enough to explain usefulness.
Keep each Korean reason very short, ideally one sentence under 24 Korean characters when possible.
Set verdict to "Yes" or "No" for every candidate. "Yes" means the scene is usable and relevant for editing against the user's keyword. "No" means it is not suitable or not relevant enough. Set verdict to "Yes" or "No" for every candidate. "Yes" means the scene is usable and relevant for editing against the user's keyword. "No" means it is not suitable or not relevant enough.
Set recommended=true only when verdict is "Yes". Set recommended=false when verdict is "No". Set recommended=true only when verdict is "Yes". Set recommended=false when verdict is "No".
Set assessment to one of: positive, unclear, irrelevant, inappropriate. Set assessment to one of: positive, unclear, irrelevant, inappropriate.
@@ -265,6 +266,7 @@ Set assessment to one of: positive, unclear, irrelevant, inappropriate.
- inappropriate: low-quality, spammy, misleading, meme-like, or otherwise unsuitable for professional editing - inappropriate: low-quality, spammy, misleading, meme-like, or otherwise unsuitable for professional editing
When assessment is not positive, provide searchHint as a short English stock-footage search phrase that could help find better candidates. Keep it under 8 words. When assessment is not positive, provide searchHint as a short English stock-footage search phrase that could help find better candidates. Keep it under 8 words.
When assessment is positive, searchHint may be empty. When assessment is positive, searchHint may be empty.
Do not include markdown fences, explanations, or comments. Output compact JSON only.
Prefer cinematic b-roll, stock footage, editorial footage, clean composition, usable establishing shots, and professional media thumbnails. Prefer cinematic b-roll, stock footage, editorial footage, clean composition, usable establishing shots, and professional media thumbnails.
Avoid clickbait faces, exaggerated expressions, meme aesthetics, low-information thumbnails, sensational text overlays, or gossip-style imagery. Avoid clickbait faces, exaggerated expressions, meme aesthetics, low-information thumbnails, sensational text overlays, or gossip-style imagery.
Favor scenes that look directly useful for professional editing, sequencing, establishing, cutaway, or mood-building usage. Favor scenes that look directly useful for professional editing, sequencing, establishing, cutaway, or mood-building usage.
@@ -306,7 +308,8 @@ User query: ` + query,
}, },
"generationConfig": map[string]any{ "generationConfig": map[string]any{
"responseMimeType": "application/json", "responseMimeType": "application/json",
"maxOutputTokens": 1400, "temperature": 0.1,
"maxOutputTokens": 900,
}, },
} }
@@ -339,23 +342,17 @@ User query: ` + query,
return nil, fmt.Errorf("gemini vision returned no candidates") return nil, fmt.Errorf("gemini vision returned no candidates")
} }
jsonText, err := extractJSONObject(payload.Candidates[0].Content.Parts[0].Text) rawText := payload.Candidates[0].Content.Parts[0].Text
parsed, recoveredPartial, err := parseGeminiVisionRecommendations(rawText)
if err != nil { if err != nil {
return nil, fmt.Errorf("gemini vision JSON extraction failed: %w", err) return nil, fmt.Errorf("gemini vision JSON extraction failed: %w", err)
} }
if recoveredPartial {
var parsed struct { g.debug("gemini:vision_partial_json_recovered", map[string]any{
Recommendations []struct { "query": query,
Index int `json:"index"` "candidateCount": len(candidates),
Verdict string `json:"verdict"` "recommendationCount": len(parsed.Recommendations),
Reason string `json:"reason"` })
Recommended bool `json:"recommended"`
Assessment string `json:"assessment"`
SearchHint string `json:"searchHint"`
} `json:"recommendations"`
}
if err := json.Unmarshal([]byte(jsonText), &parsed); err != nil {
return nil, fmt.Errorf("gemini vision JSON parse failed: %w; raw=%q", err, truncateForError(payload.Candidates[0].Content.Parts[0].Text, 200))
} }
recommendations := make([]AIRecommendation, 0, len(parsed.Recommendations)) recommendations := make([]AIRecommendation, 0, len(parsed.Recommendations))
@@ -387,6 +384,126 @@ User query: ` + query,
return recommendations, nil return recommendations, nil
} }
// geminiVisionParsedPayload is the decoded form of a Gemini Vision reply: a
// single top-level "recommendations" array holding one entry per analyzed
// candidate. It is filled either from a complete JSON document or from
// whichever array elements could be salvaged out of a truncated one.
type geminiVisionParsedPayload struct {
Recommendations []struct {
// Index is the "index" value echoed back by the model; presumably the
// candidate's position within the submitted batch — TODO confirm against
// the caller that maps it back.
Index int `json:"index"`
// Verdict is the model's "verdict" string (the prompt asks for "Yes" or "No").
Verdict string `json:"verdict"`
// Reason is the model's free-text justification.
Reason string `json:"reason"`
// Recommended is the model's boolean "recommended" flag.
Recommended bool `json:"recommended"`
// Assessment is the model's "assessment" category string.
Assessment string `json:"assessment"`
// SearchHint is the model's optional "searchHint" phrase; it may be empty.
SearchHint string `json:"searchHint"`
} `json:"recommendations"`
}
// parseGeminiVisionRecommendations decodes the raw text of a Gemini Vision
// reply into a geminiVisionParsedPayload.
//
// When extractJSONObject finds a complete JSON object, that object is decoded
// as a whole and the boolean result is false; a decode failure on this path is
// returned as an error that includes a truncated copy of the raw text.
//
// When no complete object can be extracted (for example because the reply was
// cut off), the function falls back to salvaging every fully closed element of
// the "recommendations" array. Elements that fail to decode are skipped on
// purpose: the fallback is best-effort. If at least one element survives, the
// boolean result is true to signal a partial recovery; otherwise the original
// extraction error is returned unchanged.
func parseGeminiVisionRecommendations(raw string) (geminiVisionParsedPayload, bool, error) {
	// Alias (not a new named type) for the anonymous element type of
	// geminiVisionParsedPayload.Recommendations, so slices of it stay
	// assignable to that field.
	type recommendation = struct {
		Index       int    `json:"index"`
		Verdict     string `json:"verdict"`
		Reason      string `json:"reason"`
		Recommended bool   `json:"recommended"`
		Assessment  string `json:"assessment"`
		SearchHint  string `json:"searchHint"`
	}

	jsonText, extractErr := extractJSONObject(raw)
	if extractErr == nil {
		var full geminiVisionParsedPayload
		if decodeErr := json.Unmarshal([]byte(jsonText), &full); decodeErr != nil {
			return geminiVisionParsedPayload{}, false, fmt.Errorf("json parse failed: %w; raw=%q", decodeErr, truncateForError(raw, 200))
		}
		return full, false, nil
	}

	// Fallback: keep only the array elements that closed before the cut-off.
	fragments := extractCompleteRecommendationObjects(raw)
	salvaged := make([]recommendation, 0, len(fragments))
	for _, fragment := range fragments {
		var entry recommendation
		if json.Unmarshal([]byte(fragment), &entry) == nil {
			salvaged = append(salvaged, entry)
		}
	}
	if len(salvaged) == 0 {
		// Nothing usable: surface the original extraction failure.
		return geminiVisionParsedPayload{}, false, extractErr
	}
	return geminiVisionParsedPayload{Recommendations: salvaged}, true, nil
}
// extractCompleteRecommendationObjects scans text for a "recommendations"
// array and returns the source text of every element object whose closing
// brace is present, in order of appearance.
//
// Surrounding whitespace and a leading "```json" / "```" or trailing "```"
// fence are stripped first. The result is nil when the "recommendations" key
// or the opening '[' after it is missing. Scanning stops at the ']' that
// closes the array, or at the end of the text if the array was never closed,
// so an unfinished trailing element is simply left out.
func extractCompleteRecommendationObjects(text string) []string {
	body := strings.TrimSpace(text)
	for _, fence := range []string{"```json", "```"} {
		body = strings.TrimPrefix(body, fence)
	}
	body = strings.TrimSpace(strings.TrimSuffix(body, "```"))

	keyPos := strings.Index(body, `"recommendations"`)
	if keyPos < 0 {
		return nil
	}
	bracketOffset := strings.Index(body[keyPos:], "[")
	if bracketOffset < 0 {
		return nil
	}

	var (
		quoted   bool // currently inside a JSON string literal
		skipNext bool // previous byte was a backslash inside a string
		depth    int  // brace nesting depth relative to the array
	)
	start := -1 // byte offset of the element currently being read
	found := make([]string, 0, 4)

	// Byte-wise scanning is sufficient: every structural character is ASCII.
	for pos := keyPos + bracketOffset + 1; pos < len(body); pos++ {
		c := body[pos]
		// Case order matters: escape handling first, then string state,
		// and only then structural characters.
		switch {
		case skipNext:
			skipNext = false
		case quoted && c == '\\':
			skipNext = true
		case c == '"':
			quoted = !quoted
		case quoted:
			// Braces and brackets inside string values are not structural.
		case c == '{':
			if depth == 0 {
				start = pos
			}
			depth++
		case c == '}' && depth > 0:
			depth--
			if depth == 0 && start >= 0 {
				found = append(found, body[start:pos+1])
				start = -1
			}
		case c == ']' && depth == 0:
			// End of the recommendations array.
			return found
		}
	}
	return found
}
func (g *GeminiService) BuildSupplementalQueries(query string, existing []string, reviewed []AIRecommendation) ([]string, error) { func (g *GeminiService) BuildSupplementalQueries(query string, existing []string, reviewed []AIRecommendation) ([]string, error) {
baseExisting := make([]string, 0, len(existing)) baseExisting := make([]string, 0, len(existing))
for _, item := range existing { for _, item := range existing {
+28
View File
@@ -228,3 +228,31 @@ func TestFilterHardGeminiErrorsIgnoresLowValueVisualFailures(t *testing.T) {
t.Fatalf("unexpected filtered errors: %#v", filtered) t.Fatalf("unexpected filtered errors: %#v", filtered)
} }
} }
// TestParseGeminiVisionRecommendationsRecoversCompleteObjectsFromTruncatedJSON
// feeds the parser a reply that is cut off in the middle of its third
// recommendation and checks that the two complete entries are recovered, in
// order, with the partial-recovery flag set.
func TestParseGeminiVisionRecommendationsRecoversCompleteObjectsFromTruncatedJSON(t *testing.T) {
	// Third element ends mid-string: no closing quote, brace or bracket.
	const truncated = "{\n" +
		" \"recommendations\": [\n" +
		" {\"index\":0,\"verdict\":\"Yes\",\"reason\":\"적합\",\"recommended\":true,\"assessment\":\"positive\",\"searchHint\":\"\"},\n" +
		" {\"index\":1,\"verdict\":\"No\",\"reason\":\"부적합\",\"recommended\":false,\"assessment\":\"irrelevant\",\"searchHint\":\"night city b-roll\"},\n" +
		" {\"index\":2,\"verdict\":\"Yes\",\"reason\":\"잘림"

	parsed, recoveredPartial, err := parseGeminiVisionRecommendations(truncated)
	switch {
	case err != nil:
		t.Fatalf("expected partial recovery, got error: %v", err)
	case !recoveredPartial:
		t.Fatal("expected partial recovery flag to be true")
	case len(parsed.Recommendations) != 2:
		t.Fatalf("expected 2 recovered recommendation objects, got %#v", parsed.Recommendations)
	}

	// Recovered entries must keep their original order: index 0, then index 1.
	for wantIndex, rec := range parsed.Recommendations {
		if rec.Index != wantIndex {
			t.Fatalf("unexpected recovered recommendations: %#v", parsed.Recommendations)
		}
	}
}
// TestExtractCompleteRecommendationObjectsReturnsNilWhenArrayMissing checks
// that input without a "recommendations" key yields no recovered objects.
func TestExtractCompleteRecommendationObjectsReturnsNilWhenArrayMissing(t *testing.T) {
	got := extractCompleteRecommendationObjects(`{"message":"no recommendations here"}`)
	if len(got) == 0 {
		return
	}
	t.Fatalf("expected no objects, got %#v", got)
}
+1 -1
View File
@@ -105,7 +105,7 @@ func EvaluateAllCandidatesWithGemini(service *GeminiService, query string, ranke
} }
func EvaluateAllCandidatesWithGeminiWithDeadline(service *GeminiService, query string, ranked []SearchResult, deadline time.Time) ([]AIRecommendation, GeminiBatchStats, error) { func EvaluateAllCandidatesWithGeminiWithDeadline(service *GeminiService, query string, ranked []SearchResult, deadline time.Time) ([]AIRecommendation, GeminiBatchStats, error) {
const chunkSize = 6 const chunkSize = 4
const maxConcurrentBatches = 2 const maxConcurrentBatches = 2
if service == nil { if service == nil {
return nil, GeminiBatchStats{}, fmt.Errorf("gemini service is not configured") return nil, GeminiBatchStats{}, fmt.Errorf("gemini service is not configured")