Revert "Reduce gemini partial batch noise"

This reverts commit 3be797131a.
This commit is contained in:
GHStaK
2026-03-18 13:00:41 +09:00
parent 770aea0f57
commit 9a33ecc6b5
4 changed files with 30 additions and 79 deletions
-16
View File
@@ -655,22 +655,6 @@
- If behavior in the browser does not match the latest backend/frontend code, the first assumption should be stale frontend assets until proven otherwise
## Recent Change Log
- Date: `2026-03-17`
- What changed:
- Added adaptive Gemini Vision output-token sizing so smaller candidate batches, especially single-candidate sequential recovery calls, now request much shorter responses.
- Added a dedicated shorter single-candidate Gemini Vision instruction path for sequential recovery after batch failure.
- Stopped counting a batch as a strong user-facing partial failure when sequential recovery still salvages recommendations from that batch.
- Added unit coverage for the adaptive Gemini Vision token budget helper.
- Why it changed:
- The user-provided log `ai-media-hub-2026-03-17T07-55-17-127Z.log` still showed `gemini vision partially failed on 4 of 6 batches`.
- The same log also showed `sequentialRetried: 0`, which means the fallback single-candidate reevaluation path was still not recovering those truncated JSON batches well enough.
- How it was verified:
- `pwsh -NoProfile -File scripts/selftest.ps1`
- added Go tests for adaptive Gemini token sizing
- What is still risky or incomplete:
- This reduces partial-failure pressure further, but extremely short or malformed Gemini outputs can still fail before one complete recommendation object is emitted.
- Smaller recovery responses improve reliability, but repeated sequential recovery can still add latency on difficult searches.
- Date: `2026-03-17`
- What changed:
- Reduced Gemini Vision batch size from `6` to `4` so each model response carries fewer recommendation objects and is less likely to be truncated mid-JSON.
+22 -54
View File
@@ -253,7 +253,24 @@ func (g *GeminiService) Recommend(query string, candidates []SearchResult) ([]AI
type geminiPart map[string]any
parts := []geminiPart{
{
"text": buildGeminiVisionInstruction(query, len(candidates)),
"text": `You are a professional video editor. Analyze whether each provided visual is suitable as a usable scene or shot for the user's requested keyword. Return JSON only in this shape:
{"recommendations":[{"index":0,"verdict":"Yes","reason":"short reason","recommended":true,"assessment":"positive","searchHint":"short english hint"}]}
Return one entry for every analyzed candidate. Use Korean for every reason. Keep reasons concise but specific enough to explain usefulness.
Keep each Korean reason very short, ideally one sentence under 24 Korean characters when possible.
Set verdict to "Yes" or "No" for every candidate. "Yes" means the scene is usable and relevant for editing against the user's keyword. "No" means it is not suitable or not relevant enough.
Set recommended=true only when verdict is "Yes". Set recommended=false when verdict is "No".
Set assessment to one of: positive, unclear, irrelevant, inappropriate.
- positive: directly usable and relevant to the query
- unclear: visually ambiguous, weak, or not confident enough
- irrelevant: visibly unrelated to the query intent
- inappropriate: low-quality, spammy, misleading, meme-like, or otherwise unsuitable for professional editing
When assessment is not positive, provide searchHint as a short English stock-footage search phrase that could help find better candidates. Keep it under 8 words.
When assessment is positive, searchHint may be empty.
Do not include markdown fences, explanations, or comments. Output compact JSON only.
Prefer cinematic b-roll, stock footage, editorial footage, clean composition, usable establishing shots, and professional media thumbnails.
Avoid clickbait faces, exaggerated expressions, meme aesthetics, low-information thumbnails, sensational text overlays, or gossip-style imagery.
Favor scenes that look directly useful for professional editing, sequencing, establishing, cutaway, or mood-building usage.
User query: ` + query,
},
}
@@ -280,10 +297,9 @@ func (g *GeminiService) Recommend(query string, candidates []SearchResult) ([]AI
return nil, fmt.Errorf("no candidate thumbnails or preview frames could be fetched for gemini vision")
}
g.debug("gemini:vision_visuals_prepared", map[string]any{
"query": query,
"visualCount": visualCount,
"maxImages": maxImages,
"maxOutputTokens": geminiVisionMaxOutputTokens(visualCount),
"query": query,
"visualCount": visualCount,
"maxImages": maxImages,
})
body := map[string]any{
@@ -293,7 +309,7 @@ func (g *GeminiService) Recommend(query string, candidates []SearchResult) ([]AI
"generationConfig": map[string]any{
"responseMimeType": "application/json",
"temperature": 0.1,
"maxOutputTokens": geminiVisionMaxOutputTokens(visualCount),
"maxOutputTokens": 900,
},
}
@@ -368,54 +384,6 @@ func (g *GeminiService) Recommend(query string, candidates []SearchResult) ([]AI
return recommendations, nil
}
// buildGeminiVisionInstruction assembles the instruction text for a Gemini
// Vision request.
//
// When candidateCount is 1 or less it uses a compact prompt that asks for
// exactly one recommendation item; otherwise it uses the full prompt that asks
// for one entry per analyzed candidate. In both cases query is appended
// verbatim after the trailing "User query: " label.
func buildGeminiVisionInstruction(query string, candidateCount int) string {
	// Compact prompt for a single visual.
	const singleVisualPrompt = `You are a professional video editor. Analyze the single provided visual for the user's keyword.
Return compact JSON only in this exact shape:
{"recommendations":[{"index":0,"verdict":"Yes","reason":"짧은 한국어 문장","recommended":true,"assessment":"positive","searchHint":""}]}
Return exactly one item.
Use a very short Korean reason.
Use verdict "Yes" or "No".
Set assessment to one of: positive, unclear, irrelevant, inappropriate.
Keep searchHint empty unless the visual is weak or irrelevant, then use a very short English stock-footage phrase.
No markdown fences. No commentary. Compact JSON only.
User query: `

	// Full prompt for a batch of visuals.
	const batchPrompt = `You are a professional video editor. Analyze whether each provided visual is suitable as a usable scene or shot for the user's requested keyword. Return JSON only in this shape:
{"recommendations":[{"index":0,"verdict":"Yes","reason":"short reason","recommended":true,"assessment":"positive","searchHint":"short english hint"}]}
Return one entry for every analyzed candidate. Use Korean for every reason. Keep reasons concise but specific enough to explain usefulness.
Keep each Korean reason very short, ideally one sentence under 24 Korean characters when possible.
Set verdict to "Yes" or "No" for every candidate. "Yes" means the scene is usable and relevant for editing against the user's keyword. "No" means it is not suitable or not relevant enough.
Set recommended=true only when verdict is "Yes". Set recommended=false when verdict is "No".
Set assessment to one of: positive, unclear, irrelevant, inappropriate.
- positive: directly usable and relevant to the query
- unclear: visually ambiguous, weak, or not confident enough
- irrelevant: visibly unrelated to the query intent
- inappropriate: low-quality, spammy, misleading, meme-like, or otherwise unsuitable for professional editing
When assessment is not positive, provide searchHint as a short English stock-footage search phrase that could help find better candidates. Keep it under 8 words.
When assessment is positive, searchHint may be empty.
Do not include markdown fences, explanations, or comments. Output compact JSON only.
Prefer cinematic b-roll, stock footage, editorial footage, clean composition, usable establishing shots, and professional media thumbnails.
Avoid clickbait faces, exaggerated expressions, meme aesthetics, low-information thumbnails, sensational text overlays, or gossip-style imagery.
Favor scenes that look directly useful for professional editing, sequencing, establishing, cutaway, or mood-building usage.
User query: `

	instruction := batchPrompt
	if candidateCount <= 1 {
		instruction = singleVisualPrompt
	}
	return instruction + query
}
// geminiVisionMaxOutputTokens returns the output-token limit to request for a
// Gemini Vision call covering candidateCount candidates.
//
// Counts of 1 or less map to 180, counts 2 through 4 map to 260, 340 and 420
// respectively, and anything larger maps to 520.
func geminiVisionMaxOutputTokens(candidateCount int) int {
	// Limits for one through four candidates, indexed by candidateCount-1.
	limits := [...]int{180, 260, 340, 420}

	if candidateCount <= 1 {
		return limits[0]
	}
	if candidateCount > len(limits) {
		return 520
	}
	return limits[candidateCount-1]
}
type geminiVisionParsedPayload struct {
Recommendations []struct {
Index int `json:"index"`
-9
View File
@@ -256,12 +256,3 @@ func TestExtractCompleteRecommendationObjectsReturnsNilWhenArrayMissing(t *testi
t.Fatalf("expected no objects, got %#v", got)
}
}
// TestGeminiVisionMaxOutputTokensShrinksForSingleCandidate checks that
// geminiVisionMaxOutputTokens returns 180 for one candidate and 420 for four.
func TestGeminiVisionMaxOutputTokensShrinksForSingleCandidate(t *testing.T) {
	singleBudget := geminiVisionMaxOutputTokens(1)
	if singleBudget != 180 {
		t.Fatalf("expected 180 tokens for single candidate, got %d", singleBudget)
	}

	fourBudget := geminiVisionMaxOutputTokens(4)
	if fourBudget != 420 {
		t.Fatalf("expected 420 tokens for four candidates, got %d", fourBudget)
	}
}
+8
View File
@@ -198,6 +198,14 @@ func EvaluateAllCandidatesWithGeminiWithDeadline(service *GeminiService, query s
seen[item.Link] = true
merged = append(merged, item)
}
if len(hardErrs) > 0 {
stats.Failed++
for _, recoveredErr := range hardErrs {
if len(stats.Errors) < 5 {
stats.Errors = append(stats.Errors, recoveredErr)
}
}
}
continue
}
if len(hardErrs) == 0 {