Files
ai-media-hub/backend/services/ranker.go
T
GHStaK 513199f426
build-push / docker (push) Successful in 4m13s
Harden gemini vision JSON recovery
2026-03-17 16:33:09 +09:00

535 lines
15 KiB
Go

package services
import (
"fmt"
"math/rand"
"sort"
"strings"
"sync"
"time"
)
// Reason strings attached to recommendations that were not confirmed by a
// Gemini Vision evaluation.
const (
	// GeminiFallbackReason is the default reason BuildFallbackRecommendations
	// uses when the caller passes a blank reason. Roughly: "Gemini Vision's
	// response was insufficient, so this result was supplemented by keyword
	// ranking."
	GeminiFallbackReason = "Gemini Vision 응답이 부족해 키워드 기준으로 보강된 결과입니다."

	// FallbackPreviewReason is a reason string for results that fall back
	// because the provider preview is missing.
	// NOTE(review): not referenced in this file; usage lives elsewhere.
	FallbackPreviewReason = "Fallback due to missing provider preview."

	// SupplementalFallbackReason is the default reason BackfillRecommendations
	// attaches to filler entries. Roughly: "Even after additional exploration
	// there were not enough confident candidates, so candidates with visual
	// assets were added in a limited way."
	SupplementalFallbackReason = "추가 탐색 후에도 충분한 확신 후보가 부족해 시각 자산이 있는 후보를 제한적으로 보강했습니다."
)
// GeminiBatchStats summarizes one Gemini Vision evaluation pass over a ranked
// candidate list. It is filled in by
// EvaluateAllCandidatesWithGeminiWithDeadline and serialized as JSON.
type GeminiBatchStats struct {
// CandidateCap is the candidate limit applied to the pass
// (GeminiCandidateLimit of the ranked list length).
CandidateCap int `json:"candidateCap"`
// Requested is the number of candidates actually submitted for evaluation.
Requested int `json:"requested"`
// Batches is the number of chunks the submitted candidates were split into.
Batches int `json:"batches"`
// Succeeded counts batches that returned without error, plus failed batches
// for which the per-candidate retry recovered at least one recommendation.
Succeeded int `json:"succeeded"`
// Failed counts batches that ended with at least one non-ignorable error.
Failed int `json:"failed"`
// SequentialRetried counts failed batches whose per-candidate retry
// recovered at least one recommendation.
SequentialRetried int `json:"sequentialRetried"`
// RecommendedCount is the number of unique (by link) evaluations collected.
RecommendedCount int `json:"recommendedCount"`
// VisualRejectCount counts submitted candidates that have neither a preview
// video URL nor a usable thumbnail.
VisualRejectCount int `json:"visualRejectCount"`
// DeadlineLimited is set when a batch error message mentions
// "due to deadline".
DeadlineLimited bool `json:"deadlineLimited,omitempty"`
// Errors holds at most five error messages collected during the pass.
Errors []string `json:"errors,omitempty"`
}
// RankSearchResults returns results reordered by a heuristic score, highest
// first. The sort is stable, so results with equal scores keep their input
// order. The input slice is not modified.
//
// Scoring, applied to the lower-cased title, snippet and source of each result:
//   - +3 for every whitespace-separated query term found in the text
//   - +2 for every stock-footage style keyword found
//   - -4 for every clickbait/tutorial style keyword found
//   - +10 when a preview video URL is present
//   - +5 when the thumbnail is usable, -8 when it is low value
//   - -10 when there is neither a preview video nor a usable thumbnail
//   - a per-source adjustment (Google Video -2, Envato +5, Artgrid +4)
//
// A preview URL that is empty after trimming whitespace counts as missing for
// both the bonus and the penalty, matching the checks used elsewhere in this
// file.
func RankSearchResults(query string, results []SearchResult) []SearchResult {
	queryTerms := strings.Fields(strings.ToLower(query))
	positiveTerms := []string{
		"b-roll", "b roll", "stock", "stock footage", "footage", "cinematic", "editorial",
		"establishing", "4k", "hd", "drone", "ambient", "scene", "urban", "cityscape",
	}
	negativeTerms := []string{
		"shocking", "amazing", "crazy", "must watch", "reaction", "gossip", "celebrity",
		"thumbnail", "meme", "prank", "drama", "breaking", "viral", "tutorial",
		"how to", "review", "walkthrough", "course", "lesson", "podcast", "interview",
		"premiere pro", "after effects", "explained", "breakdown", "vlog",
	}

	type scoredResult struct {
		item  SearchResult
		score int
	}

	scored := make([]scoredResult, 0, len(results))
	for _, result := range results {
		score := 0
		text := strings.ToLower(result.Title + " " + result.Snippet + " " + result.Source)
		for _, term := range queryTerms {
			if strings.Contains(text, term) {
				score += 3
			}
		}
		for _, term := range positiveTerms {
			if strings.Contains(text, term) {
				score += 2
			}
		}
		for _, term := range negativeTerms {
			if strings.Contains(text, term) {
				score -= 4
			}
		}

		// Evaluate media availability once so the bonus and the penalty can
		// never disagree about whether a preview or thumbnail exists.
		hasPreview := strings.TrimSpace(result.PreviewVideoURL) != ""
		usableThumbnail := hasUsableThumbnail(result.ThumbnailURL)
		if hasPreview {
			score += 10
		}
		if usableThumbnail {
			score += 5
		}
		if isLowValueThumbnail(result.ThumbnailURL) {
			score -= 8
		}
		if !hasPreview && !usableThumbnail {
			score -= 10
		}

		switch result.Source {
		case "Google Video":
			score -= 2
		case "Envato":
			score += 5
		case "Artgrid":
			score += 4
		}
		scored = append(scored, scoredResult{item: result, score: score})
	}

	sort.SliceStable(scored, func(i, j int) bool {
		return scored[i].score > scored[j].score
	})

	ranked := make([]SearchResult, 0, len(scored))
	for _, entry := range scored {
		ranked = append(ranked, entry.item)
	}
	return ranked
}
// GeminiCandidateLimit caps a candidate count at 24, the maximum number of
// candidates sent to Gemini in one evaluation pass. Values below the cap
// (including zero and negatives) are returned unchanged.
func GeminiCandidateLimit(total int) int {
	const maxCandidates = 24
	if total < maxCandidates {
		return total
	}
	return maxCandidates
}
// EvaluateAllCandidatesWithGemini evaluates the ranked candidates with Gemini
// without any time limit. It delegates to
// EvaluateAllCandidatesWithGeminiWithDeadline with a zero deadline, which
// disables the deadline checks there.
func EvaluateAllCandidatesWithGemini(service *GeminiService, query string, ranked []SearchResult) ([]AIRecommendation, GeminiBatchStats, error) {
	var noDeadline time.Time
	return EvaluateAllCandidatesWithGeminiWithDeadline(service, query, ranked, noDeadline)
}
// EvaluateAllCandidatesWithGeminiWithDeadline sends the top ranked candidates
// to Gemini in chunks of four, running at most two chunks concurrently, and
// merges the returned evaluations (deduplicated by link, in batch order).
//
// Parameters:
//   - service: Gemini client; a nil service yields an error immediately.
//   - query: the user query forwarded to every Recommend call.
//   - ranked: candidates in ranked order; only the first
//     GeminiCandidateLimit(len(ranked)) are evaluated.
//   - deadline: when non-zero, batches that would start after this instant are
//     skipped instead of calling Gemini. The zero value disables the check.
//
// A batch that fails (or is skipped) is retried one candidate at a time via
// recoverGeminiBatchSequentially. Errors classified as ignorable by
// filterHardGeminiErrors do not count as failures.
//
// Returns the merged evaluations, the collected statistics, and:
//   - nil when at least one evaluation was collected and no batch failed;
//   - a "partially failed" error alongside the evaluations when some batches
//     failed;
//   - an error and nil evaluations when every batch failed or nothing was
//     returned.
//
// An empty candidate list returns an empty slice and a nil error.
func EvaluateAllCandidatesWithGeminiWithDeadline(service *GeminiService, query string, ranked []SearchResult, deadline time.Time) ([]AIRecommendation, GeminiBatchStats, error) {
	const chunkSize = 4
	const maxConcurrentBatches = 2
	const maxReportedErrors = 5

	if service == nil {
		return nil, GeminiBatchStats{}, fmt.Errorf("gemini service is not configured")
	}

	limit := GeminiCandidateLimit(len(ranked))
	requested := min(limit, len(ranked))
	stats := GeminiBatchStats{
		CandidateCap: limit,
		Requested:    requested,
	}
	for _, item := range ranked[:requested] {
		if strings.TrimSpace(item.PreviewVideoURL) == "" && !hasUsableThumbnail(item.ThumbnailURL) {
			stats.VisualRejectCount++
		}
	}

	type batchResult struct {
		index           int
		recommendations []AIRecommendation
		err             error
	}

	batches := make([][]SearchResult, 0, (limit+chunkSize-1)/chunkSize)
	for start := 0; start < limit; start += chunkSize {
		end := start + chunkSize
		if end > limit {
			end = limit
		}
		batches = append(batches, ranked[start:end])
	}
	stats.Batches = len(batches)
	if len(batches) == 0 {
		return []AIRecommendation{}, stats, nil
	}
	if service.Debug != nil {
		service.Debug("ranker:gemini_batches_created", map[string]any{
			"query":     query,
			"limit":     limit,
			"batches":   len(batches),
			"chunkSize": chunkSize,
		})
	}

	// Each goroutine writes only to its own slot of results, so no extra
	// locking is needed; wg.Wait() publishes the writes to this goroutine.
	results := make([]batchResult, len(batches))
	var wg sync.WaitGroup
	sem := make(chan struct{}, maxConcurrentBatches)
	for idx, batch := range batches {
		wg.Add(1)
		go func(batchIndex int, candidates []SearchResult) {
			defer wg.Done()
			// Take a concurrency slot before looking at the clock. Checking
			// the deadline first would let a batch that queued behind others
			// pass the check at launch and still call Gemini after the
			// deadline had expired.
			sem <- struct{}{}
			defer func() { <-sem }()
			if !deadline.IsZero() && time.Now().After(deadline) {
				results[batchIndex] = batchResult{
					index: batchIndex,
					err:   fmt.Errorf("skipped gemini batch due to deadline"),
				}
				return
			}
			recommended, err := service.Recommend(query, candidates)
			results[batchIndex] = batchResult{
				index:           batchIndex,
				recommendations: recommended,
				err:             err,
			}
		}(idx, batch)
	}
	wg.Wait()

	merged := make([]AIRecommendation, 0, len(ranked))
	seen := map[string]bool{}
	// appendUnique adds evaluations whose link is non-empty and not yet merged.
	appendUnique := func(items []AIRecommendation) {
		for _, item := range items {
			if item.Link == "" || seen[item.Link] {
				continue
			}
			seen[item.Link] = true
			merged = append(merged, item)
		}
	}
	// recordError keeps the reported error list bounded.
	recordError := func(message string) {
		if len(stats.Errors) < maxReportedErrors {
			stats.Errors = append(stats.Errors, message)
		}
	}

	for _, batch := range results {
		if batch.err == nil {
			stats.Succeeded++
			appendUnique(batch.recommendations)
			continue
		}

		if strings.Contains(batch.err.Error(), "due to deadline") {
			stats.DeadlineLimited = true
		}
		if service.Debug != nil {
			service.Debug("ranker:gemini_batch_error", map[string]any{
				"batchIndex": batch.index,
				"error":      batch.err.Error(),
			})
		}

		// Retry the failed batch one candidate at a time so a single bad
		// candidate does not discard the whole chunk.
		recovered, recoveredErrs := recoverGeminiBatchSequentially(service, query, ranked, batch.index*chunkSize, chunkSize, deadline)
		hardErrs := filterHardGeminiErrors(recoveredErrs)
		if len(recovered) > 0 {
			stats.SequentialRetried++
			stats.Succeeded++
			appendUnique(recovered)
			// A partially recovered batch counts as both succeeded and failed.
			if len(hardErrs) > 0 {
				stats.Failed++
				for _, recoveredErr := range hardErrs {
					recordError(recoveredErr)
				}
			}
			continue
		}
		if len(hardErrs) == 0 {
			// Only ignorable errors: the batch is neither a success nor a failure.
			continue
		}
		stats.Failed++
		recordError(batch.err.Error())
	}

	stats.RecommendedCount = len(merged)
	if service.Debug != nil {
		service.Debug("ranker:gemini_batches_complete", stats)
	}

	switch {
	case len(merged) > 0 && stats.Failed == 0:
		return merged, stats, nil
	case len(merged) > 0 && stats.Failed > 0:
		return merged, stats, fmt.Errorf("gemini vision partially failed on %d of %d batches", stats.Failed, stats.Batches)
	case stats.Failed == stats.Batches:
		if len(stats.Errors) > 0 {
			return nil, stats, fmt.Errorf("gemini vision failed for all batches: %s", strings.Join(stats.Errors, "; "))
		}
		return nil, stats, fmt.Errorf("gemini vision failed for all batches")
	default:
		return nil, stats, fmt.Errorf("gemini vision returned no candidate evaluations")
	}
}
// BuildFallbackRecommendations converts the first limit ranked results into
// non-recommended, "unclear" recommendations carrying the given reason.
//
// A blank reason is replaced by GeminiFallbackReason. When limit exceeds
// len(ranked) every ranked result is used; a zero or negative limit yields an
// empty (non-nil) slice instead of panicking. Each entry is passed through
// DecorateRecommendationMedia.
func BuildFallbackRecommendations(ranked []SearchResult, limit int, reason string) []AIRecommendation {
	if strings.TrimSpace(reason) == "" {
		reason = GeminiFallbackReason
	}

	count := min(limit, len(ranked))
	if count < 0 {
		// A negative count would panic in both make and the slice expression.
		count = 0
	}

	fallback := make([]AIRecommendation, 0, count)
	for _, item := range ranked[:count] {
		fallback = append(fallback, DecorateRecommendationMedia(AIRecommendation{
			Title:           item.Title,
			Link:            item.Link,
			Snippet:         item.Snippet,
			ThumbnailURL:    item.ThumbnailURL,
			PreviewVideoURL: item.PreviewVideoURL,
			Source:          item.Source,
			Reason:          reason,
			Recommended:     false,
			Assessment:      "unclear",
		}))
	}
	return fallback
}
// RandomizeTopRecommendations returns a copy of items in which the first
// min(window, len(items)) entries are shuffled; the remaining entries keep
// their positions. When there are fewer than two items or the window is
// smaller than two, the input slice itself is returned unchanged.
func RandomizeTopRecommendations(items []AIRecommendation, window int) []AIRecommendation {
	if window < 2 || len(items) < 2 {
		return items
	}

	// Work on a private copy so the caller's ordering is left intact.
	shuffled := append(make([]AIRecommendation, 0, len(items)), items...)

	span := window
	if len(shuffled) < span {
		span = len(shuffled)
	}

	swap := func(a, b int) {
		shuffled[a], shuffled[b] = shuffled[b], shuffled[a]
	}
	rand.New(rand.NewSource(time.Now().UnixNano())).Shuffle(span, swap)
	return shuffled
}
// ReviewedRecommendationLinks builds the set of non-empty links present in
// items, as a map from link to true.
func ReviewedRecommendationLinks(items []AIRecommendation) map[string]bool {
	links := make(map[string]bool)
	for i := range items {
		if link := items[i].Link; link != "" {
			links[link] = true
		}
	}
	return links
}
// RemainingGeminiCapacity reports how many more candidates may be evaluated
// before the Gemini candidate cap is reached, given the already reviewed
// recommendations (counted by distinct non-empty link). It never returns a
// negative number.
func RemainingGeminiCapacity(reviewed []AIRecommendation) int {
	capacity := GeminiCandidateLimit(24)
	used := len(ReviewedRecommendationLinks(reviewed))
	if used >= capacity {
		return 0
	}
	return capacity - used
}
// SelectUnevaluatedCandidates returns, in ranked order, up to limit results
// whose link is non-empty and not present in reviewedLinks. A non-positive
// limit yields an empty (non-nil) slice.
func SelectUnevaluatedCandidates(ranked []SearchResult, reviewedLinks map[string]bool, limit int) []SearchResult {
	if limit <= 0 {
		return []SearchResult{}
	}

	picked := make([]SearchResult, 0, min(limit, len(ranked)))
	for i := 0; i < len(ranked) && len(picked) < limit; i++ {
		candidate := ranked[i]
		if candidate.Link != "" && !reviewedLinks[candidate.Link] {
			picked = append(picked, candidate)
		}
	}
	return picked
}
// MergeUniqueRecommendations concatenates base and extra into a new slice,
// keeping only the first occurrence of each non-empty link. Entries with an
// empty link are dropped. Order is preserved: base entries first, then extra.
//
// Neither input is modified. The two slices are iterated separately rather
// than through append(base, extra...), which would write extra's elements into
// base's backing array whenever base has spare capacity.
func MergeUniqueRecommendations(base, extra []AIRecommendation) []AIRecommendation {
	total := len(base) + len(extra)
	merged := make([]AIRecommendation, 0, total)
	seen := make(map[string]bool, total)
	for _, group := range [][]AIRecommendation{base, extra} {
		for _, item := range group {
			if item.Link == "" || seen[item.Link] {
				continue
			}
			seen[item.Link] = true
			merged = append(merged, item)
		}
	}
	return merged
}
// MergeGeminiBatchStats combines the statistics of two evaluation passes.
//
// All counters are summed (including VisualRejectCount), DeadlineLimited is
// true when either pass was deadline limited, and the error lists are
// concatenated (base first) and truncated to five entries. Neither argument's
// Errors slice is modified.
func MergeGeminiBatchStats(base, extra GeminiBatchStats) GeminiBatchStats {
	merged := base
	merged.CandidateCap += extra.CandidateCap
	merged.Requested += extra.Requested
	merged.Batches += extra.Batches
	merged.Succeeded += extra.Succeeded
	merged.Failed += extra.Failed
	merged.SequentialRetried += extra.SequentialRetried
	merged.RecommendedCount += extra.RecommendedCount
	// These two fields were previously carried over from base only, silently
	// discarding what the second pass observed.
	merged.VisualRejectCount += extra.VisualRejectCount
	merged.DeadlineLimited = base.DeadlineLimited || extra.DeadlineLimited

	// Copy into a fresh slice so appending never touches base.Errors' backing array.
	merged.Errors = append(append([]string{}, base.Errors...), extra.Errors...)
	if len(merged.Errors) > 5 {
		merged.Errors = merged.Errors[:5]
	}
	return merged
}
// filterHardGeminiErrors returns the messages from errs that are not
// classified as ignorable by isIgnorableGeminiError, preserving their order.
// The result is always a non-nil slice.
func filterHardGeminiErrors(errs []string) []string {
	hard := make([]string, 0, len(errs))
	for i := range errs {
		if !isIgnorableGeminiError(errs[i]) {
			hard = append(hard, errs[i])
		}
	}
	return hard
}
// isIgnorableGeminiError reports whether message describes a candidate that
// simply had no usable visual input (missing or low-value thumbnail, no
// preview frames, empty image URL). Matching is case-insensitive and by
// substring; a blank message is never ignorable.
func isIgnorableGeminiError(message string) bool {
	normalized := strings.ToLower(strings.TrimSpace(message))
	switch {
	case normalized == "":
		return false
	case strings.Contains(normalized, "no candidate thumbnails or preview frames could be fetched for gemini vision"),
		strings.Contains(normalized, "candidate thumbnail is low value"),
		strings.Contains(normalized, "candidate has no thumbnail or preview video"),
		strings.Contains(normalized, "image url is empty"):
		return true
	default:
		return false
	}
}
// recoverGeminiBatchSequentially re-evaluates the candidates
// ranked[startIndex : startIndex+chunkSize] (clamped to len(ranked)) one at a
// time, pausing 150ms after every attempt.
//
// It returns every recommendation obtained from the successful calls and up
// to four error messages. When a non-zero deadline has passed, the loop stops
// and records "sequential gemini recovery stopped at deadline".
func recoverGeminiBatchSequentially(service *GeminiService, query string, ranked []SearchResult, startIndex, chunkSize int, deadline time.Time) ([]AIRecommendation, []string) {
	const maxErrs = 4
	const pause = 150 * time.Millisecond

	recovered := make([]AIRecommendation, 0, chunkSize)
	errs := make([]string, 0, maxErrs)
	// note records a message unless the error list is already full.
	note := func(message string) {
		if len(errs) < maxErrs {
			errs = append(errs, message)
		}
	}

	stop := min(startIndex+chunkSize, len(ranked))
	for i := startIndex; i < stop; i++ {
		if !deadline.IsZero() && time.Now().After(deadline) {
			note("sequential gemini recovery stopped at deadline")
			break
		}

		recs, err := service.Recommend(query, []SearchResult{ranked[i]})
		if err == nil {
			recovered = append(recovered, recs...)
		} else {
			note(err.Error())
		}
		// Pause after every attempt, successful or not, before the next call.
		time.Sleep(pause)
	}
	return recovered, errs
}
// NeedsSupplementalExploration decides whether another search round is
// warranted for the evaluated items.
//
// It returns true for an empty list, false once at least four items are both
// recommended and assessed "positive", and otherwise true when either
//   - negative items (excluded assessment or negative-sounding reason) number
//     at least max(2, len(items)/3), or
//   - "unclear" items number at least max(2, len(items)/2).
func NeedsSupplementalExploration(items []AIRecommendation) bool {
	total := len(items)
	if total == 0 {
		return true
	}

	var positives, negatives, unclear int
	for i := range items {
		entry := &items[i]
		if entry.Recommended && entry.Assessment == "positive" {
			positives++
		}
		if IsExcludedAssessment(entry.Assessment) || looksNegativeReason(entry.Reason) {
			negatives++
		}
		if entry.Assessment == "unclear" {
			unclear++
		}
	}

	if positives >= 4 {
		return false
	}
	if negatives >= max(2, total/3) {
		return true
	}
	return unclear >= max(2, total/2)
}
// looksNegativeReason reports whether a free-text reason contains one of the
// known Korean or English phrases signalling that a candidate does not fit
// the query. Matching is by substring on the trimmed, lower-cased reason; a
// blank reason is never negative.
func looksNegativeReason(reason string) bool {
	normalized := strings.ToLower(strings.TrimSpace(reason))
	if normalized == "" {
		return false
	}

	koreanMarkers := []string{
		"부적합", "관련이 없", "맞지 않", "의도와 맞지", "무관", "연관성 낮", "적절하지 않", "불일치",
	}
	englishMarkers := []string{
		"not relevant", "irrelevant", "mismatch", "does not match", "unsuitable",
	}
	for _, markers := range [][]string{koreanMarkers, englishMarkers} {
		for _, marker := range markers {
			if strings.Contains(normalized, marker) {
				return true
			}
		}
	}
	return false
}
// MergeRecommendations builds the final recommendation list from Gemini
// evaluations, deduplicated by link and decorated with
// DecorateRecommendationMedia.
//
// Pass 1 keeps every item that is recommended with a "positive" assessment;
// this pass is not capped by limit. Pass 2 tops the list up to limit with
// non-recommended items, skipping those that have an excluded assessment, a
// negative-sounding reason, a reason containing GeminiFallbackReason, an
// "unclear" assessment, or no preview video and no usable thumbnail.
//
// ranked is used only to size the result buffer. A zero or negative limit
// disables pass 2 instead of panicking.
func MergeRecommendations(recommended []AIRecommendation, ranked []SearchResult, limit int) []AIRecommendation {
	capacity := min(limit, len(ranked))
	if capacity < 0 {
		// make panics on a negative capacity; treat it as "no preallocation".
		capacity = 0
	}
	merged := make([]AIRecommendation, 0, capacity)
	seen := map[string]bool{}

	for _, item := range recommended {
		if !item.Recommended || item.Assessment != "positive" {
			continue
		}
		if item.Link == "" || seen[item.Link] {
			continue
		}
		seen[item.Link] = true
		merged = append(merged, DecorateRecommendationMedia(item))
	}

	for _, item := range recommended {
		if len(merged) >= limit {
			// The list can only grow, so no later item can be admitted.
			break
		}
		if item.Recommended || item.Link == "" || seen[item.Link] {
			continue
		}
		if IsExcludedAssessment(item.Assessment) || looksNegativeReason(item.Reason) || strings.Contains(item.Reason, GeminiFallbackReason) {
			continue
		}
		if item.Assessment == "unclear" {
			continue
		}
		if strings.TrimSpace(item.PreviewVideoURL) == "" && !hasUsableThumbnail(item.ThumbnailURL) {
			continue
		}
		seen[item.Link] = true
		merged = append(merged, DecorateRecommendationMedia(item))
	}
	return merged
}
// BackfillRecommendations returns the existing recommendations (deduplicated
// by link, not capped by limit) followed by filler entries taken from ranked.
//
// Filler is added in ranked order until the list reaches limit or
// min(4, limit) fillers have been added. A ranked result is skipped when its
// link is empty or already present, or when it has neither a preview video
// URL nor a usable thumbnail. Filler entries are non-recommended, "unclear",
// and carry the trimmed reason, or SupplementalFallbackReason when reason is
// blank. Every entry is passed through DecorateRecommendationMedia.
//
// A zero or negative limit adds no filler instead of panicking.
func BackfillRecommendations(existing []AIRecommendation, ranked []SearchResult, limit int, reason string) []AIRecommendation {
	capacity := min(limit, len(ranked))
	if capacity < 0 {
		// make panics on a negative capacity; treat it as "no preallocation".
		capacity = 0
	}
	merged := make([]AIRecommendation, 0, capacity)
	seen := map[string]bool{}

	for _, item := range existing {
		if item.Link == "" || seen[item.Link] {
			continue
		}
		seen[item.Link] = true
		merged = append(merged, DecorateRecommendationMedia(item))
	}

	maxFiller := min(4, limit)
	fillerCount := 0
	fillerReason := firstNonEmpty(strings.TrimSpace(reason), SupplementalFallbackReason)
	for _, item := range ranked {
		if len(merged) >= limit || fillerCount >= maxFiller {
			// Both bounds are monotonic, so nothing further can be added.
			break
		}
		if item.Link == "" || seen[item.Link] {
			continue
		}
		if strings.TrimSpace(item.PreviewVideoURL) == "" && !hasUsableThumbnail(item.ThumbnailURL) {
			continue
		}
		seen[item.Link] = true
		merged = append(merged, DecorateRecommendationMedia(AIRecommendation{
			Title:           item.Title,
			Link:            item.Link,
			Snippet:         item.Snippet,
			ThumbnailURL:    item.ThumbnailURL,
			PreviewVideoURL: item.PreviewVideoURL,
			Source:          item.Source,
			Reason:          fillerReason,
			Recommended:     false,
			Assessment:      "unclear",
		}))
		fillerCount++
	}
	return merged
}
// IsExcludedAssessment reports whether an assessment label marks a candidate
// that must not be shown: "irrelevant" or "inappropriate", compared after
// trimming whitespace and lower-casing.
func IsExcludedAssessment(assessment string) bool {
	normalized := strings.ToLower(strings.TrimSpace(assessment))
	return normalized == "irrelevant" || normalized == "inappropriate"
}
// max returns the larger of two ints. This package-level definition takes
// precedence over the built-in of the same name within the package.
func max(a, b int) int {
	if b > a {
		return b
	}
	return a
}