package services

import (
	"bytes"
	"encoding/base64"
	"encoding/json"
	"fmt"
	"io"
	"mime"
	"net/http"
	neturl "net/url"
	"os"
	"os/exec"
	"path/filepath"
	"sort"
	"strconv"
	"strings"
	"sync"
	"time"
)

const (
	// geminiTextCacheTTL is how long translations and query expansions stay cached.
	geminiTextCacheTTL = 15 * time.Minute
	// geminiVisualCacheTTL is how long fetched thumbnails and extracted frames stay cached.
	geminiVisualCacheTTL = 10 * time.Minute
	// geminiFrameExtractionTimeout bounds a single ffmpeg invocation so a stalled
	// remote stream cannot block the caller indefinitely.
	geminiFrameExtractionTimeout = 30 * time.Second
	// geminiDefaultTranslateEndpoint is used when TranslateEndpoint is blank.
	geminiDefaultTranslateEndpoint = "https://translate.googleapis.com/translate_a/single"
)

// geminiFallbackHTTPClient is used when a GeminiService has no Client set. It
// carries the same timeout NewGeminiService configures.
var geminiFallbackHTTPClient = &http.Client{Timeout: 40 * time.Second}

// GeminiService talks to the Gemini generateContent endpoint and to the Google
// Translate endpoint, and keeps short-lived in-memory caches of the results.
//
// Debug, when non-nil, receives a message name and a payload for every notable
// step. The three caches are guarded by cacheMu.
type GeminiService struct {
	APIKey            string
	Client            *http.Client
	GenerateEndpoint  string
	TranslateEndpoint string
	Debug             func(message string, data any)

	cacheMu          sync.Mutex
	visualCache      map[string]cachedVisualData
	translationCache map[string]cachedStringValue
	expansionCache   map[string]cachedExpansionValue
}

// cachedVisualData is a base64-encoded image plus its MIME type and expiry.
type cachedVisualData struct {
	data      string
	mimeType  string
	expiresAt time.Time
}

// cachedStringValue is a cached string with its expiry.
type cachedStringValue struct {
	value     string
	expiresAt time.Time
}

// cachedExpansionValue is a cached list of expanded queries with its expiry.
type cachedExpansionValue struct {
	value     []string
	expiresAt time.Time
}

// AIRecommendation is one reviewed search candidate as serialized to JSON.
// The fields from Title through SearchHint are filled in by this file; the
// remaining fields are not set here.
type AIRecommendation struct {
	Title                string `json:"title"`
	Link                 string `json:"link"`
	Snippet              string `json:"snippet"`
	ThumbnailURL         string `json:"thumbnailUrl"`
	PreviewVideoURL      string `json:"previewVideoUrl"`
	Source               string `json:"source"`
	Reason               string `json:"reason"`
	Recommended          bool   `json:"recommended"`
	Assessment           string `json:"assessment,omitempty"`
	SearchHint           string `json:"searchHint,omitempty"`
	MediaMode            string `json:"mediaMode,omitempty"`
	EmbedURL             string `json:"embedUrl,omitempty"`
	PreviewBlockedReason string `json:"previewBlockedReason,omitempty"`
	ActionLabel          string `json:"actionLabel,omitempty"`
	ActionType           string `json:"actionType,omitempty"`
	SecondaryActionLabel string `json:"secondaryActionLabel,omitempty"`
}

// QueryExpansion is the JSON shape for a list of expanded query words.
type QueryExpansion struct {
	Querywords []string `json:"querywords"`
}

// NewGeminiService returns a service with a 40-second HTTP client, the default
// generate and translate endpoints, and empty caches.
func NewGeminiService(apiKey string) *GeminiService {
	return &GeminiService{
		APIKey:            apiKey,
		Client:            &http.Client{Timeout: 40 * time.Second},
		GenerateEndpoint:  "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent",
		TranslateEndpoint: geminiDefaultTranslateEndpoint,
		visualCache:       map[string]cachedVisualData{},
		translationCache:  map[string]cachedStringValue{},
		expansionCache:    map[string]cachedExpansionValue{},
	}
}

// ExpandQuery translates query to English and derives a de-duplicated list of
// search phrases from it. Results are cached per trimmed query. The returned
// error is always nil.
func (g *GeminiService) ExpandQuery(query string) ([]string, error) {
	cacheKey := strings.TrimSpace(query)
	if cached, ok := g.getCachedExpansion(cacheKey); ok {
		g.debug("gemini:expand_query_cache_hit", map[string]any{"query": query, "expanded": cached})
		return cached, nil
	}

	englishBase := g.TranslateQuery(query)
	expanded := buildSearchQueries(query, englishBase)
	g.setCachedExpansion(cacheKey, expanded, geminiTextCacheTTL)
	g.debug("gemini:expand_query", map[string]any{
		"original": query,
		"english":  englishBase,
		"expanded": expanded,
	})
	return expanded, nil
}

// TranslateSummaryToKorean translates text to Korean through the translate
// endpoint. Blank input yields "". Text that is not mostly ASCII is returned
// (trimmed) without calling the endpoint. Successful results are cached.
func (g *GeminiService) TranslateSummaryToKorean(text string) (string, error) {
	trimmed := strings.TrimSpace(text)
	if trimmed == "" {
		return "", nil
	}

	cacheKey := "summary-ko\n" + trimmed
	if cached, ok := g.getCachedTranslation(cacheKey); ok {
		g.debug("gemini:summary_translate_cache_hit", map[string]any{"length": len(trimmed)})
		return cached, nil
	}
	if !looksMostlyASCII(trimmed) {
		g.setCachedTranslation(cacheKey, trimmed, geminiTextCacheTTL)
		return trimmed, nil
	}

	g.debug("gemini:summary_translate_attempt", map[string]any{"length": len(trimmed)})
	translated, err := g.translateViaGoogleToTarget(trimmed, "ko")
	if err != nil {
		g.debug("gemini:summary_translate_error", map[string]any{"length": len(trimmed), "error": err.Error()})
		return "", err
	}
	translated = strings.TrimSpace(translated)
	if translated == "" {
		return "", fmt.Errorf("google translate summary returned empty translation")
	}

	g.debug("gemini:summary_translate_success", map[string]any{"length": len(trimmed)})
	g.setCachedTranslation(cacheKey, translated, geminiTextCacheTTL)
	return translated, nil
}

// TranslateQuery turns query into an English search phrase. It tries, in
// order: the cache, known-phrase normalization (returned as-is when the result
// is mostly ASCII), the untouched query when it is already mostly ASCII,
// Gemini (only when APIKey is set), the translate endpoint, the built-in
// Korean term dictionary, and finally the normalized query. Every outcome is
// cached under the trimmed query.
func (g *GeminiService) TranslateQuery(query string) string {
	trimmed := strings.TrimSpace(query)
	if trimmed == "" {
		return ""
	}
	if cached, ok := g.getCachedTranslation(trimmed); ok {
		g.debug("gemini:translate_cache_hit", map[string]any{"query": trimmed, "translated": cached})
		return cached
	}

	normalizedIntent := normalizeKnownMediaPhrases(trimmed)
	if looksMostlyASCII(normalizedIntent) {
		result := strings.TrimSpace(normalizedIntent)
		g.setCachedTranslation(trimmed, result, geminiTextCacheTTL)
		return result
	}
	if looksMostlyASCII(trimmed) {
		g.setCachedTranslation(trimmed, trimmed, geminiTextCacheTTL)
		return trimmed
	}

	if g.APIKey != "" {
		if translated, ok := g.translateViaGemini(trimmed); ok {
			g.setCachedTranslation(trimmed, translated, geminiTextCacheTTL)
			return translated
		}
	}

	g.debug("gemini:translate_attempt", map[string]any{"mode": "google", "query": trimmed})
	if translated, err := g.translateViaGoogle(trimmed); err == nil && translated != "" && isLikelyEnglishQuery(translated) && !isOvercompressedTranslation(trimmed, translated) {
		g.debug("gemini:translate_success", map[string]any{"mode": "google", "query": trimmed, "translated": translated})
		g.setCachedTranslation(trimmed, translated, geminiTextCacheTTL)
		return translated
	}

	if translated := translateKoreanMediaTerms(normalizedIntent); translated != "" && !strings.EqualFold(translated, trimmed) {
		g.debug("gemini:translate_success", map[string]any{"mode": "dictionary", "query": trimmed, "translated": translated})
		g.setCachedTranslation(trimmed, translated, geminiTextCacheTTL)
		return translated
	}

	g.debug("gemini:translate_fallback_original", map[string]any{"query": trimmed, "normalized": normalizedIntent})
	result := strings.TrimSpace(normalizedIntent)
	g.setCachedTranslation(trimmed, result, geminiTextCacheTTL)
	return result
}

// translateViaGemini asks Gemini for an English footage search phrase for
// trimmed. It reports false when the request fails or when the answer is
// empty, equal to the input, or judged over-compressed.
func (g *GeminiService) translateViaGemini(trimmed string) (string, bool) {
	g.debug("gemini:translate_attempt", map[string]any{"mode": "gemini", "query": trimmed})
	body := map[string]any{
		"systemInstruction": map[string]any{
			"parts": []map[string]string{
				{
					"text": "You are a professional video editor. Infer stronger stock-footage and scene-search wording from the user's keyword, and expand it into natural English that a professional editor would use to find usable footage. Output one plain English search phrase only. No labels, no quotes, no explanations.",
				},
			},
		},
		"contents": []map[string]any{
			{
				"parts": []map[string]string{
					{
						"text": "Expand this user query into a concise but editor-grade English footage search phrase suitable for stock-footage discovery: " + trimmed,
					},
				},
			},
		},
		"generationConfig": map[string]any{
			"responseMimeType": "text/plain",
			"temperature":      0.1,
			"maxOutputTokens":  40,
		},
	}

	rawText, err := g.generateText(body)
	if err != nil {
		g.debug("gemini:translate_error", map[string]any{"mode": "gemini", "query": trimmed, "error": err.Error()})
		return "", false
	}
	translated := sanitizePlainEnglishLine(rawText)
	if translated == "" || strings.EqualFold(translated, trimmed) || isOvercompressedTranslation(trimmed, translated) {
		return "", false
	}
	g.debug("gemini:translate_success", map[string]any{"mode": "gemini", "query": trimmed, "translated": translated})
	return translated, true
}

// generateText posts body to the generate endpoint and returns the text of the
// first part of the first candidate.
func (g *GeminiService) generateText(body map[string]any) (string, error) {
	return g.requestGeneratedText(body, "gemini")
}

// requestGeneratedText performs the generateContent round trip shared by text
// and vision calls. label prefixes every error message so callers can tell the
// two apart.
func (g *GeminiService) requestGeneratedText(body map[string]any, label string) (string, error) {
	rawBody, err := json.Marshal(body)
	if err != nil {
		return "", fmt.Errorf("%s request encode failed: %w", label, err)
	}

	base := strings.TrimRight(g.GenerateEndpoint, "?")
	endpoint := base + "?key=" + neturl.QueryEscape(g.APIKey)
	resp, err := g.httpClient().Post(endpoint, "application/json", bytes.NewReader(rawBody))
	if err != nil {
		// The transport error embeds the request URL, which contains the API
		// key; replace it so the key never reaches error strings or logs.
		if urlErr, ok := err.(*neturl.Error); ok {
			urlErr.URL = base + "?key=REDACTED"
		}
		return "", fmt.Errorf("%s request failed: %w", label, err)
	}
	defer resp.Body.Close()

	if resp.StatusCode >= 300 {
		// Best effort: the body is only used to enrich the error message.
		data, _ := io.ReadAll(io.LimitReader(resp.Body, 2048))
		return "", fmt.Errorf("%s returned status %d: %s", label, resp.StatusCode, strings.TrimSpace(string(data)))
	}

	var payload struct {
		Candidates []struct {
			Content struct {
				Parts []struct {
					Text string `json:"text"`
				} `json:"parts"`
			} `json:"content"`
		} `json:"candidates"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&payload); err != nil {
		return "", fmt.Errorf("%s response decode failed: %w", label, err)
	}
	if len(payload.Candidates) == 0 || len(payload.Candidates[0].Content.Parts) == 0 {
		return "", fmt.Errorf("%s returned no candidates", label)
	}
	return payload.Candidates[0].Content.Parts[0].Text, nil
}

// httpClient returns the configured client, or a fallback with the same
// timeout when none is set.
func (g *GeminiService) httpClient() *http.Client {
	if g.Client != nil {
		return g.Client
	}
	return geminiFallbackHTTPClient
}

// Recommend asks Gemini to judge up to the first ten candidates against query
// using one visual per candidate. It fails when no API key is configured, when
// no visual could be fetched, or when the reply cannot be parsed. An empty
// candidate list yields an empty result; a single candidate uses a dedicated
// prompt.
func (g *GeminiService) Recommend(query string, candidates []SearchResult) ([]AIRecommendation, error) {
	if g.APIKey == "" {
		return nil, fmt.Errorf("gemini api key is not configured")
	}
	switch len(candidates) {
	case 0:
		return []AIRecommendation{}, nil
	case 1:
		return g.recommendSingleCandidate(query, candidates[0])
	}

	g.debug("gemini:vision_start", map[string]any{
		"query":          query,
		"candidateCount": len(candidates),
	})

	type geminiPart map[string]any
	parts := []geminiPart{
		{"text": buildGeminiVisionInstruction(query, len(candidates))},
	}

	maxImages := min(len(candidates), 10)
	visualCount := 0
	for idx := 0; idx < maxImages; idx++ {
		img, mimeType, err := g.fetchCandidateVisualInlineData(candidates[idx])
		if err != nil {
			g.debug("gemini:vision_candidate_visual_error", map[string]any{
				"index":  idx,
				"link":   candidates[idx].Link,
				"source": candidates[idx].Source,
				"error":  err.Error(),
			})
			continue
		}
		visualCount++
		// The label carries the original index so the reply can be mapped back
		// even when earlier candidates were skipped.
		parts = append(parts,
			geminiPart{"text": fmt.Sprintf("Candidate %d: title=%s source=%s link=%s", idx, candidates[idx].Title, candidates[idx].Source, candidates[idx].Link)},
			geminiPart{"inlineData": map[string]string{"mimeType": mimeType, "data": img}},
		)
	}
	if visualCount == 0 {
		return nil, fmt.Errorf("no candidate thumbnails or preview frames could be fetched for gemini vision")
	}

	maxTokens := geminiVisionMaxOutputTokens(visualCount)
	g.debug("gemini:vision_visuals_prepared", map[string]any{
		"query":           query,
		"visualCount":     visualCount,
		"maxImages":       maxImages,
		"maxOutputTokens": maxTokens,
	})

	body := map[string]any{
		"contents": []map[string]any{
			{"parts": parts},
		},
		"generationConfig": map[string]any{
			"responseMimeType": "text/plain",
			"temperature":      0.1,
			"maxOutputTokens":  maxTokens,
		},
	}
	rawText, err := g.requestGeneratedText(body, "gemini vision")
	if err != nil {
		return nil, err
	}

	parsed, recoveredPartial, err := parseGeminiVisionRecommendations(rawText)
	if err != nil {
		return nil, fmt.Errorf("gemini vision JSON extraction failed: %w", err)
	}
	if recoveredPartial {
		g.debug("gemini:vision_partial_json_recovered", map[string]any{
			"query":               query,
			"candidateCount":      len(candidates),
			"recommendationCount": len(parsed.Recommendations),
		})
	}

	recommendations := make([]AIRecommendation, 0, len(parsed.Recommendations))
	for _, rec := range parsed.Recommendations {
		// Ignore indexes the model invented.
		if rec.Index < 0 || rec.Index >= len(candidates) {
			continue
		}
		recommendations = append(recommendations,
			buildAIRecommendation(candidates[rec.Index], rec.Verdict, rec.Assessment, rec.Reason, rec.SearchHint, rec.Recommended))
	}

	g.debug("gemini:vision_complete", map[string]any{
		"query":               query,
		"recommendationCount": len(recommendations),
	})
	return recommendations, nil
}

// buildAIRecommendation combines a search result with the model's judgement.
// A candidate counts as recommended when the flag is set or the verdict is
// "yes" (case-insensitive).
func buildAIRecommendation(src SearchResult, verdict, assessment, reason, searchHint string, recommendedFlag bool) AIRecommendation {
	recommended := recommendedFlag || strings.EqualFold(strings.TrimSpace(verdict), "yes")
	return AIRecommendation{
		Title:           src.Title,
		Link:            src.Link,
		Snippet:         src.Snippet,
		ThumbnailURL:    src.ThumbnailURL,
		PreviewVideoURL: src.PreviewVideoURL,
		Source:          src.Source,
		Reason:          normalizeKoreanReason(reason),
		Recommended:     recommended,
		Assessment:      normalizeAssessment(assessment, recommended),
		SearchHint:      normalizeSearchHint(searchHint),
	}
}

// recommendSingleCandidate reviews exactly one candidate using a key/value
// line prompt instead of the pipe-separated multi-candidate format.
func (g *GeminiService) recommendSingleCandidate(query string, candidate SearchResult) ([]AIRecommendation, error) {
	g.debug("gemini:vision_start", map[string]any{
		"query":          query,
		"candidateCount": 1,
		"mode":           "single_candidate_recovery",
	})

	img, mimeType, err := g.fetchCandidateVisualInlineData(candidate)
	if err != nil {
		g.debug("gemini:vision_candidate_visual_error", map[string]any{
			"index":  0,
			"link":   candidate.Link,
			"source": candidate.Source,
			"error":  err.Error(),
		})
		return nil, err
	}

	g.debug("gemini:vision_visuals_prepared", map[string]any{
		"query":           query,
		"visualCount":     1,
		"maxImages":       1,
		"maxOutputTokens": 120,
		"mode":            "single_candidate_recovery",
	})

	body := map[string]any{
		"contents": []map[string]any{
			{
				"parts": []map[string]any{
					{
						"text": `You are a professional video editor.
Analyze the single provided visual for the user's keyword.
Return plain text only with exactly these 5 lines:
verdict: Yes or No
assessment: positive or unclear or irrelevant or inappropriate
recommended: true or false
reason_ko: very short Korean reason
search_hint: short English stock-footage hint or empty
No JSON. No markdown. No extra text.
User query: ` + query,
					},
					{"text": fmt.Sprintf("Candidate 0: title=%s source=%s link=%s", candidate.Title, candidate.Source, candidate.Link)},
					{"inlineData": map[string]string{"mimeType": mimeType, "data": img}},
				},
			},
		},
		"generationConfig": map[string]any{
			"responseMimeType": "text/plain",
			"temperature":      0.1,
			"maxOutputTokens":  120,
		},
	}

	rawText, err := g.generateText(body)
	if err != nil {
		return nil, err
	}
	rec, err := parseSingleCandidateVisionText(rawText)
	if err != nil {
		return nil, fmt.Errorf("gemini single-candidate parse failed: %w; raw=%q", err, truncateForError(rawText, 200))
	}

	result := buildAIRecommendation(candidate, rec.Verdict, rec.Assessment, rec.Reason, rec.SearchHint, rec.Recommended)
	g.debug("gemini:vision_complete", map[string]any{
		"query":               query,
		"recommendationCount": 1,
		"mode":                "single_candidate_recovery",
	})
	return []AIRecommendation{result}, nil
}

// buildGeminiVisionInstruction returns the multi-candidate review prompt for
// query. The second parameter is unused.
func buildGeminiVisionInstruction(query string, _ int) string {
	return `You are a professional video editor.
Analyze whether each provided visual is suitable as a usable scene or shot for the user's requested keyword.
Return plain text only.
Return exactly one line per analyzed candidate in this exact format:
index|verdict|assessment|recommended|reason_ko|search_hint
Rules:
- index: integer candidate index
- verdict: Yes or No
- assessment: positive or unclear or irrelevant or inappropriate
- recommended: true or false
- reason_ko: very short Korean reason without line breaks and without |
- search_hint: short English stock-footage phrase or empty, without |
Do not include markdown fences, JSON, bullets, numbering, or any other text.
Prefer cinematic b-roll, stock footage, editorial footage, clean composition, usable establishing shots, and professional media thumbnails.
Avoid clickbait faces, exaggerated expressions, meme aesthetics, low-information thumbnails, sensational text overlays, or gossip-style imagery.

User query: ` + query
}

// geminiVisionMaxOutputTokens returns the output token budget for a review of
// candidateCount visuals: 120, 180, 240 and 300 for up to four, 360 beyond.
func geminiVisionMaxOutputTokens(candidateCount int) int {
	switch {
	case candidateCount <= 1:
		return 120
	case candidateCount == 2:
		return 180
	case candidateCount == 3:
		return 240
	case candidateCount == 4:
		return 300
	default:
		return 360
	}
}

// geminiVisionRecommendation is one parsed review entry. It is a type alias so
// that it stays identical to the anonymous struct type used by
// geminiVisionParsedPayload.
type geminiVisionRecommendation = struct {
	Index       int    `json:"index"`
	Verdict     string `json:"verdict"`
	Reason      string `json:"reason"`
	Recommended bool   `json:"recommended"`
	Assessment  string `json:"assessment"`
	SearchHint  string `json:"searchHint"`
}

// geminiVisionParsedPayload is the parsed form of a multi-candidate review.
type geminiVisionParsedPayload struct {
	Recommendations []geminiVisionRecommendation `json:"recommendations"`
}

// parseGeminiVisionRecommendations parses a review reply. It accepts, in
// order: pipe-separated lines, a complete JSON object, and finally any
// complete objects salvaged from a truncated "recommendations" array. The
// boolean result is true only for the salvage path.
func parseGeminiVisionRecommendations(raw string) (geminiVisionParsedPayload, bool, error) {
	if parsed, ok := parseGeminiVisionLines(raw); ok {
		return parsed, false, nil
	}

	jsonText, err := extractJSONObject(raw)
	if err == nil {
		var parsed geminiVisionParsedPayload
		if unmarshalErr := json.Unmarshal([]byte(jsonText), &parsed); unmarshalErr != nil {
			return geminiVisionParsedPayload{}, false, fmt.Errorf("json parse failed: %w; raw=%q", unmarshalErr, truncateForError(raw, 200))
		}
		return parsed, false, nil
	}

	objects := extractCompleteRecommendationObjects(raw)
	if len(objects) == 0 {
		return geminiVisionParsedPayload{}, false, err
	}
	parsed := geminiVisionParsedPayload{
		Recommendations: make([]geminiVisionRecommendation, 0, len(objects)),
	}
	for _, objectText := range objects {
		var item geminiVisionRecommendation
		if unmarshalErr := json.Unmarshal([]byte(objectText), &item); unmarshalErr != nil {
			continue
		}
		parsed.Recommendations = append(parsed.Recommendations, item)
	}
	if len(parsed.Recommendations) == 0 {
		return geminiVisionParsedPayload{}, false, err
	}
	return parsed, true, nil
}

// parseGeminiVisionLines parses "index|verdict|assessment|recommended|reason|hint"
// lines. Lines without six fields or without an integer index are skipped. It
// reports false when no line parsed.
func parseGeminiVisionLines(raw string) (geminiVisionParsedPayload, bool) {
	lines := strings.Split(strings.ReplaceAll(strings.TrimSpace(raw), "\r\n", "\n"), "\n")
	parsed := geminiVisionParsedPayload{
		Recommendations: make([]geminiVisionRecommendation, 0, len(lines)),
	}
	for _, line := range lines {
		trimmed := strings.TrimSpace(strings.Trim(line, "`"))
		if trimmed == "" {
			continue
		}
		fields := strings.SplitN(trimmed, "|", 6)
		if len(fields) != 6 {
			continue
		}
		index, err := strconv.Atoi(strings.TrimSpace(fields[0]))
		if err != nil {
			continue
		}
		flag := strings.TrimSpace(fields[3])
		parsed.Recommendations = append(parsed.Recommendations, geminiVisionRecommendation{
			Index:       index,
			Verdict:     strings.TrimSpace(fields[1]),
			Assessment:  strings.TrimSpace(fields[2]),
			Recommended: strings.EqualFold(flag, "true") || strings.EqualFold(flag, "yes"),
			Reason:      strings.TrimSpace(fields[4]),
			SearchHint:  strings.TrimSpace(fields[5]),
		})
	}
	return parsed, len(parsed.Recommendations) > 0
}

// singleCandidateVisionResponse is the parsed single-candidate reply.
type singleCandidateVisionResponse struct {
	Verdict     string
	Assessment  string
	Recommended bool
	Reason      string
	SearchHint  string
}

// parseSingleCandidateVisionText parses "key: value" lines. Unknown keys and
// lines without a colon are ignored. A missing verdict is an error.
func parseSingleCandidateVisionText(raw string) (singleCandidateVisionResponse, error) {
	lines := strings.Split(strings.ReplaceAll(strings.TrimSpace(raw), "\r\n", "\n"), "\n")
	var result singleCandidateVisionResponse
	for _, line := range lines {
		rawKey, rawValue, found := strings.Cut(strings.TrimSpace(line), ":")
		if !found {
			continue
		}
		value := strings.TrimSpace(rawValue)
		switch strings.ToLower(strings.TrimSpace(rawKey)) {
		case "verdict":
			result.Verdict = value
		case "assessment":
			result.Assessment = value
		case "recommended":
			result.Recommended = strings.EqualFold(value, "true") || strings.EqualFold(value, "yes")
		case "reason_ko":
			result.Reason = value
		case "search_hint":
			result.SearchHint = value
		}
	}
	if strings.TrimSpace(result.Verdict) == "" {
		return singleCandidateVisionResponse{}, fmt.Errorf("missing verdict line")
	}
	return result, nil
}

// stripGeminiMarkdownFence trims surrounding whitespace and a leading
// "```json" or "```" and trailing "```" from text.
func stripGeminiMarkdownFence(text string) string {
	cleaned := strings.TrimSpace(text)
	cleaned = strings.TrimPrefix(cleaned, "```json")
	cleaned = strings.TrimPrefix(cleaned, "```")
	cleaned = strings.TrimSuffix(cleaned, "```")
	return strings.TrimSpace(cleaned)
}

// extractCompleteRecommendationObjects returns every balanced top-level
// {...} object found after the first '[' that follows "recommendations".
// An unterminated trailing object is dropped, which is what lets a truncated
// reply be partially recovered.
func extractCompleteRecommendationObjects(text string) []string {
	cleaned := stripGeminiMarkdownFence(text)
	recommendationsIndex := strings.Index(cleaned, `"recommendations"`)
	if recommendationsIndex == -1 {
		return nil
	}
	arrayStart := strings.Index(cleaned[recommendationsIndex:], "[")
	if arrayStart == -1 {
		return nil
	}
	arrayStart += recommendationsIndex

	objects := make([]string, 0, 4)
	inString := false
	escaped := false
	objectDepth := 0
	objectStart := -1
	for idx := arrayStart + 1; idx < len(cleaned); idx++ {
		ch := cleaned[idx]
		// Escapes and string contents must not affect brace counting.
		if escaped {
			escaped = false
			continue
		}
		if ch == '\\' && inString {
			escaped = true
			continue
		}
		if ch == '"' {
			inString = !inString
			continue
		}
		if inString {
			continue
		}
		switch ch {
		case '{':
			if objectDepth == 0 {
				objectStart = idx
			}
			objectDepth++
		case '}':
			if objectDepth == 0 {
				continue
			}
			objectDepth--
			if objectDepth == 0 && objectStart >= 0 {
				objects = append(objects, cleaned[objectStart:idx+1])
				objectStart = -1
			}
		case ']':
			if objectDepth == 0 {
				return objects
			}
		}
	}
	return objects
}

// BuildSupplementalQueries asks Gemini for additional English search phrases
// given the original query, the phrases already tried, and the reviewed
// candidates. It fails when no API key is configured, when the request fails,
// or when the reply contains no usable line.
func (g *GeminiService) BuildSupplementalQueries(query string, existing []string, reviewed []AIRecommendation) ([]string, error) {
	if g.APIKey == "" {
		return nil, fmt.Errorf("gemini api key is not configured")
	}

	baseExisting := make([]string, 0, len(existing))
	for _, item := range existing {
		if trimmed := strings.TrimSpace(item); trimmed != "" {
			baseExisting = append(baseExisting, trimmed)
		}
	}
	if len(baseExisting) == 0 {
		baseExisting = append(baseExisting, query)
	}

	positive := make([]string, 0, 3)
	negativeHints := make([]string, 0, 4)
	sourceCounts := map[string]int{}
	for _, item := range reviewed {
		sourceCounts[item.Source]++
		switch item.Assessment {
		case "positive":
			if len(positive) < 3 {
				positive = append(positive, truncateForError(strings.TrimSpace(item.Title), 80))
			}
		case "irrelevant", "inappropriate", "unclear":
			if item.SearchHint != "" && len(negativeHints) < 4 {
				negativeHints = append(negativeHints, item.SearchHint)
			}
		}
	}

	body := map[string]any{
		"systemInstruction": map[string]any{
			"parts": []map[string]string{{
				"text": "You generate improved stock-footage search phrases. Return 3 to 5 plain English search phrases only, one per line, no numbering, no quotes, no explanations.",
			}},
		},
		"contents": []map[string]any{{
			"parts": []map[string]string{{
				"text": fmt.Sprintf("Original query: %s\nExisting search phrases: %s\nPositive candidate titles: %s\nNegative or weak search hints: %s\nSource distribution: Envato=%d, Artgrid=%d, Google Video=%d\nGenerate improved English search phrases that avoid weak or irrelevant results and increase provider diversity.",
					query,
					strings.Join(baseExisting, " | "),
					strings.Join(positive, " | "),
					strings.Join(negativeHints, " | "),
					sourceCounts["Envato"],
					sourceCounts["Artgrid"],
					sourceCounts["Google Video"],
				),
			}},
		}},
		"generationConfig": map[string]any{
			"responseMimeType": "text/plain",
			"temperature":      0.3,
			"maxOutputTokens":  120,
		},
	}

	rawText, err := g.generateText(body)
	if err != nil {
		return nil, err
	}
	queries := parseSupplementalQueryLines(rawText)
	if len(queries) == 0 {
		return nil, fmt.Errorf("gemini returned no supplemental queries")
	}
	return queries, nil
}

// debug forwards to the Debug hook when both the receiver and the hook are set.
func (g *GeminiService) debug(message string, data any) {
	if g != nil && g.Debug != nil {
		g.Debug(message, data)
	}
}

// fetchImageAsInlineData downloads imageURL with browser-like headers and
// returns its base64 encoding and MIME type. At most 2 MiB of the body is
// read. A missing or non-image Content-Type is reported as "image/jpeg".
func fetchImageAsInlineData(client *http.Client, imageURL, referer string) (string, string, error) {
	if strings.TrimSpace(imageURL) == "" {
		return "", "", fmt.Errorf("image url is empty")
	}
	if client == nil {
		client = geminiFallbackHTTPClient
	}

	req, err := newBrowserStyleImageRequest(imageURL, referer)
	if err != nil {
		return "", "", err
	}
	resp, err := client.Do(req)
	if err != nil {
		return "", "", err
	}
	defer resp.Body.Close()
	if resp.StatusCode >= 300 {
		return "", "", fmt.Errorf("thumbnail fetch failed with %d", resp.StatusCode)
	}

	// A parse failure leaves mimeType empty, which falls through to the default.
	mimeType, _, _ := mime.ParseMediaType(resp.Header.Get("Content-Type"))
	if !strings.HasPrefix(mimeType, "image/") {
		mimeType = "image/jpeg"
	}

	data, err := io.ReadAll(io.LimitReader(resp.Body, 2*1024*1024))
	if err != nil {
		return "", "", err
	}
	return base64.StdEncoding.EncodeToString(data), mimeType, nil
}

// getCachedVisual returns the cached visual for key, deleting it when expired.
func (g *GeminiService) getCachedVisual(key string) (string, string, bool) {
	g.cacheMu.Lock()
	defer g.cacheMu.Unlock()

	entry, ok := g.visualCache[key]
	if !ok {
		return "", "", false
	}
	if time.Now().After(entry.expiresAt) {
		delete(g.visualCache, key)
		return "", "", false
	}
	return entry.data, entry.mimeType, true
}

// setCachedVisual stores a visual under key for ttl.
func (g *GeminiService) setCachedVisual(key, data, mimeType string, ttl time.Duration) {
	g.cacheMu.Lock()
	defer g.cacheMu.Unlock()

	// Writing to a nil map panics; allow services built without the constructor.
	if g.visualCache == nil {
		g.visualCache = map[string]cachedVisualData{}
	}
	g.visualCache[key] = cachedVisualData{
		data:      data,
		mimeType:  mimeType,
		expiresAt: time.Now().Add(ttl),
	}
}

// getCachedTranslation returns the cached string for key, deleting it when expired.
func (g *GeminiService) getCachedTranslation(key string) (string, bool) {
	g.cacheMu.Lock()
	defer g.cacheMu.Unlock()

	entry, ok := g.translationCache[key]
	if !ok {
		return "", false
	}
	if time.Now().After(entry.expiresAt) {
		delete(g.translationCache, key)
		return "", false
	}
	return entry.value, true
}

// setCachedTranslation stores value under key for ttl.
func (g *GeminiService) setCachedTranslation(key, value string, ttl time.Duration) {
	g.cacheMu.Lock()
	defer g.cacheMu.Unlock()

	if g.translationCache == nil {
		g.translationCache = map[string]cachedStringValue{}
	}
	g.translationCache[key] = cachedStringValue{
		value:     value,
		expiresAt: time.Now().Add(ttl),
	}
}

// getCachedExpansion returns a copy of the cached expansion for key, deleting
// the entry when expired.
func (g *GeminiService) getCachedExpansion(key string) ([]string, bool) {
	g.cacheMu.Lock()
	defer g.cacheMu.Unlock()

	entry, ok := g.expansionCache[key]
	if !ok {
		return nil, false
	}
	if time.Now().After(entry.expiresAt) {
		delete(g.expansionCache, key)
		return nil, false
	}
	// Copy so callers cannot mutate the cached slice.
	cloned := make([]string, len(entry.value))
	copy(cloned, entry.value)
	return cloned, true
}

// setCachedExpansion stores a copy of value under key for ttl.
func (g *GeminiService) setCachedExpansion(key string, value []string, ttl time.Duration) {
	g.cacheMu.Lock()
	defer g.cacheMu.Unlock()

	if g.expansionCache == nil {
		g.expansionCache = map[string]cachedExpansionValue{}
	}
	cloned := make([]string, len(value))
	copy(cloned, value)
	g.expansionCache[key] = cachedExpansionValue{
		value:     cloned,
		expiresAt: time.Now().Add(ttl),
	}
}

// newBrowserStyleImageRequest builds a GET request for imageURL with
// browser-like User-Agent, Accept and Accept-Language headers, plus a Referer
// when referer is not blank.
func newBrowserStyleImageRequest(imageURL, referer string) (*http.Request, error) {
	req, err := http.NewRequest(http.MethodGet, imageURL, nil)
	if err != nil {
		return nil, err
	}
	req.Header.Set("User-Agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36")
	req.Header.Set("Accept", "image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8")
	req.Header.Set("Accept-Language", "en-US,en;q=0.9")
	if strings.TrimSpace(referer) != "" {
		req.Header.Set("Referer", referer)
	}
	return req, nil
}

// loadCachedVisual returns the cached visual for cacheKey, or calls fetch and
// caches a successful result.
func (g *GeminiService) loadCachedVisual(cacheKey string, fetch func() (string, string, error)) (string, string, error) {
	if data, mimeType, ok := g.getCachedVisual(cacheKey); ok {
		return data, mimeType, nil
	}
	data, mimeType, err := fetch()
	if err != nil {
		return "", "", err
	}
	g.setCachedVisual(cacheKey, data, mimeType, geminiVisualCacheTTL)
	return data, mimeType, nil
}

// cachedPreviewFrame returns a still frame extracted from videoURL, cached
// under a "frame" key.
func (g *GeminiService) cachedPreviewFrame(videoURL string) (string, string, error) {
	return g.loadCachedVisual("frame\n"+videoURL, func() (string, string, error) {
		return extractFrameFromVideo(videoURL)
	})
}

// cachedThumbnail returns the image at imageURL, cached under an "image" key.
func (g *GeminiService) cachedThumbnail(imageURL, referer string) (string, string, error) {
	return g.loadCachedVisual("image\n"+imageURL, func() (string, string, error) {
		return fetchImageAsInlineData(g.Client, imageURL, referer)
	})
}

// fetchCandidateVisualInlineData returns one base64 visual for candidate. It
// tries, in order: a preview-video frame for Envato and Artgrid sources, the
// candidate's thumbnail unless it is rated low value, a thumbnail derived from
// the link, and a preview-video frame for any other source. The error from the
// last failed attempt is returned when nothing works.
func (g *GeminiService) fetchCandidateVisualInlineData(candidate SearchResult) (string, string, error) {
	lastErr := fmt.Errorf("candidate has no thumbnail or preview video")
	frameAttempted := false

	if candidate.PreviewVideoURL != "" && (candidate.Source == "Envato" || candidate.Source == "Artgrid") {
		frameAttempted = true
		data, mimeType, err := g.cachedPreviewFrame(candidate.PreviewVideoURL)
		if err == nil {
			return data, mimeType, nil
		}
		lastErr = err
	}

	if candidate.ThumbnailURL != "" {
		if isLowValueThumbnail(candidate.ThumbnailURL) {
			g.debug("gemini:vision_candidate_rejected_low_value", map[string]any{
				"link":         candidate.Link,
				"source":       candidate.Source,
				"thumbnailUrl": candidate.ThumbnailURL,
			})
			lastErr = fmt.Errorf("candidate thumbnail is low value")
		} else {
			data, mimeType, err := g.cachedThumbnail(candidate.ThumbnailURL, candidate.Link)
			if err == nil {
				return data, mimeType, nil
			}
			lastErr = err
		}
	}

	if fallbackThumbnail := deriveThumbnail(candidate.Link); fallbackThumbnail != "" && fallbackThumbnail != candidate.ThumbnailURL {
		data, mimeType, err := g.cachedThumbnail(fallbackThumbnail, candidate.Link)
		if err == nil {
			return data, mimeType, nil
		}
		lastErr = err
	}

	// Skip the last-resort frame when the same extraction already failed above;
	// repeating it would only double the wait.
	if candidate.PreviewVideoURL != "" && !frameAttempted {
		data, mimeType, err := g.cachedPreviewFrame(candidate.PreviewVideoURL)
		if err == nil {
			return data, mimeType, nil
		}
		lastErr = err
	}

	return "", "", lastErr
}

// extractFrameFromVideo runs ffmpeg to grab one JPEG frame 0.5 seconds into
// videoURL and returns it base64-encoded with MIME type "image/jpeg". The
// process is killed after geminiFrameExtractionTimeout.
//
// NOTE(review): videoURL is handed to ffmpeg unchanged, and ffmpeg accepts
// more than http(s) inputs — confirm callers only pass trusted URLs.
func extractFrameFromVideo(videoURL string) (string, string, error) {
	tempDir, err := os.MkdirTemp("", "gemini-frame-*")
	if err != nil {
		return "", "", err
	}
	defer os.RemoveAll(tempDir)

	framePath := filepath.Join(tempDir, "frame.jpg")
	cmd := exec.Command("ffmpeg", "-y", "-ss", "00:00:00.500", "-i", videoURL, "-frames:v", "1", "-q:v", "2", framePath)
	var output bytes.Buffer
	cmd.Stdout = &output
	cmd.Stderr = &output
	if err := cmd.Start(); err != nil {
		return "", "", fmt.Errorf("ffmpeg frame extraction failed: %w", err)
	}

	killer := time.AfterFunc(geminiFrameExtractionTimeout, func() {
		// Nothing useful can be done if the kill itself fails.
		_ = cmd.Process.Kill()
	})
	waitErr := cmd.Wait()
	// Stop reports false when the timer already fired, i.e. the process was killed.
	if !killer.Stop() {
		return "", "", fmt.Errorf("ffmpeg frame extraction timed out after %s", geminiFrameExtractionTimeout)
	}
	if waitErr != nil {
		message := strings.TrimSpace(output.String())
		if message == "" {
			message = waitErr.Error()
		}
		return "", "", fmt.Errorf("ffmpeg frame extraction failed: %s", message)
	}

	data, err := os.ReadFile(framePath)
	if err != nil {
		return "", "", err
	}
	return base64.StdEncoding.EncodeToString(data), "image/jpeg", nil
}

// min returns the smaller of a and b.
func min(a, b int) int {
	if a < b {
		return a
	}
	return b
}

// extractJSONObject returns the first balanced {...} object in text after
// stripping a surrounding markdown fence. Braces inside string literals are
// ignored.
func extractJSONObject(text string) (string, error) {
	cleaned := stripGeminiMarkdownFence(text)
	start := strings.Index(cleaned, "{")
	if start == -1 {
		return "", fmt.Errorf("no JSON object start found in %q", truncateForError(cleaned, 200))
	}

	depth := 0
	inString := false
	escaped := false
	for i := start; i < len(cleaned); i++ {
		ch := cleaned[i]
		if escaped {
			escaped = false
			continue
		}
		if ch == '\\' && inString {
			escaped = true
			continue
		}
		if ch == '"' {
			inString = !inString
			continue
		}
		if inString {
			continue
		}
		switch ch {
		case '{':
			depth++
		case '}':
			depth--
			if depth == 0 {
				return cleaned[start : i+1], nil
			}
		}
	}
	return "", fmt.Errorf("no complete JSON object found in %q", truncateForError(cleaned, 200))
}

// truncateAtRuneBoundary returns at most limit bytes of s, backing up so that
// a multi-byte UTF-8 sequence is never split.
func truncateAtRuneBoundary(s string, limit int) string {
	if limit <= 0 {
		return ""
	}
	if len(s) <= limit {
		return s
	}
	cut := limit
	// Bytes of the form 10xxxxxx continue a rune started earlier.
	for cut > 0 && s[cut]&0xC0 == 0x80 {
		cut--
	}
	return s[:cut]
}

// truncateForError trims text and, when it is longer than limit bytes,
// shortens it to at most limit bytes on a rune boundary and appends "...".
func truncateForError(text string, limit int) string {
	trimmed := strings.TrimSpace(text)
	if len(trimmed) <= limit {
		return trimmed
	}
	return truncateAtRuneBoundary(trimmed, limit) + "..."
}

// normalizeKoreanReason trims reason and substitutes a default Korean sentence
// when it is empty.
func normalizeKoreanReason(reason string) string {
	trimmed := strings.TrimSpace(reason)
	if trimmed == "" {
		return "시각 정보가 제한적이지만 검색 의도와의 관련성을 기준으로 평가했습니다."
	}
	return trimmed
}

// normalizeAssessment lower-cases a recognized assessment. Anything else maps
// to "positive" when recommended and "unclear" otherwise.
func normalizeAssessment(assessment string, recommended bool) string {
	normalized := strings.ToLower(strings.TrimSpace(assessment))
	switch normalized {
	case "positive", "unclear", "irrelevant", "inappropriate":
		return normalized
	}
	if recommended {
		return "positive"
	}
	return "unclear"
}

// normalizeSearchHint strips surrounding quotes and backticks, collapses
// whitespace, and caps the result at 80 bytes without splitting a rune.
func normalizeSearchHint(text string) string {
	unquoted := strings.Trim(text, "\"'`")
	collapsed := strings.Join(strings.Fields(unquoted), " ")
	return truncateAtRuneBoundary(collapsed, 80)
}

// parseSupplementalQueryLines returns up to five distinct non-empty lines from
// text, compared case-insensitively, after stripping leading and trailing
// quotes, backticks, dashes, digits, dots and spaces.
func parseSupplementalQueryLines(text string) []string {
	seen := map[string]bool{}
	queries := make([]string, 0, 5)
	for _, line := range strings.Split(text, "\n") {
		trimmed := strings.TrimSpace(strings.Trim(line, "\"'`-0123456789. "))
		if trimmed == "" {
			continue
		}
		key := strings.ToLower(trimmed)
		if seen[key] {
			continue
		}
		seen[key] = true
		queries = append(queries, trimmed)
		if len(queries) >= 5 {
			break
		}
	}
	return queries
}

// buildSearchQueries derives search phrases from englishQuery, falling back to
// originalQuery when the former is blank. It returns the base phrase, a
// variant with "pov" spelled out, and the base with common footage suffixes,
// whitespace-normalized and de-duplicated case-insensitively.
func buildSearchQueries(originalQuery, englishQuery string) []string {
	base := strings.TrimSpace(englishQuery)
	if base == "" {
		base = strings.TrimSpace(originalQuery)
	}
	candidates := []string{
		base,
		strings.ReplaceAll(base, "pov", "point of view"),
		base + " stock footage",
		base + " b-roll",
		base + " cinematic footage",
		base + " editorial footage",
		base + " establishing shot",
	}

	seen := map[string]bool{}
	queries := make([]string, 0, len(candidates))
	for _, item := range candidates {
		trimmed := strings.Join(strings.Fields(item), " ")
		if trimmed == "" {
			continue
		}
		key := strings.ToLower(trimmed)
		if seen[key] {
			continue
		}
		seen[key] = true
		queries = append(queries, trimmed)
	}
	return queries
}

// sanitizePlainEnglishLine returns the first usable line of text: quotes and
// backticks are stripped, the labels "translation:", "english:" and
// "translated query:" are removed, and lines starting with "here is" or
// "the translation" are skipped. It returns "" when no line qualifies.
func sanitizePlainEnglishLine(text string) string {
	for _, line := range strings.Split(text, "\n") {
		line = strings.TrimSpace(strings.Trim(line, "\"'`"))
		if line == "" {
			continue
		}
		lower := strings.ToLower(line)
		for _, prefix := range []string{"translation:", "english:", "translated query:"} {
			if strings.HasPrefix(lower, prefix) {
				line = strings.TrimSpace(line[len(prefix):])
				lower = strings.ToLower(line)
			}
		}
		if strings.HasPrefix(lower, "here is") || strings.HasPrefix(lower, "the translation") {
			continue
		}
		if line != "" {
			return line
		}
	}
	return ""
}

// looksMostlyASCII reports whether at least 80% of the runes in text are
// ASCII. Empty text counts as mostly ASCII.
func looksMostlyASCII(text string) bool {
	runes := []rune(text)
	ascii := 0
	for _, r := range runes {
		if r <= 127 {
			ascii++
		}
	}
	return ascii >= len(runes)*8/10
}

// isLikelyEnglishQuery reports whether text has at least one ASCII letter and
// no non-ASCII rune.
func isLikelyEnglishQuery(text string) bool {
	alpha := 0
	nonASCII := 0
	for _, r := range text {
		switch {
		case r >= 'A' && r <= 'Z', r >= 'a' && r <= 'z':
			alpha++
		case r > 127:
			nonASCII++
		}
	}
	return alpha > 0 && nonASCII == 0
}

// translateKoreanMediaTerms replaces known Korean media terms in query with
// English, applying longer terms first so that compounds win over their parts,
// and collapses whitespace in the result.
func translateKoreanMediaTerms(query string) string {
	replacements := []struct {
		korean  string
		english string
	}{
		{korean: "사이버 펑크 도시", english: "cyberpunk city"},
		{korean: "사이버펑크 도시", english: "cyberpunk city"},
		{korean: "사이버 펑크", english: "cyberpunk"},
		{korean: "사이버펑크", english: "cyberpunk"},
		{korean: "네온 도시", english: "neon city"},
		{korean: "미래 도시", english: "futuristic city"},
		{korean: "숲속", english: "forest"},
		{korean: "다정한", english: "affectionate"},
		{korean: "항공샷", english: "aerial shot"},
		{korean: "사람들", english: "people"},
		{korean: "행복한", english: "happy"},
		{korean: "커플", english: "couple"},
		{korean: "연인", english: "lovers"},
		{korean: "도시", english: "city"},
		{korean: "야경", english: "night city"},
		{korean: "거리", english: "street"},
		{korean: "골목", english: "alley"},
		{korean: "바다", english: "ocean"},
		{korean: "해변", english: "beach"},
		{korean: "노을", english: "sunset"},
		{korean: "자연", english: "nature"},
		{korean: "드론", english: "drone"},
		{korean: "인파", english: "crowd"},
		{korean: "공원", english: "park"},
		{korean: "숲", english: "forest"},
		{korean: "비", english: "rain"},
		{korean: "눈", english: "snow"},
		{korean: "산", english: "mountain"},
	}
	sort.SliceStable(replacements, func(i, j int) bool {
		return len([]rune(replacements[i].korean)) > len([]rune(replacements[j].korean))
	})

	translated := strings.TrimSpace(query)
	for _, replacement := range replacements {
		translated = strings.ReplaceAll(translated, replacement.korean, replacement.english)
	}
	return strings.Join(strings.Fields(translated), " ")
}

// normalizeKnownMediaPhrases rewrites the Korean spellings of "cyberpunk" and
// "cyberpunk city" to English and collapses whitespace.
func normalizeKnownMediaPhrases(query string) string {
	normalized := strings.TrimSpace(query)
	replacements := []struct {
		from string
		to   string
	}{
		{from: "사이버 펑크 도시", to: "cyberpunk city"},
		{from: "사이버펑크 도시", to: "cyberpunk city"},
		{from: "사이버 펑크", to: "cyberpunk"},
		{from: "사이버펑크", to: "cyberpunk"},
	}
	for _, replacement := range replacements {
		normalized = strings.ReplaceAll(normalized, replacement.from, replacement.to)
	}
	return strings.Join(strings.Fields(normalized), " ")
}

// isOvercompressedTranslation reports whether a multi-word original was
// reduced to fewer than two words, unless the result is one of the allowed
// single words "cyberpunk", "nightlife" or "cityscape".
func isOvercompressedTranslation(original, translated string) bool {
	originalWords := len(strings.Fields(original))
	translatedWords := len(strings.Fields(translated))
	if originalWords < 2 || translatedWords >= 2 {
		return false
	}
	switch strings.ToLower(strings.TrimSpace(translated)) {
	case "cyberpunk", "nightlife", "cityscape":
		return false
	}
	return true
}

// translateViaGoogle translates query to English through the translate endpoint.
func (g *GeminiService) translateViaGoogle(query string) (string, error) {
	return g.translateViaGoogleToTarget(query, "en")
}

// translateViaGoogleToTarget translates query to targetLanguage (default "en")
// through TranslateEndpoint (default geminiDefaultTranslateEndpoint) with
// automatic source detection. The reply is expected to be a JSON array whose
// first element is a list of segments, each starting with the translated text;
// the segments are concatenated and trimmed. An empty result is an error.
func (g *GeminiService) translateViaGoogleToTarget(query, targetLanguage string) (string, error) {
	baseURL := g.TranslateEndpoint
	if strings.TrimSpace(baseURL) == "" {
		baseURL = geminiDefaultTranslateEndpoint
	}
	targetLanguage = strings.TrimSpace(targetLanguage)
	if targetLanguage == "" {
		targetLanguage = "en"
	}

	endpoint := baseURL + "?client=gtx&sl=auto&tl=" + neturl.QueryEscape(targetLanguage) + "&dt=t&q=" + neturl.QueryEscape(query)
	resp, err := g.httpClient().Get(endpoint)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()
	if resp.StatusCode >= 300 {
		return "", fmt.Errorf("google translate fallback returned status %d", resp.StatusCode)
	}

	var payload []any
	if err := json.NewDecoder(resp.Body).Decode(&payload); err != nil {
		return "", err
	}
	if len(payload) == 0 {
		return "", fmt.Errorf("google translate fallback returned no payload")
	}
	top, ok := payload[0].([]any)
	if !ok {
		return "", fmt.Errorf("google translate fallback returned unexpected payload")
	}

	var builder strings.Builder
	for _, part := range top {
		segment, ok := part.([]any)
		if !ok || len(segment) == 0 {
			continue
		}
		if text, ok := segment[0].(string); ok {
			builder.WriteString(text)
		}
	}
	translated := strings.TrimSpace(builder.String())
	if translated == "" {
		return "", fmt.Errorf("google translate fallback returned empty translation")
	}
	return translated, nil
}