// Listing metadata: 868 lines, 27 KiB, Go.
package services
|
|
|
|
import (
|
|
"bytes"
|
|
"encoding/base64"
|
|
"encoding/json"
|
|
"fmt"
|
|
"io"
|
|
"mime"
|
|
"net/http"
|
|
neturl "net/url"
|
|
"os"
|
|
"os/exec"
|
|
"path/filepath"
|
|
"sort"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
)
|
|
|
|
type GeminiService struct {
|
|
APIKey string
|
|
Client *http.Client
|
|
GenerateEndpoint string
|
|
TranslateEndpoint string
|
|
Debug func(message string, data any)
|
|
cacheMu sync.Mutex
|
|
visualCache map[string]cachedVisualData
|
|
translationCache map[string]cachedStringValue
|
|
expansionCache map[string]cachedExpansionValue
|
|
}
|
|
|
|
// cachedVisualData is one visual-cache entry: a base64 payload, its MIME
// type, and the instant after which the entry is considered stale.
type cachedVisualData struct {
	data, mimeType string
	expiresAt      time.Time
}
|
|
|
|
// cachedStringValue is one translation-cache entry together with the instant
// after which it is considered stale.
type cachedStringValue struct {
	// value is the cached translation text.
	value string
	// expiresAt is compared against time.Now() on every lookup.
	expiresAt time.Time
}
|
|
|
|
// cachedExpansionValue is one expansion-cache entry together with the instant
// after which it is considered stale.
type cachedExpansionValue struct {
	// value is the cached list of expanded search queries.
	value []string
	// expiresAt is compared against time.Now() on every lookup.
	expiresAt time.Time
}
|
|
|
|
// AIRecommendation is the JSON shape returned for one evaluated candidate.
// Recommend fills the candidate fields plus Reason and Recommended; the
// remaining fields are optional and omitted from JSON when empty.
type AIRecommendation struct {
	// Fields copied from the search candidate.
	Title           string `json:"title"`
	Link            string `json:"link"`
	Snippet         string `json:"snippet"`
	ThumbnailURL    string `json:"thumbnailUrl"`
	PreviewVideoURL string `json:"previewVideoUrl"`
	Source          string `json:"source"`

	// Model verdict.
	Reason      string `json:"reason"`
	Recommended bool   `json:"recommended"`

	// Optional presentation hints.
	MediaMode            string `json:"mediaMode,omitempty"`
	EmbedURL             string `json:"embedUrl,omitempty"`
	PreviewBlockedReason string `json:"previewBlockedReason,omitempty"`
	ActionLabel          string `json:"actionLabel,omitempty"`
	ActionType           string `json:"actionType,omitempty"`
	SecondaryActionLabel string `json:"secondaryActionLabel,omitempty"`
}
|
|
|
|
// QueryExpansion is the JSON wrapper for a list of expanded query words.
type QueryExpansion struct {
	// Querywords is serialized under the "querywords" key.
	Querywords []string `json:"querywords"`
}
|
|
|
|
func NewGeminiService(apiKey string) *GeminiService {
|
|
return &GeminiService{
|
|
APIKey: apiKey,
|
|
Client: &http.Client{Timeout: 40 * time.Second},
|
|
GenerateEndpoint: "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent",
|
|
TranslateEndpoint: "https://translate.googleapis.com/translate_a/single",
|
|
visualCache: map[string]cachedVisualData{},
|
|
translationCache: map[string]cachedStringValue{},
|
|
expansionCache: map[string]cachedExpansionValue{},
|
|
}
|
|
}
|
|
|
|
func (g *GeminiService) ExpandQuery(query string) ([]string, error) {
|
|
cacheKey := strings.TrimSpace(query)
|
|
if cached, ok := g.getCachedExpansion(cacheKey); ok {
|
|
g.debug("gemini:expand_query_cache_hit", map[string]any{"query": query, "expanded": cached})
|
|
return cached, nil
|
|
}
|
|
|
|
englishBase := g.TranslateQuery(query)
|
|
expanded := buildSearchQueries(query, englishBase)
|
|
g.setCachedExpansion(cacheKey, expanded, 15*time.Minute)
|
|
g.debug("gemini:expand_query", map[string]any{
|
|
"original": query,
|
|
"english": englishBase,
|
|
"expanded": expanded,
|
|
})
|
|
return expanded, nil
|
|
}
|
|
|
|
func (g *GeminiService) TranslateSummaryToKorean(text string) (string, error) {
|
|
trimmed := strings.TrimSpace(text)
|
|
if trimmed == "" {
|
|
return "", nil
|
|
}
|
|
cacheKey := "summary-ko\n" + trimmed
|
|
if cached, ok := g.getCachedTranslation(cacheKey); ok {
|
|
g.debug("gemini:summary_translate_cache_hit", map[string]any{"length": len(trimmed)})
|
|
return cached, nil
|
|
}
|
|
if !looksMostlyASCII(trimmed) {
|
|
g.setCachedTranslation(cacheKey, trimmed, 15*time.Minute)
|
|
return trimmed, nil
|
|
}
|
|
|
|
g.debug("gemini:summary_translate_attempt", map[string]any{"length": len(trimmed)})
|
|
translated, err := g.translateViaGoogleToTarget(trimmed, "ko")
|
|
if err != nil {
|
|
g.debug("gemini:summary_translate_error", map[string]any{"length": len(trimmed), "error": err.Error()})
|
|
return "", err
|
|
}
|
|
translated = strings.TrimSpace(translated)
|
|
if translated == "" {
|
|
return "", fmt.Errorf("google translate summary returned empty translation")
|
|
}
|
|
g.debug("gemini:summary_translate_success", map[string]any{"length": len(trimmed)})
|
|
g.setCachedTranslation(cacheKey, translated, 15*time.Minute)
|
|
return translated, nil
|
|
}
|
|
|
|
func (g *GeminiService) TranslateQuery(query string) string {
|
|
trimmed := strings.TrimSpace(query)
|
|
if trimmed == "" {
|
|
return ""
|
|
}
|
|
if cached, ok := g.getCachedTranslation(trimmed); ok {
|
|
g.debug("gemini:translate_cache_hit", map[string]any{"query": trimmed, "translated": cached})
|
|
return cached
|
|
}
|
|
normalizedIntent := normalizeKnownMediaPhrases(trimmed)
|
|
if looksMostlyASCII(normalizedIntent) {
|
|
result := strings.TrimSpace(normalizedIntent)
|
|
g.setCachedTranslation(trimmed, result, 15*time.Minute)
|
|
return result
|
|
}
|
|
if looksMostlyASCII(trimmed) {
|
|
g.setCachedTranslation(trimmed, trimmed, 15*time.Minute)
|
|
return trimmed
|
|
}
|
|
|
|
if g.APIKey != "" {
|
|
g.debug("gemini:translate_attempt", map[string]any{"mode": "gemini", "query": trimmed})
|
|
body := map[string]any{
|
|
"systemInstruction": map[string]any{
|
|
"parts": []map[string]string{
|
|
{
|
|
"text": "You are a professional video editor. Infer stronger stock-footage and scene-search wording from the user's keyword, and expand it into natural English that a professional editor would use to find usable footage. Output one plain English search phrase only. No labels, no quotes, no explanations.",
|
|
},
|
|
},
|
|
},
|
|
"contents": []map[string]any{
|
|
{
|
|
"parts": []map[string]string{
|
|
{
|
|
"text": "Expand this user query into a concise but editor-grade English footage search phrase suitable for stock-footage discovery: " + trimmed,
|
|
},
|
|
},
|
|
},
|
|
},
|
|
"generationConfig": map[string]any{
|
|
"responseMimeType": "text/plain",
|
|
"temperature": 0.1,
|
|
"maxOutputTokens": 40,
|
|
},
|
|
}
|
|
|
|
rawText, err := g.generateText(body)
|
|
if err == nil {
|
|
translated := sanitizePlainEnglishLine(rawText)
|
|
if translated != "" && !strings.EqualFold(translated, trimmed) && !isOvercompressedTranslation(trimmed, translated) {
|
|
g.debug("gemini:translate_success", map[string]any{"mode": "gemini", "query": trimmed, "translated": translated})
|
|
g.setCachedTranslation(trimmed, translated, 15*time.Minute)
|
|
return translated
|
|
}
|
|
}
|
|
if err != nil {
|
|
g.debug("gemini:translate_error", map[string]any{"mode": "gemini", "query": trimmed, "error": err.Error()})
|
|
}
|
|
}
|
|
|
|
g.debug("gemini:translate_attempt", map[string]any{"mode": "google", "query": trimmed})
|
|
if translated, err := g.translateViaGoogle(trimmed); err == nil && translated != "" && isLikelyEnglishQuery(translated) && !isOvercompressedTranslation(trimmed, translated) {
|
|
g.debug("gemini:translate_success", map[string]any{"mode": "google", "query": trimmed, "translated": translated})
|
|
g.setCachedTranslation(trimmed, translated, 15*time.Minute)
|
|
return translated
|
|
}
|
|
if translated := translateKoreanMediaTerms(normalizedIntent); translated != "" && !strings.EqualFold(translated, trimmed) {
|
|
g.debug("gemini:translate_success", map[string]any{"mode": "dictionary", "query": trimmed, "translated": translated})
|
|
g.setCachedTranslation(trimmed, translated, 15*time.Minute)
|
|
return translated
|
|
}
|
|
g.debug("gemini:translate_fallback_original", map[string]any{"query": trimmed, "normalized": normalizedIntent})
|
|
result := strings.TrimSpace(normalizedIntent)
|
|
g.setCachedTranslation(trimmed, result, 15*time.Minute)
|
|
return result
|
|
}
|
|
|
|
func (g *GeminiService) generateText(body map[string]any) (string, error) {
|
|
rawBody, _ := json.Marshal(body)
|
|
endpoint := strings.TrimRight(g.GenerateEndpoint, "?") + "?key=" + g.APIKey
|
|
resp, err := g.Client.Post(endpoint, "application/json", bytes.NewReader(rawBody))
|
|
if err != nil {
|
|
return "", fmt.Errorf("gemini request failed: %w", err)
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
if resp.StatusCode >= 300 {
|
|
data, _ := io.ReadAll(io.LimitReader(resp.Body, 2048))
|
|
return "", fmt.Errorf("gemini returned status %d: %s", resp.StatusCode, strings.TrimSpace(string(data)))
|
|
}
|
|
|
|
var payload struct {
|
|
Candidates []struct {
|
|
Content struct {
|
|
Parts []struct {
|
|
Text string `json:"text"`
|
|
} `json:"parts"`
|
|
} `json:"content"`
|
|
} `json:"candidates"`
|
|
}
|
|
if err := json.NewDecoder(resp.Body).Decode(&payload); err != nil {
|
|
return "", fmt.Errorf("gemini response decode failed: %w", err)
|
|
}
|
|
if len(payload.Candidates) == 0 || len(payload.Candidates[0].Content.Parts) == 0 {
|
|
return "", fmt.Errorf("gemini returned no candidates")
|
|
}
|
|
return payload.Candidates[0].Content.Parts[0].Text, nil
|
|
}
|
|
|
|
func (g *GeminiService) Recommend(query string, candidates []SearchResult) ([]AIRecommendation, error) {
|
|
if g.APIKey == "" {
|
|
return nil, fmt.Errorf("gemini api key is not configured")
|
|
}
|
|
if len(candidates) == 0 {
|
|
return []AIRecommendation{}, nil
|
|
}
|
|
g.debug("gemini:vision_start", map[string]any{
|
|
"query": query,
|
|
"candidateCount": len(candidates),
|
|
})
|
|
|
|
type geminiPart map[string]any
|
|
parts := []geminiPart{
|
|
{
|
|
"text": `You are a professional video editor. Analyze whether each provided visual is suitable as a usable scene or shot for the user's requested keyword. Return JSON only in this shape:
|
|
{"recommendations":[{"index":0,"verdict":"Yes","reason":"short reason","recommended":true}]}
|
|
Return one entry for every analyzed candidate. Use Korean for every reason. Keep reasons concise but specific enough to explain usefulness.
|
|
Set verdict to "Yes" or "No" for every candidate. "Yes" means the scene is usable and relevant for editing against the user's keyword. "No" means it is not suitable or not relevant enough.
|
|
Set recommended=true only when verdict is "Yes". Set recommended=false when verdict is "No".
|
|
Prefer cinematic b-roll, stock footage, editorial footage, clean composition, usable establishing shots, and professional media thumbnails.
|
|
Avoid clickbait faces, exaggerated expressions, meme aesthetics, low-information thumbnails, sensational text overlays, or gossip-style imagery.
|
|
Favor scenes that look directly useful for professional editing, sequencing, establishing, cutaway, or mood-building usage.
|
|
User query: ` + query,
|
|
},
|
|
}
|
|
|
|
maxImages := min(len(candidates), 10)
|
|
visualCount := 0
|
|
for idx := 0; idx < maxImages; idx++ {
|
|
img, mimeType, err := g.fetchCandidateVisualInlineData(candidates[idx])
|
|
if err != nil {
|
|
g.debug("gemini:vision_candidate_visual_error", map[string]any{
|
|
"index": idx,
|
|
"link": candidates[idx].Link,
|
|
"source": candidates[idx].Source,
|
|
"error": err.Error(),
|
|
})
|
|
continue
|
|
}
|
|
visualCount++
|
|
parts = append(parts,
|
|
geminiPart{"text": fmt.Sprintf("Candidate %d: title=%s source=%s link=%s", idx, candidates[idx].Title, candidates[idx].Source, candidates[idx].Link)},
|
|
geminiPart{"inlineData": map[string]string{"mimeType": mimeType, "data": img}},
|
|
)
|
|
}
|
|
if visualCount == 0 {
|
|
return nil, fmt.Errorf("no candidate thumbnails or preview frames could be fetched for gemini vision")
|
|
}
|
|
g.debug("gemini:vision_visuals_prepared", map[string]any{
|
|
"query": query,
|
|
"visualCount": visualCount,
|
|
"maxImages": maxImages,
|
|
})
|
|
|
|
body := map[string]any{
|
|
"contents": []map[string]any{
|
|
{"parts": parts},
|
|
},
|
|
"generationConfig": map[string]any{
|
|
"responseMimeType": "application/json",
|
|
},
|
|
}
|
|
|
|
rawBody, _ := json.Marshal(body)
|
|
endpoint := strings.TrimRight(g.GenerateEndpoint, "?") + "?key=" + g.APIKey
|
|
resp, err := g.Client.Post(endpoint, "application/json", bytes.NewReader(rawBody))
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
if resp.StatusCode >= 300 {
|
|
data, _ := io.ReadAll(io.LimitReader(resp.Body, 2048))
|
|
return nil, fmt.Errorf("gemini vision returned status %d: %s", resp.StatusCode, strings.TrimSpace(string(data)))
|
|
}
|
|
|
|
var payload struct {
|
|
Candidates []struct {
|
|
Content struct {
|
|
Parts []struct {
|
|
Text string `json:"text"`
|
|
} `json:"parts"`
|
|
} `json:"content"`
|
|
} `json:"candidates"`
|
|
}
|
|
if err := json.NewDecoder(resp.Body).Decode(&payload); err != nil {
|
|
return nil, fmt.Errorf("gemini vision response decode failed: %w", err)
|
|
}
|
|
if len(payload.Candidates) == 0 || len(payload.Candidates[0].Content.Parts) == 0 {
|
|
return nil, fmt.Errorf("gemini vision returned no candidates")
|
|
}
|
|
|
|
jsonText, err := extractJSONObject(payload.Candidates[0].Content.Parts[0].Text)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("gemini vision JSON extraction failed: %w", err)
|
|
}
|
|
|
|
var parsed struct {
|
|
Recommendations []struct {
|
|
Index int `json:"index"`
|
|
Verdict string `json:"verdict"`
|
|
Reason string `json:"reason"`
|
|
Recommended bool `json:"recommended"`
|
|
} `json:"recommendations"`
|
|
}
|
|
if err := json.Unmarshal([]byte(jsonText), &parsed); err != nil {
|
|
return nil, fmt.Errorf("gemini vision JSON parse failed: %w; raw=%q", err, truncateForError(payload.Candidates[0].Content.Parts[0].Text, 200))
|
|
}
|
|
|
|
recommendations := make([]AIRecommendation, 0, len(parsed.Recommendations))
|
|
for _, rec := range parsed.Recommendations {
|
|
if rec.Index < 0 || rec.Index >= len(candidates) {
|
|
continue
|
|
}
|
|
src := candidates[rec.Index]
|
|
recommended := rec.Recommended || strings.EqualFold(strings.TrimSpace(rec.Verdict), "yes")
|
|
recommendations = append(recommendations, AIRecommendation{
|
|
Title: src.Title,
|
|
Link: src.Link,
|
|
Snippet: src.Snippet,
|
|
ThumbnailURL: src.ThumbnailURL,
|
|
PreviewVideoURL: src.PreviewVideoURL,
|
|
Source: src.Source,
|
|
Reason: normalizeKoreanReason(rec.Reason),
|
|
Recommended: recommended,
|
|
})
|
|
}
|
|
g.debug("gemini:vision_complete", map[string]any{
|
|
"query": query,
|
|
"recommendationCount": len(recommendations),
|
|
})
|
|
|
|
return recommendations, nil
|
|
}
|
|
|
|
func (g *GeminiService) debug(message string, data any) {
|
|
if g != nil && g.Debug != nil {
|
|
g.Debug(message, data)
|
|
}
|
|
}
|
|
|
|
func fetchImageAsInlineData(client *http.Client, imageURL, referer string) (string, string, error) {
|
|
if strings.TrimSpace(imageURL) == "" {
|
|
return "", "", fmt.Errorf("image url is empty")
|
|
}
|
|
req, reqErr := newBrowserStyleImageRequest(imageURL, referer)
|
|
if reqErr != nil {
|
|
return "", "", reqErr
|
|
}
|
|
resp, err := client.Do(req)
|
|
if err != nil {
|
|
return "", "", err
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
if resp.StatusCode >= 300 {
|
|
return "", "", fmt.Errorf("thumbnail fetch failed with %d", resp.StatusCode)
|
|
}
|
|
|
|
contentType := resp.Header.Get("Content-Type")
|
|
mimeType, _, _ := mime.ParseMediaType(contentType)
|
|
if mimeType == "" || !strings.HasPrefix(mimeType, "image/") {
|
|
mimeType = "image/jpeg"
|
|
}
|
|
|
|
data, err := io.ReadAll(io.LimitReader(resp.Body, 2*1024*1024))
|
|
if err != nil {
|
|
return "", "", err
|
|
}
|
|
return base64.StdEncoding.EncodeToString(data), mimeType, nil
|
|
}
|
|
|
|
func (g *GeminiService) getCachedVisual(key string) (string, string, bool) {
|
|
g.cacheMu.Lock()
|
|
defer g.cacheMu.Unlock()
|
|
|
|
entry, ok := g.visualCache[key]
|
|
if !ok {
|
|
return "", "", false
|
|
}
|
|
if time.Now().After(entry.expiresAt) {
|
|
delete(g.visualCache, key)
|
|
return "", "", false
|
|
}
|
|
return entry.data, entry.mimeType, true
|
|
}
|
|
|
|
func (g *GeminiService) setCachedVisual(key, data, mimeType string, ttl time.Duration) {
|
|
g.cacheMu.Lock()
|
|
defer g.cacheMu.Unlock()
|
|
|
|
g.visualCache[key] = cachedVisualData{
|
|
data: data,
|
|
mimeType: mimeType,
|
|
expiresAt: time.Now().Add(ttl),
|
|
}
|
|
}
|
|
|
|
func (g *GeminiService) getCachedTranslation(key string) (string, bool) {
|
|
g.cacheMu.Lock()
|
|
defer g.cacheMu.Unlock()
|
|
|
|
entry, ok := g.translationCache[key]
|
|
if !ok {
|
|
return "", false
|
|
}
|
|
if time.Now().After(entry.expiresAt) {
|
|
delete(g.translationCache, key)
|
|
return "", false
|
|
}
|
|
return entry.value, true
|
|
}
|
|
|
|
func (g *GeminiService) setCachedTranslation(key, value string, ttl time.Duration) {
|
|
g.cacheMu.Lock()
|
|
defer g.cacheMu.Unlock()
|
|
|
|
g.translationCache[key] = cachedStringValue{
|
|
value: value,
|
|
expiresAt: time.Now().Add(ttl),
|
|
}
|
|
}
|
|
|
|
func (g *GeminiService) getCachedExpansion(key string) ([]string, bool) {
|
|
g.cacheMu.Lock()
|
|
defer g.cacheMu.Unlock()
|
|
|
|
entry, ok := g.expansionCache[key]
|
|
if !ok {
|
|
return nil, false
|
|
}
|
|
if time.Now().After(entry.expiresAt) {
|
|
delete(g.expansionCache, key)
|
|
return nil, false
|
|
}
|
|
cloned := make([]string, len(entry.value))
|
|
copy(cloned, entry.value)
|
|
return cloned, true
|
|
}
|
|
|
|
func (g *GeminiService) setCachedExpansion(key string, value []string, ttl time.Duration) {
|
|
g.cacheMu.Lock()
|
|
defer g.cacheMu.Unlock()
|
|
|
|
cloned := make([]string, len(value))
|
|
copy(cloned, value)
|
|
g.expansionCache[key] = cachedExpansionValue{
|
|
value: cloned,
|
|
expiresAt: time.Now().Add(ttl),
|
|
}
|
|
}
|
|
|
|
// newBrowserStyleImageRequest builds a GET request for imageURL carrying a
// desktop-Chrome User-Agent plus image-oriented Accept headers. A Referer
// header is added only when referer is not blank. The error from
// http.NewRequest is returned unchanged.
func newBrowserStyleImageRequest(imageURL, referer string) (*http.Request, error) {
	request, err := http.NewRequest(http.MethodGet, imageURL, nil)
	if err != nil {
		return nil, err
	}

	headers := [][2]string{
		{"User-Agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36"},
		{"Accept", "image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8"},
		{"Accept-Language", "en-US,en;q=0.9"},
	}
	if strings.TrimSpace(referer) != "" {
		headers = append(headers, [2]string{"Referer", referer})
	}
	for _, header := range headers {
		request.Header.Set(header[0], header[1])
	}
	return request, nil
}
|
|
|
|
func (g *GeminiService) fetchCandidateVisualInlineData(candidate SearchResult) (string, string, error) {
|
|
lastErr := fmt.Errorf("candidate has no thumbnail or preview video")
|
|
if candidate.PreviewVideoURL != "" && (candidate.Source == "Envato" || candidate.Source == "Artgrid") {
|
|
cacheKey := "frame\n" + candidate.PreviewVideoURL
|
|
if data, mimeType, ok := g.getCachedVisual(cacheKey); ok {
|
|
return data, mimeType, nil
|
|
}
|
|
data, mimeType, err := extractFrameFromVideo(candidate.PreviewVideoURL)
|
|
if err == nil {
|
|
g.setCachedVisual(cacheKey, data, mimeType, 10*time.Minute)
|
|
return data, mimeType, nil
|
|
}
|
|
lastErr = err
|
|
}
|
|
if candidate.ThumbnailURL != "" {
|
|
if isLowValueThumbnail(candidate.ThumbnailURL) {
|
|
g.debug("gemini:vision_candidate_rejected_low_value", map[string]any{
|
|
"link": candidate.Link,
|
|
"source": candidate.Source,
|
|
"thumbnailUrl": candidate.ThumbnailURL,
|
|
})
|
|
lastErr = fmt.Errorf("candidate thumbnail is low value")
|
|
} else {
|
|
cacheKey := "image\n" + candidate.ThumbnailURL
|
|
if data, mimeType, ok := g.getCachedVisual(cacheKey); ok {
|
|
return data, mimeType, nil
|
|
}
|
|
data, mimeType, err := fetchImageAsInlineData(g.Client, candidate.ThumbnailURL, candidate.Link)
|
|
if err == nil {
|
|
g.setCachedVisual(cacheKey, data, mimeType, 10*time.Minute)
|
|
return data, mimeType, nil
|
|
}
|
|
lastErr = err
|
|
}
|
|
}
|
|
if fallbackThumbnail := deriveThumbnail(candidate.Link); fallbackThumbnail != "" && fallbackThumbnail != candidate.ThumbnailURL {
|
|
cacheKey := "image\n" + fallbackThumbnail
|
|
if data, mimeType, ok := g.getCachedVisual(cacheKey); ok {
|
|
return data, mimeType, nil
|
|
}
|
|
data, mimeType, err := fetchImageAsInlineData(g.Client, fallbackThumbnail, candidate.Link)
|
|
if err == nil {
|
|
g.setCachedVisual(cacheKey, data, mimeType, 10*time.Minute)
|
|
return data, mimeType, nil
|
|
}
|
|
lastErr = err
|
|
}
|
|
if candidate.PreviewVideoURL != "" {
|
|
cacheKey := "frame\n" + candidate.PreviewVideoURL
|
|
if data, mimeType, ok := g.getCachedVisual(cacheKey); ok {
|
|
return data, mimeType, nil
|
|
}
|
|
data, mimeType, err := extractFrameFromVideo(candidate.PreviewVideoURL)
|
|
if err != nil {
|
|
lastErr = err
|
|
} else {
|
|
g.setCachedVisual(cacheKey, data, mimeType, 10*time.Minute)
|
|
return data, mimeType, nil
|
|
}
|
|
}
|
|
return "", "", lastErr
|
|
}
|
|
|
|
// extractFrameFromVideo runs ffmpeg to grab one JPEG frame 0.5 s into the
// video at videoURL and returns it base64-encoded with MIME type
// "image/jpeg".
//
// videoURL comes from external search results, so only absolute http and
// https URLs are accepted; anything else (local paths, file:, concat:, ...)
// is rejected before ffmpeg is started. The ffmpeg process is killed if it
// has not finished within 30 seconds, so a stalled stream cannot block the
// caller indefinitely. Failures include both the process error and ffmpeg's
// combined output.
func extractFrameFromVideo(videoURL string) (string, string, error) {
	const extractionTimeout = 30 * time.Second

	videoURL = strings.TrimSpace(videoURL)
	parsed, err := neturl.Parse(videoURL)
	if err != nil {
		return "", "", fmt.Errorf("preview video url is invalid: %w", err)
	}
	if (parsed.Scheme != "http" && parsed.Scheme != "https") || parsed.Host == "" {
		return "", "", fmt.Errorf("preview video url must be an absolute http(s) url, got scheme %q", parsed.Scheme)
	}

	tempDir, err := os.MkdirTemp("", "gemini-frame-*")
	if err != nil {
		return "", "", err
	}
	defer os.RemoveAll(tempDir)

	framePath := filepath.Join(tempDir, "frame.jpg")
	cmd := exec.Command("ffmpeg", "-y", "-ss", "00:00:00.500", "-i", videoURL, "-frames:v", "1", "-q:v", "2", framePath)
	// One shared buffer for stdout and stderr, like Cmd.CombinedOutput.
	var output bytes.Buffer
	cmd.Stdout = &output
	cmd.Stderr = &output

	if err := cmd.Start(); err != nil {
		return "", "", fmt.Errorf("ffmpeg frame extraction failed: %w", err)
	}
	// Kill errors are ignored: the process may already have exited.
	watchdog := time.AfterFunc(extractionTimeout, func() { _ = cmd.Process.Kill() })
	waitErr := cmd.Wait()
	timedOut := !watchdog.Stop()
	if waitErr != nil {
		if timedOut {
			return "", "", fmt.Errorf("ffmpeg frame extraction timed out after %s", extractionTimeout)
		}
		return "", "", fmt.Errorf("ffmpeg frame extraction failed: %w: %s", waitErr, strings.TrimSpace(output.String()))
	}

	data, err := os.ReadFile(framePath)
	if err != nil {
		return "", "", err
	}
	return base64.StdEncoding.EncodeToString(data), "image/jpeg", nil
}
|
|
|
|
// min returns the smaller of a and b. Being declared at package level, it
// takes precedence over the built-in min on toolchains that provide one.
func min(a, b int) int {
	smaller := b
	if a < b {
		smaller = a
	}
	return smaller
}
|
|
|
|
func extractJSONObject(text string) (string, error) {
|
|
cleaned := strings.TrimSpace(text)
|
|
cleaned = strings.TrimPrefix(cleaned, "```json")
|
|
cleaned = strings.TrimPrefix(cleaned, "```")
|
|
cleaned = strings.TrimSuffix(cleaned, "```")
|
|
cleaned = strings.TrimSpace(cleaned)
|
|
|
|
start := strings.Index(cleaned, "{")
|
|
if start == -1 {
|
|
return "", fmt.Errorf("no JSON object start found in %q", truncateForError(cleaned, 200))
|
|
}
|
|
|
|
depth := 0
|
|
inString := false
|
|
escaped := false
|
|
for i := start; i < len(cleaned); i++ {
|
|
ch := cleaned[i]
|
|
if escaped {
|
|
escaped = false
|
|
continue
|
|
}
|
|
if ch == '\\' && inString {
|
|
escaped = true
|
|
continue
|
|
}
|
|
if ch == '"' {
|
|
inString = !inString
|
|
continue
|
|
}
|
|
if inString {
|
|
continue
|
|
}
|
|
switch ch {
|
|
case '{':
|
|
depth++
|
|
case '}':
|
|
depth--
|
|
if depth == 0 {
|
|
return cleaned[start : i+1], nil
|
|
}
|
|
}
|
|
}
|
|
return "", fmt.Errorf("no complete JSON object found in %q", truncateForError(cleaned, 200))
|
|
}
|
|
|
|
// truncateForError trims text and, when it is longer than limit bytes, cuts
// it down and appends "...".
//
// The cut is moved back to the nearest rune boundary at or before limit, so
// multi-byte text (for example Korean) is never split into invalid UTF-8. A
// limit of zero or less yields just "..." for non-empty text.
func truncateForError(text string, limit int) string {
	trimmed := strings.TrimSpace(text)
	if len(trimmed) <= limit {
		return trimmed
	}

	// Ranging over a string yields the byte offset of each rune start; keep
	// the last one that does not exceed limit.
	cut := 0
	for offset := range trimmed {
		if offset > limit {
			break
		}
		cut = offset
	}
	return trimmed[:cut] + "..."
}
|
|
|
|
// normalizeKoreanReason trims reason and substitutes a generic Korean
// explanation when nothing is left.
func normalizeKoreanReason(reason string) string {
	const fallback = "시각 정보가 제한적이지만 검색 의도와의 관련성을 기준으로 평가했습니다."

	if cleaned := strings.TrimSpace(reason); cleaned != "" {
		return cleaned
	}
	return fallback
}
|
|
|
|
// buildSearchQueries derives the list of search variants for a query.
//
// The base phrase is englishQuery, or originalQuery when englishQuery is
// blank, with whitespace collapsed. The result contains the base, the base
// with the stand-alone word "pov" (any letter case) spelled out as "point of
// view", and the base followed by each footage-style suffix. Duplicates are
// removed case-insensitively, keeping the first occurrence.
//
// "pov" is replaced only as a whole word, so words that merely contain it
// (such as "poverty") are left intact. When both inputs are blank an empty,
// non-nil slice is returned rather than suffix-only queries.
func buildSearchQueries(originalQuery, englishQuery string) []string {
	words := strings.Fields(englishQuery)
	if len(words) == 0 {
		words = strings.Fields(originalQuery)
	}
	if len(words) == 0 {
		return []string{}
	}
	base := strings.Join(words, " ")

	spelledOut := make([]string, len(words))
	for i, word := range words {
		if strings.EqualFold(word, "pov") {
			word = "point of view"
		}
		spelledOut[i] = word
	}

	candidates := []string{
		base,
		strings.Join(spelledOut, " "),
		base + " stock footage",
		base + " b-roll",
		base + " cinematic footage",
		base + " editorial footage",
		base + " establishing shot",
	}

	seen := make(map[string]bool, len(candidates))
	queries := make([]string, 0, len(candidates))
	for _, candidate := range candidates {
		key := strings.ToLower(candidate)
		if seen[key] {
			continue
		}
		seen[key] = true
		queries = append(queries, candidate)
	}
	return queries
}
|
|
|
|
// sanitizePlainEnglishLine returns the first usable line of a model reply.
//
// Each line is stripped of surrounding whitespace and quote characters
// (", ', `), and of a leading "translation:", "english:", or
// "translated query:" label (matched case-insensitively). Whatever follows a
// label is stripped of whitespace and quotes again, so `Translation: "city
// night"` yields `city night`. Lines that are empty or start with "here is" /
// "the translation" are skipped. "" is returned when no line qualifies.
func sanitizePlainEnglishLine(text string) string {
	// unquote removes whitespace and quote characters from both ends,
	// regardless of which of the two comes first.
	unquote := func(s string) string {
		return strings.TrimSpace(strings.Trim(strings.TrimSpace(s), "\"'`"))
	}
	labels := []string{"translation:", "english:", "translated query:"}

	for _, raw := range strings.Split(text, "\n") {
		line := unquote(raw)
		if line == "" {
			continue
		}
		for _, label := range labels {
			// Compare on the line itself so the slice offset always matches
			// the bytes that were actually tested.
			if len(line) >= len(label) && strings.EqualFold(line[:len(label)], label) {
				line = unquote(line[len(label):])
			}
		}

		lower := strings.ToLower(line)
		if line == "" || strings.HasPrefix(lower, "here is") || strings.HasPrefix(lower, "the translation") {
			continue
		}
		return line
	}
	return ""
}
|
|
|
|
func looksMostlyASCII(text string) bool {
|
|
ascii := 0
|
|
runes := []rune(text)
|
|
for _, r := range runes {
|
|
if r <= 127 {
|
|
ascii++
|
|
}
|
|
}
|
|
return ascii >= len(runes)*8/10
|
|
}
|
|
|
|
// isLikelyEnglishQuery reports whether text contains at least one ASCII
// letter and no non-ASCII rune at all.
func isLikelyEnglishQuery(text string) bool {
	hasLetter := false
	for _, r := range text {
		if r > 127 {
			return false
		}
		if ('a' <= r && r <= 'z') || ('A' <= r && r <= 'Z') {
			hasLetter = true
		}
	}
	return hasLetter
}
|
|
|
|
// translateKoreanMediaTerms replaces known Korean media terms in query with
// their English equivalents using a fixed dictionary.
//
// Longer Korean terms are applied before shorter ones so that phrases win
// over the words they contain. Every replacement is padded with spaces and
// the result is whitespace-collapsed, so terms written without a separator
// ("행복한커플") come out as separate English words ("happy couple") instead
// of one fused token. Text that is not in the dictionary is left as is.
func translateKoreanMediaTerms(query string) string {
	replacements := []struct {
		korean  string
		english string
	}{
		{korean: "사이버 펑크 도시", english: "cyberpunk city"},
		{korean: "사이버펑크 도시", english: "cyberpunk city"},
		{korean: "사이버 펑크", english: "cyberpunk"},
		{korean: "사이버펑크", english: "cyberpunk"},
		{korean: "네온 도시", english: "neon city"},
		{korean: "미래 도시", english: "futuristic city"},
		{korean: "숲속", english: "forest"},
		{korean: "다정한", english: "affectionate"},
		{korean: "항공샷", english: "aerial shot"},
		{korean: "사람들", english: "people"},
		{korean: "행복한", english: "happy"},
		{korean: "커플", english: "couple"},
		{korean: "연인", english: "lovers"},
		{korean: "도시", english: "city"},
		{korean: "야경", english: "night city"},
		{korean: "거리", english: "street"},
		{korean: "골목", english: "alley"},
		{korean: "바다", english: "ocean"},
		{korean: "해변", english: "beach"},
		{korean: "노을", english: "sunset"},
		{korean: "자연", english: "nature"},
		{korean: "드론", english: "drone"},
		{korean: "인파", english: "crowd"},
		{korean: "공원", english: "park"},
		{korean: "숲", english: "forest"},
		{korean: "비", english: "rain"},
		{korean: "눈", english: "snow"},
		{korean: "산", english: "mountain"},
	}
	// Longest term first (by rune count); the stable sort keeps the listed
	// order among terms of equal length.
	sort.SliceStable(replacements, func(i, j int) bool {
		return len([]rune(replacements[i].korean)) > len([]rune(replacements[j].korean))
	})

	translated := strings.TrimSpace(query)
	for _, replacement := range replacements {
		// The padding keeps neighbouring replacements apart; surplus spaces
		// are collapsed below.
		translated = strings.ReplaceAll(translated, replacement.korean, " "+replacement.english+" ")
	}
	return strings.Join(strings.Fields(translated), " ")
}
|
|
|
|
// normalizeKnownMediaPhrases rewrites the Korean spellings of "cyberpunk"
// (with or without "city") to English and collapses runs of whitespace into
// single spaces. All other text is passed through.
func normalizeKnownMediaPhrases(query string) string {
	// Ordered so that the longer phrases are rewritten before their prefixes.
	phrases := [...][2]string{
		{"사이버 펑크 도시", "cyberpunk city"},
		{"사이버펑크 도시", "cyberpunk city"},
		{"사이버 펑크", "cyberpunk"},
		{"사이버펑크", "cyberpunk"},
	}

	out := strings.TrimSpace(query)
	for i := range phrases {
		out = strings.ReplaceAll(out, phrases[i][0], phrases[i][1])
	}
	return strings.Join(strings.Fields(out), " ")
}
|
|
|
|
// isOvercompressedTranslation reports whether a multi-word original was
// reduced to fewer than two words. The single words "cyberpunk",
// "nightlife", and "cityscape" (case-insensitive) are accepted as legitimate
// one-word results.
func isOvercompressedTranslation(original, translated string) bool {
	// strings.Fields ignores leading and trailing whitespace on its own.
	if len(strings.Fields(original)) < 2 {
		return false
	}
	if len(strings.Fields(translated)) >= 2 {
		return false
	}
	switch strings.ToLower(strings.TrimSpace(translated)) {
	case "cyberpunk", "nightlife", "cityscape":
		return false
	}
	return true
}
|
|
|
|
func (g *GeminiService) translateViaGoogle(query string) (string, error) {
|
|
return g.translateViaGoogleToTarget(query, "en")
|
|
}
|
|
|
|
func (g *GeminiService) translateViaGoogleToTarget(query, targetLanguage string) (string, error) {
|
|
baseURL := g.TranslateEndpoint
|
|
if strings.TrimSpace(baseURL) == "" {
|
|
baseURL = "https://translate.googleapis.com/translate_a/single"
|
|
}
|
|
targetLanguage = strings.TrimSpace(targetLanguage)
|
|
if targetLanguage == "" {
|
|
targetLanguage = "en"
|
|
}
|
|
endpoint := baseURL + "?client=gtx&sl=auto&tl=" + neturl.QueryEscape(targetLanguage) + "&dt=t&q=" + neturl.QueryEscape(query)
|
|
resp, err := g.Client.Get(endpoint)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
if resp.StatusCode >= 300 {
|
|
return "", fmt.Errorf("google translate fallback returned status %d", resp.StatusCode)
|
|
}
|
|
|
|
var payload []any
|
|
if err := json.NewDecoder(resp.Body).Decode(&payload); err != nil {
|
|
return "", err
|
|
}
|
|
if len(payload) == 0 {
|
|
return "", fmt.Errorf("google translate fallback returned no payload")
|
|
}
|
|
top, ok := payload[0].([]any)
|
|
if !ok {
|
|
return "", fmt.Errorf("google translate fallback returned unexpected payload")
|
|
}
|
|
|
|
var builder strings.Builder
|
|
for _, part := range top {
|
|
segment, ok := part.([]any)
|
|
if !ok || len(segment) == 0 {
|
|
continue
|
|
}
|
|
if text, ok := segment[0].(string); ok {
|
|
builder.WriteString(text)
|
|
}
|
|
}
|
|
translated := strings.TrimSpace(builder.String())
|
|
if translated == "" {
|
|
return "", fmt.Errorf("google translate fallback returned empty translation")
|
|
}
|
|
return translated, nil
|
|
}
|