This commit is contained in:
+68
-161
@@ -9,6 +9,9 @@ import (
|
||||
"mime"
|
||||
"net/http"
|
||||
neturl "net/url"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
@@ -19,12 +22,13 @@ type GeminiService struct {
|
||||
}
|
||||
|
||||
// AIRecommendation is one recommended search result as serialized to JSON
// for the client. Title, Link, ThumbnailURL, PreviewVideoURL and Source are
// copied from the originating search candidate; Reason carries the
// explanation text attached to the recommendation.
type AIRecommendation struct {
	Title           string `json:"title"`
	Link            string `json:"link"`
	ThumbnailURL    string `json:"thumbnailUrl"`
	PreviewVideoURL string `json:"previewVideoUrl"`
	Source          string `json:"source"`
	Reason          string `json:"reason"`
	Recommended     bool   `json:"recommended"`
}
|
||||
|
||||
type QueryExpansion struct {
|
||||
@@ -39,141 +43,8 @@ func NewGeminiService(apiKey string) *GeminiService {
|
||||
}
|
||||
|
||||
func (g *GeminiService) ExpandQuery(query string) ([]string, error) {
|
||||
if g.APIKey == "" {
|
||||
return fallbackQueryExpansion(query, query), nil
|
||||
}
|
||||
|
||||
englishBase := g.TranslateQuery(query)
|
||||
|
||||
body := map[string]any{
|
||||
"systemInstruction": map[string]any{
|
||||
"parts": []map[string]string{
|
||||
{
|
||||
"text": "You are a JSON-only API. Output valid JSON only. Never add prose, labels, markdown, or explanations before or after the JSON.",
|
||||
},
|
||||
},
|
||||
},
|
||||
"contents": []map[string]any{
|
||||
{
|
||||
"parts": []map[string]string{
|
||||
{
|
||||
"text": `Return JSON only in this shape: {"querywords":["..."]}.
|
||||
Generate at most 10 concise English search variations for media discovery across Google Video, Envato, and Artgrid.
|
||||
The queries must be usable directly in English search engines for stock footage discovery.
|
||||
Prioritize media, video footage, stock footage, cinematic b-roll, editorial footage, and scene-based search terms.
|
||||
Avoid celebrity gossip, reaction-style phrasing, clickbait phrasing, and generic web search wording.
|
||||
Do not output Korean unless it is part of a proper noun.
|
||||
Original user query: ` + query + `
|
||||
English base translation: ` + englishBase,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
"generationConfig": map[string]any{
|
||||
"responseMimeType": "application/json",
|
||||
"temperature": 0.2,
|
||||
"maxOutputTokens": 220,
|
||||
"responseSchema": map[string]any{
|
||||
"type": "OBJECT",
|
||||
"properties": map[string]any{
|
||||
"querywords": map[string]any{
|
||||
"type": "ARRAY",
|
||||
"items": map[string]any{
|
||||
"type": "STRING",
|
||||
},
|
||||
},
|
||||
},
|
||||
"required": []string{"querywords"},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
rawText, err := g.generateText(body)
|
||||
if err != nil {
|
||||
return fallbackQueryExpansion(query, englishBase), nil
|
||||
}
|
||||
|
||||
jsonText, err := extractJSONObject(rawText)
|
||||
if err != nil {
|
||||
strictBody := map[string]any{
|
||||
"systemInstruction": map[string]any{
|
||||
"parts": []map[string]string{
|
||||
{
|
||||
"text": "You are a strict JSON emitter. Output one valid JSON object only. Do not write any other text.",
|
||||
},
|
||||
},
|
||||
},
|
||||
"contents": []map[string]any{
|
||||
{
|
||||
"parts": []map[string]string{
|
||||
{
|
||||
"text": `STRICT JSON ONLY.
|
||||
Output must start with { and end with }.
|
||||
Do not add prose, explanations, markdown, code fences, or labels.
|
||||
Return exactly this shape: {"querywords":["..."]}.
|
||||
Generate up to 10 search queries for media discovery across Google Video, Envato, and Artgrid.
|
||||
Every query must be in natural English and suitable for stock-footage search.
|
||||
Original user query: ` + query + `
|
||||
English base translation: ` + englishBase,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
"generationConfig": map[string]any{
|
||||
"responseMimeType": "application/json",
|
||||
"temperature": 0.1,
|
||||
"maxOutputTokens": 220,
|
||||
"responseSchema": map[string]any{
|
||||
"type": "OBJECT",
|
||||
"properties": map[string]any{
|
||||
"querywords": map[string]any{
|
||||
"type": "ARRAY",
|
||||
"items": map[string]any{
|
||||
"type": "STRING",
|
||||
},
|
||||
},
|
||||
},
|
||||
"required": []string{"querywords"},
|
||||
},
|
||||
},
|
||||
}
|
||||
rawText, retryErr := g.generateText(strictBody)
|
||||
if retryErr != nil {
|
||||
return fallbackQueryExpansion(query, englishBase), nil
|
||||
}
|
||||
jsonText, err = extractJSONObject(rawText)
|
||||
if err != nil {
|
||||
return fallbackQueryExpansion(query, englishBase), nil
|
||||
}
|
||||
}
|
||||
|
||||
var parsed QueryExpansion
|
||||
if err := json.Unmarshal([]byte(jsonText), &parsed); err != nil {
|
||||
return fallbackQueryExpansion(query, englishBase), nil
|
||||
}
|
||||
|
||||
queries := fallbackQueryExpansion(query, englishBase)
|
||||
seen := map[string]bool{}
|
||||
englishOnly := !strings.EqualFold(strings.TrimSpace(englishBase), strings.TrimSpace(query))
|
||||
for _, existing := range queries {
|
||||
seen[strings.ToLower(strings.TrimSpace(existing))] = true
|
||||
}
|
||||
for _, item := range parsed.Querywords {
|
||||
trimmed := strings.TrimSpace(item)
|
||||
if trimmed == "" {
|
||||
continue
|
||||
}
|
||||
if englishOnly && !isLikelyEnglishQuery(trimmed) {
|
||||
continue
|
||||
}
|
||||
key := strings.ToLower(trimmed)
|
||||
if seen[key] {
|
||||
continue
|
||||
}
|
||||
seen[key] = true
|
||||
queries = append(queries, trimmed)
|
||||
}
|
||||
return queries, nil
|
||||
return buildSearchQueries(query, englishBase), nil
|
||||
}
|
||||
|
||||
func (g *GeminiService) TranslateQuery(query string) string {
|
||||
@@ -277,7 +148,7 @@ User query: ` + query,
|
||||
|
||||
maxImages := min(len(candidates), 10)
|
||||
for idx := 0; idx < maxImages; idx++ {
|
||||
img, mimeType, err := fetchImageAsInlineData(g.Client, candidates[idx].ThumbnailURL)
|
||||
img, mimeType, err := fetchCandidateVisualInlineData(g.Client, candidates[idx])
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
@@ -348,24 +219,26 @@ User query: ` + query,
|
||||
}
|
||||
src := candidates[rec.Index]
|
||||
recommendations = append(recommendations, AIRecommendation{
|
||||
Title: src.Title,
|
||||
Link: src.Link,
|
||||
ThumbnailURL: src.ThumbnailURL,
|
||||
Source: src.Source,
|
||||
Reason: rec.Reason,
|
||||
Recommended: true,
|
||||
Title: src.Title,
|
||||
Link: src.Link,
|
||||
ThumbnailURL: src.ThumbnailURL,
|
||||
PreviewVideoURL: src.PreviewVideoURL,
|
||||
Source: src.Source,
|
||||
Reason: rec.Reason,
|
||||
Recommended: true,
|
||||
})
|
||||
}
|
||||
|
||||
if len(recommendations) == 0 {
|
||||
for _, candidate := range candidates[:min(4, len(candidates))] {
|
||||
recommendations = append(recommendations, AIRecommendation{
|
||||
Title: candidate.Title,
|
||||
Link: candidate.Link,
|
||||
ThumbnailURL: candidate.ThumbnailURL,
|
||||
Source: candidate.Source,
|
||||
Reason: "Fallback result because Gemini returned no recommended items.",
|
||||
Recommended: true,
|
||||
Title: candidate.Title,
|
||||
Link: candidate.Link,
|
||||
ThumbnailURL: candidate.ThumbnailURL,
|
||||
PreviewVideoURL: candidate.PreviewVideoURL,
|
||||
Source: candidate.Source,
|
||||
Reason: "Fallback result because Gemini returned no recommended items.",
|
||||
Recommended: true,
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -374,6 +247,9 @@ User query: ` + query,
|
||||
}
|
||||
|
||||
func fetchImageAsInlineData(client *http.Client, imageURL string) (string, string, error) {
|
||||
if strings.TrimSpace(imageURL) == "" {
|
||||
return "", "", fmt.Errorf("image url is empty")
|
||||
}
|
||||
resp, err := client.Get(imageURL)
|
||||
if err != nil {
|
||||
return "", "", err
|
||||
@@ -397,6 +273,40 @@ func fetchImageAsInlineData(client *http.Client, imageURL string) (string, strin
|
||||
return base64.StdEncoding.EncodeToString(data), mimeType, nil
|
||||
}
|
||||
|
||||
func fetchCandidateVisualInlineData(client *http.Client, candidate SearchResult) (string, string, error) {
|
||||
if candidate.ThumbnailURL != "" {
|
||||
data, mimeType, err := fetchImageAsInlineData(client, candidate.ThumbnailURL)
|
||||
if err == nil {
|
||||
return data, mimeType, nil
|
||||
}
|
||||
}
|
||||
if candidate.PreviewVideoURL != "" {
|
||||
return extractFrameFromVideo(candidate.PreviewVideoURL)
|
||||
}
|
||||
return "", "", fmt.Errorf("candidate has no thumbnail or preview video")
|
||||
}
|
||||
|
||||
// extractFrameFromVideo runs ffmpeg to grab a single JPEG frame, seeking to
// 0.5s of the video at videoURL, and returns the frame base64-encoded
// together with the MIME type "image/jpeg".
//
// videoURL must be an absolute http or https URL. Anything else (local
// paths, file:/concat: inputs, strings that look like command-line options)
// is rejected before ffmpeg is started, because the URL originates from
// external search results and ffmpeg would otherwise happily open it.
//
// The ffmpeg process is killed if it has not finished within a fixed
// timeout, so a stalled remote server cannot block the caller indefinitely.
func extractFrameFromVideo(videoURL string) (string, string, error) {
	const extractTimeout = 30 * time.Second

	source := strings.TrimSpace(videoURL)
	parsed, err := neturl.Parse(source)
	if err != nil {
		return "", "", fmt.Errorf("parsing preview video url: %w", err)
	}
	if (parsed.Scheme != "http" && parsed.Scheme != "https") || parsed.Host == "" {
		return "", "", fmt.Errorf("unsupported preview video url %q", videoURL)
	}

	tempDir, err := os.MkdirTemp("", "gemini-frame-*")
	if err != nil {
		return "", "", err
	}
	defer os.RemoveAll(tempDir)

	framePath := filepath.Join(tempDir, "frame.jpg")
	cmd := exec.Command("ffmpeg", "-y", "-ss", "00:00:00.500", "-i", source, "-frames:v", "1", "-q:v", "2", framePath)

	// Collect stdout and stderr into one buffer, as CombinedOutput would.
	var output strings.Builder
	cmd.Stdout = &output
	cmd.Stderr = &output

	if err := cmd.Start(); err != nil {
		return "", "", fmt.Errorf("starting ffmpeg: %w", err)
	}
	// Kill the process if it outlives the timeout; Wait then returns an error.
	killer := time.AfterFunc(extractTimeout, func() { _ = cmd.Process.Kill() })
	err = cmd.Wait()
	timedOut := !killer.Stop()
	if err != nil {
		if timedOut {
			return "", "", fmt.Errorf("ffmpeg frame extraction timed out after %s: %w", extractTimeout, err)
		}
		return "", "", fmt.Errorf("ffmpeg frame extraction failed: %w: %s", err, strings.TrimSpace(output.String()))
	}

	data, err := os.ReadFile(framePath)
	if err != nil {
		return "", "", err
	}
	if len(data) == 0 {
		return "", "", fmt.Errorf("ffmpeg produced an empty frame")
	}
	return base64.StdEncoding.EncodeToString(data), "image/jpeg", nil
}
|
||||
|
||||
func min(a, b int) int {
|
||||
if a < b {
|
||||
return a
|
||||
@@ -457,27 +367,24 @@ func truncateForError(text string, limit int) string {
|
||||
return trimmed[:limit] + "..."
|
||||
}
|
||||
|
||||
func fallbackQueryExpansion(originalQuery, englishQuery string) []string {
|
||||
func buildSearchQueries(originalQuery, englishQuery string) []string {
|
||||
base := strings.TrimSpace(englishQuery)
|
||||
if base == "" {
|
||||
base = strings.TrimSpace(originalQuery)
|
||||
}
|
||||
candidates := []string{
|
||||
base,
|
||||
base + " b-roll",
|
||||
strings.ReplaceAll(base, "pov", "point of view"),
|
||||
base + " stock footage",
|
||||
base + " b-roll",
|
||||
base + " cinematic footage",
|
||||
base + " establishing shot",
|
||||
base + " editorial footage",
|
||||
base + " urban scene",
|
||||
base + " ambient footage",
|
||||
base + " 4k footage",
|
||||
base + " cinematic b-roll",
|
||||
base + " establishing shot",
|
||||
}
|
||||
seen := map[string]bool{}
|
||||
queries := make([]string, 0, len(candidates))
|
||||
for _, item := range candidates {
|
||||
trimmed := strings.TrimSpace(item)
|
||||
trimmed := strings.TrimSpace(strings.Join(strings.Fields(item), " "))
|
||||
if trimmed == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user