Rewrite search flow and enrich preview assets
build-push / docker (push) Successful in 4m6s

This commit is contained in:
AI Assistant
2026-03-13 12:50:25 +09:00
parent de2488654a
commit b78865d4bf
5 changed files with 432 additions and 268 deletions
+68 -161
View File
@@ -9,6 +9,9 @@ import (
"mime"
"net/http"
neturl "net/url"
"os"
"os/exec"
"path/filepath"
"strings"
"time"
)
@@ -19,12 +22,13 @@ type GeminiService struct {
}
type AIRecommendation struct {
Title string `json:"title"`
Link string `json:"link"`
ThumbnailURL string `json:"thumbnailUrl"`
Source string `json:"source"`
Reason string `json:"reason"`
Recommended bool `json:"recommended"`
Title string `json:"title"`
Link string `json:"link"`
ThumbnailURL string `json:"thumbnailUrl"`
PreviewVideoURL string `json:"previewVideoUrl"`
Source string `json:"source"`
Reason string `json:"reason"`
Recommended bool `json:"recommended"`
}
type QueryExpansion struct {
@@ -39,141 +43,8 @@ func NewGeminiService(apiKey string) *GeminiService {
}
func (g *GeminiService) ExpandQuery(query string) ([]string, error) {
if g.APIKey == "" {
return fallbackQueryExpansion(query, query), nil
}
englishBase := g.TranslateQuery(query)
body := map[string]any{
"systemInstruction": map[string]any{
"parts": []map[string]string{
{
"text": "You are a JSON-only API. Output valid JSON only. Never add prose, labels, markdown, or explanations before or after the JSON.",
},
},
},
"contents": []map[string]any{
{
"parts": []map[string]string{
{
"text": `Return JSON only in this shape: {"querywords":["..."]}.
Generate at most 10 concise English search variations for media discovery across Google Video, Envato, and Artgrid.
The queries must be usable directly in English search engines for stock footage discovery.
Prioritize media, video footage, stock footage, cinematic b-roll, editorial footage, and scene-based search terms.
Avoid celebrity gossip, reaction-style phrasing, clickbait phrasing, and generic web search wording.
Do not output Korean unless it is part of a proper noun.
Original user query: ` + query + `
English base translation: ` + englishBase,
},
},
},
},
"generationConfig": map[string]any{
"responseMimeType": "application/json",
"temperature": 0.2,
"maxOutputTokens": 220,
"responseSchema": map[string]any{
"type": "OBJECT",
"properties": map[string]any{
"querywords": map[string]any{
"type": "ARRAY",
"items": map[string]any{
"type": "STRING",
},
},
},
"required": []string{"querywords"},
},
},
}
rawText, err := g.generateText(body)
if err != nil {
return fallbackQueryExpansion(query, englishBase), nil
}
jsonText, err := extractJSONObject(rawText)
if err != nil {
strictBody := map[string]any{
"systemInstruction": map[string]any{
"parts": []map[string]string{
{
"text": "You are a strict JSON emitter. Output one valid JSON object only. Do not write any other text.",
},
},
},
"contents": []map[string]any{
{
"parts": []map[string]string{
{
"text": `STRICT JSON ONLY.
Output must start with { and end with }.
Do not add prose, explanations, markdown, code fences, or labels.
Return exactly this shape: {"querywords":["..."]}.
Generate up to 10 search queries for media discovery across Google Video, Envato, and Artgrid.
Every query must be in natural English and suitable for stock-footage search.
Original user query: ` + query + `
English base translation: ` + englishBase,
},
},
},
},
"generationConfig": map[string]any{
"responseMimeType": "application/json",
"temperature": 0.1,
"maxOutputTokens": 220,
"responseSchema": map[string]any{
"type": "OBJECT",
"properties": map[string]any{
"querywords": map[string]any{
"type": "ARRAY",
"items": map[string]any{
"type": "STRING",
},
},
},
"required": []string{"querywords"},
},
},
}
rawText, retryErr := g.generateText(strictBody)
if retryErr != nil {
return fallbackQueryExpansion(query, englishBase), nil
}
jsonText, err = extractJSONObject(rawText)
if err != nil {
return fallbackQueryExpansion(query, englishBase), nil
}
}
var parsed QueryExpansion
if err := json.Unmarshal([]byte(jsonText), &parsed); err != nil {
return fallbackQueryExpansion(query, englishBase), nil
}
queries := fallbackQueryExpansion(query, englishBase)
seen := map[string]bool{}
englishOnly := !strings.EqualFold(strings.TrimSpace(englishBase), strings.TrimSpace(query))
for _, existing := range queries {
seen[strings.ToLower(strings.TrimSpace(existing))] = true
}
for _, item := range parsed.Querywords {
trimmed := strings.TrimSpace(item)
if trimmed == "" {
continue
}
if englishOnly && !isLikelyEnglishQuery(trimmed) {
continue
}
key := strings.ToLower(trimmed)
if seen[key] {
continue
}
seen[key] = true
queries = append(queries, trimmed)
}
return queries, nil
return buildSearchQueries(query, englishBase), nil
}
func (g *GeminiService) TranslateQuery(query string) string {
@@ -277,7 +148,7 @@ User query: ` + query,
maxImages := min(len(candidates), 10)
for idx := 0; idx < maxImages; idx++ {
img, mimeType, err := fetchImageAsInlineData(g.Client, candidates[idx].ThumbnailURL)
img, mimeType, err := fetchCandidateVisualInlineData(g.Client, candidates[idx])
if err != nil {
continue
}
@@ -348,24 +219,26 @@ User query: ` + query,
}
src := candidates[rec.Index]
recommendations = append(recommendations, AIRecommendation{
Title: src.Title,
Link: src.Link,
ThumbnailURL: src.ThumbnailURL,
Source: src.Source,
Reason: rec.Reason,
Recommended: true,
Title: src.Title,
Link: src.Link,
ThumbnailURL: src.ThumbnailURL,
PreviewVideoURL: src.PreviewVideoURL,
Source: src.Source,
Reason: rec.Reason,
Recommended: true,
})
}
if len(recommendations) == 0 {
for _, candidate := range candidates[:min(4, len(candidates))] {
recommendations = append(recommendations, AIRecommendation{
Title: candidate.Title,
Link: candidate.Link,
ThumbnailURL: candidate.ThumbnailURL,
Source: candidate.Source,
Reason: "Fallback result because Gemini returned no recommended items.",
Recommended: true,
Title: candidate.Title,
Link: candidate.Link,
ThumbnailURL: candidate.ThumbnailURL,
PreviewVideoURL: candidate.PreviewVideoURL,
Source: candidate.Source,
Reason: "Fallback result because Gemini returned no recommended items.",
Recommended: true,
})
}
}
@@ -374,6 +247,9 @@ User query: ` + query,
}
func fetchImageAsInlineData(client *http.Client, imageURL string) (string, string, error) {
if strings.TrimSpace(imageURL) == "" {
return "", "", fmt.Errorf("image url is empty")
}
resp, err := client.Get(imageURL)
if err != nil {
return "", "", err
@@ -397,6 +273,40 @@ func fetchImageAsInlineData(client *http.Client, imageURL string) (string, strin
return base64.StdEncoding.EncodeToString(data), mimeType, nil
}
// fetchCandidateVisualInlineData returns a still image for candidate as
// base64-encoded data plus its MIME type.
//
// The thumbnail is tried first via fetchImageAsInlineData. If the candidate has
// no thumbnail, or downloading it fails, a frame is extracted from the preview
// video instead. When neither source yields an image the returned error
// describes what actually went wrong: a failed thumbnail download is reported
// as such rather than being masked as "no thumbnail".
func fetchCandidateVisualInlineData(client *http.Client, candidate SearchResult) (string, string, error) {
	var thumbErr error
	if candidate.ThumbnailURL != "" {
		data, mimeType, err := fetchImageAsInlineData(client, candidate.ThumbnailURL)
		if err == nil {
			return data, mimeType, nil
		}
		// Remember the failure so it is not lost if the fallback is
		// unavailable or fails as well.
		thumbErr = err
	}

	if candidate.PreviewVideoURL != "" {
		data, mimeType, err := extractFrameFromVideo(candidate.PreviewVideoURL)
		if err == nil {
			return data, mimeType, nil
		}
		if thumbErr != nil {
			return "", "", fmt.Errorf("extracting preview frame: %w (thumbnail fetch also failed: %v)", err, thumbErr)
		}
		return "", "", fmt.Errorf("extracting preview frame: %w", err)
	}

	if thumbErr != nil {
		return "", "", fmt.Errorf("fetching thumbnail: %w", thumbErr)
	}
	return "", "", fmt.Errorf("candidate has no thumbnail or preview video")
}
// extractFrameFromVideo grabs one JPEG frame, half a second into the video at
// videoURL, by running ffmpeg, and returns it as base64-encoded data together
// with the MIME type "image/jpeg".
//
// Only absolute http and https URLs are accepted. The URL is handed to ffmpeg
// as an input, and ffmpeg would otherwise happily open local paths or other
// protocols (file:, concat:, ...), which must not be reachable from URLs that
// originate in search results.
//
// The ffmpeg process is killed if it has not finished within a fixed timeout
// so a stalled remote host cannot block the caller indefinitely.
func extractFrameFromVideo(videoURL string) (string, string, error) {
	// Upper bound for a single ffmpeg run.
	const frameExtractionTimeout = 30 * time.Second

	trimmedURL := strings.TrimSpace(videoURL)
	parsed, err := neturl.Parse(trimmedURL)
	if err != nil {
		return "", "", fmt.Errorf("parsing preview video url: %w", err)
	}
	scheme := strings.ToLower(parsed.Scheme)
	if (scheme != "http" && scheme != "https") || parsed.Host == "" {
		return "", "", fmt.Errorf("unsupported preview video url %q: only absolute http(s) urls are allowed", videoURL)
	}

	tempDir, err := os.MkdirTemp("", "gemini-frame-*")
	if err != nil {
		return "", "", fmt.Errorf("creating frame temp dir: %w", err)
	}
	defer os.RemoveAll(tempDir)

	framePath := filepath.Join(tempDir, "frame.jpg")
	cmd := exec.Command("ffmpeg", "-y", "-ss", "00:00:00.500", "-i", trimmedURL, "-frames:v", "1", "-q:v", "2", framePath)

	// Stdout and Stderr share one writer, so os/exec serializes the writes
	// and the combined output can be read safely once Wait has returned.
	var output strings.Builder
	cmd.Stdout = &output
	cmd.Stderr = &output
	if err := cmd.Start(); err != nil {
		return "", "", fmt.Errorf("starting ffmpeg: %w", err)
	}

	// cmd.Process is only valid after Start, hence the timer is armed here.
	killTimer := time.AfterFunc(frameExtractionTimeout, func() {
		// Best effort: the process may already have exited on its own.
		_ = cmd.Process.Kill()
	})
	waitErr := cmd.Wait()
	// Stop reports false when the timer already fired, i.e. ffmpeg was killed.
	timedOut := !killTimer.Stop()
	if waitErr != nil {
		if timedOut {
			return "", "", fmt.Errorf("ffmpeg frame extraction timed out after %s: %w", frameExtractionTimeout, waitErr)
		}
		return "", "", fmt.Errorf("ffmpeg frame extraction failed: %w: %s", waitErr, strings.TrimSpace(output.String()))
	}

	data, err := os.ReadFile(framePath)
	if err != nil {
		return "", "", fmt.Errorf("reading extracted frame: %w", err)
	}
	if len(data) == 0 {
		return "", "", fmt.Errorf("ffmpeg produced an empty frame for %q", trimmedURL)
	}
	return base64.StdEncoding.EncodeToString(data), "image/jpeg", nil
}
func min(a, b int) int {
if a < b {
return a
@@ -457,27 +367,24 @@ func truncateForError(text string, limit int) string {
return trimmed[:limit] + "..."
}
func fallbackQueryExpansion(originalQuery, englishQuery string) []string {
func buildSearchQueries(originalQuery, englishQuery string) []string {
base := strings.TrimSpace(englishQuery)
if base == "" {
base = strings.TrimSpace(originalQuery)
}
candidates := []string{
base,
base + " b-roll",
strings.ReplaceAll(base, "pov", "point of view"),
base + " stock footage",
base + " b-roll",
base + " cinematic footage",
base + " establishing shot",
base + " editorial footage",
base + " urban scene",
base + " ambient footage",
base + " 4k footage",
base + " cinematic b-roll",
base + " establishing shot",
}
seen := map[string]bool{}
queries := make([]string, 0, len(candidates))
for _, item := range candidates {
trimmed := strings.TrimSpace(item)
trimmed := strings.TrimSpace(strings.Join(strings.Fields(item), " "))
if trimmed == "" {
continue
}