Expand search coverage and preview parsing
build-push / docker (push) Successful in 4m17s

This commit is contained in:
AI Assistant
2026-03-16 13:46:28 +09:00
parent 82cead950e
commit c92ef97c98
7 changed files with 143 additions and 19 deletions
+33 -8
View File
@@ -64,9 +64,9 @@ func (s *SearchService) SearchMedia(queries []string, enabledPlatforms map[strin
results := make([]SearchResult, 0, 90)
var lastErr error
baseQueries := limitQueries(queries, 6)
baseQueries := limitQueries(queries, 10)
shuffleStrings(baseQueries)
primaryQueries := baseQueries[:minInt(len(baseQueries), 3)]
primaryQueries := baseQueries[:minInt(len(baseQueries), 5)]
runSearchPass := func(bases []string, onlyMissing bool) {
for _, base := range bases {
base = strings.TrimSpace(base)
@@ -331,6 +331,8 @@ func (s *SearchService) search(query, categories, engine, source string) ([]Sear
// buildGoogleVideoQueries expands a base search phrase into the query variants
// sent for video search. Each variant wraps base in double quotes, ORs together
// a group of footage-oriented phrases, and appends negative terms for content
// types to exclude. The variants are returned in declaration order.
func buildGoogleVideoQueries(base string) []string {
	// Every template has exactly one %s verb, which receives the base phrase.
	templates := [...]string{
		`"%s" ("stock footage" OR "b-roll" OR cinematic OR "establishing shot" OR editorial) -tutorial -"how to" -review -reaction -course -podcast -vlog -interview -breakdown -edit -editing`,
		`"%s" ("cinematic b-roll" OR "establishing shot" OR "drone footage" OR "urban footage") -tutorial -reaction -vlog -podcast`,
		`"%s" ("night drive" OR "city footage" OR "street footage" OR "editorial footage") -tutorial -review -music`,
	}

	queries := make([]string, 0, len(templates))
	for _, tmpl := range templates {
		queries = append(queries, fmt.Sprintf(tmpl, base))
	}
	return queries
}
@@ -338,6 +340,8 @@ func buildEnvatoQueries(base string) []string {
return []string{
fmt.Sprintf(`"%s" ("stock footage" OR "stock video" OR "b-roll" OR cinematic) site:elements.envato.com`, base),
fmt.Sprintf(`"%s" ("stock footage" OR "stock video" OR "b-roll" OR cinematic) site:elements.envato.com/stock-video`, base),
fmt.Sprintf(`"%s" ("motion graphics" OR "backgrounds" OR "establishing shot" OR "loop") site:elements.envato.com`, base),
fmt.Sprintf(`"%s" ("urban" OR "night city" OR "cyberpunk" OR "sci-fi") site:elements.envato.com`, base),
}
}
@@ -347,6 +351,8 @@ func buildArtgridQueries(base string) []string {
fmt.Sprintf(`"%s" ("footage" OR "cinematic" OR "establishing shot") site:artgrid.io/clip/`, base),
fmt.Sprintf(`"%s" ("stock footage" OR "b-roll" OR cinematic OR editorial) site:artlist.io/stock-footage/clip/`, base),
fmt.Sprintf(`"%s" ("footage" OR "cinematic" OR "establishing shot") site:artlist.io/stock-footage/clip/`, base),
fmt.Sprintf(`"%s" ("night drive" OR "urban night" OR "wet road" OR "cyberpunk") site:artgrid.io/clip/`, base),
fmt.Sprintf(`"%s" ("drone" OR "city skyline" OR "street scene" OR "mood shot") site:artlist.io/stock-footage/clip/`, base),
}
}
@@ -466,16 +472,23 @@ func extractMetaContent(html, property string) string {
}
// escapedSlashPattern matches a forward slash as it appears inside embedded
// JSON or JavaScript: one or more backslashes followed by either "/" or the
// "u002F" unicode escape (final hex digit in either case). Matching a run of
// backslashes covers singly and multiply escaped payloads alike.
var escapedSlashPattern = regexp.MustCompile(`\\+(?:/|u002[fF])`)

// videoPreviewURLPattern matches an absolute http(s) URL containing "mp4" or
// "m3u8", optionally followed by a query string. A match never spans a quote,
// whitespace, or ">" character.
var videoPreviewURLPattern = regexp.MustCompile(`https?://[^"'[:space:]>]+(?:mp4|m3u8)(?:\?[^"'[:space:]>]*)?`)

// extractVideoPreviewURL scans an HTML document (including inline script
// payloads) for a playable MP4 or HLS URL.
//
// Escaped slashes are normalized first so that URLs embedded in JSON are
// recognized. Among all matches, the first one whose text contains "preview",
// "video", or "watermark" (case-insensitive) is returned; if none qualifies,
// the first match is returned. An empty string means no URL was found.
func extractVideoPreviewURL(html string) string {
	normalizedHTML := escapedSlashPattern.ReplaceAllString(html, "/")

	fallback := ""
	for _, match := range videoPreviewURLPattern.FindAllString(normalizedHTML, -1) {
		// Drop doubled backslashes that survive slash normalization.
		candidate := strings.TrimSpace(strings.ReplaceAll(match, `\\`, ""))
		if candidate == "" {
			continue
		}

		lower := strings.ToLower(candidate)
		if strings.Contains(lower, "preview") || strings.Contains(lower, "video") || strings.Contains(lower, "watermark") {
			return candidate
		}

		// Remember the earliest match in case no keyword match turns up.
		if fallback == "" {
			fallback = candidate
		}
	}
	return fallback
}
@@ -572,6 +585,12 @@ func pickImageURL(urls []string) string {
}
func pickVideoURL(urls []string) string {
for _, item := range urls {
lower := strings.ToLower(item)
if strings.Contains(lower, ".m3u8") && (strings.Contains(lower, "artgrid") || strings.Contains(lower, "artlist") || strings.Contains(lower, "cdn")) {
return item
}
}
for _, item := range urls {
lower := strings.ToLower(item)
if strings.Contains(lower, ".mp4") || strings.Contains(lower, ".m3u8") {
@@ -818,7 +837,7 @@ func extractEnvatoPreviewFromHydration(html string) string {
return ""
}
urls := collectURLs(string(decoded))
return pickBestEnvatoPreviewURL(urls)
return firstNonEmpty(pickBestEnvatoPreviewURL(urls), extractVideoPreviewURL(string(decoded)))
}
func extractWindowAssignedValue(html, variable string) string {
@@ -831,6 +850,12 @@ func extractWindowAssignedValue(html, variable string) string {
}
func pickBestEnvatoPreviewURL(urls []string) string {
for _, item := range urls {
lower := strings.ToLower(item)
if strings.Contains(lower, "envatousercontent.com") && strings.HasSuffix(lower, ".mp4") {
return item
}
}
for _, item := range urls {
lower := strings.ToLower(item)
if strings.Contains(lower, "video-previews.elements.envatousercontent.com") && strings.Contains(lower, "watermarked_preview") && strings.HasSuffix(lower, ".mp4") {
+9
View File
@@ -15,6 +15,15 @@ func TestExtractVideoPreviewURLFindsEnvatoPreview(t *testing.T) {
}
}
// TestExtractVideoPreviewURLFindsGenericM3U8 checks that an m3u8 URL embedded
// in a script tag with backslash-escaped slashes is returned with the slashes
// unescaped and its query string intact.
func TestExtractVideoPreviewURLFindsGenericM3U8(t *testing.T) {
	const (
		page     = `<script>window.preview="https:\/\/cdn.example.com\/preview\/master.m3u8?token=abc"</script>`
		expected = "https://cdn.example.com/preview/master.m3u8?token=abc"
	)

	if actual := extractVideoPreviewURL(page); actual != expected {
		t.Fatalf("expected %q, got %q", expected, actual)
	}
}
func TestDeriveEnvatoPreviewFromThumbnail(t *testing.T) {
thumb := "https://elements-resized.envatousercontent.com/elements-video-cover-images/ad0a3abc-7eb0-4075-8f68-8198f9a08777/video_preview/video_preview_0000.jpg?w=1200&amp;h=630"
got := deriveEnvatoPreviewFromThumbnail(thumb)
+4 -4
View File
@@ -71,7 +71,7 @@ func (g *GeminiService) TranslateQuery(query string) string {
"systemInstruction": map[string]any{
"parts": []map[string]string{
{
"text": "You translate media search intents into natural English. Output one plain English search phrase only. No labels, no quotes, no explanations.",
"text": "You are a professional video editor. Infer stronger stock-footage and scene-search wording from the user's keyword, and expand it into natural English that a professional editor would use to find usable footage. Output one plain English search phrase only. No labels, no quotes, no explanations.",
},
},
},
@@ -79,7 +79,7 @@ func (g *GeminiService) TranslateQuery(query string) string {
{
"parts": []map[string]string{
{
"text": "Translate this user query into concise English suitable for stock-footage search: " + trimmed,
"text": "Expand this user query into a concise but editor-grade English footage search phrase suitable for stock-footage discovery: " + trimmed,
},
},
},
@@ -152,13 +152,13 @@ func (g *GeminiService) Recommend(query string, candidates []SearchResult) ([]AI
type geminiPart map[string]any
parts := []geminiPart{
{
"text": `Analyze the provided images for the user's search intent. Return JSON only in this shape:
"text": `You are a professional video editor. Analyze whether each provided visual is suitable as a usable scene or shot for the user's requested keyword. Return JSON only in this shape:
{"recommendations":[{"index":0,"reason":"short reason","recommended":true}]}
Return one entry for every analyzed candidate. Use Korean for every reason. Keep reasons concise but specific enough to explain usefulness.
Mark the strongest matches as recommended=true and weaker matches as recommended=false.
Prefer cinematic b-roll, stock footage, editorial footage, clean composition, usable establishing shots, and professional media thumbnails.
Avoid clickbait faces, exaggerated expressions, meme aesthetics, low-information thumbnails, sensational text overlays, or gossip-style imagery.
Favor thumbnails that look directly useful for media editing and footage sourcing.
Favor scenes that look directly useful for professional editing, sequencing, establishing, cutaway, or mood-building usage.
User query: ` + query,
},
}
+3 -3
View File
@@ -15,7 +15,7 @@ type searchCollector interface {
type envatoCollector struct{}
func (envatoCollector) Name() string { return "Envato" }
func (envatoCollector) MaxResults() int { return 8 }
func (envatoCollector) MaxResults() int { return 14 }
// Enabled reports whether this collector should run. It returns true when no
// platform filter is supplied (nil or empty map); otherwise it returns the
// value stored under the "envato" key.
func (envatoCollector) Enabled(enabledPlatforms map[string]bool) bool {
	if len(enabledPlatforms) == 0 {
		return true
	}
	return enabledPlatforms["envato"]
}
@@ -31,7 +31,7 @@ func (envatoCollector) Enrich(searcher *SearchService, result SearchResult) Sear
type artgridCollector struct{}
func (artgridCollector) Name() string { return "Artgrid" }
func (artgridCollector) MaxResults() int { return 8 }
func (artgridCollector) MaxResults() int { return 14 }
// Enabled reports whether this collector should run. It returns true when no
// platform filter is supplied (nil or empty map); otherwise it returns the
// value stored under the "artgrid" key.
func (artgridCollector) Enabled(enabledPlatforms map[string]bool) bool {
	if len(enabledPlatforms) == 0 {
		return true
	}
	return enabledPlatforms["artgrid"]
}
@@ -47,7 +47,7 @@ func (artgridCollector) Enrich(searcher *SearchService, result SearchResult) Sea
type googleVideoCollector struct{}
func (googleVideoCollector) Name() string { return "Google Video" }
func (googleVideoCollector) MaxResults() int { return 6 }
func (googleVideoCollector) MaxResults() int { return 10 }
// Enabled reports whether this collector should run. It returns true when no
// platform filter is supplied (nil or empty map); otherwise it returns the
// value stored under the "google video" key.
func (googleVideoCollector) Enabled(enabledPlatforms map[string]bool) bool {
	if len(enabledPlatforms) == 0 {
		return true
	}
	return enabledPlatforms["google video"]
}