This commit is contained in:
+33
-8
@@ -64,9 +64,9 @@ func (s *SearchService) SearchMedia(queries []string, enabledPlatforms map[strin
|
||||
results := make([]SearchResult, 0, 90)
|
||||
var lastErr error
|
||||
|
||||
baseQueries := limitQueries(queries, 6)
|
||||
baseQueries := limitQueries(queries, 10)
|
||||
shuffleStrings(baseQueries)
|
||||
primaryQueries := baseQueries[:minInt(len(baseQueries), 3)]
|
||||
primaryQueries := baseQueries[:minInt(len(baseQueries), 5)]
|
||||
runSearchPass := func(bases []string, onlyMissing bool) {
|
||||
for _, base := range bases {
|
||||
base = strings.TrimSpace(base)
|
||||
@@ -331,6 +331,8 @@ func (s *SearchService) search(query, categories, engine, source string) ([]Sear
|
||||
// buildGoogleVideoQueries returns the Google Video search queries derived from
// base. Each query wraps base in double quotes, ORs together a group of
// footage-related phrases, and appends "-term" exclusions. The three variants
// differ only in the phrase group and the exclusions they use.
func buildGoogleVideoQueries(base string) []string {
	return []string{
		fmt.Sprintf(`"%s" ("stock footage" OR "b-roll" OR cinematic OR "establishing shot" OR editorial) -tutorial -"how to" -review -reaction -course -podcast -vlog -interview -breakdown -edit -editing`, base),
		fmt.Sprintf(`"%s" ("cinematic b-roll" OR "establishing shot" OR "drone footage" OR "urban footage") -tutorial -reaction -vlog -podcast`, base),
		fmt.Sprintf(`"%s" ("night drive" OR "city footage" OR "street footage" OR "editorial footage") -tutorial -review -music`, base),
	}
}
|
||||
|
||||
@@ -338,6 +340,8 @@ func buildEnvatoQueries(base string) []string {
|
||||
return []string{
|
||||
fmt.Sprintf(`"%s" ("stock footage" OR "stock video" OR "b-roll" OR cinematic) site:elements.envato.com`, base),
|
||||
fmt.Sprintf(`"%s" ("stock footage" OR "stock video" OR "b-roll" OR cinematic) site:elements.envato.com/stock-video`, base),
|
||||
fmt.Sprintf(`"%s" ("motion graphics" OR "backgrounds" OR "establishing shot" OR "loop") site:elements.envato.com`, base),
|
||||
fmt.Sprintf(`"%s" ("urban" OR "night city" OR "cyberpunk" OR "sci-fi") site:elements.envato.com`, base),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -347,6 +351,8 @@ func buildArtgridQueries(base string) []string {
|
||||
fmt.Sprintf(`"%s" ("footage" OR "cinematic" OR "establishing shot") site:artgrid.io/clip/`, base),
|
||||
fmt.Sprintf(`"%s" ("stock footage" OR "b-roll" OR cinematic OR editorial) site:artlist.io/stock-footage/clip/`, base),
|
||||
fmt.Sprintf(`"%s" ("footage" OR "cinematic" OR "establishing shot") site:artlist.io/stock-footage/clip/`, base),
|
||||
fmt.Sprintf(`"%s" ("night drive" OR "urban night" OR "wet road" OR "cyberpunk") site:artgrid.io/clip/`, base),
|
||||
fmt.Sprintf(`"%s" ("drone" OR "city skyline" OR "street scene" OR "mood shot") site:artlist.io/stock-footage/clip/`, base),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -466,16 +472,23 @@ func extractMetaContent(html, property string) string {
|
||||
}
|
||||
|
||||
// videoPreviewURLPattern matches an http(s) URL that runs up to an "mp4" or
// "m3u8" token, optionally followed by a query string. It is compiled once at
// package scope so extractVideoPreviewURL does not recompile it on every call.
var videoPreviewURLPattern = regexp.MustCompile(`https?://[^"'[:space:]>]+(?:mp4|m3u8)(?:\?[^"'[:space:]>]*)?`)

// escapedSlashReplacer rewrites the escaped forms of "/" found in inline
// JSON/JavaScript back to a plain slash. Patterns are listed longest first
// because strings.Replacer tries them in argument order at each position;
// this keeps `\\/` from being half-consumed by the shorter `\/` rule.
var escapedSlashReplacer = strings.NewReplacer(
	`\\\/`, `/`,
	`\\/`, `/`,
	`\/`, `/`,
	`\u002F`, `/`,
	`\u002f`, `/`,
)

// extractVideoPreviewURL scans html for mp4/m3u8 URLs and returns the best
// candidate, or "" when none is found.
//
// Escaped slashes are normalized before matching so URLs embedded in script
// payloads are recognized. A URL whose lower-cased text contains "preview",
// "video" or "watermark" is returned as soon as it is seen; otherwise the
// first non-empty match is returned.
func extractVideoPreviewURL(html string) string {
	normalized := escapedSlashReplacer.Replace(html)

	var fallback string
	for _, match := range videoPreviewURLPattern.FindAllString(normalized, -1) {
		// Drop any doubled backslashes left over from nested escaping.
		candidate := strings.TrimSpace(strings.ReplaceAll(match, `\\`, ""))
		if candidate == "" {
			continue
		}

		lower := strings.ToLower(candidate)
		if strings.Contains(lower, "preview") || strings.Contains(lower, "video") || strings.Contains(lower, "watermark") {
			return candidate
		}
		if fallback == "" {
			fallback = candidate
		}
	}
	return fallback
}
|
||||
|
||||
@@ -572,6 +585,12 @@ func pickImageURL(urls []string) string {
|
||||
}
|
||||
|
||||
func pickVideoURL(urls []string) string {
|
||||
for _, item := range urls {
|
||||
lower := strings.ToLower(item)
|
||||
if strings.Contains(lower, ".m3u8") && (strings.Contains(lower, "artgrid") || strings.Contains(lower, "artlist") || strings.Contains(lower, "cdn")) {
|
||||
return item
|
||||
}
|
||||
}
|
||||
for _, item := range urls {
|
||||
lower := strings.ToLower(item)
|
||||
if strings.Contains(lower, ".mp4") || strings.Contains(lower, ".m3u8") {
|
||||
@@ -818,7 +837,7 @@ func extractEnvatoPreviewFromHydration(html string) string {
|
||||
return ""
|
||||
}
|
||||
urls := collectURLs(string(decoded))
|
||||
return pickBestEnvatoPreviewURL(urls)
|
||||
return firstNonEmpty(pickBestEnvatoPreviewURL(urls), extractVideoPreviewURL(string(decoded)))
|
||||
}
|
||||
|
||||
func extractWindowAssignedValue(html, variable string) string {
|
||||
@@ -831,6 +850,12 @@ func extractWindowAssignedValue(html, variable string) string {
|
||||
}
|
||||
|
||||
func pickBestEnvatoPreviewURL(urls []string) string {
|
||||
for _, item := range urls {
|
||||
lower := strings.ToLower(item)
|
||||
if strings.Contains(lower, "envatousercontent.com") && strings.HasSuffix(lower, ".mp4") {
|
||||
return item
|
||||
}
|
||||
}
|
||||
for _, item := range urls {
|
||||
lower := strings.ToLower(item)
|
||||
if strings.Contains(lower, "video-previews.elements.envatousercontent.com") && strings.Contains(lower, "watermarked_preview") && strings.HasSuffix(lower, ".mp4") {
|
||||
|
||||
@@ -15,6 +15,15 @@ func TestExtractVideoPreviewURLFindsEnvatoPreview(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractVideoPreviewURLFindsGenericM3U8(t *testing.T) {
|
||||
html := `<script>window.preview="https:\/\/cdn.example.com\/preview\/master.m3u8?token=abc"</script>`
|
||||
got := extractVideoPreviewURL(html)
|
||||
want := "https://cdn.example.com/preview/master.m3u8?token=abc"
|
||||
if got != want {
|
||||
t.Fatalf("expected %q, got %q", want, got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestDeriveEnvatoPreviewFromThumbnail(t *testing.T) {
|
||||
thumb := "https://elements-resized.envatousercontent.com/elements-video-cover-images/ad0a3abc-7eb0-4075-8f68-8198f9a08777/video_preview/video_preview_0000.jpg?w=1200&h=630"
|
||||
got := deriveEnvatoPreviewFromThumbnail(thumb)
|
||||
|
||||
@@ -71,7 +71,7 @@ func (g *GeminiService) TranslateQuery(query string) string {
|
||||
"systemInstruction": map[string]any{
|
||||
"parts": []map[string]string{
|
||||
{
|
||||
"text": "You translate media search intents into natural English. Output one plain English search phrase only. No labels, no quotes, no explanations.",
|
||||
"text": "You are a professional video editor. Infer stronger stock-footage and scene-search wording from the user's keyword, and expand it into natural English that a professional editor would use to find usable footage. Output one plain English search phrase only. No labels, no quotes, no explanations.",
|
||||
},
|
||||
},
|
||||
},
|
||||
@@ -79,7 +79,7 @@ func (g *GeminiService) TranslateQuery(query string) string {
|
||||
{
|
||||
"parts": []map[string]string{
|
||||
{
|
||||
"text": "Translate this user query into concise English suitable for stock-footage search: " + trimmed,
|
||||
"text": "Expand this user query into a concise but editor-grade English footage search phrase suitable for stock-footage discovery: " + trimmed,
|
||||
},
|
||||
},
|
||||
},
|
||||
@@ -152,13 +152,13 @@ func (g *GeminiService) Recommend(query string, candidates []SearchResult) ([]AI
|
||||
type geminiPart map[string]any
|
||||
parts := []geminiPart{
|
||||
{
|
||||
"text": `Analyze the provided images for the user's search intent. Return JSON only in this shape:
|
||||
"text": `You are a professional video editor. Analyze whether each provided visual is suitable as a usable scene or shot for the user's requested keyword. Return JSON only in this shape:
|
||||
{"recommendations":[{"index":0,"reason":"short reason","recommended":true}]}
|
||||
Return one entry for every analyzed candidate. Use Korean for every reason. Keep reasons concise but specific enough to explain usefulness.
|
||||
Mark the strongest matches as recommended=true and weaker matches as recommended=false.
|
||||
Prefer cinematic b-roll, stock footage, editorial footage, clean composition, usable establishing shots, and professional media thumbnails.
|
||||
Avoid clickbait faces, exaggerated expressions, meme aesthetics, low-information thumbnails, sensational text overlays, or gossip-style imagery.
|
||||
Favor thumbnails that look directly useful for media editing and footage sourcing.
|
||||
Favor scenes that look directly useful for professional editing, sequencing, establishing, cutaway, or mood-building usage.
|
||||
User query: ` + query,
|
||||
},
|
||||
}
|
||||
|
||||
@@ -15,7 +15,7 @@ type searchCollector interface {
|
||||
// envatoCollector is the stateless collector for the "Envato" platform.
type envatoCollector struct{}

// Name returns the collector's display name.
func (envatoCollector) Name() string { return "Envato" }

// MaxResults returns 14 (raised from 8 in this change); presumably the
// per-collector result cap. Confirm against the caller.
func (envatoCollector) MaxResults() int { return 14 }

// Enabled reports whether this collector should run: true when no platform
// filter is supplied (nil or empty map) or when the "envato" key is set.
func (envatoCollector) Enabled(enabledPlatforms map[string]bool) bool {
	return len(enabledPlatforms) == 0 || enabledPlatforms["envato"]
}
|
||||
@@ -31,7 +31,7 @@ func (envatoCollector) Enrich(searcher *SearchService, result SearchResult) Sear
|
||||
// artgridCollector is the stateless collector for the "Artgrid" platform.
type artgridCollector struct{}

// Name returns the collector's display name.
func (artgridCollector) Name() string { return "Artgrid" }

// MaxResults returns 14 (raised from 8 in this change); presumably the
// per-collector result cap. Confirm against the caller.
func (artgridCollector) MaxResults() int { return 14 }

// Enabled reports whether this collector should run: true when no platform
// filter is supplied (nil or empty map) or when the "artgrid" key is set.
func (artgridCollector) Enabled(enabledPlatforms map[string]bool) bool {
	return len(enabledPlatforms) == 0 || enabledPlatforms["artgrid"]
}
|
||||
@@ -47,7 +47,7 @@ func (artgridCollector) Enrich(searcher *SearchService, result SearchResult) Sea
|
||||
// googleVideoCollector is the stateless collector for the "Google Video"
// platform.
type googleVideoCollector struct{}

// Name returns the collector's display name.
func (googleVideoCollector) Name() string { return "Google Video" }

// MaxResults returns 10 (raised from 6 in this change); presumably the
// per-collector result cap. Confirm against the caller.
func (googleVideoCollector) MaxResults() int { return 10 }

// Enabled reports whether this collector should run: true when no platform
// filter is supplied (nil or empty map) or when the "google video" key is set.
func (googleVideoCollector) Enabled(enabledPlatforms map[string]bool) bool {
	return len(enabledPlatforms) == 0 || enabledPlatforms["google video"]
}
|
||||
|
||||
Reference in New Issue
Block a user