Expand search coverage and preview parsing
build-push / docker (push) Successful in 4m17s

This commit is contained in:
AI Assistant
2026-03-16 13:46:28 +09:00
parent 82cead950e
commit c92ef97c98
7 changed files with 143 additions and 19 deletions
+33 -8
View File
@@ -64,9 +64,9 @@ func (s *SearchService) SearchMedia(queries []string, enabledPlatforms map[strin
results := make([]SearchResult, 0, 90)
var lastErr error
baseQueries := limitQueries(queries, 6)
baseQueries := limitQueries(queries, 10)
shuffleStrings(baseQueries)
primaryQueries := baseQueries[:minInt(len(baseQueries), 3)]
primaryQueries := baseQueries[:minInt(len(baseQueries), 5)]
runSearchPass := func(bases []string, onlyMissing bool) {
for _, base := range bases {
base = strings.TrimSpace(base)
@@ -331,6 +331,8 @@ func (s *SearchService) search(query, categories, engine, source string) ([]Sear
// buildGoogleVideoQueries returns the query variants generated for base.
// Every variant wraps base in double quotes as an exact phrase, ORs a group of
// footage-related terms, and appends "-term" exclusions. The variants are
// returned in the order they are listed below.
func buildGoogleVideoQueries(base string) []string {
	// Each template takes exactly one %s verb: the base phrase.
	templates := [...]string{
		`"%s" ("stock footage" OR "b-roll" OR cinematic OR "establishing shot" OR editorial) -tutorial -"how to" -review -reaction -course -podcast -vlog -interview -breakdown -edit -editing`,
		`"%s" ("cinematic b-roll" OR "establishing shot" OR "drone footage" OR "urban footage") -tutorial -reaction -vlog -podcast`,
		`"%s" ("night drive" OR "city footage" OR "street footage" OR "editorial footage") -tutorial -review -music`,
	}

	queries := make([]string, 0, len(templates))
	for _, tmpl := range templates {
		queries = append(queries, fmt.Sprintf(tmpl, base))
	}
	return queries
}
@@ -338,6 +340,8 @@ func buildEnvatoQueries(base string) []string {
return []string{
fmt.Sprintf(`"%s" ("stock footage" OR "stock video" OR "b-roll" OR cinematic) site:elements.envato.com`, base),
fmt.Sprintf(`"%s" ("stock footage" OR "stock video" OR "b-roll" OR cinematic) site:elements.envato.com/stock-video`, base),
fmt.Sprintf(`"%s" ("motion graphics" OR "backgrounds" OR "establishing shot" OR "loop") site:elements.envato.com`, base),
fmt.Sprintf(`"%s" ("urban" OR "night city" OR "cyberpunk" OR "sci-fi") site:elements.envato.com`, base),
}
}
@@ -347,6 +351,8 @@ func buildArtgridQueries(base string) []string {
fmt.Sprintf(`"%s" ("footage" OR "cinematic" OR "establishing shot") site:artgrid.io/clip/`, base),
fmt.Sprintf(`"%s" ("stock footage" OR "b-roll" OR cinematic OR editorial) site:artlist.io/stock-footage/clip/`, base),
fmt.Sprintf(`"%s" ("footage" OR "cinematic" OR "establishing shot") site:artlist.io/stock-footage/clip/`, base),
fmt.Sprintf(`"%s" ("night drive" OR "urban night" OR "wet road" OR "cyberpunk") site:artgrid.io/clip/`, base),
fmt.Sprintf(`"%s" ("drone" OR "city skyline" OR "street scene" OR "mood shot") site:artlist.io/stock-footage/clip/`, base),
}
}
@@ -466,16 +472,23 @@ func extractMetaContent(html, property string) string {
}
// escapedSlashPattern matches a forward slash as it is written inside JSON or
// JavaScript string literals: one or more backslashes followed by either "/"
// or the Unicode escape "u002F" (hex digit in either case). Accepting a run of
// backslashes also covers payloads that were escaped more than once.
var escapedSlashPattern = regexp.MustCompile(`\\+(?:/|u002[fF])`)

// videoPreviewURLPattern matches an absolute http(s) URL whose path ends in
// "mp4" or "m3u8", optionally followed by a query string. The URL stops at the
// first quote, whitespace character, or ">".
var videoPreviewURLPattern = regexp.MustCompile(`https?://[^"'[:space:]>]+(?:mp4|m3u8)(?:\?[^"'[:space:]>]*)?`)

// extractVideoPreviewURL scans html for mp4/m3u8 URLs and returns the best
// candidate, or "" when none is found.
//
// Escaped slashes are normalised before matching so URLs embedded in inline
// JSON are recognised. A URL containing "preview", "video", or "watermark"
// (case-insensitive) wins over any other; otherwise the first URL found in
// document order is returned.
func extractVideoPreviewURL(html string) string {
	normalized := escapedSlashPattern.ReplaceAllLiteralString(html, "/")

	fallback := ""
	for _, match := range videoPreviewURLPattern.FindAllString(normalized, -1) {
		// Strip leftover doubled backslashes from escapes other than slashes.
		candidate := strings.TrimSpace(strings.ReplaceAll(match, `\\`, ""))
		if candidate == "" {
			continue
		}
		lower := strings.ToLower(candidate)
		if strings.Contains(lower, "preview") || strings.Contains(lower, "video") || strings.Contains(lower, "watermark") {
			return candidate
		}
		// Remember only the first plain match; a preferred one may still follow.
		if fallback == "" {
			fallback = candidate
		}
	}
	return fallback
}
@@ -572,6 +585,12 @@ func pickImageURL(urls []string) string {
}
func pickVideoURL(urls []string) string {
for _, item := range urls {
lower := strings.ToLower(item)
if strings.Contains(lower, ".m3u8") && (strings.Contains(lower, "artgrid") || strings.Contains(lower, "artlist") || strings.Contains(lower, "cdn")) {
return item
}
}
for _, item := range urls {
lower := strings.ToLower(item)
if strings.Contains(lower, ".mp4") || strings.Contains(lower, ".m3u8") {
@@ -818,7 +837,7 @@ func extractEnvatoPreviewFromHydration(html string) string {
return ""
}
urls := collectURLs(string(decoded))
return pickBestEnvatoPreviewURL(urls)
return firstNonEmpty(pickBestEnvatoPreviewURL(urls), extractVideoPreviewURL(string(decoded)))
}
func extractWindowAssignedValue(html, variable string) string {
@@ -831,6 +850,12 @@ func extractWindowAssignedValue(html, variable string) string {
}
func pickBestEnvatoPreviewURL(urls []string) string {
for _, item := range urls {
lower := strings.ToLower(item)
if strings.Contains(lower, "envatousercontent.com") && strings.HasSuffix(lower, ".mp4") {
return item
}
}
for _, item := range urls {
lower := strings.ToLower(item)
if strings.Contains(lower, "video-previews.elements.envatousercontent.com") && strings.Contains(lower, "watermarked_preview") && strings.HasSuffix(lower, ".mp4") {