This commit is contained in:
+33
-8
@@ -64,9 +64,9 @@ func (s *SearchService) SearchMedia(queries []string, enabledPlatforms map[strin
|
||||
results := make([]SearchResult, 0, 90)
|
||||
var lastErr error
|
||||
|
||||
baseQueries := limitQueries(queries, 6)
|
||||
baseQueries := limitQueries(queries, 10)
|
||||
shuffleStrings(baseQueries)
|
||||
primaryQueries := baseQueries[:minInt(len(baseQueries), 3)]
|
||||
primaryQueries := baseQueries[:minInt(len(baseQueries), 5)]
|
||||
runSearchPass := func(bases []string, onlyMissing bool) {
|
||||
for _, base := range bases {
|
||||
base = strings.TrimSpace(base)
|
||||
@@ -331,6 +331,8 @@ func (s *SearchService) search(query, categories, engine, source string) ([]Sear
|
||||
// buildGoogleVideoQueries expands a base search phrase into the Google video
// query variants used for stock-footage discovery.
//
// base is inserted verbatim as an exact-match (double-quoted) phrase in each
// variant. Every variant ORs a different set of footage-related terms and
// excludes tutorial/review style content with negative keywords. The returned
// slice always has three entries, ordered from the broadest variant to the
// more specific ones.
func buildGoogleVideoQueries(base string) []string {
	return []string{
		fmt.Sprintf(`"%s" ("stock footage" OR "b-roll" OR cinematic OR "establishing shot" OR editorial) -tutorial -"how to" -review -reaction -course -podcast -vlog -interview -breakdown -edit -editing`, base),
		fmt.Sprintf(`"%s" ("cinematic b-roll" OR "establishing shot" OR "drone footage" OR "urban footage") -tutorial -reaction -vlog -podcast`, base),
		fmt.Sprintf(`"%s" ("night drive" OR "city footage" OR "street footage" OR "editorial footage") -tutorial -review -music`, base),
	}
}
|
||||
|
||||
@@ -338,6 +340,8 @@ func buildEnvatoQueries(base string) []string {
|
||||
return []string{
|
||||
fmt.Sprintf(`"%s" ("stock footage" OR "stock video" OR "b-roll" OR cinematic) site:elements.envato.com`, base),
|
||||
fmt.Sprintf(`"%s" ("stock footage" OR "stock video" OR "b-roll" OR cinematic) site:elements.envato.com/stock-video`, base),
|
||||
fmt.Sprintf(`"%s" ("motion graphics" OR "backgrounds" OR "establishing shot" OR "loop") site:elements.envato.com`, base),
|
||||
fmt.Sprintf(`"%s" ("urban" OR "night city" OR "cyberpunk" OR "sci-fi") site:elements.envato.com`, base),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -347,6 +351,8 @@ func buildArtgridQueries(base string) []string {
|
||||
fmt.Sprintf(`"%s" ("footage" OR "cinematic" OR "establishing shot") site:artgrid.io/clip/`, base),
|
||||
fmt.Sprintf(`"%s" ("stock footage" OR "b-roll" OR cinematic OR editorial) site:artlist.io/stock-footage/clip/`, base),
|
||||
fmt.Sprintf(`"%s" ("footage" OR "cinematic" OR "establishing shot") site:artlist.io/stock-footage/clip/`, base),
|
||||
fmt.Sprintf(`"%s" ("night drive" OR "urban night" OR "wet road" OR "cyberpunk") site:artgrid.io/clip/`, base),
|
||||
fmt.Sprintf(`"%s" ("drone" OR "city skyline" OR "street scene" OR "mood shot") site:artlist.io/stock-footage/clip/`, base),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -466,16 +472,23 @@ func extractMetaContent(html, property string) string {
|
||||
}
|
||||
|
||||
// videoPreviewURLPattern matches an absolute http(s) URL that contains an
// "mp4" or "m3u8" marker, optionally followed by a query string. The URL ends
// at the first quote, whitespace or '>' character. It is compiled once at
// package scope so extractVideoPreviewURL does not recompile it on every call.
var videoPreviewURLPattern = regexp.MustCompile(`https?://[^"'[:space:]>]+(?:mp4|m3u8)(?:\?[^"'[:space:]>]*)?`)

// extractVideoPreviewURL scans raw page markup (HTML or embedded JSON) for a
// direct video URL (mp4 / m3u8) and returns the best candidate.
//
// Escaped forward slashes are normalized before matching so URLs embedded in
// JSON strings are found: `\\\/`, `\/`, `\u002F` and `\u002f` all become "/".
// The replacements run sequentially in that order, so the longest escape is
// collapsed first.
//
// Selection: the first URL whose lowercase form contains "preview", "video"
// or "watermark" wins; if none does, the first URL found is returned. An
// empty string is returned when the markup contains no matching URL.
func extractVideoPreviewURL(html string) string {
	normalized := html
	for _, escaped := range []string{`\\\/`, `\/`, `\u002F`, `\u002f`} {
		normalized = strings.ReplaceAll(normalized, escaped, "/")
	}

	fallback := ""
	for _, match := range videoPreviewURLPattern.FindAllString(normalized, -1) {
		// Drop any leftover doubled backslashes from nested escaping.
		candidate := strings.TrimSpace(strings.ReplaceAll(match, `\\`, ""))
		if candidate == "" {
			continue
		}

		lower := strings.ToLower(candidate)
		if strings.Contains(lower, "preview") || strings.Contains(lower, "video") || strings.Contains(lower, "watermark") {
			return candidate
		}

		// Remember the first usable URL in case no keyword match turns up.
		if fallback == "" {
			fallback = candidate
		}
	}
	return fallback
}
|
||||
|
||||
@@ -572,6 +585,12 @@ func pickImageURL(urls []string) string {
|
||||
}
|
||||
|
||||
func pickVideoURL(urls []string) string {
|
||||
for _, item := range urls {
|
||||
lower := strings.ToLower(item)
|
||||
if strings.Contains(lower, ".m3u8") && (strings.Contains(lower, "artgrid") || strings.Contains(lower, "artlist") || strings.Contains(lower, "cdn")) {
|
||||
return item
|
||||
}
|
||||
}
|
||||
for _, item := range urls {
|
||||
lower := strings.ToLower(item)
|
||||
if strings.Contains(lower, ".mp4") || strings.Contains(lower, ".m3u8") {
|
||||
@@ -818,7 +837,7 @@ func extractEnvatoPreviewFromHydration(html string) string {
|
||||
return ""
|
||||
}
|
||||
urls := collectURLs(string(decoded))
|
||||
return pickBestEnvatoPreviewURL(urls)
|
||||
return firstNonEmpty(pickBestEnvatoPreviewURL(urls), extractVideoPreviewURL(string(decoded)))
|
||||
}
|
||||
|
||||
func extractWindowAssignedValue(html, variable string) string {
|
||||
@@ -831,6 +850,12 @@ func extractWindowAssignedValue(html, variable string) string {
|
||||
}
|
||||
|
||||
func pickBestEnvatoPreviewURL(urls []string) string {
|
||||
for _, item := range urls {
|
||||
lower := strings.ToLower(item)
|
||||
if strings.Contains(lower, "envatousercontent.com") && strings.HasSuffix(lower, ".mp4") {
|
||||
return item
|
||||
}
|
||||
}
|
||||
for _, item := range urls {
|
||||
lower := strings.ToLower(item)
|
||||
if strings.Contains(lower, "video-previews.elements.envatousercontent.com") && strings.Contains(lower, "watermarked_preview") && strings.HasSuffix(lower, ".mp4") {
|
||||
|
||||
Reference in New Issue
Block a user