This commit is contained in:
+56
-1
@@ -1,9 +1,11 @@
|
||||
package services
|
||||
|
||||
import (
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"math/rand"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os/exec"
|
||||
@@ -63,6 +65,7 @@ func (s *SearchService) SearchMedia(queries []string, enabledPlatforms map[strin
|
||||
var lastErr error
|
||||
|
||||
baseQueries := limitQueries(queries, 6)
|
||||
shuffleStrings(baseQueries)
|
||||
primaryQueries := baseQueries[:minInt(len(baseQueries), 3)]
|
||||
runSearchPass := func(bases []string, onlyMissing bool) {
|
||||
for _, base := range bases {
|
||||
@@ -80,7 +83,9 @@ func (s *SearchService) SearchMedia(queries []string, enabledPlatforms map[strin
|
||||
if onlyMissing && sourceCounts[collector.Name()] > 0 {
|
||||
continue
|
||||
}
|
||||
for _, searchQuery := range collector.BuildQueries(base) {
|
||||
searchQueries := collector.BuildQueries(base)
|
||||
shuffleStrings(searchQueries)
|
||||
for _, searchQuery := range searchQueries {
|
||||
if sourceCounts[collector.Name()] >= collector.MaxResults() {
|
||||
break
|
||||
}
|
||||
@@ -201,6 +206,7 @@ func (s *SearchService) enrichEnvato(result SearchResult) SearchResult {
|
||||
extractJSONLDValue(html, "contentUrl"),
|
||||
extractMetaContent(html, "twitter:player:stream"),
|
||||
extractVideoPreviewURL(html),
|
||||
extractEnvatoPreviewFromHydration(html),
|
||||
deriveEnvatoPreviewFromThumbnail(pageThumbnail),
|
||||
deriveEnvatoPreviewFromThumbnail(result.ThumbnailURL),
|
||||
)
|
||||
@@ -801,6 +807,45 @@ func deriveEnvatoPreviewFromThumbnail(thumbnail string) string {
|
||||
return ""
|
||||
}
|
||||
|
||||
func extractEnvatoPreviewFromHydration(html string) string {
|
||||
encoded := extractWindowAssignedValue(html, "INITIAL_HYDRATION_DATA")
|
||||
if encoded == "" {
|
||||
return ""
|
||||
}
|
||||
|
||||
decoded, err := base64.StdEncoding.DecodeString(encoded)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
urls := collectURLs(string(decoded))
|
||||
return pickBestEnvatoPreviewURL(urls)
|
||||
}
|
||||
|
||||
// extractWindowAssignedValue returns the contents of the first double-quoted,
// non-empty string assigned to window.<variable> in html, for example
// `window.NAME = "value"`. The variable name is matched literally (regexp
// metacharacters in it are escaped). It returns "" when no such assignment is
// found.
func extractWindowAssignedValue(html, variable string) string {
	expr := `window\.` + regexp.QuoteMeta(variable) + `\s*=\s*"([^"]+)"`
	groups := regexp.MustCompile(expr).FindStringSubmatch(html)
	// A successful match yields exactly the full match plus one capture group.
	if len(groups) != 2 {
		return ""
	}
	return groups[1]
}
|
||||
|
||||
// pickBestEnvatoPreviewURL returns the first entry of urls that, compared
// case-insensitively, contains "watermarked_preview", ends in ".mp4" and
// contains one of the accepted hosts. All URLs are checked against the
// dedicated video-previews host first; only if none qualifies are they
// checked against the broader envatousercontent.com domain. The matching
// entry is returned with its original casing, or "" when nothing matches.
func pickBestEnvatoPreviewURL(urls []string) string {
	// Host substrings in priority order: most specific first.
	hostTiers := []string{
		"video-previews.elements.envatousercontent.com",
		"envatousercontent.com",
	}

	for _, host := range hostTiers {
		for _, candidate := range urls {
			normalized := strings.ToLower(candidate)
			if !strings.Contains(normalized, host) {
				continue
			}
			if !strings.Contains(normalized, "watermarked_preview") {
				continue
			}
			if strings.HasSuffix(normalized, ".mp4") {
				return candidate
			}
		}
	}
	return ""
}
|
||||
|
||||
func newBrowserRequest(method, target, accept string) (*http.Request, error) {
|
||||
req, err := http.NewRequest(method, target, nil)
|
||||
if err != nil {
|
||||
@@ -867,6 +912,16 @@ func limitQueries(queries []string, limit int) []string {
|
||||
return filtered
|
||||
}
|
||||
|
||||
// shuffleStrings randomly permutes values in place. Slices with fewer than
// two elements are left untouched. Every call builds its own generator
// seeded from the current wall-clock time in nanoseconds.
func shuffleStrings(values []string) {
	total := len(values)
	if total <= 1 {
		return
	}

	seed := time.Now().UnixNano()
	swap := func(a, b int) {
		values[a], values[b] = values[b], values[a]
	}
	rand.New(rand.NewSource(seed)).Shuffle(total, swap)
}
|
||||
|
||||
func htmlUnescape(text string) string {
|
||||
replacer := strings.NewReplacer("&", "&", """, `"`, "'", "'", "<", "<", ">", ">")
|
||||
return replacer.Replace(text)
|
||||
|
||||
Reference in New Issue
Block a user