This commit is contained in:
+56
-1
@@ -1,9 +1,11 @@
|
||||
package services
|
||||
|
||||
import (
|
||||
"encoding/base64"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"math/rand"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os/exec"
|
||||
@@ -63,6 +65,7 @@ func (s *SearchService) SearchMedia(queries []string, enabledPlatforms map[strin
|
||||
var lastErr error
|
||||
|
||||
baseQueries := limitQueries(queries, 6)
|
||||
shuffleStrings(baseQueries)
|
||||
primaryQueries := baseQueries[:minInt(len(baseQueries), 3)]
|
||||
runSearchPass := func(bases []string, onlyMissing bool) {
|
||||
for _, base := range bases {
|
||||
@@ -80,7 +83,9 @@ func (s *SearchService) SearchMedia(queries []string, enabledPlatforms map[strin
|
||||
if onlyMissing && sourceCounts[collector.Name()] > 0 {
|
||||
continue
|
||||
}
|
||||
for _, searchQuery := range collector.BuildQueries(base) {
|
||||
searchQueries := collector.BuildQueries(base)
|
||||
shuffleStrings(searchQueries)
|
||||
for _, searchQuery := range searchQueries {
|
||||
if sourceCounts[collector.Name()] >= collector.MaxResults() {
|
||||
break
|
||||
}
|
||||
@@ -201,6 +206,7 @@ func (s *SearchService) enrichEnvato(result SearchResult) SearchResult {
|
||||
extractJSONLDValue(html, "contentUrl"),
|
||||
extractMetaContent(html, "twitter:player:stream"),
|
||||
extractVideoPreviewURL(html),
|
||||
extractEnvatoPreviewFromHydration(html),
|
||||
deriveEnvatoPreviewFromThumbnail(pageThumbnail),
|
||||
deriveEnvatoPreviewFromThumbnail(result.ThumbnailURL),
|
||||
)
|
||||
@@ -801,6 +807,45 @@ func deriveEnvatoPreviewFromThumbnail(thumbnail string) string {
|
||||
return ""
|
||||
}
|
||||
|
||||
func extractEnvatoPreviewFromHydration(html string) string {
|
||||
encoded := extractWindowAssignedValue(html, "INITIAL_HYDRATION_DATA")
|
||||
if encoded == "" {
|
||||
return ""
|
||||
}
|
||||
|
||||
decoded, err := base64.StdEncoding.DecodeString(encoded)
|
||||
if err != nil {
|
||||
return ""
|
||||
}
|
||||
urls := collectURLs(string(decoded))
|
||||
return pickBestEnvatoPreviewURL(urls)
|
||||
}
|
||||
|
||||
// extractWindowAssignedValue returns the double-quoted string assigned to
// window.<variable> in html, i.e. the text between the quotes of
// `window.NAME = "..."` (whitespace around "=" is optional).
//
// The leftmost assignment with a non-empty quoted value wins. The result is
// "" when no such assignment exists.
func extractWindowAssignedValue(html, variable string) string {
	// The variable name is escaped so it is matched literally; that also
	// guarantees the assembled expression always compiles.
	expr := `window\.` + regexp.QuoteMeta(variable) + `\s*=\s*"([^"]+)"`

	groups := regexp.MustCompile(expr).FindStringSubmatch(html)
	if len(groups) != 2 {
		return ""
	}
	return groups[1]
}
|
||||
|
||||
// pickBestEnvatoPreviewURL selects a watermarked MP4 preview from urls.
//
// A candidate qualifies when, compared case-insensitively, it ends in ".mp4"
// and contains "watermarked_preview". The first qualifying URL on the
// dedicated video-previews host is preferred; if there is none, the first
// qualifying URL on any envatousercontent.com host is returned. The original
// casing of the chosen URL is preserved. Returns "" when nothing qualifies.
func pickBestEnvatoPreviewURL(urls []string) string {
	const (
		previewHost = "video-previews.elements.envatousercontent.com"
		anyHost     = "envatousercontent.com"
		marker      = "watermarked_preview"
		extension   = ".mp4"
	)

	fallback := ""
	for _, candidate := range urls {
		folded := strings.ToLower(candidate)
		if !strings.HasSuffix(folded, extension) || !strings.Contains(folded, marker) {
			continue
		}
		if strings.Contains(folded, previewHost) {
			// A preferred-host hit always outranks any fallback seen so far.
			return candidate
		}
		if fallback == "" && strings.Contains(folded, anyHost) {
			fallback = candidate
		}
	}
	return fallback
}
|
||||
|
||||
func newBrowserRequest(method, target, accept string) (*http.Request, error) {
|
||||
req, err := http.NewRequest(method, target, nil)
|
||||
if err != nil {
|
||||
@@ -867,6 +912,16 @@ func limitQueries(queries []string, limit int) []string {
|
||||
return filtered
|
||||
}
|
||||
|
||||
// shuffleStrings randomly permutes values in place.
//
// Slices with fewer than two elements are left alone. Each call builds its
// own generator seeded from the current wall-clock time in nanoseconds, so
// the resulting order generally differs from call to call.
func shuffleStrings(values []string) {
	count := len(values)
	if count <= 1 {
		return
	}

	swap := func(a, b int) {
		values[a], values[b] = values[b], values[a]
	}
	source := rand.NewSource(time.Now().UnixNano())
	rand.New(source).Shuffle(count, swap)
}
|
||||
|
||||
func htmlUnescape(text string) string {
|
||||
replacer := strings.NewReplacer("&", "&", """, `"`, "'", "'", "<", "<", ">", ">")
|
||||
return replacer.Replace(text)
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package services
|
||||
|
||||
import (
|
||||
"encoding/base64"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
@@ -23,6 +24,16 @@ func TestDeriveEnvatoPreviewFromThumbnail(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestExtractEnvatoPreviewFromHydration(t *testing.T) {
|
||||
payload := `{"contentUrl":"https://video-previews.elements.envatousercontent.com/example/watermarked_preview/watermarked_preview.mp4"}`
|
||||
html := `<script>window.INITIAL_HYDRATION_DATA="` + base64.StdEncoding.EncodeToString([]byte(payload)) + `";window.INITIAL_HYDRATION_DATA_ENCODED=true;</script>`
|
||||
got := extractEnvatoPreviewFromHydration(html)
|
||||
want := "https://video-previews.elements.envatousercontent.com/example/watermarked_preview/watermarked_preview.mp4"
|
||||
if got != want {
|
||||
t.Fatalf("expected %q, got %q", want, got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsUsefulGoogleVideoResultRejectsMusicResults(t *testing.T) {
|
||||
result := SearchResult{
|
||||
Title: "Couple Friendly Sad Bgm Movie Best Bgm",
|
||||
|
||||
@@ -2,9 +2,11 @@ package services
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"math/rand"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
// GeminiFallbackReason is a Korean-language message that roughly translates
// to "Gemini Vision's response was insufficient, so these results were
// supplemented based on keywords."
// NOTE(review): presumably supplied as the reason text for fallback
// recommendations — confirm against callers.
const GeminiFallbackReason = "Gemini Vision 응답이 부족해 키워드 기준으로 보강된 결과입니다."
|
||||
@@ -193,6 +195,21 @@ func BuildFallbackRecommendations(ranked []SearchResult, limit int, reason strin
|
||||
return fallback
|
||||
}
|
||||
|
||||
func RandomizeTopRecommendations(items []AIRecommendation, window int) []AIRecommendation {
|
||||
if len(items) < 2 || window < 2 {
|
||||
return items
|
||||
}
|
||||
|
||||
limit := min(window, len(items))
|
||||
shuffled := make([]AIRecommendation, len(items))
|
||||
copy(shuffled, items)
|
||||
rng := rand.New(rand.NewSource(time.Now().UnixNano()))
|
||||
rng.Shuffle(limit, func(i, j int) {
|
||||
shuffled[i], shuffled[j] = shuffled[j], shuffled[i]
|
||||
})
|
||||
return shuffled
|
||||
}
|
||||
|
||||
func MergeRecommendations(recommended []AIRecommendation, ranked []SearchResult, limit int) []AIRecommendation {
|
||||
merged := make([]AIRecommendation, 0, min(limit, len(ranked)))
|
||||
seen := map[string]bool{}
|
||||
|
||||
Reference in New Issue
Block a user