263 lines
10 KiB
Go
263 lines
10 KiB
Go
package services
|
||
|
||
import (
|
||
"encoding/base64"
|
||
"fmt"
|
||
"net/http"
|
||
"net/http/httptest"
|
||
"net/url"
|
||
"strings"
|
||
"sync/atomic"
|
||
"testing"
|
||
"time"
|
||
)
|
||
|
||
func TestExtractVideoPreviewURLFindsEnvatoPreview(t *testing.T) {
|
||
html := `<script type="application/ld+json">{"contentUrl":"https://video-previews.elements.envatousercontent.com/ad0a3abc-7eb0-4075-8f68-8198f9a08777/watermarked_preview/watermarked_preview.mp4"}</script>`
|
||
got := firstNonEmpty(extractJSONLDValue(html, "contentUrl"), extractVideoPreviewURL(html))
|
||
want := "https://video-previews.elements.envatousercontent.com/ad0a3abc-7eb0-4075-8f68-8198f9a08777/watermarked_preview/watermarked_preview.mp4"
|
||
if got != want {
|
||
t.Fatalf("expected %q, got %q", want, got)
|
||
}
|
||
}
|
||
|
||
func TestExtractVideoPreviewURLFindsGenericM3U8(t *testing.T) {
|
||
html := `<script>window.preview="https:\/\/cdn.example.com\/preview\/master.m3u8?token=abc"</script>`
|
||
got := extractVideoPreviewURL(html)
|
||
want := "https://cdn.example.com/preview/master.m3u8?token=abc"
|
||
if got != want {
|
||
t.Fatalf("expected %q, got %q", want, got)
|
||
}
|
||
}
|
||
|
||
func TestDeriveEnvatoPreviewFromThumbnail(t *testing.T) {
|
||
thumb := "https://elements-resized.envatousercontent.com/elements-video-cover-images/ad0a3abc-7eb0-4075-8f68-8198f9a08777/video_preview/video_preview_0000.jpg?w=1200&h=630"
|
||
got := deriveEnvatoPreviewFromThumbnail(thumb)
|
||
want := "https://elements-resized.envatousercontent.com/elements-video-cover-images/ad0a3abc-7eb0-4075-8f68-8198f9a08777/watermarked_preview/watermarked_preview.mp4"
|
||
if got != want {
|
||
t.Fatalf("expected %q, got %q", want, got)
|
||
}
|
||
}
|
||
|
||
func TestExtractEnvatoPreviewFromHydration(t *testing.T) {
|
||
payload := `{"contentUrl":"https://video-previews.elements.envatousercontent.com/example/watermarked_preview/watermarked_preview.mp4"}`
|
||
html := `<script>window.INITIAL_HYDRATION_DATA="` + base64.StdEncoding.EncodeToString([]byte(payload)) + `";window.INITIAL_HYDRATION_DATA_ENCODED=true;</script>`
|
||
got := extractEnvatoPreviewFromHydration(html)
|
||
want := "https://video-previews.elements.envatousercontent.com/example/watermarked_preview/watermarked_preview.mp4"
|
||
if got != want {
|
||
t.Fatalf("expected %q, got %q", want, got)
|
||
}
|
||
}
|
||
|
||
func TestCollectEnvatoPreviewURLFindsOgVideo(t *testing.T) {
|
||
html := `<meta property="og:video" content="https://video-previews.elements.envatousercontent.com/example/watermarked_preview/watermarked_preview.mp4">`
|
||
got := collectEnvatoPreviewURL(html, "", "", "")
|
||
want := "https://video-previews.elements.envatousercontent.com/example/watermarked_preview/watermarked_preview.mp4"
|
||
if got != want {
|
||
t.Fatalf("expected %q, got %q", want, got)
|
||
}
|
||
}
|
||
|
||
func TestIsUsefulGoogleVideoResultRejectsMusicResults(t *testing.T) {
|
||
result := SearchResult{
|
||
Title: "Couple Friendly Sad Bgm Movie Best Bgm",
|
||
Link: "https://www.youtube.com/watch?v=LGP4wiXSw8c",
|
||
Snippet: "romantic bgm soundtrack",
|
||
}
|
||
if isUsefulGoogleVideoResult(result) {
|
||
t.Fatal("expected bgm/music result to be rejected")
|
||
}
|
||
}
|
||
|
||
func TestExtractVideoObjectJSONLD(t *testing.T) {
|
||
html := `<script type="application/ld+json">{"@context":"https://schema.org","@type":"VideoObject","name":"Smiling Man and Woman Waving at Camera","description":"Close up shot of a smiling couple waving.","thumbnailUrl":"https://elements-resized.envatousercontent.com/example/video_preview/video_preview_0001.jpg","contentUrl":"https://video-previews.elements.envatousercontent.com/example/watermarked_preview/watermarked_preview.mp4"}</script>`
|
||
meta := extractVideoObjectJSONLD(html)
|
||
if meta.Name != "Smiling Man and Woman Waving at Camera" {
|
||
t.Fatalf("unexpected name: %#v", meta)
|
||
}
|
||
if meta.ContentURL == "" || meta.ThumbnailURL == "" || meta.Description == "" {
|
||
t.Fatalf("expected full video object metadata, got %#v", meta)
|
||
}
|
||
}
|
||
|
||
func TestCleanArtgridTitle(t *testing.T) {
|
||
got := cleanArtgridTitle("movie film moving slowly from a reel by Arthur Cauty | Royalty Free Stock Footage – Artgrid.io")
|
||
want := "movie film moving slowly from a reel"
|
||
if got != want {
|
||
t.Fatalf("expected %q, got %q", want, got)
|
||
}
|
||
}
|
||
|
||
func TestCanonicalizeArtgridLinkFromArtlist(t *testing.T) {
|
||
got := canonicalizeArtgridLink("https://artlist.io/stock-footage/clip/movie-film-moving-slowly-from-a-reel/114756")
|
||
want := "https://artgrid.io/clip/114756/movie-film-moving-slowly-from-a-reel"
|
||
if got != want {
|
||
t.Fatalf("expected %q, got %q", want, got)
|
||
}
|
||
}
|
||
|
||
func TestIsRenderableArtgridResultAcceptsArtlistCanonical(t *testing.T) {
|
||
if !isRenderableArtgridResult(SearchResult{Link: "https://artlist.io/stock-footage/clip/movie-film-moving-slowly-from-a-reel/114756"}) {
|
||
t.Fatal("expected artlist canonical clip URL to be accepted for Artgrid collector")
|
||
}
|
||
}
|
||
|
||
func TestBuildArtgridQueriesIncludesArtlistCanonicalDomain(t *testing.T) {
|
||
queries := buildArtgridQueries("friendly couple")
|
||
found := false
|
||
for _, query := range queries {
|
||
if strings.Contains(query, "site:artlist.io/stock-footage/clip/") {
|
||
found = true
|
||
break
|
||
}
|
||
}
|
||
if !found {
|
||
t.Fatal("expected Artgrid queries to include artlist canonical domain")
|
||
}
|
||
}
|
||
|
||
func TestIsMatchingArtgridClipPageRejectsHomepage(t *testing.T) {
|
||
html := `<html><head><meta property="og:url" content="https://artgrid.io/"><link rel="canonical" href="https://artgrid.io/"></head></html>`
|
||
if isMatchingArtgridClipPage(html, "114756") {
|
||
t.Fatal("expected generic Artgrid homepage HTML to be rejected as a clip page")
|
||
}
|
||
}
|
||
|
||
func TestIsMatchingArtgridClipPageAcceptsBodySignals(t *testing.T) {
|
||
html := `<html><head><title>Night City | Stock Video Footage - Artgrid.io</title></head><body><script>window.__clip="6600269";</script></body></html>`
|
||
if !isMatchingArtgridClipPage(html, "6600269") {
|
||
t.Fatal("expected body/title signal Artgrid HTML to be accepted")
|
||
}
|
||
}
|
||
|
||
func TestLowValueThumbnailDetection(t *testing.T) {
|
||
if !IsLowValueThumbnail("https://example.com/favicon.ico") {
|
||
t.Fatal("expected favicon to be low-value thumbnail")
|
||
}
|
||
if IsLowValueThumbnail("https://i.ytimg.com/vi/abcd1234xyz/hqdefault.jpg") {
|
||
t.Fatal("expected youtube thumbnail to be usable")
|
||
}
|
||
}
|
||
|
||
func TestGoogleVideoCollectorPrefersYouTubeDerivedThumbnail(t *testing.T) {
|
||
result := googleVideoCollector{}.Enrich(nil, SearchResult{
|
||
Link: "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
|
||
ThumbnailURL: "https://example.com/some-search-thumb.jpg",
|
||
})
|
||
if result.ThumbnailURL != "https://i.ytimg.com/vi/dQw4w9WgXcQ/hqdefault.jpg" {
|
||
t.Fatalf("expected derived youtube thumbnail, got %q", result.ThumbnailURL)
|
||
}
|
||
}
|
||
|
||
func TestGeminiCandidateLimitNeverExceedsCandidates(t *testing.T) {
|
||
if got := GeminiCandidateLimit(9); got != 9 {
|
||
t.Fatalf("expected Gemini limit to stay within candidate count, got %d", got)
|
||
}
|
||
}
|
||
|
||
func TestLimitCollectorQueriesUsesSmallerBudgetForMissingPass(t *testing.T) {
|
||
queries := []string{"a", "b", "c", "d"}
|
||
|
||
got := limitCollectorQueries("Artgrid", queries, true)
|
||
if len(got) != 4 {
|
||
t.Fatalf("expected 4 queries for missing-pass Artgrid collector, got %d", len(got))
|
||
}
|
||
|
||
got = limitCollectorQueries("Google Video", queries, false)
|
||
if len(got) != 4 {
|
||
t.Fatalf("expected 4 queries for Google Video collector, got %d", len(got))
|
||
}
|
||
}
|
||
|
||
func TestSearchServiceFetchCacheRoundTrip(t *testing.T) {
|
||
service := NewSearchService("http://example.com", "", "")
|
||
service.setCachedFetchResult("html\nhttps://example.com/item", "<html></html>", time.Minute)
|
||
|
||
body, ok := service.getCachedFetchResult("html\nhttps://example.com/item")
|
||
if !ok {
|
||
t.Fatal("expected cached fetch result")
|
||
}
|
||
if body != "<html></html>" {
|
||
t.Fatalf("unexpected cached body: %q", body)
|
||
}
|
||
}
|
||
|
||
func TestSplitSearchDeadlinesReservesEnrichmentWindow(t *testing.T) {
|
||
deadline := time.Now().Add(20 * time.Second)
|
||
collectionDeadline, enrichmentDeadline := splitSearchDeadlines(deadline)
|
||
|
||
if enrichmentDeadline.IsZero() {
|
||
t.Fatal("expected enrichment deadline to be preserved")
|
||
}
|
||
if !collectionDeadline.Before(enrichmentDeadline) {
|
||
t.Fatalf("expected collection deadline before enrichment deadline, got %v >= %v", collectionDeadline, enrichmentDeadline)
|
||
}
|
||
if gap := enrichmentDeadline.Sub(collectionDeadline); gap < searchEnrichmentReserve-500*time.Millisecond {
|
||
t.Fatalf("expected reserve close to %v, got %v", searchEnrichmentReserve, gap)
|
||
}
|
||
}
|
||
|
||
func TestSplitSearchDeadlinesDoesNotReserveWhenDeadlineIsTooClose(t *testing.T) {
|
||
deadline := time.Now().Add(2 * time.Second)
|
||
collectionDeadline, enrichmentDeadline := splitSearchDeadlines(deadline)
|
||
|
||
if !collectionDeadline.Equal(enrichmentDeadline) {
|
||
t.Fatalf("expected identical deadlines when budget is too tight, got %v and %v", collectionDeadline, enrichmentDeadline)
|
||
}
|
||
}
|
||
|
||
func TestSearchServiceSkipsArtgridAPIAfter403(t *testing.T) {
|
||
var apiRequests atomic.Int32
|
||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||
switch {
|
||
case strings.HasPrefix(r.URL.Path, "/api/clip/details"):
|
||
apiRequests.Add(1)
|
||
http.Error(w, "forbidden", http.StatusForbidden)
|
||
case strings.HasPrefix(r.URL.Path, "/clip/114756/"):
|
||
w.Header().Set("Content-Type", "text/html; charset=utf-8")
|
||
_, _ = fmt.Fprintf(w, `<html><head><title>Friendly Couple | Stock Video Footage - Artgrid.io</title><meta property="og:title" content="Friendly Couple"><meta property="og:description" content="A warm couple moment"></head><body><script>window.__clip="%s";</script></body></html>`, "114756")
|
||
default:
|
||
http.NotFound(w, r)
|
||
}
|
||
}))
|
||
defer server.Close()
|
||
|
||
service := NewSearchService(server.URL, "", "")
|
||
serverURL, err := url.Parse(server.URL)
|
||
if err != nil {
|
||
t.Fatalf("failed to parse test server url: %v", err)
|
||
}
|
||
service.Client = &http.Client{
|
||
Transport: roundTripperFunc(func(req *http.Request) (*http.Response, error) {
|
||
clone := req.Clone(req.Context())
|
||
if clone.URL.Host == "artgrid.io" {
|
||
clone.URL.Scheme = serverURL.Scheme
|
||
clone.URL.Host = serverURL.Host
|
||
clone.Host = serverURL.Host
|
||
}
|
||
return http.DefaultTransport.RoundTrip(clone)
|
||
}),
|
||
}
|
||
|
||
item := SearchResult{
|
||
Link: "https://artgrid.io/clip/114756/friendly-couple",
|
||
Source: "Artgrid",
|
||
}
|
||
|
||
first := service.enrichArtgrid(item)
|
||
second := service.enrichArtgrid(item)
|
||
|
||
if apiRequests.Load() != 1 {
|
||
t.Fatalf("expected artgrid API to be skipped after first 403, got %d requests", apiRequests.Load())
|
||
}
|
||
if first.Title == "" || second.Title == "" {
|
||
t.Fatalf("expected HTML fallback enrichment to preserve title, got %#v %#v", first, second)
|
||
}
|
||
}
|
||
|
||
// roundTripperFunc lets an ordinary function be used wherever an
// http.RoundTripper is required, such as the Transport of an http.Client.
type roundTripperFunc func(*http.Request) (*http.Response, error)

// RoundTrip satisfies http.RoundTripper by handing the request to the wrapped
// function and returning its response and error unchanged.
func (f roundTripperFunc) RoundTrip(r *http.Request) (*http.Response, error) {
	resp, err := f(r)
	return resp, err
}
|