Files
ai-media-hub/backend/services/cse_test.go
T
AI Assistant 5ca7aef3f1
build-push / docker (push) Successful in 4m14s
Strengthen search breadth and modal fitting
2026-03-17 13:10:53 +09:00

239 lines
9.3 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package services
import (
"encoding/base64"
"fmt"
"net/http"
"net/http/httptest"
"net/url"
"strings"
"sync/atomic"
"testing"
"time"
)
func TestExtractVideoPreviewURLFindsEnvatoPreview(t *testing.T) {
html := `<script type="application/ld+json">{"contentUrl":"https://video-previews.elements.envatousercontent.com/ad0a3abc-7eb0-4075-8f68-8198f9a08777/watermarked_preview/watermarked_preview.mp4"}</script>`
got := firstNonEmpty(extractJSONLDValue(html, "contentUrl"), extractVideoPreviewURL(html))
want := "https://video-previews.elements.envatousercontent.com/ad0a3abc-7eb0-4075-8f68-8198f9a08777/watermarked_preview/watermarked_preview.mp4"
if got != want {
t.Fatalf("expected %q, got %q", want, got)
}
}
// TestExtractVideoPreviewURLFindsGenericM3U8 verifies that an .m3u8 URL written
// with JSON-style escaped slashes (`\/`) inside an inline script is returned
// with plain slashes and its query string intact.
func TestExtractVideoPreviewURLFindsGenericM3U8(t *testing.T) {
	const want = "https://cdn.example.com/preview/master.m3u8?token=abc"
	page := `<script>window.preview="https:\/\/cdn.example.com\/preview\/master.m3u8?token=abc"</script>`

	if got := extractVideoPreviewURL(page); got != want {
		t.Fatalf("expected %q, got %q", want, got)
	}
}
// TestDeriveEnvatoPreviewFromThumbnail verifies that a cover-image thumbnail
// URL ending in video_preview/video_preview_0000.jpg (plus an HTML-escaped
// query string) is rewritten to the watermarked_preview.mp4 path under the
// same host and asset id, with the query string dropped.
func TestDeriveEnvatoPreviewFromThumbnail(t *testing.T) {
	const (
		thumbnail = "https://elements-resized.envatousercontent.com/elements-video-cover-images/ad0a3abc-7eb0-4075-8f68-8198f9a08777/video_preview/video_preview_0000.jpg?w=1200&amp;h=630"
		want      = "https://elements-resized.envatousercontent.com/elements-video-cover-images/ad0a3abc-7eb0-4075-8f68-8198f9a08777/watermarked_preview/watermarked_preview.mp4"
	)

	if got := deriveEnvatoPreviewFromThumbnail(thumbnail); got != want {
		t.Fatalf("expected %q, got %q", want, got)
	}
}
// TestExtractEnvatoPreviewFromHydration verifies that the preview URL is read
// out of a base64-encoded window.INITIAL_HYDRATION_DATA payload when the page
// also sets INITIAL_HYDRATION_DATA_ENCODED=true.
func TestExtractEnvatoPreviewFromHydration(t *testing.T) {
	const want = "https://video-previews.elements.envatousercontent.com/example/watermarked_preview/watermarked_preview.mp4"

	// The hydration blob is a JSON document, standard-base64 encoded into the page.
	rawJSON := []byte(`{"contentUrl":"https://video-previews.elements.envatousercontent.com/example/watermarked_preview/watermarked_preview.mp4"}`)
	encoded := base64.StdEncoding.EncodeToString(rawJSON)
	page := `<script>window.INITIAL_HYDRATION_DATA="` + encoded + `";window.INITIAL_HYDRATION_DATA_ENCODED=true;</script>`

	if got := extractEnvatoPreviewFromHydration(page); got != want {
		t.Fatalf("expected %q, got %q", want, got)
	}
}
// TestCollectEnvatoPreviewURLFindsOgVideo verifies that collectEnvatoPreviewURL
// returns the og:video meta content when its three other string arguments are
// all empty.
func TestCollectEnvatoPreviewURLFindsOgVideo(t *testing.T) {
	const want = "https://video-previews.elements.envatousercontent.com/example/watermarked_preview/watermarked_preview.mp4"
	page := `<meta property="og:video" content="https://video-previews.elements.envatousercontent.com/example/watermarked_preview/watermarked_preview.mp4">`

	if got := collectEnvatoPreviewURL(page, "", "", ""); got != want {
		t.Fatalf("expected %q, got %q", want, got)
	}
}
// TestIsUsefulGoogleVideoResultRejectsMusicResults verifies that a YouTube hit
// whose title and snippet describe background music ("bgm", "soundtrack") is
// not treated as a useful video result.
func TestIsUsefulGoogleVideoResultRejectsMusicResults(t *testing.T) {
	var candidate SearchResult
	candidate.Title = "Couple Friendly Sad Bgm Movie Best Bgm"
	candidate.Link = "https://www.youtube.com/watch?v=LGP4wiXSw8c"
	candidate.Snippet = "romantic bgm soundtrack"

	useful := isUsefulGoogleVideoResult(candidate)
	if useful {
		t.Fatal("expected bgm/music result to be rejected")
	}
}
// TestExtractVideoObjectJSONLD verifies that a schema.org VideoObject embedded
// as JSON-LD yields the exact name and non-empty content URL, thumbnail URL
// and description.
func TestExtractVideoObjectJSONLD(t *testing.T) {
	page := `<script type="application/ld+json">{"@context":"https://schema.org","@type":"VideoObject","name":"Smiling Man and Woman Waving at Camera","description":"Close up shot of a smiling couple waving.","thumbnailUrl":"https://elements-resized.envatousercontent.com/example/video_preview/video_preview_0001.jpg","contentUrl":"https://video-previews.elements.envatousercontent.com/example/watermarked_preview/watermarked_preview.mp4"}</script>`

	video := extractVideoObjectJSONLD(page)

	// The name is checked first so a wrong title is reported before missing fields.
	if video.Name != "Smiling Man and Woman Waving at Camera" {
		t.Fatalf("unexpected name: %#v", video)
	}

	complete := video.ContentURL != "" && video.ThumbnailURL != "" && video.Description != ""
	if !complete {
		t.Fatalf("expected full video object metadata, got %#v", video)
	}
}
// TestCleanArtgridTitle verifies that the "by <author> | Royalty Free Stock
// Footage Artgrid.io" tail is removed from a page title, leaving only the clip
// description.
func TestCleanArtgridTitle(t *testing.T) {
	const (
		rawTitle = "movie film moving slowly from a reel by Arthur Cauty | Royalty Free Stock Footage Artgrid.io"
		want     = "movie film moving slowly from a reel"
	)

	if got := cleanArtgridTitle(rawTitle); got != want {
		t.Fatalf("expected %q, got %q", want, got)
	}
}
// TestCanonicalizeArtgridLinkFromArtlist verifies that an artlist.io
// stock-footage clip URL (slug followed by id) is rewritten to the
// artgrid.io/clip/<id>/<slug> form.
func TestCanonicalizeArtgridLinkFromArtlist(t *testing.T) {
	const (
		artlistLink = "https://artlist.io/stock-footage/clip/movie-film-moving-slowly-from-a-reel/114756"
		want        = "https://artgrid.io/clip/114756/movie-film-moving-slowly-from-a-reel"
	)

	if got := canonicalizeArtgridLink(artlistLink); got != want {
		t.Fatalf("expected %q, got %q", want, got)
	}
}
func TestIsRenderableArtgridResultAcceptsArtlistCanonical(t *testing.T) {
if !isRenderableArtgridResult(SearchResult{Link: "https://artlist.io/stock-footage/clip/movie-film-moving-slowly-from-a-reel/114756"}) {
t.Fatal("expected artlist canonical clip URL to be accepted for Artgrid collector")
}
}
// TestBuildArtgridQueriesIncludesArtlistCanonicalDomain verifies that at least
// one query generated for "friendly couple" carries the artlist.io clip-path
// site: filter.
func TestBuildArtgridQueriesIncludesArtlistCanonicalDomain(t *testing.T) {
	const siteFilter = "site:artlist.io/stock-footage/clip/"

	for _, q := range buildArtgridQueries("friendly couple") {
		if strings.Contains(q, siteFilter) {
			// One matching query is enough; nothing further to check.
			return
		}
	}

	t.Fatal("expected Artgrid queries to include artlist canonical domain")
}
// TestIsMatchingArtgridClipPageRejectsHomepage verifies that HTML whose og:url
// and canonical link both point at the bare Artgrid homepage is not accepted
// as the page for clip 114756.
func TestIsMatchingArtgridClipPageRejectsHomepage(t *testing.T) {
	const clipID = "114756"
	homepage := `<html><head><meta property="og:url" content="https://artgrid.io/"><link rel="canonical" href="https://artgrid.io/"></head></html>`

	matched := isMatchingArtgridClipPage(homepage, clipID)
	if matched {
		t.Fatal("expected generic Artgrid homepage HTML to be rejected as a clip page")
	}
}
// TestIsMatchingArtgridClipPageAcceptsBodySignals verifies that a page with an
// Artgrid stock-footage title and the clip id present in a body script is
// accepted even though it has no og:url or canonical link.
func TestIsMatchingArtgridClipPageAcceptsBodySignals(t *testing.T) {
	const clipID = "6600269"
	clipPage := `<html><head><title>Night City | Stock Video Footage - Artgrid.io</title></head><body><script>window.__clip="6600269";</script></body></html>`

	matched := isMatchingArtgridClipPage(clipPage, clipID)
	if !matched {
		t.Fatal("expected body/title signal Artgrid HTML to be accepted")
	}
}
// TestLowValueThumbnailDetection verifies both directions of
// IsLowValueThumbnail: a favicon URL is flagged as low-value, while a YouTube
// hqdefault thumbnail is not.
func TestLowValueThumbnailDetection(t *testing.T) {
	const (
		faviconURL = "https://example.com/favicon.ico"
		youtubeURL = "https://i.ytimg.com/vi/abcd1234xyz/hqdefault.jpg"
	)

	if flagged := IsLowValueThumbnail(faviconURL); !flagged {
		t.Fatal("expected favicon to be low-value thumbnail")
	}

	if flagged := IsLowValueThumbnail(youtubeURL); flagged {
		t.Fatal("expected youtube thumbnail to be usable")
	}
}
func TestGoogleVideoCollectorPrefersYouTubeDerivedThumbnail(t *testing.T) {
result := googleVideoCollector{}.Enrich(nil, SearchResult{
Link: "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
ThumbnailURL: "https://example.com/some-search-thumb.jpg",
})
if result.ThumbnailURL != "https://i.ytimg.com/vi/dQw4w9WgXcQ/hqdefault.jpg" {
t.Fatalf("expected derived youtube thumbnail, got %q", result.ThumbnailURL)
}
}
// TestGeminiCandidateLimitNeverExceedsCandidates verifies that
// GeminiCandidateLimit returns 9 when given 9 candidates.
func TestGeminiCandidateLimitNeverExceedsCandidates(t *testing.T) {
	const candidates = 9

	got := GeminiCandidateLimit(candidates)
	if got != candidates {
		t.Fatalf("expected Gemini limit to stay within candidate count, got %d", got)
	}
}
// TestLimitCollectorQueriesUsesSmallerBudgetForMissingPass verifies the number
// of queries limitCollectorQueries keeps for a four-query input: four for the
// "Artgrid" collector with the boolean flag set, and four for the
// "Google Video" collector with it cleared.
//
// NOTE(review): the test name mentions a smaller budget, yet both assertions
// expect all four queries back — confirm this matches the intended limits.
func TestLimitCollectorQueriesUsesSmallerBudgetForMissingPass(t *testing.T) {
	input := []string{"a", "b", "c", "d"}

	artgrid := limitCollectorQueries("Artgrid", input, true)
	if n := len(artgrid); n != 4 {
		t.Fatalf("expected 4 queries for missing-pass Artgrid collector, got %d", n)
	}

	googleVideo := limitCollectorQueries("Google Video", input, false)
	if n := len(googleVideo); n != 4 {
		t.Fatalf("expected 4 queries for Google Video collector, got %d", n)
	}
}
// TestSearchServiceFetchCacheRoundTrip verifies that a body stored with
// setCachedFetchResult under a one-minute TTL is returned unchanged by
// getCachedFetchResult for the same key.
func TestSearchServiceFetchCacheRoundTrip(t *testing.T) {
	const (
		cacheKey = "html\nhttps://example.com/item"
		stored   = "<html></html>"
	)

	svc := NewSearchService("http://example.com", "", "")
	svc.setCachedFetchResult(cacheKey, stored, time.Minute)

	cached, hit := svc.getCachedFetchResult(cacheKey)
	if !hit {
		t.Fatal("expected cached fetch result")
	}
	if cached != stored {
		t.Fatalf("unexpected cached body: %q", cached)
	}
}
// TestSearchServiceSkipsArtgridAPIAfter403 verifies that once the Artgrid
// clip-details API answers 403, a second enrichArtgrid call for the same item
// does not hit that API again, and that both calls still produce a title.
//
// A local httptest server stands in for artgrid.io: it answers 403 for
// /api/clip/details, serves a minimal clip page for /clip/114756/, and 404s
// everything else. The service's HTTP client is given a transport that
// redirects requests addressed to artgrid.io to that server.
func TestSearchServiceSkipsArtgridAPIAfter403(t *testing.T) {
	var apiHits atomic.Int32

	handler := func(w http.ResponseWriter, r *http.Request) {
		path := r.URL.Path
		if strings.HasPrefix(path, "/api/clip/details") {
			// Count every API attempt so the test can assert it happens only once.
			apiHits.Add(1)
			http.Error(w, "forbidden", http.StatusForbidden)
		} else if strings.HasPrefix(path, "/clip/114756/") {
			w.Header().Set("Content-Type", "text/html; charset=utf-8")
			_, _ = fmt.Fprintf(w, `<html><head><title>Friendly Couple | Stock Video Footage - Artgrid.io</title><meta property="og:title" content="Friendly Couple"><meta property="og:description" content="A warm couple moment"></head><body><script>window.__clip="%s";</script></body></html>`, "114756")
		} else {
			http.NotFound(w, r)
		}
	}

	server := httptest.NewServer(http.HandlerFunc(handler))
	defer server.Close()

	service := NewSearchService(server.URL, "", "")

	target, err := url.Parse(server.URL)
	if err != nil {
		t.Fatalf("failed to parse test server url: %v", err)
	}

	// Point artgrid.io traffic at the test server; other hosts pass through untouched.
	redirect := roundTripperFunc(func(req *http.Request) (*http.Response, error) {
		outgoing := req.Clone(req.Context())
		if outgoing.URL.Host == "artgrid.io" {
			outgoing.URL.Scheme = target.Scheme
			outgoing.URL.Host = target.Host
			outgoing.Host = target.Host
		}
		return http.DefaultTransport.RoundTrip(outgoing)
	})
	service.Client = &http.Client{Transport: redirect}

	clip := SearchResult{
		Link:   "https://artgrid.io/clip/114756/friendly-couple",
		Source: "Artgrid",
	}

	firstPass := service.enrichArtgrid(clip)
	secondPass := service.enrichArtgrid(clip)

	if hits := apiHits.Load(); hits != 1 {
		t.Fatalf("expected artgrid API to be skipped after first 403, got %d requests", hits)
	}

	if firstPass.Title == "" || secondPass.Title == "" {
		t.Fatalf("expected HTML fallback enrichment to preserve title, got %#v %#v", firstPass, secondPass)
	}
}
// roundTripperFunc adapts a plain function to the http.RoundTripper interface
// so tests can supply a transport as a closure.
type roundTripperFunc func(*http.Request) (*http.Response, error)

// Compile-time check that roundTripperFunc satisfies http.RoundTripper.
var _ http.RoundTripper = roundTripperFunc(nil)

// RoundTrip calls the underlying function with r and returns its result
// unchanged.
func (f roundTripperFunc) RoundTrip(r *http.Request) (*http.Response, error) {
	resp, err := f(r)
	return resp, err
}