This commit is contained in:
+74
-31
@@ -27,15 +27,16 @@ type SearchResult struct {
|
||||
}
|
||||
|
||||
type SearchService struct {
|
||||
BaseURL string
|
||||
GoogleVideoEngine string
|
||||
WebEngine string
|
||||
Client *http.Client
|
||||
collectors []searchCollector
|
||||
Debug func(message string, data any)
|
||||
cacheMu sync.Mutex
|
||||
searchCache map[string]cachedSearchResults
|
||||
fetchCache map[string]cachedFetchResult
|
||||
BaseURL string
|
||||
GoogleVideoEngine string
|
||||
WebEngine string
|
||||
Client *http.Client
|
||||
collectors []searchCollector
|
||||
Debug func(message string, data any)
|
||||
cacheMu sync.Mutex
|
||||
searchCache map[string]cachedSearchResults
|
||||
fetchCache map[string]cachedFetchResult
|
||||
artgridAPIBlockedUntil time.Time
|
||||
}
|
||||
|
||||
type cachedSearchResults struct {
|
||||
@@ -48,6 +49,10 @@ type cachedFetchResult struct {
|
||||
expiresAt time.Time
|
||||
}
|
||||
|
||||
// SearchExecutionMeta reports how a search or enrichment run finished.
type SearchExecutionMeta struct {
	// PartialDueToDeadline is set when work was cut short because the
	// caller-supplied deadline had already passed.
	PartialDueToDeadline bool `json:"partialDueToDeadline"`
}
|
||||
|
||||
func NewSearchService(baseURL, googleVideoEngine, webEngine string) *SearchService {
|
||||
if googleVideoEngine == "" {
|
||||
googleVideoEngine = "google videos"
|
||||
@@ -70,13 +75,14 @@ func NewSearchService(baseURL, googleVideoEngine, webEngine string) *SearchServi
|
||||
}
|
||||
}
|
||||
|
||||
func (s *SearchService) SearchMedia(queries []string, enabledPlatforms map[string]bool) ([]SearchResult, error) {
|
||||
func (s *SearchService) SearchMedia(queries []string, enabledPlatforms map[string]bool) ([]SearchResult, SearchExecutionMeta, error) {
|
||||
return s.SearchMediaWithDeadline(queries, enabledPlatforms, time.Time{})
|
||||
}
|
||||
|
||||
func (s *SearchService) SearchMediaWithDeadline(queries []string, enabledPlatforms map[string]bool, deadline time.Time) ([]SearchResult, error) {
|
||||
func (s *SearchService) SearchMediaWithDeadline(queries []string, enabledPlatforms map[string]bool, deadline time.Time) ([]SearchResult, SearchExecutionMeta, error) {
|
||||
meta := SearchExecutionMeta{}
|
||||
if s.BaseURL == "" {
|
||||
return nil, fmt.Errorf("searxng base url is not configured")
|
||||
return nil, meta, fmt.Errorf("searxng base url is not configured")
|
||||
}
|
||||
s.debug("search_service:start", map[string]any{
|
||||
"queries": queries,
|
||||
@@ -94,6 +100,7 @@ func (s *SearchService) SearchMediaWithDeadline(queries []string, enabledPlatfor
|
||||
runSearchPass := func(bases []string, onlyMissing bool) {
|
||||
for _, base := range bases {
|
||||
if !deadline.IsZero() && time.Now().After(deadline) {
|
||||
meta.PartialDueToDeadline = true
|
||||
s.debug("search_service:deadline_reached", map[string]any{"stage": "runSearchPass", "base": base})
|
||||
return
|
||||
}
|
||||
@@ -103,6 +110,7 @@ func (s *SearchService) SearchMediaWithDeadline(queries []string, enabledPlatfor
|
||||
}
|
||||
for _, collector := range s.collectors {
|
||||
if !deadline.IsZero() && time.Now().After(deadline) {
|
||||
meta.PartialDueToDeadline = true
|
||||
s.debug("search_service:deadline_reached", map[string]any{"stage": "collectorLoop", "collector": collector.Name()})
|
||||
return
|
||||
}
|
||||
@@ -126,6 +134,7 @@ func (s *SearchService) SearchMediaWithDeadline(queries []string, enabledPlatfor
|
||||
})
|
||||
for _, searchQuery := range searchQueries {
|
||||
if !deadline.IsZero() && time.Now().After(deadline) {
|
||||
meta.PartialDueToDeadline = true
|
||||
s.debug("search_service:deadline_reached", map[string]any{"stage": "queryLoop", "collector": collector.Name(), "query": searchQuery})
|
||||
return
|
||||
}
|
||||
@@ -171,28 +180,33 @@ func (s *SearchService) SearchMediaWithDeadline(queries []string, enabledPlatfor
|
||||
}
|
||||
|
||||
if len(results) == 0 && lastErr != nil {
|
||||
return nil, lastErr
|
||||
return nil, meta, lastErr
|
||||
}
|
||||
|
||||
sort.SliceStable(results, func(i, j int) bool {
|
||||
return sourceWeight(results[i].Source) > sourceWeight(results[j].Source)
|
||||
})
|
||||
s.debug("search_service:complete", map[string]any{
|
||||
"resultCount": len(results),
|
||||
"sourceCounts": sourceCounts,
|
||||
"hadError": lastErr != nil,
|
||||
"resultCount": len(results),
|
||||
"sourceCounts": sourceCounts,
|
||||
"hadError": lastErr != nil,
|
||||
"partialDueToDeadline": meta.PartialDueToDeadline,
|
||||
})
|
||||
return s.EnrichResultsWithDeadline(results, deadline), nil
|
||||
enriched, enrichMeta := s.EnrichResultsWithDeadline(results, deadline)
|
||||
meta.PartialDueToDeadline = meta.PartialDueToDeadline || enrichMeta.PartialDueToDeadline
|
||||
return enriched, meta, nil
|
||||
}
|
||||
|
||||
func (s *SearchService) EnrichResults(results []SearchResult) []SearchResult {
|
||||
return s.EnrichResultsWithDeadline(results, time.Time{})
|
||||
enriched, _ := s.EnrichResultsWithDeadline(results, time.Time{})
|
||||
return enriched
|
||||
}
|
||||
|
||||
func (s *SearchService) EnrichResultsWithDeadline(results []SearchResult, deadline time.Time) []SearchResult {
|
||||
func (s *SearchService) EnrichResultsWithDeadline(results []SearchResult, deadline time.Time) ([]SearchResult, SearchExecutionMeta) {
|
||||
meta := SearchExecutionMeta{}
|
||||
limit := minInt(len(results), 18)
|
||||
if limit == 0 {
|
||||
return results
|
||||
return results, meta
|
||||
}
|
||||
s.debug("search_service:enrich_start", map[string]any{
|
||||
"total": len(results),
|
||||
@@ -203,12 +217,16 @@ func (s *SearchService) EnrichResultsWithDeadline(results []SearchResult, deadli
|
||||
copy(enriched, results)
|
||||
|
||||
var wg sync.WaitGroup
|
||||
var metaMu sync.Mutex
|
||||
sem := make(chan struct{}, 4)
|
||||
for idx := 0; idx < limit; idx++ {
|
||||
wg.Add(1)
|
||||
go func(i int) {
|
||||
defer wg.Done()
|
||||
if !deadline.IsZero() && time.Now().After(deadline) {
|
||||
metaMu.Lock()
|
||||
meta.PartialDueToDeadline = true
|
||||
metaMu.Unlock()
|
||||
return
|
||||
}
|
||||
sem <- struct{}{}
|
||||
@@ -231,7 +249,7 @@ func (s *SearchService) EnrichResultsWithDeadline(results []SearchResult, deadli
|
||||
}
|
||||
wg.Wait()
|
||||
s.debug("search_service:enrich_complete", map[string]any{"limit": limit})
|
||||
return enriched
|
||||
return enriched, meta
|
||||
}
|
||||
|
||||
func (s *SearchService) enrichResult(result SearchResult) SearchResult {
|
||||
@@ -323,19 +341,32 @@ func (s *SearchService) enrichArtgrid(result SearchResult) SearchResult {
|
||||
s.debug("search_service:enrich_artgrid_start", map[string]any{"link": result.Link, "clipId": clipID})
|
||||
|
||||
apiURL := "https://artgrid.io/api/clip/details?clipId=" + clipID
|
||||
body, err := s.fetchJSONText(apiURL)
|
||||
if err == nil {
|
||||
urls := collectURLs(body)
|
||||
if !hasUsableThumbnail(result.ThumbnailURL) {
|
||||
result.ThumbnailURL = pickArtgridImageURL(urls, clipID)
|
||||
var err error
|
||||
if s.shouldSkipArtgridAPI() {
|
||||
s.debug("search_service:enrich_artgrid_api_skip", map[string]any{
|
||||
"link": result.Link,
|
||||
"clipId": clipID,
|
||||
"reason": "cached_403_guard",
|
||||
})
|
||||
} else {
|
||||
var body string
|
||||
body, err = s.fetchJSONText(apiURL)
|
||||
if err == nil {
|
||||
urls := collectURLs(body)
|
||||
if !hasUsableThumbnail(result.ThumbnailURL) {
|
||||
result.ThumbnailURL = pickArtgridImageURL(urls, clipID)
|
||||
}
|
||||
if result.PreviewVideoURL == "" {
|
||||
result.PreviewVideoURL = pickVideoURL(urls)
|
||||
}
|
||||
}
|
||||
if result.PreviewVideoURL == "" {
|
||||
result.PreviewVideoURL = pickVideoURL(urls)
|
||||
if err != nil {
|
||||
if strings.Contains(err.Error(), "status 403") {
|
||||
s.blockArtgridAPI(15 * time.Minute)
|
||||
}
|
||||
s.debug("search_service:enrich_artgrid_api_error", map[string]any{"link": result.Link, "clipId": clipID, "error": err.Error()})
|
||||
}
|
||||
}
|
||||
if err != nil {
|
||||
s.debug("search_service:enrich_artgrid_api_error", map[string]any{"link": result.Link, "clipId": clipID, "error": err.Error()})
|
||||
}
|
||||
|
||||
if result.ThumbnailURL == "" || result.PreviewVideoURL == "" {
|
||||
html, err := s.fetchText(result.Link)
|
||||
@@ -540,6 +571,18 @@ func (s *SearchService) setCachedFetchResult(key, body string, ttl time.Duration
|
||||
}
|
||||
}
|
||||
|
||||
func (s *SearchService) shouldSkipArtgridAPI() bool {
|
||||
s.cacheMu.Lock()
|
||||
defer s.cacheMu.Unlock()
|
||||
return !s.artgridAPIBlockedUntil.IsZero() && time.Now().Before(s.artgridAPIBlockedUntil)
|
||||
}
|
||||
|
||||
func (s *SearchService) blockArtgridAPI(ttl time.Duration) {
|
||||
s.cacheMu.Lock()
|
||||
defer s.cacheMu.Unlock()
|
||||
s.artgridAPIBlockedUntil = time.Now().Add(ttl)
|
||||
}
|
||||
|
||||
func (s *SearchService) debug(message string, data any) {
|
||||
if s != nil && s.Debug != nil {
|
||||
s.Debug(message, data)
|
||||
|
||||
@@ -2,7 +2,12 @@ package services
|
||||
|
||||
import (
|
||||
"encoding/base64"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"net/url"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
@@ -176,3 +181,58 @@ func TestSearchServiceFetchCacheRoundTrip(t *testing.T) {
|
||||
t.Fatalf("unexpected cached body: %q", body)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSearchServiceSkipsArtgridAPIAfter403(t *testing.T) {
|
||||
var apiRequests atomic.Int32
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
switch {
|
||||
case strings.HasPrefix(r.URL.Path, "/api/clip/details"):
|
||||
apiRequests.Add(1)
|
||||
http.Error(w, "forbidden", http.StatusForbidden)
|
||||
case strings.HasPrefix(r.URL.Path, "/clip/114756/"):
|
||||
w.Header().Set("Content-Type", "text/html; charset=utf-8")
|
||||
_, _ = fmt.Fprintf(w, `<html><head><title>Friendly Couple | Stock Video Footage - Artgrid.io</title><meta property="og:title" content="Friendly Couple"><meta property="og:description" content="A warm couple moment"></head><body><script>window.__clip="%s";</script></body></html>`, "114756")
|
||||
default:
|
||||
http.NotFound(w, r)
|
||||
}
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
service := NewSearchService(server.URL, "", "")
|
||||
serverURL, err := url.Parse(server.URL)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to parse test server url: %v", err)
|
||||
}
|
||||
service.Client = &http.Client{
|
||||
Transport: roundTripperFunc(func(req *http.Request) (*http.Response, error) {
|
||||
clone := req.Clone(req.Context())
|
||||
if clone.URL.Host == "artgrid.io" {
|
||||
clone.URL.Scheme = serverURL.Scheme
|
||||
clone.URL.Host = serverURL.Host
|
||||
clone.Host = serverURL.Host
|
||||
}
|
||||
return http.DefaultTransport.RoundTrip(clone)
|
||||
}),
|
||||
}
|
||||
|
||||
item := SearchResult{
|
||||
Link: "https://artgrid.io/clip/114756/friendly-couple",
|
||||
Source: "Artgrid",
|
||||
}
|
||||
|
||||
first := service.enrichArtgrid(item)
|
||||
second := service.enrichArtgrid(item)
|
||||
|
||||
if apiRequests.Load() != 1 {
|
||||
t.Fatalf("expected artgrid API to be skipped after first 403, got %d requests", apiRequests.Load())
|
||||
}
|
||||
if first.Title == "" || second.Title == "" {
|
||||
t.Fatalf("expected HTML fallback enrichment to preserve title, got %#v %#v", first, second)
|
||||
}
|
||||
}
|
||||
|
||||
// roundTripperFunc adapts a plain function to http.RoundTripper so tests can
// supply a transport inline.
type roundTripperFunc func(*http.Request) (*http.Response, error)

// Compile-time check that roundTripperFunc satisfies http.RoundTripper.
var _ http.RoundTripper = roundTripperFunc(nil)

// RoundTrip calls fn with req and returns its result unchanged.
func (fn roundTripperFunc) RoundTrip(req *http.Request) (*http.Response, error) {
	return fn(req)
}
|
||||
|
||||
@@ -97,6 +97,36 @@ func (g *GeminiService) ExpandQuery(query string) ([]string, error) {
|
||||
return expanded, nil
|
||||
}
|
||||
|
||||
func (g *GeminiService) TranslateSummaryToKorean(text string) (string, error) {
|
||||
trimmed := strings.TrimSpace(text)
|
||||
if trimmed == "" {
|
||||
return "", nil
|
||||
}
|
||||
cacheKey := "summary-ko\n" + trimmed
|
||||
if cached, ok := g.getCachedTranslation(cacheKey); ok {
|
||||
g.debug("gemini:summary_translate_cache_hit", map[string]any{"length": len(trimmed)})
|
||||
return cached, nil
|
||||
}
|
||||
if !looksMostlyASCII(trimmed) {
|
||||
g.setCachedTranslation(cacheKey, trimmed, 15*time.Minute)
|
||||
return trimmed, nil
|
||||
}
|
||||
|
||||
g.debug("gemini:summary_translate_attempt", map[string]any{"length": len(trimmed)})
|
||||
translated, err := g.translateViaGoogleToTarget(trimmed, "ko")
|
||||
if err != nil {
|
||||
g.debug("gemini:summary_translate_error", map[string]any{"length": len(trimmed), "error": err.Error()})
|
||||
return "", err
|
||||
}
|
||||
translated = strings.TrimSpace(translated)
|
||||
if translated == "" {
|
||||
return "", fmt.Errorf("google translate summary returned empty translation")
|
||||
}
|
||||
g.debug("gemini:summary_translate_success", map[string]any{"length": len(trimmed)})
|
||||
g.setCachedTranslation(cacheKey, translated, 15*time.Minute)
|
||||
return translated, nil
|
||||
}
|
||||
|
||||
func (g *GeminiService) TranslateQuery(query string) string {
|
||||
trimmed := strings.TrimSpace(query)
|
||||
if trimmed == "" {
|
||||
@@ -784,11 +814,19 @@ func isOvercompressedTranslation(original, translated string) bool {
|
||||
}
|
||||
|
||||
func (g *GeminiService) translateViaGoogle(query string) (string, error) {
|
||||
return g.translateViaGoogleToTarget(query, "en")
|
||||
}
|
||||
|
||||
func (g *GeminiService) translateViaGoogleToTarget(query, targetLanguage string) (string, error) {
|
||||
baseURL := g.TranslateEndpoint
|
||||
if strings.TrimSpace(baseURL) == "" {
|
||||
baseURL = "https://translate.googleapis.com/translate_a/single"
|
||||
}
|
||||
endpoint := baseURL + "?client=gtx&sl=auto&tl=en&dt=t&q=" + neturl.QueryEscape(query)
|
||||
targetLanguage = strings.TrimSpace(targetLanguage)
|
||||
if targetLanguage == "" {
|
||||
targetLanguage = "en"
|
||||
}
|
||||
endpoint := baseURL + "?client=gtx&sl=auto&tl=" + neturl.QueryEscape(targetLanguage) + "&dt=t&q=" + neturl.QueryEscape(query)
|
||||
resp, err := g.Client.Get(endpoint)
|
||||
if err != nil {
|
||||
return "", err
|
||||
|
||||
@@ -41,6 +41,35 @@ func TestTranslateQueryFallsBackToDictionaryWhenTranslateFails(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestTranslateSummaryToKoreanUsesGoogleAndCaches(t *testing.T) {
|
||||
requests := 0
|
||||
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
|
||||
requests++
|
||||
w.Header().Set("Content-Type", "application/json")
|
||||
_, _ = w.Write([]byte(`[[["도시에서 웃는 커플","smiling couple in city",null,null,1]],null,"en"]`))
|
||||
}))
|
||||
defer server.Close()
|
||||
|
||||
service := NewGeminiService("")
|
||||
service.Client = &http.Client{Timeout: 2 * time.Second}
|
||||
service.TranslateEndpoint = server.URL
|
||||
|
||||
first, err := service.TranslateSummaryToKorean("smiling couple in city")
|
||||
if err != nil {
|
||||
t.Fatalf("expected translation to succeed, got error: %v", err)
|
||||
}
|
||||
second, err := service.TranslateSummaryToKorean("smiling couple in city")
|
||||
if err != nil {
|
||||
t.Fatalf("expected cached translation to succeed, got error: %v", err)
|
||||
}
|
||||
if first != "도시에서 웃는 커플" || second != first {
|
||||
t.Fatalf("unexpected translated summary values: %q %q", first, second)
|
||||
}
|
||||
if requests != 1 {
|
||||
t.Fatalf("expected one upstream translation request due to cache, got %d", requests)
|
||||
}
|
||||
}
|
||||
|
||||
func TestNormalizeKnownMediaPhrases(t *testing.T) {
|
||||
translated := translateKoreanMediaTerms("사이버 펑크 도시")
|
||||
if translated != "cyberpunk city" {
|
||||
|
||||
@@ -22,6 +22,7 @@ type GeminiBatchStats struct {
|
||||
SequentialRetried int `json:"sequentialRetried"`
|
||||
RecommendedCount int `json:"recommendedCount"`
|
||||
VisualRejectCount int `json:"visualRejectCount"`
|
||||
DeadlineLimited bool `json:"deadlineLimited,omitempty"`
|
||||
Errors []string `json:"errors,omitempty"`
|
||||
}
|
||||
|
||||
@@ -176,6 +177,9 @@ func EvaluateAllCandidatesWithGeminiWithDeadline(service *GeminiService, query s
|
||||
seen := map[string]bool{}
|
||||
for _, batch := range results {
|
||||
if batch.err != nil {
|
||||
if strings.Contains(batch.err.Error(), "due to deadline") {
|
||||
stats.DeadlineLimited = true
|
||||
}
|
||||
if service != nil && service.Debug != nil {
|
||||
service.Debug("ranker:gemini_batch_error", map[string]any{
|
||||
"batchIndex": batch.index,
|
||||
|
||||
Reference in New Issue
Block a user