From d3fb5e15e977317607e5f9cdc9bf92a8c0dd181a Mon Sep 17 00:00:00 2001 From: GHStaK Date: Wed, 18 Mar 2026 14:01:39 +0900 Subject: [PATCH] Reduce search timeout pressure on reverted baseline --- TODO.md | 28 ++++++++++++++++++++ backend/services/cse.go | 38 ++++++++++++++++++++++----- backend/services/cse_test.go | 21 +++++++++++++++ backend/services/search_collectors.go | 2 +- 4 files changed, 82 insertions(+), 7 deletions(-) diff --git a/TODO.md b/TODO.md index d06cd5f..70ff18d 100644 --- a/TODO.md +++ b/TODO.md @@ -624,6 +624,34 @@ - If behavior in the browser does not match the latest backend/frontend code, the first assumption should be stale frontend assets until proven otherwise ## Recent Change Log +- Date: `2026-03-18` +- What changed: + - Resumed and completed the interrupted search-timeout mitigation work that had been left locally after the rollback to `f131cee`. + - Split the search-service deadline into: + - collection deadline + - enrichment deadline with a reserved window + - Reduced collector fan-out on the reverted baseline: + - fewer base queries + - no per-request query shuffling + - earlier stop when a collector repeatedly returns `0` results before producing any accepted item + - Raised `Google Video` max results to `12` so visible count does not collapse as hard when Envato / Artgrid are cold. + - Added unit coverage for the search/enrichment deadline split helper. +- Why it changed: + - The user-provided log `ai-media-hub-2026-03-18T04-44-11-440Z.log` showed: + - repeated collector passes with many `rawCount: 0` + - `search_service:deadline_reached` + - `partialDueToDeadline: true` + - final warning `search returned partial results to avoid gateway timeout` + - only `Google Video` surviving into the final result set with `resultCount: 8` + - The real bottleneck in that log was collector-side time waste before enrichment/Gemini, not another Gemini output-format issue. +- How it was verified: + - PowerShell with repo-local tooling: + - `go test ./...` + - `node --check frontend/app.js` +- What is still risky or incomplete: + - This should reduce timeout pressure and improve visible count in the common “Envato/Artgrid zero streak” case, but upstream SearXNG quality can still dominate the final pool. + - A full app-boot smoke flow was not reintroduced into this reverted baseline in this turn. + - Date: `2026-03-17` - What changed: - Reverted commit `5ca7aef` (`Strengthen search breadth and modal fitting`) to restore the previous stable search/modal baseline. diff --git a/backend/services/cse.go b/backend/services/cse.go index 9f44461..32608c5 100644 --- a/backend/services/cse.go +++ b/backend/services/cse.go @@ -53,6 +53,8 @@ type SearchExecutionMeta struct { PartialDueToDeadline bool `json:"partialDueToDeadline"` } +const searchEnrichmentReserve = 4 * time.Second + func NewSearchService(baseURL, googleVideoEngine, webEngine string) *SearchService { if googleVideoEngine == "" { googleVideoEngine = "google videos" @@ -84,6 +86,7 @@ func (s *SearchService) SearchMediaWithDeadline(queries []string, enabledPlatfor if s.BaseURL == "" { return nil, meta, fmt.Errorf("searxng base url is not configured") } + collectionDeadline, enrichmentDeadline := splitSearchDeadlines(deadline) s.debug("search_service:start", map[string]any{ "queries": queries, "enabledPlatforms": enabledPlatforms, @@ -93,13 +96,13 @@ func (s *SearchService) SearchMediaWithDeadline(queries []string, enabledPlatfor sourceCounts := map[string]int{} results := make([]SearchResult, 0, 90) var lastErr error + collectorZeroStreak := map[string]int{} baseQueries := limitQueries(queries, 8) - shuffleStrings(baseQueries) primaryQueries := baseQueries[:minInt(len(baseQueries), 3)] runSearchPass := func(bases []string, onlyMissing bool) { for _, base := range bases { - if !deadline.IsZero() && time.Now().After(deadline) { + if !collectionDeadline.IsZero() && time.Now().After(collectionDeadline) { meta.PartialDueToDeadline = true s.debug("search_service:deadline_reached", map[string]any{"stage": "runSearchPass", "base": base}) return @@ -109,7 +112,7 @@ func (s *SearchService) SearchMediaWithDeadline(queries []string, enabledPlatfor continue } for _, collector := range s.collectors { - if !deadline.IsZero() && time.Now().After(deadline) { + if !collectionDeadline.IsZero() && time.Now().After(collectionDeadline) { meta.PartialDueToDeadline = true s.debug("search_service:deadline_reached", map[string]any{"stage": "collectorLoop", "collector": collector.Name()}) return @@ -124,7 +127,6 @@ func (s *SearchService) SearchMediaWithDeadline(queries []string, enabledPlatfor continue } searchQueries := collector.BuildQueries(base) - shuffleStrings(searchQueries) searchQueries = limitCollectorQueries(collector.Name(), searchQueries, onlyMissing) s.debug("search_service:collector_queries", map[string]any{ "collector": collector.Name(), @@ -133,7 +135,7 @@ func (s *SearchService) SearchMediaWithDeadline(queries []string, enabledPlatfor "searchQueries": searchQueries, }) for _, searchQuery := range searchQueries { - if !deadline.IsZero() && time.Now().After(deadline) { + if !collectionDeadline.IsZero() && time.Now().After(collectionDeadline) { meta.PartialDueToDeadline = true s.debug("search_service:deadline_reached", map[string]any{"stage": "queryLoop", "collector": collector.Name(), "query": searchQuery}) return @@ -157,6 +159,11 @@ func (s *SearchService) SearchMediaWithDeadline(queries []string, enabledPlatfor "rawCount": len(items), "sourceCount": sourceCounts[collector.Name()], }) + if len(items) == 0 && sourceCounts[collector.Name()] == 0 { + collectorZeroStreak[collector.Name()]++ + } else { + collectorZeroStreak[collector.Name()] = 0 + } for _, item := range items { item = normalizeResultForCollector(collector.Name(), item) if item.Link == "" || seen[item.Link] || !collector.Accept(item) { @@ -169,6 +176,14 @@ func (s *SearchService) SearchMediaWithDeadline(queries []string, enabledPlatfor break } } + if collectorZeroStreak[collector.Name()] >= 2 && sourceCounts[collector.Name()] == 0 { + s.debug("search_service:collector_skip_after_zero_streak", map[string]any{ + "collector": collector.Name(), + "base": base, + "streak": collectorZeroStreak[collector.Name()], + }) + break + } } } } @@ -192,11 +207,22 @@ func (s *SearchService) SearchMediaWithDeadline(queries []string, enabledPlatfor "hadError": lastErr != nil, "partialDueToDeadline": meta.PartialDueToDeadline, }) - enriched, enrichMeta := s.EnrichResultsWithDeadline(results, deadline) + enriched, enrichMeta := s.EnrichResultsWithDeadline(results, enrichmentDeadline) meta.PartialDueToDeadline = meta.PartialDueToDeadline || enrichMeta.PartialDueToDeadline return enriched, meta, nil } +func splitSearchDeadlines(deadline time.Time) (time.Time, time.Time) { + if deadline.IsZero() { + return time.Time{}, time.Time{} + } + remaining := time.Until(deadline) + if remaining <= searchEnrichmentReserve { + return deadline, deadline + } + return deadline.Add(-searchEnrichmentReserve), deadline +} + func (s *SearchService) EnrichResults(results []SearchResult) []SearchResult { enriched, _ := s.EnrichResultsWithDeadline(results, time.Time{}) return enriched diff --git a/backend/services/cse_test.go b/backend/services/cse_test.go index 55bf31b..ad6f3d3 100644 --- a/backend/services/cse_test.go +++ b/backend/services/cse_test.go @@ -182,6 +182,27 @@ func TestSearchServiceFetchCacheRoundTrip(t *testing.T) { } } +func TestSplitSearchDeadlinesReservesEnrichmentWindow(t *testing.T) { + deadline := time.Now().Add(20 * time.Second) + collectionDeadline, enrichmentDeadline := splitSearchDeadlines(deadline) + + if enrichmentDeadline.IsZero() { + t.Fatal("expected enrichment deadline to be preserved") + } + if !collectionDeadline.Before(enrichmentDeadline) { + t.Fatalf("expected collection deadline before enrichment deadline, got %v >= %v", collectionDeadline, enrichmentDeadline) + } +} + +func TestSplitSearchDeadlinesDoesNotReserveWhenBudgetTooSmall(t *testing.T) { + deadline := time.Now().Add(2 * time.Second) + collectionDeadline, enrichmentDeadline := splitSearchDeadlines(deadline) + + if !collectionDeadline.Equal(enrichmentDeadline) { + t.Fatalf("expected identical deadlines, got %v and %v", collectionDeadline, enrichmentDeadline) + } +} + func TestSearchServiceSkipsArtgridAPIAfter403(t *testing.T) { var apiRequests atomic.Int32 server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { diff --git a/backend/services/search_collectors.go b/backend/services/search_collectors.go index 973f10b..032c019 100644 --- a/backend/services/search_collectors.go +++ b/backend/services/search_collectors.go @@ -47,7 +47,7 @@ func (artgridCollector) Enrich(searcher *SearchService, result SearchResult) Sea type googleVideoCollector struct{} func (googleVideoCollector) Name() string { return "Google Video" } -func (googleVideoCollector) MaxResults() int { return 8 } +func (googleVideoCollector) MaxResults() int { return 12 } func (googleVideoCollector) Enabled(enabledPlatforms map[string]bool) bool { return len(enabledPlatforms) == 0 || enabledPlatforms["google video"] }