Reduce search timeout pressure on reverted baseline
build-push / docker (push) Successful in 4m22s

This commit is contained in:
GHStaK
2026-03-18 14:01:39 +09:00
parent f131cee6de
commit d3fb5e15e9
4 changed files with 82 additions and 7 deletions
+28
View File
@@ -624,6 +624,34 @@
- If behavior in the browser does not match the latest backend/frontend code, the first assumption should be stale frontend assets until proven otherwise
## Recent Change Log
- Date: `2026-03-18`
- What changed:
- Resumed and completed the interrupted search-timeout mitigation work that had been left locally after the rollback to `f131cee`.
- Split the search-service deadline into:
- collection deadline
- enrichment deadline with a reserved window
- Reduced collector fan-out on the reverted baseline:
- fewer base queries
- no per-request query shuffling
- earlier stop when a collector repeatedly returns `0` results before producing any accepted item
- Raised `Google Video` max results to `12` so visible count does not collapse as hard when Envato / Artgrid are cold.
- Added unit coverage for the search/enrichment deadline split helper.
- Why it changed:
- The user-provided log `ai-media-hub-2026-03-18T04-44-11-440Z.log` showed:
- repeated collector passes with many `rawCount: 0`
- `search_service:deadline_reached`
- `partialDueToDeadline: true`
- final warning `search returned partial results to avoid gateway timeout`
- only `Google Video` surviving into the final result set with `resultCount: 8`
- The real bottleneck in that log was collector-side time waste before enrichment/Gemini, not another Gemini output-format issue.
- How it was verified:
- PowerShell with repo-local tooling:
- `go test ./...`
- `node --check frontend/app.js`
- What is still risky or incomplete:
- This should reduce timeout pressure and improve visible count in the common “Envato/Artgrid zero streak” case, but upstream SearXNG quality can still dominate the final pool.
- A full app-boot smoke flow was not reintroduced into this reverted baseline in this turn.
- Date: `2026-03-17`
- What changed:
  - Reverted commit `5ca7aef` (`Strengthen search breadth and modal fitting`) to restore the previous stable search/modal baseline.
+32 -6
View File
@@ -53,6 +53,8 @@ type SearchExecutionMeta struct {
PartialDueToDeadline bool `json:"partialDueToDeadline"` PartialDueToDeadline bool `json:"partialDueToDeadline"`
} }
const searchEnrichmentReserve = 4 * time.Second
func NewSearchService(baseURL, googleVideoEngine, webEngine string) *SearchService { func NewSearchService(baseURL, googleVideoEngine, webEngine string) *SearchService {
if googleVideoEngine == "" { if googleVideoEngine == "" {
googleVideoEngine = "google videos" googleVideoEngine = "google videos"
@@ -84,6 +86,7 @@ func (s *SearchService) SearchMediaWithDeadline(queries []string, enabledPlatfor
if s.BaseURL == "" { if s.BaseURL == "" {
return nil, meta, fmt.Errorf("searxng base url is not configured") return nil, meta, fmt.Errorf("searxng base url is not configured")
} }
collectionDeadline, enrichmentDeadline := splitSearchDeadlines(deadline)
s.debug("search_service:start", map[string]any{ s.debug("search_service:start", map[string]any{
"queries": queries, "queries": queries,
"enabledPlatforms": enabledPlatforms, "enabledPlatforms": enabledPlatforms,
@@ -93,13 +96,13 @@ func (s *SearchService) SearchMediaWithDeadline(queries []string, enabledPlatfor
sourceCounts := map[string]int{} sourceCounts := map[string]int{}
results := make([]SearchResult, 0, 90) results := make([]SearchResult, 0, 90)
var lastErr error var lastErr error
collectorZeroStreak := map[string]int{}
baseQueries := limitQueries(queries, 8) baseQueries := limitQueries(queries, 8)
shuffleStrings(baseQueries)
primaryQueries := baseQueries[:minInt(len(baseQueries), 3)] primaryQueries := baseQueries[:minInt(len(baseQueries), 3)]
runSearchPass := func(bases []string, onlyMissing bool) { runSearchPass := func(bases []string, onlyMissing bool) {
for _, base := range bases { for _, base := range bases {
if !deadline.IsZero() && time.Now().After(deadline) { if !collectionDeadline.IsZero() && time.Now().After(collectionDeadline) {
meta.PartialDueToDeadline = true meta.PartialDueToDeadline = true
s.debug("search_service:deadline_reached", map[string]any{"stage": "runSearchPass", "base": base}) s.debug("search_service:deadline_reached", map[string]any{"stage": "runSearchPass", "base": base})
return return
@@ -109,7 +112,7 @@ func (s *SearchService) SearchMediaWithDeadline(queries []string, enabledPlatfor
continue continue
} }
for _, collector := range s.collectors { for _, collector := range s.collectors {
if !deadline.IsZero() && time.Now().After(deadline) { if !collectionDeadline.IsZero() && time.Now().After(collectionDeadline) {
meta.PartialDueToDeadline = true meta.PartialDueToDeadline = true
s.debug("search_service:deadline_reached", map[string]any{"stage": "collectorLoop", "collector": collector.Name()}) s.debug("search_service:deadline_reached", map[string]any{"stage": "collectorLoop", "collector": collector.Name()})
return return
@@ -124,7 +127,6 @@ func (s *SearchService) SearchMediaWithDeadline(queries []string, enabledPlatfor
continue continue
} }
searchQueries := collector.BuildQueries(base) searchQueries := collector.BuildQueries(base)
shuffleStrings(searchQueries)
searchQueries = limitCollectorQueries(collector.Name(), searchQueries, onlyMissing) searchQueries = limitCollectorQueries(collector.Name(), searchQueries, onlyMissing)
s.debug("search_service:collector_queries", map[string]any{ s.debug("search_service:collector_queries", map[string]any{
"collector": collector.Name(), "collector": collector.Name(),
@@ -133,7 +135,7 @@ func (s *SearchService) SearchMediaWithDeadline(queries []string, enabledPlatfor
"searchQueries": searchQueries, "searchQueries": searchQueries,
}) })
for _, searchQuery := range searchQueries { for _, searchQuery := range searchQueries {
if !deadline.IsZero() && time.Now().After(deadline) { if !collectionDeadline.IsZero() && time.Now().After(collectionDeadline) {
meta.PartialDueToDeadline = true meta.PartialDueToDeadline = true
s.debug("search_service:deadline_reached", map[string]any{"stage": "queryLoop", "collector": collector.Name(), "query": searchQuery}) s.debug("search_service:deadline_reached", map[string]any{"stage": "queryLoop", "collector": collector.Name(), "query": searchQuery})
return return
@@ -157,6 +159,11 @@ func (s *SearchService) SearchMediaWithDeadline(queries []string, enabledPlatfor
"rawCount": len(items), "rawCount": len(items),
"sourceCount": sourceCounts[collector.Name()], "sourceCount": sourceCounts[collector.Name()],
}) })
if len(items) == 0 && sourceCounts[collector.Name()] == 0 {
collectorZeroStreak[collector.Name()]++
} else {
collectorZeroStreak[collector.Name()] = 0
}
for _, item := range items { for _, item := range items {
item = normalizeResultForCollector(collector.Name(), item) item = normalizeResultForCollector(collector.Name(), item)
if item.Link == "" || seen[item.Link] || !collector.Accept(item) { if item.Link == "" || seen[item.Link] || !collector.Accept(item) {
@@ -169,6 +176,14 @@ func (s *SearchService) SearchMediaWithDeadline(queries []string, enabledPlatfor
break break
} }
} }
if collectorZeroStreak[collector.Name()] >= 2 && sourceCounts[collector.Name()] == 0 {
s.debug("search_service:collector_skip_after_zero_streak", map[string]any{
"collector": collector.Name(),
"base": base,
"streak": collectorZeroStreak[collector.Name()],
})
break
}
} }
} }
} }
@@ -192,11 +207,22 @@ func (s *SearchService) SearchMediaWithDeadline(queries []string, enabledPlatfor
"hadError": lastErr != nil, "hadError": lastErr != nil,
"partialDueToDeadline": meta.PartialDueToDeadline, "partialDueToDeadline": meta.PartialDueToDeadline,
}) })
enriched, enrichMeta := s.EnrichResultsWithDeadline(results, deadline) enriched, enrichMeta := s.EnrichResultsWithDeadline(results, enrichmentDeadline)
meta.PartialDueToDeadline = meta.PartialDueToDeadline || enrichMeta.PartialDueToDeadline meta.PartialDueToDeadline = meta.PartialDueToDeadline || enrichMeta.PartialDueToDeadline
return enriched, meta, nil return enriched, meta, nil
} }
// splitSearchDeadlines derives a (collection, enrichment) deadline pair from
// the overall search deadline. When enough budget remains, collection is cut
// off searchEnrichmentReserve early so enrichment keeps a guaranteed window;
// when the remaining budget is at or below the reserve, both phases share the
// original deadline. A zero deadline means "no deadline" for both phases.
func splitSearchDeadlines(deadline time.Time) (time.Time, time.Time) {
	if deadline.IsZero() {
		return time.Time{}, time.Time{}
	}
	if time.Until(deadline) <= searchEnrichmentReserve {
		// Budget too small to carve out a reserve: let both phases race the
		// same deadline rather than starving collection entirely.
		return deadline, deadline
	}
	return deadline.Add(-searchEnrichmentReserve), deadline
}
func (s *SearchService) EnrichResults(results []SearchResult) []SearchResult { func (s *SearchService) EnrichResults(results []SearchResult) []SearchResult {
enriched, _ := s.EnrichResultsWithDeadline(results, time.Time{}) enriched, _ := s.EnrichResultsWithDeadline(results, time.Time{})
return enriched return enriched
+21
View File
@@ -182,6 +182,27 @@ func TestSearchServiceFetchCacheRoundTrip(t *testing.T) {
} }
} }
// TestSplitSearchDeadlinesReservesEnrichmentWindow checks that a generous
// overall deadline is split so enrichment retains a later deadline than the
// collection phase.
func TestSplitSearchDeadlinesReservesEnrichmentWindow(t *testing.T) {
	overall := time.Now().Add(20 * time.Second)
	collection, enrichment := splitSearchDeadlines(overall)
	switch {
	case enrichment.IsZero():
		t.Fatal("expected enrichment deadline to be preserved")
	case !collection.Before(enrichment):
		t.Fatalf("expected collection deadline before enrichment deadline, got %v >= %v", collection, enrichment)
	}
}
// TestSplitSearchDeadlinesDoesNotReserveWhenBudgetTooSmall checks that a
// tight overall deadline is NOT split: both phases must share one deadline
// when the remaining budget is at or below the enrichment reserve.
func TestSplitSearchDeadlinesDoesNotReserveWhenBudgetTooSmall(t *testing.T) {
	tight := time.Now().Add(2 * time.Second)
	collection, enrichment := splitSearchDeadlines(tight)
	if collection.Equal(enrichment) {
		return
	}
	t.Fatalf("expected identical deadlines, got %v and %v", collection, enrichment)
}
func TestSearchServiceSkipsArtgridAPIAfter403(t *testing.T) { func TestSearchServiceSkipsArtgridAPIAfter403(t *testing.T) {
var apiRequests atomic.Int32 var apiRequests atomic.Int32
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+1 -1
View File
@@ -47,7 +47,7 @@ func (artgridCollector) Enrich(searcher *SearchService, result SearchResult) Sea
type googleVideoCollector struct{} type googleVideoCollector struct{}
func (googleVideoCollector) Name() string { return "Google Video" } func (googleVideoCollector) Name() string { return "Google Video" }
func (googleVideoCollector) MaxResults() int { return 8 } func (googleVideoCollector) MaxResults() int { return 12 }
func (googleVideoCollector) Enabled(enabledPlatforms map[string]bool) bool { func (googleVideoCollector) Enabled(enabledPlatforms map[string]bool) bool {
return len(enabledPlatforms) == 0 || enabledPlatforms["google video"] return len(enabledPlatforms) == 0 || enabledPlatforms["google video"]
} }