From 5aebbef6391406cde01fc50fdd1c6821707a22e1 Mon Sep 17 00:00:00 2001 From: AI Assistant Date: Fri, 13 Mar 2026 19:11:54 +0900 Subject: [PATCH] Refactor search into source-specific collectors --- TODO.md | 17 +++++ backend/services/cse.go | 93 ++++++++++----------------- backend/services/search_collectors.go | 70 ++++++++++++++++++++ scripts/selftest.sh | 2 +- 4 files changed, 122 insertions(+), 60 deletions(-) create mode 100644 backend/services/search_collectors.go diff --git a/TODO.md b/TODO.md index 3a367c4..ed909b1 100644 --- a/TODO.md +++ b/TODO.md @@ -88,6 +88,23 @@ - Artgrid hover-video preview cannot be derived reliably from the provided snapshot alone - if Artgrid preview video is still required, the next useful artifact is a browser HAR or DevTools network capture from an opened clip page +## Current Session Update (2026-03-13, Collector Refactor) +- Refactored the search pipeline into source-specific collectors: + - `envatoCollector` + - `artgridCollector` + - `googleVideoCollector` +- `SearchService` now acts mainly as: + - collector orchestration + - query-pass control + - dedupe + - cross-source enrichment scheduling +- Goal of the refactor: + - reduce cross-source coupling + - make future source-specific fixes safer + - make it easier to replace or disable one source without destabilizing the others +- Current implementation note: + - collectors are still in Go code under backend services, but the responsibilities are now separated by source instead of one monolithic search loop + ## Local Self-Test Workflow - Primary command: - `bash scripts/selftest.sh` diff --git a/backend/services/cse.go b/backend/services/cse.go index 0d657c2..3fdf866 100644 --- a/backend/services/cse.go +++ b/backend/services/cse.go @@ -29,6 +29,7 @@ type SearchService struct { GoogleVideoEngine string WebEngine string Client *http.Client + collectors []searchCollector } func NewSearchService(baseURL, googleVideoEngine, webEngine string) *SearchService { @@ -43,6 +44,11 @@ func NewSearchService(baseURL, googleVideoEngine, webEngine string) *SearchServi GoogleVideoEngine: googleVideoEngine, WebEngine: webEngine, Client: &http.Client{Timeout: 20 * time.Second}, + collectors: []searchCollector{ + envatoCollector{}, + artgridCollector{}, + googleVideoCollector{}, + }, } } @@ -51,42 +57,6 @@ func (s *SearchService) SearchMedia(queries []string, enabledPlatforms map[strin return nil, fmt.Errorf("searxng base url is not configured") } - type sourceConfig struct { - name string - categories string - engine string - maxResults int - build func(string) []string - accept func(SearchResult) bool - } - - sources := []sourceConfig{ - { - name: "Envato", - categories: "general", - engine: s.WebEngine, - maxResults: 8, - build: buildEnvatoQueries, - accept: isRenderableEnvatoResult, - }, - { - name: "Artgrid", - categories: "general", - engine: s.WebEngine, - maxResults: 8, - build: buildArtgridQueries, - accept: isRenderableArtgridResult, - }, - { - name: "Google Video", - categories: "videos", - engine: s.GoogleVideoEngine, - maxResults: 6, - build: buildGoogleVideoQueries, - accept: isUsefulGoogleVideoResult, - }, - } - seen := map[string]bool{} sourceCounts := map[string]int{} results := make([]SearchResult, 0, 90) @@ -100,37 +70,33 @@ func (s *SearchService) SearchMedia(queries []string, enabledPlatforms map[strin if base == "" { continue } - for _, source := range sources { - if len(enabledPlatforms) > 0 && !enabledPlatforms[strings.ToLower(source.name)] { + for _, collector := range s.collectors { + if !collector.Enabled(enabledPlatforms) { continue } - if sourceCounts[source.name] >= source.maxResults { + if sourceCounts[collector.Name()] >= collector.MaxResults() { continue } - if onlyMissing && sourceCounts[source.name] > 0 { + if onlyMissing && sourceCounts[collector.Name()] > 0 { continue } - for _, searchQuery := range source.build(base) { - if sourceCounts[source.name] >= source.maxResults { + for _, searchQuery := range collector.BuildQueries(base) { + if sourceCounts[collector.Name()] >= collector.MaxResults() { break } - items, err := s.search(searchQuery, source.categories, source.engine, source.name) - if err != nil { - lastErr = err - items, err = s.search(searchQuery, source.categories, "", source.name) - } + items, err := collector.Collect(s, searchQuery) if err != nil { lastErr = err continue } for _, item := range items { - if item.Link == "" || seen[item.Link] || !source.accept(item) { + if item.Link == "" || seen[item.Link] || !collector.Accept(item) { continue } seen[item.Link] = true results = append(results, item) - sourceCounts[source.name]++ - if sourceCounts[source.name] >= source.maxResults { + sourceCounts[collector.Name()]++ + if sourceCounts[collector.Name()] >= collector.MaxResults() { break } } @@ -179,17 +145,26 @@ func (s *SearchService) EnrichResults(results []SearchResult) []SearchResult { } func (s *SearchService) enrichResult(result SearchResult) SearchResult { - switch result.Source { - case "Envato": - return s.enrichEnvato(result) - case "Artgrid": - return s.enrichArtgrid(result) - default: - if result.ThumbnailURL == "" { - result.ThumbnailURL = deriveThumbnail(result.Link) + for _, collector := range s.collectors { + if collector.Name() == result.Source { + return collector.Enrich(s, result) } - return result } + if result.ThumbnailURL == "" { + result.ThumbnailURL = deriveThumbnail(result.Link) + } + return result +} + +func (s *SearchService) searchWithFallback(query, categories, engine, source string) ([]SearchResult, error) { + items, err := s.search(query, categories, engine, source) + if err == nil { + return items, nil + } + if strings.TrimSpace(engine) == "" { + return nil, err + } + return s.search(query, categories, "", source) } func (s *SearchService) enrichEnvato(result SearchResult) SearchResult { diff --git a/backend/services/search_collectors.go b/backend/services/search_collectors.go new file mode 100644 index 0000000..332b125 --- /dev/null +++ b/backend/services/search_collectors.go @@ -0,0 +1,70 @@ +package services + +import "strings" + +type searchCollector interface { + Name() string + MaxResults() int + Enabled(enabledPlatforms map[string]bool) bool + BuildQueries(base string) []string + Collect(searcher *SearchService, query string) ([]SearchResult, error) + Accept(result SearchResult) bool + Enrich(searcher *SearchService, result SearchResult) SearchResult +} + +type envatoCollector struct{} + +func (envatoCollector) Name() string { return "Envato" } +func (envatoCollector) MaxResults() int { return 8 } +func (envatoCollector) Enabled(enabledPlatforms map[string]bool) bool { + return len(enabledPlatforms) == 0 || enabledPlatforms["envato"] +} +func (envatoCollector) BuildQueries(base string) []string { return buildEnvatoQueries(base) } +func (envatoCollector) Collect(searcher *SearchService, query string) ([]SearchResult, error) { + return searcher.searchWithFallback(query, "general", searcher.WebEngine, "Envato") +} +func (envatoCollector) Accept(result SearchResult) bool { return isRenderableEnvatoResult(result) } +func (envatoCollector) Enrich(searcher *SearchService, result SearchResult) SearchResult { + return searcher.enrichEnvato(result) +} + +type artgridCollector struct{} + +func (artgridCollector) Name() string { return "Artgrid" } +func (artgridCollector) MaxResults() int { return 8 } +func (artgridCollector) Enabled(enabledPlatforms map[string]bool) bool { + return len(enabledPlatforms) == 0 || enabledPlatforms["artgrid"] +} +func (artgridCollector) BuildQueries(base string) []string { return buildArtgridQueries(base) } +func (artgridCollector) Collect(searcher *SearchService, query string) ([]SearchResult, error) { + return searcher.searchWithFallback(query, "general", searcher.WebEngine, "Artgrid") +} +func (artgridCollector) Accept(result SearchResult) bool { return isRenderableArtgridResult(result) } +func (artgridCollector) Enrich(searcher *SearchService, result SearchResult) SearchResult { + return searcher.enrichArtgrid(result) +} + +type googleVideoCollector struct{} + +func (googleVideoCollector) Name() string { return "Google Video" } +func (googleVideoCollector) MaxResults() int { return 6 } +func (googleVideoCollector) Enabled(enabledPlatforms map[string]bool) bool { + return len(enabledPlatforms) == 0 || enabledPlatforms["google video"] +} +func (googleVideoCollector) BuildQueries(base string) []string { return buildGoogleVideoQueries(base) } +func (googleVideoCollector) Collect(searcher *SearchService, query string) ([]SearchResult, error) { + return searcher.searchWithFallback(query, "videos", searcher.GoogleVideoEngine, "Google Video") +} +func (googleVideoCollector) Accept(result SearchResult) bool { + return isUsefulGoogleVideoResult(result) +} +func (googleVideoCollector) Enrich(searcher *SearchService, result SearchResult) SearchResult { + if result.ThumbnailURL == "" { + result.ThumbnailURL = deriveThumbnail(result.Link) + } + result.Source = strings.TrimSpace(result.Source) + if result.Source == "" { + result.Source = "Google Video" + } + return result +} diff --git a/scripts/selftest.sh b/scripts/selftest.sh index 976f45a..89c4848 100755 --- a/scripts/selftest.sh +++ b/scripts/selftest.sh @@ -24,7 +24,7 @@ trap cleanup EXIT cd "${ROOT_DIR}" echo "[selftest] gofmt" -gofmt -w backend/main.go backend/handlers/api.go backend/models/db.go backend/services/cse.go backend/services/cse_test.go backend/services/gemini.go backend/services/gemini_test.go +gofmt -w backend/main.go backend/handlers/api.go backend/models/db.go backend/services/cse.go backend/services/cse_test.go backend/services/search_collectors.go backend/services/gemini.go backend/services/gemini_test.go echo "[selftest] python syntax" python3 -m py_compile worker/downloader.py scripts/mock_searxng.py