Refactor search into source-specific collectors
build-push / docker (push) Successful in 4m35s

This commit is contained in:
AI Assistant
2026-03-13 19:11:54 +09:00
parent ae091c5a7d
commit 5aebbef639
4 changed files with 122 additions and 60 deletions
+17
View File
@@ -88,6 +88,23 @@
- Artgrid hover-video preview cannot be derived reliably from the provided snapshot alone
- if Artgrid preview video is still required, the next useful artifact is a browser HAR or DevTools network capture from an opened clip page
## Current Session Update (2026-03-13, Collector Refactor)
- Refactored the search pipeline into source-specific collectors:
- `envatoCollector`
- `artgridCollector`
- `googleVideoCollector`
- `SearchService` now acts mainly as:
- collector orchestration
- query-pass control
- dedupe
- cross-source enrichment scheduling
- Goal of the refactor:
- reduce cross-source coupling
- make future source-specific fixes safer
- make it easier to replace or disable one source without destabilizing the others
- Current implementation note:
- the collectors still live in Go code under `backend/services`, but responsibilities are now split by source instead of being handled in one monolithic search loop
## Local Self-Test Workflow
- Primary command:
- `bash scripts/selftest.sh`
+34 -59
View File
@@ -29,6 +29,7 @@ type SearchService struct {
GoogleVideoEngine string
WebEngine string
Client *http.Client
collectors []searchCollector
}
func NewSearchService(baseURL, googleVideoEngine, webEngine string) *SearchService {
@@ -43,6 +44,11 @@ func NewSearchService(baseURL, googleVideoEngine, webEngine string) *SearchServi
GoogleVideoEngine: googleVideoEngine,
WebEngine: webEngine,
Client: &http.Client{Timeout: 20 * time.Second},
collectors: []searchCollector{
envatoCollector{},
artgridCollector{},
googleVideoCollector{},
},
}
}
@@ -51,42 +57,6 @@ func (s *SearchService) SearchMedia(queries []string, enabledPlatforms map[strin
return nil, fmt.Errorf("searxng base url is not configured")
}
type sourceConfig struct {
name string
categories string
engine string
maxResults int
build func(string) []string
accept func(SearchResult) bool
}
sources := []sourceConfig{
{
name: "Envato",
categories: "general",
engine: s.WebEngine,
maxResults: 8,
build: buildEnvatoQueries,
accept: isRenderableEnvatoResult,
},
{
name: "Artgrid",
categories: "general",
engine: s.WebEngine,
maxResults: 8,
build: buildArtgridQueries,
accept: isRenderableArtgridResult,
},
{
name: "Google Video",
categories: "videos",
engine: s.GoogleVideoEngine,
maxResults: 6,
build: buildGoogleVideoQueries,
accept: isUsefulGoogleVideoResult,
},
}
seen := map[string]bool{}
sourceCounts := map[string]int{}
results := make([]SearchResult, 0, 90)
@@ -100,37 +70,33 @@ func (s *SearchService) SearchMedia(queries []string, enabledPlatforms map[strin
if base == "" {
continue
}
for _, source := range sources {
if len(enabledPlatforms) > 0 && !enabledPlatforms[strings.ToLower(source.name)] {
for _, collector := range s.collectors {
if !collector.Enabled(enabledPlatforms) {
continue
}
if sourceCounts[source.name] >= source.maxResults {
if sourceCounts[collector.Name()] >= collector.MaxResults() {
continue
}
if onlyMissing && sourceCounts[source.name] > 0 {
if onlyMissing && sourceCounts[collector.Name()] > 0 {
continue
}
for _, searchQuery := range source.build(base) {
if sourceCounts[source.name] >= source.maxResults {
for _, searchQuery := range collector.BuildQueries(base) {
if sourceCounts[collector.Name()] >= collector.MaxResults() {
break
}
items, err := s.search(searchQuery, source.categories, source.engine, source.name)
if err != nil {
lastErr = err
items, err = s.search(searchQuery, source.categories, "", source.name)
}
items, err := collector.Collect(s, searchQuery)
if err != nil {
lastErr = err
continue
}
for _, item := range items {
if item.Link == "" || seen[item.Link] || !source.accept(item) {
if item.Link == "" || seen[item.Link] || !collector.Accept(item) {
continue
}
seen[item.Link] = true
results = append(results, item)
sourceCounts[source.name]++
if sourceCounts[source.name] >= source.maxResults {
sourceCounts[collector.Name()]++
if sourceCounts[collector.Name()] >= collector.MaxResults() {
break
}
}
@@ -179,17 +145,26 @@ func (s *SearchService) EnrichResults(results []SearchResult) []SearchResult {
}
func (s *SearchService) enrichResult(result SearchResult) SearchResult {
switch result.Source {
case "Envato":
return s.enrichEnvato(result)
case "Artgrid":
return s.enrichArtgrid(result)
default:
if result.ThumbnailURL == "" {
result.ThumbnailURL = deriveThumbnail(result.Link)
for _, collector := range s.collectors {
if collector.Name() == result.Source {
return collector.Enrich(s, result)
}
return result
}
if result.ThumbnailURL == "" {
result.ThumbnailURL = deriveThumbnail(result.Link)
}
return result
}
// searchWithFallback calls s.search with the given engine and, if that call
// fails, retries once with an empty engine string.
//
// When engine is blank (after trimming whitespace) the first call was already
// made without an engine, so its error is returned as-is instead of repeating
// the same request.
//
// If the retry fails as well, the returned error wraps the retry error (so
// errors.Is / errors.As still match it) and includes the engine-specific
// failure in its message, so the original cause is not lost. On any error the
// returned slice is nil.
func (s *SearchService) searchWithFallback(query, categories, engine, source string) ([]SearchResult, error) {
	items, err := s.search(query, categories, engine, source)
	if err == nil {
		return items, nil
	}
	if strings.TrimSpace(engine) == "" {
		return nil, err
	}

	items, fallbackErr := s.search(query, categories, "", source)
	if fallbackErr != nil {
		// Keep the retry error as the wrapped cause and attach the first
		// failure as context for whoever reads the log.
		return nil, fmt.Errorf("%w (engine %q failed first: %v)", fallbackErr, engine, err)
	}
	return items, nil
}
func (s *SearchService) enrichEnvato(result SearchResult) SearchResult {
+70
View File
@@ -0,0 +1,70 @@
package services
import "strings"
// searchCollector is the per-source contract that SearchService drives: one
// implementation per search source, each owning its own query building,
// fetching, filtering and enrichment.
type searchCollector interface {
	// Name returns the source label. SearchService keys its per-source
	// result counts by it and compares it with SearchResult.Source when
	// choosing which collector enriches a result.
	Name() string

	// MaxResults returns how many accepted results this source may
	// contribute before SearchService stops querying it.
	MaxResults() int

	// Enabled reports whether this source should be searched for the given
	// platform selection.
	Enabled(enabledPlatforms map[string]bool) bool

	// BuildQueries expands a base query into the search queries to run for
	// this source.
	BuildQueries(base string) []string

	// Collect runs a single query for this source through searcher and
	// returns the results, or an error.
	Collect(searcher *SearchService, query string) ([]SearchResult, error)

	// Accept reports whether a collected result should be kept.
	Accept(result SearchResult) bool

	// Enrich returns result after source-specific post-processing.
	Enrich(searcher *SearchService, result SearchResult) SearchResult
}
// envatoCollector is the stateless searchCollector implementation for the
// "Envato" source.
type envatoCollector struct{}

// Name returns the source label "Envato".
func (c envatoCollector) Name() string {
	return "Envato"
}

// MaxResults returns the result cap for this source (8).
func (c envatoCollector) MaxResults() int {
	return 8
}

// Enabled reports whether Envato should be searched. A nil or empty selection
// enables it; otherwise the lowercase key "envato" must be set to true.
func (c envatoCollector) Enabled(enabledPlatforms map[string]bool) bool {
	if len(enabledPlatforms) == 0 {
		return true
	}
	return enabledPlatforms["envato"]
}

// BuildQueries delegates to buildEnvatoQueries.
func (c envatoCollector) BuildQueries(base string) []string {
	return buildEnvatoQueries(base)
}

// Collect searches the "general" category with the service's WebEngine via
// searchWithFallback, tagging the request with this collector's name.
func (c envatoCollector) Collect(searcher *SearchService, query string) ([]SearchResult, error) {
	const category = "general"
	return searcher.searchWithFallback(query, category, searcher.WebEngine, c.Name())
}

// Accept delegates to isRenderableEnvatoResult.
func (c envatoCollector) Accept(result SearchResult) bool {
	return isRenderableEnvatoResult(result)
}

// Enrich delegates to SearchService.enrichEnvato.
func (c envatoCollector) Enrich(searcher *SearchService, result SearchResult) SearchResult {
	return searcher.enrichEnvato(result)
}
// artgridCollector is the stateless searchCollector implementation for the
// "Artgrid" source.
type artgridCollector struct{}

// Name returns the source label "Artgrid".
func (c artgridCollector) Name() string {
	return "Artgrid"
}

// MaxResults returns the result cap for this source (8).
func (c artgridCollector) MaxResults() int {
	return 8
}

// Enabled reports whether Artgrid should be searched. A nil or empty selection
// enables it; otherwise the lowercase key "artgrid" must be set to true.
func (c artgridCollector) Enabled(enabledPlatforms map[string]bool) bool {
	if len(enabledPlatforms) == 0 {
		return true
	}
	return enabledPlatforms["artgrid"]
}

// BuildQueries delegates to buildArtgridQueries.
func (c artgridCollector) BuildQueries(base string) []string {
	return buildArtgridQueries(base)
}

// Collect searches the "general" category with the service's WebEngine via
// searchWithFallback, tagging the request with this collector's name.
func (c artgridCollector) Collect(searcher *SearchService, query string) ([]SearchResult, error) {
	const category = "general"
	return searcher.searchWithFallback(query, category, searcher.WebEngine, c.Name())
}

// Accept delegates to isRenderableArtgridResult.
func (c artgridCollector) Accept(result SearchResult) bool {
	return isRenderableArtgridResult(result)
}

// Enrich delegates to SearchService.enrichArtgrid.
func (c artgridCollector) Enrich(searcher *SearchService, result SearchResult) SearchResult {
	return searcher.enrichArtgrid(result)
}
// googleVideoCollector is the stateless searchCollector implementation for the
// "Google Video" source.
type googleVideoCollector struct{}

// Name returns the source label "Google Video".
func (c googleVideoCollector) Name() string {
	return "Google Video"
}

// MaxResults returns the result cap for this source (6).
func (c googleVideoCollector) MaxResults() int {
	return 6
}

// Enabled reports whether Google Video should be searched. A nil or empty
// selection enables it; otherwise the lowercase key "google video" must be set
// to true.
func (c googleVideoCollector) Enabled(enabledPlatforms map[string]bool) bool {
	if len(enabledPlatforms) == 0 {
		return true
	}
	return enabledPlatforms["google video"]
}

// BuildQueries delegates to buildGoogleVideoQueries.
func (c googleVideoCollector) BuildQueries(base string) []string {
	return buildGoogleVideoQueries(base)
}

// Collect searches the "videos" category with the service's GoogleVideoEngine
// via searchWithFallback, tagging the request with this collector's name.
func (c googleVideoCollector) Collect(searcher *SearchService, query string) ([]SearchResult, error) {
	const category = "videos"
	return searcher.searchWithFallback(query, category, searcher.GoogleVideoEngine, c.Name())
}

// Accept delegates to isUsefulGoogleVideoResult.
func (c googleVideoCollector) Accept(result SearchResult) bool {
	return isUsefulGoogleVideoResult(result)
}

// Enrich fills a missing thumbnail from the result link via deriveThumbnail and
// normalizes Source: surrounding whitespace is trimmed, and a blank Source is
// replaced with "Google Video". The searcher argument is not used.
func (c googleVideoCollector) Enrich(searcher *SearchService, result SearchResult) SearchResult {
	if result.ThumbnailURL == "" {
		result.ThumbnailURL = deriveThumbnail(result.Link)
	}
	if trimmed := strings.TrimSpace(result.Source); trimmed != "" {
		result.Source = trimmed
	} else {
		result.Source = c.Name()
	}
	return result
}
+1 -1
View File
@@ -24,7 +24,7 @@ trap cleanup EXIT
cd "${ROOT_DIR}"
echo "[selftest] gofmt"
gofmt -w backend/main.go backend/handlers/api.go backend/models/db.go backend/services/cse.go backend/services/cse_test.go backend/services/gemini.go backend/services/gemini_test.go
gofmt -w backend/main.go backend/handlers/api.go backend/models/db.go backend/services/cse.go backend/services/cse_test.go backend/services/search_collectors.go backend/services/gemini.go backend/services/gemini_test.go
echo "[selftest] python syntax"
python3 -m py_compile worker/downloader.py scripts/mock_searxng.py