This commit is contained in:
@@ -88,6 +88,23 @@
|
||||
- Artgrid hover-video preview cannot be derived reliably from the provided snapshot alone
|
||||
- if Artgrid preview video is still required, the next useful artifact is a browser HAR or DevTools network capture from an opened clip page
|
||||
|
||||
## Current Session Update (2026-03-13, Collector Refactor)
|
||||
- Refactored the search pipeline into source-specific collectors:
|
||||
- `envatoCollector`
|
||||
- `artgridCollector`
|
||||
- `googleVideoCollector`
|
||||
- `SearchService` is now mainly responsible for:
|
||||
- collector orchestration
|
||||
- query-pass control
|
||||
- dedupe
|
||||
- cross-source enrichment scheduling
|
||||
- Goal of the refactor:
|
||||
- reduce cross-source coupling
|
||||
- make future source-specific fixes safer
|
||||
- make it easier to replace or disable one source without destabilizing the others
|
||||
- Current implementation note:
|
||||
- collectors still live in Go code under the backend services package, but responsibilities are now separated per source instead of being handled inside one monolithic search loop
|
||||
|
||||
## Local Self-Test Workflow
|
||||
- Primary command:
|
||||
- `bash scripts/selftest.sh`
|
||||
|
||||
+31
-56
@@ -29,6 +29,7 @@ type SearchService struct {
|
||||
GoogleVideoEngine string
|
||||
WebEngine string
|
||||
Client *http.Client
|
||||
collectors []searchCollector
|
||||
}
|
||||
|
||||
func NewSearchService(baseURL, googleVideoEngine, webEngine string) *SearchService {
|
||||
@@ -43,6 +44,11 @@ func NewSearchService(baseURL, googleVideoEngine, webEngine string) *SearchServi
|
||||
GoogleVideoEngine: googleVideoEngine,
|
||||
WebEngine: webEngine,
|
||||
Client: &http.Client{Timeout: 20 * time.Second},
|
||||
collectors: []searchCollector{
|
||||
envatoCollector{},
|
||||
artgridCollector{},
|
||||
googleVideoCollector{},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
@@ -51,42 +57,6 @@ func (s *SearchService) SearchMedia(queries []string, enabledPlatforms map[strin
|
||||
return nil, fmt.Errorf("searxng base url is not configured")
|
||||
}
|
||||
|
||||
type sourceConfig struct {
|
||||
name string
|
||||
categories string
|
||||
engine string
|
||||
maxResults int
|
||||
build func(string) []string
|
||||
accept func(SearchResult) bool
|
||||
}
|
||||
|
||||
sources := []sourceConfig{
|
||||
{
|
||||
name: "Envato",
|
||||
categories: "general",
|
||||
engine: s.WebEngine,
|
||||
maxResults: 8,
|
||||
build: buildEnvatoQueries,
|
||||
accept: isRenderableEnvatoResult,
|
||||
},
|
||||
{
|
||||
name: "Artgrid",
|
||||
categories: "general",
|
||||
engine: s.WebEngine,
|
||||
maxResults: 8,
|
||||
build: buildArtgridQueries,
|
||||
accept: isRenderableArtgridResult,
|
||||
},
|
||||
{
|
||||
name: "Google Video",
|
||||
categories: "videos",
|
||||
engine: s.GoogleVideoEngine,
|
||||
maxResults: 6,
|
||||
build: buildGoogleVideoQueries,
|
||||
accept: isUsefulGoogleVideoResult,
|
||||
},
|
||||
}
|
||||
|
||||
seen := map[string]bool{}
|
||||
sourceCounts := map[string]int{}
|
||||
results := make([]SearchResult, 0, 90)
|
||||
@@ -100,37 +70,33 @@ func (s *SearchService) SearchMedia(queries []string, enabledPlatforms map[strin
|
||||
if base == "" {
|
||||
continue
|
||||
}
|
||||
for _, source := range sources {
|
||||
if len(enabledPlatforms) > 0 && !enabledPlatforms[strings.ToLower(source.name)] {
|
||||
for _, collector := range s.collectors {
|
||||
if !collector.Enabled(enabledPlatforms) {
|
||||
continue
|
||||
}
|
||||
if sourceCounts[source.name] >= source.maxResults {
|
||||
if sourceCounts[collector.Name()] >= collector.MaxResults() {
|
||||
continue
|
||||
}
|
||||
if onlyMissing && sourceCounts[source.name] > 0 {
|
||||
if onlyMissing && sourceCounts[collector.Name()] > 0 {
|
||||
continue
|
||||
}
|
||||
for _, searchQuery := range source.build(base) {
|
||||
if sourceCounts[source.name] >= source.maxResults {
|
||||
for _, searchQuery := range collector.BuildQueries(base) {
|
||||
if sourceCounts[collector.Name()] >= collector.MaxResults() {
|
||||
break
|
||||
}
|
||||
items, err := s.search(searchQuery, source.categories, source.engine, source.name)
|
||||
if err != nil {
|
||||
lastErr = err
|
||||
items, err = s.search(searchQuery, source.categories, "", source.name)
|
||||
}
|
||||
items, err := collector.Collect(s, searchQuery)
|
||||
if err != nil {
|
||||
lastErr = err
|
||||
continue
|
||||
}
|
||||
for _, item := range items {
|
||||
if item.Link == "" || seen[item.Link] || !source.accept(item) {
|
||||
if item.Link == "" || seen[item.Link] || !collector.Accept(item) {
|
||||
continue
|
||||
}
|
||||
seen[item.Link] = true
|
||||
results = append(results, item)
|
||||
sourceCounts[source.name]++
|
||||
if sourceCounts[source.name] >= source.maxResults {
|
||||
sourceCounts[collector.Name()]++
|
||||
if sourceCounts[collector.Name()] >= collector.MaxResults() {
|
||||
break
|
||||
}
|
||||
}
|
||||
@@ -179,17 +145,26 @@ func (s *SearchService) EnrichResults(results []SearchResult) []SearchResult {
|
||||
}
|
||||
|
||||
func (s *SearchService) enrichResult(result SearchResult) SearchResult {
|
||||
switch result.Source {
|
||||
case "Envato":
|
||||
return s.enrichEnvato(result)
|
||||
case "Artgrid":
|
||||
return s.enrichArtgrid(result)
|
||||
default:
|
||||
for _, collector := range s.collectors {
|
||||
if collector.Name() == result.Source {
|
||||
return collector.Enrich(s, result)
|
||||
}
|
||||
}
|
||||
if result.ThumbnailURL == "" {
|
||||
result.ThumbnailURL = deriveThumbnail(result.Link)
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
func (s *SearchService) searchWithFallback(query, categories, engine, source string) ([]SearchResult, error) {
|
||||
items, err := s.search(query, categories, engine, source)
|
||||
if err == nil {
|
||||
return items, nil
|
||||
}
|
||||
if strings.TrimSpace(engine) == "" {
|
||||
return nil, err
|
||||
}
|
||||
return s.search(query, categories, "", source)
|
||||
}
|
||||
|
||||
func (s *SearchService) enrichEnvato(result SearchResult) SearchResult {
|
||||
|
||||
@@ -0,0 +1,70 @@
|
||||
package services
|
||||
|
||||
import "strings"
|
||||
|
||||
type searchCollector interface {
|
||||
Name() string
|
||||
MaxResults() int
|
||||
Enabled(enabledPlatforms map[string]bool) bool
|
||||
BuildQueries(base string) []string
|
||||
Collect(searcher *SearchService, query string) ([]SearchResult, error)
|
||||
Accept(result SearchResult) bool
|
||||
Enrich(searcher *SearchService, result SearchResult) SearchResult
|
||||
}
|
||||
|
||||
type envatoCollector struct{}
|
||||
|
||||
func (envatoCollector) Name() string { return "Envato" }
|
||||
func (envatoCollector) MaxResults() int { return 8 }
|
||||
func (envatoCollector) Enabled(enabledPlatforms map[string]bool) bool {
|
||||
return len(enabledPlatforms) == 0 || enabledPlatforms["envato"]
|
||||
}
|
||||
func (envatoCollector) BuildQueries(base string) []string { return buildEnvatoQueries(base) }
|
||||
func (envatoCollector) Collect(searcher *SearchService, query string) ([]SearchResult, error) {
|
||||
return searcher.searchWithFallback(query, "general", searcher.WebEngine, "Envato")
|
||||
}
|
||||
func (envatoCollector) Accept(result SearchResult) bool { return isRenderableEnvatoResult(result) }
|
||||
func (envatoCollector) Enrich(searcher *SearchService, result SearchResult) SearchResult {
|
||||
return searcher.enrichEnvato(result)
|
||||
}
|
||||
|
||||
type artgridCollector struct{}
|
||||
|
||||
func (artgridCollector) Name() string { return "Artgrid" }
|
||||
func (artgridCollector) MaxResults() int { return 8 }
|
||||
func (artgridCollector) Enabled(enabledPlatforms map[string]bool) bool {
|
||||
return len(enabledPlatforms) == 0 || enabledPlatforms["artgrid"]
|
||||
}
|
||||
func (artgridCollector) BuildQueries(base string) []string { return buildArtgridQueries(base) }
|
||||
func (artgridCollector) Collect(searcher *SearchService, query string) ([]SearchResult, error) {
|
||||
return searcher.searchWithFallback(query, "general", searcher.WebEngine, "Artgrid")
|
||||
}
|
||||
func (artgridCollector) Accept(result SearchResult) bool { return isRenderableArtgridResult(result) }
|
||||
func (artgridCollector) Enrich(searcher *SearchService, result SearchResult) SearchResult {
|
||||
return searcher.enrichArtgrid(result)
|
||||
}
|
||||
|
||||
type googleVideoCollector struct{}
|
||||
|
||||
func (googleVideoCollector) Name() string { return "Google Video" }
|
||||
func (googleVideoCollector) MaxResults() int { return 6 }
|
||||
func (googleVideoCollector) Enabled(enabledPlatforms map[string]bool) bool {
|
||||
return len(enabledPlatforms) == 0 || enabledPlatforms["google video"]
|
||||
}
|
||||
func (googleVideoCollector) BuildQueries(base string) []string { return buildGoogleVideoQueries(base) }
|
||||
func (googleVideoCollector) Collect(searcher *SearchService, query string) ([]SearchResult, error) {
|
||||
return searcher.searchWithFallback(query, "videos", searcher.GoogleVideoEngine, "Google Video")
|
||||
}
|
||||
func (googleVideoCollector) Accept(result SearchResult) bool {
|
||||
return isUsefulGoogleVideoResult(result)
|
||||
}
|
||||
func (googleVideoCollector) Enrich(searcher *SearchService, result SearchResult) SearchResult {
|
||||
if result.ThumbnailURL == "" {
|
||||
result.ThumbnailURL = deriveThumbnail(result.Link)
|
||||
}
|
||||
result.Source = strings.TrimSpace(result.Source)
|
||||
if result.Source == "" {
|
||||
result.Source = "Google Video"
|
||||
}
|
||||
return result
|
||||
}
|
||||
+1
-1
@@ -24,7 +24,7 @@ trap cleanup EXIT
|
||||
cd "${ROOT_DIR}"
|
||||
|
||||
echo "[selftest] gofmt"
|
||||
gofmt -w backend/main.go backend/handlers/api.go backend/models/db.go backend/services/cse.go backend/services/cse_test.go backend/services/gemini.go backend/services/gemini_test.go
|
||||
gofmt -w backend/main.go backend/handlers/api.go backend/models/db.go backend/services/cse.go backend/services/cse_test.go backend/services/search_collectors.go backend/services/gemini.go backend/services/gemini_test.go
|
||||
|
||||
echo "[selftest] python syntax"
|
||||
python3 -m py_compile worker/downloader.py scripts/mock_searxng.py
|
||||
|
||||
Reference in New Issue
Block a user