This commit is contained in:
@@ -88,6 +88,23 @@
|
|||||||
- Artgrid hover-video preview cannot be derived reliably from the provided snapshot alone
|
- Artgrid hover-video preview cannot be derived reliably from the provided snapshot alone
|
||||||
- if Artgrid preview video is still required, the next useful artifact is a browser HAR or DevTools network capture from an opened clip page
|
- if Artgrid preview video is still required, the next useful artifact is a browser HAR or DevTools network capture from an opened clip page
|
||||||
|
|
||||||
|
## Current Session Update (2026-03-13, Collector Refactor)
|
||||||
|
- Refactored the search pipeline into source-specific collectors:
|
||||||
|
- `envatoCollector`
|
||||||
|
- `artgridCollector`
|
||||||
|
- `googleVideoCollector`
|
||||||
|
- `SearchService` is now mainly responsible for:
|
||||||
|
- collector orchestration
|
||||||
|
- query-pass control
|
||||||
|
- dedupe
|
||||||
|
- cross-source enrichment scheduling
|
||||||
|
- Goal of the refactor:
|
||||||
|
- reduce cross-source coupling
|
||||||
|
- make future source-specific fixes safer
|
||||||
|
- make it easier to replace or disable one source without destabilizing the others
|
||||||
|
- Current implementation note:
|
||||||
|
- collectors still live in Go code under `backend/services`, but their responsibilities are now separated by source instead of being handled in one monolithic search loop
|
||||||
|
|
||||||
## Local Self-Test Workflow
|
## Local Self-Test Workflow
|
||||||
- Primary command:
|
- Primary command:
|
||||||
- `bash scripts/selftest.sh`
|
- `bash scripts/selftest.sh`
|
||||||
|
|||||||
+34
-59
@@ -29,6 +29,7 @@ type SearchService struct {
|
|||||||
GoogleVideoEngine string
|
GoogleVideoEngine string
|
||||||
WebEngine string
|
WebEngine string
|
||||||
Client *http.Client
|
Client *http.Client
|
||||||
|
collectors []searchCollector
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewSearchService(baseURL, googleVideoEngine, webEngine string) *SearchService {
|
func NewSearchService(baseURL, googleVideoEngine, webEngine string) *SearchService {
|
||||||
@@ -43,6 +44,11 @@ func NewSearchService(baseURL, googleVideoEngine, webEngine string) *SearchServi
|
|||||||
GoogleVideoEngine: googleVideoEngine,
|
GoogleVideoEngine: googleVideoEngine,
|
||||||
WebEngine: webEngine,
|
WebEngine: webEngine,
|
||||||
Client: &http.Client{Timeout: 20 * time.Second},
|
Client: &http.Client{Timeout: 20 * time.Second},
|
||||||
|
collectors: []searchCollector{
|
||||||
|
envatoCollector{},
|
||||||
|
artgridCollector{},
|
||||||
|
googleVideoCollector{},
|
||||||
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -51,42 +57,6 @@ func (s *SearchService) SearchMedia(queries []string, enabledPlatforms map[strin
|
|||||||
return nil, fmt.Errorf("searxng base url is not configured")
|
return nil, fmt.Errorf("searxng base url is not configured")
|
||||||
}
|
}
|
||||||
|
|
||||||
type sourceConfig struct {
|
|
||||||
name string
|
|
||||||
categories string
|
|
||||||
engine string
|
|
||||||
maxResults int
|
|
||||||
build func(string) []string
|
|
||||||
accept func(SearchResult) bool
|
|
||||||
}
|
|
||||||
|
|
||||||
sources := []sourceConfig{
|
|
||||||
{
|
|
||||||
name: "Envato",
|
|
||||||
categories: "general",
|
|
||||||
engine: s.WebEngine,
|
|
||||||
maxResults: 8,
|
|
||||||
build: buildEnvatoQueries,
|
|
||||||
accept: isRenderableEnvatoResult,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "Artgrid",
|
|
||||||
categories: "general",
|
|
||||||
engine: s.WebEngine,
|
|
||||||
maxResults: 8,
|
|
||||||
build: buildArtgridQueries,
|
|
||||||
accept: isRenderableArtgridResult,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
name: "Google Video",
|
|
||||||
categories: "videos",
|
|
||||||
engine: s.GoogleVideoEngine,
|
|
||||||
maxResults: 6,
|
|
||||||
build: buildGoogleVideoQueries,
|
|
||||||
accept: isUsefulGoogleVideoResult,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
seen := map[string]bool{}
|
seen := map[string]bool{}
|
||||||
sourceCounts := map[string]int{}
|
sourceCounts := map[string]int{}
|
||||||
results := make([]SearchResult, 0, 90)
|
results := make([]SearchResult, 0, 90)
|
||||||
@@ -100,37 +70,33 @@ func (s *SearchService) SearchMedia(queries []string, enabledPlatforms map[strin
|
|||||||
if base == "" {
|
if base == "" {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
for _, source := range sources {
|
for _, collector := range s.collectors {
|
||||||
if len(enabledPlatforms) > 0 && !enabledPlatforms[strings.ToLower(source.name)] {
|
if !collector.Enabled(enabledPlatforms) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if sourceCounts[source.name] >= source.maxResults {
|
if sourceCounts[collector.Name()] >= collector.MaxResults() {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
if onlyMissing && sourceCounts[source.name] > 0 {
|
if onlyMissing && sourceCounts[collector.Name()] > 0 {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
for _, searchQuery := range source.build(base) {
|
for _, searchQuery := range collector.BuildQueries(base) {
|
||||||
if sourceCounts[source.name] >= source.maxResults {
|
if sourceCounts[collector.Name()] >= collector.MaxResults() {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
items, err := s.search(searchQuery, source.categories, source.engine, source.name)
|
items, err := collector.Collect(s, searchQuery)
|
||||||
if err != nil {
|
|
||||||
lastErr = err
|
|
||||||
items, err = s.search(searchQuery, source.categories, "", source.name)
|
|
||||||
}
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
lastErr = err
|
lastErr = err
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
for _, item := range items {
|
for _, item := range items {
|
||||||
if item.Link == "" || seen[item.Link] || !source.accept(item) {
|
if item.Link == "" || seen[item.Link] || !collector.Accept(item) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
seen[item.Link] = true
|
seen[item.Link] = true
|
||||||
results = append(results, item)
|
results = append(results, item)
|
||||||
sourceCounts[source.name]++
|
sourceCounts[collector.Name()]++
|
||||||
if sourceCounts[source.name] >= source.maxResults {
|
if sourceCounts[collector.Name()] >= collector.MaxResults() {
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -179,17 +145,26 @@ func (s *SearchService) EnrichResults(results []SearchResult) []SearchResult {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (s *SearchService) enrichResult(result SearchResult) SearchResult {
|
func (s *SearchService) enrichResult(result SearchResult) SearchResult {
|
||||||
switch result.Source {
|
for _, collector := range s.collectors {
|
||||||
case "Envato":
|
if collector.Name() == result.Source {
|
||||||
return s.enrichEnvato(result)
|
return collector.Enrich(s, result)
|
||||||
case "Artgrid":
|
|
||||||
return s.enrichArtgrid(result)
|
|
||||||
default:
|
|
||||||
if result.ThumbnailURL == "" {
|
|
||||||
result.ThumbnailURL = deriveThumbnail(result.Link)
|
|
||||||
}
|
}
|
||||||
return result
|
|
||||||
}
|
}
|
||||||
|
if result.ThumbnailURL == "" {
|
||||||
|
result.ThumbnailURL = deriveThumbnail(result.Link)
|
||||||
|
}
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
|
// searchWithFallback runs s.search for query with the requested engine and,
// if that attempt fails, retries exactly once with an empty engine string.
// The items of the first successful attempt are returned. When engine is
// blank there is nothing to fall back to, so the original error is returned.
// When the retry is made, the first attempt's error is discarded and the
// retry's result (items or error) is returned as-is.
func (s *SearchService) searchWithFallback(query, categories, engine, source string) ([]SearchResult, error) {
|
||||||
|
items, err := s.search(query, categories, engine, source)
|
||||||
|
if err == nil {
|
||||||
|
return items, nil
|
||||||
|
}
|
||||||
|
// A blank engine means the first call already ran without an explicit
// engine; repeating it with "" would be the same request.
if strings.TrimSpace(engine) == "" {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
// NOTE(review): presumably an empty engine lets the search backend pick its
// default engines for the category — confirm against s.search.
return s.search(query, categories, "", source)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *SearchService) enrichEnvato(result SearchResult) SearchResult {
|
func (s *SearchService) enrichEnvato(result SearchResult) SearchResult {
|
||||||
|
|||||||
@@ -0,0 +1,70 @@
|
|||||||
|
package services
|
||||||
|
|
||||||
|
import "strings"
|
||||||
|
|
||||||
|
// searchCollector is the per-source strategy used by SearchService. Each
// implementation bundles the label, result cap, platform filter, query
// expansion, fetch, filtering and enrichment for one search source.
type searchCollector interface {
|
||||||
|
// Name returns the source label. SearchMedia keys its per-source counters
// on it and enrichResult compares it with SearchResult.Source.
Name() string
|
||||||
|
// MaxResults returns the cap on accepted results SearchMedia keeps for
// this source.
MaxResults() int
|
||||||
|
// Enabled reports whether the collector should run for the given platform
// filter; SearchMedia skips the collector when it returns false.
Enabled(enabledPlatforms map[string]bool) bool
|
||||||
|
// BuildQueries expands one base query into the search queries to issue
// for this source.
BuildQueries(base string) []string
|
||||||
|
// Collect runs a single search query through searcher and returns the
// items before Accept filtering is applied.
Collect(searcher *SearchService, query string) ([]SearchResult, error)
|
||||||
|
// Accept reports whether a collected item may be kept; SearchMedia drops
// items for which it returns false.
Accept(result SearchResult) bool
|
||||||
|
// Enrich returns result with source-specific enrichment applied. It is
// called by enrichResult for results whose Source equals Name().
Enrich(searcher *SearchService, result SearchResult) SearchResult
|
||||||
|
}
|
||||||
|
|
||||||
|
// envatoCollector is the stateless searchCollector for the "Envato" source.
// Apart from the constant Name and MaxResults, every method forwards to an
// existing package-level helper or SearchService method.
type envatoCollector struct{}
|
||||||
|
|
||||||
|
// Name returns the source label "Envato".
func (envatoCollector) Name() string { return "Envato" }
|
||||||
|
// MaxResults caps Envato at 8 accepted results.
func (envatoCollector) MaxResults() int { return 8 }
|
||||||
|
// Enabled reports whether Envato should be searched. An empty or nil filter
// map enables the collector; otherwise the key "envato" must be true.
// NOTE(review): the key is the lowercase form of Name(); the two literals
// have to be kept in sync by hand.
func (envatoCollector) Enabled(enabledPlatforms map[string]bool) bool {
|
||||||
|
return len(enabledPlatforms) == 0 || enabledPlatforms["envato"]
|
||||||
|
}
|
||||||
|
// BuildQueries forwards base to buildEnvatoQueries.
func (envatoCollector) BuildQueries(base string) []string { return buildEnvatoQueries(base) }
|
||||||
|
// Collect searches the "general" category with searcher.WebEngine, passing
// "Envato" as the source argument. searchWithFallback retries once with an
// empty engine if the first attempt fails and WebEngine is non-blank.
func (envatoCollector) Collect(searcher *SearchService, query string) ([]SearchResult, error) {
|
||||||
|
return searcher.searchWithFallback(query, "general", searcher.WebEngine, "Envato")
|
||||||
|
}
|
||||||
|
// Accept keeps only results for which isRenderableEnvatoResult returns true.
func (envatoCollector) Accept(result SearchResult) bool { return isRenderableEnvatoResult(result) }
|
||||||
|
// Enrich forwards result to searcher.enrichEnvato and returns its output.
func (envatoCollector) Enrich(searcher *SearchService, result SearchResult) SearchResult {
|
||||||
|
return searcher.enrichEnvato(result)
|
||||||
|
}
|
||||||
|
|
||||||
|
// artgridCollector is the stateless searchCollector for the "Artgrid" source.
// Apart from the constant Name and MaxResults, every method forwards to an
// existing package-level helper or SearchService method.
type artgridCollector struct{}
|
||||||
|
|
||||||
|
// Name returns the source label "Artgrid".
func (artgridCollector) Name() string { return "Artgrid" }
|
||||||
|
// MaxResults caps Artgrid at 8 accepted results.
func (artgridCollector) MaxResults() int { return 8 }
|
||||||
|
// Enabled reports whether Artgrid should be searched. An empty or nil filter
// map enables the collector; otherwise the key "artgrid" must be true.
// NOTE(review): the key is the lowercase form of Name(); the two literals
// have to be kept in sync by hand.
func (artgridCollector) Enabled(enabledPlatforms map[string]bool) bool {
|
||||||
|
return len(enabledPlatforms) == 0 || enabledPlatforms["artgrid"]
|
||||||
|
}
|
||||||
|
// BuildQueries forwards base to buildArtgridQueries.
func (artgridCollector) BuildQueries(base string) []string { return buildArtgridQueries(base) }
|
||||||
|
// Collect searches the "general" category with searcher.WebEngine, passing
// "Artgrid" as the source argument. searchWithFallback retries once with an
// empty engine if the first attempt fails and WebEngine is non-blank.
func (artgridCollector) Collect(searcher *SearchService, query string) ([]SearchResult, error) {
|
||||||
|
return searcher.searchWithFallback(query, "general", searcher.WebEngine, "Artgrid")
|
||||||
|
}
|
||||||
|
// Accept keeps only results for which isRenderableArtgridResult returns true.
func (artgridCollector) Accept(result SearchResult) bool { return isRenderableArtgridResult(result) }
|
||||||
|
// Enrich forwards result to searcher.enrichArtgrid and returns its output.
func (artgridCollector) Enrich(searcher *SearchService, result SearchResult) SearchResult {
|
||||||
|
return searcher.enrichArtgrid(result)
|
||||||
|
}
|
||||||
|
|
||||||
|
// googleVideoCollector is the stateless searchCollector for the
// "Google Video" source. Unlike the other collectors in this file, its Enrich
// is implemented inline instead of forwarding to a SearchService method.
type googleVideoCollector struct{}
|
||||||
|
|
||||||
|
// Name returns the source label "Google Video".
func (googleVideoCollector) Name() string { return "Google Video" }
|
||||||
|
// MaxResults caps Google Video at 6 accepted results.
func (googleVideoCollector) MaxResults() int { return 6 }
|
||||||
|
// Enabled reports whether Google Video should be searched. An empty or nil
// filter map enables the collector; otherwise the key "google video" (with
// the space) must be true.
// NOTE(review): the key is the lowercase form of Name(); the two literals
// have to be kept in sync by hand.
func (googleVideoCollector) Enabled(enabledPlatforms map[string]bool) bool {
|
||||||
|
return len(enabledPlatforms) == 0 || enabledPlatforms["google video"]
|
||||||
|
}
|
||||||
|
// BuildQueries forwards base to buildGoogleVideoQueries.
func (googleVideoCollector) BuildQueries(base string) []string { return buildGoogleVideoQueries(base) }
|
||||||
|
// Collect searches the "videos" category with searcher.GoogleVideoEngine,
// passing "Google Video" as the source argument. searchWithFallback retries
// once with an empty engine if the first attempt fails and the engine is
// non-blank.
func (googleVideoCollector) Collect(searcher *SearchService, query string) ([]SearchResult, error) {
|
||||||
|
return searcher.searchWithFallback(query, "videos", searcher.GoogleVideoEngine, "Google Video")
|
||||||
|
}
|
||||||
|
// Accept keeps only results for which isUsefulGoogleVideoResult returns true.
func (googleVideoCollector) Accept(result SearchResult) bool {
|
||||||
|
return isUsefulGoogleVideoResult(result)
|
||||||
|
}
|
||||||
|
// Enrich fills in a missing thumbnail and normalises the source label.
// The searcher argument is not used.
func (googleVideoCollector) Enrich(searcher *SearchService, result SearchResult) SearchResult {
|
||||||
|
// Only derive a thumbnail when the result did not come with one.
if result.ThumbnailURL == "" {
|
||||||
|
result.ThumbnailURL = deriveThumbnail(result.Link)
|
||||||
|
}
|
||||||
|
// NOTE(review): enrichResult only dispatches here when result.Source already
// equals "Google Video", so the trim/default below only matters for callers
// that invoke Enrich directly.
result.Source = strings.TrimSpace(result.Source)
|
||||||
|
if result.Source == "" {
|
||||||
|
result.Source = "Google Video"
|
||||||
|
}
|
||||||
|
return result
|
||||||
|
}
|
||||||
+1
-1
@@ -24,7 +24,7 @@ trap cleanup EXIT
|
|||||||
cd "${ROOT_DIR}"
|
cd "${ROOT_DIR}"
|
||||||
|
|
||||||
echo "[selftest] gofmt"
|
echo "[selftest] gofmt"
|
||||||
gofmt -w backend/main.go backend/handlers/api.go backend/models/db.go backend/services/cse.go backend/services/cse_test.go backend/services/gemini.go backend/services/gemini_test.go
|
gofmt -w backend/main.go backend/handlers/api.go backend/models/db.go backend/services/cse.go backend/services/cse_test.go backend/services/search_collectors.go backend/services/gemini.go backend/services/gemini_test.go
|
||||||
|
|
||||||
echo "[selftest] python syntax"
|
echo "[selftest] python syntax"
|
||||||
python3 -m py_compile worker/downloader.py scripts/mock_searxng.py
|
python3 -m py_compile worker/downloader.py scripts/mock_searxng.py
|
||||||
|
|||||||
Reference in New Issue
Block a user