This commit is contained in:
+74
-31
@@ -27,15 +27,16 @@ type SearchResult struct {
|
||||
}
|
||||
|
||||
type SearchService struct {
|
||||
BaseURL string
|
||||
GoogleVideoEngine string
|
||||
WebEngine string
|
||||
Client *http.Client
|
||||
collectors []searchCollector
|
||||
Debug func(message string, data any)
|
||||
cacheMu sync.Mutex
|
||||
searchCache map[string]cachedSearchResults
|
||||
fetchCache map[string]cachedFetchResult
|
||||
BaseURL string
|
||||
GoogleVideoEngine string
|
||||
WebEngine string
|
||||
Client *http.Client
|
||||
collectors []searchCollector
|
||||
Debug func(message string, data any)
|
||||
cacheMu sync.Mutex
|
||||
searchCache map[string]cachedSearchResults
|
||||
fetchCache map[string]cachedFetchResult
|
||||
artgridAPIBlockedUntil time.Time
|
||||
}
|
||||
|
||||
type cachedSearchResults struct {
|
||||
@@ -48,6 +49,10 @@ type cachedFetchResult struct {
|
||||
expiresAt time.Time
|
||||
}
|
||||
|
||||
// SearchExecutionMeta carries side information about how a search or
// enrichment run finished, returned alongside the results themselves.
type SearchExecutionMeta struct {
|
||||
// PartialDueToDeadline is set to true when a search pass or an enrichment
// worker found the caller-supplied deadline already passed and stopped
// early, so the returned results may be incomplete.
PartialDueToDeadline bool `json:"partialDueToDeadline"`
|
||||
}
|
||||
|
||||
func NewSearchService(baseURL, googleVideoEngine, webEngine string) *SearchService {
|
||||
if googleVideoEngine == "" {
|
||||
googleVideoEngine = "google videos"
|
||||
@@ -70,13 +75,14 @@ func NewSearchService(baseURL, googleVideoEngine, webEngine string) *SearchServi
|
||||
}
|
||||
}
|
||||
|
||||
func (s *SearchService) SearchMedia(queries []string, enabledPlatforms map[string]bool) ([]SearchResult, error) {
|
||||
// SearchMedia runs a media search for the given queries over the enabled
// platforms without any deadline. It is a thin wrapper around
// SearchMediaWithDeadline and returns that call's results, execution
// metadata and error unchanged.
func (s *SearchService) SearchMedia(queries []string, enabledPlatforms map[string]bool) ([]SearchResult, SearchExecutionMeta, error) {
|
||||
// The zero time.Time means "no deadline": the deadline checks visible in
// SearchMediaWithDeadline are all guarded by !deadline.IsZero().
return s.SearchMediaWithDeadline(queries, enabledPlatforms, time.Time{})
|
||||
}
|
||||
|
||||
func (s *SearchService) SearchMediaWithDeadline(queries []string, enabledPlatforms map[string]bool, deadline time.Time) ([]SearchResult, error) {
|
||||
func (s *SearchService) SearchMediaWithDeadline(queries []string, enabledPlatforms map[string]bool, deadline time.Time) ([]SearchResult, SearchExecutionMeta, error) {
|
||||
meta := SearchExecutionMeta{}
|
||||
if s.BaseURL == "" {
|
||||
return nil, fmt.Errorf("searxng base url is not configured")
|
||||
return nil, meta, fmt.Errorf("searxng base url is not configured")
|
||||
}
|
||||
s.debug("search_service:start", map[string]any{
|
||||
"queries": queries,
|
||||
@@ -94,6 +100,7 @@ func (s *SearchService) SearchMediaWithDeadline(queries []string, enabledPlatfor
|
||||
runSearchPass := func(bases []string, onlyMissing bool) {
|
||||
for _, base := range bases {
|
||||
if !deadline.IsZero() && time.Now().After(deadline) {
|
||||
meta.PartialDueToDeadline = true
|
||||
s.debug("search_service:deadline_reached", map[string]any{"stage": "runSearchPass", "base": base})
|
||||
return
|
||||
}
|
||||
@@ -103,6 +110,7 @@ func (s *SearchService) SearchMediaWithDeadline(queries []string, enabledPlatfor
|
||||
}
|
||||
for _, collector := range s.collectors {
|
||||
if !deadline.IsZero() && time.Now().After(deadline) {
|
||||
meta.PartialDueToDeadline = true
|
||||
s.debug("search_service:deadline_reached", map[string]any{"stage": "collectorLoop", "collector": collector.Name()})
|
||||
return
|
||||
}
|
||||
@@ -126,6 +134,7 @@ func (s *SearchService) SearchMediaWithDeadline(queries []string, enabledPlatfor
|
||||
})
|
||||
for _, searchQuery := range searchQueries {
|
||||
if !deadline.IsZero() && time.Now().After(deadline) {
|
||||
meta.PartialDueToDeadline = true
|
||||
s.debug("search_service:deadline_reached", map[string]any{"stage": "queryLoop", "collector": collector.Name(), "query": searchQuery})
|
||||
return
|
||||
}
|
||||
@@ -171,28 +180,33 @@ func (s *SearchService) SearchMediaWithDeadline(queries []string, enabledPlatfor
|
||||
}
|
||||
|
||||
if len(results) == 0 && lastErr != nil {
|
||||
return nil, lastErr
|
||||
return nil, meta, lastErr
|
||||
}
|
||||
|
||||
sort.SliceStable(results, func(i, j int) bool {
|
||||
return sourceWeight(results[i].Source) > sourceWeight(results[j].Source)
|
||||
})
|
||||
s.debug("search_service:complete", map[string]any{
|
||||
"resultCount": len(results),
|
||||
"sourceCounts": sourceCounts,
|
||||
"hadError": lastErr != nil,
|
||||
"resultCount": len(results),
|
||||
"sourceCounts": sourceCounts,
|
||||
"hadError": lastErr != nil,
|
||||
"partialDueToDeadline": meta.PartialDueToDeadline,
|
||||
})
|
||||
return s.EnrichResultsWithDeadline(results, deadline), nil
|
||||
enriched, enrichMeta := s.EnrichResultsWithDeadline(results, deadline)
|
||||
meta.PartialDueToDeadline = meta.PartialDueToDeadline || enrichMeta.PartialDueToDeadline
|
||||
return enriched, meta, nil
|
||||
}
|
||||
|
||||
func (s *SearchService) EnrichResults(results []SearchResult) []SearchResult {
|
||||
return s.EnrichResultsWithDeadline(results, time.Time{})
|
||||
enriched, _ := s.EnrichResultsWithDeadline(results, time.Time{})
|
||||
return enriched
|
||||
}
|
||||
|
||||
func (s *SearchService) EnrichResultsWithDeadline(results []SearchResult, deadline time.Time) []SearchResult {
|
||||
func (s *SearchService) EnrichResultsWithDeadline(results []SearchResult, deadline time.Time) ([]SearchResult, SearchExecutionMeta) {
|
||||
meta := SearchExecutionMeta{}
|
||||
limit := minInt(len(results), 18)
|
||||
if limit == 0 {
|
||||
return results
|
||||
return results, meta
|
||||
}
|
||||
s.debug("search_service:enrich_start", map[string]any{
|
||||
"total": len(results),
|
||||
@@ -203,12 +217,16 @@ func (s *SearchService) EnrichResultsWithDeadline(results []SearchResult, deadli
|
||||
copy(enriched, results)
|
||||
|
||||
var wg sync.WaitGroup
|
||||
var metaMu sync.Mutex
|
||||
sem := make(chan struct{}, 4)
|
||||
for idx := 0; idx < limit; idx++ {
|
||||
wg.Add(1)
|
||||
go func(i int) {
|
||||
defer wg.Done()
|
||||
if !deadline.IsZero() && time.Now().After(deadline) {
|
||||
metaMu.Lock()
|
||||
meta.PartialDueToDeadline = true
|
||||
metaMu.Unlock()
|
||||
return
|
||||
}
|
||||
sem <- struct{}{}
|
||||
@@ -231,7 +249,7 @@ func (s *SearchService) EnrichResultsWithDeadline(results []SearchResult, deadli
|
||||
}
|
||||
wg.Wait()
|
||||
s.debug("search_service:enrich_complete", map[string]any{"limit": limit})
|
||||
return enriched
|
||||
return enriched, meta
|
||||
}
|
||||
|
||||
func (s *SearchService) enrichResult(result SearchResult) SearchResult {
|
||||
@@ -323,19 +341,32 @@ func (s *SearchService) enrichArtgrid(result SearchResult) SearchResult {
|
||||
s.debug("search_service:enrich_artgrid_start", map[string]any{"link": result.Link, "clipId": clipID})
|
||||
|
||||
apiURL := "https://artgrid.io/api/clip/details?clipId=" + clipID
|
||||
body, err := s.fetchJSONText(apiURL)
|
||||
if err == nil {
|
||||
urls := collectURLs(body)
|
||||
if !hasUsableThumbnail(result.ThumbnailURL) {
|
||||
result.ThumbnailURL = pickArtgridImageURL(urls, clipID)
|
||||
var err error
|
||||
if s.shouldSkipArtgridAPI() {
|
||||
s.debug("search_service:enrich_artgrid_api_skip", map[string]any{
|
||||
"link": result.Link,
|
||||
"clipId": clipID,
|
||||
"reason": "cached_403_guard",
|
||||
})
|
||||
} else {
|
||||
var body string
|
||||
body, err = s.fetchJSONText(apiURL)
|
||||
if err == nil {
|
||||
urls := collectURLs(body)
|
||||
if !hasUsableThumbnail(result.ThumbnailURL) {
|
||||
result.ThumbnailURL = pickArtgridImageURL(urls, clipID)
|
||||
}
|
||||
if result.PreviewVideoURL == "" {
|
||||
result.PreviewVideoURL = pickVideoURL(urls)
|
||||
}
|
||||
}
|
||||
if result.PreviewVideoURL == "" {
|
||||
result.PreviewVideoURL = pickVideoURL(urls)
|
||||
if err != nil {
|
||||
if strings.Contains(err.Error(), "status 403") {
|
||||
s.blockArtgridAPI(15 * time.Minute)
|
||||
}
|
||||
s.debug("search_service:enrich_artgrid_api_error", map[string]any{"link": result.Link, "clipId": clipID, "error": err.Error()})
|
||||
}
|
||||
}
|
||||
if err != nil {
|
||||
s.debug("search_service:enrich_artgrid_api_error", map[string]any{"link": result.Link, "clipId": clipID, "error": err.Error()})
|
||||
}
|
||||
|
||||
if result.ThumbnailURL == "" || result.PreviewVideoURL == "" {
|
||||
html, err := s.fetchText(result.Link)
|
||||
@@ -540,6 +571,18 @@ func (s *SearchService) setCachedFetchResult(key, body string, ttl time.Duration
|
||||
}
|
||||
}
|
||||
|
||||
// shouldSkipArtgridAPI reports whether calls to the Artgrid clip-details API
// are currently blocked. It returns true only while artgridAPIBlockedUntil has
// been set and still lies in the future; enrichArtgrid uses it to skip the API
// request and log the "cached_403_guard" reason.
func (s *SearchService) shouldSkipArtgridAPI() bool {
|
||||
// artgridAPIBlockedUntil is read under cacheMu, the same mutex
// blockArtgridAPI holds when writing it.
s.cacheMu.Lock()
|
||||
defer s.cacheMu.Unlock()
|
||||
// A zero timestamp means no block has been recorded.
return !s.artgridAPIBlockedUntil.IsZero() && time.Now().Before(s.artgridAPIBlockedUntil)
|
||||
}
|
||||
|
||||
// blockArtgridAPI marks the Artgrid API as blocked for ttl, measured from the
// moment of the call. enrichArtgrid calls it with 15 minutes when the API
// error text contains "status 403". Each call overwrites the previous expiry
// with time.Now().Add(ttl).
func (s *SearchService) blockArtgridAPI(ttl time.Duration) {
|
||||
// Written under cacheMu, the same mutex shouldSkipArtgridAPI holds when
// reading the field.
s.cacheMu.Lock()
|
||||
defer s.cacheMu.Unlock()
|
||||
s.artgridAPIBlockedUntil = time.Now().Add(ttl)
|
||||
}
|
||||
|
||||
func (s *SearchService) debug(message string, data any) {
|
||||
if s != nil && s.Debug != nil {
|
||||
s.Debug(message, data)
|
||||
|
||||
Reference in New Issue
Block a user