Optimize backend search and evaluation pipeline
build-push / docker (push) Has been cancelled

This commit is contained in:
AI Assistant
2026-03-16 16:13:43 +09:00
parent c5f6c611ec
commit 60fdd7842c
7 changed files with 371 additions and 10 deletions
+129 -2
View File
@@ -33,6 +33,19 @@ type SearchService struct {
Client *http.Client
collectors []searchCollector
Debug func(message string, data any)
cacheMu sync.Mutex
searchCache map[string]cachedSearchResults
fetchCache map[string]cachedFetchResult
}
// cachedSearchResults is a TTL-bound cache entry for search results keyed
// by the full search request (base URL, query, categories, engine, source).
type cachedSearchResults struct {
	items     []SearchResult // defensive copy; see cloneSearchResults usage at the cache boundary
	expiresAt time.Time      // entry is considered stale once time.Now() is after this instant
}

// cachedFetchResult is a TTL-bound cache entry for a fetched response body
// (HTML or JSON text), keyed by content type plus target URL.
type cachedFetchResult struct {
	body      string    // raw response body as returned by the fetch
	expiresAt time.Time // entry is considered stale once time.Now() is after this instant
}
func NewSearchService(baseURL, googleVideoEngine, webEngine string) *SearchService {
@@ -52,6 +65,8 @@ func NewSearchService(baseURL, googleVideoEngine, webEngine string) *SearchServi
artgridCollector{},
googleVideoCollector{},
},
searchCache: map[string]cachedSearchResults{},
fetchCache: map[string]cachedFetchResult{},
}
}
@@ -102,6 +117,7 @@ func (s *SearchService) SearchMediaWithDeadline(queries []string, enabledPlatfor
}
searchQueries := collector.BuildQueries(base)
shuffleStrings(searchQueries)
searchQueries = limitCollectorQueries(collector.Name(), searchQueries, onlyMissing)
s.debug("search_service:collector_queries", map[string]any{
"collector": collector.Name(),
"base": base,
@@ -392,6 +408,24 @@ func (s *SearchService) enrichArtgrid(result SearchResult) SearchResult {
}
func (s *SearchService) search(query, categories, engine, source string) ([]SearchResult, error) {
cacheKey := strings.Join([]string{
s.BaseURL,
query,
categories,
engine,
source,
}, "\n")
if cached, ok := s.getCachedSearchResults(cacheKey); ok {
s.debug("search_service:searx_cache_hit", map[string]any{
"query": query,
"categories": categories,
"engine": engine,
"source": source,
"count": len(cached),
})
return cached, nil
}
values := url.Values{}
values.Set("q", query)
values.Set("format", "json")
@@ -458,9 +492,60 @@ func (s *SearchService) search(query, categories, engine, source string) ([]Sear
Source: normalizeSource(source, link, item.Engine),
})
}
s.setCachedSearchResults(cacheKey, results, 2*time.Minute)
return results, nil
}
// getCachedSearchResults returns a copy of the cached results for key, or
// reports a miss when no entry exists or the entry's TTL has elapsed.
// Expired entries are deleted on access so the cache map does not grow
// without bound. Safe for concurrent use.
func (s *SearchService) getCachedSearchResults(key string) ([]SearchResult, bool) {
	s.cacheMu.Lock()
	defer s.cacheMu.Unlock()
	if entry, ok := s.searchCache[key]; ok {
		if !time.Now().After(entry.expiresAt) {
			// Hand back a clone so callers cannot mutate cached storage.
			return cloneSearchResults(entry.items), true
		}
		delete(s.searchCache, key)
	}
	return nil, false
}
// setCachedSearchResults stores a defensive copy of items under key,
// valid for ttl from now. Safe for concurrent use.
func (s *SearchService) setCachedSearchResults(key string, items []SearchResult, ttl time.Duration) {
	// Clone outside the critical section; only the map write needs the lock.
	entry := cachedSearchResults{
		items:     cloneSearchResults(items),
		expiresAt: time.Now().Add(ttl),
	}
	s.cacheMu.Lock()
	s.searchCache[key] = entry
	s.cacheMu.Unlock()
}
// getCachedFetchResult returns the cached response body for key, or reports
// a miss when no entry exists or the entry's TTL has elapsed. Expired
// entries are evicted on access. Safe for concurrent use.
func (s *SearchService) getCachedFetchResult(key string) (string, bool) {
	s.cacheMu.Lock()
	defer s.cacheMu.Unlock()
	if entry, ok := s.fetchCache[key]; ok {
		if !time.Now().After(entry.expiresAt) {
			return entry.body, true
		}
		delete(s.fetchCache, key)
	}
	return "", false
}
// setCachedFetchResult stores body under key, valid for ttl from now.
// Safe for concurrent use.
func (s *SearchService) setCachedFetchResult(key, body string, ttl time.Duration) {
	entry := cachedFetchResult{
		body:      body,
		expiresAt: time.Now().Add(ttl),
	}
	s.cacheMu.Lock()
	s.fetchCache[key] = entry
	s.cacheMu.Unlock()
}
func (s *SearchService) debug(message string, data any) {
if s != nil && s.Debug != nil {
s.Debug(message, data)
@@ -750,6 +835,12 @@ func pickVideoURL(urls []string) string {
}
func (s *SearchService) fetchText(target string) (string, error) {
cacheKey := "html\n" + target
if cached, ok := s.getCachedFetchResult(cacheKey); ok {
s.debug("search_service:fetch_cache_hit", map[string]any{"type": "html", "target": target, "bytes": len(cached)})
return cached, nil
}
req, err := newBrowserRequest(http.MethodGet, target, "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
if err != nil {
return "", err
@@ -772,10 +863,18 @@ func (s *SearchService) fetchText(target string) (string, error) {
if looksLikeCloudflareChallenge(string(data)) {
return fetchTextViaPython(target)
}
return string(data), nil
body := string(data)
s.setCachedFetchResult(cacheKey, body, 3*time.Minute)
return body, nil
}
func (s *SearchService) fetchJSONText(target string) (string, error) {
cacheKey := "json\n" + target
if cached, ok := s.getCachedFetchResult(cacheKey); ok {
s.debug("search_service:fetch_cache_hit", map[string]any{"type": "json", "target": target, "bytes": len(cached)})
return cached, nil
}
req, err := newBrowserRequest(http.MethodGet, target, "application/json, text/json, */*")
if err != nil {
return "", err
@@ -792,7 +891,9 @@ func (s *SearchService) fetchJSONText(target string) (string, error) {
if err != nil {
return "", err
}
return string(data), nil
body := string(data)
s.setCachedFetchResult(cacheKey, body, 3*time.Minute)
return body, nil
}
func firstNonEmpty(values ...string) string {
@@ -1086,6 +1187,32 @@ func limitQueries(queries []string, limit int) []string {
return filtered
}
// limitCollectorQueries caps how many queries are dispatched for a single
// collector. Envato and Artgrid get a budget of 3; every other collector
// (including "Google Video") gets the default of 2. When onlyMissing is set
// the budget is reduced by one, but never drops below 1 so at least one
// query always runs.
//
// The original switch carried a redundant `case "Google Video": limit = 2`
// branch that restated the default; it has been removed with no behavior
// change.
func limitCollectorQueries(collector string, queries []string, onlyMissing bool) []string {
	limit := 2
	if collector == "Envato" || collector == "Artgrid" {
		limit = 3
	}
	if onlyMissing {
		limit--
	}
	if limit < 1 {
		limit = 1
	}
	return limitQueries(queries, limit)
}
// cloneSearchResults returns an independent shallow copy of items. The
// result is always non-nil (an empty slice for empty or nil input), so
// callers and cache storage never alias each other's backing arrays.
func cloneSearchResults(items []SearchResult) []SearchResult {
	// make always yields a non-nil slice, preserving the original's
	// non-nil-empty return; appending copies every element into it.
	return append(make([]SearchResult, 0, len(items)), items...)
}
func shuffleStrings(values []string) {
if len(values) < 2 {
return