This commit is contained in:
@@ -32,6 +32,7 @@ type SearchService struct {
|
||||
WebEngine string
|
||||
Client *http.Client
|
||||
collectors []searchCollector
|
||||
Debug func(message string, data any)
|
||||
}
|
||||
|
||||
func NewSearchService(baseURL, googleVideoEngine, webEngine string) *SearchService {
|
||||
@@ -58,6 +59,10 @@ func (s *SearchService) SearchMedia(queries []string, enabledPlatforms map[strin
|
||||
if s.BaseURL == "" {
|
||||
return nil, fmt.Errorf("searxng base url is not configured")
|
||||
}
|
||||
s.debug("search_service:start", map[string]any{
|
||||
"queries": queries,
|
||||
"enabledPlatforms": enabledPlatforms,
|
||||
})
|
||||
|
||||
seen := map[string]bool{}
|
||||
sourceCounts := map[string]int{}
|
||||
@@ -85,15 +90,32 @@ func (s *SearchService) SearchMedia(queries []string, enabledPlatforms map[strin
|
||||
}
|
||||
searchQueries := collector.BuildQueries(base)
|
||||
shuffleStrings(searchQueries)
|
||||
s.debug("search_service:collector_queries", map[string]any{
|
||||
"collector": collector.Name(),
|
||||
"base": base,
|
||||
"onlyMissing": onlyMissing,
|
||||
"searchQueries": searchQueries,
|
||||
})
|
||||
for _, searchQuery := range searchQueries {
|
||||
if sourceCounts[collector.Name()] >= collector.MaxResults() {
|
||||
break
|
||||
}
|
||||
items, err := collector.Collect(s, searchQuery)
|
||||
if err != nil {
|
||||
s.debug("search_service:collector_error", map[string]any{
|
||||
"collector": collector.Name(),
|
||||
"query": searchQuery,
|
||||
"error": err.Error(),
|
||||
})
|
||||
lastErr = err
|
||||
continue
|
||||
}
|
||||
s.debug("search_service:collector_results", map[string]any{
|
||||
"collector": collector.Name(),
|
||||
"query": searchQuery,
|
||||
"rawCount": len(items),
|
||||
"sourceCount": sourceCounts[collector.Name()],
|
||||
})
|
||||
for _, item := range items {
|
||||
item = normalizeResultForCollector(collector.Name(), item)
|
||||
if item.Link == "" || seen[item.Link] || !collector.Accept(item) {
|
||||
@@ -123,6 +145,11 @@ func (s *SearchService) SearchMedia(queries []string, enabledPlatforms map[strin
|
||||
sort.SliceStable(results, func(i, j int) bool {
|
||||
return sourceWeight(results[i].Source) > sourceWeight(results[j].Source)
|
||||
})
|
||||
s.debug("search_service:complete", map[string]any{
|
||||
"resultCount": len(results),
|
||||
"sourceCounts": sourceCounts,
|
||||
"hadError": lastErr != nil,
|
||||
})
|
||||
return s.EnrichResults(results), nil
|
||||
}
|
||||
|
||||
@@ -131,6 +158,10 @@ func (s *SearchService) EnrichResults(results []SearchResult) []SearchResult {
|
||||
if limit == 0 {
|
||||
return results
|
||||
}
|
||||
s.debug("search_service:enrich_start", map[string]any{
|
||||
"total": len(results),
|
||||
"limit": limit,
|
||||
})
|
||||
|
||||
enriched := make([]SearchResult, len(results))
|
||||
copy(enriched, results)
|
||||
@@ -143,10 +174,24 @@ func (s *SearchService) EnrichResults(results []SearchResult) []SearchResult {
|
||||
defer wg.Done()
|
||||
sem <- struct{}{}
|
||||
defer func() { <-sem }()
|
||||
s.debug("search_service:enrich_item_start", map[string]any{
|
||||
"index": i,
|
||||
"link": enriched[i].Link,
|
||||
"source": enriched[i].Source,
|
||||
})
|
||||
enriched[i] = s.enrichResult(enriched[i])
|
||||
s.debug("search_service:enrich_item_done", map[string]any{
|
||||
"index": i,
|
||||
"link": enriched[i].Link,
|
||||
"source": enriched[i].Source,
|
||||
"thumbnail": strings.TrimSpace(enriched[i].ThumbnailURL) != "",
|
||||
"preview": strings.TrimSpace(enriched[i].PreviewVideoURL) != "",
|
||||
"title": truncateForDebug(enriched[i].Title, 120),
|
||||
})
|
||||
}(idx)
|
||||
}
|
||||
wg.Wait()
|
||||
s.debug("search_service:enrich_complete", map[string]any{"limit": limit})
|
||||
return enriched
|
||||
}
|
||||
|
||||
@@ -163,10 +208,21 @@ func (s *SearchService) enrichResult(result SearchResult) SearchResult {
|
||||
}
|
||||
|
||||
func (s *SearchService) searchWithFallback(query, categories, engine, source string) ([]SearchResult, error) {
|
||||
s.debug("search_service:search_with_fallback", map[string]any{
|
||||
"query": query,
|
||||
"categories": categories,
|
||||
"engine": engine,
|
||||
"source": source,
|
||||
})
|
||||
items, err := s.search(query, categories, engine, source)
|
||||
if err == nil {
|
||||
return items, nil
|
||||
}
|
||||
s.debug("search_service:search_with_fallback_primary_error", map[string]any{
|
||||
"query": query,
|
||||
"engine": engine,
|
||||
"error": err.Error(),
|
||||
})
|
||||
if strings.TrimSpace(engine) == "" {
|
||||
return nil, err
|
||||
}
|
||||
@@ -174,8 +230,10 @@ func (s *SearchService) searchWithFallback(query, categories, engine, source str
|
||||
}
|
||||
|
||||
func (s *SearchService) enrichEnvato(result SearchResult) SearchResult {
|
||||
s.debug("search_service:enrich_envato_start", map[string]any{"link": result.Link})
|
||||
html, err := s.fetchText(result.Link)
|
||||
if err != nil {
|
||||
s.debug("search_service:enrich_envato_fetch_error", map[string]any{"link": result.Link, "error": err.Error()})
|
||||
return result
|
||||
}
|
||||
videoMeta := extractVideoObjectJSONLD(html)
|
||||
@@ -224,14 +282,21 @@ func (s *SearchService) enrichEnvato(result SearchResult) SearchResult {
|
||||
)
|
||||
}
|
||||
}
|
||||
s.debug("search_service:enrich_envato_done", map[string]any{
|
||||
"link": result.Link,
|
||||
"thumbnail": strings.TrimSpace(result.ThumbnailURL) != "",
|
||||
"preview": strings.TrimSpace(result.PreviewVideoURL) != "",
|
||||
})
|
||||
return result
|
||||
}
|
||||
|
||||
func (s *SearchService) enrichArtgrid(result SearchResult) SearchResult {
|
||||
clipID := extractArtgridClipID(result.Link)
|
||||
if clipID == "" {
|
||||
s.debug("search_service:enrich_artgrid_skip", map[string]any{"link": result.Link, "reason": "missing clip id"})
|
||||
return result
|
||||
}
|
||||
s.debug("search_service:enrich_artgrid_start", map[string]any{"link": result.Link, "clipId": clipID})
|
||||
|
||||
apiURL := "https://artgrid.io/api/clip/details?clipId=" + clipID
|
||||
body, err := s.fetchJSONText(apiURL)
|
||||
@@ -244,11 +309,15 @@ func (s *SearchService) enrichArtgrid(result SearchResult) SearchResult {
|
||||
result.PreviewVideoURL = pickVideoURL(urls)
|
||||
}
|
||||
}
|
||||
if err != nil {
|
||||
s.debug("search_service:enrich_artgrid_api_error", map[string]any{"link": result.Link, "clipId": clipID, "error": err.Error()})
|
||||
}
|
||||
|
||||
if result.ThumbnailURL == "" || result.PreviewVideoURL == "" {
|
||||
html, err := s.fetchText(result.Link)
|
||||
if err == nil {
|
||||
if !isMatchingArtgridClipPage(html, clipID) {
|
||||
s.debug("search_service:enrich_artgrid_html_mismatch", map[string]any{"link": result.Link, "clipId": clipID})
|
||||
return result
|
||||
}
|
||||
result.Title = firstNonEmpty(
|
||||
@@ -289,6 +358,12 @@ func (s *SearchService) enrichArtgrid(result SearchResult) SearchResult {
|
||||
}
|
||||
}
|
||||
}
|
||||
s.debug("search_service:enrich_artgrid_done", map[string]any{
|
||||
"link": result.Link,
|
||||
"clipId": clipID,
|
||||
"thumbnail": strings.TrimSpace(result.ThumbnailURL) != "",
|
||||
"preview": strings.TrimSpace(result.PreviewVideoURL) != "",
|
||||
})
|
||||
|
||||
return result
|
||||
}
|
||||
@@ -307,6 +382,13 @@ func (s *SearchService) search(query, categories, engine, source string) ([]Sear
|
||||
}
|
||||
|
||||
endpoint := s.BaseURL + "/search?" + values.Encode()
|
||||
s.debug("search_service:searx_request", map[string]any{
|
||||
"endpoint": endpoint,
|
||||
"query": query,
|
||||
"categories": categories,
|
||||
"engine": engine,
|
||||
"source": source,
|
||||
})
|
||||
resp, err := s.Client.Get(endpoint)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -332,6 +414,11 @@ func (s *SearchService) search(query, categories, engine, source string) ([]Sear
|
||||
if err := json.NewDecoder(resp.Body).Decode(&payload); err != nil {
|
||||
return nil, fmt.Errorf("searxng JSON decode failed for query %q: %w", query, err)
|
||||
}
|
||||
s.debug("search_service:searx_response", map[string]any{
|
||||
"query": query,
|
||||
"source": source,
|
||||
"rawCount": len(payload.Results),
|
||||
})
|
||||
|
||||
results := make([]SearchResult, 0, len(payload.Results))
|
||||
for _, item := range payload.Results {
|
||||
@@ -351,6 +438,20 @@ func (s *SearchService) search(query, categories, engine, source string) ([]Sear
|
||||
return results, nil
|
||||
}
|
||||
|
||||
func (s *SearchService) debug(message string, data any) {
|
||||
if s != nil && s.Debug != nil {
|
||||
s.Debug(message, data)
|
||||
}
|
||||
}
|
||||
|
||||
func truncateForDebug(text string, limit int) string {
|
||||
trimmed := strings.TrimSpace(text)
|
||||
if len(trimmed) <= limit {
|
||||
return trimmed
|
||||
}
|
||||
return trimmed[:limit] + "..."
|
||||
}
|
||||
|
||||
func buildGoogleVideoQueries(base string) []string {
|
||||
return []string{
|
||||
fmt.Sprintf(`"%s" ("stock footage" OR "b-roll" OR cinematic OR "establishing shot" OR editorial) -tutorial -"how to" -review -reaction -course -podcast -vlog -interview -breakdown -edit -editing`, base),
|
||||
|
||||
@@ -22,6 +22,7 @@ type GeminiService struct {
|
||||
Client *http.Client
|
||||
GenerateEndpoint string
|
||||
TranslateEndpoint string
|
||||
Debug func(message string, data any)
|
||||
}
|
||||
|
||||
type AIRecommendation struct {
|
||||
@@ -50,7 +51,13 @@ func NewGeminiService(apiKey string) *GeminiService {
|
||||
|
||||
func (g *GeminiService) ExpandQuery(query string) ([]string, error) {
|
||||
englishBase := g.TranslateQuery(query)
|
||||
return buildSearchQueries(query, englishBase), nil
|
||||
expanded := buildSearchQueries(query, englishBase)
|
||||
g.debug("gemini:expand_query", map[string]any{
|
||||
"original": query,
|
||||
"english": englishBase,
|
||||
"expanded": expanded,
|
||||
})
|
||||
return expanded, nil
|
||||
}
|
||||
|
||||
func (g *GeminiService) TranslateQuery(query string) string {
|
||||
@@ -67,6 +74,7 @@ func (g *GeminiService) TranslateQuery(query string) string {
|
||||
}
|
||||
|
||||
if g.APIKey != "" {
|
||||
g.debug("gemini:translate_attempt", map[string]any{"mode": "gemini", "query": trimmed})
|
||||
body := map[string]any{
|
||||
"systemInstruction": map[string]any{
|
||||
"parts": []map[string]string{
|
||||
@@ -95,17 +103,25 @@ func (g *GeminiService) TranslateQuery(query string) string {
|
||||
if err == nil {
|
||||
translated := sanitizePlainEnglishLine(rawText)
|
||||
if translated != "" && !strings.EqualFold(translated, trimmed) && !isOvercompressedTranslation(trimmed, translated) {
|
||||
g.debug("gemini:translate_success", map[string]any{"mode": "gemini", "query": trimmed, "translated": translated})
|
||||
return translated
|
||||
}
|
||||
}
|
||||
if err != nil {
|
||||
g.debug("gemini:translate_error", map[string]any{"mode": "gemini", "query": trimmed, "error": err.Error()})
|
||||
}
|
||||
}
|
||||
|
||||
g.debug("gemini:translate_attempt", map[string]any{"mode": "google", "query": trimmed})
|
||||
if translated, err := g.translateViaGoogle(trimmed); err == nil && translated != "" && isLikelyEnglishQuery(translated) && !isOvercompressedTranslation(trimmed, translated) {
|
||||
g.debug("gemini:translate_success", map[string]any{"mode": "google", "query": trimmed, "translated": translated})
|
||||
return translated
|
||||
}
|
||||
if translated := translateKoreanMediaTerms(normalizedIntent); translated != "" && !strings.EqualFold(translated, trimmed) {
|
||||
g.debug("gemini:translate_success", map[string]any{"mode": "dictionary", "query": trimmed, "translated": translated})
|
||||
return translated
|
||||
}
|
||||
g.debug("gemini:translate_fallback_original", map[string]any{"query": trimmed, "normalized": normalizedIntent})
|
||||
return strings.TrimSpace(normalizedIntent)
|
||||
}
|
||||
|
||||
@@ -148,6 +164,10 @@ func (g *GeminiService) Recommend(query string, candidates []SearchResult) ([]AI
|
||||
if len(candidates) == 0 {
|
||||
return []AIRecommendation{}, nil
|
||||
}
|
||||
g.debug("gemini:vision_start", map[string]any{
|
||||
"query": query,
|
||||
"candidateCount": len(candidates),
|
||||
})
|
||||
|
||||
type geminiPart map[string]any
|
||||
parts := []geminiPart{
|
||||
@@ -169,6 +189,12 @@ User query: ` + query,
|
||||
for idx := 0; idx < maxImages; idx++ {
|
||||
img, mimeType, err := fetchCandidateVisualInlineData(g.Client, candidates[idx])
|
||||
if err != nil {
|
||||
g.debug("gemini:vision_candidate_visual_error", map[string]any{
|
||||
"index": idx,
|
||||
"link": candidates[idx].Link,
|
||||
"source": candidates[idx].Source,
|
||||
"error": err.Error(),
|
||||
})
|
||||
continue
|
||||
}
|
||||
visualCount++
|
||||
@@ -180,6 +206,11 @@ User query: ` + query,
|
||||
if visualCount == 0 {
|
||||
return nil, fmt.Errorf("no candidate thumbnails or preview frames could be fetched for gemini vision")
|
||||
}
|
||||
g.debug("gemini:vision_visuals_prepared", map[string]any{
|
||||
"query": query,
|
||||
"visualCount": visualCount,
|
||||
"maxImages": maxImages,
|
||||
})
|
||||
|
||||
body := map[string]any{
|
||||
"contents": []map[string]any{
|
||||
@@ -254,10 +285,20 @@ User query: ` + query,
|
||||
Recommended: recommended,
|
||||
})
|
||||
}
|
||||
g.debug("gemini:vision_complete", map[string]any{
|
||||
"query": query,
|
||||
"recommendationCount": len(recommendations),
|
||||
})
|
||||
|
||||
return recommendations, nil
|
||||
}
|
||||
|
||||
func (g *GeminiService) debug(message string, data any) {
|
||||
if g != nil && g.Debug != nil {
|
||||
g.Debug(message, data)
|
||||
}
|
||||
}
|
||||
|
||||
func fetchImageAsInlineData(client *http.Client, imageURL, referer string) (string, string, error) {
|
||||
if strings.TrimSpace(imageURL) == "" {
|
||||
return "", "", fmt.Errorf("image url is empty")
|
||||
|
||||
@@ -119,6 +119,14 @@ func EvaluateAllCandidatesWithGemini(service *GeminiService, query string, ranke
|
||||
if len(batches) == 0 {
|
||||
return []AIRecommendation{}, stats, nil
|
||||
}
|
||||
if service != nil && service.Debug != nil {
|
||||
service.Debug("ranker:gemini_batches_created", map[string]any{
|
||||
"query": query,
|
||||
"limit": limit,
|
||||
"batches": len(batches),
|
||||
"chunkSize": chunkSize,
|
||||
})
|
||||
}
|
||||
|
||||
results := make([]batchResult, len(batches))
|
||||
var wg sync.WaitGroup
|
||||
@@ -143,6 +151,12 @@ func EvaluateAllCandidatesWithGemini(service *GeminiService, query string, ranke
|
||||
seen := map[string]bool{}
|
||||
for _, batch := range results {
|
||||
if batch.err != nil {
|
||||
if service != nil && service.Debug != nil {
|
||||
service.Debug("ranker:gemini_batch_error", map[string]any{
|
||||
"batchIndex": batch.index,
|
||||
"error": batch.err.Error(),
|
||||
})
|
||||
}
|
||||
recovered, recoveredErrs := recoverGeminiBatchSequentially(service, query, ranked, batch.index*chunkSize)
|
||||
if len(recovered) > 0 {
|
||||
stats.SequentialRetried++
|
||||
@@ -180,6 +194,9 @@ func EvaluateAllCandidatesWithGemini(service *GeminiService, query string, ranke
|
||||
}
|
||||
}
|
||||
stats.RecommendedCount = len(merged)
|
||||
if service != nil && service.Debug != nil {
|
||||
service.Debug("ranker:gemini_batches_complete", stats)
|
||||
}
|
||||
|
||||
switch {
|
||||
case len(merged) > 0 && stats.Failed == 0:
|
||||
|
||||
Reference in New Issue
Block a user