This commit is contained in:
@@ -105,6 +105,18 @@
|
|||||||
- Current implementation note:
|
- Current implementation note:
|
||||||
- collectors are still in Go code under backend services, but the responsibilities are now separated by source instead of one monolithic search loop
|
- collectors are still in Go code under backend services, but the responsibilities are now separated by source instead of one monolithic search loop
|
||||||
|
|
||||||
|
## Current Session Update (2026-03-13, Artgrid Collector Fix + Ranker Split)
|
||||||
|
- Artgrid collector regression fixed:
|
||||||
|
- real search results can come back as `artlist.io/stock-footage/clip/.../<id>` instead of only `artgrid.io/clip/<id>/...`
|
||||||
|
- renderable filtering was rejecting those URLs, which caused `SearXNG returned no renderable results.` for Artgrid-only searches
|
||||||
|
- Fix applied:
|
||||||
|
- Artgrid renderability now accepts both `artgrid.io` and `artlist.io/stock-footage/clip/...` clip URLs
|
||||||
|
- Artgrid result links are normalized into `https://artgrid.io/clip/<id>/<slug>` inside the collector flow before filtering/enrichment
|
||||||
|
- Refactor continued:
|
||||||
|
- ranking / Gemini candidate evaluation / recommendation merge logic moved out of `handlers/api.go`
|
||||||
|
- new service layer file: `backend/services/ranker.go`
|
||||||
|
- handler is now thinner and less coupled to search internals
|
||||||
|
|
||||||
## Local Self-Test Workflow
|
## Local Self-Test Workflow
|
||||||
- Primary command:
|
- Primary command:
|
||||||
- `bash scripts/selftest.sh`
|
- `bash scripts/selftest.sh`
|
||||||
|
|||||||
+4
-158
@@ -11,7 +11,6 @@ import (
|
|||||||
"os/exec"
|
"os/exec"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"regexp"
|
"regexp"
|
||||||
"sort"
|
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
@@ -87,16 +86,6 @@ type searchDebugSummary struct {
|
|||||||
GeminiCandidateCap int `json:"geminiCandidateCap,omitempty"`
|
GeminiCandidateCap int `json:"geminiCandidateCap,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type geminiBatchStats struct {
|
|
||||||
CandidateCap int `json:"candidateCap"`
|
|
||||||
Requested int `json:"requested"`
|
|
||||||
Batches int `json:"batches"`
|
|
||||||
Succeeded int `json:"succeeded"`
|
|
||||||
Failed int `json:"failed"`
|
|
||||||
RecommendedCount int `json:"recommendedCount"`
|
|
||||||
Errors []string `json:"errors,omitempty"`
|
|
||||||
}
|
|
||||||
|
|
||||||
func RegisterRoutes(router *gin.Engine, app *App) {
|
func RegisterRoutes(router *gin.Engine, app *App) {
|
||||||
router.GET("/healthz", func(c *gin.Context) {
|
router.GET("/healthz", func(c *gin.Context) {
|
||||||
c.JSON(http.StatusOK, gin.H{"status": "ok"})
|
c.JSON(http.StatusOK, gin.H{"status": "ok"})
|
||||||
@@ -329,10 +318,10 @@ func (a *App) searchMedia(c *gin.Context) {
|
|||||||
if len(queryVariants) > 0 {
|
if len(queryVariants) > 0 {
|
||||||
rankQuery = strings.Join(queryVariants[:min(len(queryVariants), 3)], " ")
|
rankQuery = strings.Join(queryVariants[:min(len(queryVariants), 3)], " ")
|
||||||
}
|
}
|
||||||
scored := rankSearchResults(rankQuery, results)
|
scored := services.RankSearchResults(rankQuery, results)
|
||||||
a.debug("search ranked summary", summarizeSearchResults(scored, time.Since(started), geminiCandidateLimit(len(scored)), ""))
|
a.debug("search ranked summary", summarizeSearchResults(scored, time.Since(started), services.GeminiCandidateLimit(len(scored)), ""))
|
||||||
a.Hub.Broadcast("progress", gin.H{"type": "search", "status": "analyzing top candidate visuals with Gemini Vision", "progress": 75})
|
a.Hub.Broadcast("progress", gin.H{"type": "search", "status": "analyzing top candidate visuals with Gemini Vision", "progress": 75})
|
||||||
recommended, geminiStats := evaluateAllCandidatesWithGemini(a.GeminiService, req.Query, scored)
|
recommended, geminiStats := services.EvaluateAllCandidatesWithGemini(a.GeminiService, req.Query, scored)
|
||||||
a.debug("search gemini evaluation", geminiStats)
|
a.debug("search gemini evaluation", geminiStats)
|
||||||
err = nil
|
err = nil
|
||||||
if len(recommended) == 0 {
|
if len(recommended) == 0 {
|
||||||
@@ -359,7 +348,7 @@ func (a *App) searchMedia(c *gin.Context) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
merged := mergeRecommendations(recommended, scored, 20)
|
merged := services.MergeRecommendations(recommended, scored, 20)
|
||||||
a.debug("search complete summary", summarizeRecommendationResults(merged, time.Since(started), ""))
|
a.debug("search complete summary", summarizeRecommendationResults(merged, time.Since(started), ""))
|
||||||
response := gin.H{"results": merged, "queries": queryVariants}
|
response := gin.H{"results": merged, "queries": queryVariants}
|
||||||
a.Hub.Broadcast("progress", gin.H{"type": "search", "status": "search complete", "progress": 100})
|
a.Hub.Broadcast("progress", gin.H{"type": "search", "status": "search complete", "progress": 100})
|
||||||
@@ -438,149 +427,6 @@ func selectedPlatformLabel(platforms map[string]bool) string {
|
|||||||
return strings.Join(labels, ", ")
|
return strings.Join(labels, ", ")
|
||||||
}
|
}
|
||||||
|
|
||||||
func evaluateAllCandidatesWithGemini(service *services.GeminiService, query string, ranked []services.SearchResult) ([]services.AIRecommendation, geminiBatchStats) {
|
|
||||||
const chunkSize = 8
|
|
||||||
limit := geminiCandidateLimit(len(ranked))
|
|
||||||
stats := geminiBatchStats{
|
|
||||||
CandidateCap: limit,
|
|
||||||
Requested: min(limit, len(ranked)),
|
|
||||||
}
|
|
||||||
merged := make([]services.AIRecommendation, 0, len(ranked))
|
|
||||||
seen := map[string]bool{}
|
|
||||||
for start := 0; start < limit; start += chunkSize {
|
|
||||||
end := start + chunkSize
|
|
||||||
if end > limit {
|
|
||||||
end = limit
|
|
||||||
}
|
|
||||||
batch := ranked[start:end]
|
|
||||||
stats.Batches++
|
|
||||||
recommended, err := service.Recommend(query, batch)
|
|
||||||
if err != nil {
|
|
||||||
stats.Failed++
|
|
||||||
if len(stats.Errors) < 5 {
|
|
||||||
stats.Errors = append(stats.Errors, err.Error())
|
|
||||||
}
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
stats.Succeeded++
|
|
||||||
for _, item := range recommended {
|
|
||||||
if item.Link == "" || seen[item.Link] {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
seen[item.Link] = true
|
|
||||||
merged = append(merged, item)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
stats.RecommendedCount = len(merged)
|
|
||||||
return merged, stats
|
|
||||||
}
|
|
||||||
|
|
||||||
func rankSearchResults(query string, results []services.SearchResult) []services.SearchResult {
|
|
||||||
queryTerms := strings.Fields(strings.ToLower(query))
|
|
||||||
positiveTerms := []string{
|
|
||||||
"b-roll", "b roll", "stock", "stock footage", "footage", "cinematic", "editorial",
|
|
||||||
"establishing", "4k", "hd", "drone", "ambient", "scene", "urban", "cityscape",
|
|
||||||
}
|
|
||||||
negativeTerms := []string{
|
|
||||||
"shocking", "amazing", "crazy", "must watch", "reaction", "gossip", "celebrity",
|
|
||||||
"thumbnail", "meme", "prank", "drama", "breaking", "viral", "tutorial",
|
|
||||||
"how to", "review", "walkthrough", "course", "lesson", "podcast", "interview",
|
|
||||||
"premiere pro", "after effects", "explained", "breakdown", "vlog",
|
|
||||||
}
|
|
||||||
type scoredResult struct {
|
|
||||||
item services.SearchResult
|
|
||||||
score int
|
|
||||||
}
|
|
||||||
|
|
||||||
scored := make([]scoredResult, 0, len(results))
|
|
||||||
for _, result := range results {
|
|
||||||
score := 0
|
|
||||||
text := strings.ToLower(result.Title + " " + result.Snippet + " " + result.Source)
|
|
||||||
for _, term := range queryTerms {
|
|
||||||
if strings.Contains(text, term) {
|
|
||||||
score += 3
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for _, term := range positiveTerms {
|
|
||||||
if strings.Contains(text, term) {
|
|
||||||
score += 2
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for _, term := range negativeTerms {
|
|
||||||
if strings.Contains(text, term) {
|
|
||||||
score -= 4
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if result.ThumbnailURL != "" {
|
|
||||||
score += 2
|
|
||||||
}
|
|
||||||
if result.PreviewVideoURL != "" {
|
|
||||||
score += 3
|
|
||||||
}
|
|
||||||
switch result.Source {
|
|
||||||
case "Google Video":
|
|
||||||
score -= 1
|
|
||||||
case "Envato":
|
|
||||||
score += 7
|
|
||||||
case "Artgrid":
|
|
||||||
score += 7
|
|
||||||
}
|
|
||||||
scored = append(scored, scoredResult{item: result, score: score})
|
|
||||||
}
|
|
||||||
|
|
||||||
sort.SliceStable(scored, func(i, j int) bool {
|
|
||||||
return scored[i].score > scored[j].score
|
|
||||||
})
|
|
||||||
|
|
||||||
ranked := make([]services.SearchResult, 0, len(scored))
|
|
||||||
for _, item := range scored {
|
|
||||||
ranked = append(ranked, item.item)
|
|
||||||
}
|
|
||||||
return ranked
|
|
||||||
}
|
|
||||||
|
|
||||||
func mergeRecommendations(recommended []services.AIRecommendation, ranked []services.SearchResult, limit int) []services.AIRecommendation {
|
|
||||||
merged := make([]services.AIRecommendation, 0, min(limit, len(ranked)))
|
|
||||||
seen := map[string]bool{}
|
|
||||||
|
|
||||||
for _, item := range recommended {
|
|
||||||
if item.Link == "" || seen[item.Link] {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
seen[item.Link] = true
|
|
||||||
merged = append(merged, item)
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, item := range ranked {
|
|
||||||
if len(merged) >= limit || item.Link == "" || seen[item.Link] {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
seen[item.Link] = true
|
|
||||||
merged = append(merged, services.AIRecommendation{
|
|
||||||
Title: item.Title,
|
|
||||||
Link: item.Link,
|
|
||||||
Snippet: item.Snippet,
|
|
||||||
ThumbnailURL: item.ThumbnailURL,
|
|
||||||
PreviewVideoURL: item.PreviewVideoURL,
|
|
||||||
Source: item.Source,
|
|
||||||
Reason: "Keyword-ranked result added without extra Gemini vision tokens.",
|
|
||||||
Recommended: true,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
return merged
|
|
||||||
}
|
|
||||||
|
|
||||||
func geminiCandidateLimit(total int) int {
|
|
||||||
switch {
|
|
||||||
case total <= 8:
|
|
||||||
return total
|
|
||||||
case total <= 16:
|
|
||||||
return 12
|
|
||||||
default:
|
|
||||||
return 16
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func summarizeSearchResults(results []services.SearchResult, duration time.Duration, geminiCap int, warning string) searchDebugSummary {
|
func summarizeSearchResults(results []services.SearchResult, duration time.Duration, geminiCap int, warning string) searchDebugSummary {
|
||||||
bySource := map[string]int{}
|
bySource := map[string]int{}
|
||||||
withPreview := 0
|
withPreview := 0
|
||||||
|
|||||||
+60
-6
@@ -90,6 +90,7 @@ func (s *SearchService) SearchMedia(queries []string, enabledPlatforms map[strin
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
for _, item := range items {
|
for _, item := range items {
|
||||||
|
item = normalizeResultForCollector(collector.Name(), item)
|
||||||
if item.Link == "" || seen[item.Link] || !collector.Accept(item) {
|
if item.Link == "" || seen[item.Link] || !collector.Accept(item) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
@@ -379,10 +380,16 @@ func isRenderableArtgridResult(result SearchResult) bool {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
if !strings.Contains(strings.ToLower(parsed.Host), "artgrid.io") {
|
host := strings.ToLower(parsed.Host)
|
||||||
|
switch {
|
||||||
|
case strings.Contains(host, "artgrid.io"):
|
||||||
|
return regexp.MustCompile(`^/clip/[0-9]+/`).MatchString(parsed.Path)
|
||||||
|
case strings.Contains(host, "artlist.io"):
|
||||||
|
trimmedPath := strings.TrimSuffix(parsed.Path, "/")
|
||||||
|
return regexp.MustCompile(`^/stock-footage/clip/.+/[0-9]+$`).MatchString(trimmedPath)
|
||||||
|
default:
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
return regexp.MustCompile(`^/clip/[0-9]+/`).MatchString(parsed.Path)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func normalizeSource(source, link, engine string) string {
|
func normalizeSource(source, link, engine string) string {
|
||||||
@@ -391,7 +398,7 @@ func normalizeSource(source, link, engine string) string {
|
|||||||
return source
|
return source
|
||||||
case strings.Contains(strings.ToLower(link), "envato") || strings.Contains(strings.ToLower(link), "videohive"):
|
case strings.Contains(strings.ToLower(link), "envato") || strings.Contains(strings.ToLower(link), "videohive"):
|
||||||
return "Envato"
|
return "Envato"
|
||||||
case strings.Contains(strings.ToLower(link), "artgrid"):
|
case strings.Contains(strings.ToLower(link), "artgrid"), strings.Contains(strings.ToLower(link), "artlist.io/stock-footage/clip/"):
|
||||||
return "Artgrid"
|
return "Artgrid"
|
||||||
case strings.Contains(strings.ToLower(engine), "google"):
|
case strings.Contains(strings.ToLower(engine), "google"):
|
||||||
return "Google Video"
|
return "Google Video"
|
||||||
@@ -473,13 +480,60 @@ func extractArtgridBackgroundThumbnail(html, clipID string) string {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// artgridClipPathPattern matches the native Artgrid clip form,
// ".../clip/<id>/<slug>", capturing the numeric clip ID.
var artgridClipPathPattern = regexp.MustCompile(`/clip/([0-9]+)/`)

// artlistClipPathPattern matches the Artlist stock-footage form,
// ".../stock-footage/clip/<slug...>/<id>", capturing the trailing numeric ID.
var artlistClipPathPattern = regexp.MustCompile(`/stock-footage/clip/.+/([0-9]+)$`)

// extractArtgridClipID returns the numeric clip ID embedded in link, or ""
// when neither known URL shape is present.
//
// Two shapes are recognised, tried in this order:
//   - /clip/<id>/...                      (artgrid.io style)
//   - /stock-footage/clip/<slug>/<id>[/]  (artlist.io style)
//
// Any query string or fragment is ignored so that the end-anchored Artlist
// pattern still matches links such as ".../114756?utm_source=x".
func extractArtgridClipID(link string) string {
	if cut := strings.IndexAny(link, "?#"); cut >= 0 {
		link = link[:cut]
	}

	// Match the Artgrid form against the untrimmed link: the pattern needs the
	// slash after the ID, so ".../clip/123/" must keep its trailing slash.
	if m := artgridClipPathPattern.FindStringSubmatch(link); len(m) == 2 {
		return m[1]
	}

	// The Artlist form is anchored at the end, so drop one trailing slash first.
	if m := artlistClipPathPattern.FindStringSubmatch(strings.TrimSuffix(link, "/")); len(m) == 2 {
		return m[1]
	}
	return ""
}
|
||||||
|
|
||||||
|
func canonicalizeArtgridLink(link string) string {
|
||||||
|
trimmed := strings.TrimSpace(link)
|
||||||
|
if trimmed == "" {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
clipID := extractArtgridClipID(trimmed)
|
||||||
|
if clipID == "" {
|
||||||
|
return trimmed
|
||||||
|
}
|
||||||
|
if strings.Contains(strings.ToLower(trimmed), "artgrid.io/clip/") {
|
||||||
|
return trimmed
|
||||||
|
}
|
||||||
|
parsed, err := url.Parse(trimmed)
|
||||||
|
if err != nil {
|
||||||
|
return trimmed
|
||||||
|
}
|
||||||
|
segments := strings.Split(strings.Trim(parsed.Path, "/"), "/")
|
||||||
|
slug := clipID
|
||||||
|
for idx, segment := range segments {
|
||||||
|
if segment == clipID && idx > 0 {
|
||||||
|
slug = segments[idx-1]
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return "https://artgrid.io/clip/" + clipID + "/" + slug
|
||||||
|
}
|
||||||
|
|
||||||
|
func normalizeResultForCollector(source string, result SearchResult) SearchResult {
|
||||||
|
switch source {
|
||||||
|
case "Artgrid":
|
||||||
|
result.Link = canonicalizeArtgridLink(result.Link)
|
||||||
|
result.Source = "Artgrid"
|
||||||
|
case "Envato":
|
||||||
|
result.Source = "Envato"
|
||||||
|
case "Google Video":
|
||||||
|
result.Source = "Google Video"
|
||||||
|
}
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
func collectURLs(body string) []string {
|
func collectURLs(body string) []string {
|
||||||
pattern := regexp.MustCompile(`https?:\/\/[^"'\\\s]+`)
|
pattern := regexp.MustCompile(`https?:\/\/[^"'\\\s]+`)
|
||||||
matches := pattern.FindAllString(body, -1)
|
matches := pattern.FindAllString(body, -1)
|
||||||
|
|||||||
@@ -49,3 +49,17 @@ func TestCleanArtgridTitle(t *testing.T) {
|
|||||||
t.Fatalf("expected %q, got %q", want, got)
|
t.Fatalf("expected %q, got %q", want, got)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestCanonicalizeArtgridLinkFromArtlist(t *testing.T) {
|
||||||
|
got := canonicalizeArtgridLink("https://artlist.io/stock-footage/clip/movie-film-moving-slowly-from-a-reel/114756")
|
||||||
|
want := "https://artgrid.io/clip/114756/movie-film-moving-slowly-from-a-reel"
|
||||||
|
if got != want {
|
||||||
|
t.Fatalf("expected %q, got %q", want, got)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestIsRenderableArtgridResultAcceptsArtlistCanonical(t *testing.T) {
|
||||||
|
if !isRenderableArtgridResult(SearchResult{Link: "https://artlist.io/stock-footage/clip/movie-film-moving-slowly-from-a-reel/114756"}) {
|
||||||
|
t.Fatal("expected artlist canonical clip URL to be accepted for Artgrid collector")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -0,0 +1,159 @@
|
|||||||
|
package services
|
||||||
|
|
||||||
|
import (
|
||||||
|
"sort"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
// GeminiBatchStats summarises one run of EvaluateAllCandidatesWithGemini.
// It is serialised as JSON for debug output.
type GeminiBatchStats struct {
	// CandidateCap is the value GeminiCandidateLimit returned for the input.
	CandidateCap int `json:"candidateCap"`
	// Requested is the smaller of CandidateCap and the number of ranked results.
	Requested int `json:"requested"`
	// Batches counts how many chunks were sent for evaluation.
	Batches int `json:"batches"`
	// Succeeded counts batches whose evaluation returned without error.
	Succeeded int `json:"succeeded"`
	// Failed counts batches whose evaluation returned an error.
	Failed int `json:"failed"`
	// RecommendedCount is the number of de-duplicated recommendations kept.
	RecommendedCount int `json:"recommendedCount"`
	// Errors holds the messages of the first few failed batches; it is
	// omitted from the JSON when empty.
	Errors []string `json:"errors,omitempty"`
}
|
||||||
|
|
||||||
|
func RankSearchResults(query string, results []SearchResult) []SearchResult {
|
||||||
|
queryTerms := strings.Fields(strings.ToLower(query))
|
||||||
|
positiveTerms := []string{
|
||||||
|
"b-roll", "b roll", "stock", "stock footage", "footage", "cinematic", "editorial",
|
||||||
|
"establishing", "4k", "hd", "drone", "ambient", "scene", "urban", "cityscape",
|
||||||
|
}
|
||||||
|
negativeTerms := []string{
|
||||||
|
"shocking", "amazing", "crazy", "must watch", "reaction", "gossip", "celebrity",
|
||||||
|
"thumbnail", "meme", "prank", "drama", "breaking", "viral", "tutorial",
|
||||||
|
"how to", "review", "walkthrough", "course", "lesson", "podcast", "interview",
|
||||||
|
"premiere pro", "after effects", "explained", "breakdown", "vlog",
|
||||||
|
}
|
||||||
|
type scoredResult struct {
|
||||||
|
item SearchResult
|
||||||
|
score int
|
||||||
|
}
|
||||||
|
|
||||||
|
scored := make([]scoredResult, 0, len(results))
|
||||||
|
for _, result := range results {
|
||||||
|
score := 0
|
||||||
|
text := strings.ToLower(result.Title + " " + result.Snippet + " " + result.Source)
|
||||||
|
for _, term := range queryTerms {
|
||||||
|
if strings.Contains(text, term) {
|
||||||
|
score += 3
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for _, term := range positiveTerms {
|
||||||
|
if strings.Contains(text, term) {
|
||||||
|
score += 2
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for _, term := range negativeTerms {
|
||||||
|
if strings.Contains(text, term) {
|
||||||
|
score -= 4
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if result.ThumbnailURL != "" {
|
||||||
|
score += 2
|
||||||
|
}
|
||||||
|
if result.PreviewVideoURL != "" {
|
||||||
|
score += 3
|
||||||
|
}
|
||||||
|
switch result.Source {
|
||||||
|
case "Google Video":
|
||||||
|
score -= 1
|
||||||
|
case "Envato":
|
||||||
|
score += 7
|
||||||
|
case "Artgrid":
|
||||||
|
score += 7
|
||||||
|
}
|
||||||
|
scored = append(scored, scoredResult{item: result, score: score})
|
||||||
|
}
|
||||||
|
|
||||||
|
sort.SliceStable(scored, func(i, j int) bool {
|
||||||
|
return scored[i].score > scored[j].score
|
||||||
|
})
|
||||||
|
|
||||||
|
ranked := make([]SearchResult, 0, len(scored))
|
||||||
|
for _, item := range scored {
|
||||||
|
ranked = append(ranked, item.item)
|
||||||
|
}
|
||||||
|
return ranked
|
||||||
|
}
|
||||||
|
|
||||||
|
// GeminiCandidateLimit returns the candidate cap used for Gemini evaluation
// given the total number of ranked results:
//
//   - total <= 8:  total (everything fits in one batch)
//   - total <= 16: 12
//   - otherwise:   16
//
// Note that for totals of 9, 10 and 11 the returned cap (12) is larger than
// total, so callers must not use it directly as a slice bound.
func GeminiCandidateLimit(total int) int {
	const (
		smallTotal  = 8
		mediumTotal = 16
		mediumCap   = 12
		largeCap    = 16
	)

	if total <= smallTotal {
		return total
	}
	if total <= mediumTotal {
		return mediumCap
	}
	return largeCap
}
|
||||||
|
|
||||||
|
func EvaluateAllCandidatesWithGemini(service *GeminiService, query string, ranked []SearchResult) ([]AIRecommendation, GeminiBatchStats) {
|
||||||
|
const chunkSize = 8
|
||||||
|
limit := GeminiCandidateLimit(len(ranked))
|
||||||
|
stats := GeminiBatchStats{
|
||||||
|
CandidateCap: limit,
|
||||||
|
Requested: min(limit, len(ranked)),
|
||||||
|
}
|
||||||
|
merged := make([]AIRecommendation, 0, len(ranked))
|
||||||
|
seen := map[string]bool{}
|
||||||
|
for start := 0; start < limit; start += chunkSize {
|
||||||
|
end := start + chunkSize
|
||||||
|
if end > limit {
|
||||||
|
end = limit
|
||||||
|
}
|
||||||
|
batch := ranked[start:end]
|
||||||
|
stats.Batches++
|
||||||
|
recommended, err := service.Recommend(query, batch)
|
||||||
|
if err != nil {
|
||||||
|
stats.Failed++
|
||||||
|
if len(stats.Errors) < 5 {
|
||||||
|
stats.Errors = append(stats.Errors, err.Error())
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
stats.Succeeded++
|
||||||
|
for _, item := range recommended {
|
||||||
|
if item.Link == "" || seen[item.Link] {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
seen[item.Link] = true
|
||||||
|
merged = append(merged, item)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
stats.RecommendedCount = len(merged)
|
||||||
|
return merged, stats
|
||||||
|
}
|
||||||
|
|
||||||
|
func MergeRecommendations(recommended []AIRecommendation, ranked []SearchResult, limit int) []AIRecommendation {
|
||||||
|
merged := make([]AIRecommendation, 0, min(limit, len(ranked)))
|
||||||
|
seen := map[string]bool{}
|
||||||
|
|
||||||
|
for _, item := range recommended {
|
||||||
|
if item.Link == "" || seen[item.Link] {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
seen[item.Link] = true
|
||||||
|
merged = append(merged, item)
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, item := range ranked {
|
||||||
|
if len(merged) >= limit || item.Link == "" || seen[item.Link] {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
seen[item.Link] = true
|
||||||
|
merged = append(merged, AIRecommendation{
|
||||||
|
Title: item.Title,
|
||||||
|
Link: item.Link,
|
||||||
|
Snippet: item.Snippet,
|
||||||
|
ThumbnailURL: item.ThumbnailURL,
|
||||||
|
PreviewVideoURL: item.PreviewVideoURL,
|
||||||
|
Source: item.Source,
|
||||||
|
Reason: "Keyword-ranked result added without extra Gemini vision tokens.",
|
||||||
|
Recommended: true,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
return merged
|
||||||
|
}
|
||||||
+1
-1
@@ -24,7 +24,7 @@ trap cleanup EXIT
|
|||||||
cd "${ROOT_DIR}"
|
cd "${ROOT_DIR}"
|
||||||
|
|
||||||
echo "[selftest] gofmt"
|
echo "[selftest] gofmt"
|
||||||
gofmt -w backend/main.go backend/handlers/api.go backend/models/db.go backend/services/cse.go backend/services/cse_test.go backend/services/search_collectors.go backend/services/gemini.go backend/services/gemini_test.go
|
gofmt -w backend/main.go backend/handlers/api.go backend/models/db.go backend/services/cse.go backend/services/cse_test.go backend/services/search_collectors.go backend/services/ranker.go backend/services/gemini.go backend/services/gemini_test.go
|
||||||
|
|
||||||
echo "[selftest] python syntax"
|
echo "[selftest] python syntax"
|
||||||
python3 -m py_compile worker/downloader.py scripts/mock_searxng.py
|
python3 -m py_compile worker/downloader.py scripts/mock_searxng.py
|
||||||
|
|||||||
Reference in New Issue
Block a user