This commit is contained in:
+4
-158
@@ -11,7 +11,6 @@ import (
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"regexp"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
@@ -87,16 +86,6 @@ type searchDebugSummary struct {
|
||||
GeminiCandidateCap int `json:"geminiCandidateCap,omitempty"`
|
||||
}
|
||||
|
||||
// geminiBatchStats describes the outcome of one batched Gemini evaluation
// pass. It is serialised to JSON when emitted as debug output.
type geminiBatchStats struct {
	// CandidateCap is the cap that was applied to the number of candidates.
	CandidateCap int `json:"candidateCap"`
	// Requested is the number of candidates selected for evaluation.
	Requested int `json:"requested"`
	// Batches is the number of batches that were attempted.
	Batches int `json:"batches"`
	// Succeeded is the number of batches that completed without an error.
	Succeeded int `json:"succeeded"`
	// Failed is the number of batches that returned an error.
	Failed int `json:"failed"`
	// RecommendedCount is the number of recommendations kept after merging.
	RecommendedCount int `json:"recommendedCount"`
	// Errors holds error messages from failed batches; omitted when empty.
	Errors []string `json:"errors,omitempty"`
}
|
||||
|
||||
func RegisterRoutes(router *gin.Engine, app *App) {
|
||||
router.GET("/healthz", func(c *gin.Context) {
|
||||
c.JSON(http.StatusOK, gin.H{"status": "ok"})
|
||||
@@ -329,10 +318,10 @@ func (a *App) searchMedia(c *gin.Context) {
|
||||
if len(queryVariants) > 0 {
|
||||
rankQuery = strings.Join(queryVariants[:min(len(queryVariants), 3)], " ")
|
||||
}
|
||||
scored := rankSearchResults(rankQuery, results)
|
||||
a.debug("search ranked summary", summarizeSearchResults(scored, time.Since(started), geminiCandidateLimit(len(scored)), ""))
|
||||
scored := services.RankSearchResults(rankQuery, results)
|
||||
a.debug("search ranked summary", summarizeSearchResults(scored, time.Since(started), services.GeminiCandidateLimit(len(scored)), ""))
|
||||
a.Hub.Broadcast("progress", gin.H{"type": "search", "status": "analyzing top candidate visuals with Gemini Vision", "progress": 75})
|
||||
recommended, geminiStats := evaluateAllCandidatesWithGemini(a.GeminiService, req.Query, scored)
|
||||
recommended, geminiStats := services.EvaluateAllCandidatesWithGemini(a.GeminiService, req.Query, scored)
|
||||
a.debug("search gemini evaluation", geminiStats)
|
||||
err = nil
|
||||
if len(recommended) == 0 {
|
||||
@@ -359,7 +348,7 @@ func (a *App) searchMedia(c *gin.Context) {
|
||||
return
|
||||
}
|
||||
|
||||
merged := mergeRecommendations(recommended, scored, 20)
|
||||
merged := services.MergeRecommendations(recommended, scored, 20)
|
||||
a.debug("search complete summary", summarizeRecommendationResults(merged, time.Since(started), ""))
|
||||
response := gin.H{"results": merged, "queries": queryVariants}
|
||||
a.Hub.Broadcast("progress", gin.H{"type": "search", "status": "search complete", "progress": 100})
|
||||
@@ -438,149 +427,6 @@ func selectedPlatformLabel(platforms map[string]bool) string {
|
||||
return strings.Join(labels, ", ")
|
||||
}
|
||||
|
||||
func evaluateAllCandidatesWithGemini(service *services.GeminiService, query string, ranked []services.SearchResult) ([]services.AIRecommendation, geminiBatchStats) {
|
||||
const chunkSize = 8
|
||||
limit := geminiCandidateLimit(len(ranked))
|
||||
stats := geminiBatchStats{
|
||||
CandidateCap: limit,
|
||||
Requested: min(limit, len(ranked)),
|
||||
}
|
||||
merged := make([]services.AIRecommendation, 0, len(ranked))
|
||||
seen := map[string]bool{}
|
||||
for start := 0; start < limit; start += chunkSize {
|
||||
end := start + chunkSize
|
||||
if end > limit {
|
||||
end = limit
|
||||
}
|
||||
batch := ranked[start:end]
|
||||
stats.Batches++
|
||||
recommended, err := service.Recommend(query, batch)
|
||||
if err != nil {
|
||||
stats.Failed++
|
||||
if len(stats.Errors) < 5 {
|
||||
stats.Errors = append(stats.Errors, err.Error())
|
||||
}
|
||||
continue
|
||||
}
|
||||
stats.Succeeded++
|
||||
for _, item := range recommended {
|
||||
if item.Link == "" || seen[item.Link] {
|
||||
continue
|
||||
}
|
||||
seen[item.Link] = true
|
||||
merged = append(merged, item)
|
||||
}
|
||||
}
|
||||
stats.RecommendedCount = len(merged)
|
||||
return merged, stats
|
||||
}
|
||||
|
||||
func rankSearchResults(query string, results []services.SearchResult) []services.SearchResult {
|
||||
queryTerms := strings.Fields(strings.ToLower(query))
|
||||
positiveTerms := []string{
|
||||
"b-roll", "b roll", "stock", "stock footage", "footage", "cinematic", "editorial",
|
||||
"establishing", "4k", "hd", "drone", "ambient", "scene", "urban", "cityscape",
|
||||
}
|
||||
negativeTerms := []string{
|
||||
"shocking", "amazing", "crazy", "must watch", "reaction", "gossip", "celebrity",
|
||||
"thumbnail", "meme", "prank", "drama", "breaking", "viral", "tutorial",
|
||||
"how to", "review", "walkthrough", "course", "lesson", "podcast", "interview",
|
||||
"premiere pro", "after effects", "explained", "breakdown", "vlog",
|
||||
}
|
||||
type scoredResult struct {
|
||||
item services.SearchResult
|
||||
score int
|
||||
}
|
||||
|
||||
scored := make([]scoredResult, 0, len(results))
|
||||
for _, result := range results {
|
||||
score := 0
|
||||
text := strings.ToLower(result.Title + " " + result.Snippet + " " + result.Source)
|
||||
for _, term := range queryTerms {
|
||||
if strings.Contains(text, term) {
|
||||
score += 3
|
||||
}
|
||||
}
|
||||
for _, term := range positiveTerms {
|
||||
if strings.Contains(text, term) {
|
||||
score += 2
|
||||
}
|
||||
}
|
||||
for _, term := range negativeTerms {
|
||||
if strings.Contains(text, term) {
|
||||
score -= 4
|
||||
}
|
||||
}
|
||||
if result.ThumbnailURL != "" {
|
||||
score += 2
|
||||
}
|
||||
if result.PreviewVideoURL != "" {
|
||||
score += 3
|
||||
}
|
||||
switch result.Source {
|
||||
case "Google Video":
|
||||
score -= 1
|
||||
case "Envato":
|
||||
score += 7
|
||||
case "Artgrid":
|
||||
score += 7
|
||||
}
|
||||
scored = append(scored, scoredResult{item: result, score: score})
|
||||
}
|
||||
|
||||
sort.SliceStable(scored, func(i, j int) bool {
|
||||
return scored[i].score > scored[j].score
|
||||
})
|
||||
|
||||
ranked := make([]services.SearchResult, 0, len(scored))
|
||||
for _, item := range scored {
|
||||
ranked = append(ranked, item.item)
|
||||
}
|
||||
return ranked
|
||||
}
|
||||
|
||||
func mergeRecommendations(recommended []services.AIRecommendation, ranked []services.SearchResult, limit int) []services.AIRecommendation {
|
||||
merged := make([]services.AIRecommendation, 0, min(limit, len(ranked)))
|
||||
seen := map[string]bool{}
|
||||
|
||||
for _, item := range recommended {
|
||||
if item.Link == "" || seen[item.Link] {
|
||||
continue
|
||||
}
|
||||
seen[item.Link] = true
|
||||
merged = append(merged, item)
|
||||
}
|
||||
|
||||
for _, item := range ranked {
|
||||
if len(merged) >= limit || item.Link == "" || seen[item.Link] {
|
||||
continue
|
||||
}
|
||||
seen[item.Link] = true
|
||||
merged = append(merged, services.AIRecommendation{
|
||||
Title: item.Title,
|
||||
Link: item.Link,
|
||||
Snippet: item.Snippet,
|
||||
ThumbnailURL: item.ThumbnailURL,
|
||||
PreviewVideoURL: item.PreviewVideoURL,
|
||||
Source: item.Source,
|
||||
Reason: "Keyword-ranked result added without extra Gemini vision tokens.",
|
||||
Recommended: true,
|
||||
})
|
||||
}
|
||||
return merged
|
||||
}
|
||||
|
||||
// geminiCandidateLimit returns how many of total ranked results should be sent
// to Gemini: everything up to 12 results, 12 for 13-16 results, and 16 beyond
// that.
//
// The returned value never exceeds total, so it is safe to use as a slice
// bound, and it is 0 for a non-positive total.
func geminiCandidateLimit(total int) int {
	switch {
	case total <= 0:
		return 0
	case total <= 12:
		// Includes 9-11: returning 12 here would exceed the available results.
		return total
	case total <= 16:
		return 12
	default:
		return 16
	}
}
|
||||
|
||||
func summarizeSearchResults(results []services.SearchResult, duration time.Duration, geminiCap int, warning string) searchDebugSummary {
|
||||
bySource := map[string]int{}
|
||||
withPreview := 0
|
||||
|
||||
+60
-6
@@ -90,6 +90,7 @@ func (s *SearchService) SearchMedia(queries []string, enabledPlatforms map[strin
|
||||
continue
|
||||
}
|
||||
for _, item := range items {
|
||||
item = normalizeResultForCollector(collector.Name(), item)
|
||||
if item.Link == "" || seen[item.Link] || !collector.Accept(item) {
|
||||
continue
|
||||
}
|
||||
@@ -379,10 +380,16 @@ func isRenderableArtgridResult(result SearchResult) bool {
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
if !strings.Contains(strings.ToLower(parsed.Host), "artgrid.io") {
|
||||
host := strings.ToLower(parsed.Host)
|
||||
switch {
|
||||
case strings.Contains(host, "artgrid.io"):
|
||||
return regexp.MustCompile(`^/clip/[0-9]+/`).MatchString(parsed.Path)
|
||||
case strings.Contains(host, "artlist.io"):
|
||||
trimmedPath := strings.TrimSuffix(parsed.Path, "/")
|
||||
return regexp.MustCompile(`^/stock-footage/clip/.+/[0-9]+$`).MatchString(trimmedPath)
|
||||
default:
|
||||
return false
|
||||
}
|
||||
return regexp.MustCompile(`^/clip/[0-9]+/`).MatchString(parsed.Path)
|
||||
}
|
||||
|
||||
func normalizeSource(source, link, engine string) string {
|
||||
@@ -391,7 +398,7 @@ func normalizeSource(source, link, engine string) string {
|
||||
return source
|
||||
case strings.Contains(strings.ToLower(link), "envato") || strings.Contains(strings.ToLower(link), "videohive"):
|
||||
return "Envato"
|
||||
case strings.Contains(strings.ToLower(link), "artgrid"):
|
||||
case strings.Contains(strings.ToLower(link), "artgrid"), strings.Contains(strings.ToLower(link), "artlist.io/stock-footage/clip/"):
|
||||
return "Artgrid"
|
||||
case strings.Contains(strings.ToLower(engine), "google"):
|
||||
return "Google Video"
|
||||
@@ -473,13 +480,60 @@ func extractArtgridBackgroundThumbnail(html, clipID string) string {
|
||||
}
|
||||
|
||||
// Clip-ID patterns, compiled once at package scope instead of on every call.
var (
	// artlistClipIDPattern matches the artlist.io form
	// /stock-footage/clip/<slug>/<id> and captures the trailing numeric ID.
	artlistClipIDPattern = regexp.MustCompile(`/stock-footage/clip/.+/([0-9]+)$`)
	// artgridClipIDPattern matches the artgrid.io form /clip/<id>, followed by
	// either a slash or the end of the link, and captures the numeric ID.
	artgridClipIDPattern = regexp.MustCompile(`/clip/([0-9]+)(?:/|$)`)
)

// extractArtgridClipID returns the numeric clip ID embedded in an Artgrid
// (/clip/<id>/<slug>) or Artlist (/stock-footage/clip/<slug>/<id>) link, or ""
// when the link contains neither form.
//
// Any query string or fragment and a single trailing slash are ignored. The
// Artlist form is tried first so that a purely numeric slug is not mistaken
// for the clip ID.
func extractArtgridClipID(link string) string {
	if cut := strings.IndexAny(link, "?#"); cut >= 0 {
		link = link[:cut]
	}
	link = strings.TrimSuffix(link, "/")

	if m := artlistClipIDPattern.FindStringSubmatch(link); len(m) == 2 {
		return m[1]
	}
	if m := artgridClipIDPattern.FindStringSubmatch(link); len(m) == 2 {
		return m[1]
	}
	return ""
}
|
||||
|
||||
func canonicalizeArtgridLink(link string) string {
|
||||
trimmed := strings.TrimSpace(link)
|
||||
if trimmed == "" {
|
||||
return ""
|
||||
}
|
||||
clipID := extractArtgridClipID(trimmed)
|
||||
if clipID == "" {
|
||||
return trimmed
|
||||
}
|
||||
if strings.Contains(strings.ToLower(trimmed), "artgrid.io/clip/") {
|
||||
return trimmed
|
||||
}
|
||||
parsed, err := url.Parse(trimmed)
|
||||
if err != nil {
|
||||
return trimmed
|
||||
}
|
||||
segments := strings.Split(strings.Trim(parsed.Path, "/"), "/")
|
||||
slug := clipID
|
||||
for idx, segment := range segments {
|
||||
if segment == clipID && idx > 0 {
|
||||
slug = segments[idx-1]
|
||||
break
|
||||
}
|
||||
}
|
||||
return "https://artgrid.io/clip/" + clipID + "/" + slug
|
||||
}
|
||||
|
||||
func normalizeResultForCollector(source string, result SearchResult) SearchResult {
|
||||
switch source {
|
||||
case "Artgrid":
|
||||
result.Link = canonicalizeArtgridLink(result.Link)
|
||||
result.Source = "Artgrid"
|
||||
case "Envato":
|
||||
result.Source = "Envato"
|
||||
case "Google Video":
|
||||
result.Source = "Google Video"
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
func collectURLs(body string) []string {
|
||||
pattern := regexp.MustCompile(`https?:\/\/[^"'\\\s]+`)
|
||||
matches := pattern.FindAllString(body, -1)
|
||||
|
||||
@@ -49,3 +49,17 @@ func TestCleanArtgridTitle(t *testing.T) {
|
||||
t.Fatalf("expected %q, got %q", want, got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestCanonicalizeArtgridLinkFromArtlist(t *testing.T) {
|
||||
got := canonicalizeArtgridLink("https://artlist.io/stock-footage/clip/movie-film-moving-slowly-from-a-reel/114756")
|
||||
want := "https://artgrid.io/clip/114756/movie-film-moving-slowly-from-a-reel"
|
||||
if got != want {
|
||||
t.Fatalf("expected %q, got %q", want, got)
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsRenderableArtgridResultAcceptsArtlistCanonical(t *testing.T) {
|
||||
if !isRenderableArtgridResult(SearchResult{Link: "https://artlist.io/stock-footage/clip/movie-film-moving-slowly-from-a-reel/114756"}) {
|
||||
t.Fatal("expected artlist canonical clip URL to be accepted for Artgrid collector")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,159 @@
|
||||
package services
|
||||
|
||||
import (
|
||||
"sort"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// GeminiBatchStats describes the outcome of one batched Gemini evaluation
// pass. All fields are exported with JSON tags so the value can be serialised.
type GeminiBatchStats struct {
	// CandidateCap is the cap that was applied to the number of candidates.
	CandidateCap int `json:"candidateCap"`
	// Requested is the number of candidates selected for evaluation.
	Requested int `json:"requested"`
	// Batches is the number of batches that were attempted.
	Batches int `json:"batches"`
	// Succeeded is the number of batches that completed without an error.
	Succeeded int `json:"succeeded"`
	// Failed is the number of batches that returned an error.
	Failed int `json:"failed"`
	// RecommendedCount is the number of recommendations kept after merging.
	RecommendedCount int `json:"recommendedCount"`
	// Errors holds error messages from failed batches; omitted when empty.
	Errors []string `json:"errors,omitempty"`
}
|
||||
|
||||
func RankSearchResults(query string, results []SearchResult) []SearchResult {
|
||||
queryTerms := strings.Fields(strings.ToLower(query))
|
||||
positiveTerms := []string{
|
||||
"b-roll", "b roll", "stock", "stock footage", "footage", "cinematic", "editorial",
|
||||
"establishing", "4k", "hd", "drone", "ambient", "scene", "urban", "cityscape",
|
||||
}
|
||||
negativeTerms := []string{
|
||||
"shocking", "amazing", "crazy", "must watch", "reaction", "gossip", "celebrity",
|
||||
"thumbnail", "meme", "prank", "drama", "breaking", "viral", "tutorial",
|
||||
"how to", "review", "walkthrough", "course", "lesson", "podcast", "interview",
|
||||
"premiere pro", "after effects", "explained", "breakdown", "vlog",
|
||||
}
|
||||
type scoredResult struct {
|
||||
item SearchResult
|
||||
score int
|
||||
}
|
||||
|
||||
scored := make([]scoredResult, 0, len(results))
|
||||
for _, result := range results {
|
||||
score := 0
|
||||
text := strings.ToLower(result.Title + " " + result.Snippet + " " + result.Source)
|
||||
for _, term := range queryTerms {
|
||||
if strings.Contains(text, term) {
|
||||
score += 3
|
||||
}
|
||||
}
|
||||
for _, term := range positiveTerms {
|
||||
if strings.Contains(text, term) {
|
||||
score += 2
|
||||
}
|
||||
}
|
||||
for _, term := range negativeTerms {
|
||||
if strings.Contains(text, term) {
|
||||
score -= 4
|
||||
}
|
||||
}
|
||||
if result.ThumbnailURL != "" {
|
||||
score += 2
|
||||
}
|
||||
if result.PreviewVideoURL != "" {
|
||||
score += 3
|
||||
}
|
||||
switch result.Source {
|
||||
case "Google Video":
|
||||
score -= 1
|
||||
case "Envato":
|
||||
score += 7
|
||||
case "Artgrid":
|
||||
score += 7
|
||||
}
|
||||
scored = append(scored, scoredResult{item: result, score: score})
|
||||
}
|
||||
|
||||
sort.SliceStable(scored, func(i, j int) bool {
|
||||
return scored[i].score > scored[j].score
|
||||
})
|
||||
|
||||
ranked := make([]SearchResult, 0, len(scored))
|
||||
for _, item := range scored {
|
||||
ranked = append(ranked, item.item)
|
||||
}
|
||||
return ranked
|
||||
}
|
||||
|
||||
// GeminiCandidateLimit returns how many of total ranked results should be sent
// to Gemini: everything up to 12 results, 12 for 13-16 results, and 16 beyond
// that.
//
// The returned value never exceeds total, so it is safe to use as a slice
// bound, and it is 0 for a non-positive total.
func GeminiCandidateLimit(total int) int {
	switch {
	case total <= 0:
		return 0
	case total <= 12:
		// Includes 9-11: returning 12 here would exceed the available results.
		return total
	case total <= 16:
		return 12
	default:
		return 16
	}
}
|
||||
|
||||
func EvaluateAllCandidatesWithGemini(service *GeminiService, query string, ranked []SearchResult) ([]AIRecommendation, GeminiBatchStats) {
|
||||
const chunkSize = 8
|
||||
limit := GeminiCandidateLimit(len(ranked))
|
||||
stats := GeminiBatchStats{
|
||||
CandidateCap: limit,
|
||||
Requested: min(limit, len(ranked)),
|
||||
}
|
||||
merged := make([]AIRecommendation, 0, len(ranked))
|
||||
seen := map[string]bool{}
|
||||
for start := 0; start < limit; start += chunkSize {
|
||||
end := start + chunkSize
|
||||
if end > limit {
|
||||
end = limit
|
||||
}
|
||||
batch := ranked[start:end]
|
||||
stats.Batches++
|
||||
recommended, err := service.Recommend(query, batch)
|
||||
if err != nil {
|
||||
stats.Failed++
|
||||
if len(stats.Errors) < 5 {
|
||||
stats.Errors = append(stats.Errors, err.Error())
|
||||
}
|
||||
continue
|
||||
}
|
||||
stats.Succeeded++
|
||||
for _, item := range recommended {
|
||||
if item.Link == "" || seen[item.Link] {
|
||||
continue
|
||||
}
|
||||
seen[item.Link] = true
|
||||
merged = append(merged, item)
|
||||
}
|
||||
}
|
||||
stats.RecommendedCount = len(merged)
|
||||
return merged, stats
|
||||
}
|
||||
|
||||
func MergeRecommendations(recommended []AIRecommendation, ranked []SearchResult, limit int) []AIRecommendation {
|
||||
merged := make([]AIRecommendation, 0, min(limit, len(ranked)))
|
||||
seen := map[string]bool{}
|
||||
|
||||
for _, item := range recommended {
|
||||
if item.Link == "" || seen[item.Link] {
|
||||
continue
|
||||
}
|
||||
seen[item.Link] = true
|
||||
merged = append(merged, item)
|
||||
}
|
||||
|
||||
for _, item := range ranked {
|
||||
if len(merged) >= limit || item.Link == "" || seen[item.Link] {
|
||||
continue
|
||||
}
|
||||
seen[item.Link] = true
|
||||
merged = append(merged, AIRecommendation{
|
||||
Title: item.Title,
|
||||
Link: item.Link,
|
||||
Snippet: item.Snippet,
|
||||
ThumbnailURL: item.ThumbnailURL,
|
||||
PreviewVideoURL: item.PreviewVideoURL,
|
||||
Source: item.Source,
|
||||
Reason: "Keyword-ranked result added without extra Gemini vision tokens.",
|
||||
Recommended: true,
|
||||
})
|
||||
}
|
||||
return merged
|
||||
}
|
||||
Reference in New Issue
Block a user