Switch search backend to SearXNG
build-push / docker (push) Has been cancelled

This commit is contained in:
AI Assistant
2026-03-13 10:10:13 +09:00
parent 6734887fc6
commit ee316de7ab
8 changed files with 466 additions and 199 deletions
+110 -20
View File
@@ -11,6 +11,7 @@ import (
"os/exec"
"path/filepath"
"regexp"
"sort"
"strings"
"sync"
"time"
@@ -65,14 +66,14 @@ func (h *Hub) Remove(conn *websocket.Conn) {
}
type PreviewResponse struct {
Title string `json:"title"`
Thumbnail string `json:"thumbnail"`
Title string `json:"title"`
Thumbnail string `json:"thumbnail"`
PreviewStreamURL string `json:"previewStreamUrl"`
Duration string `json:"duration"`
DurationSeconds int `json:"durationSeconds"`
StartDefault string `json:"startDefault"`
EndDefault string `json:"endDefault"`
Qualities []map[string]any `json:"qualities"`
Duration string `json:"duration"`
DurationSeconds int `json:"durationSeconds"`
StartDefault string `json:"startDefault"`
EndDefault string `json:"endDefault"`
Qualities []map[string]any `json:"qualities"`
}
func RegisterRoutes(router *gin.Engine, app *App) {
@@ -141,11 +142,11 @@ func (a *App) uploadFile(c *gin.Context) {
func (a *App) startDownload(c *gin.Context) {
var req struct {
URL string `json:"url"`
Start string `json:"start"`
End string `json:"end"`
URL string `json:"url"`
Start string `json:"start"`
End string `json:"end"`
Quality string `json:"quality"`
Force bool `json:"force"`
Force bool `json:"force"`
}
if err := c.ShouldBindJSON(&req); err != nil {
c.JSON(http.StatusBadRequest, gin.H{"error": err.Error()})
@@ -192,7 +193,7 @@ func (a *App) previewDownload(c *gin.Context) {
return
}
cmd := exec.Command("python3", a.WorkerScript, "--mode", "probe", "--url", req.URL, "--output", filepath.Join(a.DownloadsDir, "probe.tmp"))
cmd := exec.Command("python3", a.WorkerScript, "--mode", "probe", "--url", req.URL)
output, err := cmd.CombinedOutput()
if err != nil {
c.JSON(http.StatusBadGateway, gin.H{"error": strings.TrimSpace(string(output))})
@@ -258,34 +259,53 @@ func (a *App) searchMedia(c *gin.Context) {
return
}
results, err := a.SearchService.SearchMedia(req.Query)
queryVariants, expandErr := a.GeminiService.ExpandQuery(req.Query)
if len(queryVariants) == 0 {
queryVariants = []string{req.Query}
}
results, err := a.SearchService.SearchMedia(queryVariants)
if err != nil {
c.JSON(http.StatusBadGateway, gin.H{"error": err.Error()})
return
}
if len(results) == 0 {
c.JSON(http.StatusOK, gin.H{"results": []services.AIRecommendation{}, "warning": "Vertex AI Search returned no renderable results. Check your website indexing fields and thumbnails."})
warning := "SearXNG returned no renderable results."
if expandErr != nil {
warning += " Query expansion fallback was used."
}
c.JSON(http.StatusOK, gin.H{"results": []services.AIRecommendation{}, "warning": warning})
return
}
recommended, err := a.GeminiService.Recommend(req.Query, results)
scored := rankSearchResults(req.Query, results)
shortlist := scored[:min(len(scored), 10)]
recommended, err := a.GeminiService.Recommend(req.Query, shortlist)
if err != nil {
fallback := make([]services.AIRecommendation, 0, min(4, len(results)))
for _, result := range results[:min(4, len(results))] {
fallback := make([]services.AIRecommendation, 0, min(20, len(scored)))
for _, result := range scored[:min(20, len(scored))] {
fallback = append(fallback, services.AIRecommendation{
Title: result.Title,
Link: result.Link,
ThumbnailURL: result.ThumbnailURL,
Source: result.Source,
Reason: "Gemini recommendation failed, showing raw search result.",
Reason: "Keyword-ranked result added without extra Gemini vision tokens.",
Recommended: true,
})
}
c.JSON(http.StatusOK, gin.H{"results": fallback, "warning": err.Error()})
warning := err.Error()
if expandErr != nil {
warning = warning + " Query expansion fallback was used."
}
c.JSON(http.StatusOK, gin.H{"results": fallback, "warning": warning, "queries": queryVariants})
return
}
c.JSON(http.StatusOK, gin.H{"results": recommended})
response := gin.H{"results": mergeRecommendations(recommended, scored, 20), "queries": queryVariants}
if expandErr != nil {
response["warning"] = "Gemini query expansion failed, using the original query only."
}
c.JSON(http.StatusOK, response)
}
func normalizeFilename(name string) string {
@@ -321,6 +341,76 @@ func min(a, b int) int {
return b
}
// rankSearchResults returns a copy of results ordered by a simple relevance
// heuristic, highest score first. Scoring per result:
//   - +3 for every whitespace-separated query term found (case-insensitively,
//     as a substring) in the title, snippet, or source name;
//   - +2 when a thumbnail URL is present;
//   - +3 for the "Google Video" source, +2 for "Envato" or "Artgrid".
//
// The sort is stable, so results with equal scores keep their incoming order.
func rankSearchResults(query string, results []services.SearchResult) []services.SearchResult {
	terms := strings.Fields(strings.ToLower(query))

	type entry struct {
		result services.SearchResult
		points int
	}

	entries := make([]entry, len(results))
	for i, candidate := range results {
		haystack := strings.ToLower(candidate.Title + " " + candidate.Snippet + " " + candidate.Source)

		points := 0
		for _, term := range terms {
			if strings.Contains(haystack, term) {
				points += 3
			}
		}
		if candidate.ThumbnailURL != "" {
			points += 2
		}
		switch candidate.Source {
		case "Google Video":
			points += 3
		case "Envato", "Artgrid":
			points += 2
		}

		entries[i] = entry{result: candidate, points: points}
	}

	sort.SliceStable(entries, func(a, b int) bool {
		return entries[a].points > entries[b].points
	})

	ordered := make([]services.SearchResult, len(entries))
	for i := range entries {
		ordered[i] = entries[i].result
	}
	return ordered
}
// mergeRecommendations builds the final response list: the Gemini
// recommendations first, then keyword-ranked search results as filler, up to
// limit entries in total.
//
// Entries with an empty link, or a link that was already emitted, are skipped.
// Filler entries are converted to AIRecommendation with a fixed reason string
// and Recommended set to true. The result is always a non-nil slice so it
// serializes as a JSON array; a non-positive limit yields an empty slice.
func mergeRecommendations(recommended []services.AIRecommendation, ranked []services.SearchResult, limit int) []services.AIRecommendation {
	// Guard first: a negative capacity passed to make would panic.
	if limit <= 0 {
		return []services.AIRecommendation{}
	}

	merged := make([]services.AIRecommendation, 0, min(limit, len(recommended)+len(ranked)))
	seen := make(map[string]bool, cap(merged))

	for _, item := range recommended {
		// The cap applies to recommendations too, not only to the filler.
		if len(merged) >= limit {
			break
		}
		if item.Link == "" || seen[item.Link] {
			continue
		}
		seen[item.Link] = true
		merged = append(merged, item)
	}

	for _, item := range ranked {
		// Once full there is nothing left to add, so stop scanning.
		if len(merged) >= limit {
			break
		}
		if item.Link == "" || seen[item.Link] {
			continue
		}
		seen[item.Link] = true
		merged = append(merged, services.AIRecommendation{
			Title:        item.Title,
			Link:         item.Link,
			ThumbnailURL: item.ThumbnailURL,
			Source:       item.Source,
			Reason:       "Keyword-ranked result added without extra Gemini vision tokens.",
			Recommended:  true,
		})
	}
	return merged
}
func EnsurePaths(downloadsDir, workerScript string) error {
if err := os.MkdirAll(downloadsDir, 0o755); err != nil {
return err
+6 -8
View File
@@ -31,15 +31,13 @@ func main() {
}
app := &handlers.App{
DB: db,
DownloadsDir: downloadsDir,
WorkerScript: workerScript,
DB: db,
DownloadsDir: downloadsDir,
WorkerScript: workerScript,
SearchService: services.NewSearchService(
os.Getenv("VERTEX_AI_SEARCH_API_KEY"),
os.Getenv("VERTEX_AI_SEARCH_PROJECT_ID"),
os.Getenv("VERTEX_AI_SEARCH_LOCATION"),
os.Getenv("VERTEX_AI_SEARCH_DATA_STORE_ID"),
os.Getenv("VERTEX_AI_SEARCH_SERVING_CONFIG"),
os.Getenv("SEARXNG_BASE_URL"),
os.Getenv("SEARXNG_GOOGLE_VIDEO_ENGINE"),
os.Getenv("SEARXNG_WEB_ENGINE"),
),
GeminiService: services.NewGeminiService(os.Getenv("GEMINI_API_KEY")),
Hub: handlers.NewHub(),
+146 -145
View File
@@ -3,10 +3,10 @@ package services
import (
"encoding/json"
"fmt"
"io"
"net/http"
neturl "net/url"
"net/url"
"regexp"
"sort"
"strings"
"time"
)
@@ -21,143 +21,148 @@ type SearchResult struct {
}
type SearchService struct {
APIKey string
ProjectID string
Location string
DataStoreID string
ServingConfig string
Client *http.Client
BaseURL string
GoogleVideoEngine string
WebEngine string
Client *http.Client
}
func NewSearchService(apiKey, projectID, location, dataStoreID, servingConfig string) *SearchService {
if location == "" {
location = "global"
func NewSearchService(baseURL, googleVideoEngine, webEngine string) *SearchService {
if googleVideoEngine == "" {
googleVideoEngine = "google videos"
}
if servingConfig == "" {
servingConfig = "default_serving_config"
if webEngine == "" {
webEngine = "google"
}
return &SearchService{
APIKey: apiKey,
ProjectID: projectID,
Location: location,
DataStoreID: dataStoreID,
ServingConfig: servingConfig,
Client: &http.Client{Timeout: 20 * time.Second},
BaseURL: strings.TrimRight(baseURL, "/"),
GoogleVideoEngine: googleVideoEngine,
WebEngine: webEngine,
Client: &http.Client{Timeout: 20 * time.Second},
}
}
func (s *SearchService) SearchMedia(query string) ([]SearchResult, error) {
if s.APIKey == "" || s.ProjectID == "" || s.DataStoreID == "" {
return nil, fmt.Errorf("vertex ai search credentials are not configured")
}
results, err := s.searchLite(query, true)
if err != nil {
results, err = s.searchLite(query, false)
if err != nil {
return nil, err
}
}
return results, nil
}
func (s *SearchService) searchLite(query string, imageSearch bool) ([]SearchResult, error) {
filteredQuery := strings.TrimSpace(query + " site:youtube.com OR site:tiktok.com OR site:envato.com OR site:artgrid.io")
servingConfig := fmt.Sprintf(
"projects/%s/locations/%s/dataStores/%s/servingConfigs/%s",
s.ProjectID,
s.Location,
s.DataStoreID,
s.ServingConfig,
)
params := map[string]any{
"user_country_code": "us",
}
if imageSearch {
params["searchType"] = 1
func (s *SearchService) SearchMedia(queries []string) ([]SearchResult, error) {
if s.BaseURL == "" {
return nil, fmt.Errorf("searxng base url is not configured")
}
requestBody := map[string]any{
"query": filteredQuery,
"pageSize": 25,
"safeSearch": false,
"languageCode": "ko-KR",
"params": params,
"contentSearchSpec": map[string]any{
"snippetSpec": map[string]any{
"returnSnippet": true,
},
sources := []struct {
name string
categories string
engine string
siteFilter string
match func(string) bool
}{
{
name: "Google Video",
categories: "videos",
engine: s.GoogleVideoEngine,
match: func(string) bool { return true },
},
{
name: "Envato",
categories: "general",
engine: s.WebEngine,
siteFilter: "site:elements.envato.com OR site:envato.com OR site:videohive.net",
match: isEnvatoURL,
},
{
name: "Artgrid",
categories: "general",
engine: s.WebEngine,
siteFilter: "site:artgrid.io",
match: func(link string) bool { return strings.Contains(strings.ToLower(link), "artgrid.io") },
},
}
body, _ := json.Marshal(requestBody)
endpoint := fmt.Sprintf(
"https://discoveryengine.googleapis.com/v1/%s:searchLite?key=%s",
servingConfig,
neturl.QueryEscape(s.APIKey),
)
resp, err := s.Client.Post(endpoint, "application/json", strings.NewReader(string(body)))
seen := map[string]bool{}
results := make([]SearchResult, 0, 60)
for _, query := range queries {
query = strings.TrimSpace(query)
if query == "" {
continue
}
for _, source := range sources {
searchQuery := query
if source.siteFilter != "" {
searchQuery = query + " " + source.siteFilter
}
items, err := s.search(searchQuery, source.categories, source.engine, source.name)
if err != nil {
continue
}
for _, item := range items {
if item.Link == "" || seen[item.Link] {
continue
}
if source.match != nil && !source.match(item.Link) {
continue
}
seen[item.Link] = true
results = append(results, item)
}
}
}
sort.SliceStable(results, func(i, j int) bool {
return sourceWeight(results[i].Source) > sourceWeight(results[j].Source)
})
return results, nil
}
func (s *SearchService) search(query, categories, engine, source string) ([]SearchResult, error) {
values := url.Values{}
values.Set("q", query)
values.Set("format", "json")
values.Set("safesearch", "0")
values.Set("language", "ko-KR")
if categories != "" {
values.Set("categories", categories)
}
if engine != "" {
values.Set("engines", engine)
}
endpoint := s.BaseURL + "/search?" + values.Encode()
resp, err := s.Client.Get(endpoint)
if err != nil {
return nil, err
}
defer resp.Body.Close()
if resp.StatusCode >= 300 {
data, _ := io.ReadAll(io.LimitReader(resp.Body, 2048))
return nil, fmt.Errorf("vertex ai search returned status %d: %s", resp.StatusCode, strings.TrimSpace(string(data)))
return nil, fmt.Errorf("searxng returned status %d", resp.StatusCode)
}
var payload struct {
Results []struct {
Document struct {
StructData map[string]any `json:"structData"`
DerivedStructData map[string]any `json:"derivedStructData"`
} `json:"document"`
Title string `json:"title"`
URL string `json:"url"`
Content string `json:"content"`
Thumbnail string `json:"thumbnail"`
ThumbnailSrc string `json:"thumbnail_src"`
ImgSrc string `json:"img_src"`
ParsedURL []any `json:"parsed_url"`
Engine string `json:"engine"`
} `json:"results"`
}
if err := json.NewDecoder(resp.Body).Decode(&payload); err != nil {
return nil, err
}
results := make([]SearchResult, 0, len(payload.Results))
for _, item := range payload.Results {
link := firstNonEmpty(
firstString(item.Document.DerivedStructData, "link", "url", "uri"),
firstString(item.Document.StructData, "link", "url", "uri"),
)
title := firstNonEmpty(
firstString(item.Document.DerivedStructData, "title", "name"),
firstString(item.Document.StructData, "title", "name"),
)
displayLink := firstNonEmpty(
firstString(item.Document.DerivedStructData, "displayLink", "site_name"),
firstString(item.Document.StructData, "displayLink", "site_name"),
)
snippet := firstNonEmpty(
firstString(item.Document.DerivedStructData, "snippets", "snippet", "extractive_answers"),
firstString(item.Document.StructData, "snippets", "snippet", "description"),
)
thumb := firstNonEmpty(
firstString(item.Document.DerivedStructData, "thumbnail", "image", "image_url", "link"),
firstString(item.Document.StructData, "thumbnail", "image", "image_url"),
)
if thumb == "" {
thumb = deriveThumbnail(link)
}
if title == "" {
title = displayLink
}
if link == "" {
continue
}
link := strings.TrimSpace(item.URL)
thumb := firstNonEmpty(item.Thumbnail, item.ThumbnailSrc, item.ImgSrc, deriveThumbnail(link))
results = append(results, SearchResult{
Title: title,
Title: item.Title,
Link: link,
DisplayLink: displayLink,
Snippet: snippet,
DisplayLink: inferDisplayLink(link, item.ParsedURL),
Snippet: item.Content,
ThumbnailURL: thumb,
Source: inferSource(displayLink + " " + link),
Source: normalizeSource(source, link, item.Engine),
})
}
return results, nil
@@ -172,41 +177,39 @@ func firstNonEmpty(values ...string) string {
return ""
}
func firstString(values map[string]any, keys ...string) string {
for _, key := range keys {
value, ok := values[key]
if !ok {
continue
}
switch typed := value.(type) {
case string:
if typed != "" {
return typed
}
case []any:
for _, item := range typed {
if text, ok := item.(string); ok && text != "" {
return text
}
if mapped, ok := item.(map[string]any); ok {
if text := firstString(mapped, "snippet", "htmlSnippet", "url", "link", "value", "content"); text != "" {
return text
}
}
}
case map[string]any:
if text := firstString(typed, "snippet", "htmlSnippet", "url", "link", "value", "content"); text != "" {
return text
}
// normalizeSource picks the display name of the source a result belongs to.
//
// An explicit, non-empty source always wins. Otherwise the name is inferred,
// case-insensitively: links containing "envato" or "videohive" map to
// "Envato", links containing "artgrid" map to "Artgrid", and an engine name
// containing "google" maps to "Google Video". If nothing matches, the raw
// engine name is returned unchanged.
func normalizeSource(source, link, engine string) string {
	if source != "" {
		return source
	}

	lowerLink := strings.ToLower(link)
	if strings.Contains(lowerLink, "envato") || strings.Contains(lowerLink, "videohive") {
		return "Envato"
	}
	if strings.Contains(lowerLink, "artgrid") {
		return "Artgrid"
	}
	if strings.Contains(strings.ToLower(engine), "google") {
		return "Google Video"
	}
	return engine
}
// inferDisplayLink returns the host to display for a search result.
//
// When parsed has at least two elements and the second one is a string, that
// string is used as-is (the caller passes the result's "parsed_url" list).
// Otherwise the host is taken from parsing link; an unparseable link yields
// the empty string.
func inferDisplayLink(link string, parsed []any) string {
	if len(parsed) >= 2 {
		if netloc, isString := parsed[1].(string); isString {
			return netloc
		}
	}

	u, err := url.Parse(link)
	if err != nil {
		return ""
	}
	return u.Host
}
// isEnvatoURL reports whether link, compared case-insensitively, contains
// "envato" or "videohive.net" anywhere in its text.
func isEnvatoURL(link string) bool {
	normalized := strings.ToLower(link)
	for _, marker := range []string{"envato", "videohive.net"} {
		if strings.Contains(normalized, marker) {
			return true
		}
	}
	return false
}
func deriveThumbnail(link string) string {
if link == "" {
return ""
}
if videoID := extractYouTubeID(link); videoID != "" {
return "https://i.ytimg.com/vi/" + videoID + "/hqdefault.jpg"
}
@@ -227,17 +230,15 @@ func extractYouTubeID(link string) string {
return ""
}
func inferSource(displayLink string) string {
switch {
case strings.Contains(displayLink, "youtube"):
return "YouTube"
case strings.Contains(displayLink, "tiktok"):
return "TikTok"
case strings.Contains(displayLink, "envato"):
return "Envato"
case strings.Contains(displayLink, "artgrid"):
return "Artgrid"
// sourceWeight returns the ordering priority of a result source; a larger
// value sorts earlier when SearchMedia orders its combined results.
// "Google Video" ranks highest (3), then "Envato" (2), then "Artgrid" (1).
// Any other source name, including the empty string, has weight 0.
func sourceWeight(source string) int {
	switch source {
	case "Google Video":
		return 3
	case "Envato":
		return 2
	case "Artgrid":
		return 1
	default:
		return 0
	}
}
+80 -2
View File
@@ -26,6 +26,10 @@ type AIRecommendation struct {
Recommended bool `json:"recommended"`
}
// QueryExpansion is the JSON document ExpandQuery asks Gemini to return,
// i.e. {"querywords":["..."]}.
type QueryExpansion struct {
// Querywords holds the search variations decoded from the model's reply.
Querywords []string `json:"querywords"`
}
func NewGeminiService(apiKey string) *GeminiService {
return &GeminiService{
APIKey: apiKey,
@@ -33,6 +37,80 @@ func NewGeminiService(apiKey string) *GeminiService {
}
}
// ExpandQuery asks Gemini for alternative phrasings of query to widen the
// media search.
//
// On success the returned slice starts with the original query, followed by at
// most four whitespace-trimmed variations, de-duplicated case-insensitively
// (the original query counts as already seen).
//
// The call degrades gracefully: when no API key is configured, or the model
// returns no candidates, the result is []string{query} with a nil error. On a
// transport, HTTP-status, or decoding failure the result is still
// []string{query}, paired with a non-nil error, so callers can continue with
// the original query and only surface a warning.
func (g *GeminiService) ExpandQuery(query string) ([]string, error) {
	// Mirrors the "at most 4" instruction in the prompt. It is enforced here
	// because the model's output is not guaranteed to honor it, and every
	// extra variant is searched against each configured source.
	const maxVariants = 4

	fallback := []string{query}
	if g.APIKey == "" {
		return fallback, nil
	}

	body := map[string]any{
		"contents": []map[string]any{
			{
				"parts": []map[string]string{
					{
						"text": `Return JSON only in this shape: {"querywords":["..."]}.
Generate at most 4 concise search variations for media discovery across Google Video, Envato, and Artgrid.
Keep the original language when possible. User query: ` + query,
					},
				},
			},
		},
		"generationConfig": map[string]any{
			"responseMimeType": "application/json",
			"temperature":      0.2,
			"maxOutputTokens":  120,
		},
	}
	rawBody, err := json.Marshal(body)
	if err != nil {
		return fallback, fmt.Errorf("encoding query expansion request: %w", err)
	}

	endpoint := "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent?key=" + g.APIKey
	resp, err := g.Client.Post(endpoint, "application/json", bytes.NewReader(rawBody))
	if err != nil {
		return fallback, err
	}
	defer resp.Body.Close()

	if resp.StatusCode >= 300 {
		return fallback, fmt.Errorf("gemini returned status %d for query expansion", resp.StatusCode)
	}

	var payload struct {
		Candidates []struct {
			Content struct {
				Parts []struct {
					Text string `json:"text"`
				} `json:"parts"`
			} `json:"content"`
		} `json:"candidates"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&payload); err != nil {
		return fallback, fmt.Errorf("decoding query expansion response: %w", err)
	}
	if len(payload.Candidates) == 0 || len(payload.Candidates[0].Content.Parts) == 0 {
		return fallback, nil
	}

	// The first part's text is itself a JSON document in the QueryExpansion shape.
	var parsed QueryExpansion
	if err := json.Unmarshal([]byte(payload.Candidates[0].Content.Parts[0].Text), &parsed); err != nil {
		return fallback, fmt.Errorf("parsing query expansion payload: %w", err)
	}

	queries := make([]string, 0, maxVariants+1)
	queries = append(queries, query)
	seen := map[string]bool{strings.ToLower(strings.TrimSpace(query)): true}
	for _, item := range parsed.Querywords {
		// queries[0] is the original query, so variants are len(queries)-1.
		if len(queries) > maxVariants {
			break
		}
		trimmed := strings.TrimSpace(item)
		if trimmed == "" {
			continue
		}
		key := strings.ToLower(trimmed)
		if seen[key] {
			continue
		}
		seen[key] = true
		queries = append(queries, trimmed)
	}
	return queries, nil
}
func (g *GeminiService) Recommend(query string, candidates []SearchResult) ([]AIRecommendation, error) {
if g.APIKey == "" {
return nil, fmt.Errorf("gemini api key is not configured")
@@ -46,11 +124,11 @@ func (g *GeminiService) Recommend(query string, candidates []SearchResult) ([]AI
{
"text": `Analyze the provided images for the user's search intent. Return JSON only in this shape:
{"recommendations":[{"index":0,"reason":"short reason","recommended":true}]}
Mark only the best matches as recommended=true. Keep reasons concise. User query: ` + query,
Mark only the best matches as recommended=true. Keep reasons concise. Recommend up to 8 items. User query: ` + query,
},
}
maxImages := min(len(candidates), 8)
maxImages := min(len(candidates), 10)
for idx := 0; idx < maxImages; idx++ {
img, mimeType, err := fetchImageAsInlineData(g.Client, candidates[idx].ThumbnailURL)
if err != nil {