Switch search backend to SearXNG
build-push / docker (push) Has been cancelled

This commit is contained in:
AI Assistant
2026-03-13 10:10:13 +09:00
parent 6734887fc6
commit ee316de7ab
8 changed files with 466 additions and 199 deletions
+146 -145
View File
@@ -3,10 +3,10 @@ package services
import (
"encoding/json"
"fmt"
"io"
"net/http"
neturl "net/url"
"net/url"
"regexp"
"sort"
"strings"
"time"
)
@@ -21,143 +21,148 @@ type SearchResult struct {
}
type SearchService struct {
APIKey string
ProjectID string
Location string
DataStoreID string
ServingConfig string
Client *http.Client
BaseURL string
GoogleVideoEngine string
WebEngine string
Client *http.Client
}
func NewSearchService(apiKey, projectID, location, dataStoreID, servingConfig string) *SearchService {
if location == "" {
location = "global"
func NewSearchService(baseURL, googleVideoEngine, webEngine string) *SearchService {
if googleVideoEngine == "" {
googleVideoEngine = "google videos"
}
if servingConfig == "" {
servingConfig = "default_serving_config"
if webEngine == "" {
webEngine = "google"
}
return &SearchService{
APIKey: apiKey,
ProjectID: projectID,
Location: location,
DataStoreID: dataStoreID,
ServingConfig: servingConfig,
Client: &http.Client{Timeout: 20 * time.Second},
BaseURL: strings.TrimRight(baseURL, "/"),
GoogleVideoEngine: googleVideoEngine,
WebEngine: webEngine,
Client: &http.Client{Timeout: 20 * time.Second},
}
}
func (s *SearchService) SearchMedia(query string) ([]SearchResult, error) {
if s.APIKey == "" || s.ProjectID == "" || s.DataStoreID == "" {
return nil, fmt.Errorf("vertex ai search credentials are not configured")
}
results, err := s.searchLite(query, true)
if err != nil {
results, err = s.searchLite(query, false)
if err != nil {
return nil, err
}
}
return results, nil
}
func (s *SearchService) searchLite(query string, imageSearch bool) ([]SearchResult, error) {
filteredQuery := strings.TrimSpace(query + " site:youtube.com OR site:tiktok.com OR site:envato.com OR site:artgrid.io")
servingConfig := fmt.Sprintf(
"projects/%s/locations/%s/dataStores/%s/servingConfigs/%s",
s.ProjectID,
s.Location,
s.DataStoreID,
s.ServingConfig,
)
params := map[string]any{
"user_country_code": "us",
}
if imageSearch {
params["searchType"] = 1
func (s *SearchService) SearchMedia(queries []string) ([]SearchResult, error) {
if s.BaseURL == "" {
return nil, fmt.Errorf("searxng base url is not configured")
}
requestBody := map[string]any{
"query": filteredQuery,
"pageSize": 25,
"safeSearch": false,
"languageCode": "ko-KR",
"params": params,
"contentSearchSpec": map[string]any{
"snippetSpec": map[string]any{
"returnSnippet": true,
},
sources := []struct {
name string
categories string
engine string
siteFilter string
match func(string) bool
}{
{
name: "Google Video",
categories: "videos",
engine: s.GoogleVideoEngine,
match: func(string) bool { return true },
},
{
name: "Envato",
categories: "general",
engine: s.WebEngine,
siteFilter: "site:elements.envato.com OR site:envato.com OR site:videohive.net",
match: isEnvatoURL,
},
{
name: "Artgrid",
categories: "general",
engine: s.WebEngine,
siteFilter: "site:artgrid.io",
match: func(link string) bool { return strings.Contains(strings.ToLower(link), "artgrid.io") },
},
}
body, _ := json.Marshal(requestBody)
endpoint := fmt.Sprintf(
"https://discoveryengine.googleapis.com/v1/%s:searchLite?key=%s",
servingConfig,
neturl.QueryEscape(s.APIKey),
)
resp, err := s.Client.Post(endpoint, "application/json", strings.NewReader(string(body)))
seen := map[string]bool{}
results := make([]SearchResult, 0, 60)
for _, query := range queries {
query = strings.TrimSpace(query)
if query == "" {
continue
}
for _, source := range sources {
searchQuery := query
if source.siteFilter != "" {
searchQuery = query + " " + source.siteFilter
}
items, err := s.search(searchQuery, source.categories, source.engine, source.name)
if err != nil {
continue
}
for _, item := range items {
if item.Link == "" || seen[item.Link] {
continue
}
if source.match != nil && !source.match(item.Link) {
continue
}
seen[item.Link] = true
results = append(results, item)
}
}
}
sort.SliceStable(results, func(i, j int) bool {
return sourceWeight(results[i].Source) > sourceWeight(results[j].Source)
})
return results, nil
}
func (s *SearchService) search(query, categories, engine, source string) ([]SearchResult, error) {
values := url.Values{}
values.Set("q", query)
values.Set("format", "json")
values.Set("safesearch", "0")
values.Set("language", "ko-KR")
if categories != "" {
values.Set("categories", categories)
}
if engine != "" {
values.Set("engines", engine)
}
endpoint := s.BaseURL + "/search?" + values.Encode()
resp, err := s.Client.Get(endpoint)
if err != nil {
return nil, err
}
defer resp.Body.Close()
if resp.StatusCode >= 300 {
data, _ := io.ReadAll(io.LimitReader(resp.Body, 2048))
return nil, fmt.Errorf("vertex ai search returned status %d: %s", resp.StatusCode, strings.TrimSpace(string(data)))
return nil, fmt.Errorf("searxng returned status %d", resp.StatusCode)
}
var payload struct {
Results []struct {
Document struct {
StructData map[string]any `json:"structData"`
DerivedStructData map[string]any `json:"derivedStructData"`
} `json:"document"`
Title string `json:"title"`
URL string `json:"url"`
Content string `json:"content"`
Thumbnail string `json:"thumbnail"`
ThumbnailSrc string `json:"thumbnail_src"`
ImgSrc string `json:"img_src"`
ParsedURL []any `json:"parsed_url"`
Engine string `json:"engine"`
} `json:"results"`
}
if err := json.NewDecoder(resp.Body).Decode(&payload); err != nil {
return nil, err
}
results := make([]SearchResult, 0, len(payload.Results))
for _, item := range payload.Results {
link := firstNonEmpty(
firstString(item.Document.DerivedStructData, "link", "url", "uri"),
firstString(item.Document.StructData, "link", "url", "uri"),
)
title := firstNonEmpty(
firstString(item.Document.DerivedStructData, "title", "name"),
firstString(item.Document.StructData, "title", "name"),
)
displayLink := firstNonEmpty(
firstString(item.Document.DerivedStructData, "displayLink", "site_name"),
firstString(item.Document.StructData, "displayLink", "site_name"),
)
snippet := firstNonEmpty(
firstString(item.Document.DerivedStructData, "snippets", "snippet", "extractive_answers"),
firstString(item.Document.StructData, "snippets", "snippet", "description"),
)
thumb := firstNonEmpty(
firstString(item.Document.DerivedStructData, "thumbnail", "image", "image_url", "link"),
firstString(item.Document.StructData, "thumbnail", "image", "image_url"),
)
if thumb == "" {
thumb = deriveThumbnail(link)
}
if title == "" {
title = displayLink
}
if link == "" {
continue
}
link := strings.TrimSpace(item.URL)
thumb := firstNonEmpty(item.Thumbnail, item.ThumbnailSrc, item.ImgSrc, deriveThumbnail(link))
results = append(results, SearchResult{
Title: title,
Title: item.Title,
Link: link,
DisplayLink: displayLink,
Snippet: snippet,
DisplayLink: inferDisplayLink(link, item.ParsedURL),
Snippet: item.Content,
ThumbnailURL: thumb,
Source: inferSource(displayLink + " " + link),
Source: normalizeSource(source, link, item.Engine),
})
}
return results, nil
@@ -172,41 +177,39 @@ func firstNonEmpty(values ...string) string {
return ""
}
func firstString(values map[string]any, keys ...string) string {
for _, key := range keys {
value, ok := values[key]
if !ok {
continue
}
switch typed := value.(type) {
case string:
if typed != "" {
return typed
}
case []any:
for _, item := range typed {
if text, ok := item.(string); ok && text != "" {
return text
}
if mapped, ok := item.(map[string]any); ok {
if text := firstString(mapped, "snippet", "htmlSnippet", "url", "link", "value", "content"); text != "" {
return text
}
}
}
case map[string]any:
if text := firstString(typed, "snippet", "htmlSnippet", "url", "link", "value", "content"); text != "" {
return text
}
// normalizeSource picks the source label for a search result.
//
// An explicit, non-empty source always wins. Otherwise the label is guessed
// from the result link (case-insensitively): links mentioning "envato" or
// "videohive" map to "Envato", links mentioning "artgrid" map to "Artgrid".
// If the link gives no hint, an engine name containing "google" maps to
// "Google Video"; any other engine name is returned unchanged.
func normalizeSource(source, link, engine string) string {
	if source != "" {
		return source
	}

	// Lower-case the link once; every link-based check below is
	// case-insensitive.
	lowerLink := strings.ToLower(link)

	switch {
	case strings.Contains(lowerLink, "envato"), strings.Contains(lowerLink, "videohive"):
		return "Envato"
	case strings.Contains(lowerLink, "artgrid"):
		return "Artgrid"
	case strings.Contains(strings.ToLower(engine), "google"):
		return "Google Video"
	}
	return engine
}
// inferDisplayLink returns the host name to show for a search result.
//
// It prefers the second element of parsed when that element is a non-empty
// string (presumably the netloc slot of SearXNG's parsed_url array — confirm
// against the SearXNG JSON output). If that element is missing, not a string,
// or empty, the host is taken from parsing link instead. An empty string is
// returned when neither source yields a host.
func inferDisplayLink(link string, parsed []any) string {
	if len(parsed) > 1 {
		// An empty host is useless for display, so only accept a non-empty
		// one and otherwise fall through to parsing the link itself.
		if host, ok := parsed[1].(string); ok && host != "" {
			return host
		}
	}
	if parsedURL, err := url.Parse(link); err == nil {
		return parsedURL.Host
	}
	return ""
}
// isEnvatoURL reports whether link contains, case-insensitively, either
// "envato" or "videohive.net".
func isEnvatoURL(link string) bool {
	normalized := strings.ToLower(link)
	for _, marker := range []string{"envato", "videohive.net"} {
		if strings.Contains(normalized, marker) {
			return true
		}
	}
	return false
}
func deriveThumbnail(link string) string {
if link == "" {
return ""
}
if videoID := extractYouTubeID(link); videoID != "" {
return "https://i.ytimg.com/vi/" + videoID + "/hqdefault.jpg"
}
@@ -227,17 +230,15 @@ func extractYouTubeID(link string) string {
return ""
}
func inferSource(displayLink string) string {
switch {
case strings.Contains(displayLink, "youtube"):
return "YouTube"
case strings.Contains(displayLink, "tiktok"):
return "TikTok"
case strings.Contains(displayLink, "envato"):
return "Envato"
case strings.Contains(displayLink, "artgrid"):
return "Artgrid"
func sourceWeight(source string) int {
switch source {
case "Google Video":
return 3
case "Envato":
return 2
case "Artgrid":
return 1
default:
return displayLink
return 0
}
}