Files
ai-media-hub/backend/services/cse.go
T
AI Assistant d0fca7374d
build-push / docker (push) Has been cancelled
Improve runtime error messages
2026-03-13 10:44:00 +09:00

260 lines
6.3 KiB
Go

package services
import (
"encoding/json"
"fmt"
"net/http"
"net/url"
"regexp"
"sort"
"strings"
"time"
)
// SearchResult is a single media hit in the shape exposed to API consumers.
// The struct tags fix the JSON field names: title, link, displayLink,
// snippet, thumbnailUrl and source.
type SearchResult struct {
	Title        string `json:"title"`
	Link         string `json:"link"`
	DisplayLink  string `json:"displayLink"`
	Snippet      string `json:"snippet"`
	ThumbnailURL string `json:"thumbnailUrl"`
	Source       string `json:"source"`
}
// SearchService queries a SearXNG instance for media results.
//
// BaseURL is the instance root that "/search" is appended to.
// GoogleVideoEngine and WebEngine are the engine names sent for video and
// general web lookups respectively. Client performs the HTTP requests.
type SearchService struct {
	BaseURL           string
	GoogleVideoEngine string
	WebEngine         string
	Client            *http.Client
}
// NewSearchService builds a SearchService for the SearXNG instance at baseURL.
//
// Trailing slashes are stripped from baseURL. An empty googleVideoEngine
// falls back to "google videos" and an empty webEngine falls back to
// "google". The returned service uses an HTTP client with a 20 second timeout.
func NewSearchService(baseURL, googleVideoEngine, webEngine string) *SearchService {
	svc := &SearchService{
		BaseURL:           strings.TrimRight(baseURL, "/"),
		GoogleVideoEngine: "google videos",
		WebEngine:         "google",
		Client:            &http.Client{Timeout: 20 * time.Second},
	}
	// Caller-supplied engine names take precedence over the defaults.
	if googleVideoEngine != "" {
		svc.GoogleVideoEngine = googleVideoEngine
	}
	if webEngine != "" {
		svc.WebEngine = webEngine
	}
	return svc
}
// SearchMedia runs every non-blank query against each media source
// (Google Video, Envato, Artgrid) and returns the de-duplicated hits.
//
// Per source, the request is tried with the source's engine first, then with
// no engine, and finally, for sources whose category is not "general", in the
// "general" category with no engine. Hits with an empty link, a link already
// collected, or a link rejected by the source's matcher are dropped. The
// result is stably sorted by descending sourceWeight.
//
// An error is returned when BaseURL is empty, or when nothing was collected
// and at least one request failed; in that case the most recent failure is
// reported.
func (s *SearchService) SearchMedia(queries []string) ([]SearchResult, error) {
	if s.BaseURL == "" {
		return nil, fmt.Errorf("searxng base url is not configured")
	}

	type mediaSource struct {
		name       string
		categories string
		engine     string
		siteFilter string
		match      func(string) bool
	}
	type attempt struct {
		categories string
		engine     string
	}

	acceptAll := func(string) bool { return true }
	isArtgrid := func(link string) bool {
		return strings.Contains(strings.ToLower(link), "artgrid.io")
	}
	sources := []mediaSource{
		{
			name:       "Google Video",
			categories: "videos",
			engine:     s.GoogleVideoEngine,
			match:      acceptAll,
		},
		{
			name:       "Envato",
			categories: "general",
			engine:     s.WebEngine,
			siteFilter: "site:elements.envato.com OR site:envato.com OR site:videohive.net",
			match:      isEnvatoURL,
		},
		{
			name:       "Artgrid",
			categories: "general",
			engine:     s.WebEngine,
			siteFilter: "site:artgrid.io",
			match:      isArtgrid,
		},
	}

	var (
		lastErr error
		seen    = make(map[string]bool)
		results = make([]SearchResult, 0, 60)
	)

	for _, raw := range queries {
		query := strings.TrimSpace(raw)
		if query == "" {
			continue
		}

		for _, src := range sources {
			searchQuery := query
			if src.siteFilter != "" {
				searchQuery += " " + src.siteFilter
			}

			// Ordered fallbacks: configured engine, then any engine in the
			// same category, then (for non-general sources) the general
			// category. The first attempt that succeeds wins.
			plan := []attempt{
				{categories: src.categories, engine: src.engine},
				{categories: src.categories, engine: ""},
			}
			if src.categories != "general" {
				plan = append(plan, attempt{categories: "general", engine: ""})
			}

			var (
				items []SearchResult
				err   error
			)
			for _, step := range plan {
				items, err = s.search(searchQuery, step.categories, step.engine, src.name)
				if err == nil {
					break
				}
				lastErr = err
			}
			if err != nil {
				// Every fallback failed for this source; move on to the next.
				continue
			}

			for _, item := range items {
				link := item.Link
				switch {
				case link == "", seen[link]:
					continue
				case src.match != nil && !src.match(link):
					continue
				}
				seen[link] = true
				results = append(results, item)
			}
		}
	}

	sort.SliceStable(results, func(i, j int) bool {
		return sourceWeight(results[i].Source) > sourceWeight(results[j].Source)
	})

	if len(results) == 0 && lastErr != nil {
		return nil, lastErr
	}
	return results, nil
}
// search issues one GET request to BaseURL + "/search" and converts the JSON
// response into SearchResult values.
//
// The request always carries q=query, format=json, safesearch=0 and
// language=ko-KR; categories and engine are sent as "categories" and
// "engines" only when non-empty. source is passed to normalizeSource to
// label each result.
//
// It returns the transport error from the HTTP client unchanged, an error
// for any response status of 300 or above, or a wrapped decode error when
// the body is not valid JSON.
func (s *SearchService) search(query, categories, engine, source string) ([]SearchResult, error) {
	params := url.Values{
		"q":          {query},
		"format":     {"json"},
		"safesearch": {"0"},
		"language":   {"ko-KR"},
	}
	if categories != "" {
		params.Set("categories", categories)
	}
	if engine != "" {
		params.Set("engines", engine)
	}

	resp, err := s.Client.Get(s.BaseURL + "/search?" + params.Encode())
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode >= 300 {
		return nil, fmt.Errorf("searxng returned status %d for query %q", resp.StatusCode, query)
	}

	// rawResult mirrors the subset of a response entry that is consumed here.
	type rawResult struct {
		Title        string `json:"title"`
		URL          string `json:"url"`
		Content      string `json:"content"`
		Thumbnail    string `json:"thumbnail"`
		ThumbnailSrc string `json:"thumbnail_src"`
		ImgSrc       string `json:"img_src"`
		ParsedURL    []any  `json:"parsed_url"`
		Engine       string `json:"engine"`
	}
	var payload struct {
		Results []rawResult `json:"results"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&payload); err != nil {
		return nil, fmt.Errorf("searxng JSON decode failed for query %q: %w", query, err)
	}

	results := make([]SearchResult, len(payload.Results))
	for i := range payload.Results {
		raw := &payload.Results[i]
		link := strings.TrimSpace(raw.URL)
		// Prefer a thumbnail supplied by the backend; fall back to one
		// derived from the link itself.
		thumb := firstNonEmpty(raw.Thumbnail, raw.ThumbnailSrc, raw.ImgSrc, deriveThumbnail(link))
		results[i] = SearchResult{
			Title:        raw.Title,
			Link:         link,
			DisplayLink:  inferDisplayLink(link, raw.ParsedURL),
			Snippet:      raw.Content,
			ThumbnailURL: thumb,
			Source:       normalizeSource(source, link, raw.Engine),
		}
	}
	return results, nil
}
// firstNonEmpty returns the first argument that contains something other
// than whitespace. The chosen value is returned as given (not trimmed).
// It returns "" when no argument qualifies.
func firstNonEmpty(values ...string) string {
	for i := range values {
		if candidate := values[i]; len(strings.TrimSpace(candidate)) > 0 {
			return candidate
		}
	}
	return ""
}
// normalizeSource picks the label stored in SearchResult.Source.
//
// A non-empty source always wins. Otherwise the label is inferred, in order:
// "Envato" when the link contains "envato" or "videohive", "Artgrid" when it
// contains "artgrid", "Google Video" when the engine name contains "google"
// (all comparisons case-insensitive), and finally the engine name itself.
func normalizeSource(source, link, engine string) string {
	if source != "" {
		return source
	}

	lowerLink := strings.ToLower(link)
	if strings.Contains(lowerLink, "envato") || strings.Contains(lowerLink, "videohive") {
		return "Envato"
	}
	if strings.Contains(lowerLink, "artgrid") {
		return "Artgrid"
	}
	if strings.Contains(strings.ToLower(engine), "google") {
		return "Google Video"
	}
	return engine
}
// inferDisplayLink returns the host to show for a result.
//
// parsed is the decoded "parsed_url" array of the result; when its second
// element is a non-blank string it is returned as the host (presumably the
// array is [scheme, host, ...] — confirm against the backend's response
// format). If that element is missing, not a string, or blank, the host is
// taken from parsing link instead. It returns "" when link cannot be parsed.
func inferDisplayLink(link string, parsed []any) string {
	if len(parsed) > 1 {
		// A blank host must not short-circuit the fallback below, otherwise
		// a perfectly parseable link ends up with an empty display host.
		if host, ok := parsed[1].(string); ok && strings.TrimSpace(host) != "" {
			return host
		}
	}
	if parsedURL, err := url.Parse(link); err == nil {
		return parsedURL.Host
	}
	return ""
}
// isEnvatoURL reports whether link, compared case-insensitively, contains
// "envato" or "videohive.net" anywhere in the string.
func isEnvatoURL(link string) bool {
	normalized := strings.ToLower(link)
	for _, marker := range []string{"envato", "videohive.net"} {
		if strings.Contains(normalized, marker) {
			return true
		}
	}
	return false
}
// deriveThumbnail builds an i.ytimg.com "hqdefault.jpg" thumbnail URL from
// the video ID that extractYouTubeID finds in link. It returns "" when no
// ID is found.
func deriveThumbnail(link string) string {
	videoID := extractYouTubeID(link)
	if videoID == "" {
		return ""
	}
	return "https://i.ytimg.com/vi/" + videoID + "/hqdefault.jpg"
}
// youTubeIDPatterns are the URL shapes a video ID is extracted from, tried in
// order. They are compiled once at package scope rather than on every call.
//
// Each pattern requires the 11-character ID to be followed by a non-ID
// character or the end of the string, so a longer token (for example a
// 16-digit numeric video id used by another site) is not truncated into a
// bogus ID. The "v" parameter must directly follow '?' or '&' so that
// parameters merely ending in "v" (such as "rev=") do not match.
var youTubeIDPatterns = []*regexp.Regexp{
	regexp.MustCompile(`(?:[?&]v=|/shorts/|/embed/)([A-Za-z0-9_-]{11})(?:[^A-Za-z0-9_-]|$)`),
	regexp.MustCompile(`youtu\.be/([A-Za-z0-9_-]{11})(?:[^A-Za-z0-9_-]|$)`),
}

// extractYouTubeID returns the 11-character video ID found in link, or ""
// when link matches none of the supported shapes: a "v" query parameter, a
// "/shorts/<id>" or "/embed/<id>" path, or a "youtu.be/<id>" short link.
//
// The host is not checked for the first three shapes, so links that reuse
// the same URL layout on another host are matched as well.
func extractYouTubeID(link string) string {
	for _, pattern := range youTubeIDPatterns {
		if matches := pattern.FindStringSubmatch(link); len(matches) == 2 {
			return matches[1]
		}
	}
	return ""
}
// sourceWeight returns the ranking weight of a source label: 3 for
// "Google Video", 2 for "Envato", 1 for "Artgrid" and 0 for anything else.
// The comparison is exact and case-sensitive.
func sourceWeight(source string) int {
	// Listed from lowest to highest weight; the weight is the position + 1.
	ranked := [...]string{"Artgrid", "Envato", "Google Video"}
	for i, name := range ranked {
		if name == source {
			return i + 1
		}
	}
	return 0
}