package services

import (
	"encoding/json"
	"fmt"
	"io"
	"net/http"
	"net/url"
	"regexp"
	"sort"
	"strings"
	"sync"
	"time"
)

const (
	// maxBaseQueries caps how many caller-supplied queries SearchMedia expands.
	maxBaseQueries = 5
	// initialResultCapacity pre-sizes the merged result slice in SearchMedia.
	initialResultCapacity = 90
	// maxEnrichedResults is how many leading results EnrichResults touches.
	maxEnrichedResults = 24
	// enrichConcurrency bounds the number of concurrent enrichment fetches.
	enrichConcurrency = 4
)

// Patterns used by the per-result accept filters. They are compiled once at
// package scope because the filters run for every search hit.
var (
	envatoItemPathRe  = regexp.MustCompile(`-[A-Z0-9]{6,}$`)
	artgridClipPathRe = regexp.MustCompile(`^/clip/[0-9]+/`)
)

// SearchResult is one normalized search hit as returned by SearchService.
type SearchResult struct {
	Title           string `json:"title"`
	Link            string `json:"link"`
	DisplayLink     string `json:"displayLink"`
	Snippet         string `json:"snippet"`
	ThumbnailURL    string `json:"thumbnailUrl"`
	PreviewVideoURL string `json:"previewVideoUrl"`
	Source          string `json:"source"`
}

// SearchService queries a SearXNG instance for media results.
type SearchService struct {
	// BaseURL is the SearXNG base URL; "/search" is appended to it.
	BaseURL string
	// GoogleVideoEngine is sent as the "engines" value for the "Google Video" source.
	GoogleVideoEngine string
	// WebEngine is sent as the "engines" value for the Envato and Artgrid sources.
	WebEngine string
	// Client performs every outbound HTTP request made by the service.
	Client *http.Client
}

// NewSearchService builds a SearchService with a 20 second HTTP timeout.
//
// Trailing slashes are stripped from baseURL. An empty googleVideoEngine
// defaults to "google videos" and an empty webEngine defaults to "google".
func NewSearchService(baseURL, googleVideoEngine, webEngine string) *SearchService {
	if googleVideoEngine == "" {
		googleVideoEngine = "google videos"
	}
	if webEngine == "" {
		webEngine = "google"
	}
	return &SearchService{
		BaseURL:           strings.TrimRight(baseURL, "/"),
		GoogleVideoEngine: googleVideoEngine,
		WebEngine:         webEngine,
		Client:            &http.Client{Timeout: 20 * time.Second},
	}
}

// SearchMedia expands up to five de-duplicated base queries into
// source-specific searches (Envato, Artgrid, Google Video), merges the
// accepted hits without duplicate links, orders them by source weight and
// returns them after enrichment.
//
// enabledPlatforms is keyed by the lower-cased source name ("envato",
// "artgrid", "google video"). A nil or empty map enables every source.
//
// An error is returned when BaseURL is empty, or when no result was collected
// and at least one search failed (the most recent failure is reported).
func (s *SearchService) SearchMedia(queries []string, enabledPlatforms map[string]bool) ([]SearchResult, error) {
	if s.BaseURL == "" {
		return nil, fmt.Errorf("searxng base url is not configured")
	}

	type sourceConfig struct {
		name       string
		categories string
		engine     string
		build      func(string) []string
		accept     func(SearchResult) bool
	}

	sources := []sourceConfig{
		{
			name:       "Envato",
			categories: "general",
			engine:     s.WebEngine,
			build:      buildEnvatoQueries,
			accept:     isRenderableEnvatoResult,
		},
		{
			name:       "Artgrid",
			categories: "general",
			engine:     s.WebEngine,
			build:      buildArtgridQueries,
			accept:     isRenderableArtgridResult,
		},
		{
			name:       "Google Video",
			categories: "videos",
			engine:     s.GoogleVideoEngine,
			build:      buildGoogleVideoQueries,
			accept:     isUsefulGoogleVideoResult,
		},
	}

	seen := map[string]bool{}
	results := make([]SearchResult, 0, initialResultCapacity)
	var lastErr error

	for _, base := range limitQueries(queries, maxBaseQueries) {
		base = strings.TrimSpace(base)
		if base == "" {
			continue
		}
		for _, source := range sources {
			if len(enabledPlatforms) > 0 && !enabledPlatforms[strings.ToLower(source.name)] {
				continue
			}
			for _, searchQuery := range source.build(base) {
				items, err := s.search(searchQuery, source.categories, source.engine, source.name)
				if err != nil {
					// Retry once without pinning an engine before giving up on this query.
					lastErr = err
					items, err = s.search(searchQuery, source.categories, "", source.name)
				}
				if err != nil {
					lastErr = err
					continue
				}
				for _, item := range items {
					if item.Link == "" || seen[item.Link] || !source.accept(item) {
						continue
					}
					seen[item.Link] = true
					results = append(results, item)
				}
			}
		}
	}

	if len(results) == 0 && lastErr != nil {
		return nil, lastErr
	}

	// Stable sort keeps discovery order within each source.
	sort.SliceStable(results, func(i, j int) bool {
		return sourceWeight(results[i].Source) > sourceWeight(results[j].Source)
	})
	return s.EnrichResults(results), nil
}

// EnrichResults returns a copy of results in which the first 24 entries have
// been passed through source-specific enrichment, running at most four
// enrichments at a time. The input slice is not modified; when it is empty it
// is returned as-is.
func (s *SearchService) EnrichResults(results []SearchResult) []SearchResult {
	limit := minInt(len(results), maxEnrichedResults)
	if limit == 0 {
		return results
	}

	enriched := make([]SearchResult, len(results))
	copy(enriched, results)

	var wg sync.WaitGroup
	sem := make(chan struct{}, enrichConcurrency)
	for idx := 0; idx < limit; idx++ {
		// Take a slot before spawning so no more than enrichConcurrency
		// goroutines exist at once.
		sem <- struct{}{}
		wg.Add(1)
		go func(i int) {
			defer wg.Done()
			defer func() { <-sem }()
			// Each goroutine writes only to its own index.
			enriched[i] = s.enrichResult(enriched[i])
		}(idx)
	}
	wg.Wait()
	return enriched
}

// enrichResult dispatches on result.Source. Envato and Artgrid results are
// enriched over HTTP; any other result only gets a derived thumbnail when it
// has none.
func (s *SearchService) enrichResult(result SearchResult) SearchResult {
	switch result.Source {
	case "Envato":
		return s.enrichEnvato(result)
	case "Artgrid":
		return s.enrichArtgrid(result)
	default:
		if result.ThumbnailURL == "" {
			result.ThumbnailURL = deriveThumbnail(result.Link)
		}
		return result
	}
}

// enrichEnvato fetches the result page and fills a missing thumbnail from its
// og:image / twitter:image meta tags and a missing preview from the first
// video URL found in the page. The result is returned unchanged when the
// fetch fails.
func (s *SearchService) enrichEnvato(result SearchResult) SearchResult {
	html, err := s.fetchText(result.Link)
	if err != nil {
		return result
	}
	if result.ThumbnailURL == "" {
		result.ThumbnailURL = firstNonEmpty(
			extractMetaContent(html, "og:image"),
			extractMetaContent(html, "twitter:image"),
		)
	}
	if result.PreviewVideoURL == "" {
		result.PreviewVideoURL = extractVideoPreviewURL(html)
	}
	return result
}

// enrichArtgrid fills a missing thumbnail and preview for an Artgrid clip.
// It first scans the clip-details API response for image/video URLs and, if
// either field is still empty, falls back to scraping the clip page. Fetch
// failures are ignored; a link without a clip ID is returned unchanged.
func (s *SearchService) enrichArtgrid(result SearchResult) SearchResult {
	clipID := extractArtgridClipID(result.Link)
	if clipID == "" {
		return result
	}

	apiURL := "https://artgrid.io/api/clip/details?clipId=" + clipID
	if body, err := s.fetchJSONText(apiURL); err == nil {
		urls := collectURLs(body)
		if result.ThumbnailURL == "" {
			result.ThumbnailURL = pickImageURL(urls)
		}
		if result.PreviewVideoURL == "" {
			result.PreviewVideoURL = pickVideoURL(urls)
		}
	}

	if result.ThumbnailURL != "" && result.PreviewVideoURL != "" {
		return result
	}

	html, err := s.fetchText(result.Link)
	if err != nil {
		return result
	}
	if result.ThumbnailURL == "" {
		result.ThumbnailURL = firstNonEmpty(
			extractMetaContent(html, "og:image"),
			extractMetaContent(html, "twitter:image"),
		)
		if result.ThumbnailURL == "" {
			result.ThumbnailURL = extractArtgridBackgroundThumbnail(html, clipID)
		}
	}
	if result.PreviewVideoURL == "" {
		result.PreviewVideoURL = extractVideoPreviewURL(html)
	}
	return result
}

// search issues one GET to BaseURL + "/search" with format=json and converts
// the response into SearchResult values. Empty categories or engine values
// are omitted from the request. Results with a blank URL are dropped.
//
// A status code of 300 or above, a transport failure, or a JSON decode
// failure is returned as an error.
func (s *SearchService) search(query, categories, engine, source string) ([]SearchResult, error) {
	values := url.Values{}
	values.Set("q", query)
	values.Set("format", "json")
	values.Set("safesearch", "0")
	values.Set("language", "en-US")
	if categories != "" {
		values.Set("categories", categories)
	}
	if engine != "" {
		values.Set("engines", engine)
	}

	endpoint := s.BaseURL + "/search?" + values.Encode()
	resp, err := s.Client.Get(endpoint)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode >= 300 {
		return nil, fmt.Errorf("searxng returned status %d for query %q", resp.StatusCode, query)
	}

	var payload struct {
		Results []struct {
			Title        string `json:"title"`
			URL          string `json:"url"`
			Content      string `json:"content"`
			Thumbnail    string `json:"thumbnail"`
			ThumbnailSrc string `json:"thumbnail_src"`
			ImgSrc       string `json:"img_src"`
			ParsedURL    []any  `json:"parsed_url"`
			Engine       string `json:"engine"`
		} `json:"results"`
	}
	if err := json.NewDecoder(resp.Body).Decode(&payload); err != nil {
		return nil, fmt.Errorf("searxng JSON decode failed for query %q: %w", query, err)
	}

	results := make([]SearchResult, 0, len(payload.Results))
	for _, item := range payload.Results {
		link := strings.TrimSpace(item.URL)
		if link == "" {
			continue
		}
		results = append(results, SearchResult{
			Title:        item.Title,
			Link:         link,
			DisplayLink:  inferDisplayLink(link, item.ParsedURL),
			Snippet:      item.Content,
			ThumbnailURL: firstNonEmpty(item.Thumbnail, item.ThumbnailSrc, item.ImgSrc, deriveThumbnail(link)),
			Source:       normalizeSource(source, link, item.Engine),
		})
	}
	return results, nil
}

// buildGoogleVideoQueries returns the two footage-oriented query variants
// used for the "Google Video" source.
func buildGoogleVideoQueries(base string) []string {
	return []string{
		fmt.Sprintf(`"%s" ("stock footage" OR "b-roll" OR cinematic OR "establishing shot" OR editorial) -tutorial -"how to" -review -reaction -course -podcast -vlog -interview -breakdown -edit -editing`, base),
		fmt.Sprintf(`"%s" ("cinematic footage" OR "free stock footage" OR "4k footage") -tutorial -"how to" -review`, base),
	}
}

// buildEnvatoQueries returns the two site-restricted query variants used for
// the "Envato" source.
func buildEnvatoQueries(base string) []string {
	return []string{
		fmt.Sprintf(`"%s" ("stock footage" OR "stock video" OR "b-roll" OR cinematic) site:elements.envato.com`, base),
		fmt.Sprintf(`"%s" ("stock footage" OR "stock video" OR "b-roll" OR cinematic) site:elements.envato.com/stock-video`, base),
	}
}

// buildArtgridQueries returns the two site-restricted query variants used for
// the "Artgrid" source.
func buildArtgridQueries(base string) []string {
	return []string{
		fmt.Sprintf(`"%s" ("stock footage" OR "b-roll" OR cinematic OR editorial) site:artgrid.io/clip/`, base),
		fmt.Sprintf(`"%s" ("footage" OR "cinematic" OR "establishing shot") site:artgrid.io/clip/`, base),
	}
}

// isUsefulGoogleVideoResult accepts only YouTube watch / youtu.be / shorts
// links whose title and snippet contain none of the banned terms.
func isUsefulGoogleVideoResult(result SearchResult) bool {
	lowerLink := strings.ToLower(result.Link)
	isYouTube := strings.Contains(lowerLink, "youtube.com/watch") ||
		strings.Contains(lowerLink, "youtu.be/") ||
		strings.Contains(lowerLink, "youtube.com/shorts/")
	if !isYouTube {
		return false
	}

	text := strings.ToLower(result.Title + " " + result.Snippet)
	for _, banned := range []string{
		"tutorial", "how to", "review", "reaction", "podcast", "interview",
		"walkthrough", "course", "lesson", "edit tutorial", "editing tutorial",
		"premiere pro", "after effects", "breakdown", "explained", "vlog",
		"tips", "guide", "learn", "free download",
	} {
		if strings.Contains(text, banned) {
			return false
		}
	}
	return true
}

// isRenderableEnvatoResult accepts links on elements.envato.com whose path
// ends in "-" followed by six or more upper-case letters or digits, and
// rejects paths containing "/stock-video" or "/video-templates".
func isRenderableEnvatoResult(result SearchResult) bool {
	parsed, err := url.Parse(result.Link)
	if err != nil {
		return false
	}
	if !strings.Contains(strings.ToLower(parsed.Host), "elements.envato.com") {
		return false
	}

	// NOTE(review): the path is trimmed of slashes first, so the two
	// substring checks below only fire when the segment is not the first one
	// (for example behind a locale prefix) — confirm this is intended.
	path := strings.Trim(parsed.Path, "/")
	if path == "" || strings.Contains(path, "/stock-video") || strings.Contains(path, "/video-templates") {
		return false
	}
	return envatoItemPathRe.MatchString(path)
}

// isRenderableArtgridResult accepts links on artgrid.io whose path starts
// with "/clip/<digits>/".
func isRenderableArtgridResult(result SearchResult) bool {
	parsed, err := url.Parse(result.Link)
	if err != nil {
		return false
	}
	if !strings.Contains(strings.ToLower(parsed.Host), "artgrid.io") {
		return false
	}
	return artgridClipPathRe.MatchString(parsed.Path)
}

// normalizeSource returns source when it is set. Otherwise it infers a label
// from the link ("Envato" for envato/videohive, "Artgrid" for artgrid), then
// from the engine name ("Google Video" when it contains "google"), and
// finally falls back to the raw engine name.
func normalizeSource(source, link, engine string) string {
	if source != "" {
		return source
	}

	lowerLink := strings.ToLower(link)
	switch {
	case strings.Contains(lowerLink, "envato"), strings.Contains(lowerLink, "videohive"):
		return "Envato"
	case strings.Contains(lowerLink, "artgrid"):
		return "Artgrid"
	case strings.Contains(strings.ToLower(engine), "google"):
		return "Google Video"
	default:
		return engine
	}
}

func inferDisplayLink(link string, parsed []any) string { if len(parsed) > 1 { if host, ok := parsed[1].(string); ok { return host } } if parsedURL, err := url.Parse(link); err == nil { return parsedURL.Host } return "" } func deriveThumbnail(link string) string { if videoID := extractYouTubeID(link); videoID != "" { return "https://i.ytimg.com/vi/" + videoID + "/hqdefault.jpg" } return "" } func extractYouTubeID(link string) string { patterns := []*regexp.Regexp{ regexp.MustCompile(`(?:v=|\/shorts\/|\/embed\/)([A-Za-z0-9_-]{11})`), regexp.MustCompile(`youtu\.be\/([A-Za-z0-9_-]{11})`), } for _, pattern := range patterns { matches := pattern.FindStringSubmatch(link) if len(matches) == 2 { return matches[1] } } return "" } func extractMetaContent(html, property string) string { patterns := []*regexp.Regexp{ regexp.MustCompile(`(?i)]+property=["']` + regexp.QuoteMeta(property) + `["'][^>]+content=["']([^"']+)`), regexp.MustCompile(`(?i)]+name=["']` + regexp.QuoteMeta(property) + `["'][^>]+content=["']([^"']+)`), } for _, pattern := range patterns { matches := pattern.FindStringSubmatch(html) if len(matches) == 2 { return htmlUnescape(matches[1]) } } return "" } func extractVideoPreviewURL(html string) string { pattern := regexp.MustCompile(`https?:\\?/\\?/[^"'\\s>]+(?:mp4|m3u8)`) matches := pattern.FindAllString(html, -1) for _, match := range matches { candidate := strings.ReplaceAll(match, `\/`, `/`) candidate = strings.ReplaceAll(candidate, `\u002F`, `/`) candidate = strings.ReplaceAll(candidate, `\\`, "") if strings.Contains(strings.ToLower(candidate), "preview") || strings.Contains(strings.ToLower(candidate), "video") || strings.Contains(strings.ToLower(candidate), "watermark") { return candidate } } return "" } func extractArtgridBackgroundThumbnail(html, clipID string) string { pattern := regexp.MustCompile(`https://[^"'\\s>]+(?:artgrid\.imgix\.net|cms-public-artifacts\.artlist\.io|artlist-content-images\.imgix\.net)[^"'\\s>]+(?:jpeg|jpg|png|webp)`) matches := 
pattern.FindAllString(html, -1) for _, match := range matches { if strings.Contains(match, clipID) || strings.Contains(strings.ToLower(match), "graded-thumbnail") { return match } } return "" } func extractArtgridClipID(link string) string { matches := regexp.MustCompile(`/clip/([0-9]+)/`).FindStringSubmatch(link) if len(matches) == 2 { return matches[1] } return "" } func collectURLs(body string) []string { pattern := regexp.MustCompile(`https?:\/\/[^"'\\\s]+`) matches := pattern.FindAllString(body, -1) seen := map[string]bool{} results := make([]string, 0, len(matches)) for _, match := range matches { candidate := strings.TrimSpace(strings.Trim(match, `"'`)) if candidate == "" || seen[candidate] { continue } seen[candidate] = true results = append(results, candidate) } return results } func pickImageURL(urls []string) string { for _, item := range urls { lower := strings.ToLower(item) if strings.Contains(lower, ".jpg") || strings.Contains(lower, ".jpeg") || strings.Contains(lower, ".png") || strings.Contains(lower, ".webp") { return item } } return "" } func pickVideoURL(urls []string) string { for _, item := range urls { lower := strings.ToLower(item) if strings.Contains(lower, ".mp4") || strings.Contains(lower, ".m3u8") { return item } } return "" } func (s *SearchService) fetchText(target string) (string, error) { resp, err := s.Client.Get(target) if err != nil { return "", err } defer resp.Body.Close() if resp.StatusCode >= 300 { return "", fmt.Errorf("fetch returned status %d", resp.StatusCode) } data, err := io.ReadAll(io.LimitReader(resp.Body, 1024*1024)) if err != nil { return "", err } return string(data), nil } func (s *SearchService) fetchJSONText(target string) (string, error) { req, err := http.NewRequest(http.MethodGet, target, nil) if err != nil { return "", err } req.Header.Set("Accept", "application/json, text/json") resp, err := s.Client.Do(req) if err != nil { return "", err } defer resp.Body.Close() if resp.StatusCode >= 300 { return "", 
fmt.Errorf("json fetch returned status %d", resp.StatusCode) } data, err := io.ReadAll(io.LimitReader(resp.Body, 1024*1024)) if err != nil { return "", err } return string(data), nil } func firstNonEmpty(values ...string) string { for _, value := range values { if strings.TrimSpace(value) != "" { return value } } return "" } func limitQueries(queries []string, limit int) []string { seen := map[string]bool{} filtered := make([]string, 0, minInt(len(queries), limit)) for _, item := range queries { trimmed := strings.TrimSpace(item) if trimmed == "" { continue } key := strings.ToLower(trimmed) if seen[key] { continue } seen[key] = true filtered = append(filtered, trimmed) if len(filtered) >= limit { break } } return filtered } func htmlUnescape(text string) string { replacer := strings.NewReplacer("&", "&", """, `"`, "'", "'", "<", "<", ">", ">") return replacer.Replace(text) } func sourceWeight(source string) int { switch source { case "Envato": return 3 case "Artgrid": return 2 case "Google Video": return 1 default: return 0 } } func minInt(a, b int) int { if a < b { return a } return b }