package services
import (
"encoding/json"
"fmt"
"io"
"net/http"
"net/url"
"os/exec"
"regexp"
"sort"
"strings"
"sync"
"time"
)
// SearchResult is a single media hit as returned to callers and serialized
// to JSON using the field tags below.
type SearchResult struct {
	// Title is the human-readable title of the hit.
	Title string `json:"title"`
	// Link is the URL of the hit; it is also used as the de-duplication key.
	Link string `json:"link"`
	// DisplayLink is the host shown for the hit.
	DisplayLink string `json:"displayLink"`
	// Snippet is the short description text of the hit.
	Snippet string `json:"snippet"`
	// ThumbnailURL is the still image associated with the hit, if any.
	ThumbnailURL string `json:"thumbnailUrl"`
	// PreviewVideoURL is a playable preview stream for the hit, if any.
	PreviewVideoURL string `json:"previewVideoUrl"`
	// Source is the provider label, e.g. "Envato", "Artgrid" or "Google Video".
	Source string `json:"source"`
}
// SearchService queries a SearXNG instance and enriches the hits it returns.
type SearchService struct {
	// BaseURL is the SearXNG root URL; "/search" is appended to it.
	BaseURL string
	// GoogleVideoEngine is the SearXNG engine used for the "Google Video" source.
	GoogleVideoEngine string
	// WebEngine is the SearXNG engine used for the Envato and Artgrid sources.
	WebEngine string
	// Client performs every outbound HTTP request made by the service.
	Client *http.Client
}
// NewSearchService builds a SearchService for the SearXNG instance at baseURL.
//
// Trailing slashes are stripped from baseURL. An empty googleVideoEngine
// falls back to "google videos" and an empty webEngine falls back to
// "google". The HTTP client is created with a 20 second timeout.
func NewSearchService(baseURL, googleVideoEngine, webEngine string) *SearchService {
	svc := &SearchService{
		BaseURL:           strings.TrimRight(baseURL, "/"),
		GoogleVideoEngine: "google videos",
		WebEngine:         "google",
		Client:            &http.Client{Timeout: 20 * time.Second},
	}
	// Explicit engine names override the defaults set above.
	if googleVideoEngine != "" {
		svc.GoogleVideoEngine = googleVideoEngine
	}
	if webEngine != "" {
		svc.WebEngine = webEngine
	}
	return svc
}
// SearchMedia runs up to three distinct queries against every enabled source
// and returns the de-duplicated, enriched hits ordered by source weight.
//
// enabledPlatforms is keyed by the lower-cased source name ("envato",
// "artgrid", "google video"); an empty or nil map enables every source.
// An error is returned when the base URL is not configured, or when no hit
// was collected and at least one search request failed.
func (s *SearchService) SearchMedia(queries []string, enabledPlatforms map[string]bool) ([]SearchResult, error) {
	if s.BaseURL == "" {
		return nil, fmt.Errorf("searxng base url is not configured")
	}

	type provider struct {
		name, categories, engine string
		maxResults               int
		build                    func(string) []string
		accept                   func(SearchResult) bool
	}
	providers := []provider{
		{"Envato", "general", s.WebEngine, 8, buildEnvatoQueries, isRenderableEnvatoResult},
		{"Artgrid", "general", s.WebEngine, 8, buildArtgridQueries, isRenderableArtgridResult},
		{"Google Video", "videos", s.GoogleVideoEngine, 6, buildGoogleVideoQueries, isUsefulGoogleVideoResult},
	}

	var (
		collected = make([]SearchResult, 0, 90)
		visited   = map[string]bool{}
		perSource = map[string]int{}
		failure   error
	)
	restrict := len(enabledPlatforms) > 0

	for _, raw := range limitQueries(queries, 3) {
		term := strings.TrimSpace(raw)
		if term == "" {
			continue
		}
		for _, p := range providers {
			if restrict && !enabledPlatforms[strings.ToLower(p.name)] {
				continue
			}
			if perSource[p.name] >= p.maxResults {
				continue
			}
			for _, q := range p.build(term) {
				if perSource[p.name] >= p.maxResults {
					break
				}
				hits, err := s.search(q, p.categories, p.engine, p.name)
				if err != nil {
					failure = err
					// Retry once without pinning a specific engine.
					if hits, err = s.search(q, p.categories, "", p.name); err != nil {
						failure = err
						continue
					}
				}
				for _, hit := range hits {
					if hit.Link == "" || visited[hit.Link] {
						continue
					}
					if !p.accept(hit) {
						continue
					}
					visited[hit.Link] = true
					collected = append(collected, hit)
					perSource[p.name]++
					if perSource[p.name] >= p.maxResults {
						break
					}
				}
			}
		}
	}

	// Only surface a search failure when it left us with nothing to show.
	if failure != nil && len(collected) == 0 {
		return nil, failure
	}

	// Stable sort keeps the discovery order within each source.
	sort.SliceStable(collected, func(a, b int) bool {
		return sourceWeight(collected[b].Source) < sourceWeight(collected[a].Source)
	})
	return s.EnrichResults(collected), nil
}
// EnrichResults returns a copy of results in which the first 18 entries have
// been passed through enrichResult. At most four enrichments run at the same
// time. The input slice is returned as-is when it is empty.
func (s *SearchService) EnrichResults(results []SearchResult) []SearchResult {
	count := len(results)
	if count > 18 {
		count = 18
	}
	if count == 0 {
		return results
	}

	out := make([]SearchResult, len(results))
	copy(out, results)

	slots := make(chan struct{}, 4)
	var pending sync.WaitGroup
	pending.Add(count)
	for i := 0; i < count; i++ {
		go func(pos int) {
			defer pending.Done()
			// Take a slot before doing any network work, release it after.
			slots <- struct{}{}
			defer func() { <-slots }()
			// Each goroutine writes only its own index.
			out[pos] = s.enrichResult(out[pos])
		}(i)
	}
	pending.Wait()
	return out
}
// enrichResult dispatches to the source-specific enrichment. Results from
// any other source only get a derived thumbnail when they have none.
func (s *SearchService) enrichResult(result SearchResult) SearchResult {
	if result.Source == "Envato" {
		return s.enrichEnvato(result)
	}
	if result.Source == "Artgrid" {
		return s.enrichArtgrid(result)
	}
	if result.ThumbnailURL != "" {
		return result
	}
	result.ThumbnailURL = deriveThumbnail(result.Link)
	return result
}
// enrichEnvato fetches the result's page and fills in title, snippet,
// thumbnail and preview video from its metadata. Enrichment is best-effort:
// when the page cannot be fetched the result is returned unchanged.
func (s *SearchService) enrichEnvato(result SearchResult) SearchResult {
	html, err := s.fetchText(result.Link)
	if err != nil {
		return result
	}

	result.Title = firstNonEmpty(
		extractMetaContent(html, "og:title"),
		result.Title,
	)
	result.Snippet = firstNonEmpty(
		extractMetaContent(html, "og:description"),
		extractMetaContent(html, "description"),
		result.Snippet,
	)

	pageThumbnail := firstNonEmpty(
		extractMetaContent(html, "og:image"),
		extractMetaContent(html, "twitter:image"),
		extractJSONLDValue(html, "thumbnailUrl"),
	)
	// Never replace an existing thumbnail with nothing: only switch to the
	// page thumbnail when the page actually provided one.
	if pageThumbnail != "" && shouldPreferPageThumbnail(result.ThumbnailURL, result.Link) {
		result.ThumbnailURL = pageThumbnail
	}

	if result.PreviewVideoURL == "" {
		result.PreviewVideoURL = firstNonEmpty(
			extractJSONLDValue(html, "contentUrl"),
			extractMetaContent(html, "twitter:player:stream"),
			extractVideoPreviewURL(html),
			deriveEnvatoPreviewFromThumbnail(pageThumbnail),
			deriveEnvatoPreviewFromThumbnail(result.ThumbnailURL),
		)
	}
	return result
}
// enrichArtgrid fills in thumbnail and preview video for an Artgrid clip.
//
// It first queries the clip-details endpoint for the clip ID found in the
// link and, when thumbnail or preview is still missing, falls back to the
// metadata of the clip page itself. Every step is best-effort; failures
// leave the corresponding fields untouched. Links without a clip ID are
// returned unchanged.
func (s *SearchService) enrichArtgrid(result SearchResult) SearchResult {
	clipID := extractArtgridClipID(result.Link)
	if clipID == "" {
		return result
	}

	apiURL := "https://artgrid.io/api/clip/details?clipId=" + clipID
	if body, err := s.fetchJSONText(apiURL); err == nil {
		urls := collectURLs(body)
		if result.ThumbnailURL == "" {
			result.ThumbnailURL = pickImageURL(urls)
		}
		if result.PreviewVideoURL == "" {
			result.PreviewVideoURL = pickVideoURL(urls)
		}
	}

	// The API answer was sufficient; skip the page fetch.
	if result.ThumbnailURL != "" && result.PreviewVideoURL != "" {
		return result
	}

	html, err := s.fetchText(result.Link)
	if err != nil {
		return result
	}

	result.Title = firstNonEmpty(
		extractMetaContent(html, "og:title"),
		result.Title,
	)
	result.Snippet = firstNonEmpty(
		extractMetaContent(html, "og:description"),
		extractMetaContent(html, "description"),
		result.Snippet,
	)

	pageThumbnail := firstNonEmpty(
		extractMetaContent(html, "og:image"),
		extractMetaContent(html, "twitter:image"),
		extractArtgridBackgroundThumbnail(html, clipID),
		extractJSONLDValue(html, "image"),
	)
	// Never replace an existing thumbnail with nothing: only switch to the
	// page thumbnail when the page actually provided one.
	if pageThumbnail != "" && shouldPreferPageThumbnail(result.ThumbnailURL, result.Link) {
		result.ThumbnailURL = pageThumbnail
	}

	if result.PreviewVideoURL == "" {
		result.PreviewVideoURL = firstNonEmpty(
			extractJSONLDValue(html, "contentUrl"),
			extractMetaContent(html, "twitter:player:stream"),
			extractVideoPreviewURL(html),
		)
	}
	return result
}
// search performs one SearXNG JSON query and converts the response into
// SearchResults labelled with source.
//
// categories and engine are only sent when non-empty. Any response status of
// 300 or above, a transport failure, or an undecodable body yields an error.
// Entries whose URL is blank are dropped.
func (s *SearchService) search(query, categories, engine, source string) ([]SearchResult, error) {
	params := url.Values{
		"q":          {query},
		"format":     {"json"},
		"safesearch": {"0"},
		"language":   {"en-US"},
	}
	if categories != "" {
		params.Set("categories", categories)
	}
	if engine != "" {
		params.Set("engines", engine)
	}

	resp, err := s.Client.Get(s.BaseURL + "/search?" + params.Encode())
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if code := resp.StatusCode; code >= 300 {
		return nil, fmt.Errorf("searxng returned status %d for query %q", code, query)
	}

	type hit struct {
		Title        string `json:"title"`
		URL          string `json:"url"`
		Content      string `json:"content"`
		Thumbnail    string `json:"thumbnail"`
		ThumbnailSrc string `json:"thumbnail_src"`
		ImgSrc       string `json:"img_src"`
		ParsedURL    []any  `json:"parsed_url"`
		Engine       string `json:"engine"`
	}
	var payload struct {
		Results []hit `json:"results"`
	}
	if decodeErr := json.NewDecoder(resp.Body).Decode(&payload); decodeErr != nil {
		return nil, fmt.Errorf("searxng JSON decode failed for query %q: %w", query, decodeErr)
	}

	out := make([]SearchResult, 0, len(payload.Results))
	for i := range payload.Results {
		h := &payload.Results[i]
		link := strings.TrimSpace(h.URL)
		if link == "" {
			continue
		}
		// Prefer any thumbnail the engine supplied, then fall back to a derived one.
		thumb := firstNonEmpty(h.Thumbnail, h.ThumbnailSrc, h.ImgSrc, deriveThumbnail(link))
		out = append(out, SearchResult{
			Title:        h.Title,
			Link:         link,
			DisplayLink:  inferDisplayLink(link, h.ParsedURL),
			Snippet:      h.Content,
			ThumbnailURL: thumb,
			Source:       normalizeSource(source, link, h.Engine),
		})
	}
	return out, nil
}
// buildGoogleVideoQueries returns the single video-search query for base:
// the quoted phrase, a set of footage-related alternatives, and a list of
// excluded terms.
func buildGoogleVideoQueries(base string) []string {
	const template = `"%s" ("stock footage" OR "b-roll" OR cinematic OR "establishing shot" OR editorial) -tutorial -"how to" -review -reaction -course -podcast -vlog -interview -breakdown -edit -editing`
	query := fmt.Sprintf(template, base)
	return []string{query}
}
// buildEnvatoQueries returns two site-restricted queries for base: one over
// elements.envato.com and one over its stock-video section.
func buildEnvatoQueries(base string) []string {
	const (
		wholeSite  = `"%s" ("stock footage" OR "stock video" OR "b-roll" OR cinematic) site:elements.envato.com`
		stockVideo = `"%s" ("stock footage" OR "stock video" OR "b-roll" OR cinematic) site:elements.envato.com/stock-video`
	)
	queries := make([]string, 0, 2)
	queries = append(queries, fmt.Sprintf(wholeSite, base))
	queries = append(queries, fmt.Sprintf(stockVideo, base))
	return queries
}
// buildArtgridQueries returns two queries for base, both restricted to
// artgrid.io clip pages and differing only in their keyword alternatives.
func buildArtgridQueries(base string) []string {
	const (
		stockTerms  = `"%s" ("stock footage" OR "b-roll" OR cinematic OR editorial) site:artgrid.io/clip/`
		cinemaTerms = `"%s" ("footage" OR "cinematic" OR "establishing shot") site:artgrid.io/clip/`
	)
	queries := make([]string, 0, 2)
	queries = append(queries, fmt.Sprintf(stockTerms, base))
	queries = append(queries, fmt.Sprintf(cinemaTerms, base))
	return queries
}
// isUsefulGoogleVideoResult reports whether result is a YouTube video
// (watch, short link or shorts URL) whose title and snippet contain none of
// the blocked keywords. Matching is case-insensitive and substring-based.
func isUsefulGoogleVideoResult(result SearchResult) bool {
	link := strings.ToLower(result.Link)
	onYouTube := false
	for _, marker := range []string{"youtube.com/watch", "youtu.be/", "youtube.com/shorts/"} {
		if strings.Contains(link, marker) {
			onYouTube = true
			break
		}
	}
	if !onYouTube {
		return false
	}

	blocked := []string{
		"tutorial", "how to", "review", "reaction", "podcast", "interview", "walkthrough",
		"course", "lesson", "edit tutorial", "editing tutorial", "premiere pro", "after effects",
		"breakdown", "explained", "vlog", "tips", "guide", "learn", "free download",
		"bgm", "music", "song", "lyrics", "audio", "soundtrack", "trailer", "teaser",
		"full movie", "movie clip", "status", "whatsapp status", "fan cam", "fancam",
	}
	haystack := strings.ToLower(result.Title + " " + result.Snippet)
	for i := 0; i < len(blocked); i++ {
		if strings.Contains(haystack, blocked[i]) {
			return false
		}
	}
	return true
}
// envatoItemPathRe matches a path ending in "-" followed by six or more
// upper-case letters or digits.
var envatoItemPathRe = regexp.MustCompile(`-[A-Z0-9]{6,}$`)

// isRenderableEnvatoResult reports whether result links to an
// elements.envato.com page whose path ends in an item-style suffix.
//
// It returns false for unparsable links, other hosts, the site root, and
// paths containing "/stock-video" or "/video-templates" after the leading
// slash has been trimmed.
func isRenderableEnvatoResult(result SearchResult) bool {
	parsed, err := url.Parse(result.Link)
	if err != nil {
		return false
	}
	if !strings.Contains(strings.ToLower(parsed.Host), "elements.envato.com") {
		return false
	}
	path := strings.Trim(parsed.Path, "/")
	if path == "" || strings.Contains(path, "/stock-video") || strings.Contains(path, "/video-templates") {
		return false
	}
	return envatoItemPathRe.MatchString(path)
}
// artgridClipPathRe matches paths of the form "/clip/<digits>/...".
var artgridClipPathRe = regexp.MustCompile(`^/clip/[0-9]+/`)

// isRenderableArtgridResult reports whether result links to an artgrid.io
// page whose path starts with "/clip/<numeric id>/". Unparsable links and
// other hosts are rejected.
func isRenderableArtgridResult(result SearchResult) bool {
	parsed, err := url.Parse(result.Link)
	if err != nil {
		return false
	}
	if !strings.Contains(strings.ToLower(parsed.Host), "artgrid.io") {
		return false
	}
	return artgridClipPathRe.MatchString(parsed.Path)
}
// normalizeSource picks the source label for a hit. An explicit source wins;
// otherwise the label is inferred from the link ("envato"/"videohive" map to
// "Envato", "artgrid" to "Artgrid"), then from an engine name containing
// "google" ("Google Video"), and finally falls back to the raw engine name.
// All substring checks are case-insensitive.
func normalizeSource(source, link, engine string) string {
	if source != "" {
		return source
	}
	lowerLink := strings.ToLower(link)
	if strings.Contains(lowerLink, "envato") || strings.Contains(lowerLink, "videohive") {
		return "Envato"
	}
	if strings.Contains(lowerLink, "artgrid") {
		return "Artgrid"
	}
	if strings.Contains(strings.ToLower(engine), "google") {
		return "Google Video"
	}
	return engine
}
// inferDisplayLink returns the host to display for link. It uses the second
// element of parsed when that element is a string; otherwise it parses link
// and returns its host, or "" when link cannot be parsed.
func inferDisplayLink(link string, parsed []any) string {
	if len(parsed) >= 2 {
		host, isString := parsed[1].(string)
		if isString {
			return host
		}
	}
	u, err := url.Parse(link)
	if err != nil {
		return ""
	}
	return u.Host
}
// deriveThumbnail returns the i.ytimg.com "hqdefault" thumbnail URL for a
// link from which a YouTube video ID can be extracted, and "" otherwise.
func deriveThumbnail(link string) string {
	id := extractYouTubeID(link)
	if id == "" {
		return ""
	}
	return "https://i.ytimg.com/vi/" + id + "/hqdefault.jpg"
}
// youTubeIDPatterns are tried in order; each captures an 11-character video
// ID. They are compiled once instead of on every call.
var youTubeIDPatterns = []*regexp.Regexp{
	regexp.MustCompile(`(?:v=|\/shorts\/|\/embed\/)([A-Za-z0-9_-]{11})`),
	regexp.MustCompile(`youtu\.be\/([A-Za-z0-9_-]{11})`),
}

// extractYouTubeID returns the 11-character video ID found in link after
// "v=", "/shorts/", "/embed/" or "youtu.be/", or "" when none is present.
func extractYouTubeID(link string) string {
	for _, pattern := range youTubeIDPatterns {
		if matches := pattern.FindStringSubmatch(link); len(matches) == 2 {
			return matches[1]
		}
	}
	return ""
}
// extractMetaContent returns the content attribute of the first <meta> tag
// in html whose property attribute (or, failing that, name attribute) equals
// property. Matching is case-insensitive and expects the property/name
// attribute to appear before content. The value is passed through
// htmlUnescape; "" is returned when no tag matches.
func extractMetaContent(html, property string) string {
	quoted := regexp.QuoteMeta(property)
	// The patterns must be anchored on the opening "<meta" and stay inside
	// that tag ([^>]+); without the anchor nothing in real markup matches.
	patterns := []*regexp.Regexp{
		regexp.MustCompile(`(?i)<meta[^>]+property=["']` + quoted + `["'][^>]+content=["']([^"']+)`),
		regexp.MustCompile(`(?i)<meta[^>]+name=["']` + quoted + `["'][^>]+content=["']([^"']+)`),
	}
	for _, pattern := range patterns {
		if matches := pattern.FindStringSubmatch(html); len(matches) == 2 {
			return htmlUnescape(matches[1])
		}
	}
	return ""
}
// videoPreviewURLRe matches an http(s) URL, optionally with JSON-escaped
// slashes, that ends in "mp4" or "m3u8". The URL stops at a quote,
// whitespace or ">". Backslashes are allowed so escaped paths match whole.
var videoPreviewURLRe = regexp.MustCompile(`https?:\\?/\\?/[^"'\s>]+(?:mp4|m3u8)`)

// extractVideoPreviewURL scans html for mp4/m3u8 URLs and returns the first
// one that, once unescaped, contains "preview", "video" or "watermark"
// (case-insensitive). It returns "" when no candidate qualifies.
func extractVideoPreviewURL(html string) string {
	for _, match := range videoPreviewURLRe.FindAllString(html, -1) {
		// Undo JSON-style escaping of slashes before inspecting the URL.
		candidate := strings.ReplaceAll(match, `\/`, `/`)
		candidate = strings.ReplaceAll(candidate, `\u002F`, `/`)
		candidate = strings.ReplaceAll(candidate, `\\`, "")

		lower := strings.ToLower(candidate)
		if strings.Contains(lower, "preview") || strings.Contains(lower, "video") || strings.Contains(lower, "watermark") {
			return candidate
		}
	}
	return ""
}
// artgridThumbnailRe matches https image URLs (jpeg, jpg, png, webp) that
// contain one of the known Artgrid/Artlist image hosts. The URL stops at a
// quote, whitespace or ">". The host may follow "https://" directly.
var artgridThumbnailRe = regexp.MustCompile(`https://[^"'\s>]*(?:artgrid\.imgix\.net|cms-public-artifacts\.artlist\.io|artlist-content-images\.imgix\.net)[^"'\s>]+(?:jpeg|jpg|png|webp)`)

// extractArtgridBackgroundThumbnail returns the first Artgrid/Artlist image
// URL in html that either contains clipID or contains "graded-thumbnail"
// (case-insensitive). It returns "" when no such URL exists. An empty
// clipID never matches on its own.
func extractArtgridBackgroundThumbnail(html, clipID string) string {
	for _, match := range artgridThumbnailRe.FindAllString(html, -1) {
		if clipID != "" && strings.Contains(match, clipID) {
			return match
		}
		if strings.Contains(strings.ToLower(match), "graded-thumbnail") {
			return match
		}
	}
	return ""
}
// artgridClipIDRe captures the numeric ID in "/clip/<digits>/".
var artgridClipIDRe = regexp.MustCompile(`/clip/([0-9]+)/`)

// extractArtgridClipID returns the numeric clip ID found in link between
// "/clip/" and the following slash, or "" when link has no such segment.
func extractArtgridClipID(link string) string {
	if matches := artgridClipIDRe.FindStringSubmatch(link); len(matches) == 2 {
		return matches[1]
	}
	return ""
}
// absoluteURLRe matches an http(s) URL up to the next quote, backslash or
// whitespace character.
var absoluteURLRe = regexp.MustCompile(`https?:\/\/[^"'\\\s]+`)

// collectURLs returns every distinct http(s) URL found in body, in order of
// first appearance.
func collectURLs(body string) []string {
	matches := absoluteURLRe.FindAllString(body, -1)
	seen := make(map[string]bool, len(matches))
	results := make([]string, 0, len(matches))
	for _, match := range matches {
		candidate := strings.TrimSpace(strings.Trim(match, `"'`))
		if candidate == "" || seen[candidate] {
			continue
		}
		seen[candidate] = true
		results = append(results, candidate)
	}
	return results
}
// pickImageURL returns the first URL that contains ".jpg", ".jpeg", ".png"
// or ".webp" (case-insensitive), or "" when none does.
func pickImageURL(urls []string) string {
	extensions := [...]string{".jpg", ".jpeg", ".png", ".webp"}
	for _, candidate := range urls {
		folded := strings.ToLower(candidate)
		for _, ext := range extensions {
			if strings.Contains(folded, ext) {
				return candidate
			}
		}
	}
	return ""
}
// pickVideoURL returns the first URL that contains ".mp4" or ".m3u8"
// (case-insensitive), or "" when none does.
func pickVideoURL(urls []string) string {
	extensions := [...]string{".mp4", ".m3u8"}
	for _, candidate := range urls {
		folded := strings.ToLower(candidate)
		for _, ext := range extensions {
			if strings.Contains(folded, ext) {
				return candidate
			}
		}
	}
	return ""
}
// fetchText downloads target with browser-like headers and returns at most
// 1 MiB of the body as a string.
//
// A 403 or 503 response, or a body that looks like a Cloudflare challenge,
// hands the request over to fetchTextViaPython. Any other status of 300 or
// above is reported as an error.
func (s *SearchService) fetchText(target string) (string, error) {
	const htmlAccept = "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"

	req, err := newBrowserRequest(http.MethodGet, target, htmlAccept)
	if err != nil {
		return "", err
	}
	resp, err := s.Client.Do(req)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	switch code := resp.StatusCode; {
	case code == http.StatusForbidden, code == http.StatusServiceUnavailable:
		return fetchTextViaPython(target)
	case code >= 300:
		return "", fmt.Errorf("fetch returned status %d", code)
	}

	raw, err := io.ReadAll(io.LimitReader(resp.Body, 1024*1024))
	if err != nil {
		return "", err
	}
	page := string(raw)
	if looksLikeCloudflareChallenge(page) {
		return fetchTextViaPython(target)
	}
	return page, nil
}
// fetchJSONText downloads target with a JSON Accept header and returns at
// most 1 MiB of the body as a string. Any status of 300 or above is reported
// as an error. The body is not validated as JSON.
func (s *SearchService) fetchJSONText(target string) (string, error) {
	req, err := newBrowserRequest(http.MethodGet, target, "application/json, text/json, */*")
	if err != nil {
		return "", err
	}
	resp, err := s.Client.Do(req)
	if err != nil {
		return "", err
	}
	defer resp.Body.Close()

	if code := resp.StatusCode; code >= 300 {
		return "", fmt.Errorf("json fetch returned status %d", code)
	}

	const maxBody = 1024 * 1024
	payload, readErr := io.ReadAll(io.LimitReader(resp.Body, maxBody))
	if readErr != nil {
		return "", readErr
	}
	return string(payload), nil
}
// firstNonEmpty returns the first argument that is not blank after trimming
// whitespace. The value is returned untrimmed; "" is returned when every
// argument is blank.
func firstNonEmpty(values ...string) string {
	for i := range values {
		if len(strings.TrimSpace(values[i])) > 0 {
			return values[i]
		}
	}
	return ""
}
// shouldPreferPageThumbnail reports whether the thumbnail scraped from the
// page should replace current. That is the case when current is blank, is
// served by one of the listed search-engine image hosts, has no parsable
// host, or lives on a different host than pageLink (only when pageLink has
// a host).
func shouldPreferPageThumbnail(current, pageLink string) bool {
	thumb := strings.TrimSpace(current)
	if thumb == "" {
		return true
	}

	lowered := strings.ToLower(thumb)
	for _, proxy := range []string{"imgs.search.brave.com", "googleusercontent.com", "bing.com"} {
		if strings.Contains(lowered, proxy) {
			return true
		}
	}

	// hostname yields the lower-cased host of raw, or "" when unparsable.
	hostname := func(raw string) string {
		u, err := url.Parse(raw)
		if err != nil {
			return ""
		}
		return strings.ToLower(u.Host)
	}

	thumbHost := hostname(thumb)
	if thumbHost == "" {
		return true
	}
	pageHost := hostname(pageLink)
	return pageHost != "" && pageHost != thumbHost
}
// hostOf returns the lower-cased host component of raw, or "" when raw
// cannot be parsed as a URL.
func hostOf(raw string) string {
	if u, err := url.Parse(raw); err == nil {
		return strings.ToLower(u.Host)
	}
	return ""
}
// extractJSONLDValue returns the first non-blank string value stored under
// the JSON key in html. The match is textual, on `"key": "value"`, and the
// document is not parsed as JSON. Escaped slashes are unescaped and the
// value is passed through htmlUnescape. "" is returned when nothing usable
// is found.
func extractJSONLDValue(html, key string) string {
	expr := `"` + regexp.QuoteMeta(key) + `"\s*:\s*"(https?:\\?/\\?/[^"]+|[^"]+)"`
	for _, groups := range regexp.MustCompile(expr).FindAllStringSubmatch(html, -1) {
		if len(groups) != 2 {
			continue
		}
		cleaned := groups[1]
		cleaned = strings.ReplaceAll(cleaned, `\/`, `/`)
		cleaned = strings.ReplaceAll(cleaned, `\u002F`, `/`)
		cleaned = strings.ReplaceAll(cleaned, `\\`, "")
		cleaned = htmlUnescape(cleaned)
		if strings.TrimSpace(cleaned) == "" {
			// Blank value: keep looking at later occurrences of the key.
			continue
		}
		return cleaned
	}
	return ""
}
// envatoPreviewImageRe matches the trailing "/video_preview/<file>.<ext>"
// segment of an Envato cover-image URL.
var envatoPreviewImageRe = regexp.MustCompile(`/video_preview/[^/]+\.(?:jpg|jpeg|png|webp)$`)

// deriveEnvatoPreviewFromThumbnail guesses the watermarked preview video URL
// from an Envato "video_preview" thumbnail URL. It swaps the trailing
// "/video_preview/<image>" segment for
// "/watermarked_preview/watermarked_preview.mp4".
//
// Query strings are dropped before the rewrite. "" is returned when the
// thumbnail is blank or does not have the expected shape, so an image URL is
// never handed back as if it were a video.
func deriveEnvatoPreviewFromThumbnail(thumbnail string) string {
	candidate := htmlUnescape(strings.TrimSpace(thumbnail))
	if candidate == "" {
		return ""
	}
	// Handles doubly-escaped ampersands left over after the first unescape.
	candidate = strings.ReplaceAll(candidate, "&amp;", "&")
	if !strings.Contains(candidate, "/video_preview/") {
		return ""
	}
	if idx := strings.Index(candidate, "?"); idx >= 0 {
		candidate = candidate[:idx]
	}
	if !envatoPreviewImageRe.MatchString(candidate) {
		return ""
	}
	return envatoPreviewImageRe.ReplaceAllString(candidate, `/watermarked_preview/watermarked_preview.mp4`)
}
// newBrowserRequest creates a body-less request for target that carries a
// desktop Chrome User-Agent and an English Accept-Language header. The
// Accept header is only set when accept is non-empty.
func newBrowserRequest(method, target, accept string) (*http.Request, error) {
	req, err := http.NewRequest(method, target, nil)
	if err != nil {
		return nil, err
	}

	headers := [][2]string{
		{"User-Agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36"},
		{"Accept-Language", "en-US,en;q=0.9"},
	}
	if accept != "" {
		headers = append(headers, [2]string{"Accept", accept})
	}
	for _, kv := range headers {
		req.Header.Set(kv[0], kv[1])
	}
	return req, nil
}
// fetchTextViaPython downloads target by running a short python3 script that
// uses urllib with browser-like headers, and returns at most 1 MiB of the
// response body. It is the fallback for pages that reject the Go client.
//
// Only http and https targets are accepted, because urllib would otherwise
// also open other schemes such as file://. Only the script's stdout is
// returned as the page; stderr is used solely for the error message.
func fetchTextViaPython(target string) (string, error) {
	lowered := strings.ToLower(target)
	if !strings.HasPrefix(lowered, "http://") && !strings.HasPrefix(lowered, "https://") {
		return "", fmt.Errorf("python fallback refused non-http url %q", target)
	}

	// The script lines must start at column 0: Python is indentation-sensitive.
	const script = `
from urllib.request import Request, urlopen
import sys
req = Request(sys.argv[1], headers={
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.0.0 Safari/537.36",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    "Accept-Language": "en-US,en;q=0.9",
})
with urlopen(req, timeout=20) as resp:
    sys.stdout.buffer.write(resp.read(1024 * 1024))
`
	// The target is passed as an argument, never interpolated into the script.
	cmd := exec.Command("python3", "-c", script, target)
	var stderr strings.Builder
	cmd.Stderr = &stderr

	output, err := cmd.Output()
	if err != nil {
		return "", fmt.Errorf("python fallback failed: %w: %s", err, truncateBytes([]byte(stderr.String()), 300))
	}
	return string(output), nil
}
// looksLikeCloudflareChallenge reports whether body contains one of the
// phrases "cf-mitigated", "attention required" or "just a moment",
// ignoring case.
func looksLikeCloudflareChallenge(body string) bool {
	folded := strings.ToLower(body)
	for _, marker := range [...]string{"cf-mitigated", "attention required", "just a moment"} {
		if strings.Contains(folded, marker) {
			return true
		}
	}
	return false
}
// truncateBytes trims surrounding whitespace from data and returns it as a
// string of at most limit bytes, followed by "..." when it was shortened.
// The cut never falls inside a multi-byte UTF-8 sequence, and a negative
// limit is treated as zero.
func truncateBytes(data []byte, limit int) string {
	trimmed := strings.TrimSpace(string(data))
	if len(trimmed) <= limit {
		return trimmed
	}
	cut := limit
	if cut < 0 {
		cut = 0
	}
	// Step back over UTF-8 continuation bytes (10xxxxxx) so the result ends
	// on a rune boundary.
	for cut > 0 && trimmed[cut]&0xC0 == 0x80 {
		cut--
	}
	return trimmed[:cut] + "..."
}
// limitQueries returns up to limit distinct, non-blank queries in their
// original order. Queries are trimmed, and duplicates are detected
// case-insensitively (the first spelling is kept). A limit of zero or less
// yields an empty slice.
func limitQueries(queries []string, limit int) []string {
	if limit <= 0 {
		return make([]string, 0)
	}
	capacity := len(queries)
	if limit < capacity {
		capacity = limit
	}

	seen := make(map[string]bool, capacity)
	filtered := make([]string, 0, capacity)
	for _, item := range queries {
		trimmed := strings.TrimSpace(item)
		if trimmed == "" {
			continue
		}
		key := strings.ToLower(trimmed)
		if seen[key] {
			continue
		}
		seen[key] = true
		filtered = append(filtered, trimmed)
		if len(filtered) >= limit {
			break
		}
	}
	return filtered
}
// htmlEntityReplacer decodes the five entities handled by htmlUnescape in a
// single pass, so "&amp;lt;" becomes "&lt;" rather than "<".
var htmlEntityReplacer = strings.NewReplacer(
	"&amp;", "&",
	"&quot;", `"`,
	"&#39;", "'",
	"&lt;", "<",
	"&gt;", ">",
)

// htmlUnescape decodes the entities &amp;, &quot;, &#39;, &lt; and &gt; in
// text. Every other entity is left untouched.
func htmlUnescape(text string) string {
	return htmlEntityReplacer.Replace(text)
}
// sourceWeight returns the sort priority of a source label: Envato is 3,
// Artgrid is 2, Google Video is 1, and every other label is 0.
func sourceWeight(source string) int {
	if source == "Envato" {
		return 3
	}
	if source == "Artgrid" {
		return 2
	}
	if source == "Google Video" {
		return 1
	}
	return 0
}
// minInt returns the smaller of a and b.
func minInt(a, b int) int {
	smaller := b
	if a < b {
		smaller = a
	}
	return smaller
}