320 lines
8.4 KiB
Go
320 lines
8.4 KiB
Go
package services
|
|
|
|
import (
|
|
"encoding/json"
|
|
"fmt"
|
|
"net/http"
|
|
"net/url"
|
|
"regexp"
|
|
"sort"
|
|
"strings"
|
|
"time"
|
|
)
|
|
|
|
// SearchResult is one normalized media hit returned by SearchService.
// The struct tags fix the JSON field names used when a result is serialized.
type SearchResult struct {
	// Title is the result title as reported by the search backend.
	Title string `json:"title"`
	// Link is the result URL; search stores it with surrounding whitespace trimmed.
	Link string `json:"link"`
	// DisplayLink is the host shown for the result (see inferDisplayLink).
	DisplayLink string `json:"displayLink"`
	// Snippet is the backend's textual summary ("content") of the result.
	Snippet string `json:"snippet"`
	// ThumbnailURL is the first non-blank thumbnail candidate, or "" when none exists.
	ThumbnailURL string `json:"thumbnailUrl"`
	// Source is the logical provider label, e.g. "Envato", "Artgrid" or "Google Video".
	Source string `json:"source"`
}
|
|
|
|
// SearchService queries a SearXNG instance for stock-media results.
// Use NewSearchService to obtain a value with defaults and an HTTP client set.
type SearchService struct {
	// BaseURL is the root URL of the SearXNG instance; "/search?..." is appended to it.
	BaseURL string
	// GoogleVideoEngine is the engine name sent for the "Google Video" source.
	GoogleVideoEngine string
	// WebEngine is the engine name sent for the Envato and Artgrid sources.
	WebEngine string
	// Client performs the HTTP requests; search dereferences it, so it must be non-nil.
	Client *http.Client
}
|
|
|
|
func NewSearchService(baseURL, googleVideoEngine, webEngine string) *SearchService {
|
|
if googleVideoEngine == "" {
|
|
googleVideoEngine = "google videos"
|
|
}
|
|
if webEngine == "" {
|
|
webEngine = "google"
|
|
}
|
|
return &SearchService{
|
|
BaseURL: strings.TrimRight(baseURL, "/"),
|
|
GoogleVideoEngine: googleVideoEngine,
|
|
WebEngine: webEngine,
|
|
Client: &http.Client{Timeout: 20 * time.Second},
|
|
}
|
|
}
|
|
|
|
func (s *SearchService) SearchMedia(queries []string) ([]SearchResult, error) {
|
|
if s.BaseURL == "" {
|
|
return nil, fmt.Errorf("searxng base url is not configured")
|
|
}
|
|
|
|
sources := []struct {
|
|
name string
|
|
categories string
|
|
engine string
|
|
queryBuilder func(string) string
|
|
match func(SearchResult) bool
|
|
}{
|
|
{
|
|
name: "Google Video",
|
|
categories: "videos",
|
|
engine: s.GoogleVideoEngine,
|
|
queryBuilder: func(query string) string {
|
|
return buildGoogleVideoQuery(query)
|
|
},
|
|
match: isUsefulGoogleVideoResult,
|
|
},
|
|
{
|
|
name: "Envato",
|
|
categories: "general",
|
|
engine: s.WebEngine,
|
|
queryBuilder: buildEnvatoQuery,
|
|
match: isRenderableEnvatoResult,
|
|
},
|
|
{
|
|
name: "Artgrid",
|
|
categories: "general",
|
|
engine: s.WebEngine,
|
|
queryBuilder: buildArtgridQuery,
|
|
match: isRenderableArtgridResult,
|
|
},
|
|
}
|
|
|
|
seen := map[string]bool{}
|
|
results := make([]SearchResult, 0, 60)
|
|
var lastErr error
|
|
for _, query := range queries {
|
|
query = strings.TrimSpace(query)
|
|
if query == "" {
|
|
continue
|
|
}
|
|
for _, source := range sources {
|
|
searchQuery := source.queryBuilder(query)
|
|
|
|
items, err := s.search(searchQuery, source.categories, source.engine, source.name)
|
|
if err != nil {
|
|
lastErr = err
|
|
items, err = s.search(searchQuery, source.categories, "", source.name)
|
|
}
|
|
if err != nil {
|
|
lastErr = err
|
|
if source.categories != "general" {
|
|
items, err = s.search(searchQuery, "general", "", source.name)
|
|
}
|
|
}
|
|
if err != nil {
|
|
lastErr = err
|
|
continue
|
|
}
|
|
for _, item := range items {
|
|
if item.Link == "" || seen[item.Link] {
|
|
continue
|
|
}
|
|
if !source.match(item) {
|
|
continue
|
|
}
|
|
seen[item.Link] = true
|
|
results = append(results, item)
|
|
}
|
|
}
|
|
}
|
|
|
|
sort.SliceStable(results, func(i, j int) bool {
|
|
return sourceWeight(results[i].Source) > sourceWeight(results[j].Source)
|
|
})
|
|
if len(results) == 0 && lastErr != nil {
|
|
return nil, lastErr
|
|
}
|
|
return results, nil
|
|
}
|
|
|
|
func (s *SearchService) search(query, categories, engine, source string) ([]SearchResult, error) {
|
|
values := url.Values{}
|
|
values.Set("q", query)
|
|
values.Set("format", "json")
|
|
values.Set("safesearch", "0")
|
|
values.Set("language", "en-US")
|
|
if categories != "" {
|
|
values.Set("categories", categories)
|
|
}
|
|
if engine != "" {
|
|
values.Set("engines", engine)
|
|
}
|
|
|
|
endpoint := s.BaseURL + "/search?" + values.Encode()
|
|
resp, err := s.Client.Get(endpoint)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
if resp.StatusCode >= 300 {
|
|
return nil, fmt.Errorf("searxng returned status %d for query %q", resp.StatusCode, query)
|
|
}
|
|
|
|
var payload struct {
|
|
Results []struct {
|
|
Title string `json:"title"`
|
|
URL string `json:"url"`
|
|
Content string `json:"content"`
|
|
Thumbnail string `json:"thumbnail"`
|
|
ThumbnailSrc string `json:"thumbnail_src"`
|
|
ImgSrc string `json:"img_src"`
|
|
ParsedURL []any `json:"parsed_url"`
|
|
Engine string `json:"engine"`
|
|
} `json:"results"`
|
|
}
|
|
if err := json.NewDecoder(resp.Body).Decode(&payload); err != nil {
|
|
return nil, fmt.Errorf("searxng JSON decode failed for query %q: %w", query, err)
|
|
}
|
|
|
|
results := make([]SearchResult, 0, len(payload.Results))
|
|
for _, item := range payload.Results {
|
|
link := strings.TrimSpace(item.URL)
|
|
thumb := firstNonEmpty(item.Thumbnail, item.ThumbnailSrc, item.ImgSrc, deriveThumbnail(link))
|
|
results = append(results, SearchResult{
|
|
Title: item.Title,
|
|
Link: link,
|
|
DisplayLink: inferDisplayLink(link, item.ParsedURL),
|
|
Snippet: item.Content,
|
|
ThumbnailURL: thumb,
|
|
Source: normalizeSource(source, link, item.Engine),
|
|
})
|
|
}
|
|
return results, nil
|
|
}
|
|
|
|
// buildGoogleVideoQuery wraps query in double quotes and appends the footage
// style hints and the negative keywords used for the Google Video source.
// query is inserted verbatim; embedded double quotes are not escaped.
func buildGoogleVideoQuery(query string) string {
	const (
		styleHints = `("stock footage" OR "b-roll" OR cinematic OR "establishing shot" OR editorial)`
		exclusions = `-tutorial -"how to" -review -reaction -course -podcast -vlog -interview -breakdown -edit -editing`
	)
	return fmt.Sprintf(`"%s" %s %s`, query, styleHints, exclusions)
}
|
|
|
|
// buildEnvatoQuery wraps query in double quotes and appends style hints, a
// site restriction to elements.envato.com / videohive.net/item, and exclusions
// for the Envato category listing paths.
// query is inserted verbatim; embedded double quotes are not escaped.
func buildEnvatoQuery(query string) string {
	const (
		styleHints = `("stock footage" OR "stock video" OR "motion graphics" OR cinematic OR "b-roll")`
		siteFilter = `(site:elements.envato.com OR site:videohive.net/item)`
		exclusions = `-site:elements.envato.com/stock-video -site:elements.envato.com/video-templates -site:elements.envato.com/stock-video/stock-footage`
	)
	return fmt.Sprintf(`"%s" %s %s %s`, query, styleHints, siteFilter, exclusions)
}
|
|
|
|
// buildArtgridQuery wraps query in double quotes and appends style hints plus
// a site restriction to artgrid.io/clip/.
// query is inserted verbatim; embedded double quotes are not escaped.
func buildArtgridQuery(query string) string {
	const (
		styleHints = `("stock footage" OR "b-roll" OR cinematic OR editorial)`
		siteFilter = `site:artgrid.io/clip/`
	)
	return fmt.Sprintf(`"%s" %s %s`, query, styleHints, siteFilter)
}
|
|
|
|
func isUsefulGoogleVideoResult(result SearchResult) bool {
|
|
text := strings.ToLower(result.Title + " " + result.Snippet)
|
|
for _, banned := range []string{
|
|
"tutorial", "how to", "review", "reaction", "podcast", "interview", "walkthrough",
|
|
"course", "lesson", "edit tutorial", "editing tutorial", "premiere pro", "after effects",
|
|
"breakdown", "explained", "vlog",
|
|
} {
|
|
if strings.Contains(text, banned) {
|
|
return false
|
|
}
|
|
}
|
|
for _, desired := range []string{
|
|
"b-roll", "stock footage", "cinematic", "footage", "establishing shot", "4k",
|
|
} {
|
|
if strings.Contains(text, desired) {
|
|
return true
|
|
}
|
|
}
|
|
lowerLink := strings.ToLower(result.Link)
|
|
return strings.Contains(lowerLink, "youtube.com/watch") || strings.Contains(lowerLink, "youtu.be/")
|
|
}
|
|
|
|
func isRenderableEnvatoResult(result SearchResult) bool {
|
|
parsed, err := url.Parse(result.Link)
|
|
if err != nil {
|
|
return false
|
|
}
|
|
host := strings.ToLower(parsed.Host)
|
|
path := strings.Trim(parsed.Path, "/")
|
|
if strings.Contains(host, "videohive.net") {
|
|
return strings.HasPrefix(path, "item/")
|
|
}
|
|
if strings.Contains(host, "elements.envato.com") {
|
|
if path == "" || strings.Contains(path, "/") {
|
|
return false
|
|
}
|
|
return regexp.MustCompile(`-[A-Z0-9]{6,}$`).MatchString(path)
|
|
}
|
|
return false
|
|
}
|
|
|
|
func isRenderableArtgridResult(result SearchResult) bool {
|
|
parsed, err := url.Parse(result.Link)
|
|
if err != nil {
|
|
return false
|
|
}
|
|
if !strings.Contains(strings.ToLower(parsed.Host), "artgrid.io") {
|
|
return false
|
|
}
|
|
path := strings.Trim(parsed.Path, "/")
|
|
return regexp.MustCompile(`^clip/[0-9]+/`).MatchString(path)
|
|
}
|
|
|
|
// firstNonEmpty returns the first argument that is not blank (empty or only
// whitespace). The value is returned unmodified, i.e. not trimmed. It returns
// "" when every argument is blank or no arguments are given.
func firstNonEmpty(values ...string) string {
	for i := range values {
		if candidate := values[i]; len(strings.TrimSpace(candidate)) > 0 {
			return candidate
		}
	}
	return ""
}
|
|
|
|
// normalizeSource picks the provider label for a result.
//
// A non-empty source is returned as is. Otherwise the label is inferred, in
// order: "Envato" when the lower-cased link contains "envato" or "videohive",
// "Artgrid" when it contains "artgrid", "Google Video" when the lower-cased
// engine contains "google", and finally the engine name unchanged.
func normalizeSource(source, link, engine string) string {
	if source != "" {
		return source
	}

	lowerLink := strings.ToLower(link)
	if strings.Contains(lowerLink, "envato") || strings.Contains(lowerLink, "videohive") {
		return "Envato"
	}
	if strings.Contains(lowerLink, "artgrid") {
		return "Artgrid"
	}
	if strings.Contains(strings.ToLower(engine), "google") {
		return "Google Video"
	}
	return engine
}
|
|
|
|
// inferDisplayLink returns the host to display for a result.
//
// When parsed has at least two elements and the second one is a string, that
// string is returned as is (even if empty). Otherwise the host of link is
// returned, or "" when link cannot be parsed as a URL.
func inferDisplayLink(link string, parsed []any) string {
	if len(parsed) >= 2 {
		if host, isString := parsed[1].(string); isString {
			return host
		}
	}

	u, err := url.Parse(link)
	if err != nil {
		return ""
	}
	return u.Host
}
|
|
|
|
func deriveThumbnail(link string) string {
|
|
if videoID := extractYouTubeID(link); videoID != "" {
|
|
return "https://i.ytimg.com/vi/" + videoID + "/hqdefault.jpg"
|
|
}
|
|
return ""
|
|
}
|
|
|
|
// youtubeIDPatterns lists, in priority order, the expressions used to pull an
// 11-character video ID out of a link. They are compiled once at package scope
// instead of on every call.
var youtubeIDPatterns = []*regexp.Regexp{
	// "v=" query parameter, "/shorts/<id>" and "/embed/<id>" forms.
	regexp.MustCompile(`(?:v=|\/shorts\/|\/embed\/)([A-Za-z0-9_-]{11})`),
	// Short-link form "youtu.be/<id>".
	regexp.MustCompile(`youtu\.be\/([A-Za-z0-9_-]{11})`),
}

// extractYouTubeID returns the 11-character video ID embedded in link, or ""
// when none of youtubeIDPatterns matches.
//
// The first pattern is not tied to a YouTube host: any link containing "v=",
// "/shorts/" or "/embed/" followed by 11 ID characters yields a match.
func extractYouTubeID(link string) string {
	for _, pattern := range youtubeIDPatterns {
		if matches := pattern.FindStringSubmatch(link); len(matches) == 2 {
			return matches[1]
		}
	}
	return ""
}
|
|
|
|
// sourceWeight returns the sort priority of a provider label: 3 for "Envato",
// 2 for "Artgrid", 1 for "Google Video" and 0 for anything else. Matching is
// exact and case-sensitive.
func sourceWeight(source string) int {
	// Ordered from lowest to highest priority; the weight is the position plus one.
	ranked := [...]string{"Google Video", "Artgrid", "Envato"}
	for i, label := range ranked {
		if label == source {
			return i + 1
		}
	}
	return 0
}
|