This commit is contained in:
+322
-90
@@ -3,21 +3,24 @@ package services
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"regexp"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
// SearchResult is a single media hit produced by the search service and
// serialized to JSON using the camelCase keys in the field tags.
type SearchResult struct {
	// Title is the title reported by the search backend.
	Title string `json:"title"`
	// Link is the URL of the result; results are de-duplicated by this value.
	Link string `json:"link"`
	// DisplayLink is a short, human-readable form of Link.
	DisplayLink string `json:"displayLink"`
	// Snippet is the text excerpt reported by the search backend.
	Snippet string `json:"snippet"`
	// ThumbnailURL is a preview image URL; it may be empty.
	ThumbnailURL string `json:"thumbnailUrl"`
	// PreviewVideoURL is a playable preview (mp4/m3u8) URL filled in during
	// enrichment; it may be empty.
	PreviewVideoURL string `json:"previewVideoUrl"`
	// Source names the provider of the result, e.g. "Envato" or "Artgrid".
	Source string `json:"source"`
}
|
||||
|
||||
type SearchService struct {
|
||||
@@ -47,84 +50,169 @@ func (s *SearchService) SearchMedia(queries []string) ([]SearchResult, error) {
|
||||
return nil, fmt.Errorf("searxng base url is not configured")
|
||||
}
|
||||
|
||||
sources := []struct {
|
||||
name string
|
||||
categories string
|
||||
engine string
|
||||
queryBuilder func(string) string
|
||||
match func(SearchResult) bool
|
||||
}{
|
||||
type sourceConfig struct {
|
||||
name string
|
||||
categories string
|
||||
engine string
|
||||
build func(string) []string
|
||||
accept func(SearchResult) bool
|
||||
}
|
||||
|
||||
sources := []sourceConfig{
|
||||
{
|
||||
name: "Envato",
|
||||
categories: "general",
|
||||
engine: s.WebEngine,
|
||||
build: buildEnvatoQueries,
|
||||
accept: isRenderableEnvatoResult,
|
||||
},
|
||||
{
|
||||
name: "Artgrid",
|
||||
categories: "general",
|
||||
engine: s.WebEngine,
|
||||
build: buildArtgridQueries,
|
||||
accept: isRenderableArtgridResult,
|
||||
},
|
||||
{
|
||||
name: "Google Video",
|
||||
categories: "videos",
|
||||
engine: s.GoogleVideoEngine,
|
||||
queryBuilder: func(query string) string {
|
||||
return buildGoogleVideoQuery(query)
|
||||
},
|
||||
match: isUsefulGoogleVideoResult,
|
||||
},
|
||||
{
|
||||
name: "Envato",
|
||||
categories: "general",
|
||||
engine: s.WebEngine,
|
||||
queryBuilder: buildEnvatoQuery,
|
||||
match: isRenderableEnvatoResult,
|
||||
},
|
||||
{
|
||||
name: "Artgrid",
|
||||
categories: "general",
|
||||
engine: s.WebEngine,
|
||||
queryBuilder: buildArtgridQuery,
|
||||
match: isRenderableArtgridResult,
|
||||
build: buildGoogleVideoQueries,
|
||||
accept: isUsefulGoogleVideoResult,
|
||||
},
|
||||
}
|
||||
|
||||
seen := map[string]bool{}
|
||||
results := make([]SearchResult, 0, 60)
|
||||
results := make([]SearchResult, 0, 90)
|
||||
var lastErr error
|
||||
for _, query := range queries {
|
||||
query = strings.TrimSpace(query)
|
||||
if query == "" {
|
||||
|
||||
baseQueries := limitQueries(queries, 5)
|
||||
for _, base := range baseQueries {
|
||||
base = strings.TrimSpace(base)
|
||||
if base == "" {
|
||||
continue
|
||||
}
|
||||
for _, source := range sources {
|
||||
searchQuery := source.queryBuilder(query)
|
||||
|
||||
items, err := s.search(searchQuery, source.categories, source.engine, source.name)
|
||||
if err != nil {
|
||||
lastErr = err
|
||||
items, err = s.search(searchQuery, source.categories, "", source.name)
|
||||
}
|
||||
if err != nil {
|
||||
lastErr = err
|
||||
if source.categories != "general" {
|
||||
items, err = s.search(searchQuery, "general", "", source.name)
|
||||
for _, searchQuery := range source.build(base) {
|
||||
items, err := s.search(searchQuery, source.categories, source.engine, source.name)
|
||||
if err != nil {
|
||||
lastErr = err
|
||||
items, err = s.search(searchQuery, source.categories, "", source.name)
|
||||
}
|
||||
}
|
||||
if err != nil {
|
||||
lastErr = err
|
||||
continue
|
||||
}
|
||||
for _, item := range items {
|
||||
if item.Link == "" || seen[item.Link] {
|
||||
if err != nil {
|
||||
lastErr = err
|
||||
continue
|
||||
}
|
||||
if !source.match(item) {
|
||||
continue
|
||||
for _, item := range items {
|
||||
if item.Link == "" || seen[item.Link] || !source.accept(item) {
|
||||
continue
|
||||
}
|
||||
seen[item.Link] = true
|
||||
results = append(results, item)
|
||||
}
|
||||
seen[item.Link] = true
|
||||
results = append(results, item)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if len(results) == 0 && lastErr != nil {
|
||||
return nil, lastErr
|
||||
}
|
||||
|
||||
sort.SliceStable(results, func(i, j int) bool {
|
||||
return sourceWeight(results[i].Source) > sourceWeight(results[j].Source)
|
||||
})
|
||||
if len(results) == 0 && lastErr != nil {
|
||||
return nil, lastErr
|
||||
return s.EnrichResults(results), nil
|
||||
}
|
||||
|
||||
func (s *SearchService) EnrichResults(results []SearchResult) []SearchResult {
|
||||
limit := minInt(len(results), 24)
|
||||
if limit == 0 {
|
||||
return results
|
||||
}
|
||||
return results, nil
|
||||
|
||||
enriched := make([]SearchResult, len(results))
|
||||
copy(enriched, results)
|
||||
|
||||
var wg sync.WaitGroup
|
||||
sem := make(chan struct{}, 4)
|
||||
for idx := 0; idx < limit; idx++ {
|
||||
wg.Add(1)
|
||||
go func(i int) {
|
||||
defer wg.Done()
|
||||
sem <- struct{}{}
|
||||
defer func() { <-sem }()
|
||||
enriched[i] = s.enrichResult(enriched[i])
|
||||
}(idx)
|
||||
}
|
||||
wg.Wait()
|
||||
return enriched
|
||||
}
|
||||
|
||||
func (s *SearchService) enrichResult(result SearchResult) SearchResult {
|
||||
switch result.Source {
|
||||
case "Envato":
|
||||
return s.enrichEnvato(result)
|
||||
case "Artgrid":
|
||||
return s.enrichArtgrid(result)
|
||||
default:
|
||||
if result.ThumbnailURL == "" {
|
||||
result.ThumbnailURL = deriveThumbnail(result.Link)
|
||||
}
|
||||
return result
|
||||
}
|
||||
}
|
||||
|
||||
func (s *SearchService) enrichEnvato(result SearchResult) SearchResult {
|
||||
html, err := s.fetchText(result.Link)
|
||||
if err != nil {
|
||||
return result
|
||||
}
|
||||
if result.ThumbnailURL == "" {
|
||||
result.ThumbnailURL = firstNonEmpty(
|
||||
extractMetaContent(html, "og:image"),
|
||||
extractMetaContent(html, "twitter:image"),
|
||||
)
|
||||
}
|
||||
if result.PreviewVideoURL == "" {
|
||||
result.PreviewVideoURL = extractVideoPreviewURL(html)
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
func (s *SearchService) enrichArtgrid(result SearchResult) SearchResult {
|
||||
clipID := extractArtgridClipID(result.Link)
|
||||
if clipID == "" {
|
||||
return result
|
||||
}
|
||||
|
||||
apiURL := "https://artgrid.io/api/clip/details?clipId=" + clipID
|
||||
body, err := s.fetchJSONText(apiURL)
|
||||
if err == nil {
|
||||
urls := collectURLs(body)
|
||||
if result.ThumbnailURL == "" {
|
||||
result.ThumbnailURL = pickImageURL(urls)
|
||||
}
|
||||
if result.PreviewVideoURL == "" {
|
||||
result.PreviewVideoURL = pickVideoURL(urls)
|
||||
}
|
||||
}
|
||||
|
||||
if result.ThumbnailURL == "" || result.PreviewVideoURL == "" {
|
||||
html, err := s.fetchText(result.Link)
|
||||
if err == nil {
|
||||
if result.ThumbnailURL == "" {
|
||||
result.ThumbnailURL = firstNonEmpty(
|
||||
extractMetaContent(html, "og:image"),
|
||||
extractMetaContent(html, "twitter:image"),
|
||||
)
|
||||
}
|
||||
if result.PreviewVideoURL == "" {
|
||||
result.PreviewVideoURL = extractVideoPreviewURL(html)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
func (s *SearchService) search(query, categories, engine, source string) ([]SearchResult, error) {
|
||||
@@ -170,29 +258,40 @@ func (s *SearchService) search(query, categories, engine, source string) ([]Sear
|
||||
results := make([]SearchResult, 0, len(payload.Results))
|
||||
for _, item := range payload.Results {
|
||||
link := strings.TrimSpace(item.URL)
|
||||
thumb := firstNonEmpty(item.Thumbnail, item.ThumbnailSrc, item.ImgSrc, deriveThumbnail(link))
|
||||
if link == "" {
|
||||
continue
|
||||
}
|
||||
results = append(results, SearchResult{
|
||||
Title: item.Title,
|
||||
Link: link,
|
||||
DisplayLink: inferDisplayLink(link, item.ParsedURL),
|
||||
Snippet: item.Content,
|
||||
ThumbnailURL: thumb,
|
||||
ThumbnailURL: firstNonEmpty(item.Thumbnail, item.ThumbnailSrc, item.ImgSrc, deriveThumbnail(link)),
|
||||
Source: normalizeSource(source, link, item.Engine),
|
||||
})
|
||||
}
|
||||
return results, nil
|
||||
}
|
||||
|
||||
func buildGoogleVideoQuery(query string) string {
|
||||
return fmt.Sprintf(`"%s" ("stock footage" OR "b-roll" OR cinematic OR "establishing shot" OR editorial) -tutorial -"how to" -review -reaction -course -podcast -vlog -interview -breakdown -edit -editing`, query)
|
||||
// buildGoogleVideoQueries expands a base phrase into two video search
// queries: a strict one with a long list of excluded terms, followed by a
// broader footage-oriented one with fewer exclusions.
func buildGoogleVideoQueries(base string) []string {
	strict := fmt.Sprintf(`"%s" ("stock footage" OR "b-roll" OR cinematic OR "establishing shot" OR editorial) -tutorial -"how to" -review -reaction -course -podcast -vlog -interview -breakdown -edit -editing`, base)
	broad := fmt.Sprintf(`"%s" ("cinematic footage" OR "free stock footage" OR "4k footage") -tutorial -"how to" -review`, base)

	queries := make([]string, 0, 2)
	queries = append(queries, strict, broad)
	return queries
}
|
||||
|
||||
func buildEnvatoQuery(query string) string {
|
||||
return fmt.Sprintf(`"%s" ("stock footage" OR "stock video" OR "motion graphics" OR cinematic OR "b-roll") (site:elements.envato.com OR site:videohive.net/item) -site:elements.envato.com/stock-video -site:elements.envato.com/video-templates -site:elements.envato.com/stock-video/stock-footage`, query)
|
||||
// buildEnvatoQueries expands a base phrase into two site-restricted queries,
// one for elements.envato.com and one for videohive.net item pages, in that
// order.
func buildEnvatoQueries(base string) []string {
	elements := fmt.Sprintf(`"%s" ("stock footage" OR "stock video" OR "b-roll" OR cinematic) site:elements.envato.com`, base)
	videohive := fmt.Sprintf(`"%s" ("stock footage" OR "stock video" OR "b-roll" OR cinematic) site:videohive.net/item`, base)

	queries := make([]string, 0, 2)
	queries = append(queries, elements, videohive)
	return queries
}
|
||||
|
||||
func buildArtgridQuery(query string) string {
|
||||
return fmt.Sprintf(`"%s" ("stock footage" OR "b-roll" OR cinematic OR editorial) site:artgrid.io/clip/`, query)
|
||||
// buildArtgridQueries expands a base phrase into two queries restricted to
// artgrid.io clip pages, differing only in the footage keywords they require.
func buildArtgridQueries(base string) []string {
	primary := fmt.Sprintf(`"%s" ("stock footage" OR "b-roll" OR cinematic OR editorial) site:artgrid.io/clip/`, base)
	alternate := fmt.Sprintf(`"%s" ("footage" OR "cinematic" OR "establishing shot") site:artgrid.io/clip/`, base)

	queries := make([]string, 0, 2)
	queries = append(queries, primary, alternate)
	return queries
}
|
||||
|
||||
func isUsefulGoogleVideoResult(result SearchResult) bool {
|
||||
@@ -200,21 +299,13 @@ func isUsefulGoogleVideoResult(result SearchResult) bool {
|
||||
for _, banned := range []string{
|
||||
"tutorial", "how to", "review", "reaction", "podcast", "interview", "walkthrough",
|
||||
"course", "lesson", "edit tutorial", "editing tutorial", "premiere pro", "after effects",
|
||||
"breakdown", "explained", "vlog",
|
||||
"breakdown", "explained", "vlog", "tips", "guide", "learn", "free download",
|
||||
} {
|
||||
if strings.Contains(text, banned) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
for _, desired := range []string{
|
||||
"b-roll", "stock footage", "cinematic", "footage", "establishing shot", "4k",
|
||||
} {
|
||||
if strings.Contains(text, desired) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
lowerLink := strings.ToLower(result.Link)
|
||||
return strings.Contains(lowerLink, "youtube.com/watch") || strings.Contains(lowerLink, "youtu.be/")
|
||||
return true
|
||||
}
|
||||
|
||||
func isRenderableEnvatoResult(result SearchResult) bool {
|
||||
@@ -225,7 +316,7 @@ func isRenderableEnvatoResult(result SearchResult) bool {
|
||||
host := strings.ToLower(parsed.Host)
|
||||
path := strings.Trim(parsed.Path, "/")
|
||||
if strings.Contains(host, "videohive.net") {
|
||||
return strings.HasPrefix(path, "item/")
|
||||
return strings.HasPrefix(path, "item/") && len(strings.Split(path, "/")) >= 2
|
||||
}
|
||||
if strings.Contains(host, "elements.envato.com") {
|
||||
if path == "" || strings.Contains(path, "/") {
|
||||
@@ -244,17 +335,7 @@ func isRenderableArtgridResult(result SearchResult) bool {
|
||||
if !strings.Contains(strings.ToLower(parsed.Host), "artgrid.io") {
|
||||
return false
|
||||
}
|
||||
path := strings.Trim(parsed.Path, "/")
|
||||
return regexp.MustCompile(`^clip/[0-9]+/`).MatchString(path)
|
||||
}
|
||||
|
||||
func firstNonEmpty(values ...string) string {
|
||||
for _, value := range values {
|
||||
if strings.TrimSpace(value) != "" {
|
||||
return value
|
||||
}
|
||||
}
|
||||
return ""
|
||||
return regexp.MustCompile(`^/clip/[0-9]+/`).MatchString(parsed.Path)
|
||||
}
|
||||
|
||||
func normalizeSource(source, link, engine string) string {
|
||||
@@ -305,6 +386,150 @@ func extractYouTubeID(link string) string {
|
||||
return ""
|
||||
}
|
||||
|
||||
func extractMetaContent(html, property string) string {
|
||||
patterns := []*regexp.Regexp{
|
||||
regexp.MustCompile(`(?i)<meta[^>]+property=["']` + regexp.QuoteMeta(property) + `["'][^>]+content=["']([^"']+)`),
|
||||
regexp.MustCompile(`(?i)<meta[^>]+name=["']` + regexp.QuoteMeta(property) + `["'][^>]+content=["']([^"']+)`),
|
||||
}
|
||||
for _, pattern := range patterns {
|
||||
matches := pattern.FindStringSubmatch(html)
|
||||
if len(matches) == 2 {
|
||||
return htmlUnescape(matches[1])
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// videoPreviewURLPattern matches an http(s) URL ending in "mp4" or "m3u8".
// The slashes after the scheme may be backslash-escaped (as in JSON embedded
// in a page), and the URL ends at the first quote, whitespace or '>'.
var videoPreviewURLPattern = regexp.MustCompile(`https?:\\?/\\?/[^"'\s>]+(?:mp4|m3u8)`)

// extractVideoPreviewURL scans html for mp4/m3u8 URLs and returns the first
// one whose text contains "preview" or "video" (case-insensitive), or "" if
// there is none.
//
// Candidates are normalized before the check: escaped slashes (`\/` and
// `\u002F`) become "/", and doubled backslashes are dropped.
func extractVideoPreviewURL(html string) string {
	for _, match := range videoPreviewURLPattern.FindAllString(html, -1) {
		candidate := strings.ReplaceAll(match, `\/`, `/`)
		candidate = strings.ReplaceAll(candidate, `\u002F`, `/`)
		candidate = strings.ReplaceAll(candidate, `\\`, "")

		lower := strings.ToLower(candidate)
		if strings.Contains(lower, "preview") || strings.Contains(lower, "video") {
			return candidate
		}
	}
	return ""
}
|
||||
|
||||
// extractArtgridClipID returns the numeric id from the first "/clip/<digits>/"
// segment of link, or "" when link has no such segment. The trailing slash
// after the digits is required.
func extractArtgridClipID(link string) string {
	clipPath := regexp.MustCompile(`/clip/([0-9]+)/`)
	groups := clipPath.FindStringSubmatch(link)
	if len(groups) != 2 {
		return ""
	}
	return groups[1]
}
|
||||
|
||||
// collectURLs returns every distinct http(s) URL found in body, in order of
// first appearance. A URL ends at the first quote, backslash or whitespace
// character. The result is never nil.
func collectURLs(body string) []string {
	urlPattern := regexp.MustCompile(`https?:\/\/[^"'\\\s]+`)
	found := urlPattern.FindAllString(body, -1)

	unique := make([]string, 0, len(found))
	known := map[string]bool{}
	for i := range found {
		link := strings.TrimSpace(strings.Trim(found[i], `"'`))
		if link != "" && !known[link] {
			known[link] = true
			unique = append(unique, link)
		}
	}
	return unique
}
|
||||
|
||||
// pickImageURL returns the first entry of urls that contains a .jpg, .jpeg,
// .png or .webp marker anywhere in it (case-insensitive), or "" if no entry
// does. The entry is returned with its original casing.
func pickImageURL(urls []string) string {
	extensions := [...]string{".jpg", ".jpeg", ".png", ".webp"}
	for _, candidate := range urls {
		folded := strings.ToLower(candidate)
		for _, ext := range extensions {
			if strings.Contains(folded, ext) {
				return candidate
			}
		}
	}
	return ""
}
|
||||
|
||||
// pickVideoURL returns the first entry of urls that contains a .mp4 or .m3u8
// marker anywhere in it (case-insensitive), or "" if no entry does. The
// entry is returned with its original casing.
func pickVideoURL(urls []string) string {
	extensions := [...]string{".mp4", ".m3u8"}
	for _, candidate := range urls {
		folded := strings.ToLower(candidate)
		for _, ext := range extensions {
			if strings.Contains(folded, ext) {
				return candidate
			}
		}
	}
	return ""
}
|
||||
|
||||
func (s *SearchService) fetchText(target string) (string, error) {
|
||||
resp, err := s.Client.Get(target)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode >= 300 {
|
||||
return "", fmt.Errorf("fetch returned status %d", resp.StatusCode)
|
||||
}
|
||||
data, err := io.ReadAll(io.LimitReader(resp.Body, 1024*1024))
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return string(data), nil
|
||||
}
|
||||
|
||||
func (s *SearchService) fetchJSONText(target string) (string, error) {
|
||||
req, err := http.NewRequest(http.MethodGet, target, nil)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
req.Header.Set("Accept", "application/json, text/json")
|
||||
resp, err := s.Client.Do(req)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
if resp.StatusCode >= 300 {
|
||||
return "", fmt.Errorf("json fetch returned status %d", resp.StatusCode)
|
||||
}
|
||||
data, err := io.ReadAll(io.LimitReader(resp.Body, 1024*1024))
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return string(data), nil
|
||||
}
|
||||
|
||||
// firstNonEmpty returns the first argument that is not blank (empty or
// whitespace-only), or "" when every argument is blank. The chosen value is
// returned as given, without trimming.
func firstNonEmpty(values ...string) string {
	for i := 0; i < len(values); i++ {
		if strings.TrimSpace(values[i]) == "" {
			continue
		}
		return values[i]
	}
	return ""
}
|
||||
|
||||
// limitQueries returns at most limit entries of queries, trimmed of
// surrounding whitespace, skipping blank entries and case-insensitive
// duplicates. Input order is preserved and the first spelling of a duplicate
// wins. A limit of zero or less yields an empty, non-nil slice.
func limitQueries(queries []string, limit int) []string {
	// Without this guard a zero limit would still let one entry through and
	// a negative limit would panic when sizing the slice.
	if limit <= 0 {
		return []string{}
	}

	capacity := len(queries)
	if limit < capacity {
		capacity = limit
	}

	seen := make(map[string]bool, capacity)
	filtered := make([]string, 0, capacity)
	for _, item := range queries {
		if len(filtered) >= limit {
			break
		}
		trimmed := strings.TrimSpace(item)
		if trimmed == "" {
			continue
		}
		key := strings.ToLower(trimmed)
		if seen[key] {
			continue
		}
		seen[key] = true
		filtered = append(filtered, trimmed)
	}
	return filtered
}
|
||||
|
||||
// htmlEntityReplacer maps the common HTML character entities to the
// characters they stand for. Replacement is a single pass over the input, so
// text produced by one replacement is never unescaped a second time.
var htmlEntityReplacer = strings.NewReplacer(
	"&amp;", "&",
	"&quot;", `"`,
	"&#39;", "'",
	"&#x27;", "'",
	"&apos;", "'",
	"&lt;", "<",
	"&gt;", ">",
)

// htmlUnescape replaces the HTML entities for &, ", ', < and > in text with
// the literal characters. Any other entity is left as it is.
func htmlUnescape(text string) string {
	return htmlEntityReplacer.Replace(text)
}
|
||||
|
||||
func sourceWeight(source string) int {
|
||||
switch source {
|
||||
case "Envato":
|
||||
@@ -317,3 +542,10 @@ func sourceWeight(source string) int {
|
||||
return 0
|
||||
}
|
||||
}
|
||||
|
||||
// minInt returns the smaller of a and b.
func minInt(a, b int) int {
	smaller := b
	if a < b {
		smaller = a
	}
	return smaller
}
|
||||
|
||||
Reference in New Issue
Block a user