177 lines
4.5 KiB
Go
177 lines
4.5 KiB
Go
package services
|
|
|
|
import (
|
|
"encoding/json"
|
|
"fmt"
|
|
"io"
|
|
"net/http"
|
|
"net/url"
|
|
"strings"
|
|
"time"
|
|
)
|
|
|
|
// SearchResult is a single media hit produced by SearchService, in the shape
// serialized to JSON for consumers.
type SearchResult struct {
	// Title is the result title as reported by the search API.
	Title string `json:"title"`
	// Link is the URL of the result; SearchMedia also uses it as the
	// de-duplication key.
	Link string `json:"link"`
	// DisplayLink is the display form of the result's host as reported by
	// the search API.
	DisplayLink string `json:"displayLink"`
	// Snippet is the text excerpt reported by the search API.
	Snippet string `json:"snippet"`
	// ThumbnailURL is the preview image URL; SearchMedia drops results
	// where it is empty.
	ThumbnailURL string `json:"thumbnailUrl"`
	// Source is the platform label derived from DisplayLink by inferSource.
	Source string `json:"source"`
}
|
|
|
|
// SearchService queries the Google Custom Search JSON API for media results.
type SearchService struct {
	// APIKey is sent as the "key" query parameter on every request.
	APIKey string
	// CX is the custom search engine identifier, sent as the "cx" query
	// parameter.
	CX string
	// Client is the HTTP client used to perform the requests.
	Client *http.Client
}
|
|
|
|
func NewSearchService(apiKey, cx string) *SearchService {
|
|
return &SearchService{
|
|
APIKey: apiKey,
|
|
CX: cx,
|
|
Client: &http.Client{Timeout: 20 * time.Second},
|
|
}
|
|
}
|
|
|
|
func (s *SearchService) SearchMedia(query string) ([]SearchResult, error) {
|
|
if s.APIKey == "" || s.CX == "" {
|
|
return nil, fmt.Errorf("google cse credentials are not configured")
|
|
}
|
|
|
|
domains := []string{"youtube.com", "tiktok.com", "envato.com", "artgrid.io"}
|
|
siteQuery := strings.Join(domains, " OR site:")
|
|
fullQuery := fmt.Sprintf("%s (site:%s)", query, siteQuery)
|
|
|
|
values := url.Values{}
|
|
values.Set("key", s.APIKey)
|
|
values.Set("cx", s.CX)
|
|
values.Set("q", fullQuery)
|
|
values.Set("num", "10")
|
|
values.Set("safe", "off")
|
|
|
|
results := make([]SearchResult, 0, 30)
|
|
seen := map[string]bool{}
|
|
for _, start := range []string{"1", "11", "21"} {
|
|
pageResults, err := s.fetchPage(values, start, true)
|
|
if err != nil {
|
|
pageResults, err = s.fetchPage(values, start, false)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
|
|
for _, item := range pageResults {
|
|
if item.Link == "" || item.ThumbnailURL == "" || seen[item.Link] {
|
|
continue
|
|
}
|
|
seen[item.Link] = true
|
|
results = append(results, item)
|
|
}
|
|
}
|
|
return results, nil
|
|
}
|
|
|
|
func (s *SearchService) fetchPage(values url.Values, start string, imageSearch bool) ([]SearchResult, error) {
|
|
pageValues := url.Values{}
|
|
for key, items := range values {
|
|
for _, item := range items {
|
|
pageValues.Add(key, item)
|
|
}
|
|
}
|
|
pageValues.Set("start", start)
|
|
if imageSearch {
|
|
pageValues.Set("searchType", "image")
|
|
}
|
|
|
|
endpoint := "https://www.googleapis.com/customsearch/v1?" + pageValues.Encode()
|
|
resp, err := s.Client.Get(endpoint)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
if resp.StatusCode >= 300 {
|
|
data, _ := io.ReadAll(io.LimitReader(resp.Body, 2048))
|
|
return nil, fmt.Errorf("google cse returned status %d: %s", resp.StatusCode, strings.TrimSpace(string(data)))
|
|
}
|
|
|
|
var payload struct {
|
|
Items []struct {
|
|
Title string `json:"title"`
|
|
Link string `json:"link"`
|
|
DisplayLink string `json:"displayLink"`
|
|
Snippet string `json:"snippet"`
|
|
Image struct {
|
|
ThumbnailLink string `json:"thumbnailLink"`
|
|
} `json:"image"`
|
|
Pagemap struct {
|
|
CSEImage []struct {
|
|
Src string `json:"src"`
|
|
} `json:"cse_image"`
|
|
CSEThumbnail []struct {
|
|
Src string `json:"src"`
|
|
} `json:"cse_thumbnail"`
|
|
Metatags []map[string]string `json:"metatags"`
|
|
} `json:"pagemap"`
|
|
} `json:"items"`
|
|
}
|
|
|
|
if err := json.NewDecoder(resp.Body).Decode(&payload); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
results := make([]SearchResult, 0, len(payload.Items))
|
|
for _, item := range payload.Items {
|
|
thumb := item.Image.ThumbnailLink
|
|
if thumb == "" {
|
|
thumb = extractThumbnail(item.Pagemap)
|
|
}
|
|
results = append(results, SearchResult{
|
|
Title: item.Title,
|
|
Link: item.Link,
|
|
DisplayLink: item.DisplayLink,
|
|
Snippet: item.Snippet,
|
|
ThumbnailURL: thumb,
|
|
Source: inferSource(item.DisplayLink),
|
|
})
|
|
}
|
|
return results, nil
|
|
}
|
|
|
|
// extractThumbnail picks a thumbnail URL out of a search item's pagemap.
//
// Candidates are tried in this order and the first non-empty one wins:
//  1. the Src of the first cse_thumbnail entry,
//  2. the Src of the first cse_image entry,
//  3. for each metatags entry in order, its "og:image" and then its
//     "twitter:image" value.
//
// It returns the empty string when none of them is set.
func extractThumbnail(pagemap struct {
	CSEImage     []struct{ Src string `json:"src"` } `json:"cse_image"`
	CSEThumbnail []struct{ Src string `json:"src"` } `json:"cse_thumbnail"`
	Metatags     []map[string]string                 `json:"metatags"`
}) string {
	// Only the first entry of each image list is ever consulted.
	if thumbs := pagemap.CSEThumbnail; len(thumbs) > 0 {
		if src := thumbs[0].Src; src != "" {
			return src
		}
	}
	if images := pagemap.CSEImage; len(images) > 0 {
		if src := images[0].Src; src != "" {
			return src
		}
	}

	imageKeys := [...]string{"og:image", "twitter:image"}
	for _, meta := range pagemap.Metatags {
		for _, key := range imageKeys {
			if src := meta[key]; src != "" {
				return src
			}
		}
	}
	return ""
}
|
|
|
|
// inferSource maps a result's display link to a platform label.
//
// The link is matched case-insensitively (host names are not case-sensitive)
// against the substrings "youtube", "tiktok", "envato" and "artgrid", in
// that order, yielding "YouTube", "TikTok", "Envato" or "Artgrid". When none
// of them matches, displayLink is returned unchanged.
func inferSource(displayLink string) string {
	// Lowercase only the copy used for matching so the fallback keeps the
	// caller's original spelling.
	host := strings.ToLower(displayLink)
	switch {
	case strings.Contains(host, "youtube"):
		return "YouTube"
	case strings.Contains(host, "tiktok"):
		return "TikTok"
	case strings.Contains(host, "envato"):
		return "Envato"
	case strings.Contains(host, "artgrid"):
		return "Artgrid"
	default:
		return displayLink
	}
}
|