This commit is contained in:
+146
-145
@@ -3,10 +3,10 @@ package services
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
neturl "net/url"
|
||||
"net/url"
|
||||
"regexp"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
@@ -21,143 +21,148 @@ type SearchResult struct {
|
||||
}
|
||||
|
||||
type SearchService struct {
|
||||
APIKey string
|
||||
ProjectID string
|
||||
Location string
|
||||
DataStoreID string
|
||||
ServingConfig string
|
||||
Client *http.Client
|
||||
BaseURL string
|
||||
GoogleVideoEngine string
|
||||
WebEngine string
|
||||
Client *http.Client
|
||||
}
|
||||
|
||||
func NewSearchService(apiKey, projectID, location, dataStoreID, servingConfig string) *SearchService {
|
||||
if location == "" {
|
||||
location = "global"
|
||||
func NewSearchService(baseURL, googleVideoEngine, webEngine string) *SearchService {
|
||||
if googleVideoEngine == "" {
|
||||
googleVideoEngine = "google videos"
|
||||
}
|
||||
if servingConfig == "" {
|
||||
servingConfig = "default_serving_config"
|
||||
if webEngine == "" {
|
||||
webEngine = "google"
|
||||
}
|
||||
return &SearchService{
|
||||
APIKey: apiKey,
|
||||
ProjectID: projectID,
|
||||
Location: location,
|
||||
DataStoreID: dataStoreID,
|
||||
ServingConfig: servingConfig,
|
||||
Client: &http.Client{Timeout: 20 * time.Second},
|
||||
BaseURL: strings.TrimRight(baseURL, "/"),
|
||||
GoogleVideoEngine: googleVideoEngine,
|
||||
WebEngine: webEngine,
|
||||
Client: &http.Client{Timeout: 20 * time.Second},
|
||||
}
|
||||
}
|
||||
|
||||
func (s *SearchService) SearchMedia(query string) ([]SearchResult, error) {
|
||||
if s.APIKey == "" || s.ProjectID == "" || s.DataStoreID == "" {
|
||||
return nil, fmt.Errorf("vertex ai search credentials are not configured")
|
||||
}
|
||||
results, err := s.searchLite(query, true)
|
||||
if err != nil {
|
||||
results, err = s.searchLite(query, false)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
return results, nil
|
||||
}
|
||||
|
||||
func (s *SearchService) searchLite(query string, imageSearch bool) ([]SearchResult, error) {
|
||||
filteredQuery := strings.TrimSpace(query + " site:youtube.com OR site:tiktok.com OR site:envato.com OR site:artgrid.io")
|
||||
servingConfig := fmt.Sprintf(
|
||||
"projects/%s/locations/%s/dataStores/%s/servingConfigs/%s",
|
||||
s.ProjectID,
|
||||
s.Location,
|
||||
s.DataStoreID,
|
||||
s.ServingConfig,
|
||||
)
|
||||
|
||||
params := map[string]any{
|
||||
"user_country_code": "us",
|
||||
}
|
||||
if imageSearch {
|
||||
params["searchType"] = 1
|
||||
func (s *SearchService) SearchMedia(queries []string) ([]SearchResult, error) {
|
||||
if s.BaseURL == "" {
|
||||
return nil, fmt.Errorf("searxng base url is not configured")
|
||||
}
|
||||
|
||||
requestBody := map[string]any{
|
||||
"query": filteredQuery,
|
||||
"pageSize": 25,
|
||||
"safeSearch": false,
|
||||
"languageCode": "ko-KR",
|
||||
"params": params,
|
||||
"contentSearchSpec": map[string]any{
|
||||
"snippetSpec": map[string]any{
|
||||
"returnSnippet": true,
|
||||
},
|
||||
sources := []struct {
|
||||
name string
|
||||
categories string
|
||||
engine string
|
||||
siteFilter string
|
||||
match func(string) bool
|
||||
}{
|
||||
{
|
||||
name: "Google Video",
|
||||
categories: "videos",
|
||||
engine: s.GoogleVideoEngine,
|
||||
match: func(string) bool { return true },
|
||||
},
|
||||
{
|
||||
name: "Envato",
|
||||
categories: "general",
|
||||
engine: s.WebEngine,
|
||||
siteFilter: "site:elements.envato.com OR site:envato.com OR site:videohive.net",
|
||||
match: isEnvatoURL,
|
||||
},
|
||||
{
|
||||
name: "Artgrid",
|
||||
categories: "general",
|
||||
engine: s.WebEngine,
|
||||
siteFilter: "site:artgrid.io",
|
||||
match: func(link string) bool { return strings.Contains(strings.ToLower(link), "artgrid.io") },
|
||||
},
|
||||
}
|
||||
|
||||
body, _ := json.Marshal(requestBody)
|
||||
endpoint := fmt.Sprintf(
|
||||
"https://discoveryengine.googleapis.com/v1/%s:searchLite?key=%s",
|
||||
servingConfig,
|
||||
neturl.QueryEscape(s.APIKey),
|
||||
)
|
||||
resp, err := s.Client.Post(endpoint, "application/json", strings.NewReader(string(body)))
|
||||
seen := map[string]bool{}
|
||||
results := make([]SearchResult, 0, 60)
|
||||
for _, query := range queries {
|
||||
query = strings.TrimSpace(query)
|
||||
if query == "" {
|
||||
continue
|
||||
}
|
||||
for _, source := range sources {
|
||||
searchQuery := query
|
||||
if source.siteFilter != "" {
|
||||
searchQuery = query + " " + source.siteFilter
|
||||
}
|
||||
|
||||
items, err := s.search(searchQuery, source.categories, source.engine, source.name)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
for _, item := range items {
|
||||
if item.Link == "" || seen[item.Link] {
|
||||
continue
|
||||
}
|
||||
if source.match != nil && !source.match(item.Link) {
|
||||
continue
|
||||
}
|
||||
seen[item.Link] = true
|
||||
results = append(results, item)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
sort.SliceStable(results, func(i, j int) bool {
|
||||
return sourceWeight(results[i].Source) > sourceWeight(results[j].Source)
|
||||
})
|
||||
return results, nil
|
||||
}
|
||||
|
||||
func (s *SearchService) search(query, categories, engine, source string) ([]SearchResult, error) {
|
||||
values := url.Values{}
|
||||
values.Set("q", query)
|
||||
values.Set("format", "json")
|
||||
values.Set("safesearch", "0")
|
||||
values.Set("language", "ko-KR")
|
||||
if categories != "" {
|
||||
values.Set("categories", categories)
|
||||
}
|
||||
if engine != "" {
|
||||
values.Set("engines", engine)
|
||||
}
|
||||
|
||||
endpoint := s.BaseURL + "/search?" + values.Encode()
|
||||
resp, err := s.Client.Get(endpoint)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode >= 300 {
|
||||
data, _ := io.ReadAll(io.LimitReader(resp.Body, 2048))
|
||||
return nil, fmt.Errorf("vertex ai search returned status %d: %s", resp.StatusCode, strings.TrimSpace(string(data)))
|
||||
return nil, fmt.Errorf("searxng returned status %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
var payload struct {
|
||||
Results []struct {
|
||||
Document struct {
|
||||
StructData map[string]any `json:"structData"`
|
||||
DerivedStructData map[string]any `json:"derivedStructData"`
|
||||
} `json:"document"`
|
||||
Title string `json:"title"`
|
||||
URL string `json:"url"`
|
||||
Content string `json:"content"`
|
||||
Thumbnail string `json:"thumbnail"`
|
||||
ThumbnailSrc string `json:"thumbnail_src"`
|
||||
ImgSrc string `json:"img_src"`
|
||||
ParsedURL []any `json:"parsed_url"`
|
||||
Engine string `json:"engine"`
|
||||
} `json:"results"`
|
||||
}
|
||||
|
||||
if err := json.NewDecoder(resp.Body).Decode(&payload); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
results := make([]SearchResult, 0, len(payload.Results))
|
||||
for _, item := range payload.Results {
|
||||
link := firstNonEmpty(
|
||||
firstString(item.Document.DerivedStructData, "link", "url", "uri"),
|
||||
firstString(item.Document.StructData, "link", "url", "uri"),
|
||||
)
|
||||
title := firstNonEmpty(
|
||||
firstString(item.Document.DerivedStructData, "title", "name"),
|
||||
firstString(item.Document.StructData, "title", "name"),
|
||||
)
|
||||
displayLink := firstNonEmpty(
|
||||
firstString(item.Document.DerivedStructData, "displayLink", "site_name"),
|
||||
firstString(item.Document.StructData, "displayLink", "site_name"),
|
||||
)
|
||||
snippet := firstNonEmpty(
|
||||
firstString(item.Document.DerivedStructData, "snippets", "snippet", "extractive_answers"),
|
||||
firstString(item.Document.StructData, "snippets", "snippet", "description"),
|
||||
)
|
||||
thumb := firstNonEmpty(
|
||||
firstString(item.Document.DerivedStructData, "thumbnail", "image", "image_url", "link"),
|
||||
firstString(item.Document.StructData, "thumbnail", "image", "image_url"),
|
||||
)
|
||||
if thumb == "" {
|
||||
thumb = deriveThumbnail(link)
|
||||
}
|
||||
if title == "" {
|
||||
title = displayLink
|
||||
}
|
||||
if link == "" {
|
||||
continue
|
||||
}
|
||||
link := strings.TrimSpace(item.URL)
|
||||
thumb := firstNonEmpty(item.Thumbnail, item.ThumbnailSrc, item.ImgSrc, deriveThumbnail(link))
|
||||
results = append(results, SearchResult{
|
||||
Title: title,
|
||||
Title: item.Title,
|
||||
Link: link,
|
||||
DisplayLink: displayLink,
|
||||
Snippet: snippet,
|
||||
DisplayLink: inferDisplayLink(link, item.ParsedURL),
|
||||
Snippet: item.Content,
|
||||
ThumbnailURL: thumb,
|
||||
Source: inferSource(displayLink + " " + link),
|
||||
Source: normalizeSource(source, link, item.Engine),
|
||||
})
|
||||
}
|
||||
return results, nil
|
||||
@@ -172,41 +177,39 @@ func firstNonEmpty(values ...string) string {
|
||||
return ""
|
||||
}
|
||||
|
||||
func firstString(values map[string]any, keys ...string) string {
|
||||
for _, key := range keys {
|
||||
value, ok := values[key]
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
switch typed := value.(type) {
|
||||
case string:
|
||||
if typed != "" {
|
||||
return typed
|
||||
}
|
||||
case []any:
|
||||
for _, item := range typed {
|
||||
if text, ok := item.(string); ok && text != "" {
|
||||
return text
|
||||
}
|
||||
if mapped, ok := item.(map[string]any); ok {
|
||||
if text := firstString(mapped, "snippet", "htmlSnippet", "url", "link", "value", "content"); text != "" {
|
||||
return text
|
||||
}
|
||||
}
|
||||
}
|
||||
case map[string]any:
|
||||
if text := firstString(typed, "snippet", "htmlSnippet", "url", "link", "value", "content"); text != "" {
|
||||
return text
|
||||
}
|
||||
// normalizeSource picks the source label attached to a search result.
//
// An explicit source always wins. When source is empty the label is inferred,
// in order, from the link ("envato"/"videohive" -> "Envato", "artgrid" ->
// "Artgrid") and then from the engine name (anything containing "google" ->
// "Google Video"). If nothing matches, the engine name is returned unchanged.
// All substring checks are case-insensitive.
func normalizeSource(source, link, engine string) string {
	if source != "" {
		return source
	}

	// Lowercase once instead of once per case.
	lowerLink := strings.ToLower(link)
	switch {
	case strings.Contains(lowerLink, "envato"), strings.Contains(lowerLink, "videohive"):
		return "Envato"
	case strings.Contains(lowerLink, "artgrid"):
		return "Artgrid"
	case strings.Contains(strings.ToLower(engine), "google"):
		return "Google Video"
	default:
		return engine
	}
}
|
||||
|
||||
// inferDisplayLink returns the host name to show for a result.
//
// It prefers the second element of parsed when that element is a non-empty
// string (presumably the netloc of the backend's "parsed_url" array; confirm
// against the search backend's response format). Otherwise it falls back to
// the host parsed from link, and returns "" when link cannot be parsed.
func inferDisplayLink(link string, parsed []any) string {
	if len(parsed) > 1 {
		// A blank host must not short-circuit the fallback below.
		if host, ok := parsed[1].(string); ok && strings.TrimSpace(host) != "" {
			return strings.TrimSpace(host)
		}
	}
	if parsedURL, err := url.Parse(link); err == nil {
		return parsedURL.Host
	}
	return ""
}
|
||||
|
||||
// isEnvatoURL reports whether link points at an Envato property, i.e. its
// host contains "envato" or "videohive.net" (case-insensitive).
//
// The check is applied to the host so that an unrelated site whose path or
// query merely mentions "envato" is not accepted. When no host can be
// extracted (scheme-less or unparsable link) the whole link is searched
// instead, which keeps such inputs matching as before.
func isEnvatoURL(link string) bool {
	lower := strings.ToLower(strings.TrimSpace(link))
	target := lower
	if parsed, err := url.Parse(lower); err == nil && parsed.Hostname() != "" {
		target = parsed.Hostname()
	}
	return strings.Contains(target, "envato") || strings.Contains(target, "videohive.net")
}
|
||||
|
||||
func deriveThumbnail(link string) string {
|
||||
if link == "" {
|
||||
return ""
|
||||
}
|
||||
if videoID := extractYouTubeID(link); videoID != "" {
|
||||
return "https://i.ytimg.com/vi/" + videoID + "/hqdefault.jpg"
|
||||
}
|
||||
@@ -227,17 +230,15 @@ func extractYouTubeID(link string) string {
|
||||
return ""
|
||||
}
|
||||
|
||||
func inferSource(displayLink string) string {
|
||||
switch {
|
||||
case strings.Contains(displayLink, "youtube"):
|
||||
return "YouTube"
|
||||
case strings.Contains(displayLink, "tiktok"):
|
||||
return "TikTok"
|
||||
case strings.Contains(displayLink, "envato"):
|
||||
return "Envato"
|
||||
case strings.Contains(displayLink, "artgrid"):
|
||||
return "Artgrid"
|
||||
// sourceWeight returns the ranking weight of a result source label; a larger
// weight sorts earlier when results are ordered by descending weight.
// "Google Video" is 3, "Envato" is 2, "Artgrid" is 1, and any other label is 0.
func sourceWeight(source string) int {
	switch source {
	case "Google Video":
		return 3
	case "Envato":
		return 2
	case "Artgrid":
		return 1
	default:
		return 0
	}
}
|
||||
|
||||
Reference in New Issue
Block a user