This commit is contained in:
+60
-6
@@ -90,6 +90,7 @@ func (s *SearchService) SearchMedia(queries []string, enabledPlatforms map[strin
|
||||
continue
|
||||
}
|
||||
for _, item := range items {
|
||||
item = normalizeResultForCollector(collector.Name(), item)
|
||||
if item.Link == "" || seen[item.Link] || !collector.Accept(item) {
|
||||
continue
|
||||
}
|
||||
@@ -379,10 +380,16 @@ func isRenderableArtgridResult(result SearchResult) bool {
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
if !strings.Contains(strings.ToLower(parsed.Host), "artgrid.io") {
|
||||
host := strings.ToLower(parsed.Host)
|
||||
switch {
|
||||
case strings.Contains(host, "artgrid.io"):
|
||||
return regexp.MustCompile(`^/clip/[0-9]+/`).MatchString(parsed.Path)
|
||||
case strings.Contains(host, "artlist.io"):
|
||||
trimmedPath := strings.TrimSuffix(parsed.Path, "/")
|
||||
return regexp.MustCompile(`^/stock-footage/clip/.+/[0-9]+$`).MatchString(trimmedPath)
|
||||
default:
|
||||
return false
|
||||
}
|
||||
return regexp.MustCompile(`^/clip/[0-9]+/`).MatchString(parsed.Path)
|
||||
}
|
||||
|
||||
func normalizeSource(source, link, engine string) string {
|
||||
@@ -391,7 +398,7 @@ func normalizeSource(source, link, engine string) string {
|
||||
return source
|
||||
case strings.Contains(strings.ToLower(link), "envato") || strings.Contains(strings.ToLower(link), "videohive"):
|
||||
return "Envato"
|
||||
case strings.Contains(strings.ToLower(link), "artgrid"):
|
||||
case strings.Contains(strings.ToLower(link), "artgrid"), strings.Contains(strings.ToLower(link), "artlist.io/stock-footage/clip/"):
|
||||
return "Artgrid"
|
||||
case strings.Contains(strings.ToLower(engine), "google"):
|
||||
return "Google Video"
|
||||
@@ -473,13 +480,60 @@ func extractArtgridBackgroundThumbnail(html, clipID string) string {
|
||||
}
|
||||
|
||||
// Clip-ID patterns are compiled once at package scope instead of on every call.
var (
	// artlistClipIDPattern captures the trailing numeric ID of an Artlist
	// stock-footage link of the form /stock-footage/clip/<slug>/<id>.
	artlistClipIDPattern = regexp.MustCompile(`/stock-footage/clip/.+/([0-9]+)$`)
	// artgridClipIDPattern captures the numeric ID of an Artgrid link of the
	// form /clip/<id>/<slug>.
	artgridClipIDPattern = regexp.MustCompile(`/clip/([0-9]+)/`)
)

// extractArtgridClipID returns the numeric clip ID embedded in link, or ""
// when link matches neither supported URL shape.
//
// Two shapes are recognised:
//   - Artlist stock footage: .../stock-footage/clip/<slug>/<id>[/]
//   - Artgrid:               .../clip/<id>/[<slug>]
//
// The Artlist shape is checked first because it is the more specific one;
// otherwise an all-numeric slug would be mistaken for the clip ID.
func extractArtgridClipID(link string) string {
	// The Artlist pattern is anchored at the end, so tolerate one trailing slash.
	if m := artlistClipIDPattern.FindStringSubmatch(strings.TrimSuffix(link, "/")); len(m) == 2 {
		return m[1]
	}
	// The Artgrid pattern needs the slash after the ID, so match the untrimmed
	// link: ".../clip/123/" must still yield "123".
	if m := artgridClipIDPattern.FindStringSubmatch(link); len(m) == 2 {
		return m[1]
	}
	return ""
}
|
||||
|
||||
func canonicalizeArtgridLink(link string) string {
|
||||
trimmed := strings.TrimSpace(link)
|
||||
if trimmed == "" {
|
||||
return ""
|
||||
}
|
||||
clipID := extractArtgridClipID(trimmed)
|
||||
if clipID == "" {
|
||||
return trimmed
|
||||
}
|
||||
if strings.Contains(strings.ToLower(trimmed), "artgrid.io/clip/") {
|
||||
return trimmed
|
||||
}
|
||||
parsed, err := url.Parse(trimmed)
|
||||
if err != nil {
|
||||
return trimmed
|
||||
}
|
||||
segments := strings.Split(strings.Trim(parsed.Path, "/"), "/")
|
||||
slug := clipID
|
||||
for idx, segment := range segments {
|
||||
if segment == clipID && idx > 0 {
|
||||
slug = segments[idx-1]
|
||||
break
|
||||
}
|
||||
}
|
||||
return "https://artgrid.io/clip/" + clipID + "/" + slug
|
||||
}
|
||||
|
||||
func normalizeResultForCollector(source string, result SearchResult) SearchResult {
|
||||
switch source {
|
||||
case "Artgrid":
|
||||
result.Link = canonicalizeArtgridLink(result.Link)
|
||||
result.Source = "Artgrid"
|
||||
case "Envato":
|
||||
result.Source = "Envato"
|
||||
case "Google Video":
|
||||
result.Source = "Google Video"
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
func collectURLs(body string) []string {
|
||||
pattern := regexp.MustCompile(`https?:\/\/[^"'\\\s]+`)
|
||||
matches := pattern.FindAllString(body, -1)
|
||||
|
||||
Reference in New Issue
Block a user