Fix Artgrid collector matching and split ranker
build-push / docker (push) Successful in 4m16s

This commit is contained in:
AI Assistant
2026-03-13 19:31:57 +09:00
parent 5aebbef639
commit e4262613c3
6 changed files with 250 additions and 165 deletions
+60 -6
View File
@@ -90,6 +90,7 @@ func (s *SearchService) SearchMedia(queries []string, enabledPlatforms map[strin
continue
}
for _, item := range items {
item = normalizeResultForCollector(collector.Name(), item)
if item.Link == "" || seen[item.Link] || !collector.Accept(item) {
continue
}
@@ -379,10 +380,16 @@ func isRenderableArtgridResult(result SearchResult) bool {
if err != nil {
return false
}
if !strings.Contains(strings.ToLower(parsed.Host), "artgrid.io") {
host := strings.ToLower(parsed.Host)
switch {
case strings.Contains(host, "artgrid.io"):
return regexp.MustCompile(`^/clip/[0-9]+/`).MatchString(parsed.Path)
case strings.Contains(host, "artlist.io"):
trimmedPath := strings.TrimSuffix(parsed.Path, "/")
return regexp.MustCompile(`^/stock-footage/clip/.+/[0-9]+$`).MatchString(trimmedPath)
default:
return false
}
return regexp.MustCompile(`^/clip/[0-9]+/`).MatchString(parsed.Path)
}
func normalizeSource(source, link, engine string) string {
@@ -391,7 +398,7 @@ func normalizeSource(source, link, engine string) string {
return source
case strings.Contains(strings.ToLower(link), "envato") || strings.Contains(strings.ToLower(link), "videohive"):
return "Envato"
case strings.Contains(strings.ToLower(link), "artgrid"):
case strings.Contains(strings.ToLower(link), "artgrid"), strings.Contains(strings.ToLower(link), "artlist.io/stock-footage/clip/"):
return "Artgrid"
case strings.Contains(strings.ToLower(engine), "google"):
return "Google Video"
@@ -473,13 +480,60 @@ func extractArtgridBackgroundThumbnail(html, clipID string) string {
}
// Clip-ID patterns are compiled once at package scope so that
// extractArtgridClipID does not recompile them on every call.
var (
	// artlistClipIDPattern matches Artlist stock-footage URLs of the form
	// /stock-footage/clip/<slug...>/<id>, where the numeric ID is the final
	// path segment. It is meant to be applied to a link whose trailing slash
	// has already been removed.
	artlistClipIDPattern = regexp.MustCompile(`/stock-footage/clip/.+/([0-9]+)$`)

	// artgridClipIDPattern matches Artgrid URLs of the form /clip/<id>/...,
	// where the numeric ID is followed by a slash.
	artgridClipIDPattern = regexp.MustCompile(`/clip/([0-9]+)/`)
)

// extractArtgridClipID returns the numeric clip ID embedded in link, or the
// empty string when no supported URL shape is recognised.
//
// Two shapes are supported:
//
//   - Artlist: .../stock-footage/clip/<slug>/<id> (optional trailing slash)
//   - Artgrid: .../clip/<id>/...
//
// The Artlist shape is tried first because it is the more specific one; an
// Artlist link whose slug happens to be all digits would otherwise have the
// slug mistaken for the ID by the Artgrid pattern.
func extractArtgridClipID(link string) string {
	// The Artlist pattern is anchored at the end of the string, so a trailing
	// slash must be dropped before matching.
	if m := artlistClipIDPattern.FindStringSubmatch(strings.TrimSuffix(link, "/")); len(m) == 2 {
		return m[1]
	}
	// The Artgrid pattern requires a slash after the ID, so it is matched
	// against the untrimmed link; trimming first would reject ".../clip/123/".
	if m := artgridClipIDPattern.FindStringSubmatch(link); len(m) == 2 {
		return m[1]
	}
	return ""
}
// canonicalizeArtgridLink rewrites a link that carries a recognisable clip ID
// into the form "https://artgrid.io/clip/<id>/<slug>".
//
// The input is whitespace-trimmed first. The trimmed link is returned
// unchanged when it is empty, when no clip ID can be extracted from it, when
// it already contains "artgrid.io/clip/" (case-insensitively), or when it
// cannot be parsed as a URL. Otherwise the slug is the path segment that
// immediately precedes the first non-leading segment equal to the clip ID;
// if there is no such segment the clip ID itself is used as the slug.
func canonicalizeArtgridLink(link string) string {
	cleaned := strings.TrimSpace(link)
	if cleaned == "" {
		return ""
	}

	id := extractArtgridClipID(cleaned)
	alreadyCanonical := strings.Contains(strings.ToLower(cleaned), "artgrid.io/clip/")
	if id == "" || alreadyCanonical {
		return cleaned
	}

	u, err := url.Parse(cleaned)
	if err != nil {
		return cleaned
	}

	parts := strings.Split(strings.Trim(u.Path, "/"), "/")
	name := id
	// Start at 1: a match in the first segment has no preceding segment to
	// serve as the slug.
	for i := 1; i < len(parts); i++ {
		if parts[i] == id {
			name = parts[i-1]
			break
		}
	}
	return "https://artgrid.io/clip/" + id + "/" + name
}
// normalizeResultForCollector stamps result with the name of the collector
// that produced it and returns the adjusted copy.
//
// For the "Artgrid", "Envato" and "Google Video" collectors the Source field
// is overwritten with the collector name. Artgrid results additionally have
// their Link passed through canonicalizeArtgridLink. Results from any other
// collector are returned untouched.
func normalizeResultForCollector(source string, result SearchResult) SearchResult {
	if source == "Artgrid" {
		result.Link = canonicalizeArtgridLink(result.Link)
	}
	switch source {
	case "Artgrid", "Envato", "Google Video":
		// The collector name is exactly the label each case assigns.
		result.Source = source
	}
	return result
}
func collectURLs(body string) []string {
pattern := regexp.MustCompile(`https?:\/\/[^"'\\\s]+`)
matches := pattern.FindAllString(body, -1)