diff --git a/TODO.md b/TODO.md
index 92c885b..3a367c4 100644
--- a/TODO.md
+++ b/TODO.md
@@ -64,6 +64,30 @@
- keep the anti-timeout optimization
- recover Envato/Artgrid recall when the early pass is too narrow
+## Current Session Update (2026-03-13, HTML Snapshot Analysis)
+- Used saved HTML snapshots supplied by the user for:
+ - Envato item page
+ - Artgrid clip page
+- Findings:
+ - Envato page exposes clean `VideoObject` JSON-LD with:
+ - exact asset title
+ - rich description
+ - thumbnail URL
+ - preview mp4 URL
+ - Artgrid page exposes reliable meta fields for:
+ - title
+ - description
+ - thumbnail
+ - canonical URL
+ - Artgrid snapshot still does **not** expose a stable preview mp4 or m3u8 in the saved HTML or downloaded asset bundle inspected here
+- Fixes applied from the snapshots:
+ - Envato enrichment now prefers `VideoObject` JSON-LD over generic meta tags
+ - Envato search cards should now align much better with the actual source asset and preview
+ - Artgrid title/description are now cleaned so Gemini/source text is less polluted by site suffixes and generic boilerplate
+- Remaining limitation:
+ - Artgrid hover-video preview cannot be derived reliably from the provided snapshot alone
+ - if Artgrid preview video is still required, the next useful artifact is a browser HAR or DevTools network capture from an opened clip page
+
## Local Self-Test Workflow
- Primary command:
- `bash scripts/selftest.sh`
diff --git a/backend/services/cse.go b/backend/services/cse.go
index 7b74180..0d657c2 100644
--- a/backend/services/cse.go
+++ b/backend/services/cse.go
@@ -197,17 +197,21 @@ func (s *SearchService) enrichEnvato(result SearchResult) SearchResult {
if err != nil {
return result
}
+ videoMeta := extractVideoObjectJSONLD(html)
result.Title = firstNonEmpty(
+ cleanEnvatoTitle(videoMeta.Name),
extractMetaContent(html, "og:title"),
result.Title,
)
result.Snippet = firstNonEmpty(
+ cleanEnvatoDescription(videoMeta.Description),
extractMetaContent(html, "og:description"),
extractMetaContent(html, "description"),
result.Snippet,
)
pageThumbnail := firstNonEmpty(
+ videoMeta.ThumbnailURL,
extractMetaContent(html, "og:image"),
extractMetaContent(html, "twitter:image"),
extractJSONLDValue(html, "thumbnailUrl"),
@@ -217,6 +221,7 @@ func (s *SearchService) enrichEnvato(result SearchResult) SearchResult {
}
if result.PreviewVideoURL == "" {
result.PreviewVideoURL = firstNonEmpty(
+ videoMeta.ContentURL,
extractJSONLDValue(html, "contentUrl"),
extractMetaContent(html, "twitter:player:stream"),
extractVideoPreviewURL(html),
@@ -249,12 +254,13 @@ func (s *SearchService) enrichArtgrid(result SearchResult) SearchResult {
html, err := s.fetchText(result.Link)
if err == nil {
result.Title = firstNonEmpty(
- extractMetaContent(html, "og:title"),
+ cleanArtgridTitle(extractMetaContent(html, "og:title")),
+ cleanArtgridTitle(extractMetaContent(html, "title")),
result.Title,
)
result.Snippet = firstNonEmpty(
- extractMetaContent(html, "og:description"),
- extractMetaContent(html, "description"),
+ cleanArtgridDescription(extractMetaContent(html, "og:description")),
+ cleanArtgridDescription(extractMetaContent(html, "description")),
result.Snippet,
)
pageThumbnail := firstNonEmpty(
@@ -630,6 +636,95 @@ func extractJSONLDValue(html, key string) string {
return ""
}
+// videoObjectMetadata holds the four fields that extractVideoObjectJSONLD
+// copies out of a JSON-LD object whose "@type" is "VideoObject". A field is
+// left empty when the corresponding JSON-LD property is absent or not a
+// string (or list containing a string).
+type videoObjectMetadata struct {
+ Name string // from the JSON-LD "name" property
+ Description string // from the JSON-LD "description" property
+ ThumbnailURL string // from the JSON-LD "thumbnailUrl" property
+ ContentURL string // from the JSON-LD "contentUrl" property
+}
+
+// videoObjectScriptPattern captures the body of every
+// <script type="application/ld+json"> element. The single capture group is
+// required: extractVideoObjectJSONLD only accepts matches with exactly one
+// submatch. It is compiled once at package scope so the expression is not
+// rebuilt for every enriched search result.
+var videoObjectScriptPattern = regexp.MustCompile(`(?is)<script[^>]*type=["']application/ld\+json["'][^>]*>(.*?)</script>`)
+
+// extractVideoObjectJSONLD scans html for JSON-LD script blocks and returns the
+// name, description, thumbnail URL and content URL of the first top-level
+// object whose "@type" equals "VideoObject" (case-insensitive) and that yields
+// at least one non-empty field. Blocks that are not a JSON object, or that
+// fail to parse, are skipped. The zero value is returned when nothing matches.
+func extractVideoObjectJSONLD(html string) videoObjectMetadata {
+	for _, match := range videoObjectScriptPattern.FindAllStringSubmatch(html, -1) {
+		if len(match) != 2 {
+			continue
+		}
+		payload, ok := decodeJSONLDObject(strings.TrimSpace(match[1]))
+		if !ok {
+			continue
+		}
+		typeName, _ := payload["@type"].(string)
+		if !strings.EqualFold(typeName, "VideoObject") {
+			continue
+		}
+		meta := videoObjectMetadata{
+			Name:         stringValue(payload["name"]),
+			Description:  stringValue(payload["description"]),
+			ThumbnailURL: stringValue(payload["thumbnailUrl"]),
+			ContentURL:   stringValue(payload["contentUrl"]),
+		}
+		if meta.Name != "" || meta.Description != "" || meta.ThumbnailURL != "" || meta.ContentURL != "" {
+			return meta
+		}
+	}
+	return videoObjectMetadata{}
+}
+
+// decodeJSONLDObject parses one JSON-LD script body into a generic object.
+// The entity-unescaped form is tried first, as before. The raw body is the
+// fallback, because unescaping an entity inside a JSON string (for example an
+// encoded double quote) can turn valid JSON into invalid JSON.
+func decodeJSONLDObject(raw string) (map[string]any, bool) {
+	for _, candidate := range []string{htmlUnescape(raw), raw} {
+		// A fresh map per attempt so a failed parse cannot leave partial keys.
+		var payload map[string]any
+		if err := json.Unmarshal([]byte(candidate), &payload); err == nil {
+			return payload, true
+		}
+	}
+	return nil, false
+}
+
+func stringValue(value any) string {
+ switch typed := value.(type) {
+ case string:
+ return htmlUnescape(strings.TrimSpace(typed))
+ case []any:
+ for _, item := range typed {
+ if text := stringValue(item); text != "" {
+ return text
+ }
+ }
+ }
+ return ""
+}
+
+func cleanEnvatoTitle(title string) string {
+ title = htmlUnescape(strings.TrimSpace(title))
+ return strings.TrimSuffix(title, " - Envato")
+}
+
+// cleanEnvatoDescription trims surrounding whitespace from description,
+// passes it through htmlUnescape, and then rewrites any remaining "&amp;"
+// entity to a plain "&".
+//
+// NOTE(review): htmlUnescape is defined elsewhere; this presumes it decodes a
+// single level of entities, so double-encoded ampersands survive it as
+// "&amp;" and need the extra replacement here.
+func cleanEnvatoDescription(description string) string {
+	description = htmlUnescape(strings.TrimSpace(description))
+	// The search string must be the entity itself; replacing "&" with "&"
+	// would be a no-op.
+	return strings.ReplaceAll(description, "&amp;", "&")
+}
+
+// cleanArtgridTitle reduces an Artgrid page title to the clip name. It trims
+// whitespace, passes the text through htmlUnescape, strips the known
+// " | ... Artgrid.io" site suffixes, and then drops a trailing " by <author>"
+// credit.
+func cleanArtgridTitle(title string) string {
+	title = htmlUnescape(strings.TrimSpace(title))
+	suffixes := []string{
+		" | Stock Video Footage * Artgrid.io*",
+		// En-dash variant, mirroring the en-dash/hyphen pair used for the
+		// "Royalty Free" suffixes below.
+		" | Stock Video Footage – Artgrid.io",
+		" | Stock Video Footage - Artgrid.io",
+		" | Royalty Free Stock Footage – Artgrid.io",
+		" | Royalty Free Stock Footage - Artgrid.io",
+	}
+	for _, suffix := range suffixes {
+		title = strings.TrimSuffix(title, suffix)
+	}
+	// The author credit is the last " by " in the title. Cutting at the first
+	// one would truncate clip names that themselves contain " by "
+	// (e.g. "walk by the river by Jane Doe").
+	if idx := strings.LastIndex(title, " by "); idx > 0 {
+		title = title[:idx]
+	}
+	return strings.TrimSpace(title)
+}
+
+// cleanArtgridDescription strips Artgrid boilerplate from a page description.
+// It trims whitespace, passes the text through htmlUnescape, removes the
+// trailing "Download this royalty free video ..." sentence and the
+// " | Royalty Free Stock Footage ... Artgrid.io" site suffix, and finally drops
+// everything up to and including the first remaining " | " separator.
+func cleanArtgridDescription(description string) string {
+	description = htmlUnescape(strings.TrimSpace(description))
+	description = strings.TrimSuffix(description, " Download this royalty free video and other Stunning Stock HD Videos from Artgrid.")
+	description = strings.TrimSuffix(description, " Download this royalty free video and other Stunning Stock HD Videos from Artgrid")
+	// Remove the site suffix before splitting. Otherwise an input of the form
+	// "<text> | Royalty Free Stock Footage – Artgrid.io" is split at its only
+	// separator and the boilerplate half is what gets returned.
+	description = strings.TrimSuffix(description, " | Royalty Free Stock Footage – Artgrid.io")
+	description = strings.TrimSuffix(description, " | Royalty Free Stock Footage - Artgrid.io")
+	if _, rest, found := strings.Cut(description, " | "); found {
+		description = rest
+	}
+	return strings.TrimSpace(description)
+}
+
func deriveEnvatoPreviewFromThumbnail(thumbnail string) string {
candidate := htmlUnescape(strings.TrimSpace(thumbnail))
if candidate == "" {
diff --git a/backend/services/cse_test.go b/backend/services/cse_test.go
index 0e790c3..493c85e 100644
--- a/backend/services/cse_test.go
+++ b/backend/services/cse_test.go
@@ -30,3 +30,22 @@ func TestIsUsefulGoogleVideoResultRejectsMusicResults(t *testing.T) {
t.Fatal("expected bgm/music result to be rejected")
}
}
+
+// TestExtractVideoObjectJSONLD checks that a VideoObject embedded in a JSON-LD
+// script tag is found and that all four metadata fields are populated.
+func TestExtractVideoObjectJSONLD(t *testing.T) {
+	// The fixture needs a real JSON-LD script block; with an empty document
+	// there is nothing to extract and every assertion below fails.
+	html := `<html><head><script type="application/ld+json">{"@context":"https://schema.org","@type":"VideoObject","name":"Smiling Man and Woman Waving at Camera","description":"A smiling man and woman wave at the camera.","thumbnailUrl":"https://example.com/thumbnail.jpg","contentUrl":"https://example.com/preview.mp4"}</script></head><body></body></html>`
+	meta := extractVideoObjectJSONLD(html)
+	if meta.Name != "Smiling Man and Woman Waving at Camera" {
+		t.Fatalf("unexpected name: %#v", meta)
+	}
+	if meta.ContentURL == "" || meta.ThumbnailURL == "" || meta.Description == "" {
+		t.Fatalf("expected full video object metadata, got %#v", meta)
+	}
+}
+
+func TestCleanArtgridTitle(t *testing.T) {
+ got := cleanArtgridTitle("movie film moving slowly from a reel by Arthur Cauty | Royalty Free Stock Footage – Artgrid.io")
+ want := "movie film moving slowly from a reel"
+ if got != want {
+ t.Fatalf("expected %q, got %q", want, got)
+ }
+}