Expand search coverage and preview parsing
build-push / docker (push) Successful in 4m17s

This commit is contained in:
AI Assistant
2026-03-16 13:46:28 +09:00
parent 82cead950e
commit c92ef97c98
7 changed files with 143 additions and 19 deletions
+18
View File
@@ -340,6 +340,24 @@
- [x] local `/api/upload` through `scripts/selftest.sh` - [x] local `/api/upload` through `scripts/selftest.sh`
- [ ] full browser-level validation was not fully reproducible in this environment - [ ] full browser-level validation was not fully reproducible in this environment
## Recent Change Log
- Date: `2026-03-16`
- What changed:
- Increased collector result caps and widened source-specific search query templates for Envato, Artgrid, and Google Video.
- Strengthened Gemini query-expansion and vision prompts with a professional video-editor framing.
- Restored the result modal media fallback so that Google Video uses a YouTube embed, while Envato and Artgrid show a preview video or thumbnail instead of blocked iframe pages.
- Expanded generic preview URL parsing so HTML-embedded `.mp4` and `.m3u8` sources are accepted more broadly.
- Why it changed:
- Search result volume was too low.
- The user wanted Gemini to reason more like a professional editor.
- Envato iframe pages were being refused, Google Video modal opening was broken, and preview extraction still missed known media URLs.
- How it was verified:
- local code inspection against attached Envato / Artgrid HTML samples
- `go test ./...`
- What is still risky or incomplete:
- The attached Artgrid HTML sample is a generic homepage shell, so preview extraction still depends on what the live clip page or downstream assets expose at runtime.
- Some providers can still refuse iframe rendering, even though Artgrid pages currently appear to work.
## Unraid / Docker / CI Notes ## Unraid / Docker / CI Notes
- Dockerfile uses: - Dockerfile uses:
- Go build stage - Go build stage
+33 -8
View File
@@ -64,9 +64,9 @@ func (s *SearchService) SearchMedia(queries []string, enabledPlatforms map[strin
results := make([]SearchResult, 0, 90) results := make([]SearchResult, 0, 90)
var lastErr error var lastErr error
baseQueries := limitQueries(queries, 6) baseQueries := limitQueries(queries, 10)
shuffleStrings(baseQueries) shuffleStrings(baseQueries)
primaryQueries := baseQueries[:minInt(len(baseQueries), 3)] primaryQueries := baseQueries[:minInt(len(baseQueries), 5)]
runSearchPass := func(bases []string, onlyMissing bool) { runSearchPass := func(bases []string, onlyMissing bool) {
for _, base := range bases { for _, base := range bases {
base = strings.TrimSpace(base) base = strings.TrimSpace(base)
@@ -331,6 +331,8 @@ func (s *SearchService) search(query, categories, engine, source string) ([]Sear
func buildGoogleVideoQueries(base string) []string { func buildGoogleVideoQueries(base string) []string {
return []string{ return []string{
fmt.Sprintf(`"%s" ("stock footage" OR "b-roll" OR cinematic OR "establishing shot" OR editorial) -tutorial -"how to" -review -reaction -course -podcast -vlog -interview -breakdown -edit -editing`, base), fmt.Sprintf(`"%s" ("stock footage" OR "b-roll" OR cinematic OR "establishing shot" OR editorial) -tutorial -"how to" -review -reaction -course -podcast -vlog -interview -breakdown -edit -editing`, base),
fmt.Sprintf(`"%s" ("cinematic b-roll" OR "establishing shot" OR "drone footage" OR "urban footage") -tutorial -reaction -vlog -podcast`, base),
fmt.Sprintf(`"%s" ("night drive" OR "city footage" OR "street footage" OR "editorial footage") -tutorial -review -music`, base),
} }
} }
@@ -338,6 +340,8 @@ func buildEnvatoQueries(base string) []string {
return []string{ return []string{
fmt.Sprintf(`"%s" ("stock footage" OR "stock video" OR "b-roll" OR cinematic) site:elements.envato.com`, base), fmt.Sprintf(`"%s" ("stock footage" OR "stock video" OR "b-roll" OR cinematic) site:elements.envato.com`, base),
fmt.Sprintf(`"%s" ("stock footage" OR "stock video" OR "b-roll" OR cinematic) site:elements.envato.com/stock-video`, base), fmt.Sprintf(`"%s" ("stock footage" OR "stock video" OR "b-roll" OR cinematic) site:elements.envato.com/stock-video`, base),
fmt.Sprintf(`"%s" ("motion graphics" OR "backgrounds" OR "establishing shot" OR "loop") site:elements.envato.com`, base),
fmt.Sprintf(`"%s" ("urban" OR "night city" OR "cyberpunk" OR "sci-fi") site:elements.envato.com`, base),
} }
} }
@@ -347,6 +351,8 @@ func buildArtgridQueries(base string) []string {
fmt.Sprintf(`"%s" ("footage" OR "cinematic" OR "establishing shot") site:artgrid.io/clip/`, base), fmt.Sprintf(`"%s" ("footage" OR "cinematic" OR "establishing shot") site:artgrid.io/clip/`, base),
fmt.Sprintf(`"%s" ("stock footage" OR "b-roll" OR cinematic OR editorial) site:artlist.io/stock-footage/clip/`, base), fmt.Sprintf(`"%s" ("stock footage" OR "b-roll" OR cinematic OR editorial) site:artlist.io/stock-footage/clip/`, base),
fmt.Sprintf(`"%s" ("footage" OR "cinematic" OR "establishing shot") site:artlist.io/stock-footage/clip/`, base), fmt.Sprintf(`"%s" ("footage" OR "cinematic" OR "establishing shot") site:artlist.io/stock-footage/clip/`, base),
fmt.Sprintf(`"%s" ("night drive" OR "urban night" OR "wet road" OR "cyberpunk") site:artgrid.io/clip/`, base),
fmt.Sprintf(`"%s" ("drone" OR "city skyline" OR "street scene" OR "mood shot") site:artlist.io/stock-footage/clip/`, base),
} }
} }
@@ -466,16 +472,23 @@ func extractMetaContent(html, property string) string {
} }
// previewEscapedSlashRE matches a forward slash that was escaped for embedding
// in JSON or JavaScript: one or more backslashes followed by "/", or one or
// more backslashes followed by the Unicode escape u002F in either letter case.
// Matching a run of backslashes covers singly and multiply escaped payloads.
var previewEscapedSlashRE = regexp.MustCompile(`\\+/|(?i:\\+u002f)`)

// previewMediaURLRE matches an absolute http(s) URL whose path ends in "mp4" or
// "m3u8", optionally followed by a query string. The URL stops at the first
// quote, whitespace character, or ">".
var previewMediaURLRE = regexp.MustCompile(`https?://[^"'[:space:]>]+(?:mp4|m3u8)(?:\?[^"'[:space:]>]*)?`)

// extractVideoPreviewURL scans html for embedded .mp4 / .m3u8 URLs and returns
// the best candidate, or "" when none is found.
//
// Escaped slashes are normalized before matching so URLs embedded in script or
// JSON blobs are recognized. A URL containing "preview", "video", or
// "watermark" (case-insensitive) wins; otherwise the first URL found is
// returned.
func extractVideoPreviewURL(html string) string {
	normalizedHTML := previewEscapedSlashRE.ReplaceAllString(html, "/")

	fallback := ""
	for _, match := range previewMediaURLRE.FindAllString(normalizedHTML, -1) {
		// Drop any doubled backslashes that survived inside the matched URL.
		candidate := strings.TrimSpace(strings.ReplaceAll(match, `\\`, ""))
		if candidate == "" {
			continue
		}
		lower := strings.ToLower(candidate)
		if strings.Contains(lower, "preview") || strings.Contains(lower, "video") || strings.Contains(lower, "watermark") {
			return candidate
		}
		// Remember the first usable URL in case no preferred one shows up.
		if fallback == "" {
			fallback = candidate
		}
	}
	return fallback
}
@@ -572,6 +585,12 @@ func pickImageURL(urls []string) string {
} }
func pickVideoURL(urls []string) string { func pickVideoURL(urls []string) string {
for _, item := range urls {
lower := strings.ToLower(item)
if strings.Contains(lower, ".m3u8") && (strings.Contains(lower, "artgrid") || strings.Contains(lower, "artlist") || strings.Contains(lower, "cdn")) {
return item
}
}
for _, item := range urls { for _, item := range urls {
lower := strings.ToLower(item) lower := strings.ToLower(item)
if strings.Contains(lower, ".mp4") || strings.Contains(lower, ".m3u8") { if strings.Contains(lower, ".mp4") || strings.Contains(lower, ".m3u8") {
@@ -818,7 +837,7 @@ func extractEnvatoPreviewFromHydration(html string) string {
return "" return ""
} }
urls := collectURLs(string(decoded)) urls := collectURLs(string(decoded))
return pickBestEnvatoPreviewURL(urls) return firstNonEmpty(pickBestEnvatoPreviewURL(urls), extractVideoPreviewURL(string(decoded)))
} }
func extractWindowAssignedValue(html, variable string) string { func extractWindowAssignedValue(html, variable string) string {
@@ -831,6 +850,12 @@ func extractWindowAssignedValue(html, variable string) string {
} }
func pickBestEnvatoPreviewURL(urls []string) string { func pickBestEnvatoPreviewURL(urls []string) string {
for _, item := range urls {
lower := strings.ToLower(item)
if strings.Contains(lower, "envatousercontent.com") && strings.HasSuffix(lower, ".mp4") {
return item
}
}
for _, item := range urls { for _, item := range urls {
lower := strings.ToLower(item) lower := strings.ToLower(item)
if strings.Contains(lower, "video-previews.elements.envatousercontent.com") && strings.Contains(lower, "watermarked_preview") && strings.HasSuffix(lower, ".mp4") { if strings.Contains(lower, "video-previews.elements.envatousercontent.com") && strings.Contains(lower, "watermarked_preview") && strings.HasSuffix(lower, ".mp4") {
+9
View File
@@ -15,6 +15,15 @@ func TestExtractVideoPreviewURLFindsEnvatoPreview(t *testing.T) {
} }
} }
// TestExtractVideoPreviewURLFindsGenericM3U8 checks that a slash-escaped .m3u8
// URL embedded in a script tag is returned unescaped with its query string
// intact.
func TestExtractVideoPreviewURLFindsGenericM3U8(t *testing.T) {
	const (
		page     = `<script>window.preview="https:\/\/cdn.example.com\/preview\/master.m3u8?token=abc"</script>`
		expected = "https://cdn.example.com/preview/master.m3u8?token=abc"
	)
	if actual := extractVideoPreviewURL(page); actual != expected {
		t.Fatalf("expected %q, got %q", expected, actual)
	}
}
func TestDeriveEnvatoPreviewFromThumbnail(t *testing.T) { func TestDeriveEnvatoPreviewFromThumbnail(t *testing.T) {
thumb := "https://elements-resized.envatousercontent.com/elements-video-cover-images/ad0a3abc-7eb0-4075-8f68-8198f9a08777/video_preview/video_preview_0000.jpg?w=1200&amp;h=630" thumb := "https://elements-resized.envatousercontent.com/elements-video-cover-images/ad0a3abc-7eb0-4075-8f68-8198f9a08777/video_preview/video_preview_0000.jpg?w=1200&amp;h=630"
got := deriveEnvatoPreviewFromThumbnail(thumb) got := deriveEnvatoPreviewFromThumbnail(thumb)
+4 -4
View File
@@ -71,7 +71,7 @@ func (g *GeminiService) TranslateQuery(query string) string {
"systemInstruction": map[string]any{ "systemInstruction": map[string]any{
"parts": []map[string]string{ "parts": []map[string]string{
{ {
"text": "You translate media search intents into natural English. Output one plain English search phrase only. No labels, no quotes, no explanations.", "text": "You are a professional video editor. Infer stronger stock-footage and scene-search wording from the user's keyword, and expand it into natural English that a professional editor would use to find usable footage. Output one plain English search phrase only. No labels, no quotes, no explanations.",
}, },
}, },
}, },
@@ -79,7 +79,7 @@ func (g *GeminiService) TranslateQuery(query string) string {
{ {
"parts": []map[string]string{ "parts": []map[string]string{
{ {
"text": "Translate this user query into concise English suitable for stock-footage search: " + trimmed, "text": "Expand this user query into a concise but editor-grade English footage search phrase suitable for stock-footage discovery: " + trimmed,
}, },
}, },
}, },
@@ -152,13 +152,13 @@ func (g *GeminiService) Recommend(query string, candidates []SearchResult) ([]AI
type geminiPart map[string]any type geminiPart map[string]any
parts := []geminiPart{ parts := []geminiPart{
{ {
"text": `Analyze the provided images for the user's search intent. Return JSON only in this shape: "text": `You are a professional video editor. Analyze whether each provided visual is suitable as a usable scene or shot for the user's requested keyword. Return JSON only in this shape:
{"recommendations":[{"index":0,"reason":"short reason","recommended":true}]} {"recommendations":[{"index":0,"reason":"short reason","recommended":true}]}
Return one entry for every analyzed candidate. Use Korean for every reason. Keep reasons concise but specific enough to explain usefulness. Return one entry for every analyzed candidate. Use Korean for every reason. Keep reasons concise but specific enough to explain usefulness.
Mark the strongest matches as recommended=true and weaker matches as recommended=false. Mark the strongest matches as recommended=true and weaker matches as recommended=false.
Prefer cinematic b-roll, stock footage, editorial footage, clean composition, usable establishing shots, and professional media thumbnails. Prefer cinematic b-roll, stock footage, editorial footage, clean composition, usable establishing shots, and professional media thumbnails.
Avoid clickbait faces, exaggerated expressions, meme aesthetics, low-information thumbnails, sensational text overlays, or gossip-style imagery. Avoid clickbait faces, exaggerated expressions, meme aesthetics, low-information thumbnails, sensational text overlays, or gossip-style imagery.
Favor thumbnails that look directly useful for media editing and footage sourcing. Favor scenes that look directly useful for professional editing, sequencing, establishing, cutaway, or mood-building usage.
User query: ` + query, User query: ` + query,
}, },
} }
+3 -3
View File
@@ -15,7 +15,7 @@ type searchCollector interface {
type envatoCollector struct{} type envatoCollector struct{}
func (envatoCollector) Name() string { return "Envato" } func (envatoCollector) Name() string { return "Envato" }
func (envatoCollector) MaxResults() int { return 8 } func (envatoCollector) MaxResults() int { return 14 }
func (envatoCollector) Enabled(enabledPlatforms map[string]bool) bool { func (envatoCollector) Enabled(enabledPlatforms map[string]bool) bool {
return len(enabledPlatforms) == 0 || enabledPlatforms["envato"] return len(enabledPlatforms) == 0 || enabledPlatforms["envato"]
} }
@@ -31,7 +31,7 @@ func (envatoCollector) Enrich(searcher *SearchService, result SearchResult) Sear
type artgridCollector struct{} type artgridCollector struct{}
func (artgridCollector) Name() string { return "Artgrid" } func (artgridCollector) Name() string { return "Artgrid" }
func (artgridCollector) MaxResults() int { return 8 } func (artgridCollector) MaxResults() int { return 14 }
func (artgridCollector) Enabled(enabledPlatforms map[string]bool) bool { func (artgridCollector) Enabled(enabledPlatforms map[string]bool) bool {
return len(enabledPlatforms) == 0 || enabledPlatforms["artgrid"] return len(enabledPlatforms) == 0 || enabledPlatforms["artgrid"]
} }
@@ -47,7 +47,7 @@ func (artgridCollector) Enrich(searcher *SearchService, result SearchResult) Sea
type googleVideoCollector struct{} type googleVideoCollector struct{}
func (googleVideoCollector) Name() string { return "Google Video" } func (googleVideoCollector) Name() string { return "Google Video" }
func (googleVideoCollector) MaxResults() int { return 6 } func (googleVideoCollector) MaxResults() int { return 10 }
func (googleVideoCollector) Enabled(enabledPlatforms map[string]bool) bool { func (googleVideoCollector) Enabled(enabledPlatforms map[string]bool) bool {
return len(enabledPlatforms) == 0 || enabledPlatforms["google video"] return len(enabledPlatforms) == 0 || enabledPlatforms["google video"]
} }
+71 -1
View File
@@ -43,6 +43,9 @@ const resultModalSource = document.getElementById("resultModalSource");
const resultModalSnippet = document.getElementById("resultModalSnippet"); const resultModalSnippet = document.getElementById("resultModalSnippet");
const resultModalReason = document.getElementById("resultModalReason"); const resultModalReason = document.getElementById("resultModalReason");
const resultModalFrame = document.getElementById("resultModalFrame"); const resultModalFrame = document.getElementById("resultModalFrame");
const resultModalMediaFrame = document.getElementById("resultModalMediaFrame");
const resultModalVideo = document.getElementById("resultModalVideo");
const resultModalThumbnail = document.getElementById("resultModalThumbnail");
const resultModalOpenExternal = document.getElementById("resultModalOpenExternal"); const resultModalOpenExternal = document.getElementById("resultModalOpenExternal");
const resultModalDownload = document.getElementById("resultModalDownload"); const resultModalDownload = document.getElementById("resultModalDownload");
const closeResultModal = document.getElementById("closeResultModal"); const closeResultModal = document.getElementById("closeResultModal");
@@ -53,6 +56,9 @@ const resultModalReady = Boolean(
resultModalSnippet && resultModalSnippet &&
resultModalReason && resultModalReason &&
resultModalFrame && resultModalFrame &&
resultModalMediaFrame &&
resultModalVideo &&
resultModalThumbnail &&
resultModalOpenExternal && resultModalOpenExternal &&
resultModalDownload && resultModalDownload &&
closeResultModal, closeResultModal,
@@ -157,6 +163,23 @@ function toClock(totalSeconds) {
return `${hours}:${minutes}:${secs}`; return `${hours}:${minutes}:${secs}`;
} }
// Returns the 11-character YouTube video id found in `link`, or "" when the
// link is empty or carries no recognizable id. Watch (`v=`), `/shorts/` and
// `/embed/` forms are tried first, then the `youtu.be/` short-link form.
function extractYouTubeID(link) {
  if (!link) {
    return "";
  }
  const pathOrQueryForm = /(?:v=|\/shorts\/|\/embed\/)([A-Za-z0-9_-]{11})/;
  const shortLinkForm = /youtu\.be\/([A-Za-z0-9_-]{11})/;
  // The second pattern is only evaluated when the first one does not match.
  const [, id = ""] = link.match(pathOrQueryForm) || link.match(shortLinkForm) || [];
  return id;
}
function syncRanges() { function syncRanges() {
let start = cropStart; let start = cropStart;
let end = cropEnd; let end = cropEnd;
@@ -374,6 +397,35 @@ function resetResultModalMedia() {
return; return;
} }
resultModalFrame.src = "about:blank"; resultModalFrame.src = "about:blank";
resultModalVideo.pause();
detachVideoSource(resultModalVideo);
resultModalThumbnail.removeAttribute("src");
resultModalMediaFrame.style.aspectRatio = "";
setHidden(resultModalFrame, true, "");
setHidden(resultModalVideo, true, "");
setHidden(resultModalThumbnail, true, "");
}
// Points the result modal iframe at `src` and reveals it.
// Does nothing when `src` is empty.
function showResultModalFrame(src) {
  if (src) {
    resultModalFrame.src = src;
    setHidden(resultModalFrame, false, "");
  }
}
// Attaches `src` to the result modal <video> element and reveals it.
// Does nothing when `src` is empty.
function showResultModalVideo(src) {
  if (src) {
    attachVideoSource(resultModalVideo, src);
    setHidden(resultModalVideo, false, "");
  }
}
function showResultModalThumbnail(src, alt) {
resultModalThumbnail.src = src || PREVIEW_PLACEHOLDER;
resultModalThumbnail.alt = alt || "";
setHidden(resultModalThumbnail, false, "");
} }
function renderResults(results) { function renderResults(results) {
@@ -452,7 +504,13 @@ function openResultModal(item) {
const canDirectDownload = item.source === "Google Video" && item.link; const canDirectDownload = item.source === "Google Video" && item.link;
resultModalDownload.classList.toggle("hidden", !canDirectDownload); resultModalDownload.classList.toggle("hidden", !canDirectDownload);
resetResultModalMedia(); resetResultModalMedia();
resultModalFrame.src = buildResultModalEmbedURL(item); if (item.source === "Google Video") {
showResultModalFrame(buildResultModalEmbedURL(item));
} else if (item.previewVideoUrl) {
showResultModalVideo(item.previewVideoUrl);
} else {
showResultModalThumbnail(item.thumbnailUrl, item.title || "");
}
showModal(resultModal); showModal(resultModal);
logEvent("result:modal:open", { title: item.title, source: item.source, link: item.link }); logEvent("result:modal:open", { title: item.title, source: item.source, link: item.link });
} }
@@ -683,6 +741,18 @@ previewThumbnail.addEventListener("load", () => {
previewMediaFrame.style.aspectRatio = `${previewThumbnail.naturalWidth} / ${previewThumbnail.naturalHeight}`; previewMediaFrame.style.aspectRatio = `${previewThumbnail.naturalWidth} / ${previewThumbnail.naturalHeight}`;
} }
}); });
// Once the modal's video metadata or thumbnail has loaded, size the media frame
// to the media's intrinsic aspect ratio. Only wired up when every result modal
// element was found.
if (resultModalReady) {
  // Applies the ratio only for positive dimensions, leaving the frame untouched otherwise.
  const fitMediaFrame = (width, height) => {
    if (width > 0 && height > 0) {
      resultModalMediaFrame.style.aspectRatio = `${width} / ${height}`;
    }
  };
  resultModalVideo.addEventListener("loadedmetadata", () => {
    fitMediaFrame(resultModalVideo.videoWidth, resultModalVideo.videoHeight);
  });
  resultModalThumbnail.addEventListener("load", () => {
    fitMediaFrame(resultModalThumbnail.naturalWidth, resultModalThumbnail.naturalHeight);
  });
}
for (const button of platformToggles) { for (const button of platformToggles) {
button.addEventListener("click", () => { button.addEventListener("click", () => {
const platform = button.dataset.platformToggle; const platform = button.dataset.platformToggle;
+5 -3
View File
@@ -162,8 +162,10 @@
</div> </div>
</div> </div>
<div class="border-b border-white/10 bg-black/40 p-4"> <div class="border-b border-white/10 bg-black/40 p-4">
<div class="aspect-video overflow-hidden rounded-2xl border border-white/10 bg-black"> <div id="resultModalMediaFrame" class="aspect-video overflow-hidden rounded-2xl border border-white/10 bg-black">
<iframe id="resultModalFrame" class="h-full w-full bg-white" referrerpolicy="no-referrer" allow="autoplay; fullscreen; encrypted-media; picture-in-picture" allowfullscreen></iframe> <iframe id="resultModalFrame" class="hidden h-full w-full bg-white" referrerpolicy="no-referrer" allow="autoplay; fullscreen; encrypted-media; picture-in-picture" allowfullscreen></iframe>
<video id="resultModalVideo" class="hidden h-full w-full bg-black object-contain" controls playsinline></video>
<img id="resultModalThumbnail" class="hidden h-full w-full object-contain" alt="" />
</div> </div>
</div> </div>
<div class="grid gap-5 px-5 py-5 lg:grid-cols-[1.6fr_0.8fr]"> <div class="grid gap-5 px-5 py-5 lg:grid-cols-[1.6fr_0.8fr]">
@@ -200,6 +202,6 @@
</button> </button>
</template> </template>
<script src="/app.js?v=20260316c" defer></script> <script src="/app.js?v=20260316d" defer></script>
</body> </body>
</html> </html>