Expand search coverage and preview parsing
build-push / docker (push) Successful in 4m17s

This commit is contained in:
AI Assistant
2026-03-16 13:46:28 +09:00
parent 82cead950e
commit c92ef97c98
7 changed files with 143 additions and 19 deletions
+18
View File
@@ -340,6 +340,24 @@
- [x] local `/api/upload` through `scripts/selftest.sh`
- [ ] browser-level validation could not be fully reproduced in this environment
## Recent Change Log
- Date: `2026-03-16`
- What changed:
- Increased collector result caps and widened source-specific search query templates for Envato, Artgrid, and Google Video.
- Strengthened Gemini query-expansion and vision prompts with a professional video-editor framing.
- Restored the result-modal media fallback: Google Video uses a YouTube embed, while Envato and Artgrid show a preview video or a thumbnail instead of a blocked iframe page.
- Expanded generic preview URL parsing so HTML-embedded `.mp4` and `.m3u8` sources are accepted more broadly.
- Why it changed:
- Search result volume was too low.
- The user wanted Gemini to reason more like a professional editor.
- Envato iframe pages were being refused, Google Video modal opening was broken, and preview extraction still missed known media URLs.
- How it was verified:
- local code inspection against attached Envato / Artgrid HTML samples
- `go test ./...`
- What is still risky or incomplete:
- The attached Artgrid HTML sample is a generic homepage shell, so preview extraction still depends on what the live clip page or downstream assets expose at runtime.
- Some providers can still refuse iframe rendering even when Artgrid pages currently appear to work.
## Unraid / Docker / CI Notes
- Dockerfile uses:
- Go build stage
+33 -8
View File
@@ -64,9 +64,9 @@ func (s *SearchService) SearchMedia(queries []string, enabledPlatforms map[strin
results := make([]SearchResult, 0, 90)
var lastErr error
baseQueries := limitQueries(queries, 6)
baseQueries := limitQueries(queries, 10)
shuffleStrings(baseQueries)
primaryQueries := baseQueries[:minInt(len(baseQueries), 3)]
primaryQueries := baseQueries[:minInt(len(baseQueries), 5)]
runSearchPass := func(bases []string, onlyMissing bool) {
for _, base := range bases {
base = strings.TrimSpace(base)
@@ -331,6 +331,8 @@ func (s *SearchService) search(query, categories, engine, source string) ([]Sear
// buildGoogleVideoQueries expands base into the Google Video query variants.
// Every variant quotes base verbatim, ORs a group of footage-oriented phrases,
// and appends negative terms. The returned slice keeps the variants in
// template order.
func buildGoogleVideoQueries(base string) []string {
	templates := [...]string{
		`"%s" ("stock footage" OR "b-roll" OR cinematic OR "establishing shot" OR editorial) -tutorial -"how to" -review -reaction -course -podcast -vlog -interview -breakdown -edit -editing`,
		`"%s" ("cinematic b-roll" OR "establishing shot" OR "drone footage" OR "urban footage") -tutorial -reaction -vlog -podcast`,
		`"%s" ("night drive" OR "city footage" OR "street footage" OR "editorial footage") -tutorial -review -music`,
	}

	queries := make([]string, 0, len(templates))
	for _, tmpl := range templates {
		queries = append(queries, fmt.Sprintf(tmpl, base))
	}
	return queries
}
@@ -338,6 +340,8 @@ func buildEnvatoQueries(base string) []string {
return []string{
fmt.Sprintf(`"%s" ("stock footage" OR "stock video" OR "b-roll" OR cinematic) site:elements.envato.com`, base),
fmt.Sprintf(`"%s" ("stock footage" OR "stock video" OR "b-roll" OR cinematic) site:elements.envato.com/stock-video`, base),
fmt.Sprintf(`"%s" ("motion graphics" OR "backgrounds" OR "establishing shot" OR "loop") site:elements.envato.com`, base),
fmt.Sprintf(`"%s" ("urban" OR "night city" OR "cyberpunk" OR "sci-fi") site:elements.envato.com`, base),
}
}
@@ -347,6 +351,8 @@ func buildArtgridQueries(base string) []string {
fmt.Sprintf(`"%s" ("footage" OR "cinematic" OR "establishing shot") site:artgrid.io/clip/`, base),
fmt.Sprintf(`"%s" ("stock footage" OR "b-roll" OR cinematic OR editorial) site:artlist.io/stock-footage/clip/`, base),
fmt.Sprintf(`"%s" ("footage" OR "cinematic" OR "establishing shot") site:artlist.io/stock-footage/clip/`, base),
fmt.Sprintf(`"%s" ("night drive" OR "urban night" OR "wet road" OR "cyberpunk") site:artgrid.io/clip/`, base),
fmt.Sprintf(`"%s" ("drone" OR "city skyline" OR "street scene" OR "mood shot") site:artlist.io/stock-footage/clip/`, base),
}
}
@@ -466,16 +472,23 @@ func extractMetaContent(html, property string) string {
}
// videoPreviewURLPattern matches an absolute http(s) URL that contains "mp4"
// or "m3u8", optionally followed by a query string. It is compiled once at
// package scope rather than on every call.
var videoPreviewURLPattern = regexp.MustCompile(`https?://[^"'[:space:]>]+(?:mp4|m3u8)(?:\?[^"'[:space:]>]*)?`)

// extractVideoPreviewURL scans html for mp4/m3u8 media URLs and returns the
// best candidate.
//
// Escaped slashes (`\\\/`, `\/` and `\u002F`) are normalized to "/" before
// matching, so URLs embedded in script or JSON payloads are found. The first
// candidate whose lowercase form contains "preview", "video" or "watermark"
// is returned; if none qualifies, the first candidate found is returned. An
// empty string means no media URL was found.
func extractVideoPreviewURL(html string) string {
	// The replacements stay sequential: the triple-backslash form must be
	// collapsed before the single-backslash form is handled.
	normalized := strings.ReplaceAll(html, `\\\/`, `/`)
	normalized = strings.ReplaceAll(normalized, `\/`, `/`)
	normalized = strings.ReplaceAll(normalized, `\u002F`, `/`)

	fallback := ""
	for _, match := range videoPreviewURLPattern.FindAllString(normalized, -1) {
		// Drop leftover doubled backslashes that survived normalization.
		candidate := strings.TrimSpace(strings.ReplaceAll(match, `\\`, ""))
		if candidate == "" {
			continue
		}
		lower := strings.ToLower(candidate)
		if strings.Contains(lower, "preview") || strings.Contains(lower, "video") || strings.Contains(lower, "watermark") {
			return candidate
		}
		// Remember the first usable URL in case no preferred one appears.
		if fallback == "" {
			fallback = candidate
		}
	}
	return fallback
}
@@ -572,6 +585,12 @@ func pickImageURL(urls []string) string {
}
func pickVideoURL(urls []string) string {
for _, item := range urls {
lower := strings.ToLower(item)
if strings.Contains(lower, ".m3u8") && (strings.Contains(lower, "artgrid") || strings.Contains(lower, "artlist") || strings.Contains(lower, "cdn")) {
return item
}
}
for _, item := range urls {
lower := strings.ToLower(item)
if strings.Contains(lower, ".mp4") || strings.Contains(lower, ".m3u8") {
@@ -818,7 +837,7 @@ func extractEnvatoPreviewFromHydration(html string) string {
return ""
}
urls := collectURLs(string(decoded))
return pickBestEnvatoPreviewURL(urls)
return firstNonEmpty(pickBestEnvatoPreviewURL(urls), extractVideoPreviewURL(string(decoded)))
}
func extractWindowAssignedValue(html, variable string) string {
@@ -831,6 +850,12 @@ func extractWindowAssignedValue(html, variable string) string {
}
func pickBestEnvatoPreviewURL(urls []string) string {
for _, item := range urls {
lower := strings.ToLower(item)
if strings.Contains(lower, "envatousercontent.com") && strings.HasSuffix(lower, ".mp4") {
return item
}
}
for _, item := range urls {
lower := strings.ToLower(item)
if strings.Contains(lower, "video-previews.elements.envatousercontent.com") && strings.Contains(lower, "watermarked_preview") && strings.HasSuffix(lower, ".mp4") {
+9
View File
@@ -15,6 +15,15 @@ func TestExtractVideoPreviewURLFindsEnvatoPreview(t *testing.T) {
}
}
// TestExtractVideoPreviewURLFindsGenericM3U8 checks that a slash-escaped
// .m3u8 URL inside a script tag is returned unescaped with its query string.
func TestExtractVideoPreviewURLFindsGenericM3U8(t *testing.T) {
	const (
		page = `<script>window.preview="https:\/\/cdn.example.com\/preview\/master.m3u8?token=abc"</script>`
		want = "https://cdn.example.com/preview/master.m3u8?token=abc"
	)

	if got := extractVideoPreviewURL(page); got != want {
		t.Fatalf("expected %q, got %q", want, got)
	}
}
func TestDeriveEnvatoPreviewFromThumbnail(t *testing.T) {
thumb := "https://elements-resized.envatousercontent.com/elements-video-cover-images/ad0a3abc-7eb0-4075-8f68-8198f9a08777/video_preview/video_preview_0000.jpg?w=1200&amp;h=630"
got := deriveEnvatoPreviewFromThumbnail(thumb)
+4 -4
View File
@@ -71,7 +71,7 @@ func (g *GeminiService) TranslateQuery(query string) string {
"systemInstruction": map[string]any{
"parts": []map[string]string{
{
"text": "You translate media search intents into natural English. Output one plain English search phrase only. No labels, no quotes, no explanations.",
"text": "You are a professional video editor. Infer stronger stock-footage and scene-search wording from the user's keyword, and expand it into natural English that a professional editor would use to find usable footage. Output one plain English search phrase only. No labels, no quotes, no explanations.",
},
},
},
@@ -79,7 +79,7 @@ func (g *GeminiService) TranslateQuery(query string) string {
{
"parts": []map[string]string{
{
"text": "Translate this user query into concise English suitable for stock-footage search: " + trimmed,
"text": "Expand this user query into a concise but editor-grade English footage search phrase suitable for stock-footage discovery: " + trimmed,
},
},
},
@@ -152,13 +152,13 @@ func (g *GeminiService) Recommend(query string, candidates []SearchResult) ([]AI
type geminiPart map[string]any
parts := []geminiPart{
{
"text": `Analyze the provided images for the user's search intent. Return JSON only in this shape:
"text": `You are a professional video editor. Analyze whether each provided visual is suitable as a usable scene or shot for the user's requested keyword. Return JSON only in this shape:
{"recommendations":[{"index":0,"reason":"short reason","recommended":true}]}
Return one entry for every analyzed candidate. Use Korean for every reason. Keep reasons concise but specific enough to explain usefulness.
Mark the strongest matches as recommended=true and weaker matches as recommended=false.
Prefer cinematic b-roll, stock footage, editorial footage, clean composition, usable establishing shots, and professional media thumbnails.
Avoid clickbait faces, exaggerated expressions, meme aesthetics, low-information thumbnails, sensational text overlays, or gossip-style imagery.
Favor thumbnails that look directly useful for media editing and footage sourcing.
Favor scenes that look directly useful for professional editing, sequencing, establishing, cutaway, or mood-building usage.
User query: ` + query,
},
}
+3 -3
View File
@@ -15,7 +15,7 @@ type searchCollector interface {
// envatoCollector is a stateless collector type for the Envato source.
type envatoCollector struct{}

// Name returns the collector's name, "Envato".
func (envatoCollector) Name() string {
	return "Envato"
}
func (envatoCollector) MaxResults() int { return 8 }
func (envatoCollector) MaxResults() int { return 14 }
// Enabled reports whether Envato is enabled: true when enabledPlatforms is
// nil or empty, otherwise the value stored under the "envato" key.
func (envatoCollector) Enabled(enabledPlatforms map[string]bool) bool {
	if len(enabledPlatforms) == 0 {
		return true
	}
	return enabledPlatforms["envato"]
}
@@ -31,7 +31,7 @@ func (envatoCollector) Enrich(searcher *SearchService, result SearchResult) Sear
// artgridCollector is a stateless collector type for the Artgrid source.
type artgridCollector struct{}

// Name returns the collector's name, "Artgrid".
func (artgridCollector) Name() string {
	return "Artgrid"
}
func (artgridCollector) MaxResults() int { return 8 }
func (artgridCollector) MaxResults() int { return 14 }
// Enabled reports whether Artgrid is enabled: true when enabledPlatforms is
// nil or empty, otherwise the value stored under the "artgrid" key.
func (artgridCollector) Enabled(enabledPlatforms map[string]bool) bool {
	if len(enabledPlatforms) == 0 {
		return true
	}
	return enabledPlatforms["artgrid"]
}
@@ -47,7 +47,7 @@ func (artgridCollector) Enrich(searcher *SearchService, result SearchResult) Sea
// googleVideoCollector is a stateless collector type for the Google Video
// source.
type googleVideoCollector struct{}

// Name returns the collector's name, "Google Video".
func (googleVideoCollector) Name() string {
	return "Google Video"
}
func (googleVideoCollector) MaxResults() int { return 6 }
func (googleVideoCollector) MaxResults() int { return 10 }
// Enabled reports whether Google Video is enabled: true when enabledPlatforms
// is nil or empty, otherwise the value stored under the "google video" key.
func (googleVideoCollector) Enabled(enabledPlatforms map[string]bool) bool {
	if len(enabledPlatforms) == 0 {
		return true
	}
	return enabledPlatforms["google video"]
}
+71 -1
View File
@@ -43,6 +43,9 @@ const resultModalSource = document.getElementById("resultModalSource");
const resultModalSnippet = document.getElementById("resultModalSnippet");
const resultModalReason = document.getElementById("resultModalReason");
const resultModalFrame = document.getElementById("resultModalFrame");
const resultModalMediaFrame = document.getElementById("resultModalMediaFrame");
const resultModalVideo = document.getElementById("resultModalVideo");
const resultModalThumbnail = document.getElementById("resultModalThumbnail");
const resultModalOpenExternal = document.getElementById("resultModalOpenExternal");
const resultModalDownload = document.getElementById("resultModalDownload");
const closeResultModal = document.getElementById("closeResultModal");
@@ -53,6 +56,9 @@ const resultModalReady = Boolean(
resultModalSnippet &&
resultModalReason &&
resultModalFrame &&
resultModalMediaFrame &&
resultModalVideo &&
resultModalThumbnail &&
resultModalOpenExternal &&
resultModalDownload &&
closeResultModal,
@@ -157,6 +163,23 @@ function toClock(totalSeconds) {
return `${hours}:${minutes}:${secs}`;
}
/**
 * Pulls the 11-character YouTube video ID out of a link.
 *
 * The `v=`, `/shorts/` and `/embed/` forms are tried first, then `youtu.be/`
 * short links.
 *
 * @param {string} link - Candidate YouTube URL; falsy values are accepted.
 * @returns {string} The video ID, or "" when the link is falsy or no form matches.
 */
function extractYouTubeID(link) {
  if (!link) {
    return "";
  }

  const idMatchers = [
    /(?:v=|\/shorts\/|\/embed\/)([A-Za-z0-9_-]{11})/,
    /youtu\.be\/([A-Za-z0-9_-]{11})/,
  ];

  for (const matcher of idMatchers) {
    const [, videoID] = link.match(matcher) ?? [];
    if (videoID) {
      return videoID;
    }
  }

  return "";
}
function syncRanges() {
let start = cropStart;
let end = cropEnd;
@@ -374,6 +397,35 @@ function resetResultModalMedia() {
return;
}
resultModalFrame.src = "about:blank";
resultModalVideo.pause();
detachVideoSource(resultModalVideo);
resultModalThumbnail.removeAttribute("src");
resultModalMediaFrame.style.aspectRatio = "";
setHidden(resultModalFrame, true, "");
setHidden(resultModalVideo, true, "");
setHidden(resultModalThumbnail, true, "");
}
/**
 * Loads `src` into the result modal iframe and unhides it.
 * Does nothing when `src` is falsy.
 *
 * @param {string} src - URL to load in the iframe.
 */
function showResultModalFrame(src) {
  if (src) {
    resultModalFrame.src = src;
    setHidden(resultModalFrame, false, "");
  }
}
/**
 * Attaches `src` to the result modal video element and unhides it.
 * Does nothing when `src` is falsy.
 *
 * @param {string} src - Video URL handed to attachVideoSource.
 */
function showResultModalVideo(src) {
  if (src) {
    attachVideoSource(resultModalVideo, src);
    setHidden(resultModalVideo, false, "");
  }
}
/**
 * Shows the result modal thumbnail image.
 *
 * @param {string} src - Image URL; PREVIEW_PLACEHOLDER is used when falsy.
 * @param {string} alt - Alt text; "" is used when falsy.
 */
function showResultModalThumbnail(src, alt) {
  const imageSrc = src || PREVIEW_PLACEHOLDER;
  const altText = alt || "";
  resultModalThumbnail.src = imageSrc;
  resultModalThumbnail.alt = altText;
  setHidden(resultModalThumbnail, false, "");
}
function renderResults(results) {
@@ -452,7 +504,13 @@ function openResultModal(item) {
const canDirectDownload = item.source === "Google Video" && item.link;
resultModalDownload.classList.toggle("hidden", !canDirectDownload);
resetResultModalMedia();
resultModalFrame.src = buildResultModalEmbedURL(item);
if (item.source === "Google Video") {
showResultModalFrame(buildResultModalEmbedURL(item));
} else if (item.previewVideoUrl) {
showResultModalVideo(item.previewVideoUrl);
} else {
showResultModalThumbnail(item.thumbnailUrl, item.title || "");
}
showModal(resultModal);
logEvent("result:modal:open", { title: item.title, source: item.source, link: item.link });
}
@@ -683,6 +741,18 @@ previewThumbnail.addEventListener("load", () => {
previewMediaFrame.style.aspectRatio = `${previewThumbnail.naturalWidth} / ${previewThumbnail.naturalHeight}`;
}
});
// When the modal's video or thumbnail reports a positive intrinsic size,
// set the media frame's aspect ratio to match it.
if (resultModalReady) {
  const applyAspectRatio = (width, height) => {
    if (width > 0 && height > 0) {
      resultModalMediaFrame.style.aspectRatio = `${width} / ${height}`;
    }
  };

  resultModalVideo.addEventListener("loadedmetadata", () => {
    applyAspectRatio(resultModalVideo.videoWidth, resultModalVideo.videoHeight);
  });

  resultModalThumbnail.addEventListener("load", () => {
    applyAspectRatio(resultModalThumbnail.naturalWidth, resultModalThumbnail.naturalHeight);
  });
}
for (const button of platformToggles) {
button.addEventListener("click", () => {
const platform = button.dataset.platformToggle;
+5 -3
View File
@@ -162,8 +162,10 @@
</div>
</div>
<div class="border-b border-white/10 bg-black/40 p-4">
<div class="aspect-video overflow-hidden rounded-2xl border border-white/10 bg-black">
<iframe id="resultModalFrame" class="h-full w-full bg-white" referrerpolicy="no-referrer" allow="autoplay; fullscreen; encrypted-media; picture-in-picture" allowfullscreen></iframe>
<div id="resultModalMediaFrame" class="aspect-video overflow-hidden rounded-2xl border border-white/10 bg-black">
<iframe id="resultModalFrame" class="hidden h-full w-full bg-white" referrerpolicy="no-referrer" allow="autoplay; fullscreen; encrypted-media; picture-in-picture" allowfullscreen></iframe>
<video id="resultModalVideo" class="hidden h-full w-full bg-black object-contain" controls playsinline></video>
<img id="resultModalThumbnail" class="hidden h-full w-full object-contain" alt="" />
</div>
</div>
<div class="grid gap-5 px-5 py-5 lg:grid-cols-[1.6fr_0.8fr]">
@@ -200,6 +202,6 @@
</button>
</template>
<script src="/app.js?v=20260316c" defer></script>
<script src="/app.js?v=20260316d" defer></script>
</body>
</html>