This commit is contained in:
@@ -141,6 +141,19 @@
|
|||||||
- Effect:
|
- Effect:
|
||||||
- improves Artgrid recall in SearXNG result sets that favor canonical Artlist URLs over Artgrid URLs
|
- improves Artgrid recall in SearXNG result sets that favor canonical Artlist URLs over Artgrid URLs
|
||||||
|
|
||||||
|
## Current Session Update (2026-03-16, Query / Preview Follow-up)
|
||||||
|
- Search intent translation was updated to better preserve compound media phrases:
|
||||||
|
- added explicit normalization for terms like `사이버 펑크` -> `cyberpunk`
|
||||||
|
- added a guard that rejects over-compressed translations when the original query contains a richer multi-word intent
|
||||||
|
- Artgrid page parsing was tightened:
|
||||||
|
- generic Artgrid homepage / challenge HTML should no longer be mistaken for a real clip page during enrichment
|
||||||
|
- this prevents homepage thumbnails/descriptions from overwriting real search result metadata
|
||||||
|
- Hover preview playback was changed to lazy attach on hover:
|
||||||
|
- preview source is now attached on mouseenter
|
||||||
|
- playback waits for media readiness instead of trying to play immediately from the render path
|
||||||
|
- source is detached again on mouseleave
|
||||||
|
- The self-test script's search step now retries (up to 5 attempts, 1 second apart) to reduce flaky startup-timing failures during local smoke tests
|
||||||
|
|
||||||
## Local Self-Test Workflow
|
## Local Self-Test Workflow
|
||||||
- Primary command:
|
- Primary command:
|
||||||
- `bash scripts/selftest.sh`
|
- `bash scripts/selftest.sh`
|
||||||
|
|||||||
@@ -229,6 +229,9 @@ func (s *SearchService) enrichArtgrid(result SearchResult) SearchResult {
|
|||||||
if result.ThumbnailURL == "" || result.PreviewVideoURL == "" {
|
if result.ThumbnailURL == "" || result.PreviewVideoURL == "" {
|
||||||
html, err := s.fetchText(result.Link)
|
html, err := s.fetchText(result.Link)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
|
if !isMatchingArtgridClipPage(html, clipID) {
|
||||||
|
return result
|
||||||
|
}
|
||||||
result.Title = firstNonEmpty(
|
result.Title = firstNonEmpty(
|
||||||
cleanArtgridTitle(extractMetaContent(html, "og:title")),
|
cleanArtgridTitle(extractMetaContent(html, "og:title")),
|
||||||
cleanArtgridTitle(extractMetaContent(html, "title")),
|
cleanArtgridTitle(extractMetaContent(html, "title")),
|
||||||
@@ -756,6 +759,33 @@ func cleanArtgridDescription(description string) string {
|
|||||||
return strings.TrimSpace(description)
|
return strings.TrimSpace(description)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func isMatchingArtgridClipPage(html, clipID string) bool {
|
||||||
|
if clipID == "" {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
ogURL := extractMetaContent(html, "og:url")
|
||||||
|
canonical := extractCanonicalURL(html)
|
||||||
|
lowerHTML := strings.ToLower(html)
|
||||||
|
for _, candidate := range []string{ogURL, canonical} {
|
||||||
|
if strings.Contains(candidate, clipID) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if strings.Contains(lowerHTML, "main-clipvideo_"+clipID) || strings.Contains(lowerHTML, "/clip/"+clipID+"/") {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
func extractCanonicalURL(html string) string {
|
||||||
|
pattern := regexp.MustCompile(`(?i)<link[^>]+rel=["']canonical["'][^>]+href=["']([^"']+)`)
|
||||||
|
matches := pattern.FindStringSubmatch(html)
|
||||||
|
if len(matches) == 2 {
|
||||||
|
return htmlUnescape(matches[1])
|
||||||
|
}
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
func deriveEnvatoPreviewFromThumbnail(thumbnail string) string {
|
func deriveEnvatoPreviewFromThumbnail(thumbnail string) string {
|
||||||
candidate := htmlUnescape(strings.TrimSpace(thumbnail))
|
candidate := htmlUnescape(strings.TrimSpace(thumbnail))
|
||||||
if candidate == "" {
|
if candidate == "" {
|
||||||
|
|||||||
@@ -81,6 +81,13 @@ func TestBuildArtgridQueriesIncludesArtlistCanonicalDomain(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestIsMatchingArtgridClipPageRejectsHomepage(t *testing.T) {
|
||||||
|
html := `<html><head><meta property="og:url" content="https://artgrid.io/"><link rel="canonical" href="https://artgrid.io/"></head></html>`
|
||||||
|
if isMatchingArtgridClipPage(html, "114756") {
|
||||||
|
t.Fatal("expected generic Artgrid homepage HTML to be rejected as a clip page")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestGeminiCandidateLimitNeverExceedsCandidates(t *testing.T) {
|
func TestGeminiCandidateLimitNeverExceedsCandidates(t *testing.T) {
|
||||||
if got := GeminiCandidateLimit(9); got != 9 {
|
if got := GeminiCandidateLimit(9); got != 9 {
|
||||||
t.Fatalf("expected Gemini limit to stay within candidate count, got %d", got)
|
t.Fatalf("expected Gemini limit to stay within candidate count, got %d", got)
|
||||||
|
|||||||
@@ -58,6 +58,10 @@ func (g *GeminiService) TranslateQuery(query string) string {
|
|||||||
if trimmed == "" {
|
if trimmed == "" {
|
||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
|
normalizedIntent := normalizeKnownMediaPhrases(trimmed)
|
||||||
|
if looksMostlyASCII(normalizedIntent) {
|
||||||
|
return strings.TrimSpace(normalizedIntent)
|
||||||
|
}
|
||||||
if looksMostlyASCII(trimmed) {
|
if looksMostlyASCII(trimmed) {
|
||||||
return trimmed
|
return trimmed
|
||||||
}
|
}
|
||||||
@@ -90,19 +94,19 @@ func (g *GeminiService) TranslateQuery(query string) string {
|
|||||||
rawText, err := g.generateText(body)
|
rawText, err := g.generateText(body)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
translated := sanitizePlainEnglishLine(rawText)
|
translated := sanitizePlainEnglishLine(rawText)
|
||||||
if translated != "" && !strings.EqualFold(translated, trimmed) {
|
if translated != "" && !strings.EqualFold(translated, trimmed) && !isOvercompressedTranslation(trimmed, translated) {
|
||||||
return translated
|
return translated
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if translated, err := g.translateViaGoogle(trimmed); err == nil && translated != "" && isLikelyEnglishQuery(translated) {
|
if translated, err := g.translateViaGoogle(trimmed); err == nil && translated != "" && isLikelyEnglishQuery(translated) && !isOvercompressedTranslation(trimmed, translated) {
|
||||||
return translated
|
return translated
|
||||||
}
|
}
|
||||||
if translated := translateKoreanMediaTerms(trimmed); translated != "" && !strings.EqualFold(translated, trimmed) {
|
if translated := translateKoreanMediaTerms(normalizedIntent); translated != "" && !strings.EqualFold(translated, trimmed) {
|
||||||
return translated
|
return translated
|
||||||
}
|
}
|
||||||
return trimmed
|
return strings.TrimSpace(normalizedIntent)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (g *GeminiService) generateText(body map[string]any) (string, error) {
|
func (g *GeminiService) generateText(body map[string]any) (string, error) {
|
||||||
@@ -493,6 +497,12 @@ func translateKoreanMediaTerms(query string) string {
|
|||||||
korean string
|
korean string
|
||||||
english string
|
english string
|
||||||
}{
|
}{
|
||||||
|
{korean: "사이버 펑크 도시", english: "cyberpunk city"},
|
||||||
|
{korean: "사이버펑크 도시", english: "cyberpunk city"},
|
||||||
|
{korean: "사이버 펑크", english: "cyberpunk"},
|
||||||
|
{korean: "사이버펑크", english: "cyberpunk"},
|
||||||
|
{korean: "네온 도시", english: "neon city"},
|
||||||
|
{korean: "미래 도시", english: "futuristic city"},
|
||||||
{korean: "숲속", english: "forest"},
|
{korean: "숲속", english: "forest"},
|
||||||
{korean: "다정한", english: "affectionate"},
|
{korean: "다정한", english: "affectionate"},
|
||||||
{korean: "항공샷", english: "aerial shot"},
|
{korean: "항공샷", english: "aerial shot"},
|
||||||
@@ -528,6 +538,38 @@ func translateKoreanMediaTerms(query string) string {
|
|||||||
return strings.TrimSpace(translated)
|
return strings.TrimSpace(translated)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// normalizeKnownMediaPhrases rewrites known Korean compound media phrases in
// query to their English equivalents and returns the result with surrounding
// whitespace removed and every internal whitespace run collapsed to one space.
//
// Whitespace is collapsed before matching, so spacing variants such as doubled
// spaces, tabs or full-width spaces between the words of a phrase still match.
// Text that matches no phrase is left as it is apart from that whitespace
// normalization.
func normalizeKnownMediaPhrases(query string) string {
	normalized := strings.Join(strings.Fields(query), " ")

	// Longer phrases come first so that "... 도시" variants are consumed whole
	// before the shorter "cyberpunk" entries get a chance to match.
	replacements := []struct {
		from string
		to   string
	}{
		{from: "사이버 펑크 도시", to: "cyberpunk city"},
		{from: "사이버펑크 도시", to: "cyberpunk city"},
		{from: "사이버 펑크", to: "cyberpunk"},
		{from: "사이버펑크", to: "cyberpunk"},
	}
	for _, replacement := range replacements {
		normalized = strings.ReplaceAll(normalized, replacement.from, replacement.to)
	}

	// Every replacement is already single-spaced with no edge whitespace, so
	// the string needs no second whitespace pass.
	return normalized
}
|
||||||
|
|
||||||
|
// isOvercompressedTranslation reports whether translated has lost detail
// relative to original: the original has at least two whitespace-separated
// words while the translation has fewer than two.
//
// A small set of single English words that legitimately stand for a
// multi-word phrase ("cyberpunk", "nightlife", "cityscape") is exempt; the
// comparison ignores case and surrounding whitespace.
func isOvercompressedTranslation(original, translated string) bool {
	// A single-word (or empty) original has nothing to compress.
	if len(strings.Fields(original)) < 2 {
		return false
	}
	// A translation of two or more words kept enough of the intent.
	if len(strings.Fields(translated)) >= 2 {
		return false
	}

	switch strings.ToLower(strings.TrimSpace(translated)) {
	case "cyberpunk", "nightlife", "cityscape":
		return false
	default:
		return true
	}
}
|
||||||
|
|
||||||
func (g *GeminiService) translateViaGoogle(query string) (string, error) {
|
func (g *GeminiService) translateViaGoogle(query string) (string, error) {
|
||||||
baseURL := g.TranslateEndpoint
|
baseURL := g.TranslateEndpoint
|
||||||
if strings.TrimSpace(baseURL) == "" {
|
if strings.TrimSpace(baseURL) == "" {
|
||||||
|
|||||||
@@ -39,3 +39,10 @@ func TestTranslateQueryFallsBackToDictionaryWhenTranslateFails(t *testing.T) {
|
|||||||
t.Fatalf("expected dictionary fallback translation, got %q", translated)
|
t.Fatalf("expected dictionary fallback translation, got %q", translated)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestNormalizeKnownMediaPhrases(t *testing.T) {
|
||||||
|
translated := translateKoreanMediaTerms("사이버 펑크 도시")
|
||||||
|
if translated != "cyberpunk city" {
|
||||||
|
t.Fatalf("expected cyberpunk city, got %q", translated)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
+30
-4
@@ -270,6 +270,33 @@ function attachVideoSource(video, src) {
|
|||||||
logEvent("preview:attach:file", { src });
|
logEvent("preview:attach:file", { src });
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Readiness handlers still waiting on a video element, keyed by that element.
// A hover that ends before the media loads leaves its listeners attached; the
// next hover uses this map to remove them before registering new ones.
const pendingHoverReadyHandlers = new WeakMap();

/**
 * Attach a preview source to a video element, reveal it, and start playback
 * once the media has data to show.
 *
 * Does nothing when `src` is empty. Playback is skipped if the element has
 * been hidden again (the "hidden" class is back) by the time the media is
 * ready, so a late load does not start a preview the pointer already left.
 * Rejections from `play()` are reported through `logEvent`.
 *
 * @param {HTMLVideoElement} video - preview element to drive
 * @param {string} src - preview media URL
 */
function startHoverPreview(video, src) {
  if (!src) {
    return;
  }

  // Drop listeners left over from an earlier hover that never became ready.
  const staleHandler = pendingHoverReadyHandlers.get(video);
  if (staleHandler) {
    video.removeEventListener("loadeddata", staleHandler);
    video.removeEventListener("canplay", staleHandler);
    pendingHoverReadyHandlers.delete(video);
  }

  attachVideoSource(video, src);
  video.classList.remove("hidden");

  const attemptPlay = () => {
    // The hover may have ended while the media was loading.
    if (video.classList.contains("hidden")) {
      return;
    }
    video.play().catch((error) => {
      logEvent("preview:hover:play:error", { src, message: String(error) });
    });
  };

  // readyState >= 2 (HAVE_CURRENT_DATA): a frame is already available.
  if (video.readyState >= 2) {
    attemptPlay();
    return;
  }

  // Whichever readiness event fires first wins; the other listener is removed
  // so playback is attempted only once per hover.
  const onReady = () => {
    video.removeEventListener("loadeddata", onReady);
    video.removeEventListener("canplay", onReady);
    pendingHoverReadyHandlers.delete(video);
    attemptPlay();
  };
  pendingHoverReadyHandlers.set(video, onReady);
  video.addEventListener("loadeddata", onReady, { once: true });
  video.addEventListener("canplay", onReady, { once: true });

  if (video.load) {
    video.load();
  }
}
|
||||||
|
|
||||||
function detachVideoSource(video) {
|
function detachVideoSource(video) {
|
||||||
const existing = hlsInstances.get(video);
|
const existing = hlsInstances.get(video);
|
||||||
if (existing) {
|
if (existing) {
|
||||||
@@ -299,21 +326,20 @@ function renderResults(results) {
|
|||||||
node.querySelector(".result-snippet").textContent = item.snippet || item.reason || item.source || "";
|
node.querySelector(".result-snippet").textContent = item.snippet || item.reason || item.source || "";
|
||||||
node.querySelector(".result-reason").textContent = item.reason ? `AI note: ${item.reason}` : "";
|
node.querySelector(".result-reason").textContent = item.reason ? `AI note: ${item.reason}` : "";
|
||||||
node.querySelector(".source-badge").textContent = item.source;
|
node.querySelector(".source-badge").textContent = item.source;
|
||||||
|
previewVideo.poster = item.thumbnailUrl || "";
|
||||||
if (item.previewVideoUrl) {
|
if (item.previewVideoUrl) {
|
||||||
attachVideoSource(previewVideo, item.previewVideoUrl);
|
|
||||||
previewVideo.poster = item.thumbnailUrl || "";
|
|
||||||
const mediaArea = node.querySelector(".relative");
|
const mediaArea = node.querySelector(".relative");
|
||||||
mediaArea.addEventListener("mouseenter", () => {
|
mediaArea.addEventListener("mouseenter", () => {
|
||||||
logEvent("preview:hover:start", { title: item.title, source: item.source, previewVideoUrl: item.previewVideoUrl });
|
logEvent("preview:hover:start", { title: item.title, source: item.source, previewVideoUrl: item.previewVideoUrl });
|
||||||
overlays.forEach((overlay) => overlay.classList.add("hidden"));
|
overlays.forEach((overlay) => overlay.classList.add("hidden"));
|
||||||
previewVideo.classList.remove("hidden");
|
startHoverPreview(previewVideo, item.previewVideoUrl);
|
||||||
previewVideo.play().catch(() => {});
|
|
||||||
});
|
});
|
||||||
mediaArea.addEventListener("mouseleave", () => {
|
mediaArea.addEventListener("mouseleave", () => {
|
||||||
logEvent("preview:hover:end", { title: item.title, source: item.source });
|
logEvent("preview:hover:end", { title: item.title, source: item.source });
|
||||||
previewVideo.pause();
|
previewVideo.pause();
|
||||||
previewVideo.currentTime = 0;
|
previewVideo.currentTime = 0;
|
||||||
previewVideo.classList.add("hidden");
|
previewVideo.classList.add("hidden");
|
||||||
|
detachVideoSource(previewVideo);
|
||||||
overlays.forEach((overlay) => overlay.classList.remove("hidden"));
|
overlays.forEach((overlay) => overlay.classList.remove("hidden"));
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|||||||
+9
-4
@@ -70,10 +70,15 @@ if payload.get("status") != "ok":
|
|||||||
PY
|
PY
|
||||||
|
|
||||||
echo "[selftest] verify search"
# The app may still be starting, so retry the search request a few times.
# Track success explicitly: a failing command used as an `if` condition does
# not abort the script, so without this check five failed attempts would fall
# through to the JSON validation below with a missing or empty search.json.
search_ok=0
for _ in $(seq 1 5); do
  if curl -fsS \
    -H "Content-Type: application/json" \
    -d '{"query":"city rain","platforms":["envato","artgrid","google video"]}' \
    "http://127.0.0.1:${APP_PORT}/api/search" >"${TMP_DIR}/search.json"; then
    search_ok=1
    break
  fi
  sleep 1
done
if [ "${search_ok}" -ne 1 ]; then
  echo "[selftest] search request failed after 5 attempts" >&2
  exit 1
fi
|
||||||
python3 - "${TMP_DIR}/search.json" <<'PY'
|
python3 - "${TMP_DIR}/search.json" <<'PY'
|
||||||
import json
|
import json
|
||||||
import sys
|
import sys
|
||||||
|
|||||||
Reference in New Issue
Block a user