This commit is contained in:
@@ -229,6 +229,9 @@ func (s *SearchService) enrichArtgrid(result SearchResult) SearchResult {
|
||||
if result.ThumbnailURL == "" || result.PreviewVideoURL == "" {
|
||||
html, err := s.fetchText(result.Link)
|
||||
if err == nil {
|
||||
if !isMatchingArtgridClipPage(html, clipID) {
|
||||
return result
|
||||
}
|
||||
result.Title = firstNonEmpty(
|
||||
cleanArtgridTitle(extractMetaContent(html, "og:title")),
|
||||
cleanArtgridTitle(extractMetaContent(html, "title")),
|
||||
@@ -756,6 +759,33 @@ func cleanArtgridDescription(description string) string {
|
||||
return strings.TrimSpace(description)
|
||||
}
|
||||
|
||||
func isMatchingArtgridClipPage(html, clipID string) bool {
|
||||
if clipID == "" {
|
||||
return false
|
||||
}
|
||||
ogURL := extractMetaContent(html, "og:url")
|
||||
canonical := extractCanonicalURL(html)
|
||||
lowerHTML := strings.ToLower(html)
|
||||
for _, candidate := range []string{ogURL, canonical} {
|
||||
if strings.Contains(candidate, clipID) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
if strings.Contains(lowerHTML, "main-clipvideo_"+clipID) || strings.Contains(lowerHTML, "/clip/"+clipID+"/") {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func extractCanonicalURL(html string) string {
|
||||
pattern := regexp.MustCompile(`(?i)<link[^>]+rel=["']canonical["'][^>]+href=["']([^"']+)`)
|
||||
matches := pattern.FindStringSubmatch(html)
|
||||
if len(matches) == 2 {
|
||||
return htmlUnescape(matches[1])
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func deriveEnvatoPreviewFromThumbnail(thumbnail string) string {
|
||||
candidate := htmlUnescape(strings.TrimSpace(thumbnail))
|
||||
if candidate == "" {
|
||||
|
||||
@@ -81,6 +81,13 @@ func TestBuildArtgridQueriesIncludesArtlistCanonicalDomain(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsMatchingArtgridClipPageRejectsHomepage(t *testing.T) {
|
||||
html := `<html><head><meta property="og:url" content="https://artgrid.io/"><link rel="canonical" href="https://artgrid.io/"></head></html>`
|
||||
if isMatchingArtgridClipPage(html, "114756") {
|
||||
t.Fatal("expected generic Artgrid homepage HTML to be rejected as a clip page")
|
||||
}
|
||||
}
|
||||
|
||||
func TestGeminiCandidateLimitNeverExceedsCandidates(t *testing.T) {
|
||||
if got := GeminiCandidateLimit(9); got != 9 {
|
||||
t.Fatalf("expected Gemini limit to stay within candidate count, got %d", got)
|
||||
|
||||
@@ -58,6 +58,10 @@ func (g *GeminiService) TranslateQuery(query string) string {
|
||||
if trimmed == "" {
|
||||
return ""
|
||||
}
|
||||
normalizedIntent := normalizeKnownMediaPhrases(trimmed)
|
||||
if looksMostlyASCII(normalizedIntent) {
|
||||
return strings.TrimSpace(normalizedIntent)
|
||||
}
|
||||
if looksMostlyASCII(trimmed) {
|
||||
return trimmed
|
||||
}
|
||||
@@ -90,19 +94,19 @@ func (g *GeminiService) TranslateQuery(query string) string {
|
||||
rawText, err := g.generateText(body)
|
||||
if err == nil {
|
||||
translated := sanitizePlainEnglishLine(rawText)
|
||||
if translated != "" && !strings.EqualFold(translated, trimmed) {
|
||||
if translated != "" && !strings.EqualFold(translated, trimmed) && !isOvercompressedTranslation(trimmed, translated) {
|
||||
return translated
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if translated, err := g.translateViaGoogle(trimmed); err == nil && translated != "" && isLikelyEnglishQuery(translated) {
|
||||
if translated, err := g.translateViaGoogle(trimmed); err == nil && translated != "" && isLikelyEnglishQuery(translated) && !isOvercompressedTranslation(trimmed, translated) {
|
||||
return translated
|
||||
}
|
||||
if translated := translateKoreanMediaTerms(trimmed); translated != "" && !strings.EqualFold(translated, trimmed) {
|
||||
if translated := translateKoreanMediaTerms(normalizedIntent); translated != "" && !strings.EqualFold(translated, trimmed) {
|
||||
return translated
|
||||
}
|
||||
return trimmed
|
||||
return strings.TrimSpace(normalizedIntent)
|
||||
}
|
||||
|
||||
func (g *GeminiService) generateText(body map[string]any) (string, error) {
|
||||
@@ -493,6 +497,12 @@ func translateKoreanMediaTerms(query string) string {
|
||||
korean string
|
||||
english string
|
||||
}{
|
||||
{korean: "사이버 펑크 도시", english: "cyberpunk city"},
|
||||
{korean: "사이버펑크 도시", english: "cyberpunk city"},
|
||||
{korean: "사이버 펑크", english: "cyberpunk"},
|
||||
{korean: "사이버펑크", english: "cyberpunk"},
|
||||
{korean: "네온 도시", english: "neon city"},
|
||||
{korean: "미래 도시", english: "futuristic city"},
|
||||
{korean: "숲속", english: "forest"},
|
||||
{korean: "다정한", english: "affectionate"},
|
||||
{korean: "항공샷", english: "aerial shot"},
|
||||
@@ -528,6 +538,38 @@ func translateKoreanMediaTerms(query string) string {
|
||||
return strings.TrimSpace(translated)
|
||||
}
|
||||
|
||||
// normalizeKnownMediaPhrases rewrites the known Korean spellings of
// "cyberpunk" (with or without a trailing "city") to English and collapses
// every run of whitespace in the result to a single space.
func normalizeKnownMediaPhrases(query string) string {
	// Applied in order: the longer "... city" phrases come first so they are
	// replaced before the bare term can consume their prefix.
	phrasePairs := [...][2]string{
		{"사이버 펑크 도시", "cyberpunk city"},
		{"사이버펑크 도시", "cyberpunk city"},
		{"사이버 펑크", "cyberpunk"},
		{"사이버펑크", "cyberpunk"},
	}

	out := strings.TrimSpace(query)
	for i := range phrasePairs {
		out = strings.ReplaceAll(out, phrasePairs[i][0], phrasePairs[i][1])
	}

	words := strings.Fields(out)
	return strings.Join(words, " ")
}
|
||||
|
||||
// isOvercompressedTranslation reports whether translated has collapsed a
// multi-word original into fewer than two words. It returns false when
// original has fewer than two whitespace-separated words, when translated has
// two or more, or when translated is (case-insensitively) one of the
// single-word terms accepted as a complete translation.
func isOvercompressedTranslation(original, translated string) bool {
	// strings.Fields already ignores leading and trailing whitespace.
	if len(strings.Fields(original)) < 2 {
		return false
	}
	if len(strings.Fields(translated)) >= 2 {
		return false
	}

	switch strings.ToLower(strings.TrimSpace(translated)) {
	case "cyberpunk", "nightlife", "cityscape":
		return false
	}
	return true
}
|
||||
|
||||
func (g *GeminiService) translateViaGoogle(query string) (string, error) {
|
||||
baseURL := g.TranslateEndpoint
|
||||
if strings.TrimSpace(baseURL) == "" {
|
||||
|
||||
@@ -39,3 +39,10 @@ func TestTranslateQueryFallsBackToDictionaryWhenTranslateFails(t *testing.T) {
|
||||
t.Fatalf("expected dictionary fallback translation, got %q", translated)
|
||||
}
|
||||
}
|
||||
|
||||
func TestNormalizeKnownMediaPhrases(t *testing.T) {
|
||||
translated := translateKoreanMediaTerms("사이버 펑크 도시")
|
||||
if translated != "cyberpunk city" {
|
||||
t.Fatalf("expected cyberpunk city, got %q", translated)
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user