Migrate search to Vertex AI and enhance preview modal
All checks were successful
build-push / docker (push) Successful in 4m1s
This commit is contained in:
@@ -67,6 +67,7 @@ func (h *Hub) Remove(conn *websocket.Conn) {
|
||||
type PreviewResponse struct {
|
||||
Title string `json:"title"`
|
||||
Thumbnail string `json:"thumbnail"`
|
||||
PreviewStreamURL string `json:"previewStreamUrl"`
|
||||
Duration string `json:"duration"`
|
||||
DurationSeconds int `json:"durationSeconds"`
|
||||
StartDefault string `json:"startDefault"`
|
||||
|
||||
@@ -34,7 +34,13 @@ func main() {
|
||||
DB: db,
|
||||
DownloadsDir: downloadsDir,
|
||||
WorkerScript: workerScript,
|
||||
SearchService: services.NewSearchService(os.Getenv("GOOGLE_CSE_API_KEY"), os.Getenv("GOOGLE_CSE_CX")),
|
||||
SearchService: services.NewSearchService(
|
||||
os.Getenv("VERTEX_AI_SEARCH_API_KEY"),
|
||||
os.Getenv("VERTEX_AI_SEARCH_PROJECT_ID"),
|
||||
os.Getenv("VERTEX_AI_SEARCH_LOCATION"),
|
||||
os.Getenv("VERTEX_AI_SEARCH_DATA_STORE_ID"),
|
||||
os.Getenv("VERTEX_AI_SEARCH_SERVING_CONFIG"),
|
||||
),
|
||||
GeminiService: services.NewGeminiService(os.Getenv("GEMINI_API_KEY")),
|
||||
Hub: handlers.NewHub(),
|
||||
}
|
||||
|
||||
@@ -5,7 +5,7 @@ import (
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/url"
|
||||
neturl "net/url"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
@@ -20,71 +20,82 @@ type SearchResult struct {
|
||||
}
|
||||
|
||||
type SearchService struct {
|
||||
APIKey string
|
||||
CX string
|
||||
Client *http.Client
|
||||
APIKey string
|
||||
ProjectID string
|
||||
Location string
|
||||
DataStoreID string
|
||||
ServingConfig string
|
||||
Client *http.Client
|
||||
}
|
||||
|
||||
func NewSearchService(apiKey, cx string) *SearchService {
|
||||
func NewSearchService(apiKey, projectID, location, dataStoreID, servingConfig string) *SearchService {
|
||||
if location == "" {
|
||||
location = "global"
|
||||
}
|
||||
if servingConfig == "" {
|
||||
servingConfig = "default_serving_config"
|
||||
}
|
||||
return &SearchService{
|
||||
APIKey: apiKey,
|
||||
CX: cx,
|
||||
Client: &http.Client{Timeout: 20 * time.Second},
|
||||
APIKey: apiKey,
|
||||
ProjectID: projectID,
|
||||
Location: location,
|
||||
DataStoreID: dataStoreID,
|
||||
ServingConfig: servingConfig,
|
||||
Client: &http.Client{Timeout: 20 * time.Second},
|
||||
}
|
||||
}
|
||||
|
||||
func (s *SearchService) SearchMedia(query string) ([]SearchResult, error) {
|
||||
if s.APIKey == "" || s.CX == "" {
|
||||
return nil, fmt.Errorf("google cse credentials are not configured")
|
||||
if s.APIKey == "" || s.ProjectID == "" || s.DataStoreID == "" {
|
||||
return nil, fmt.Errorf("vertex ai search credentials are not configured")
|
||||
}
|
||||
|
||||
domains := []string{"youtube.com", "tiktok.com", "envato.com", "artgrid.io"}
|
||||
siteQuery := strings.Join(domains, " OR site:")
|
||||
fullQuery := fmt.Sprintf("%s (site:%s)", query, siteQuery)
|
||||
|
||||
values := url.Values{}
|
||||
values.Set("key", s.APIKey)
|
||||
values.Set("cx", s.CX)
|
||||
values.Set("q", fullQuery)
|
||||
values.Set("num", "10")
|
||||
values.Set("safe", "off")
|
||||
|
||||
results := make([]SearchResult, 0, 30)
|
||||
seen := map[string]bool{}
|
||||
for _, start := range []string{"1", "11", "21"} {
|
||||
pageResults, err := s.fetchPage(values, start, true)
|
||||
results, err := s.searchLite(query, true)
|
||||
if err != nil {
|
||||
results, err = s.searchLite(query, false)
|
||||
if err != nil {
|
||||
pageResults, err = s.fetchPage(values, start, false)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
for _, item := range pageResults {
|
||||
if item.Link == "" || item.ThumbnailURL == "" || seen[item.Link] {
|
||||
continue
|
||||
}
|
||||
seen[item.Link] = true
|
||||
results = append(results, item)
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
return results, nil
|
||||
}
|
||||
|
||||
func (s *SearchService) fetchPage(values url.Values, start string, imageSearch bool) ([]SearchResult, error) {
|
||||
pageValues := url.Values{}
|
||||
for key, items := range values {
|
||||
for _, item := range items {
|
||||
pageValues.Add(key, item)
|
||||
}
|
||||
func (s *SearchService) searchLite(query string, imageSearch bool) ([]SearchResult, error) {
|
||||
filteredQuery := strings.TrimSpace(query + " site:youtube.com OR site:tiktok.com OR site:envato.com OR site:artgrid.io")
|
||||
servingConfig := fmt.Sprintf(
|
||||
"projects/%s/locations/%s/dataStores/%s/servingConfigs/%s",
|
||||
s.ProjectID,
|
||||
s.Location,
|
||||
s.DataStoreID,
|
||||
s.ServingConfig,
|
||||
)
|
||||
|
||||
params := map[string]any{
|
||||
"user_country_code": "us",
|
||||
}
|
||||
pageValues.Set("start", start)
|
||||
if imageSearch {
|
||||
pageValues.Set("searchType", "image")
|
||||
params["searchType"] = 1
|
||||
}
|
||||
|
||||
endpoint := "https://www.googleapis.com/customsearch/v1?" + pageValues.Encode()
|
||||
resp, err := s.Client.Get(endpoint)
|
||||
requestBody := map[string]any{
|
||||
"query": filteredQuery,
|
||||
"pageSize": 25,
|
||||
"safeSearch": false,
|
||||
"languageCode": "ko-KR",
|
||||
"params": params,
|
||||
"contentSearchSpec": map[string]any{
|
||||
"snippetSpec": map[string]any{
|
||||
"returnSnippet": true,
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
body, _ := json.Marshal(requestBody)
|
||||
endpoint := fmt.Sprintf(
|
||||
"https://discoveryengine.googleapis.com/v1/%s:searchLite?key=%s",
|
||||
servingConfig,
|
||||
neturl.QueryEscape(s.APIKey),
|
||||
)
|
||||
resp, err := s.Client.Post(endpoint, "application/json", strings.NewReader(string(body)))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -92,69 +103,73 @@ func (s *SearchService) fetchPage(values url.Values, start string, imageSearch b
|
||||
|
||||
if resp.StatusCode >= 300 {
|
||||
data, _ := io.ReadAll(io.LimitReader(resp.Body, 2048))
|
||||
return nil, fmt.Errorf("google cse returned status %d: %s", resp.StatusCode, strings.TrimSpace(string(data)))
|
||||
return nil, fmt.Errorf("vertex ai search returned status %d: %s", resp.StatusCode, strings.TrimSpace(string(data)))
|
||||
}
|
||||
|
||||
var payload struct {
|
||||
Items []struct {
|
||||
Title string `json:"title"`
|
||||
Link string `json:"link"`
|
||||
DisplayLink string `json:"displayLink"`
|
||||
Snippet string `json:"snippet"`
|
||||
Image struct {
|
||||
ThumbnailLink string `json:"thumbnailLink"`
|
||||
} `json:"image"`
|
||||
Pagemap struct {
|
||||
CSEImage []struct {
|
||||
Src string `json:"src"`
|
||||
} `json:"cse_image"`
|
||||
CSEThumbnail []struct {
|
||||
Src string `json:"src"`
|
||||
} `json:"cse_thumbnail"`
|
||||
Metatags []map[string]string `json:"metatags"`
|
||||
} `json:"pagemap"`
|
||||
} `json:"items"`
|
||||
Results []struct {
|
||||
Document struct {
|
||||
StructData map[string]any `json:"structData"`
|
||||
DerivedStructData map[string]any `json:"derivedStructData"`
|
||||
} `json:"document"`
|
||||
} `json:"results"`
|
||||
}
|
||||
|
||||
if err := json.NewDecoder(resp.Body).Decode(&payload); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
results := make([]SearchResult, 0, len(payload.Items))
|
||||
for _, item := range payload.Items {
|
||||
thumb := item.Image.ThumbnailLink
|
||||
results := make([]SearchResult, 0, len(payload.Results))
|
||||
for _, item := range payload.Results {
|
||||
link := firstString(item.Document.StructData, "link", "url", "uri")
|
||||
title := firstString(item.Document.StructData, "title", "name")
|
||||
displayLink := firstString(item.Document.StructData, "site_name", "displayLink")
|
||||
snippet := firstString(item.Document.DerivedStructData, "snippets", "snippet")
|
||||
thumb := firstString(item.Document.DerivedStructData, "link", "thumbnail", "image", "image_url")
|
||||
if thumb == "" {
|
||||
thumb = extractThumbnail(item.Pagemap)
|
||||
thumb = firstString(item.Document.StructData, "thumbnail", "image", "image_url")
|
||||
}
|
||||
if thumb == "" || link == "" {
|
||||
continue
|
||||
}
|
||||
results = append(results, SearchResult{
|
||||
Title: item.Title,
|
||||
Link: item.Link,
|
||||
DisplayLink: item.DisplayLink,
|
||||
Snippet: item.Snippet,
|
||||
Title: title,
|
||||
Link: link,
|
||||
DisplayLink: displayLink,
|
||||
Snippet: snippet,
|
||||
ThumbnailURL: thumb,
|
||||
Source: inferSource(item.DisplayLink),
|
||||
Source: inferSource(displayLink + " " + link),
|
||||
})
|
||||
}
|
||||
return results, nil
|
||||
}
|
||||
|
||||
func extractThumbnail(pagemap struct {
|
||||
CSEImage []struct{ Src string "json:\"src\"" } "json:\"cse_image\""
|
||||
CSEThumbnail []struct{ Src string "json:\"src\"" } "json:\"cse_thumbnail\""
|
||||
Metatags []map[string]string "json:\"metatags\""
|
||||
}) string {
|
||||
if len(pagemap.CSEThumbnail) > 0 && pagemap.CSEThumbnail[0].Src != "" {
|
||||
return pagemap.CSEThumbnail[0].Src
|
||||
}
|
||||
if len(pagemap.CSEImage) > 0 && pagemap.CSEImage[0].Src != "" {
|
||||
return pagemap.CSEImage[0].Src
|
||||
}
|
||||
for _, tag := range pagemap.Metatags {
|
||||
if value := tag["og:image"]; value != "" {
|
||||
return value
|
||||
func firstString(values map[string]any, keys ...string) string {
|
||||
for _, key := range keys {
|
||||
value, ok := values[key]
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
if value := tag["twitter:image"]; value != "" {
|
||||
return value
|
||||
switch typed := value.(type) {
|
||||
case string:
|
||||
if typed != "" {
|
||||
return typed
|
||||
}
|
||||
case []any:
|
||||
for _, item := range typed {
|
||||
if text, ok := item.(string); ok && text != "" {
|
||||
return text
|
||||
}
|
||||
if mapped, ok := item.(map[string]any); ok {
|
||||
if text := firstString(mapped, "snippet", "htmlSnippet", "url"); text != "" {
|
||||
return text
|
||||
}
|
||||
}
|
||||
}
|
||||
case map[string]any:
|
||||
if text := firstString(typed, "snippet", "htmlSnippet", "url"); text != "" {
|
||||
return text
|
||||
}
|
||||
}
|
||||
}
|
||||
return ""
|
||||
|
||||
@@ -15,6 +15,7 @@ const downloadResult = document.getElementById("downloadResult");
|
||||
const cardTemplate = document.getElementById("searchCardTemplate");
|
||||
const previewModal = document.getElementById("previewModal");
|
||||
const previewTitle = document.getElementById("previewTitle");
|
||||
const previewVideo = document.getElementById("previewVideo");
|
||||
const previewThumbnail = document.getElementById("previewThumbnail");
|
||||
const previewDuration = document.getElementById("previewDuration");
|
||||
const qualitySelect = document.getElementById("qualitySelect");
|
||||
@@ -117,6 +118,17 @@ function openPreviewModal(preview) {
|
||||
previewTitle.textContent = preview.title;
|
||||
previewThumbnail.src = preview.thumbnail;
|
||||
previewThumbnail.alt = preview.title;
|
||||
previewVideo.pause();
|
||||
previewVideo.removeAttribute("src");
|
||||
previewVideo.load();
|
||||
if (preview.previewStreamUrl) {
|
||||
previewVideo.src = preview.previewStreamUrl;
|
||||
previewVideo.classList.remove("hidden");
|
||||
previewThumbnail.classList.add("hidden");
|
||||
} else {
|
||||
previewVideo.classList.add("hidden");
|
||||
previewThumbnail.classList.remove("hidden");
|
||||
}
|
||||
previewDuration.textContent = preview.duration;
|
||||
qualitySelect.innerHTML = "";
|
||||
for (const item of preview.qualities || []) {
|
||||
@@ -132,6 +144,9 @@ function openPreviewModal(preview) {
|
||||
}
|
||||
|
||||
function closeModal() {
|
||||
previewVideo.pause();
|
||||
previewVideo.removeAttribute("src");
|
||||
previewVideo.load();
|
||||
previewModal.classList.add("hidden");
|
||||
previewModal.classList.remove("flex");
|
||||
pendingDownload = null;
|
||||
|
||||
@@ -60,10 +60,6 @@
|
||||
<h2 class="text-xl font-semibold text-white">Direct Downloader & Crop</h2>
|
||||
<form id="downloadForm" class="mt-4 space-y-3">
|
||||
<input id="downloadUrl" type="url" placeholder="https://..." class="w-full rounded-2xl border border-white/10 bg-black/40 px-4 py-3 text-sm text-white placeholder:text-zinc-500" />
|
||||
<div class="grid grid-cols-2 gap-3">
|
||||
<input id="startTime" type="text" value="00:00:00" class="rounded-2xl border border-white/10 bg-black/40 px-4 py-3 text-sm text-white" />
|
||||
<input id="endTime" type="text" value="00:00:00" class="rounded-2xl border border-white/10 bg-black/40 px-4 py-3 text-sm text-white" />
|
||||
</div>
|
||||
<button class="w-full rounded-2xl border border-white px-5 py-3 text-sm font-medium text-white transition hover:bg-white hover:text-black">Preview & Queue</button>
|
||||
</form>
|
||||
<p id="downloadResult" class="mt-3 text-sm text-zinc-400"></p>
|
||||
@@ -83,6 +79,7 @@
|
||||
</div>
|
||||
<div class="mt-5 grid gap-5 md:grid-cols-[1.1fr_0.9fr]">
|
||||
<div class="overflow-hidden rounded-3xl border border-white/10 bg-black/30">
|
||||
<video id="previewVideo" class="hidden aspect-video h-full w-full bg-black object-cover" controls playsinline></video>
|
||||
<img id="previewThumbnail" class="aspect-video h-full w-full object-cover" alt="" />
|
||||
</div>
|
||||
<div class="space-y-4">
|
||||
@@ -92,6 +89,16 @@
|
||||
<span id="previewDuration"></span>
|
||||
</div>
|
||||
</div>
|
||||
<div class="grid grid-cols-2 gap-3">
|
||||
<label class="block space-y-2">
|
||||
<span class="text-sm text-zinc-400">Start</span>
|
||||
<input id="startTime" type="text" value="00:00:00" class="w-full rounded-2xl border border-white/10 bg-black/40 px-4 py-3 text-sm text-white" />
|
||||
</label>
|
||||
<label class="block space-y-2">
|
||||
<span class="text-sm text-zinc-400">End</span>
|
||||
<input id="endTime" type="text" value="00:00:00" class="w-full rounded-2xl border border-white/10 bg-black/40 px-4 py-3 text-sm text-white" />
|
||||
</label>
|
||||
</div>
|
||||
<label class="block space-y-2">
|
||||
<span class="text-sm text-zinc-400">Quality</span>
|
||||
<select id="qualitySelect" class="w-full rounded-2xl border border-white/10 bg-black/40 px-4 py-3 text-sm text-white"></select>
|
||||
|
||||
@@ -16,7 +16,10 @@
|
||||
<Config Name="WebUI Port" Target="8080" Default="8080" Mode="tcp" Description="Dashboard port" Type="Port" Display="always" Required="true" Mask="false">8080</Config>
|
||||
<Config Name="Downloads" Target="/app/downloads" Default="/mnt/user/appdata/ai-media-hub/downloads" Mode="rw" Description="Media output directory" Type="Path" Display="always" Required="true" Mask="false">/mnt/user/appdata/ai-media-hub/downloads</Config>
|
||||
<Config Name="Database" Target="/app/db" Default="/mnt/user/appdata/ai-media-hub/db" Mode="rw" Description="SQLite database directory" Type="Path" Display="always" Required="true" Mask="false">/mnt/user/appdata/ai-media-hub/db</Config>
|
||||
<Config Name="Google CSE API Key" Target="GOOGLE_CSE_API_KEY" Default="" Mode="" Description="Google Custom Search API key" Type="Variable" Display="always" Required="true" Mask="true"/>
|
||||
<Config Name="Google CSE CX" Target="GOOGLE_CSE_CX" Default="" Mode="" Description="Google Custom Search Engine ID" Type="Variable" Display="always" Required="true" Mask="false"/>
|
||||
<Config Name="Vertex Search API Key" Target="VERTEX_AI_SEARCH_API_KEY" Default="" Mode="" Description="Vertex AI Search API key for searchLite" Type="Variable" Display="always" Required="true" Mask="true"/>
|
||||
<Config Name="Vertex Project ID" Target="VERTEX_AI_SEARCH_PROJECT_ID" Default="" Mode="" Description="Google Cloud project ID hosting Vertex AI Search" Type="Variable" Display="always" Required="true" Mask="false"/>
|
||||
<Config Name="Vertex Location" Target="VERTEX_AI_SEARCH_LOCATION" Default="global" Mode="" Description="Vertex AI Search location" Type="Variable" Display="always" Required="true" Mask="false">global</Config>
|
||||
<Config Name="Vertex Data Store ID" Target="VERTEX_AI_SEARCH_DATA_STORE_ID" Default="" Mode="" Description="Public website data store ID" Type="Variable" Display="always" Required="true" Mask="false"/>
|
||||
<Config Name="Vertex Serving Config" Target="VERTEX_AI_SEARCH_SERVING_CONFIG" Default="default_serving_config" Mode="" Description="Serving config name for website searchLite" Type="Variable" Display="always" Required="true" Mask="false">default_serving_config</Config>
|
||||
<Config Name="Gemini API Key" Target="GEMINI_API_KEY" Default="" Mode="" Description="Gemini API key" Type="Variable" Display="always" Required="true" Mask="true"/>
|
||||
</Container>
|
||||
|
||||
@@ -28,7 +28,7 @@ def run(cmd):
|
||||
|
||||
def parse_duration(value):
|
||||
if value is None:
|
||||
return "00:00:10"
|
||||
return "00:00:00"
|
||||
total = int(float(value))
|
||||
hours = total // 3600
|
||||
minutes = (total % 3600) // 60
|
||||
@@ -61,6 +61,24 @@ def build_quality_options(formats: List[dict]):
|
||||
return options
|
||||
|
||||
|
||||
def preview_stream_url(url):
    """Resolve a direct media URL for the preview payload's ``previewStreamUrl``.

    Invokes ``yt-dlp -g --no-playlist`` once per format selector, in order,
    and returns the first non-empty line printed by the first invocation that
    exits with status 0. Resolution is best-effort: a failing selector is
    skipped rather than raised.

    Args:
        url: Page URL of the media item to preview.

    Returns:
        The resolved URL as a string, or "" when ``url`` is empty or no
        selector produced any output.
    """
    # Nothing to resolve; avoid spawning yt-dlp for an empty argument.
    if not url:
        return ""

    # Try an MP4 selection first, then fall back to yt-dlp's generic "best".
    candidates = [
        "best[ext=mp4]/best",
        "best",
    ]
    for selector in candidates:
        proc = subprocess.run(
            # "--" ends option parsing so a URL that starts with "-" is
            # always treated as the positional URL, never as a yt-dlp flag.
            ["yt-dlp", "-g", "--no-playlist", "-f", selector, "--", url],
            capture_output=True,
            text=True,
        )
        if proc.returncode != 0:
            continue
        # yt-dlp may print several URLs (one per line); use the first one.
        for line in proc.stdout.splitlines():
            line = line.strip()
            if line:
                return line
    return ""
|
||||
|
||||
|
||||
def probe(url):
|
||||
cmd = ["yt-dlp", "--dump-single-json", "--no-playlist", url]
|
||||
proc = run(cmd)
|
||||
@@ -71,6 +89,7 @@ def probe(url):
|
||||
preview = {
|
||||
"title": payload.get("title") or "Untitled",
|
||||
"thumbnail": thumbnail,
|
||||
"previewStreamUrl": preview_stream_url(url),
|
||||
"durationSeconds": duration or 0,
|
||||
"duration": parse_duration(duration),
|
||||
"startDefault": "00:00:00",
|
||||
@@ -85,8 +104,8 @@ def main():
|
||||
parser.add_argument("--mode", choices=["probe", "download"], default="download")
|
||||
parser.add_argument("--url", required=True)
|
||||
parser.add_argument("--start", default="00:00:00")
|
||||
parser.add_argument("--end", default="00:00:10")
|
||||
parser.add_argument("--output", required=True)
|
||||
parser.add_argument("--end", default="00:00:00")
|
||||
parser.add_argument("--output", default="")
|
||||
parser.add_argument("--quality", default="best")
|
||||
args = parser.parse_args()
|
||||
|
||||
@@ -94,6 +113,8 @@ def main():
|
||||
probe(args.url)
|
||||
return
|
||||
|
||||
if not args.output:
|
||||
raise RuntimeError("output path is required for download mode")
|
||||
os.makedirs(os.path.dirname(args.output), exist_ok=True)
|
||||
emit("starting", 5, "Resolving media stream")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user