Files
ai-media-hub/worker/downloader.py
T
AI Assistant 8101f17f5b
build-push / docker (push) Failing after 20m32s
Refactor search fallback and preview flows
2026-03-16 11:12:43 +09:00

183 lines
5.3 KiB
Python

#!/usr/bin/env python3
import argparse
import json
import os
import subprocess
import sys
import tempfile
from typing import List
def emit(status, progress, message="", output=""):
    """Print one progress record as a single JSON line on stdout.

    The record always carries ``status``, ``progress`` and ``message``;
    ``output`` is added only when it is truthy. Stdout is flushed after
    every record.
    """
    record = dict(status=status, progress=progress, message=message)
    if output:
        record.update(output=output)
    line = json.dumps(record)
    print(line, flush=True)
def run(cmd):
    """Run ``cmd`` to completion, capturing stdout/stderr as text.

    Returns the ``subprocess.CompletedProcess`` on a zero exit status.

    Raises:
        RuntimeError: on a non-zero exit status. The message is the
            stripped stderr, else the stripped stdout, else
            ``"command failed"``.
    """
    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode == 0:
        return result
    detail = result.stderr.strip() or result.stdout.strip() or "command failed"
    raise RuntimeError(detail)
def parse_duration(value):
    """Format a duration in seconds as a zero-padded ``HH:MM:SS`` string.

    ``value`` may be anything ``float()`` accepts; fractional seconds are
    truncated. ``None`` yields ``"00:00:00"``.
    """
    if value is None:
        return "00:00:00"
    whole_seconds = int(float(value))
    hours, remainder = divmod(whole_seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    return f"{hours:02d}:{minutes:02d}:{seconds:02d}"
def build_quality_options(formats: List[dict]):
    """Build the list of selectable quality options from format entries.

    The first option is always ``best``. It is followed by one
    "Up to <height>p" option per distinct positive integer ``height``
    found in ``formats``, in ascending order. Entries whose ``height`` is
    missing, non-integer or not positive are ignored.
    """
    distinct_heights = {
        h
        for h in (entry.get("height") for entry in formats)
        if isinstance(h, int) and h > 0
    }
    options = [{"value": "best", "label": "Best available"}]
    options.extend(
        {
            "value": f"bestvideo[height<={h}]+bestaudio/best[height<={h}]",
            "label": f"Up to {h}p",
        }
        for h in sorted(distinct_heights)
    )
    return options
def preview_stream_url(url):
    """Return a stream URL for previewing ``url``, or ``""`` if none is found.

    Invokes ``yt-dlp -g`` with each format selector in turn and returns the
    first non-empty line of stdout from the first invocation that exits
    with status 0. A failing selector is skipped rather than raised.
    """
    candidates = [
        "best[ext=mp4]/best",
        "best",
    ]
    for selector in candidates:
        proc = subprocess.run(
            # "--" ends option parsing so a URL or bare video ID that starts
            # with "-" is treated as the positional URL, not as an option.
            ["yt-dlp", "-g", "--no-playlist", "-f", selector, "--", url],
            capture_output=True,
            text=True,
        )
        if proc.returncode != 0:
            continue
        lines = [line.strip() for line in proc.stdout.splitlines() if line.strip()]
        if lines:
            return lines[0]
    return ""
def probe(url):
    """Fetch metadata for ``url`` and print a preview description as JSON.

    Runs ``yt-dlp --dump-single-json --no-playlist`` and prints one JSON
    object with the keys ``title``, ``thumbnail``, ``previewStreamUrl``,
    ``durationSeconds``, ``duration``, ``startDefault``, ``endDefault`` and
    ``qualities``.

    Raises:
        RuntimeError: if the yt-dlp metadata call exits non-zero (via ``run``).
    """
    # "--" ends option parsing so a URL or bare video ID that starts with
    # "-" is treated as the positional URL, not as an option.
    cmd = ["yt-dlp", "--dump-single-json", "--no-playlist", "--", url]
    proc = run(cmd)
    payload = json.loads(proc.stdout)
    thumbnail = payload.get("thumbnail") or ""
    duration = payload.get("duration")
    formats = payload.get("formats") or []
    # The formatted duration is both the displayed length and the default
    # end of the trim range, so compute it once.
    duration_text = parse_duration(duration)
    preview = {
        "title": payload.get("title") or "Untitled",
        "thumbnail": thumbnail,
        "previewStreamUrl": preview_stream_url(url),
        "durationSeconds": duration or 0,
        "duration": duration_text,
        "startDefault": "00:00:00",
        "endDefault": duration_text,
        "qualities": build_quality_options(formats),
    }
    print(json.dumps(preview), flush=True)
def parse_timestamp(value: str) -> int:
    """Convert a timestamp string to whole seconds.

    Accepted forms are ``HH:MM:SS``, ``MM:SS`` and plain seconds; the
    seconds field may be fractional and is truncated. Blank, ``None`` or
    unparsable input yields ``0``.
    """
    text = (value or "").strip()
    if not text:
        return 0
    parts = text.split(":")
    try:
        if len(parts) == 3:
            hours, minutes, seconds = parts
            return int(hours) * 3600 + int(minutes) * 60 + int(float(seconds))
        if len(parts) == 2:
            minutes, seconds = parts
            return int(minutes) * 60 + int(float(seconds))
        return int(float(text))
    except (ValueError, OverflowError):
        # int(float("inf")) raises OverflowError rather than ValueError;
        # treat it like any other unparsable timestamp.
        return 0
def resolve_source_file(tmpdir: str) -> str:
    """Return the path of the downloaded file inside ``tmpdir``.

    Only regular files directly inside ``tmpdir`` are considered. When
    there are several, the one whose full path sorts first is returned.

    Raises:
        RuntimeError: if ``tmpdir`` contains no regular file.
    """
    candidates = []
    for entry in os.listdir(tmpdir):
        full_path = os.path.join(tmpdir, entry)
        if os.path.isfile(full_path):
            candidates.append(full_path)
    if candidates:
        return min(candidates)
    raise RuntimeError("yt-dlp did not produce an output file")
def should_trim(start: str, end: str) -> bool:
    """Report whether ``start``/``end`` describe a non-empty segment.

    Both timestamps are converted with ``parse_timestamp``; the result is
    true only when the end lies strictly after the start.
    """
    begin_at = parse_timestamp(start)
    stop_at = parse_timestamp(end)
    return begin_at < stop_at
def build_ffmpeg_cmd(source_file: str, output_path: str, start: str, end: str) -> List[str]:
    """Build the ffmpeg argv that writes ``source_file`` to ``output_path``.

    The command always uses ``-y`` and ``-c copy``. When ``should_trim``
    accepts the range, ``-ss``/``-to`` are placed before ``-i``; otherwise
    the whole file is written.
    """
    cmd = ["ffmpeg", "-y"]
    if should_trim(start, end):
        # should_trim treats a blank start as 0 and ignores surrounding
        # whitespace, so pass ffmpeg the same interpretation: an empty
        # "-ss" value would make ffmpeg reject the whole command.
        seek_from = (start or "").strip() or "0"
        seek_to = (end or "").strip()
        cmd.extend(["-ss", seek_from, "-to", seek_to])
    cmd.extend(["-i", source_file, "-c", "copy", output_path])
    return cmd
def main():
    """Command-line entry point: probe a URL or download (and trim) it.

    In ``probe`` mode the preview JSON is printed and the function returns.
    In ``download`` mode (the default) the media is fetched with yt-dlp
    into a temporary directory and written to ``--output`` with ffmpeg,
    trimmed to ``--start``/``--end`` when they describe a valid range.
    Progress records are emitted along the way.

    Raises:
        RuntimeError: if ``--output`` is missing in download mode, if
            yt-dlp or ffmpeg exits non-zero, or if yt-dlp leaves no file.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--mode", choices=["probe", "download"], default="download")
    parser.add_argument("--url", required=True)
    parser.add_argument("--start", default="00:00:00")
    parser.add_argument("--end", default="00:00:00")
    parser.add_argument("--output", default="")
    parser.add_argument("--quality", default="best")
    args = parser.parse_args()

    if args.mode == "probe":
        probe(args.url)
        return

    if not args.output:
        raise RuntimeError("output path is required for download mode")

    # A bare filename has an empty dirname, and os.makedirs("") raises
    # FileNotFoundError, so only create the directory when there is one.
    output_dir = os.path.dirname(args.output)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    emit("starting", 5, "Resolving media stream")
    with tempfile.TemporaryDirectory(prefix="aihub-") as tmpdir:
        # yt-dlp fills in %(ext)s itself; the real filename is looked up
        # afterwards with resolve_source_file.
        source_path = os.path.join(tmpdir, "source.%(ext)s")
        download_cmd = [
            "yt-dlp",
            "--no-playlist",
            "-f",
            args.quality,
            "-o",
            source_path,
            # "--" ends option parsing so a URL or bare video ID that
            # starts with "-" is not mistaken for an option.
            "--",
            args.url,
        ]
        run(download_cmd)
        emit("downloaded", 55, "Source downloaded")

        source_file = resolve_source_file(tmpdir)
        ffmpeg_cmd = build_ffmpeg_cmd(source_file, args.output, args.start, args.end)
        trimming = should_trim(args.start, args.end)
        message = "Cropping requested segment" if trimming else "Saving downloaded media"
        emit("cropping", 75, message)
        run(ffmpeg_cmd)
        emit("completed", 100, "Download complete", args.output)
if __name__ == "__main__":
    # Top-level boundary: any failure is reported as an "error" progress
    # record on stdout, and the process exits with status 1.
    try:
        main()
    except Exception as err:
        emit("error", 100, str(err))
        sys.exit(1)