reels-app/scripts/yt_download.py
OpenClaw Agent 968eba7205 YT metadata fetch: razširi --info-only output (id, uploader, description, upload_date, view_count, tags, ...). Single video submit fetcha metadata + Qnet match takoj (kot playlist). Worker preskoči info fetch če metadata že obstaja, sicer shrani vsa polja in naredi Qnet match.
- yt_download.py: get_info() probaj najprej yt.biba.live API /download/info (residential IP, sveži cookies), fallback na lokalni yt-dlp. --info-only output razširjen na 17 polj.
- main.py submit_youtube single video: fetcha metadata (yt_get_info) ob submit, shrani youtube_title/uploader/id/description/duration/thumbnail/upload_date in naredi Qnet match (parity s playlist branch).
- main.py worker: skip info fetch če youtube_title in youtube_uploader že obstajata. Sicer shrani VSE polja + Qnet match + parser fallback.
2026-05-02 15:54:28 +00:00

287 lines
10 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
yt_download.py — Download YouTube video v 1080p (16:9) za reels pipeline.
Primer:
python3 yt_download.py "https://youtu.be/dQw4w9WgXcQ" /data/uploads/video.mp4
"""
import argparse
import os
import subprocess
import sys
import time
from pathlib import Path
import json
# Base URL of the yt.biba.live helper API; can be overridden via the
# YT_API_URL environment variable.
YT_API_URL = os.environ.get("YT_API_URL", "https://yt.biba.live")
# Bearer token sent to that API. When it is empty, every API code path in
# this module is skipped and only the local yt-dlp fallbacks are used.
YT_API_TOKEN = os.environ.get("YT_API_TOKEN", "")
def fetch_cookies_from_api(cache_path="/tmp/yt_cookies_api.txt", max_age_seconds=300):
    """Fetch fresh YouTube cookies from the yt.biba.live API, with a disk cache.

    A cached copy at ``cache_path`` that is younger than ``max_age_seconds``
    and larger than 100 bytes is reused without contacting the API.

    Args:
        cache_path: Location of the cached cookies file.
        max_age_seconds: Maximum age of the cached copy before it is refetched.

    Returns:
        Path to the cookies file as a string, or None when no API token is
        configured, the API response is shorter than 100 characters, or the
        fetch/write fails for any reason.
    """
    if not YT_API_TOKEN:
        return None  # API access is not configured

    cache = Path(cache_path)
    try:
        # A single stat() call; also tolerates the file vanishing between
        # an existence check and the stat (another worker refreshing it).
        cache_stat = cache.stat()
    except OSError:
        cache_stat = None
    if cache_stat is not None:
        age = time.time() - cache_stat.st_mtime
        if age < max_age_seconds and cache_stat.st_size > 100:
            return str(cache)

    try:
        import tempfile
        import urllib.request

        req = urllib.request.Request(
            f"{YT_API_URL}/cookies/raw",
            headers={"Authorization": f"Bearer {YT_API_TOKEN}"},
        )
        with urllib.request.urlopen(req, timeout=10) as resp:
            content = resp.read().decode("utf-8")
        if len(content) < 100:
            print(f"⚠ yt.biba.live vrnil prekratek cookies file ({len(content)} bytes)", file=sys.stderr)
            return None

        # Cookies are session credentials: write them to a private (0600)
        # temp file in the same directory, then atomically swap it into
        # place so concurrent readers never see a partial or world-readable
        # file.
        fd, tmp_name = tempfile.mkstemp(prefix=cache.name + ".", dir=str(cache.parent))
        try:
            with os.fdopen(fd, "w", encoding="utf-8") as fh:
                fh.write(content)
            os.replace(tmp_name, cache)
        except BaseException:
            try:
                os.unlink(tmp_name)
            except OSError:
                pass
            raise

        print(f"🍪 Cookies fetched iz yt.biba.live ({len(content)} bytes)", file=sys.stderr)
        return str(cache)
    except Exception as e:
        # Best effort: any network, decode or filesystem failure just means
        # "no API cookies"; callers fall back to other cookie sources.
        print(f"⚠ yt.biba.live cookies fetch failed: {e}", file=sys.stderr)
        return None
def get_cookies_file(cookies_file=None):
    """Resolve which cookies file to use, or return None if none is available.

    Sources are tried in this order and the first hit wins:
      1. the explicitly supplied ``cookies_file``, if it exists on disk
      2. cookies fetched from the yt.biba.live API
      3. /data/cookies/youtube.txt
      4. the path named by the YT_COOKIES_FILE environment variable
    """
    explicit_ok = bool(cookies_file) and Path(cookies_file).exists()
    if explicit_ok:
        return cookies_file

    # API-provided cookies take precedence over the local fallbacks.
    fetched = fetch_cookies_from_api()
    if fetched:
        return fetched

    local_paths = (
        "/data/cookies/youtube.txt",
        os.environ.get("YT_COOKIES_FILE", ""),
    )
    return next((p for p in local_paths if p and Path(p).exists()), None)
def download(url, output, max_height=1080, format_str=None, cookies_file=None):
    """Download a YouTube video with yt-dlp and return the output path.

    When ``format_str`` is not given, the selector prefers the best MP4 video
    no taller than ``max_height`` merged with M4A audio, then a single MP4
    file within that height, then yt-dlp's overall best format.

    Args:
        url: Video URL handed to yt-dlp.
        output: Value passed to yt-dlp's ``-o`` option.
        max_height: Height limit used by the default format selector.
        format_str: Explicit yt-dlp format selector; overrides the default.
        cookies_file: Preferred cookies file; resolved via get_cookies_file().

    Returns:
        ``output``, unchanged.

    Exits the process with status 1 (via sys.exit) when yt-dlp fails.
    """
    if format_str is None:
        format_str = (
            f"bestvideo[height<={max_height}][ext=mp4]+bestaudio[ext=m4a]/"
            f"best[height<={max_height}][ext=mp4]/best"
        )
    cmd = [
        "yt-dlp",
        "-f", format_str,
        "--merge-output-format", "mp4",
        "--no-playlist",
        "--write-info-json",
        "--restrict-filenames",
        "-o", str(output),
    ]
    cookies_file = get_cookies_file(cookies_file)
    if cookies_file:
        cmd += ["--cookies", str(cookies_file)]
        print(f"🍪 Using cookies: {cookies_file}", file=sys.stderr)
    # "--" ends option parsing, so a URL or video id that starts with "-"
    # cannot be interpreted as a yt-dlp option.
    cmd += ["--", url]
    print(f"⬇ Downloading {url}...", file=sys.stderr)
    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0:
        # Only the tail of stderr is shown; yt-dlp output can be very long.
        print(f"❌ yt-dlp napaka:\n{result.stderr[-1500:]}", file=sys.stderr)
        sys.exit(1)
    print(f"{output}", file=sys.stderr)
    return output
def get_info(url, cookies_file=None):
    """Return metadata for a single video without downloading it.

    Strategy:
      1. If an API token is configured, ask the yt.biba.live API
         (``/download/info``) and return its JSON response as-is when it
         reports ``ok``.
      2. Otherwise, or when the API call fails, fall back to a local
         ``yt-dlp --dump-json --no-playlist``.

    Args:
        url: Video URL to inspect.
        cookies_file: Preferred cookies file for the local fallback; resolved
            via get_cookies_file().

    Returns:
        A metadata dict, or None when the local fallback fails, times out or
        prints output that is not valid JSON.
    """
    # ─── 1. Try the yt.biba.live API ───
    if YT_API_TOKEN:
        try:
            import urllib.parse
            import urllib.request

            params = urllib.parse.urlencode({"url": url})
            req = urllib.request.Request(
                f"{YT_API_URL}/download/info?{params}",
                headers={"Authorization": f"Bearer {YT_API_TOKEN}"},
            )
            with urllib.request.urlopen(req, timeout=30) as resp:
                api_data = json.loads(resp.read().decode("utf-8"))
            if api_data.get("ok"):
                # A null/missing title must not discard a successful result.
                title = str(api_data.get("title") or "")
                print(f" Info via yt.biba.live API: {title[:60]}", file=sys.stderr)
                return api_data
            api_error = str(api_data.get("error") or "unknown")[:200]
            print(f"⚠ yt.biba.live /download/info error: {api_error}, fallback to local", file=sys.stderr)
        except Exception as e:
            # Best effort: any API problem just triggers the local fallback.
            print(f"⚠ yt.biba.live /download/info failed: {e}, fallback to local", file=sys.stderr)

    # ─── 2. Fallback: local yt-dlp ───
    cmd = ["yt-dlp", "--dump-json", "--no-playlist"]
    cookies_file = get_cookies_file(cookies_file)
    if cookies_file:
        cmd += ["--cookies", str(cookies_file)]
    # "--" ends option parsing so a URL starting with "-" is not an option.
    cmd += ["--", url]
    try:
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=120)
    except subprocess.TimeoutExpired:
        print("❌ yt-dlp info timeout (120s)", file=sys.stderr)
        return None
    if result.returncode != 0:
        print(f"❌ yt-dlp info napaka:\n{result.stderr[-1500:]}", file=sys.stderr)
        return None
    # Only the first JSON line is used.
    first_line = result.stdout.strip().split("\n")[0]
    try:
        return json.loads(first_line)
    except json.JSONDecodeError as e:
        print(f"❌ yt-dlp info: invalid JSON output: {e}", file=sys.stderr)
        return None
def get_playlist(url, cookies_file=None):
    """Return the list of videos in a playlist (metadata only, no download).

    Strategy:
      1. If an API token is configured, ask the yt.biba.live API
         (``/download/playlist``, limited to 200 items).
      2. Otherwise, or when the API call fails or reports an error, fall back
         to a local ``yt-dlp --flat-playlist --dump-json``.

    Args:
        url: Playlist (or video) URL to resolve.
        cookies_file: Preferred cookies file for the local fallback; resolved
            via get_cookies_file().

    Returns:
        {
            "is_playlist": bool,
            "playlist_title": str,
            "items": [
                {"id": "VIDEO_ID", "title": "...", "url": "https://...",
                 "duration": 234, "uploader": "..."},
                ...
            ],
        }
        Items returned by the API are passed through unchanged. When the
        local yt-dlp run fails or times out, ``items`` is empty and an
        additional ``"error"`` key describes the failure.
    """
    # ─── 1. Try the yt.biba.live API ───
    if YT_API_TOKEN:
        try:
            import urllib.parse
            import urllib.request

            params = urllib.parse.urlencode({"url": url, "max_items": 200})
            req = urllib.request.Request(
                f"{YT_API_URL}/download/playlist?{params}",
                headers={"Authorization": f"Bearer {YT_API_TOKEN}"},
            )
            with urllib.request.urlopen(req, timeout=120) as resp:
                api_data = json.loads(resp.read().decode("utf-8"))
            if api_data.get("ok"):
                print(f"📋 Playlist resolved via yt.biba.live API: {api_data.get('count', 0)} items", file=sys.stderr)
                return {
                    "is_playlist": api_data.get("is_playlist", False),
                    # Normalise JSON nulls so callers always get str / list.
                    "playlist_title": api_data.get("playlist_title") or "",
                    "items": api_data.get("items") or [],
                }
            # The error may be null or a non-string; stringify before slicing.
            api_error = str(api_data.get("error") or "unknown")[:200]
            print(f"⚠ yt.biba.live API error: {api_error}", file=sys.stderr)
            # Fall through to the local fallback.
        except Exception as e:
            print(f"⚠ yt.biba.live API failed: {e}, fallback to local yt-dlp", file=sys.stderr)

    # ─── 2. Fallback: local yt-dlp ───
    cmd = ["yt-dlp", "--flat-playlist", "--dump-json"]
    cookies_file = get_cookies_file(cookies_file)
    if cookies_file:
        cmd += ["--cookies", str(cookies_file)]
    # "--" ends option parsing so a URL starting with "-" is not an option.
    cmd += ["--", url]
    print(f"📋 Resolving playlist (lokalno): {url}", file=sys.stderr)
    try:
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=120)
    except subprocess.TimeoutExpired:
        # Report a timeout in the same shape as any other yt-dlp failure
        # instead of letting the exception escape.
        print("❌ yt-dlp playlist timeout (120s)", file=sys.stderr)
        return {
            "is_playlist": False,
            "playlist_title": "",
            "items": [],
            "error": "yt-dlp timed out after 120s",
        }
    if result.returncode != 0:
        print(f"❌ yt-dlp playlist napaka:\n{result.stderr[-1500:]}", file=sys.stderr)
        return {"is_playlist": False, "playlist_title": "", "items": [], "error": result.stderr[-500:]}

    items = []
    playlist_title = ""
    # yt-dlp prints one JSON object per line.
    for line in result.stdout.strip().split("\n"):
        if not line.strip():
            continue
        try:
            entry = json.loads(line)
        except json.JSONDecodeError:
            continue
        if not isinstance(entry, dict):
            continue
        # Playlist header record (_type == "playlist") carries the title.
        if entry.get("_type") == "playlist":
            playlist_title = entry.get("title", "")
            continue
        # Video entry; records without an id are skipped.
        vid = entry.get("id")
        if not vid:
            continue
        items.append({
            "id": vid,
            "title": entry.get("title", "") or "",
            "url": entry.get("url") or f"https://www.youtube.com/watch?v={vid}",
            "duration": entry.get("duration"),
            "uploader": entry.get("uploader") or entry.get("channel") or "",
        })

    # A single-item result still counts as a playlist when the URL itself
    # carries a list= parameter.
    is_playlist = len(items) > 1 or ("list=" in url)
    return {
        "is_playlist": is_playlist,
        "playlist_title": playlist_title or (items[0]["title"] if items else ""),
        "items": items,
    }
def main():
    """Command-line entry point.

    Usage:
        yt_download.py URL OUTPUT [--max-height N] [--cookies FILE]
        yt_download.py URL [OUTPUT] --info-only [--cookies FILE]

    With ``--info-only`` the video metadata is printed to stdout as a JSON
    object and nothing is downloaded; OUTPUT is then optional and ignored.
    Otherwise the video is downloaded to OUTPUT.

    Exits with status 1 when the metadata cannot be fetched, and with an
    argparse usage error when OUTPUT is missing in download mode.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("url")
    # Optional so that --info-only does not need a dummy output path.
    ap.add_argument("output", nargs="?", default=None)
    ap.add_argument("--max-height", type=int, default=1080)
    ap.add_argument("--cookies", default=None,
                    help="Pot do cookies.txt (Netscape format)")
    ap.add_argument("--info-only", action="store_true",
                    help="Samo metadata, brez prenosa")
    args = ap.parse_args()

    if args.info_only:
        info = get_info(args.url, cookies_file=args.cookies)
        if not info:
            print("❌ Ne morem dobiti info", file=sys.stderr)
            sys.exit(1)
        # Fields copied into the JSON output, in output order; missing keys
        # are emitted as null.
        info_fields = (
            "id",
            "title",
            "duration",
            "uploader",
            "uploader_id",
            "channel",
            "channel_id",
            "thumbnail",
            "description",
            "upload_date",
            "view_count",
            "like_count",
            "categories",
            "tags",
            "webpage_url",
            "age_limit",
            "is_live",
        )
        summary = {field: info.get(field) for field in info_fields}
        print(json.dumps(summary, indent=2, ensure_ascii=False))
        return

    if args.output is None:
        ap.error("output is required unless --info-only is given")
    download(args.url, args.output, max_height=args.max_height, cookies_file=args.cookies)
# Run the CLI only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()