- yt_download.py: get_info() probaj najprej yt.biba.live API /download/info (residential IP, sveži cookies), fallback na lokalni yt-dlp. --info-only output razširjen na 17 polj. - main.py submit_youtube single video: fetcha metadata (yt_get_info) ob submit, shrani youtube_title/uploader/id/description/duration/thumbnail/upload_date in naredi Qnet match (parity s playlist branch). - main.py worker: skip info fetch če youtube_title in youtube_uploader že obstajata. Sicer shrani VSE polja + Qnet match + parser fallback.
287 lines
10 KiB
Python
287 lines
10 KiB
Python
#!/usr/bin/env python3
|
||
"""
|
||
yt_download.py — Download a YouTube video at up to 1080p (16:9) for the reels pipeline.

Example:
    python3 yt_download.py "https://youtu.be/dQw4w9WgXcQ" /data/uploads/video.mp4
|
||
"""
|
||
import argparse
|
||
import os
|
||
import subprocess
|
||
import sys
|
||
import time
|
||
from pathlib import Path
|
||
import json
|
||
|
||
|
||
# Base URL of the yt.biba.live helper API; overridable via the YT_API_URL env var.
YT_API_URL = os.environ.get("YT_API_URL", "https://yt.biba.live")
# Bearer token sent to that API. When it is empty, the API code paths in this
# module are skipped and only the local yt-dlp / local cookie files are used.
YT_API_TOKEN = os.environ.get("YT_API_TOKEN", "")
|
||
|
||
|
||
def fetch_cookies_from_api(cache_path="/tmp/yt_cookies_api.txt", max_age_seconds=300):
    """Fetch fresh YouTube cookies from the yt.biba.live API, with a file cache.

    The downloaded cookies are stored at ``cache_path``. A cached copy whose
    mtime is younger than ``max_age_seconds`` and whose size exceeds 100 bytes
    is reused without contacting the API.

    Args:
        cache_path: Location of the on-disk cookies cache.
        max_age_seconds: Maximum cache age (seconds) before a refetch.

    Returns:
        Path (str) to the cookies file, or None when no API token is
        configured, the API answer is shorter than 100 characters, or the
        request / cache write fails.
    """
    if not YT_API_TOKEN:
        return None  # API access is not configured

    cache = Path(cache_path)
    try:
        # A single stat() avoids the exists()/stat() race and a second syscall.
        cached = cache.stat()
    except OSError:
        cached = None
    if (
        cached is not None
        and time.time() - cached.st_mtime < max_age_seconds
        and cached.st_size > 100
    ):
        return str(cache)

    try:
        import urllib.request

        req = urllib.request.Request(
            f"{YT_API_URL}/cookies/raw",
            headers={"Authorization": f"Bearer {YT_API_TOKEN}"},
        )
        with urllib.request.urlopen(req, timeout=10) as resp:
            content = resp.read().decode("utf-8")
        if len(content) < 100:
            print(f"⚠ yt.biba.live vrnil prekratek cookies file ({len(content)} bytes)", file=sys.stderr)
            return None

        # Cookies are credentials: create the file owner-only (0600) and swap
        # it in atomically so a concurrent reader never sees a partial file.
        tmp_path = cache.with_name(f"{cache.name}.{os.getpid()}.tmp")
        fd = os.open(tmp_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
        try:
            with os.fdopen(fd, "w", encoding="utf-8") as fh:
                fh.write(content)
            os.replace(tmp_path, cache)
        finally:
            # After a successful replace the temp name is already gone.
            if tmp_path.exists():
                tmp_path.unlink()

        print(f"🍪 Cookies fetched iz yt.biba.live ({len(content)} bytes)", file=sys.stderr)
        return str(cache)
    except Exception as e:
        # Best effort: any network / decode / filesystem problem means
        # "no API cookies", and the caller falls back to local files.
        print(f"⚠ yt.biba.live cookies fetch failed: {e}", file=sys.stderr)
        return None
|
||
|
||
|
||
def get_cookies_file(cookies_file=None):
    """Resolve which cookies file should be handed to yt-dlp.

    Sources are tried in this order and the first hit wins:
        1. the explicit ``cookies_file`` argument
        2. cookies fetched through fetch_cookies_from_api()
        3. /data/cookies/youtube.txt (local fallback)
        4. the path in the YT_COOKIES_FILE environment variable

    Args:
        cookies_file: Optional explicit path to a cookies file.

    Returns:
        Path to a usable cookies file, or None when no source yields one.
    """
    # is_file() rather than exists(): a directory must not be passed on as a
    # cookies file.
    if cookies_file and Path(cookies_file).is_file():
        return cookies_file

    api_cookies = fetch_cookies_from_api()
    if api_cookies:
        return api_cookies

    local_candidates = (
        "/data/cookies/youtube.txt",
        os.environ.get("YT_COOKIES_FILE", ""),
    )
    for candidate in local_candidates:
        if candidate and Path(candidate).is_file():
            return candidate

    return None
|
||
|
||
|
||
def download(url, output, max_height=1080, format_str=None, cookies_file=None):
    """Download a YouTube video with yt-dlp and merge it into an mp4 file.

    Args:
        url: Video URL handed to yt-dlp.
        output: Output path / template passed to yt-dlp's ``-o`` option.
        max_height: Upper bound on video height used by the default format
            selector.
        format_str: Custom yt-dlp format selector. When None, the best mp4
            video up to ``max_height`` plus m4a audio is requested, with
            progressively looser fallbacks.
        cookies_file: Optional explicit cookies file; resolved through
            get_cookies_file().

    Returns:
        ``output`` unchanged, once yt-dlp has exited successfully.

    Raises:
        SystemExit: With code 1 when yt-dlp exits with a non-zero status.
    """
    if format_str is None:
        format_str = (
            f"bestvideo[height<={max_height}][ext=mp4]+bestaudio[ext=m4a]/"
            f"best[height<={max_height}][ext=mp4]/best"
        )

    cmd = [
        "yt-dlp",
        "-f", format_str,
        "--merge-output-format", "mp4",
        "--no-playlist",
        "--write-info-json",
        "--restrict-filenames",
        "-o", str(output),
    ]

    cookies_file = get_cookies_file(cookies_file)
    if cookies_file:
        cmd += ["--cookies", str(cookies_file)]
        print(f"🍪 Using cookies: {cookies_file}", file=sys.stderr)

    # "--" ends option parsing, so a URL/ID that starts with "-" can never be
    # interpreted by yt-dlp as a command-line option.
    cmd += ["--", url]
    print(f"⬇ Downloading {url}...", file=sys.stderr)
    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0:
        # Only the tail of stderr is shown; that is where yt-dlp puts the error.
        print(f"❌ yt-dlp napaka:\n{result.stderr[-1500:]}", file=sys.stderr)
        sys.exit(1)
    print(f"✅ {output}", file=sys.stderr)
    return output
|
||
|
||
|
||
def get_info(url, cookies_file=None):
    """Return video metadata without downloading the media.

    Strategy:
        1. When YT_API_TOKEN is set, ask the yt.biba.live API
           (``/download/info``) first.
        2. Otherwise, or when the API fails or reports an error, fall back to
           a local ``yt-dlp --dump-json`` run (limited to 120 seconds).

    Args:
        url: Video URL to inspect.
        cookies_file: Optional explicit cookies file for the local fallback;
            resolved through get_cookies_file().

    Returns:
        The metadata dict (the API response as-is, or yt-dlp's JSON), or None
        when the local yt-dlp run fails, times out or prints no valid JSON.
    """
    # ─── 1. Try the yt.biba.live API ───
    if YT_API_TOKEN:
        try:
            import urllib.parse
            import urllib.request

            params = urllib.parse.urlencode({"url": url})
            req = urllib.request.Request(
                f"{YT_API_URL}/download/info?{params}",
                headers={"Authorization": f"Bearer {YT_API_TOKEN}"},
            )
            with urllib.request.urlopen(req, timeout=30) as resp:
                api_data = json.loads(resp.read().decode("utf-8"))
            if api_data.get("ok"):
                # A null title must not raise here: the broad except below
                # would then throw away an otherwise good API response.
                title = str(api_data.get("title") or "")
                print(f"ℹ Info via yt.biba.live API: {title[:60]}", file=sys.stderr)
                # The API response is returned unchanged (no key renaming).
                # NOTE(review): callers read it with yt-dlp key names —
                # confirm the API keeps key parity with yt-dlp's JSON.
                return api_data
            # Same message get_playlist() emits for an ok=false response.
            api_error = str(api_data.get("error") or "unknown")
            print(f"⚠ yt.biba.live API error: {api_error[:200]}", file=sys.stderr)
        except Exception as e:
            print(f"⚠ yt.biba.live /download/info failed: {e}, fallback to local", file=sys.stderr)

    # ─── 2. Fallback: local yt-dlp ───
    cmd = ["yt-dlp", "--dump-json", "--no-playlist"]
    cookies_file = get_cookies_file(cookies_file)
    if cookies_file:
        cmd += ["--cookies", str(cookies_file)]
    # "--" ends option parsing so a URL starting with "-" stays a URL.
    cmd += ["--", url]
    try:
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=120)
    except subprocess.TimeoutExpired:
        print(f"❌ yt-dlp info timeout (120s): {url}", file=sys.stderr)
        return None
    if result.returncode != 0:
        print(f"❌ yt-dlp info napaka:\n{result.stderr[-1500:]}", file=sys.stderr)
        return None

    # yt-dlp prints one JSON document per line; only the first one is used.
    first_line = result.stdout.strip().split("\n")[0]
    try:
        return json.loads(first_line)
    except json.JSONDecodeError:
        # Exit code 0 but empty / garbled stdout: report "no info", not a crash.
        return None
|
||
|
||
|
||
def get_playlist(url, cookies_file=None):
    """Return the list of videos in a playlist (metadata only, no download).

    Strategy:
        1. When YT_API_TOKEN is set, ask the yt.biba.live API
           (``/download/playlist``, at most 200 items) first.
        2. Otherwise, or when the API fails or reports an error, fall back to
           a local ``yt-dlp --flat-playlist --dump-json`` run (120 s limit).

    Args:
        url: Playlist or video URL.
        cookies_file: Optional explicit cookies file for the local fallback;
            resolved through get_cookies_file().

    Returns:
        {
            "is_playlist": bool,
            "playlist_title": str,
            "items": [
                {"id": "VIDEO_ID", "title": "...", "url": "https://...", "duration": 234},
                ...
            ]
        }
        Items produced by the local fallback also carry an "uploader" key.
        When the local yt-dlp run fails or times out, the dict has an empty
        "items" list plus an "error" key describing the failure.
    """
    # ─── 1. Try the yt.biba.live API ───
    if YT_API_TOKEN:
        try:
            import urllib.parse
            import urllib.request

            params = urllib.parse.urlencode({"url": url, "max_items": 200})
            req = urllib.request.Request(
                f"{YT_API_URL}/download/playlist?{params}",
                headers={"Authorization": f"Bearer {YT_API_TOKEN}"},
            )
            with urllib.request.urlopen(req, timeout=120) as resp:
                api_data = json.loads(resp.read().decode("utf-8"))

            if api_data.get("ok"):
                print(f"📋 Playlist resolved via yt.biba.live API: {api_data.get('count', 0)} items", file=sys.stderr)
                return {
                    "is_playlist": api_data.get("is_playlist", False),
                    "playlist_title": api_data.get("playlist_title", ""),
                    "items": api_data.get("items", []),
                }
            # str() keeps the slice safe when "error" is not a string.
            api_error = str(api_data.get("error") or "unknown")
            print(f"⚠ yt.biba.live API error: {api_error[:200]}", file=sys.stderr)
            # Fall through to the local yt-dlp run.
        except Exception as e:
            print(f"⚠ yt.biba.live API failed: {e}, fallback to local yt-dlp", file=sys.stderr)

    # ─── 2. Fallback: local yt-dlp ───
    cmd = ["yt-dlp", "--flat-playlist", "--dump-json"]
    cookies_file = get_cookies_file(cookies_file)
    if cookies_file:
        cmd += ["--cookies", str(cookies_file)]
    # "--" ends option parsing so a URL starting with "-" stays a URL.
    cmd += ["--", url]
    print(f"📋 Resolving playlist (lokalno): {url}", file=sys.stderr)
    try:
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=120)
    except subprocess.TimeoutExpired:
        # Report a timeout the same way as any other yt-dlp failure instead of
        # letting the exception escape to the caller.
        print(f"❌ yt-dlp playlist timeout (120s): {url}", file=sys.stderr)
        return {
            "is_playlist": False,
            "playlist_title": "",
            "items": [],
            "error": "yt-dlp timed out after 120s",
        }
    if result.returncode != 0:
        print(f"❌ yt-dlp playlist napaka:\n{result.stderr[-1500:]}", file=sys.stderr)
        return {"is_playlist": False, "playlist_title": "", "items": [], "error": result.stderr[-500:]}

    return _parse_flat_playlist(result.stdout, url)


def _parse_flat_playlist(stdout, url):
    """Convert ``yt-dlp --flat-playlist --dump-json`` output into the get_playlist() dict."""
    items = []
    header_title = ""  # title taken from a `_type == "playlist"` line, if any
    entry_title = ""   # first `playlist_title` seen on a video entry
    for line in stdout.strip().split("\n"):
        if not line.strip():
            continue
        try:
            entry = json.loads(line)
        except json.JSONDecodeError:
            continue  # skip non-JSON noise lines
        if not isinstance(entry, dict):
            continue

        # Playlist header line
        if entry.get("_type") == "playlist":
            header_title = entry.get("title") or header_title
            continue

        # Video entry
        vid = entry.get("id")
        if not vid:
            continue
        if not entry_title:
            # yt-dlp annotates each playlist entry with its playlist's title;
            # use it when no separate header line was emitted.
            entry_title = entry.get("playlist_title") or ""
        items.append({
            "id": vid,
            "title": entry.get("title", "") or "",
            "url": entry.get("url") or f"https://www.youtube.com/watch?v={vid}",
            "duration": entry.get("duration"),
            "uploader": entry.get("uploader") or entry.get("channel") or "",
        })

    # A URL carrying "list=" counts as a playlist even with 0 or 1 entries.
    is_playlist = len(items) > 1 or ("list=" in url)
    first_item_title = items[0]["title"] if items else ""
    return {
        "is_playlist": is_playlist,
        "playlist_title": header_title or entry_title or first_item_title,
        "items": items,
    }
|
||
|
||
|
||
def main():
    """Command-line entry point.

    Usage:
        yt_download.py URL OUTPUT [--max-height N] [--cookies FILE]
        yt_download.py URL [OUTPUT] --info-only [--cookies FILE]

    With ``--info-only`` the selected metadata fields are printed to stdout as
    JSON and nothing is downloaded; OUTPUT is not needed in that mode.
    Otherwise the video is downloaded to OUTPUT.

    Exits with status 1 when metadata cannot be fetched, and with status 2
    (argparse usage error) when OUTPUT is missing in download mode.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("url")
    # Optional so that --info-only does not need a dummy output path; it is
    # still enforced below for real downloads.
    ap.add_argument("output", nargs="?", default=None)
    ap.add_argument("--max-height", type=int, default=1080)
    ap.add_argument("--cookies", default=None,
                    help="Pot do cookies.txt (Netscape format)")
    ap.add_argument("--info-only", action="store_true",
                    help="Samo metadata, brez prenosa")
    args = ap.parse_args()

    if args.info_only:
        info = get_info(args.url, cookies_file=args.cookies)
        if not info:
            print("❌ Ne morem dobiti info", file=sys.stderr)
            sys.exit(1)
        # Fields emitted for --info-only, in output order.
        info_fields = (
            "id",
            "title",
            "duration",
            "uploader",
            "uploader_id",
            "channel",
            "channel_id",
            "thumbnail",
            "description",
            "upload_date",
            "view_count",
            "like_count",
            "categories",
            "tags",
            "webpage_url",
            "age_limit",
            "is_live",
        )
        selected = {field: info.get(field) for field in info_fields}
        print(json.dumps(selected, indent=2, ensure_ascii=False))
        return

    if args.output is None:
        # Same exit status (2) and wording argparse uses for a missing positional.
        ap.error("the following arguments are required: output")

    download(args.url, args.output, max_height=args.max_height, cookies_file=args.cookies)
|
||
|
||
|
||
# Run the CLI only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()
|