YT metadata fetch: razširi izpis --info-only (id, uploader, description, upload_date, view_count, tags, ...). Submit posameznega videa takoj pridobi metapodatke in naredi Qnet match (enako kot playlist). Worker preskoči info fetch, če metapodatki (youtube_title in youtube_uploader) že obstajajo, sicer shrani vsa polja in naredi Qnet match.

- yt_download.py: get_info() najprej poskusi yt.biba.live API /download/info (residential IP, sveži cookies; samo če je nastavljen YT_API_TOKEN), sicer fallback na lokalni yt-dlp. Izpis --info-only je razširjen na 17 polj.
- main.py submit_youtube (single video): ob submitu pridobi metapodatke (yt_get_info), shrani youtube_title/uploader/id/description/duration/thumbnail/upload_date/webpage_url in naredi Qnet match (pariteta s playlist vejo). Če fetch ne uspe, se job vseeno shrani brez metapodatkov.
- main.py worker: preskoči info fetch, če youtube_title in youtube_uploader že obstajata. Sicer shrani VSA pridobljena polja; Qnet match (s parser fallbackom) naredi samo, če job še nima has_clean_name.
This commit is contained in:
OpenClaw Agent 2026-05-02 15:54:28 +00:00
parent bc73fd8dd3
commit 968eba7205
2 changed files with 143 additions and 25 deletions

View File

@ -634,29 +634,64 @@ def process_job(job_id):
return return
update_job(job_id, input_path=str(input_path)) update_job(job_id, input_path=str(input_path))
# Probaj dobiti YT naslov za artist+title parsing # Probaj dobiti YT metadata (če še ni iz submit-a) — title, uploader, id, ...
try: # Single video submit ali playlist resolve že nastavi metadata, ampak
info_cmd = [ # včasih (npr. če je submit fetch failed) je še manjka.
"python3", str(SCRIPTS_DIR / "yt_download.py"), need_metadata_fetch = not job.get("youtube_title") or not job.get("youtube_uploader")
job["youtube_url"], "/dev/null", "--info-only", if need_metadata_fetch:
] try:
proc = subprocess.run(info_cmd, capture_output=True, text=True, timeout=30) info_cmd = [
if proc.returncode == 0 and proc.stdout: "python3", str(SCRIPTS_DIR / "yt_download.py"),
info = json.loads(proc.stdout) job["youtube_url"], "/dev/null", "--info-only",
yt_title = info.get("title", "") ]
if yt_title: proc = subprocess.run(info_cmd, capture_output=True, text=True, timeout=30)
a, t = parse_artist_title(yt_title) if proc.returncode == 0 and proc.stdout:
updates = {"youtube_title": yt_title} info = json.loads(proc.stdout)
if a: yt_title = info.get("title", "") or ""
updates["parsed_artist"] = a updates = {}
if t: if yt_title:
updates["parsed_title"] = t updates["youtube_title"] = yt_title
updates["has_clean_name"] = bool(a and t) if info.get("id"):
update_job(job_id, **updates) updates["youtube_id"] = info["id"]
# Reload job for downstream use if info.get("uploader") or info.get("channel"):
job = load_job(job_id) updates["youtube_uploader"] = info.get("uploader") or info.get("channel") or ""
except Exception as e: if info.get("duration") is not None:
print(f"⚠️ Cannot fetch YT title: {e}", flush=True) updates["youtube_duration"] = info["duration"]
if info.get("thumbnail"):
updates["youtube_thumbnail"] = info["thumbnail"]
if info.get("description"):
updates["youtube_description"] = info["description"][:2000]
if info.get("upload_date"):
updates["youtube_upload_date"] = info["upload_date"]
if info.get("webpage_url"):
updates["youtube_webpage_url"] = info["webpage_url"]
# Qnet match + parser samo če še nimamo clean name
if yt_title and not job.get("has_clean_name"):
qm = qnet_match.match_filename(yt_title)
if qm["matched"] and qm["confidence"] >= 0.85:
updates["parsed_artist"] = qm["artist"]
updates["parsed_title"] = qm["title"]
updates["has_clean_name"] = True
updates["qnet_match"] = {
"method": qm["method"],
"confidence": qm["confidence"],
"matched_file": qm["file"],
"matched_station": qm["station"],
}
updates["tv_station"] = qm["station"]
else:
a, t = parse_artist_title(yt_title)
if a: updates["parsed_artist"] = a
if t: updates["parsed_title"] = t
updates["has_clean_name"] = bool(a and t)
if updates:
update_job(job_id, **updates)
# Reload job for downstream use
job = load_job(job_id)
except Exception as e:
print(f"⚠️ Cannot fetch YT metadata: {e}", flush=True)
else: else:
input_path = Path(job["input_path"]) input_path = Path(job["input_path"])
@ -1508,6 +1543,50 @@ async def submit_youtube(
"quality": payload.quality, "quality": payload.quality,
"tv_station": payload.tv_station, "tv_station": payload.tv_station,
} }
# Fetch YT metadata že ob submit (preko yt.biba.live API ali lokalni yt-dlp).
# Uporabljamo isti yt_download modul, da konsistentno dobimo vse pomembne polja.
# Če fetch ne uspe, gre job naprej brez metadat — worker bo še enkrat probal.
try:
import sys as _sys
_sys.path.insert(0, str(SCRIPTS_DIR))
from yt_download import get_info as yt_get_info
info = yt_get_info(url)
if info:
yt_title = info.get("title", "") or ""
job["youtube_title"] = yt_title
job["youtube_id"] = info.get("id") or ""
job["youtube_uploader"] = info.get("uploader") or info.get("channel") or ""
job["youtube_duration"] = info.get("duration")
job["youtube_thumbnail"] = info.get("thumbnail") or ""
job["youtube_description"] = (info.get("description") or "")[:2000] # cap na 2KB
job["youtube_upload_date"] = info.get("upload_date") or ""
job["youtube_webpage_url"] = info.get("webpage_url") or url
# Qnet match na YT naslovu (kot pri playlist)
if yt_title:
qm = qnet_match.match_filename(yt_title)
if qm["matched"] and qm["confidence"] >= 0.85:
job["parsed_artist"] = qm["artist"]
job["parsed_title"] = qm["title"]
job["has_clean_name"] = True
job["qnet_match"] = {
"method": qm["method"],
"confidence": qm["confidence"],
"matched_file": qm["file"],
"matched_station": qm["station"],
}
# Auto-set tv_station iz Qnet match-a (override default)
job["tv_station"] = qm["station"]
else:
# Fallback: regex parser na YT naslovu
a, t = parse_artist_title(yt_title)
if a: job["parsed_artist"] = a
if t: job["parsed_title"] = t
job["has_clean_name"] = bool(a and t)
except Exception as e:
print(f"⚠️ Cannot fetch YT metadata at submit: {e}", flush=True)
save_job(job) save_job(job)
return job return job

View File

@ -114,7 +114,33 @@ def download(url, output, max_height=1080, format_str=None, cookies_file=None):
def get_info(url, cookies_file=None): def get_info(url, cookies_file=None):
"""Vrni metadata brez prenosa.""" """Vrni metadata brez prenosa.
Strategija:
1. Najprej probaj yt.biba.live API /download/info (residential IP,
sveži cookies, signature solver — manj možnosti za bot detection).
2. Fallback na lokalni yt-dlp --dump-json.
"""
# ─── 1. Try yt.biba.live API ───
if YT_API_TOKEN:
try:
import urllib.request
import urllib.parse
params = urllib.parse.urlencode({"url": url})
req = urllib.request.Request(
f"{YT_API_URL}/download/info?{params}",
headers={"Authorization": f"Bearer {YT_API_TOKEN}"},
)
with urllib.request.urlopen(req, timeout=30) as resp:
api_data = json.loads(resp.read().decode("utf-8"))
if api_data.get("ok"):
print(f" Info via yt.biba.live API: {api_data.get('title','')[:60]}", file=sys.stderr)
# API že vrne flat dict — vrnemo ga nespremenjenega (brez preimenovanja ključev v yt-dlp obliko)
return api_data
except Exception as e:
print(f"⚠ yt.biba.live /download/info failed: {e}, fallback to local", file=sys.stderr)
# ─── 2. Fallback: lokalni yt-dlp ───
cmd = ["yt-dlp", "--dump-json", "--no-playlist"] cmd = ["yt-dlp", "--dump-json", "--no-playlist"]
cookies_file = get_cookies_file(cookies_file) cookies_file = get_cookies_file(cookies_file)
if cookies_file: if cookies_file:
@ -230,11 +256,24 @@ def main():
info = get_info(args.url, cookies_file=args.cookies) info = get_info(args.url, cookies_file=args.cookies)
if info: if info:
print(json.dumps({ print(json.dumps({
"id": info.get("id"),
"title": info.get("title"), "title": info.get("title"),
"duration": info.get("duration"), "duration": info.get("duration"),
"uploader": info.get("uploader"), "uploader": info.get("uploader"),
"uploader_id": info.get("uploader_id"),
"channel": info.get("channel"),
"channel_id": info.get("channel_id"),
"thumbnail": info.get("thumbnail"), "thumbnail": info.get("thumbnail"),
}, indent=2)) "description": info.get("description"),
"upload_date": info.get("upload_date"),
"view_count": info.get("view_count"),
"like_count": info.get("like_count"),
"categories": info.get("categories"),
"tags": info.get("tags"),
"webpage_url": info.get("webpage_url"),
"age_limit": info.get("age_limit"),
"is_live": info.get("is_live"),
}, indent=2, ensure_ascii=False))
else: else:
print("❌ Ne morem dobiti info", file=sys.stderr) print("❌ Ne morem dobiti info", file=sys.stderr)
sys.exit(1) sys.exit(1)