YT metadata fetch: razširi --info-only output (id, uploader, description, upload_date, view_count, tags, ...). Single video submit takoj fetcha metadata in naredi Qnet match (kot playlist). Worker preskoči info fetch, če metadata že obstaja, sicer shrani vsa polja in naredi Qnet match.
- yt_download.py: get_info() najprej proba yt.biba.live API /download/info (residential IP, sveži cookies), fallback na lokalni yt-dlp. --info-only output je razširjen na 17 polj.
- main.py submit_youtube (single video): ob submitu fetcha metadata (yt_get_info), shrani youtube_title/uploader/id/description/duration/thumbnail/upload_date in naredi Qnet match (parity s playlist branchem).
- main.py worker: skip info fetch, če youtube_title in youtube_uploader že obstajata. Sicer shrani VSA polja + Qnet match + parser fallback.
This commit is contained in:
parent
bc73fd8dd3
commit
968eba7205
95
app/main.py
95
app/main.py
@ -634,7 +634,11 @@ def process_job(job_id):
|
||||
return
|
||||
update_job(job_id, input_path=str(input_path))
|
||||
|
||||
# Probaj dobiti YT naslov za artist+title parsing
|
||||
# Probaj dobiti YT metadata (če še ni iz submit-a) — title, uploader, id, ...
|
||||
# Single video submit ali playlist resolve že nastavi metadata, ampak
|
||||
# včasih (npr. če je submit fetch failed) še manjka.
|
||||
need_metadata_fetch = not job.get("youtube_title") or not job.get("youtube_uploader")
|
||||
if need_metadata_fetch:
|
||||
try:
|
||||
info_cmd = [
|
||||
"python3", str(SCRIPTS_DIR / "yt_download.py"),
|
||||
@ -643,20 +647,51 @@ def process_job(job_id):
|
||||
proc = subprocess.run(info_cmd, capture_output=True, text=True, timeout=30)
|
||||
if proc.returncode == 0 and proc.stdout:
|
||||
info = json.loads(proc.stdout)
|
||||
yt_title = info.get("title", "")
|
||||
yt_title = info.get("title", "") or ""
|
||||
updates = {}
|
||||
if yt_title:
|
||||
updates["youtube_title"] = yt_title
|
||||
if info.get("id"):
|
||||
updates["youtube_id"] = info["id"]
|
||||
if info.get("uploader") or info.get("channel"):
|
||||
updates["youtube_uploader"] = info.get("uploader") or info.get("channel") or ""
|
||||
if info.get("duration") is not None:
|
||||
updates["youtube_duration"] = info["duration"]
|
||||
if info.get("thumbnail"):
|
||||
updates["youtube_thumbnail"] = info["thumbnail"]
|
||||
if info.get("description"):
|
||||
updates["youtube_description"] = info["description"][:2000]
|
||||
if info.get("upload_date"):
|
||||
updates["youtube_upload_date"] = info["upload_date"]
|
||||
if info.get("webpage_url"):
|
||||
updates["youtube_webpage_url"] = info["webpage_url"]
|
||||
|
||||
# Qnet match + parser samo če še nimamo clean name
|
||||
if yt_title and not job.get("has_clean_name"):
|
||||
qm = qnet_match.match_filename(yt_title)
|
||||
if qm["matched"] and qm["confidence"] >= 0.85:
|
||||
updates["parsed_artist"] = qm["artist"]
|
||||
updates["parsed_title"] = qm["title"]
|
||||
updates["has_clean_name"] = True
|
||||
updates["qnet_match"] = {
|
||||
"method": qm["method"],
|
||||
"confidence": qm["confidence"],
|
||||
"matched_file": qm["file"],
|
||||
"matched_station": qm["station"],
|
||||
}
|
||||
updates["tv_station"] = qm["station"]
|
||||
else:
|
||||
a, t = parse_artist_title(yt_title)
|
||||
updates = {"youtube_title": yt_title}
|
||||
if a:
|
||||
updates["parsed_artist"] = a
|
||||
if t:
|
||||
updates["parsed_title"] = t
|
||||
if a: updates["parsed_artist"] = a
|
||||
if t: updates["parsed_title"] = t
|
||||
updates["has_clean_name"] = bool(a and t)
|
||||
|
||||
if updates:
|
||||
update_job(job_id, **updates)
|
||||
# Reload job for downstream use
|
||||
job = load_job(job_id)
|
||||
except Exception as e:
|
||||
print(f"⚠️ Cannot fetch YT title: {e}", flush=True)
|
||||
print(f"⚠️ Cannot fetch YT metadata: {e}", flush=True)
|
||||
else:
|
||||
input_path = Path(job["input_path"])
|
||||
|
||||
@ -1508,6 +1543,50 @@ async def submit_youtube(
|
||||
"quality": payload.quality,
|
||||
"tv_station": payload.tv_station,
|
||||
}
|
||||
|
||||
# Fetch YT metadata že ob submit (preko yt.biba.live API ali lokalni yt-dlp).
|
||||
# Uporabljamo isti yt_download modul, da konsistentno dobimo vse pomembne polja.
|
||||
# Če fetch ne uspe, gre job naprej brez metadat — worker bo še enkrat probal.
|
||||
try:
|
||||
import sys as _sys
|
||||
_sys.path.insert(0, str(SCRIPTS_DIR))
|
||||
from yt_download import get_info as yt_get_info
|
||||
info = yt_get_info(url)
|
||||
if info:
|
||||
yt_title = info.get("title", "") or ""
|
||||
job["youtube_title"] = yt_title
|
||||
job["youtube_id"] = info.get("id") or ""
|
||||
job["youtube_uploader"] = info.get("uploader") or info.get("channel") or ""
|
||||
job["youtube_duration"] = info.get("duration")
|
||||
job["youtube_thumbnail"] = info.get("thumbnail") or ""
|
||||
job["youtube_description"] = (info.get("description") or "")[:2000] # cap na 2KB
|
||||
job["youtube_upload_date"] = info.get("upload_date") or ""
|
||||
job["youtube_webpage_url"] = info.get("webpage_url") or url
|
||||
|
||||
# Qnet match na YT naslovu (kot pri playlist)
|
||||
if yt_title:
|
||||
qm = qnet_match.match_filename(yt_title)
|
||||
if qm["matched"] and qm["confidence"] >= 0.85:
|
||||
job["parsed_artist"] = qm["artist"]
|
||||
job["parsed_title"] = qm["title"]
|
||||
job["has_clean_name"] = True
|
||||
job["qnet_match"] = {
|
||||
"method": qm["method"],
|
||||
"confidence": qm["confidence"],
|
||||
"matched_file": qm["file"],
|
||||
"matched_station": qm["station"],
|
||||
}
|
||||
# Auto-set tv_station iz Qnet match-a (override default)
|
||||
job["tv_station"] = qm["station"]
|
||||
else:
|
||||
# Fallback: regex parser na YT naslovu
|
||||
a, t = parse_artist_title(yt_title)
|
||||
if a: job["parsed_artist"] = a
|
||||
if t: job["parsed_title"] = t
|
||||
job["has_clean_name"] = bool(a and t)
|
||||
except Exception as e:
|
||||
print(f"⚠️ Cannot fetch YT metadata at submit: {e}", flush=True)
|
||||
|
||||
save_job(job)
|
||||
return job
|
||||
|
||||
|
||||
@ -114,7 +114,33 @@ def download(url, output, max_height=1080, format_str=None, cookies_file=None):
|
||||
|
||||
|
||||
def get_info(url, cookies_file=None):
|
||||
"""Vrni metadata brez prenosa."""
|
||||
"""Vrni metadata brez prenosa.
|
||||
|
||||
Strategija:
|
||||
1. Najprej probaj yt.biba.live API /download/info (residential IP,
|
||||
sveži cookies, signature solver — manj možnosti za bot detection).
|
||||
2. Fallback na lokalni yt-dlp --dump-json.
|
||||
"""
|
||||
# ─── 1. Try yt.biba.live API ───
|
||||
if YT_API_TOKEN:
|
||||
try:
|
||||
import urllib.request
|
||||
import urllib.parse
|
||||
params = urllib.parse.urlencode({"url": url})
|
||||
req = urllib.request.Request(
|
||||
f"{YT_API_URL}/download/info?{params}",
|
||||
headers={"Authorization": f"Bearer {YT_API_TOKEN}"},
|
||||
)
|
||||
with urllib.request.urlopen(req, timeout=30) as resp:
|
||||
api_data = json.loads(resp.read().decode("utf-8"))
|
||||
if api_data.get("ok"):
|
||||
print(f"ℹ Info via yt.biba.live API: {api_data.get('title','')[:60]}", file=sys.stderr)
|
||||
# API že vrne flat dict — preimenujmo v yt-dlp obliko za kompatibilnost
|
||||
return api_data
|
||||
except Exception as e:
|
||||
print(f"⚠ yt.biba.live /download/info failed: {e}, fallback to local", file=sys.stderr)
|
||||
|
||||
# ─── 2. Fallback: lokalni yt-dlp ───
|
||||
cmd = ["yt-dlp", "--dump-json", "--no-playlist"]
|
||||
cookies_file = get_cookies_file(cookies_file)
|
||||
if cookies_file:
|
||||
@ -230,11 +256,24 @@ def main():
|
||||
info = get_info(args.url, cookies_file=args.cookies)
|
||||
if info:
|
||||
print(json.dumps({
|
||||
"id": info.get("id"),
|
||||
"title": info.get("title"),
|
||||
"duration": info.get("duration"),
|
||||
"uploader": info.get("uploader"),
|
||||
"uploader_id": info.get("uploader_id"),
|
||||
"channel": info.get("channel"),
|
||||
"channel_id": info.get("channel_id"),
|
||||
"thumbnail": info.get("thumbnail"),
|
||||
}, indent=2))
|
||||
"description": info.get("description"),
|
||||
"upload_date": info.get("upload_date"),
|
||||
"view_count": info.get("view_count"),
|
||||
"like_count": info.get("like_count"),
|
||||
"categories": info.get("categories"),
|
||||
"tags": info.get("tags"),
|
||||
"webpage_url": info.get("webpage_url"),
|
||||
"age_limit": info.get("age_limit"),
|
||||
"is_live": info.get("is_live"),
|
||||
}, indent=2, ensure_ascii=False))
|
||||
else:
|
||||
print("❌ Ne morem dobiti info", file=sys.stderr)
|
||||
sys.exit(1)
|
||||
|
||||
Loading…
Reference in New Issue
Block a user