YT metadata fetch: razširi --info-only output (id, uploader, description, upload_date, view_count, tags, ...). Single video submit pridobi metadata in takoj naredi Qnet match (kot playlist). Worker preskoči info fetch, če metadata že obstaja, sicer shrani vsa polja in naredi Qnet match.
- yt_download.py: get_info() najprej poskusi yt.biba.live API /download/info (residential IP, sveži cookies), fallback na lokalni yt-dlp. --info-only output razširjen na 17 polj.
- main.py submit_youtube single video: pridobi metadata (yt_get_info) ob submit, shrani youtube_title/uploader/id/description/duration/thumbnail/upload_date/webpage_url in naredi Qnet match (parity s playlist branch).
- main.py worker: preskoči info fetch, če youtube_title in youtube_uploader že obstajata. Sicer shrani VSA polja + Qnet match + parser fallback.
This commit is contained in:
parent
bc73fd8dd3
commit
968eba7205
125
app/main.py
125
app/main.py
@ -634,29 +634,64 @@ def process_job(job_id):
|
|||||||
return
|
return
|
||||||
update_job(job_id, input_path=str(input_path))
|
update_job(job_id, input_path=str(input_path))
|
||||||
|
|
||||||
# Probaj dobiti YT naslov za artist+title parsing
|
# Probaj dobiti YT metadata (če še ni iz submit-a) — title, uploader, id, ...
|
||||||
try:
|
# Single video submit ali playlist resolve že nastavi metadata, ampak
|
||||||
info_cmd = [
|
# včasih (npr. če submit fetch ni uspel) še manjka.
|
||||||
"python3", str(SCRIPTS_DIR / "yt_download.py"),
|
need_metadata_fetch = not job.get("youtube_title") or not job.get("youtube_uploader")
|
||||||
job["youtube_url"], "/dev/null", "--info-only",
|
if need_metadata_fetch:
|
||||||
]
|
try:
|
||||||
proc = subprocess.run(info_cmd, capture_output=True, text=True, timeout=30)
|
info_cmd = [
|
||||||
if proc.returncode == 0 and proc.stdout:
|
"python3", str(SCRIPTS_DIR / "yt_download.py"),
|
||||||
info = json.loads(proc.stdout)
|
job["youtube_url"], "/dev/null", "--info-only",
|
||||||
yt_title = info.get("title", "")
|
]
|
||||||
if yt_title:
|
proc = subprocess.run(info_cmd, capture_output=True, text=True, timeout=30)
|
||||||
a, t = parse_artist_title(yt_title)
|
if proc.returncode == 0 and proc.stdout:
|
||||||
updates = {"youtube_title": yt_title}
|
info = json.loads(proc.stdout)
|
||||||
if a:
|
yt_title = info.get("title", "") or ""
|
||||||
updates["parsed_artist"] = a
|
updates = {}
|
||||||
if t:
|
if yt_title:
|
||||||
updates["parsed_title"] = t
|
updates["youtube_title"] = yt_title
|
||||||
updates["has_clean_name"] = bool(a and t)
|
if info.get("id"):
|
||||||
update_job(job_id, **updates)
|
updates["youtube_id"] = info["id"]
|
||||||
# Reload job for downstream use
|
if info.get("uploader") or info.get("channel"):
|
||||||
job = load_job(job_id)
|
updates["youtube_uploader"] = info.get("uploader") or info.get("channel") or ""
|
||||||
except Exception as e:
|
if info.get("duration") is not None:
|
||||||
print(f"⚠️ Cannot fetch YT title: {e}", flush=True)
|
updates["youtube_duration"] = info["duration"]
|
||||||
|
if info.get("thumbnail"):
|
||||||
|
updates["youtube_thumbnail"] = info["thumbnail"]
|
||||||
|
if info.get("description"):
|
||||||
|
updates["youtube_description"] = info["description"][:2000]
|
||||||
|
if info.get("upload_date"):
|
||||||
|
updates["youtube_upload_date"] = info["upload_date"]
|
||||||
|
if info.get("webpage_url"):
|
||||||
|
updates["youtube_webpage_url"] = info["webpage_url"]
|
||||||
|
|
||||||
|
# Qnet match + parser samo če še nimamo clean name
|
||||||
|
if yt_title and not job.get("has_clean_name"):
|
||||||
|
qm = qnet_match.match_filename(yt_title)
|
||||||
|
if qm["matched"] and qm["confidence"] >= 0.85:
|
||||||
|
updates["parsed_artist"] = qm["artist"]
|
||||||
|
updates["parsed_title"] = qm["title"]
|
||||||
|
updates["has_clean_name"] = True
|
||||||
|
updates["qnet_match"] = {
|
||||||
|
"method": qm["method"],
|
||||||
|
"confidence": qm["confidence"],
|
||||||
|
"matched_file": qm["file"],
|
||||||
|
"matched_station": qm["station"],
|
||||||
|
}
|
||||||
|
updates["tv_station"] = qm["station"]
|
||||||
|
else:
|
||||||
|
a, t = parse_artist_title(yt_title)
|
||||||
|
if a: updates["parsed_artist"] = a
|
||||||
|
if t: updates["parsed_title"] = t
|
||||||
|
updates["has_clean_name"] = bool(a and t)
|
||||||
|
|
||||||
|
if updates:
|
||||||
|
update_job(job_id, **updates)
|
||||||
|
# Reload job for downstream use
|
||||||
|
job = load_job(job_id)
|
||||||
|
except Exception as e:
|
||||||
|
print(f"⚠️ Cannot fetch YT metadata: {e}", flush=True)
|
||||||
else:
|
else:
|
||||||
input_path = Path(job["input_path"])
|
input_path = Path(job["input_path"])
|
||||||
|
|
||||||
@ -1508,6 +1543,50 @@ async def submit_youtube(
|
|||||||
"quality": payload.quality,
|
"quality": payload.quality,
|
||||||
"tv_station": payload.tv_station,
|
"tv_station": payload.tv_station,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Fetch YT metadata že ob submit (preko yt.biba.live API ali lokalni yt-dlp).
|
||||||
|
# Uporabljamo isti yt_download modul, da konsistentno dobimo vsa pomembna polja.
|
||||||
|
# Če fetch ne uspe, gre job naprej brez metadat — worker bo še enkrat probal.
|
||||||
|
try:
|
||||||
|
import sys as _sys
|
||||||
|
_sys.path.insert(0, str(SCRIPTS_DIR))
|
||||||
|
from yt_download import get_info as yt_get_info
|
||||||
|
info = yt_get_info(url)
|
||||||
|
if info:
|
||||||
|
yt_title = info.get("title", "") or ""
|
||||||
|
job["youtube_title"] = yt_title
|
||||||
|
job["youtube_id"] = info.get("id") or ""
|
||||||
|
job["youtube_uploader"] = info.get("uploader") or info.get("channel") or ""
|
||||||
|
job["youtube_duration"] = info.get("duration")
|
||||||
|
job["youtube_thumbnail"] = info.get("thumbnail") or ""
|
||||||
|
job["youtube_description"] = (info.get("description") or "")[:2000] # cap na 2KB
|
||||||
|
job["youtube_upload_date"] = info.get("upload_date") or ""
|
||||||
|
job["youtube_webpage_url"] = info.get("webpage_url") or url
|
||||||
|
|
||||||
|
# Qnet match na YT naslovu (kot pri playlist)
|
||||||
|
if yt_title:
|
||||||
|
qm = qnet_match.match_filename(yt_title)
|
||||||
|
if qm["matched"] and qm["confidence"] >= 0.85:
|
||||||
|
job["parsed_artist"] = qm["artist"]
|
||||||
|
job["parsed_title"] = qm["title"]
|
||||||
|
job["has_clean_name"] = True
|
||||||
|
job["qnet_match"] = {
|
||||||
|
"method": qm["method"],
|
||||||
|
"confidence": qm["confidence"],
|
||||||
|
"matched_file": qm["file"],
|
||||||
|
"matched_station": qm["station"],
|
||||||
|
}
|
||||||
|
# Auto-set tv_station iz Qnet match-a (override default)
|
||||||
|
job["tv_station"] = qm["station"]
|
||||||
|
else:
|
||||||
|
# Fallback: regex parser na YT naslovu
|
||||||
|
a, t = parse_artist_title(yt_title)
|
||||||
|
if a: job["parsed_artist"] = a
|
||||||
|
if t: job["parsed_title"] = t
|
||||||
|
job["has_clean_name"] = bool(a and t)
|
||||||
|
except Exception as e:
|
||||||
|
print(f"⚠️ Cannot fetch YT metadata at submit: {e}", flush=True)
|
||||||
|
|
||||||
save_job(job)
|
save_job(job)
|
||||||
return job
|
return job
|
||||||
|
|
||||||
|
|||||||
@ -114,7 +114,33 @@ def download(url, output, max_height=1080, format_str=None, cookies_file=None):
|
|||||||
|
|
||||||
|
|
||||||
def get_info(url, cookies_file=None):
|
def get_info(url, cookies_file=None):
|
||||||
"""Vrni metadata brez prenosa."""
|
"""Vrni metadata brez prenosa.
|
||||||
|
|
||||||
|
Strategija:
|
||||||
|
1. Najprej probaj yt.biba.live API /download/info (residential IP,
|
||||||
|
sveži cookies, signature solver — manj možnosti za bot detection).
|
||||||
|
2. Fallback na lokalni yt-dlp --dump-json.
|
||||||
|
"""
|
||||||
|
# ─── 1. Try yt.biba.live API ───
|
||||||
|
if YT_API_TOKEN:
|
||||||
|
try:
|
||||||
|
import urllib.request
|
||||||
|
import urllib.parse
|
||||||
|
params = urllib.parse.urlencode({"url": url})
|
||||||
|
req = urllib.request.Request(
|
||||||
|
f"{YT_API_URL}/download/info?{params}",
|
||||||
|
headers={"Authorization": f"Bearer {YT_API_TOKEN}"},
|
||||||
|
)
|
||||||
|
with urllib.request.urlopen(req, timeout=30) as resp:
|
||||||
|
api_data = json.loads(resp.read().decode("utf-8"))
|
||||||
|
if api_data.get("ok"):
|
||||||
|
print(f"ℹ Info via yt.biba.live API: {api_data.get('title','')[:60]}", file=sys.stderr)
|
||||||
|
# API že vrne flat dict — vrnemo ga nespremenjenega (predpostavka: ključi ustrezajo yt-dlp obliki; preveriti)
|
||||||
|
return api_data
|
||||||
|
except Exception as e:
|
||||||
|
print(f"⚠ yt.biba.live /download/info failed: {e}, fallback to local", file=sys.stderr)
|
||||||
|
|
||||||
|
# ─── 2. Fallback: lokalni yt-dlp ───
|
||||||
cmd = ["yt-dlp", "--dump-json", "--no-playlist"]
|
cmd = ["yt-dlp", "--dump-json", "--no-playlist"]
|
||||||
cookies_file = get_cookies_file(cookies_file)
|
cookies_file = get_cookies_file(cookies_file)
|
||||||
if cookies_file:
|
if cookies_file:
|
||||||
@ -230,11 +256,24 @@ def main():
|
|||||||
info = get_info(args.url, cookies_file=args.cookies)
|
info = get_info(args.url, cookies_file=args.cookies)
|
||||||
if info:
|
if info:
|
||||||
print(json.dumps({
|
print(json.dumps({
|
||||||
|
"id": info.get("id"),
|
||||||
"title": info.get("title"),
|
"title": info.get("title"),
|
||||||
"duration": info.get("duration"),
|
"duration": info.get("duration"),
|
||||||
"uploader": info.get("uploader"),
|
"uploader": info.get("uploader"),
|
||||||
|
"uploader_id": info.get("uploader_id"),
|
||||||
|
"channel": info.get("channel"),
|
||||||
|
"channel_id": info.get("channel_id"),
|
||||||
"thumbnail": info.get("thumbnail"),
|
"thumbnail": info.get("thumbnail"),
|
||||||
}, indent=2))
|
"description": info.get("description"),
|
||||||
|
"upload_date": info.get("upload_date"),
|
||||||
|
"view_count": info.get("view_count"),
|
||||||
|
"like_count": info.get("like_count"),
|
||||||
|
"categories": info.get("categories"),
|
||||||
|
"tags": info.get("tags"),
|
||||||
|
"webpage_url": info.get("webpage_url"),
|
||||||
|
"age_limit": info.get("age_limit"),
|
||||||
|
"is_live": info.get("is_live"),
|
||||||
|
}, indent=2, ensure_ascii=False))
|
||||||
else:
|
else:
|
||||||
print("❌ Ne morem dobiti info", file=sys.stderr)
|
print("❌ Ne morem dobiti info", file=sys.stderr)
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user