YT metadata fetch: razširi --info-only output (id, uploader, description, upload_date, view_count, tags, ...). Single video submit fetcha metadata + Qnet match takoj (kot playlist). Worker preskoči info fetch, če metadata že obstaja, sicer shrani vsa polja in naredi Qnet match.

- yt_download.py: get_info() najprej poskusi yt.biba.live API /download/info (residential IP, sveži cookies), fallback na lokalni yt-dlp. --info-only output razširjen na 17 polj.
- main.py submit_youtube single video: fetcha metadata (yt_get_info) ob submit, shrani youtube_title/uploader/id/description/duration/thumbnail/upload_date/webpage_url in naredi Qnet match (parity s playlist branch).
- main.py worker: skip info fetch, če youtube_title in youtube_uploader že obstajata. Sicer shrani VSA polja + Qnet match + parser fallback.
2026-05-02 15:54:28 +00:00 · 2026-05-02 15:54:28 +00:00 · 968eba7205
commit 968eba7205
parent bc73fd8dd3
2 changed files with 143 additions and 25 deletions
--- a/app/main.py
+++ b/app/main.py
@ -634,7 +634,11 @@ def process_job(job_id):
                return
            update_job(job_id, input_path=str(input_path))
            
-            # Probaj dobiti YT naslov za artist+title parsing
+            # Probaj dobiti YT metadata (če še ni iz submit-a) — title, uploader, id, ...
+            # Single video submit ali playlist resolve že nastavi metadata, ampak
+            # včasih (npr. če je submit fetch failed) še manjka.
+            need_metadata_fetch = not job.get("youtube_title") or not job.get("youtube_uploader")
+            if need_metadata_fetch:
                try:
                    info_cmd = [
                        "python3", str(SCRIPTS_DIR / "yt_download.py"),
@ -643,20 +647,51 @@ def process_job(job_id):
                    proc = subprocess.run(info_cmd, capture_output=True, text=True, timeout=30)
                    if proc.returncode == 0 and proc.stdout:
                        info = json.loads(proc.stdout)
-                    yt_title = info.get("title", "")
+                        yt_title = info.get("title", "") or ""
+                        updates = {}
                        if yt_title:
+                            updates["youtube_title"] = yt_title
+                        if info.get("id"):
+                            updates["youtube_id"] = info["id"]
+                        if info.get("uploader") or info.get("channel"):
+                            updates["youtube_uploader"] = info.get("uploader") or info.get("channel") or ""
+                        if info.get("duration") is not None:
+                            updates["youtube_duration"] = info["duration"]
+                        if info.get("thumbnail"):
+                            updates["youtube_thumbnail"] = info["thumbnail"]
+                        if info.get("description"):
+                            updates["youtube_description"] = info["description"][:2000]
+                        if info.get("upload_date"):
+                            updates["youtube_upload_date"] = info["upload_date"]
+                        if info.get("webpage_url"):
+                            updates["youtube_webpage_url"] = info["webpage_url"]
+
+                        # Qnet match + parser samo če še nimamo clean name
+                        if yt_title and not job.get("has_clean_name"):
+                            qm = qnet_match.match_filename(yt_title)
+                            if qm["matched"] and qm["confidence"] >= 0.85:
+                                updates["parsed_artist"] = qm["artist"]
+                                updates["parsed_title"] = qm["title"]
+                                updates["has_clean_name"] = True
+                                updates["qnet_match"] = {
+                                    "method": qm["method"],
+                                    "confidence": qm["confidence"],
+                                    "matched_file": qm["file"],
+                                    "matched_station": qm["station"],
+                                }
+                                updates["tv_station"] = qm["station"]
+                            else:
                                a, t = parse_artist_title(yt_title)
-                        updates = {"youtube_title": yt_title}
-                        if a:
-                            updates["parsed_artist"] = a
-                        if t:
-                            updates["parsed_title"] = t
+                                if a: updates["parsed_artist"] = a
+                                if t: updates["parsed_title"] = t
                                updates["has_clean_name"] = bool(a and t)
+
+                        if updates:
                            update_job(job_id, **updates)
                            # Reload job for downstream use
                            job = load_job(job_id)
                except Exception as e:
-                print(f"⚠️ Cannot fetch YT title: {e}", flush=True)
+                    print(f"⚠️ Cannot fetch YT metadata: {e}", flush=True)
        else:
            input_path = Path(job["input_path"])

@ -1508,6 +1543,50 @@ async def submit_youtube(
        "quality": payload.quality,
        "tv_station": payload.tv_station,
    }
+
+    # Fetch YT metadata že ob submit (preko yt.biba.live API ali lokalni yt-dlp).
+    # Uporabljamo isti yt_download modul, da konsistentno dobimo vsa pomembna polja.
+    # Če fetch ne uspe, gre job naprej brez metadat — worker bo še enkrat probal.
+    try:
+        import sys as _sys
+        _sys.path.insert(0, str(SCRIPTS_DIR))
+        from yt_download import get_info as yt_get_info
+        info = yt_get_info(url)
+        if info:
+            yt_title = info.get("title", "") or ""
+            job["youtube_title"] = yt_title
+            job["youtube_id"] = info.get("id") or ""
+            job["youtube_uploader"] = info.get("uploader") or info.get("channel") or ""
+            job["youtube_duration"] = info.get("duration")
+            job["youtube_thumbnail"] = info.get("thumbnail") or ""
+            job["youtube_description"] = (info.get("description") or "")[:2000]  # cap na 2KB
+            job["youtube_upload_date"] = info.get("upload_date") or ""
+            job["youtube_webpage_url"] = info.get("webpage_url") or url
+
+            # Qnet match na YT naslovu (kot pri playlist)
+            if yt_title:
+                qm = qnet_match.match_filename(yt_title)
+                if qm["matched"] and qm["confidence"] >= 0.85:
+                    job["parsed_artist"] = qm["artist"]
+                    job["parsed_title"] = qm["title"]
+                    job["has_clean_name"] = True
+                    job["qnet_match"] = {
+                        "method": qm["method"],
+                        "confidence": qm["confidence"],
+                        "matched_file": qm["file"],
+                        "matched_station": qm["station"],
+                    }
+                    # Auto-set tv_station iz Qnet match-a (override default)
+                    job["tv_station"] = qm["station"]
+                else:
+                    # Fallback: regex parser na YT naslovu
+                    a, t = parse_artist_title(yt_title)
+                    if a: job["parsed_artist"] = a
+                    if t: job["parsed_title"] = t
+                    job["has_clean_name"] = bool(a and t)
+    except Exception as e:
+        print(f"⚠️ Cannot fetch YT metadata at submit: {e}", flush=True)
+
    save_job(job)
    return job

--- a/scripts/yt_download.py
+++ b/scripts/yt_download.py
@ -114,7 +114,33 @@ def download(url, output, max_height=1080, format_str=None, cookies_file=None):


 def get_info(url, cookies_file=None):
-    """Vrni metadata brez prenosa."""
+    """Vrni metadata brez prenosa.
+
+    Strategija:
+      1. Najprej probaj yt.biba.live API /download/info (residential IP,
+         sveži cookies, signature solver — manj možnosti za bot detection).
+      2. Fallback na lokalni yt-dlp --dump-json.
+    """
+    # ─── 1. Try yt.biba.live API ───
+    if YT_API_TOKEN:
+        try:
+            import urllib.request
+            import urllib.parse
+            params = urllib.parse.urlencode({"url": url})
+            req = urllib.request.Request(
+                f"{YT_API_URL}/download/info?{params}",
+                headers={"Authorization": f"Bearer {YT_API_TOKEN}"},
+            )
+            with urllib.request.urlopen(req, timeout=30) as resp:
+                api_data = json.loads(resp.read().decode("utf-8"))
+            if api_data.get("ok"):
+                print(f"ℹ Info via yt.biba.live API: {api_data.get('title','')[:60]}", file=sys.stderr)
+                # API vrne flat dict; vrnemo ga nespremenjenega (predpostavka: ključi so že v yt-dlp obliki — TODO preveri)
+                return api_data
+        except Exception as e:
+            print(f"⚠ yt.biba.live /download/info failed: {e}, fallback to local", file=sys.stderr)
+
+    # ─── 2. Fallback: lokalni yt-dlp ───
    cmd = ["yt-dlp", "--dump-json", "--no-playlist"]
    cookies_file = get_cookies_file(cookies_file)
    if cookies_file:
@ -230,11 +256,24 @@ def main():
        info = get_info(args.url, cookies_file=args.cookies)
        if info:
            print(json.dumps({
+                "id": info.get("id"),
                "title": info.get("title"),
                "duration": info.get("duration"),
                "uploader": info.get("uploader"),
+                "uploader_id": info.get("uploader_id"),
+                "channel": info.get("channel"),
+                "channel_id": info.get("channel_id"),
                "thumbnail": info.get("thumbnail"),
-            }, indent=2))
+                "description": info.get("description"),
+                "upload_date": info.get("upload_date"),
+                "view_count": info.get("view_count"),
+                "like_count": info.get("like_count"),
+                "categories": info.get("categories"),
+                "tags": info.get("tags"),
+                "webpage_url": info.get("webpage_url"),
+                "age_limit": info.get("age_limit"),
+                "is_live": info.get("is_live"),
+            }, indent=2, ensure_ascii=False))
        else:
            print("❌ Ne morem dobiti info", file=sys.stderr)
            sys.exit(1)