From 3877b822ff22f86ec5a5997ee4622fb169957097 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastjan=20Arti=C4=8D?= Date: Wed, 29 Apr 2026 14:15:18 +0000 Subject: [PATCH] Smart download filenames: 'Artist - Title - REEL.mp4' + validation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two improvements: 1. DOWNLOAD FILENAME: instead of 'reel_<job_id>.mp4' (e.g. reel_25e076af7600.mp4), downloads now have descriptive names like: - 'Lady Gaga - Abracadabra - REEL.mp4' - 'Modrijani - S teboj - REEL.mp4' - 'Sarah Connor - FICKA - REEL.mp4' 2. PRE-UPLOAD VALIDATION: when the filename doesn't follow the 'Artist - Title' format, the browser prompts the user for both fields. Without them, upload is blocked. This prevents files with names like '12345.mp4' or 'video_final.mp4' from being processed without identifying info. Implementation: - parse_artist_title() helper handles common formats: - 'Artist - Title.mp4' / 'Artist – Title' (en-dash) / 'Artist — Title' (em-dash) - 'Artist | Title' / 'Artist : Title' - Strips noise: '(Official Music Video)', '(Audio)', '(HD)', '(Lyric Video)', '[Official Video]' - Client-side parser mirrors backend (validation before upload) - Backend accepts artist + title form fields (override parsed) - Job stored with parsed_artist + parsed_title + has_clean_name fields - YouTube jobs auto-fetch the title via yt_download.py --info-only and parse it - Filename hint to Scribe/Claude uses parsed values (cleaner than raw filename) - Download endpoint uses build_download_filename() for content-disposition - Jobs list shows 'Artist — Title' instead of the raw filename for uploaded files Result: downloaded reels are auto-named correctly for Facebook/Instagram upload, no more renaming files manually. 
--- app/main.py | 147 +++++++++++++++++++++++++++++++++++++++++-- templates/index.html | 91 ++++++++++++++++++++++++--- 2 files changed, 227 insertions(+), 11 deletions(-) diff --git a/app/main.py b/app/main.py index 58b6bff..b6fa38e 100644 --- a/app/main.py +++ b/app/main.py @@ -73,6 +73,97 @@ def check_auth(creds: HTTPBasicCredentials = Depends(security)): return creds.username +# ──────────────────────────────────────────────────────────────── +# Artist + title parsing iz filename / YouTube title +# ──────────────────────────────────────────────────────────────── +import re + +_NOISE_PATTERNS = [ + # Pogosti "noise" ki ga je treba odstraniti + r"\(Official\s+(?:Music\s+)?Video\)", + r"\(Officia[lk]\s+Audio\)", + r"\(Offizielles\s+(?:Musik)?[Vv]ideo\)", + r"\(Lyric[s]?\s+Video\)", + r"\(Audio\)", + r"\(HD\)", r"\(HQ\)", r"\(4K\)", + r"\(Live\)", r"\(Remix\)", + r"\(Remastered\)", r"\(Remaster(?:ed)?\s*\d{0,4}\)", + r"\[Official.*?\]", r"\[Music.*?\]", r"\[Audio.*?\]", + r"\bofficial\s+video\b", r"\bofficial\s+audio\b", + r"\boriginal\s+(?:video|audio)\b", + r"\bMV\b", r"\b4K\b", r"\bHD\b", r"\bHQ\b", +] + +def parse_artist_title(filename_or_title): + """Iz imena datoteke / YouTube naslova ekstrahira (artist, title). + + Podpira pogoste vzorce: + - "Artist - Title.mp4" + - "Artist - Title (Official Music Video).mp4" + - "Artist – Title" (em-dash) + - "Artist | Title" + + Vrne (artist, title) ali (None, None) če ni razvidno. + """ + if not filename_or_title: + return (None, None) + + # Odstrani extension + name = Path(filename_or_title).stem if "." 
in filename_or_title else filename_or_title + + # Odstrani noise patterns + for pat in _NOISE_PATTERNS: + name = re.sub(pat, "", name, flags=re.IGNORECASE) + + # Normaliziraj presledke + name = re.sub(r"\s+", " ", name).strip() + + # Probaj različne separatorje + for sep in [" - ", " – ", " — ", " | ", " : "]: + if sep in name: + parts = name.split(sep, 1) + artist = parts[0].strip() + title = parts[1].strip() + # Strip trailing/leading puncutation + artist = re.sub(r'^[\s\-–—|.:_]+|[\s\-–—|.:_]+$', '', artist) + title = re.sub(r'^[\s\-–—|.:_]+|[\s\-–—|.:_]+$', '', title) + if artist and title and len(artist) <= 80 and len(title) <= 100: + return (artist, title) + + return (None, None) + + +def safe_filename(s, max_len=80): + """Naredi varno ime datoteke (brez znakov ki bi razbili FS).""" + if not s: + return "" + # Replace problematic chars with safe alternative + s = re.sub(r'[<>:"/\\|?*\x00-\x1f]', '', s) + s = re.sub(r'\s+', ' ', s).strip() + return s[:max_len] + + +def build_download_filename(job): + """Sestavi pravilno ime download datoteke iz job metadata.""" + # Najprej probaj job-shranjene parsed values + artist = job.get("parsed_artist") + title = job.get("parsed_title") + + # Fallback: parse from filename + if not artist or not title: + source = job.get("filename") or job.get("youtube_title") or "" + parsed_artist, parsed_title = parse_artist_title(source) + artist = artist or parsed_artist + title = title or parsed_title + + if artist and title: + return f"{safe_filename(artist)} - {safe_filename(title)} - REEL.mp4" + if title: + return f"{safe_filename(title)} - REEL.mp4" + # Last resort: job ID (vendar to bi se moralo preprečiti že ob upload-u) + return f"reel_{job['id']}.mp4" + + # ──────────────────────────────────────────────────────────────── # Job state (filesystem-based, persistent prek restartov) # ──────────────────────────────────────────────────────────────── @@ -220,6 +311,30 @@ def process_job(job_id): if not run_subprocess_logged(cmd, 
job_id, "YouTube download"): return update_job(job_id, input_path=str(input_path)) + + # Probaj dobiti YT naslov za artist+title parsing + try: + info_cmd = [ + "python3", str(SCRIPTS_DIR / "yt_download.py"), + job["youtube_url"], "/dev/null", "--info-only", + ] + proc = subprocess.run(info_cmd, capture_output=True, text=True, timeout=30) + if proc.returncode == 0 and proc.stdout: + info = json.loads(proc.stdout) + yt_title = info.get("title", "") + if yt_title: + a, t = parse_artist_title(yt_title) + updates = {"youtube_title": yt_title} + if a: + updates["parsed_artist"] = a + if t: + updates["parsed_title"] = t + updates["has_clean_name"] = bool(a and t) + update_job(job_id, **updates) + # Reload job for downstream use + job = load_job(job_id) + except Exception as e: + print(f"⚠️ Cannot fetch YT title: {e}", flush=True) else: input_path = Path(job["input_path"]) @@ -242,9 +357,11 @@ def process_job(job_id): cmd += ["--llm-provider", job["llm_provider"]] if job.get("llm_model"): cmd += ["--llm-model", job["llm_model"]] - # Filename hint = original filename (Claude lahko prepozna pesem) - if job.get("filename"): - # Brez extension + # Filename hint za Claude/Scribe — preferiraj parsed artist+title (čistejše) + if job.get("parsed_artist") and job.get("parsed_title"): + fn_hint = f"{job['parsed_artist']} - {job['parsed_title']}" + cmd += ["--filename-hint", fn_hint] + elif job.get("filename"): fn_hint = Path(job["filename"]).stem cmd += ["--filename-hint", fn_hint] # STT provider (elevenlabs = Scribe, local = faster-whisper, auto = preferiraj Scribe) @@ -513,6 +630,8 @@ class StartJobIn(BaseModel): @app.post("/api/upload") async def upload_video( file: UploadFile = File(...), + artist: Optional[str] = Form(None), + title: Optional[str] = Form(None), user: str = Depends(check_auth), ): if not file.filename: @@ -543,6 +662,22 @@ async def upload_video( "created_at": time.time(), "updated_at": time.time(), } + + # Artist + title — najprej user-provided, potem parse iz 
filename + if artist and title: + # User je vpisal ali potrdil + job["parsed_artist"] = artist.strip() + job["parsed_title"] = title.strip() + job["has_clean_name"] = True + else: + # Auto parse iz filename + a, t = parse_artist_title(file.filename) + if a: + job["parsed_artist"] = a + if t: + job["parsed_title"] = t + job["has_clean_name"] = bool(a and t) + save_job(job) return job @@ -670,10 +805,14 @@ async def download(job_id: str, user: str = Depends(check_auth)): out = Path(job["output_path"]) if not out.exists(): raise HTTPException(404, "Output ne obstaja") + + # Pametno ime: "Izvajalec - Naslov - REEL.mp4" + download_name = build_download_filename(job) + return FileResponse( out, media_type="video/mp4", - filename=f"reel_{job_id}.mp4", + filename=download_name, ) diff --git a/templates/index.html b/templates/index.html index 2a2c647..0f5fd80 100644 --- a/templates/index.html +++ b/templates/index.html @@ -443,11 +443,13 @@ const dz = $("#dropzone"); const fileInput = $("#file-input"); let pendingFile = null; + let pendingArtist = null; + let pendingTitle = null; + dz.addEventListener("click", () => fileInput.click()); fileInput.addEventListener("change", () => { if (fileInput.files[0]) { - pendingFile = fileInput.files[0]; - dz.querySelector("div").textContent = `📹 ${pendingFile.name}`; + handleFileSelected(fileInput.files[0]); } }); ["dragover", "dragenter"].forEach(ev => @@ -456,11 +458,82 @@ dz.addEventListener(ev, e => { e.preventDefault(); dz.classList.remove("drag"); })); dz.addEventListener("drop", e => { const f = e.dataTransfer.files[0]; - if (f) { - pendingFile = f; - dz.querySelector("div").textContent = `📹 ${f.name}`; - } + if (f) handleFileSelected(f); }); + + // Klient-side parser (mora ustrezati backend parse_artist_title) + function parseArtistTitle(filename) { + if (!filename) return [null, null]; + let name = filename.replace(/\.[^.]+$/, ""); // remove ext + + // Odstrani noise + const noise = [ + /\(Official\s+(?:Music\s+)?Video\)/gi, + 
/\(Officia[lk]\s+Audio\)/gi, + /\(Offizielles\s+(?:Musik)?[Vv]ideo\)/gi, + /\(Lyric[s]?\s+Video\)/gi, + /\(Audio\)/gi, + /\(HD\)|\(HQ\)|\(4K\)/gi, + /\(Live\)|\(Remix\)|\(Remaster(?:ed)?\s*\d{0,4}\)/gi, + /\[Official.*?\]|\[Music.*?\]|\[Audio.*?\]/gi, + /\bofficial\s+video\b|\bofficial\s+audio\b/gi, + /\boriginal\s+(?:video|audio)\b/gi, + /\bMV\b|\b4K\b|\bHD\b|\bHQ\b/g, + ]; + for (const r of noise) name = name.replace(r, ""); + name = name.replace(/\s+/g, " ").trim(); + + // Probaj separatorje + for (const sep of [" - ", " – ", " — ", " | ", " : "]) { + if (name.includes(sep)) { + const parts = name.split(sep); + if (parts.length >= 2) { + const artist = parts[0].trim().replace(/^[\s\-–—|.:_]+|[\s\-–—|.:_]+$/g, ""); + const title = parts.slice(1).join(sep).trim().replace(/^[\s\-–—|.:_]+|[\s\-–—|.:_]+$/g, ""); + if (artist && title) return [artist, title]; + } + } + } + return [null, null]; + } + + function handleFileSelected(f) { + const [artist, title] = parseArtistTitle(f.name); + + if (!artist || !title) { + // Ni razvidno ime — vprašaj uporabnika + const userArtist = prompt( + `❗ Iz imena datoteke ni razviden izvajalec in naslov.\n\n` + + `Datoteka: "${f.name}"\n\n` + + `Vpiši IZVAJALCA (npr. "Lady Gaga"):`, + "" + ); + if (!userArtist || !userArtist.trim()) { + alert("⛔ Brez izvajalca ne morem nadaljevati.\n\nPoimenuj datoteko v formatu:\n Izvajalec - Naslov.mp4"); + fileInput.value = ""; + return; + } + const userTitle = prompt( + `Vpiši NASLOV pesmi (npr. "Abracadabra"):`, + "" + ); + if (!userTitle || !userTitle.trim()) { + alert("⛔ Brez naslova ne morem nadaljevati."); + fileInput.value = ""; + return; + } + pendingArtist = userArtist.trim(); + pendingTitle = userTitle.trim(); + } else { + pendingArtist = artist; + pendingTitle = title; + } + + pendingFile = f; + dz.querySelector("div").innerHTML = + `📹 ${pendingArtist} — ${pendingTitle}` + + `
${f.name} (${(f.size/1024/1024).toFixed(1)} MB)
`; + } // ─── Settings collector ───────────────────────── function collectSettings() { @@ -610,6 +683,8 @@ } const fd = new FormData(); fd.append("file", pendingFile); + if (pendingArtist) fd.append("artist", pendingArtist); + if (pendingTitle) fd.append("title", pendingTitle); showLive("Nalaganje datoteke", `${pendingFile.name} (${(pendingFile.size / 1024 / 1024).toFixed(1)} MB)`, 0); @@ -755,7 +830,9 @@ const title = job.source_type === "youtube" ? (job.youtube_url || "YouTube") - : (job.filename || job.id); + : (job.parsed_artist && job.parsed_title + ? `${job.parsed_artist} — ${job.parsed_title}` + : (job.filename || job.id)); const sizeStr = job.output_size_mb ? `${job.output_size_mb} MB` : job.size_mb ? `${job.size_mb} MB` : "";