Smart download filenames: 'Artist - Title - REEL.mp4' + validation

Two improvements:

1. DOWNLOAD FILENAME: instead of 'reel_<job-id>.mp4' (e.g. reel_25e076af7600.mp4),
   downloads now have descriptive names like:
   - 'Lady Gaga - Abracadabra - REEL.mp4'
   - 'Modrijani - S teboj - REEL.mp4'
   - 'Sarah Connor - FICKA - REEL.mp4'

2. PRE-UPLOAD VALIDATION: when the filename doesn't follow the 'Artist - Title' format,
   the browser prompts the user for both fields. Without them, the upload is blocked.
   This prevents files with names like '12345.mp4' or 'video_final.mp4' from
   being processed without identifying info.

Implementation:
- parse_artist_title() helper handles common formats:
  - 'Artist - Title.mp4' / 'Artist – Title' (en dash) / 'Artist — Title' (em dash)
  - 'Artist | Title' / 'Artist : Title'
  - Strips noise: '(Official Music Video)', '(Audio)', '(HD)', '[Lyric Video]'
- Client-side parser mirrors backend (validation before upload)
- Backend accepts artist + title form fields (override parsed)
- Job stored with parsed_artist + parsed_title + has_clean_name fields
- YouTube jobs auto-fetch the title via the yt_download.py script (--info-only) and parse it
- Filename hint to Scribe/Claude uses parsed values (cleaner than raw filename)
- Download endpoint uses build_download_filename() for content-disposition
- Jobs list shows 'Artist — Title' instead of raw filename

Result: downloaded reels are auto-named correctly for Facebook/Instagram
upload, no more renaming files manually.
This commit is contained in:
Sebastjan Artič 2026-04-29 14:15:18 +00:00
parent a30137f1f2
commit 3877b822ff
2 changed files with 227 additions and 11 deletions

View File

@ -73,6 +73,97 @@ def check_auth(creds: HTTPBasicCredentials = Depends(security)):
return creds.username
# ────────────────────────────────────────────────────────────────
# Artist + title parsing iz filename / YouTube title
# ────────────────────────────────────────────────────────────────
import re
_NOISE_PATTERNS = [
    # Common "noise" decorations that must be removed before parsing.
    r"\(Official\s+(?:Music\s+)?Video\)",
    r"\(Officia[lk]\s+Audio\)",
    r"\(Offizielles\s+(?:Musik)?[Vv]ideo\)",
    r"\(Lyric[s]?\s+Video\)",
    r"\(Audio\)",
    r"\(HD\)", r"\(HQ\)", r"\(4K\)",
    r"\(Live\)", r"\(Remix\)",
    r"\(Remastered\)", r"\(Remaster(?:ed)?\s*\d{0,4}\)",
    r"\[Official.*?\]", r"\[Music.*?\]", r"\[Audio.*?\]",
    r"\bofficial\s+video\b", r"\bofficial\s+audio\b",
    r"\boriginal\s+(?:video|audio)\b",
    r"\bMV\b", r"\b4K\b", r"\bHD\b", r"\bHQ\b",
]

# Separators tried in priority order: hyphen, en dash, em dash, pipe, colon.
# Each one must be surrounded by spaces so that hyphenated words, plain
# multi-word names and "12:30"-style text are never split.
_ARTIST_TITLE_SEPARATORS = (" - ", " \u2013 ", " \u2014 ", " | ", " : ")

# Whitespace / punctuation trimmed from both ends of artist and title.
_EDGE_PUNCT_RE = re.compile(
    r"^[\s\-\u2013\u2014|.:_]+|[\s\-\u2013\u2014|.:_]+$"
)


def _strip_extension(text):
    """Drop a trailing file extension (and a directory prefix) when present.

    A suffix only counts as an extension when it is 2-5 alphanumeric
    characters and not purely numeric. This keeps dots that belong to the
    text itself ("Mr. Big", "feat. X", "No.10") from truncating it.
    """
    stem, dot, ext = text.rpartition(".")
    looks_like_ext = bool(
        dot
        and stem
        and 2 <= len(ext) <= 5
        and ext.isalnum()
        and not ext.isdigit()
    )
    if not looks_like_ext:
        return text
    # The input looks like a file name, so keep only its last path component.
    return stem.rsplit("/", 1)[-1]


def parse_artist_title(filename_or_title):
    """Extract ``(artist, title)`` from a file name or a YouTube title.

    Supported patterns:
      - "Artist - Title.mp4"
      - "Artist - Title (Official Music Video).mp4"
      - "Artist – Title" / "Artist — Title" (en dash / em dash)
      - "Artist | Title" / "Artist : Title"

    Args:
        filename_or_title: Raw file name or video title; may be empty/None.

    Returns:
        A ``(artist, title)`` tuple of non-empty strings, or ``(None, None)``
        when no separator yields a usable pair (artist at most 80 characters,
        title at most 100 characters).
    """
    if not filename_or_title:
        return (None, None)

    name = _strip_extension(filename_or_title)

    # Remove the noise decorations, case-insensitively.
    for pat in _NOISE_PATTERNS:
        name = re.sub(pat, "", name, flags=re.IGNORECASE)

    # Collapse the whitespace runs left behind by the removals.
    name = re.sub(r"\s+", " ", name).strip()

    for sep in _ARTIST_TITLE_SEPARATORS:
        if sep not in name:
            continue
        left, right = name.split(sep, 1)
        artist = _EDGE_PUNCT_RE.sub("", left)
        title = _EDGE_PUNCT_RE.sub("", right)
        if artist and title and len(artist) <= 80 and len(title) <= 100:
            return (artist, title)

    return (None, None)
def safe_filename(s, max_len=80):
    """Return *s* reduced to characters that are safe in a file name.

    Characters that are invalid on common file systems (``< > : " / \\ | ? *``
    and ASCII control characters) are removed, whitespace runs are collapsed
    to a single space, and the result is cut to ``max_len`` characters.

    Args:
        s: Text to sanitise; a falsy value yields an empty string.
        max_len: Maximum length of the returned string.

    Returns:
        The sanitised string, without leading or trailing whitespace.
    """
    if not s:
        return ""
    # Drop (not replace) characters that would break the file system.
    s = re.sub(r'[<>:"/\\|?*\x00-\x1f]', '', s)
    s = re.sub(r'\s+', ' ', s).strip()
    # Truncation can end right after a space; trim it so that the caller
    # never gets a name with trailing whitespace.
    return s[:max_len].rstrip()
def build_download_filename(job):
    """Build the download file name for a job from its metadata.

    The stored ``parsed_artist`` / ``parsed_title`` values are preferred.
    Whatever is missing is parsed from the job's ``filename`` or, failing
    that, its ``youtube_title``.

    Args:
        job: Job mapping; must contain ``"id"`` for the last-resort name.

    Returns:
        ``"<artist> - <title> - REEL.mp4"`` when both parts are usable,
        ``"<title> - REEL.mp4"`` when only the title is, otherwise
        ``"reel_<id>.mp4"``.
    """
    # First try the parsed values stored on the job.
    artist = job.get("parsed_artist")
    title = job.get("parsed_title")

    # Fallback: parse whatever is missing from the file name / YouTube title.
    if not artist or not title:
        source = job.get("filename") or job.get("youtube_title") or ""
        parsed_artist, parsed_title = parse_artist_title(source)
        artist = artist or parsed_artist
        title = title or parsed_title

    # Decide on the *sanitised* parts: a value made only of forbidden
    # characters sanitises to "" and must not leave a dangling " - ".
    clean_artist = safe_filename(artist)
    clean_title = safe_filename(title)

    if clean_artist and clean_title:
        return f"{clean_artist} - {clean_title} - REEL.mp4"
    if clean_title:
        return f"{clean_title} - REEL.mp4"

    # Last resort: the job ID (this should already be prevented at upload).
    return f"reel_{job['id']}.mp4"
# ────────────────────────────────────────────────────────────────
# Job state (filesystem-based, persistent prek restartov)
# ────────────────────────────────────────────────────────────────
@ -220,6 +311,30 @@ def process_job(job_id):
if not run_subprocess_logged(cmd, job_id, "YouTube download"):
return
update_job(job_id, input_path=str(input_path))
# Probaj dobiti YT naslov za artist+title parsing
try:
info_cmd = [
"python3", str(SCRIPTS_DIR / "yt_download.py"),
job["youtube_url"], "/dev/null", "--info-only",
]
proc = subprocess.run(info_cmd, capture_output=True, text=True, timeout=30)
if proc.returncode == 0 and proc.stdout:
info = json.loads(proc.stdout)
yt_title = info.get("title", "")
if yt_title:
a, t = parse_artist_title(yt_title)
updates = {"youtube_title": yt_title}
if a:
updates["parsed_artist"] = a
if t:
updates["parsed_title"] = t
updates["has_clean_name"] = bool(a and t)
update_job(job_id, **updates)
# Reload job for downstream use
job = load_job(job_id)
except Exception as e:
print(f"⚠️ Cannot fetch YT title: {e}", flush=True)
else:
input_path = Path(job["input_path"])
@ -242,9 +357,11 @@ def process_job(job_id):
cmd += ["--llm-provider", job["llm_provider"]]
if job.get("llm_model"):
cmd += ["--llm-model", job["llm_model"]]
# Filename hint = original filename (Claude lahko prepozna pesem)
if job.get("filename"):
# Brez extension
# Filename hint za Claude/Scribe — preferiraj parsed artist+title (čistejše)
if job.get("parsed_artist") and job.get("parsed_title"):
fn_hint = f"{job['parsed_artist']} - {job['parsed_title']}"
cmd += ["--filename-hint", fn_hint]
elif job.get("filename"):
fn_hint = Path(job["filename"]).stem
cmd += ["--filename-hint", fn_hint]
# STT provider (elevenlabs = Scribe, local = faster-whisper, auto = preferiraj Scribe)
@ -513,6 +630,8 @@ class StartJobIn(BaseModel):
@app.post("/api/upload")
async def upload_video(
file: UploadFile = File(...),
artist: Optional[str] = Form(None),
title: Optional[str] = Form(None),
user: str = Depends(check_auth),
):
if not file.filename:
@ -543,6 +662,22 @@ async def upload_video(
"created_at": time.time(),
"updated_at": time.time(),
}
# Artist + title — najprej user-provided, potem parse iz filename
if artist and title:
# User je vpisal ali potrdil
job["parsed_artist"] = artist.strip()
job["parsed_title"] = title.strip()
job["has_clean_name"] = True
else:
# Auto parse iz filename
a, t = parse_artist_title(file.filename)
if a:
job["parsed_artist"] = a
if t:
job["parsed_title"] = t
job["has_clean_name"] = bool(a and t)
save_job(job)
return job
@ -670,10 +805,14 @@ async def download(job_id: str, user: str = Depends(check_auth)):
out = Path(job["output_path"])
if not out.exists():
raise HTTPException(404, "Output ne obstaja")
# Pametno ime: "Izvajalec - Naslov - REEL.mp4"
download_name = build_download_filename(job)
return FileResponse(
out,
media_type="video/mp4",
filename=f"reel_{job_id}.mp4",
filename=download_name,
)

View File

@ -443,11 +443,13 @@
const dz = $("#dropzone");
const fileInput = $("#file-input");
let pendingFile = null;
let pendingArtist = null;
let pendingTitle = null;
dz.addEventListener("click", () => fileInput.click());
fileInput.addEventListener("change", () => {
if (fileInput.files[0]) {
pendingFile = fileInput.files[0];
dz.querySelector("div").textContent = `📹 ${pendingFile.name}`;
handleFileSelected(fileInput.files[0]);
}
});
["dragover", "dragenter"].forEach(ev =>
@ -456,11 +458,82 @@
dz.addEventListener(ev, e => { e.preventDefault(); dz.classList.remove("drag"); }));
dz.addEventListener("drop", e => {
const f = e.dataTransfer.files[0];
if (f) {
pendingFile = f;
dz.querySelector("div").textContent = `📹 ${f.name}`;
}
if (f) handleFileSelected(f);
});
// Klient-side parser (mora ustrezati backend parse_artist_title)
/**
 * Client-side parser; must stay in sync with the backend parse_artist_title.
 *
 * @param {string} filename - Name of the selected file.
 * @returns {[string|null, string|null]} [artist, title], or [null, null]
 *   when the name does not follow an "Artist <separator> Title" pattern.
 */
function parseArtistTitle(filename) {
  if (!filename) return [null, null];
  let name = filename.replace(/\.[^.]+$/, ""); // remove extension
  // Remove noise decorations (all case-insensitive, like the backend)
  const noise = [
    /\(Official\s+(?:Music\s+)?Video\)/gi,
    /\(Officia[lk]\s+Audio\)/gi,
    /\(Offizielles\s+(?:Musik)?[Vv]ideo\)/gi,
    /\(Lyric[s]?\s+Video\)/gi,
    /\(Audio\)/gi,
    /\(HD\)|\(HQ\)|\(4K\)/gi,
    /\(Live\)|\(Remix\)|\(Remaster(?:ed)?\s*\d{0,4}\)/gi,
    /\[Official.*?\]|\[Music.*?\]|\[Audio.*?\]/gi,
    /\bofficial\s+video\b|\bofficial\s+audio\b/gi,
    /\boriginal\s+(?:video|audio)\b/gi,
    /\bMV\b|\b4K\b|\bHD\b|\bHQ\b/gi,
  ];
  for (const r of noise) name = name.replace(r, "");
  name = name.replace(/\s+/g, " ").trim();
  // Whitespace / punctuation trimmed from both ends of each part
  const edge = /^[\s\-\u2013\u2014|.:_]+|[\s\-\u2013\u2014|.:_]+$/g;
  // Separators in priority order: hyphen, en dash, em dash, pipe, colon.
  // A bare space must never be a separator, otherwise every multi-word
  // name would "parse" and the user would never be asked for the fields.
  for (const sep of [" - ", " \u2013 ", " \u2014 ", " | ", " : "]) {
    const idx = name.indexOf(sep);
    if (idx === -1) continue;
    // Split on the first occurrence only; the rest belongs to the title.
    const artist = name.slice(0, idx).replace(edge, "");
    const title = name.slice(idx + sep.length).replace(edge, "");
    // Same length limits as the backend
    if (artist && title && artist.length <= 80 && title.length <= 100) {
      return [artist, title];
    }
  }
  return [null, null];
}
/**
 * Validate a newly selected/dropped file and remember it for upload.
 *
 * When artist and title cannot be parsed from the file name, the user is
 * prompted for both; if either is missing the selection is rejected and the
 * previous pending state is left untouched.
 *
 * @param {File} f - The file chosen via the input or dropped on the dropzone.
 */
function handleFileSelected(f) {
  const [artist, title] = parseArtistTitle(f.name);
  if (!artist || !title) {
    // Name is not recognisable: ask the user
    const userArtist = prompt(
      `❗ Iz imena datoteke ni razviden izvajalec in naslov.\n\n` +
      `Datoteka: "${f.name}"\n\n` +
      `Vpiši IZVAJALCA (npr. "Lady Gaga"):`,
      ""
    );
    if (!userArtist || !userArtist.trim()) {
      alert("⛔ Brez izvajalca ne morem nadaljevati.\n\nPoimenuj datoteko v formatu:\n Izvajalec - Naslov.mp4");
      fileInput.value = "";
      return;
    }
    const userTitle = prompt(
      `Vpiši NASLOV pesmi (npr. "Abracadabra"):`,
      ""
    );
    if (!userTitle || !userTitle.trim()) {
      alert("⛔ Brez naslova ne morem nadaljevati.");
      fileInput.value = "";
      return;
    }
    pendingArtist = userArtist.trim();
    pendingTitle = userTitle.trim();
  } else {
    pendingArtist = artist;
    pendingTitle = title;
  }
  pendingFile = f;

  // Build the label with textContent instead of innerHTML: the file name
  // and the typed artist/title are untrusted text and must not be parsed
  // as markup.
  const label = dz.querySelector("div");
  label.textContent = "📹 ";
  const heading = document.createElement("b");
  heading.textContent = `${pendingArtist} — ${pendingTitle}`;
  const details = document.createElement("div");
  details.style.cssText = "font-size: 11px; color: var(--muted); margin-top: 4px;";
  details.textContent = `${f.name} (${(f.size/1024/1024).toFixed(1)} MB)`;
  label.append(heading, details);
}
// ─── Settings collector ─────────────────────────
function collectSettings() {
@ -610,6 +683,8 @@
}
const fd = new FormData();
fd.append("file", pendingFile);
if (pendingArtist) fd.append("artist", pendingArtist);
if (pendingTitle) fd.append("title", pendingTitle);
showLive("Nalaganje datoteke", `${pendingFile.name} (${(pendingFile.size / 1024 / 1024).toFixed(1)} MB)`, 0);
@ -755,7 +830,9 @@
const title = job.source_type === "youtube"
? (job.youtube_url || "YouTube")
: (job.filename || job.id);
: (job.parsed_artist && job.parsed_title
? `${job.parsed_artist} — ${job.parsed_title}`
: (job.filename || job.id));
const sizeStr = job.output_size_mb ? `${job.output_size_mb} MB` :
job.size_mb ? `${job.size_mb} MB` : "";