Smart download filenames: 'Artist - Title - REEL.mp4' + validation
Two improvements: 1. DOWNLOAD FILENAME: instead of 'reel_<job-id>.mp4' (e.g. reel_25e076af7600.mp4), downloads now have descriptive names like: - 'Lady Gaga - Abracadabra - REEL.mp4' - 'Modrijani - S teboj - REEL.mp4' - 'Sarah Connor - FICKA - REEL.mp4' 2. PRE-UPLOAD VALIDATION: when filename doesn't follow 'Artist - Title' format, browser prompts user for both fields. Without them, upload is blocked. This prevents files with names like '12345.mp4' or 'video_final.mp4' from being processed without identifying info. Implementation: - parse_artist_title() helper handles common formats: - 'Artist - Title.mp4' / 'Artist – Title' / 'Artist — Title' (en dash or em dash) - 'Artist | Title' / 'Artist : Title' - Strips noise: '(Official Music Video)', '(Audio)', '(HD)', '[Lyric Video]' - Client-side parser mirrors backend (validation before upload) - Backend accepts artist + title form fields (override parsed) - Job stored with parsed_artist + parsed_title + has_clean_name fields - YouTube jobs auto-fetch title via yt-dlp --info-only and parse it - Filename hint to Scribe/Claude uses parsed values (cleaner than raw filename) - Download endpoint uses build_download_filename() for content-disposition - Jobs list shows 'Artist — Title' instead of raw filename Result: downloaded reels are auto-named correctly for Facebook/Instagram upload, no more renaming files manually.
This commit is contained in:
parent
a30137f1f2
commit
3877b822ff
147
app/main.py
147
app/main.py
@ -73,6 +73,97 @@ def check_auth(creds: HTTPBasicCredentials = Depends(security)):
|
||||
return creds.username
|
||||
|
||||
|
||||
# ────────────────────────────────────────────────────────────────
|
||||
# Artist + title parsing from a filename / YouTube title
|
||||
# ────────────────────────────────────────────────────────────────
|
||||
import re
|
||||
|
||||
_NOISE_PATTERNS = [
    # Common "noise" fragments that must be removed before splitting.
    r"\(Official\s+(?:Music\s+)?Video\)",
    r"\(Officia[lk]\s+Audio\)",
    r"\(Offizielles\s+(?:Musik)?[Vv]ideo\)",
    r"\(Lyric[s]?\s+Video\)",
    r"\(Audio\)",
    r"\(HD\)", r"\(HQ\)", r"\(4K\)",
    r"\(Live\)", r"\(Remix\)",
    r"\(Remastered\)", r"\(Remaster(?:ed)?\s*\d{0,4}\)",
    r"\[Official.*?\]", r"\[Music.*?\]", r"\[Audio.*?\]",
    r"\bofficial\s+video\b", r"\bofficial\s+audio\b",
    r"\boriginal\s+(?:video|audio)\b",
    r"\bMV\b", r"\b4K\b", r"\bHD\b", r"\bHQ\b",
]

# Compiled once at import time; applied one after another, in list order.
_NOISE_REGEXES = [re.compile(pat, re.IGNORECASE) for pat in _NOISE_PATTERNS]

# A trailing ".ext": 2-5 alphanumerics containing at least one letter and no
# whitespace.  This matches ".mp4", ".m4a", ".3gp", ".webm" but leaves
# "feat. Someone", "T.N.T" and "2.0" alone.
_FILE_EXT_RE = re.compile(r"\.(?=[0-9]*[A-Za-z])[A-Za-z0-9]{2,5}$")

# Separators are tried in this order; the first one that yields a valid
# (artist, title) pair wins.  Hyphen, en dash, em dash, pipe, colon.
_SEPARATORS = (" - ", " – ", " — ", " | ", " : ")

# Leading/trailing separator-like punctuation left over after splitting.
_EDGE_PUNCT_RE = re.compile(r"^[\s\-–—|.:_]+|[\s\-–—|.:_]+$")


def parse_artist_title(filename_or_title):
    """Extract ``(artist, title)`` from a file name or a YouTube title.

    Supported shapes:
      - "Artist - Title.mp4"
      - "Artist - Title (Official Music Video).mp4"
      - "Artist – Title" / "Artist — Title" (en dash / em dash)
      - "Artist | Title" / "Artist : Title"

    Only a real extension-like suffix is stripped, and a directory prefix is
    dropped only when such a suffix was present.  Free-text titles such as
    "Artist - Title feat. Someone" or "AC/DC - T.N.T" are therefore no longer
    truncated the way ``Path(...).stem`` truncated them.

    Args:
        filename_or_title: Upload file name or video title; may be empty/None.

    Returns:
        ``(artist, title)`` when both parts are non-empty and at most 80 and
        100 characters long respectively, otherwise ``(None, None)``.
    """
    if not filename_or_title:
        return (None, None)

    name = filename_or_title.strip()

    # Strip the extension; only then treat the input as a path and keep the
    # last component (handles both "/" and "\" style upload paths).
    name, stripped = _FILE_EXT_RE.subn("", name)
    if stripped:
        name = re.split(r"[\\/]", name)[-1]

    # Remove noise fragments, then normalise whitespace.
    for noise_re in _NOISE_REGEXES:
        name = noise_re.sub("", name)
    name = re.sub(r"\s+", " ", name).strip()

    for sep in _SEPARATORS:
        if sep not in name:
            continue
        raw_artist, raw_title = name.split(sep, 1)
        # Strip leading/trailing punctuation left around each part.
        artist = _EDGE_PUNCT_RE.sub("", raw_artist)
        title = _EDGE_PUNCT_RE.sub("", raw_title)
        if artist and title and len(artist) <= 80 and len(title) <= 100:
            return (artist, title)

    return (None, None)
|
||||
|
||||
|
||||
def safe_filename(s, max_len=80):
    """Return a file-system-safe version of ``s``.

    Whitespace (including tabs and newlines) is collapsed to single spaces
    *before* forbidden characters are removed, so words separated by a tab or
    newline are not glued together.  The result is truncated to ``max_len``
    and never ends in whitespace, even when the cut lands right after a space.

    Args:
        s: Text to sanitise; may be empty or None.
        max_len: Maximum length of the returned string.

    Returns:
        The sanitised string, or ``""`` when ``s`` is falsy.
    """
    if not s:
        return ""
    # Treat every whitespace run as one space first.
    cleaned = re.sub(r'\s+', ' ', s)
    # Drop characters that are invalid in file names, plus control characters.
    cleaned = re.sub(r'[<>:"/\\|?*\x00-\x1f]', '', cleaned)
    # Removing a character can leave a double space behind ("A / B").
    cleaned = re.sub(r'\s+', ' ', cleaned).strip()
    return cleaned[:max_len].rstrip()
|
||||
|
||||
|
||||
def build_download_filename(job):
    """Build the download file name for a finished job.

    Prefers the ``parsed_artist`` / ``parsed_title`` values stored on the job.
    If either is missing, ``filename`` and then ``youtube_title`` are parsed
    with ``parse_artist_title`` and the first one that parses fills the gaps.

    Emptiness is checked *after* sanitising, so a value consisting only of
    forbidden characters cannot produce a name like " - Title - REEL.mp4".

    Args:
        job: Job metadata dict; must contain ``"id"`` for the last-resort name.

    Returns:
        "Artist - Title - REEL.mp4", "Title - REEL.mp4", or "reel_<id>.mp4".
    """
    # Stored values first.
    artist = job.get("parsed_artist")
    title = job.get("parsed_title")

    # Fallback: parse the original file name, then the YouTube title.
    if not artist or not title:
        for source in (job.get("filename"), job.get("youtube_title")):
            parsed_artist, parsed_title = parse_artist_title(source)
            if parsed_artist and parsed_title:
                artist = artist or parsed_artist
                title = title or parsed_title
                break

    clean_artist = safe_filename(artist)
    clean_title = safe_filename(title)

    if clean_artist and clean_title:
        return f"{clean_artist} - {clean_title} - REEL.mp4"
    if clean_title:
        return f"{clean_title} - REEL.mp4"
    # Last resort: job ID (upload validation should normally prevent this).
    return f"reel_{job['id']}.mp4"
|
||||
|
||||
|
||||
# ────────────────────────────────────────────────────────────────
|
||||
# Job state (filesystem-based, persistent prek restartov)
|
||||
# ────────────────────────────────────────────────────────────────
|
||||
@ -220,6 +311,30 @@ def process_job(job_id):
|
||||
if not run_subprocess_logged(cmd, job_id, "YouTube download"):
|
||||
return
|
||||
update_job(job_id, input_path=str(input_path))
|
||||
|
||||
# Probaj dobiti YT naslov za artist+title parsing
|
||||
try:
|
||||
info_cmd = [
|
||||
"python3", str(SCRIPTS_DIR / "yt_download.py"),
|
||||
job["youtube_url"], "/dev/null", "--info-only",
|
||||
]
|
||||
proc = subprocess.run(info_cmd, capture_output=True, text=True, timeout=30)
|
||||
if proc.returncode == 0 and proc.stdout:
|
||||
info = json.loads(proc.stdout)
|
||||
yt_title = info.get("title", "")
|
||||
if yt_title:
|
||||
a, t = parse_artist_title(yt_title)
|
||||
updates = {"youtube_title": yt_title}
|
||||
if a:
|
||||
updates["parsed_artist"] = a
|
||||
if t:
|
||||
updates["parsed_title"] = t
|
||||
updates["has_clean_name"] = bool(a and t)
|
||||
update_job(job_id, **updates)
|
||||
# Reload job for downstream use
|
||||
job = load_job(job_id)
|
||||
except Exception as e:
|
||||
print(f"⚠️ Cannot fetch YT title: {e}", flush=True)
|
||||
else:
|
||||
input_path = Path(job["input_path"])
|
||||
|
||||
@ -242,9 +357,11 @@ def process_job(job_id):
|
||||
cmd += ["--llm-provider", job["llm_provider"]]
|
||||
if job.get("llm_model"):
|
||||
cmd += ["--llm-model", job["llm_model"]]
|
||||
# Filename hint = original filename (Claude lahko prepozna pesem)
|
||||
if job.get("filename"):
|
||||
# Brez extension
|
||||
# Filename hint za Claude/Scribe — preferiraj parsed artist+title (čistejše)
|
||||
if job.get("parsed_artist") and job.get("parsed_title"):
|
||||
fn_hint = f"{job['parsed_artist']} - {job['parsed_title']}"
|
||||
cmd += ["--filename-hint", fn_hint]
|
||||
elif job.get("filename"):
|
||||
fn_hint = Path(job["filename"]).stem
|
||||
cmd += ["--filename-hint", fn_hint]
|
||||
# STT provider (elevenlabs = Scribe, local = faster-whisper, auto = preferiraj Scribe)
|
||||
@ -513,6 +630,8 @@ class StartJobIn(BaseModel):
|
||||
@app.post("/api/upload")
|
||||
async def upload_video(
|
||||
file: UploadFile = File(...),
|
||||
artist: Optional[str] = Form(None),
|
||||
title: Optional[str] = Form(None),
|
||||
user: str = Depends(check_auth),
|
||||
):
|
||||
if not file.filename:
|
||||
@ -543,6 +662,22 @@ async def upload_video(
|
||||
"created_at": time.time(),
|
||||
"updated_at": time.time(),
|
||||
}
|
||||
|
||||
# Artist + title — najprej user-provided, potem parse iz filename
|
||||
if artist and title:
|
||||
# User je vpisal ali potrdil
|
||||
job["parsed_artist"] = artist.strip()
|
||||
job["parsed_title"] = title.strip()
|
||||
job["has_clean_name"] = True
|
||||
else:
|
||||
# Auto parse iz filename
|
||||
a, t = parse_artist_title(file.filename)
|
||||
if a:
|
||||
job["parsed_artist"] = a
|
||||
if t:
|
||||
job["parsed_title"] = t
|
||||
job["has_clean_name"] = bool(a and t)
|
||||
|
||||
save_job(job)
|
||||
return job
|
||||
|
||||
@ -670,10 +805,14 @@ async def download(job_id: str, user: str = Depends(check_auth)):
|
||||
out = Path(job["output_path"])
|
||||
if not out.exists():
|
||||
raise HTTPException(404, "Output ne obstaja")
|
||||
|
||||
# Pametno ime: "Izvajalec - Naslov - REEL.mp4"
|
||||
download_name = build_download_filename(job)
|
||||
|
||||
return FileResponse(
|
||||
out,
|
||||
media_type="video/mp4",
|
||||
filename=f"reel_{job_id}.mp4",
|
||||
filename=download_name,
|
||||
)
|
||||
|
||||
|
||||
|
||||
@ -443,11 +443,13 @@
|
||||
const dz = $("#dropzone");
|
||||
const fileInput = $("#file-input");
|
||||
let pendingFile = null;
|
||||
let pendingArtist = null;
|
||||
let pendingTitle = null;
|
||||
|
||||
dz.addEventListener("click", () => fileInput.click());
|
||||
fileInput.addEventListener("change", () => {
|
||||
if (fileInput.files[0]) {
|
||||
pendingFile = fileInput.files[0];
|
||||
dz.querySelector("div").textContent = `📹 ${pendingFile.name}`;
|
||||
handleFileSelected(fileInput.files[0]);
|
||||
}
|
||||
});
|
||||
["dragover", "dragenter"].forEach(ev =>
|
||||
@ -456,11 +458,82 @@
|
||||
dz.addEventListener(ev, e => { e.preventDefault(); dz.classList.remove("drag"); }));
|
||||
dz.addEventListener("drop", e => {
|
||||
const f = e.dataTransfer.files[0];
|
||||
if (f) {
|
||||
pendingFile = f;
|
||||
dz.querySelector("div").textContent = `📹 ${f.name}`;
|
||||
}
|
||||
if (f) handleFileSelected(f);
|
||||
});
|
||||
|
||||
// Klient-side parser (mora ustrezati backend parse_artist_title)
|
||||
/**
 * Client-side mirror of the backend parse_artist_title().
 *
 * Strips the extension and known "noise" fragments, then splits on the first
 * separator that yields a valid pair. Matches the backend in two places the
 * earlier version missed: all noise patterns are case-insensitive, and the
 * artist/title length limits (80 / 100 characters) are enforced.
 *
 * @param {string} filename  Name of the selected file.
 * @returns {[string|null, string|null]}  [artist, title] or [null, null].
 */
function parseArtistTitle(filename) {
  if (!filename) return [null, null];
  let name = filename.replace(/\.[^.]+$/, ""); // remove ext

  // Remove noise (the backend applies IGNORECASE to every pattern).
  const noise = [
    /\(Official\s+(?:Music\s+)?Video\)/gi,
    /\(Officia[lk]\s+Audio\)/gi,
    /\(Offizielles\s+(?:Musik)?[Vv]ideo\)/gi,
    /\(Lyric[s]?\s+Video\)/gi,
    /\(Audio\)/gi,
    /\(HD\)|\(HQ\)|\(4K\)/gi,
    /\(Live\)|\(Remix\)|\(Remaster(?:ed)?\s*\d{0,4}\)/gi,
    /\[Official.*?\]|\[Music.*?\]|\[Audio.*?\]/gi,
    /\bofficial\s+video\b|\bofficial\s+audio\b/gi,
    /\boriginal\s+(?:video|audio)\b/gi,
    /\bMV\b|\b4K\b|\bHD\b|\bHQ\b/gi,
  ];
  for (const r of noise) name = name.replace(r, "");
  name = name.replace(/\s+/g, " ").trim();

  const edgePunct = /^[\s\-–—|.:_]+|[\s\-–—|.:_]+$/g;

  // Try separators in the same order as the backend.
  for (const sep of [" - ", " – ", " — ", " | ", " : "]) {
    const at = name.indexOf(sep);
    if (at === -1) continue;
    // Split on the first occurrence only (like Python's split(sep, 1)).
    const artist = name.slice(0, at).trim().replace(edgePunct, "");
    const title = name.slice(at + sep.length).trim().replace(edgePunct, "");
    // Same limits as the backend; on failure fall through to the next separator.
    if (artist && title && artist.length <= 80 && title.length <= 100) {
      return [artist, title];
    }
  }
  return [null, null];
}
|
||||
|
||||
/**
 * Validate a newly selected/dropped file and remember it as the pending upload.
 *
 * If artist and title cannot be parsed from the file name, the user is
 * prompted for both; cancelling or leaving either one empty aborts the
 * selection. On success pendingFile / pendingArtist / pendingTitle are set
 * and the dropzone label is updated.
 *
 * The label is built from DOM nodes with textContent rather than innerHTML,
 * because the file name and the typed artist/title are untrusted text and
 * must not be interpreted as HTML.
 *
 * @param {File} f  The file chosen by the user.
 */
function handleFileSelected(f) {
  const [artist, title] = parseArtistTitle(f.name);

  if (!artist || !title) {
    // Name is not recognisable: ask the user.
    const userArtist = prompt(
      `❗ Iz imena datoteke ni razviden izvajalec in naslov.\n\n` +
      `Datoteka: "${f.name}"\n\n` +
      `Vpiši IZVAJALCA (npr. "Lady Gaga"):`,
      ""
    );
    if (!userArtist || !userArtist.trim()) {
      alert("⛔ Brez izvajalca ne morem nadaljevati.\n\nPoimenuj datoteko v formatu:\n Izvajalec - Naslov.mp4");
      fileInput.value = "";
      return;
    }
    const userTitle = prompt(
      `Vpiši NASLOV pesmi (npr. "Abracadabra"):`,
      ""
    );
    if (!userTitle || !userTitle.trim()) {
      alert("⛔ Brez naslova ne morem nadaljevati.");
      fileInput.value = "";
      return;
    }
    pendingArtist = userArtist.trim();
    pendingTitle = userTitle.trim();
  } else {
    pendingArtist = artist;
    pendingTitle = title;
  }

  pendingFile = f;

  // Same structure as before: "📹 <b>Artist — Title</b><div>file (size)</div>",
  // but every dynamic value goes in as plain text.
  const label = dz.querySelector("div");
  label.textContent = "📹 ";

  const heading = document.createElement("b");
  heading.textContent = `${pendingArtist} — ${pendingTitle}`;

  const meta = document.createElement("div");
  meta.style.cssText = "font-size: 11px; color: var(--muted); margin-top: 4px;";
  meta.textContent = `${f.name} (${(f.size/1024/1024).toFixed(1)} MB)`;

  label.append(heading, meta);
}
|
||||
|
||||
// ─── Settings collector ─────────────────────────
|
||||
function collectSettings() {
|
||||
@ -610,6 +683,8 @@
|
||||
}
|
||||
const fd = new FormData();
|
||||
fd.append("file", pendingFile);
|
||||
if (pendingArtist) fd.append("artist", pendingArtist);
|
||||
if (pendingTitle) fd.append("title", pendingTitle);
|
||||
|
||||
showLive("Nalaganje datoteke", `${pendingFile.name} (${(pendingFile.size / 1024 / 1024).toFixed(1)} MB)`, 0);
|
||||
|
||||
@ -755,7 +830,9 @@
|
||||
|
||||
const title = job.source_type === "youtube"
|
||||
? (job.youtube_url || "YouTube")
|
||||
: (job.filename || job.id);
|
||||
: (job.parsed_artist && job.parsed_title
|
||||
? `${job.parsed_artist} — ${job.parsed_title}`
|
||||
: (job.filename || job.id));
|
||||
|
||||
const sizeStr = job.output_size_mb ? `${job.output_size_mb} MB` :
|
||||
job.size_mb ? `${job.size_mb} MB` : "";
|
||||
|
||||
Loading…
Reference in New Issue
Block a user