From f2034f99700ce785357afcf43f955aa7c706d351 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastjan=20Arti=C4=8D?= Date: Thu, 30 Apr 2026 15:00:10 +0000 Subject: [PATCH] =?UTF-8?q?Dedup:=20SQLite=20baza=20za=20=C5=BEe=20obdelan?= =?UTF-8?q?e=20komade?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit User feedback: 'dodaj da če čekira in shranjuje že obdelani komadi v SQL bazo, da če nalagamo komad ki smo ga že naložili da ga ne naloži' NEW: SQLite dedup database at /data/processed.db Schema: processed_videos - normalized_name (PK part 1) - tv_station (PK part 2) — isti komad lahko obstaja na različnih postajah - filename_orig - job_id - nextcloud_url - file_size_mb - uploaded_at Filename normalization removes noise: 'BRAJDE (Official Video).mp4' → 'brajde' 'Brajde (HD).mxf' → 'brajde' 'BRAJDE - LIVE 2024.mp4' → 'brajde' (strips parentheses, suffixes like Official/HD/4K/Live, extension, lowercase) NEW endpoints: - POST /api/dedup/check — preveri katera imena so že obdelana - POST /api/dedup/remove — pobriše dedup zapis (Re-process) - GET /api/dedup/list — seznam vseh obdelanih (opt. 
def _normalize_filename(filename: str) -> str:
    """Reduce a media filename to a canonical key for duplicate detection.

    The extension is dropped, the name is lower-cased, bracketed annotations
    and common noise words are removed, and separators/whitespace are folded
    into single spaces.

    'BRAJDE (Official Video).mp4' → 'brajde'
    'Brajde (HD).mxf'             → 'brajde'
    'Artist - Title.mp4'          → 'artist title'
    'Artist_Title.mp4'            → 'artist title'

    Returns an empty string when nothing meaningful is left.
    """
    import re

    name = Path(filename).stem.lower()
    # Drop bracketed annotations such as "(Official Video)" or "[4K]".
    name = re.sub(r'\([^)]*\)', '', name)
    name = re.sub(r'\[[^\]]*\]', '', name)
    # Turn separators into spaces BEFORE stripping noise words: "_" is a word
    # character, so r'\b' would not match inside "brajde_official_video".
    name = re.sub(r'[-_.]+', ' ', name)
    name = re.sub(r'\b(official|video|hd|4k|lyric|audio|music|mv|live|cover|version|remix)\b', '', name)
    # Collapse whitespace last, so gaps left by the steps above never end up
    # in the key ("artist - title" must equal "artist_title").
    return re.sub(r'\s+', ' ', name).strip()


def _dedup_init() -> None:
    """Create the ``processed_videos`` table and its index if they are missing.

    Opens the SQLite file at the module-level ``DEDUP_DB`` path. The connection
    is always closed, even when a statement fails.
    """
    import sqlite3
    from contextlib import closing

    with closing(sqlite3.connect(str(DEDUP_DB))) as conn:
        with conn:  # commit on success, roll back on error
            conn.execute("""
                CREATE TABLE IF NOT EXISTS processed_videos (
                    normalized_name TEXT NOT NULL,
                    tv_station TEXT NOT NULL,
                    filename_orig TEXT NOT NULL,
                    job_id TEXT NOT NULL,
                    nextcloud_url TEXT,
                    file_size_mb REAL,
                    uploaded_at REAL NOT NULL,
                    PRIMARY KEY (normalized_name, tv_station)
                )
            """)
            # NOTE(review): the composite primary key already starts with
            # normalized_name, so this index is probably redundant — confirm
            # before dropping it.
            conn.execute("CREATE INDEX IF NOT EXISTS idx_norm ON processed_videos(normalized_name)")


def dedup_check(filename: str, tv_station: str) -> Optional[dict]:
    """Look up an already processed track.

    Args:
        filename: Original filename; it is normalized before the lookup.
        tv_station: Station the track was uploaded for (part of the key).

    Returns:
        The stored row as a dict, or ``None`` when there is no record or the
        filename normalizes to an empty key.
    """
    import sqlite3
    from contextlib import closing

    norm = _normalize_filename(filename)
    if not norm:
        # Nothing to look up; do not touch the database at all.
        return None
    _dedup_init()
    with closing(sqlite3.connect(str(DEDUP_DB))) as conn:
        conn.row_factory = sqlite3.Row
        row = conn.execute(
            "SELECT * FROM processed_videos WHERE normalized_name = ? AND tv_station = ?",
            (norm, tv_station),
        ).fetchone()
    return dict(row) if row else None


def dedup_record(filename: str, tv_station: str, job_id: str, nextcloud_url: Optional[str] = None, file_size_mb: Optional[float] = None) -> None:
    """Record a successfully processed and uploaded track.

    An existing record for the same (normalized name, station) pair is
    replaced. Filenames that normalize to an empty key are ignored.
    """
    import sqlite3
    from contextlib import closing

    norm = _normalize_filename(filename)
    if not norm:
        return
    _dedup_init()
    with closing(sqlite3.connect(str(DEDUP_DB))) as conn:
        with conn:  # commit on success, roll back on error
            conn.execute("""
                INSERT OR REPLACE INTO processed_videos
                (normalized_name, tv_station, filename_orig, job_id, nextcloud_url, file_size_mb, uploaded_at)
                VALUES (?, ?, ?, ?, ?, ?, ?)
            """, (norm, tv_station, filename, job_id, nextcloud_url, file_size_mb, time.time()))
    print(f"📒 Dedup: zabeležen {norm} → {tv_station} (job {job_id})", flush=True)


def dedup_remove(filename: str, tv_station: str) -> None:
    """Delete the record for a track, e.g. when the user wants to re-process it.

    Does nothing when the filename normalizes to an empty key or when no
    matching record exists.
    """
    import sqlite3
    from contextlib import closing

    norm = _normalize_filename(filename)
    if not norm:
        return
    _dedup_init()
    with closing(sqlite3.connect(str(DEDUP_DB))) as conn:
        with conn:  # commit on success, roll back on error
            conn.execute(
                "DELETE FROM processed_videos WHERE normalized_name = ? AND tv_station = ?",
                (norm, tv_station),
            )
# ────────────────────────────────────────────────────────────────
# Dedup check
# ────────────────────────────────────────────────────────────────
class DedupCheckRequest(BaseModel):
    """Request body shared by the dedup check and remove endpoints."""

    # Original filenames as selected by the user; normalized server-side.
    filenames: list[str]
    # Station the lookup is scoped to.
    tv_station: str = "FOLX SLOVENIJA"


@app.post("/api/dedup/check")
async def dedup_check_endpoint(payload: DedupCheckRequest, user: str = Depends(check_auth)):
    """Report which filenames were already processed for the given station.

    Returns ``{"results": {filename: record | None}, "tv_station": ...}``,
    where ``record`` is whatever ``dedup_check`` returns for that filename.
    """
    # NOTE(review): dedup_check does synchronous sqlite3 I/O inside an
    # ``async def`` handler — consider a plain ``def`` handler; confirm with
    # the rest of the app.
    results = {fn: dedup_check(fn, payload.tv_station) for fn in payload.filenames}
    return {"results": results, "tv_station": payload.tv_station}


@app.post("/api/dedup/remove")
async def dedup_remove_endpoint(payload: DedupCheckRequest, user: str = Depends(check_auth)):
    """Delete dedup records so the user can process the tracks again.

    ``removed`` echoes the requested filenames; it does not tell whether a
    record actually existed for each of them.
    """
    for fn in payload.filenames:
        dedup_remove(fn, payload.tv_station)
    return {"ok": True, "removed": payload.filenames}


@app.get("/api/dedup/list")
async def dedup_list(tv_station: Optional[str] = None, user: str = Depends(check_auth)):
    """List all processed tracks, newest first, optionally for one station.

    Returns ``{"count": int, "items": [row-as-dict, ...]}``.
    """
    import sqlite3
    from contextlib import closing

    _dedup_init()
    # closing() guarantees the connection is released even if the query fails.
    with closing(sqlite3.connect(str(DEDUP_DB))) as conn:
        conn.row_factory = sqlite3.Row
        if tv_station:
            rows = conn.execute(
                "SELECT * FROM processed_videos WHERE tv_station = ? ORDER BY uploaded_at DESC",
                (tv_station,),
            ).fetchall()
        else:
            rows = conn.execute(
                "SELECT * FROM processed_videos ORDER BY uploaded_at DESC"
            ).fetchall()
    return {"count": len(rows), "items": [dict(r) for r in rows]}
/**
 * Add the selected files to the pending queue.
 *
 * Each file is first asked about at /api/dedup/check; a file the server
 * already knows gets the returned record stored in its `dedup` field.
 * A failed check is only logged — the files are queued regardless.
 */
async function addFilesToQueue(files) {
  const queued = Array.from(files, (file) => {
    const [artist, title] = parseArtistTitle(file.name);
    return { file, artist, title, dedup: null };
  });

  // Ask the server which of these names were already processed.
  const station = $("#tv-station-input").value || "FOLX SLOVENIJA";
  try {
    const response = await fetch("/api/dedup/check", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({
        filenames: queued.map((entry) => entry.file.name),
        tv_station: station,
      }),
    });
    if (response.ok) {
      const { results } = await response.json();
      for (const entry of queued) {
        const hit = results[entry.file.name];
        if (hit) {
          // {normalized_name, tv_station, filename_orig, job_id, nextcloud_url, file_size_mb, uploaded_at}
          entry.dedup = hit;
        }
      }
    }
  } catch (err) {
    console.warn("Dedup check failed:", err);
  }

  pendingFiles.push(...queued);
  renderFileQueue();
}

/**
 * The user wants to process a track again although it was already uploaded.
 *
 * Asks the server to delete the dedup record, then clears the local marker
 * and flags the item so it is no longer skipped. A failed request is only
 * logged; the item is unblocked either way.
 */
window.forceReprocess = async function (idx) {
  const entry = pendingFiles[idx];
  if (!(entry && entry.dedup)) {
    return;
  }

  const payload = {
    filenames: [entry.file.name],
    tv_station: entry.dedup.tv_station,
  };
  try {
    await fetch("/api/dedup/remove", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify(payload),
    });
  } catch (err) {
    console.warn("Dedup remove failed:", err);
  }

  entry.dedup = null;
  entry.forceReprocess = true;
  renderFileQueue();
};
+633,23 @@ nameHtml = `${escapeHtml(item.file.name)}` + `
⚠ Brez razvidnega imena — ACR bo poskusil prepoznati
`; } + // Dedup warning + if (item.dedup) { + const date = new Date(item.dedup.uploaded_at * 1000).toLocaleDateString("sl-SI"); + nameHtml += `
+ ⚠ Že naložen na ${escapeHtml(item.dedup.tv_station)} (${date}) — Re-process +
`; + } div.innerHTML = `
${nameHtml}
${sizeMB} MB
`; + if (item.dedup && !item.forceReprocess) { + div.style.opacity = "0.6"; + } + q.appendChild(div); + }); q.appendChild(div); }); @@ -769,11 +825,23 @@ // Generate batch ID za skupinsko sledenje (Telegram summary) const batchId = "batch-" + Date.now().toString(36) + "-" + Math.random().toString(36).slice(2, 8); - const totalFiles = pendingFiles.length; + + // Filtriraj ven dedup-ed items (uporabnik mora kliknili Re-process) + const filesToProcess = pendingFiles.filter(item => !item.dedup); + if (filesToProcess.length === 0) { + alert("Vsi izbrani komadi so že naloženi. Klikni 'Re-process' za ponovno obdelavo."); + $("#submit-btn").disabled = false; + return; + } + if (filesToProcess.length < pendingFiles.length) { + const skipped = pendingFiles.length - filesToProcess.length; + console.log(`Preskočil ${skipped} že obdelanih komadov`); + } + const totalFiles = filesToProcess.length; // Upload + queue all files SEQUENTIALLY (1 hkrati za stabilnost) - for (let i = 0; i < pendingFiles.length; i++) { - const item = pendingFiles[i]; + for (let i = 0; i < filesToProcess.length; i++) { + const item = filesToProcess[i]; const f = item.file; const sizeMB = (f.size / 1024 / 1024).toFixed(1);