Dedup: SQLite baza za že obdelane komade

User feedback: 'dodaj da če čekira in shranjuje že obdelani komadi v SQL bazo,
da če nalagamo komad ki smo ga že naložili da ga ne naloži'

NEW: SQLite dedup database at /data/processed.db
Schema: processed_videos
  - normalized_name (PK part 1)
  - tv_station (PK part 2) — isti komad lahko obstaja na različnih postajah
  - filename_orig
  - job_id
  - nextcloud_url
  - file_size_mb
  - uploaded_at

Filename normalization removes noise:
  'BRAJDE (Official Video).mp4' → 'brajde'
  'Brajde (HD).mxf' → 'brajde'
  'BRAJDE - LIVE.mp4' → 'brajde'
(strips parentheses, suffixes like Official/HD/4K/Live, extension, lowercase;
digits are kept, so a trailing year such as '2024' stays in the normalized name)

NEW endpoints:
- POST /api/dedup/check — preveri katera imena so že obdelana
- POST /api/dedup/remove — pobriše dedup zapis (Re-process)
- GET /api/dedup/list — seznam vseh obdelanih (opt. filter po tv_station)

Integration:
- Nextcloud upload (manual + auto): zabeleži v dedup po uspešnem PUT
- File queue (frontend): pred dodajanjem preveri dedup
  → prikaže rdeč warning '⚠ Že naložen na ONE DE (29.4.2026) — Re-process'
  → opacity 0.6 (vizualno bledejši)
  → submit jih preskoči (razen če je uporabnik kliknil 'Re-process')
This commit is contained in:
Sebastjan Artič 2026-04-30 15:00:10 +00:00
parent 16c332b490
commit f2034f9970
2 changed files with 231 additions and 5 deletions

View File

@ -49,6 +49,101 @@ UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
# Make sure the output and jobs directories exist before anything uses them.
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
JOBS_DIR.mkdir(parents=True, exist_ok=True)
# Dedup DB — SQLite file that tracks items which were already processed/uploaded.
DEDUP_DB = DATA_DIR / "processed.db"
def _normalize_filename(filename: str) -> str:
"""Normaliziraj filename za dedup primerjavo.
'BRAJDE (Official Video).mp4' 'brajde'
'Brajde (HD).mxf' 'brajde'
"""
import re
name = Path(filename).stem.lower()
# Odstrani pogoste suffix-e
name = re.sub(r'\b(official|video|hd|4k|lyric|audio|music|mv|live|cover|version|remix)\b', '', name)
# Odstrani parentheses content
name = re.sub(r'\([^)]*\)', '', name)
name = re.sub(r'\[[^\]]*\]', '', name)
# Whitespace normalize
name = re.sub(r'\s+', ' ', name).strip()
# Odstrani pogoste ločila
name = re.sub(r'[-_.]+', ' ', name).strip()
return name
def _dedup_init():
    """Create the ``processed_videos`` table and its index if they are missing.

    Safe to call repeatedly: both statements use ``IF NOT EXISTS``.
    The connection is always closed, even when a statement raises.
    """
    import sqlite3
    from contextlib import closing
    # closing() guarantees conn.close(); sqlite3's own context manager only
    # handles the transaction and would leave the connection open.
    with closing(sqlite3.connect(str(DEDUP_DB))) as conn:
        conn.execute("""
            CREATE TABLE IF NOT EXISTS processed_videos (
                normalized_name TEXT NOT NULL,
                tv_station TEXT NOT NULL,
                filename_orig TEXT NOT NULL,
                job_id TEXT NOT NULL,
                nextcloud_url TEXT,
                file_size_mb REAL,
                uploaded_at REAL NOT NULL,
                PRIMARY KEY (normalized_name, tv_station)
            )
        """)
        # NOTE(review): normalized_name is the leading primary-key column, so
        # this index is probably redundant; kept so the schema stays unchanged.
        conn.execute("CREATE INDEX IF NOT EXISTS idx_norm ON processed_videos(normalized_name)")
        conn.commit()
def dedup_check(filename: str, tv_station: str) -> Optional[dict]:
    """Look up whether a file was already processed for a given TV station.

    Args:
        filename: Original file name; it is normalized before the lookup.
        tv_station: Station name; matched exactly.

    Returns:
        A dict with the stored row's columns when a record exists, otherwise
        ``None`` (also when the normalized name is empty).
    """
    import sqlite3
    from contextlib import closing
    _dedup_init()
    norm = _normalize_filename(filename)
    if not norm:
        return None
    # closing() releases the connection even if the query raises.
    with closing(sqlite3.connect(str(DEDUP_DB))) as conn:
        conn.row_factory = sqlite3.Row
        row = conn.execute(
            "SELECT * FROM processed_videos WHERE normalized_name = ? AND tv_station = ?",
            (norm, tv_station)
        ).fetchone()
    return dict(row) if row else None
def dedup_record(filename: str, tv_station: str, job_id: str, nextcloud_url: Optional[str] = None, file_size_mb: Optional[float] = None):
    """Record a successfully processed and uploaded file in the dedup table.

    An existing row for the same (normalized name, station) pair is replaced.
    Nothing is written when the normalized name is empty.

    Args:
        filename: Original file name; stored as-is and also normalized for the key.
        tv_station: Station name, second part of the primary key.
        job_id: Identifier of the job that produced the upload.
        nextcloud_url: Value stored in the ``nextcloud_url`` column, if any.
        file_size_mb: Value stored in the ``file_size_mb`` column, if any.
    """
    import sqlite3
    from contextlib import closing
    _dedup_init()
    norm = _normalize_filename(filename)
    if not norm:
        return
    # closing() releases the connection even if the insert raises.
    with closing(sqlite3.connect(str(DEDUP_DB))) as conn:
        conn.execute("""
            INSERT OR REPLACE INTO processed_videos
            (normalized_name, tv_station, filename_orig, job_id, nextcloud_url, file_size_mb, uploaded_at)
            VALUES (?, ?, ?, ?, ?, ?, ?)
        """, (norm, tv_station, filename, job_id, nextcloud_url, file_size_mb, time.time()))
        conn.commit()
    # Separator added so the name and the station are distinguishable in the log.
    print(f"📒 Dedup: zabeležen {norm} @ {tv_station} (job {job_id})", flush=True)
def dedup_remove(filename: str, tv_station: str):
    """Delete the dedup record for a file on a station (e.g. to allow a re-run).

    Deleting a record that does not exist is not an error. Nothing happens
    when the normalized name is empty.

    Args:
        filename: Original file name; normalized to find the row.
        tv_station: Station name; matched exactly.
    """
    import sqlite3
    from contextlib import closing
    _dedup_init()
    norm = _normalize_filename(filename)
    if not norm:
        return
    # closing() releases the connection even if the delete raises.
    with closing(sqlite3.connect(str(DEDUP_DB))) as conn:
        conn.execute("DELETE FROM processed_videos WHERE normalized_name = ? AND tv_station = ?", (norm, tv_station))
        conn.commit()
# Credentials read from the environment, each with a hard-coded fallback.
# NOTE(review): the AUTH_PASS fallback is a placeholder value — presumably it
# must be overridden in the deployment environment; confirm it is always set.
AUTH_USER = os.environ.get("AUTH_USER", "sebastjan")
AUTH_PASS = os.environ.get("AUTH_PASS", "change-me-in-coolify-env")
@ -787,6 +882,13 @@ def process_job(job_id):
auto_upload_to_nextcloud=False, # disable da se ne ponovi
hidden_after_upload=True, # signal za UI da ga skrije
)
# Zabeleži v dedup
try:
orig_filename = final_job.get("filename") or download_name
file_mb = final_job.get("output_size_mb") or final_job.get("size_mb")
dedup_record(orig_filename, tv_station, job_id, nextcloud_url=result, file_size_mb=file_mb)
except Exception as e:
print(f"⚠️ Dedup record failed: {e}", flush=True)
print(f"☁️ Auto-upload OK: /{target_subdir}/{download_name}", flush=True)
else:
update_job(job_id, nextcloud_status="error", nextcloud_error=result)
@ -1062,6 +1164,55 @@ class StartJobIn(BaseModel):
tv_station: str = "FOLX SLOVENIJA"
# ────────────────────────────────────────────────────────────────
# Dedup check
# ────────────────────────────────────────────────────────────────
# Request body used by both /api/dedup/check and /api/dedup/remove.
class DedupCheckRequest(BaseModel):
    # File names to check or remove; each one is normalized server-side.
    filenames: list[str]
    # Station the lookup is scoped to; same default as StartJobIn.
    tv_station: str = "FOLX SLOVENIJA"
@app.post("/api/dedup/check")
async def dedup_check_endpoint(payload: DedupCheckRequest, user: str = Depends(check_auth)):
    """Report which of the given file names were already processed on the station.

    Returns a dict with ``results`` mapping every requested file name to its
    stored record (or ``None`` when there is no match) and the ``tv_station``
    the lookup was scoped to.
    """
    station = payload.tv_station
    lookups = {name: dedup_check(name, station) for name in payload.filenames}
    return {"results": lookups, "tv_station": station}
@app.post("/api/dedup/remove")
async def dedup_remove_endpoint(payload: DedupCheckRequest, user: str = Depends(check_auth)):
    """Delete dedup records so the user can process the given files again.

    The ``removed`` field echoes the requested file names; it does not say
    whether a record actually existed for each of them.
    """
    station = payload.tv_station
    names = payload.filenames
    for name in names:
        dedup_remove(name, station)
    return {"ok": True, "removed": names}
@app.get("/api/dedup/list")
async def dedup_list(tv_station: Optional[str] = None, user: str = Depends(check_auth)):
    """List all processed items, newest first, optionally filtered by TV station.

    Args:
        tv_station: When non-empty, only rows for this station are returned.

    Returns:
        A dict with ``count`` (number of rows) and ``items`` (rows as dicts).
    """
    import sqlite3
    from contextlib import closing
    _dedup_init()
    if tv_station:
        query = "SELECT * FROM processed_videos WHERE tv_station = ? ORDER BY uploaded_at DESC"
        params = (tv_station,)
    else:
        query = "SELECT * FROM processed_videos ORDER BY uploaded_at DESC"
        params = ()
    # closing() releases the connection even if the query raises.
    with closing(sqlite3.connect(str(DEDUP_DB))) as conn:
        conn.row_factory = sqlite3.Row
        items = [dict(r) for r in conn.execute(query, params).fetchall()]
    return {"count": len(items), "items": items}
# ────────────────────────────────────────────────────────────────
# Upload (file)
# ────────────────────────────────────────────────────────────────
@ -1654,6 +1805,13 @@ async def upload_nextcloud(job_id: str, user: str = Depends(check_auth)):
if success:
update_job(job_id, nextcloud_status="uploaded", nextcloud_url=result, nextcloud_error=None)
print(f"☁️ Upload OK: /{target_subdir}/{download_name}", flush=True)
# Zabeleži v dedup
try:
orig_filename = job.get("filename") or download_name
file_mb = job.get("output_size_mb") or job.get("size_mb")
dedup_record(orig_filename, tv_station, job_id, nextcloud_url=result, file_size_mb=file_mb)
except Exception as e:
print(f"⚠️ Dedup record failed: {e}", flush=True)
return {"ok": True, "url": result, "filename": download_name, "tv_station": tv_station}
else:
update_job(job_id, nextcloud_status="error", nextcloud_error=result)

View File

@ -547,11 +547,35 @@
return [null, null];
}
/**
 * Add the selected files to the pending queue, flagging the ones that were
 * already processed for the currently selected TV station.
 *
 * The dedup lookup is best-effort: if it fails for any reason the files are
 * still queued, just without a dedup flag.
 *
 * @param {Iterable<File>} files - Files picked or dropped by the user.
 * @returns {Promise<void>}
 */
async function addFilesToQueue(files) {
  const newItems = [];
  for (const f of files) {
    const [artist, title] = parseArtistTitle(f.name);
    newItems.push({ file: f, artist, title, dedup: null });
  }

  // Dedup check before the items enter the queue.
  try {
    // Looked up inside the try so a missing input element cannot prevent
    // the files from being queued.
    const stationInput = $("#tv-station-input");
    const tvStation = (stationInput && stationInput.value) || "FOLX SLOVENIJA";
    const r = await fetch("/api/dedup/check", {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ filenames: newItems.map(i => i.file.name), tv_station: tvStation }),
    });
    if (r.ok) {
      const data = await r.json();
      newItems.forEach(item => {
        const match = data.results[item.file.name];
        if (match) {
          // {normalized_name, tv_station, filename_orig, job_id, nextcloud_url, file_size_mb, uploaded_at}
          item.dedup = match;
        }
      });
    }
  } catch (e) {
    console.warn("Dedup check failed:", e);
  }

  // Single push of the annotated items (the old per-file push is gone).
  pendingFiles.push(...newItems);
  renderFileQueue();
}
@ -560,6 +584,26 @@
renderFileQueue();
}
// The user wants to process a song again even though it was already uploaded.
// Removes the server-side dedup record (best-effort) and clears the local flag
// so the item is no longer filtered out on submit.
window.forceReprocess = async function (idx) {
  const entry = pendingFiles[idx];
  if (!(entry && entry.dedup)) {
    return;
  }
  const station = entry.dedup.tv_station;

  // Delete the dedup record; a failure is only logged.
  try {
    const request = {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ filenames: [entry.file.name], tv_station: station }),
    };
    await fetch("/api/dedup/remove", request);
  } catch (e) {
    console.warn("Dedup remove failed:", e);
  }

  entry.forceReprocess = true;
  entry.dedup = null;
  renderFileQueue();
};
function renderFileQueue() {
const q = $("#file-queue");
if (!q) return;
@ -589,11 +633,23 @@
nameHtml = `${escapeHtml(item.file.name)}` +
`<div class="warn">⚠ Brez razvidnega imena — ACR bo poskusil prepoznati</div>`;
}
// Dedup warning
if (item.dedup) {
const date = new Date(item.dedup.uploaded_at * 1000).toLocaleDateString("sl-SI");
nameHtml += `<div style="margin-top:4px; padding:4px 6px; background:rgba(239,68,68,0.15); border-left:3px solid #ef4444; border-radius:3px; font-size:11px; color:#fca5a5;">
<b>Že naložen na ${escapeHtml(item.dedup.tv_station)}</b> (${date}) — <a href="#" onclick="forceReprocess(${idx}); return false;" style="color:#ffd700; text-decoration:underline;">Re-process</a>
</div>`;
}
div.innerHTML = `
<div class="name">${nameHtml}</div>
<div class="size">${sizeMB} MB</div>
<button class="remove" data-idx="${idx}" title="Odstrani">×</button>
`;
if (item.dedup && !item.forceReprocess) {
div.style.opacity = "0.6";
}
q.appendChild(div);
});
q.appendChild(div);
});
@ -769,11 +825,23 @@
// Generate batch ID za skupinsko sledenje (Telegram summary)
const batchId = "batch-" + Date.now().toString(36) + "-" + Math.random().toString(36).slice(2, 8);
const totalFiles = pendingFiles.length;
// Filtriraj ven dedup-ed items (uporabnik mora kliknili Re-process)
const filesToProcess = pendingFiles.filter(item => !item.dedup);
if (filesToProcess.length === 0) {
alert("Vsi izbrani komadi so že naloženi. Klikni 'Re-process' za ponovno obdelavo.");
$("#submit-btn").disabled = false;
return;
}
if (filesToProcess.length < pendingFiles.length) {
const skipped = pendingFiles.length - filesToProcess.length;
console.log(`Preskočil ${skipped} že obdelanih komadov`);
}
const totalFiles = filesToProcess.length;
// Upload + queue all files SEQUENTIALLY (1 hkrati za stabilnost)
for (let i = 0; i < pendingFiles.length; i++) {
const item = pendingFiles[i];
for (let i = 0; i < filesToProcess.length; i++) {
const item = filesToProcess[i];
const f = item.file;
const sizeMB = (f.size / 1024 / 1024).toFixed(1);