Dedup: SQLite baza za že obdelane komade
User feedback: 'dodaj da če čekira in shranjuje že obdelani komadi v SQL bazo, da če nalagamo komad ki smo ga že naložili da ga ne naloži'

NEW: SQLite dedup database at /data/processed.db

Schema: processed_videos
- normalized_name (PK part 1)
- tv_station (PK part 2) — isti komad lahko obstaja na različnih postajah
- filename_orig
- job_id
- nextcloud_url
- file_size_mb
- uploaded_at

Filename normalization removes noise:
'BRAJDE (Official Video).mp4' → 'brajde'
'Brajde (HD).mxf' → 'brajde'
'BRAJDE - LIVE 2024.mp4' → 'brajde' (opomba: letnica '2024' dejansko ostane v normaliziranem imenu — številk koda ne odstranjuje)
(strips parentheses, suffixes like Official/HD/4K/Live, extension, lowercase)

NEW endpoints:
- POST /api/dedup/check — preveri, katera imena so že obdelana
- POST /api/dedup/remove — pobriše dedup zapis (Re-process)
- GET /api/dedup/list — seznam vseh obdelanih (opc. filter po tv_station)

Integration:
- Nextcloud upload (manual + auto): zabeleži v dedup po uspešnem PUT
- File queue (frontend): pred dodajanjem preveri dedup
  → prikaže rdeč warning '⚠ Že naložen na ONE DE (29.4.2026) — Re-process'
  → opacity 0.6 (vizualno bledejši)
  → submit jih preskoči (razen če je uporabnik kliknil 'Re-process')
This commit is contained in:
parent
16c332b490
commit
f2034f9970
158
app/main.py
158
app/main.py
@ -49,6 +49,101 @@ UPLOAD_DIR.mkdir(parents=True, exist_ok=True)
|
|||||||
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
|
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
|
||||||
JOBS_DIR.mkdir(parents=True, exist_ok=True)
|
JOBS_DIR.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
|
# Dedup DB — tracks songs that have already been processed/uploaded
DEDUP_DB = DATA_DIR / "processed.db"
|
||||||
|
|
||||||
|
|
||||||
|
def _normalize_filename(filename: str) -> str:
    """Normalize a filename into the key used for dedup comparison.

    Passes, in order:

    1. take the stem (directory and last extension dropped) and lowercase it;
    2. drop ``(...)`` and ``[...]`` segments;
    3. turn runs of ``-``, ``_`` and ``.`` into a single space;
    4. drop common noise words (official, video, hd, 4k, ...);
    5. collapse whitespace and trim.

    Separators are converted *before* the noise-word pass so that words
    joined by underscores are recognised (``\\b`` does not match next to
    ``_``), and whitespace is collapsed *last* so that replacing a separator
    cannot leave a run of spaces inside the key.

        'BRAJDE (Official Video).mp4'      → 'brajde'
        'Brajde (HD).mxf'                  → 'brajde'
        'Artist - Title.mp4'               → 'artist title'
        'Artist_Title_Official_Video.mp4'  → 'artist title'
        'BRAJDE - LIVE 2024.mp4'           → 'brajde 2024'   (digits are kept)

    Returns an empty string when nothing but noise remains; callers treat
    that as "cannot be deduplicated".

    NOTE(review): names containing separators now normalize differently than
    before (single spaces instead of space runs). Rows already stored under
    the old keys will not match — confirm whether a re-normalization of
    existing rows is needed.
    """
    import re

    name = Path(filename).stem.lower()
    # Bracketed segments become a space so neighbouring words do not merge.
    name = re.sub(r'\([^)]*\)', ' ', name)
    name = re.sub(r'\[[^\]]*\]', ' ', name)
    # Common separators → space (must precede the \b-based noise-word pass).
    name = re.sub(r'[-_.]+', ' ', name)
    # Common suffix / noise words.
    name = re.sub(r'\b(official|video|hd|4k|lyric|audio|music|mv|live|cover|version|remix)\b', ' ', name)
    # Collapse whitespace last, after every pass that can introduce spaces.
    return re.sub(r'\s+', ' ', name).strip()
|
||||||
|
|
||||||
|
|
||||||
|
def _dedup_init() -> None:
    """Create the ``processed_videos`` table and its index if they are missing.

    Safe to call repeatedly: both statements use ``IF NOT EXISTS``.
    The connection is closed even when a statement raises.
    """
    import sqlite3
    from contextlib import closing

    # closing() guarantees conn.close(); sqlite3's own context manager only
    # commits/rolls back and leaves the connection open.
    with closing(sqlite3.connect(str(DEDUP_DB))) as conn:
        conn.execute("""
            CREATE TABLE IF NOT EXISTS processed_videos (
                normalized_name TEXT NOT NULL,
                tv_station TEXT NOT NULL,
                filename_orig TEXT NOT NULL,
                job_id TEXT NOT NULL,
                nextcloud_url TEXT,
                file_size_mb REAL,
                uploaded_at REAL NOT NULL,
                PRIMARY KEY (normalized_name, tv_station)
            )
        """)
        # NOTE(review): normalized_name is the leading column of the primary
        # key, so this index looks redundant — confirm before dropping it.
        conn.execute("CREATE INDEX IF NOT EXISTS idx_norm ON processed_videos(normalized_name)")
        conn.commit()
|
||||||
|
|
||||||
|
|
||||||
|
def dedup_check(filename: str, tv_station: str) -> Optional[dict]:
    """Look up an already-processed song for the given TV station.

    Args:
        filename: Original filename; it is normalized before the lookup.
        tv_station: Station name; the same song may exist on several stations.

    Returns:
        The matching ``processed_videos`` row as a dict, or ``None`` when
        there is no match or the filename normalizes to an empty string.
    """
    import sqlite3
    from contextlib import closing

    _dedup_init()
    norm = _normalize_filename(filename)
    if not norm:
        return None

    # closing() ensures the connection is released even if the query raises.
    with closing(sqlite3.connect(str(DEDUP_DB))) as conn:
        conn.row_factory = sqlite3.Row
        row = conn.execute(
            "SELECT * FROM processed_videos WHERE normalized_name = ? AND tv_station = ?",
            (norm, tv_station),
        ).fetchone()

    return dict(row) if row is not None else None
|
||||||
|
|
||||||
|
|
||||||
|
def dedup_record(
    filename: str,
    tv_station: str,
    job_id: str,
    nextcloud_url: Optional[str] = None,
    file_size_mb: Optional[float] = None,
) -> None:
    """Record a successfully processed and uploaded song.

    An existing row for the same (normalized name, station) pair is replaced.
    Nothing is written when the filename normalizes to an empty string.

    Args:
        filename: Original filename; stored as-is and also normalized for the key.
        tv_station: Station the song was uploaded for.
        job_id: Identifier of the job that produced the upload.
        nextcloud_url: Value stored in the ``nextcloud_url`` column, if any.
        file_size_mb: Value stored in the ``file_size_mb`` column, if any.
    """
    import sqlite3
    from contextlib import closing

    _dedup_init()
    norm = _normalize_filename(filename)
    if not norm:
        return

    # closing() ensures the connection is released even if the insert raises.
    with closing(sqlite3.connect(str(DEDUP_DB))) as conn:
        conn.execute("""
            INSERT OR REPLACE INTO processed_videos
            (normalized_name, tv_station, filename_orig, job_id, nextcloud_url, file_size_mb, uploaded_at)
            VALUES (?, ?, ?, ?, ?, ?, ?)
        """, (norm, tv_station, filename, job_id, nextcloud_url, file_size_mb, time.time()))
        conn.commit()

    print(f"📒 Dedup: zabeležen {norm} → {tv_station} (job {job_id})", flush=True)
|
||||||
|
|
||||||
|
|
||||||
|
def dedup_remove(filename: str, tv_station: str) -> None:
    """Delete the dedup row for a song (e.g. the user wants to re-process it).

    Does nothing when the filename normalizes to an empty string; deleting a
    row that does not exist is not an error.

    Args:
        filename: Original filename; it is normalized to find the row.
        tv_station: Station whose record should be removed.
    """
    import sqlite3
    from contextlib import closing

    _dedup_init()
    norm = _normalize_filename(filename)
    if not norm:
        return

    # closing() ensures the connection is released even if the delete raises.
    with closing(sqlite3.connect(str(DEDUP_DB))) as conn:
        conn.execute(
            "DELETE FROM processed_videos WHERE normalized_name = ? AND tv_station = ?",
            (norm, tv_station),
        )
        conn.commit()
|
||||||
|
|
||||||
|
|
||||||
AUTH_USER = os.environ.get("AUTH_USER", "sebastjan")
|
AUTH_USER = os.environ.get("AUTH_USER", "sebastjan")
|
||||||
AUTH_PASS = os.environ.get("AUTH_PASS", "change-me-in-coolify-env")
|
AUTH_PASS = os.environ.get("AUTH_PASS", "change-me-in-coolify-env")
|
||||||
|
|
||||||
@ -787,6 +882,13 @@ def process_job(job_id):
|
|||||||
auto_upload_to_nextcloud=False, # disable da se ne ponovi
|
auto_upload_to_nextcloud=False, # disable da se ne ponovi
|
||||||
hidden_after_upload=True, # signal za UI da ga skrije
|
hidden_after_upload=True, # signal za UI da ga skrije
|
||||||
)
|
)
|
||||||
|
# Zabeleži v dedup
|
||||||
|
try:
|
||||||
|
orig_filename = final_job.get("filename") or download_name
|
||||||
|
file_mb = final_job.get("output_size_mb") or final_job.get("size_mb")
|
||||||
|
dedup_record(orig_filename, tv_station, job_id, nextcloud_url=result, file_size_mb=file_mb)
|
||||||
|
except Exception as e:
|
||||||
|
print(f"⚠️ Dedup record failed: {e}", flush=True)
|
||||||
print(f"☁️ Auto-upload OK: /{target_subdir}/{download_name}", flush=True)
|
print(f"☁️ Auto-upload OK: /{target_subdir}/{download_name}", flush=True)
|
||||||
else:
|
else:
|
||||||
update_job(job_id, nextcloud_status="error", nextcloud_error=result)
|
update_job(job_id, nextcloud_status="error", nextcloud_error=result)
|
||||||
@ -1062,6 +1164,55 @@ class StartJobIn(BaseModel):
|
|||||||
tv_station: str = "FOLX SLOVENIJA"
|
tv_station: str = "FOLX SLOVENIJA"
|
||||||
|
|
||||||
|
|
||||||
|
# ────────────────────────────────────────────────────────────────
|
||||||
|
# Dedup check
|
||||||
|
# ────────────────────────────────────────────────────────────────
|
||||||
|
class DedupCheckRequest(BaseModel):
    """Request body shared by the dedup check and dedup remove endpoints."""
    # Filenames as sent by the client; each one is normalized server-side.
    filenames: list[str]
    # Station the lookup/removal is scoped to.
    tv_station: str = "FOLX SLOVENIJA"
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/api/dedup/check")
async def dedup_check_endpoint(payload: DedupCheckRequest, user: str = Depends(check_auth)):
    """Report which of the given filenames were already processed for the station.

    The response maps every requested filename to its stored record, or to
    null when no record exists: ``{"results": {filename: match | null}, "tv_station": ...}``.
    """
    station = payload.tv_station
    matches = {name: dedup_check(name, station) for name in payload.filenames}
    return {"results": matches, "tv_station": station}
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/api/dedup/remove")
async def dedup_remove_endpoint(payload: DedupCheckRequest, user: str = Depends(check_auth)):
    """Delete dedup records so the user can process the given songs again."""
    station = payload.tv_station
    names = payload.filenames
    for name in names:
        dedup_remove(name, station)
    return {"ok": True, "removed": names}
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/api/dedup/list")
async def dedup_list(tv_station: Optional[str] = None, user: str = Depends(check_auth)):
    """List all processed songs, newest first, optionally filtered by TV station.

    An omitted or empty ``tv_station`` returns the records of every station.

    Returns:
        ``{"count": <number of rows>, "items": [<row as dict>, ...]}``
    """
    import sqlite3
    from contextlib import closing

    _dedup_init()

    if tv_station:
        query = "SELECT * FROM processed_videos WHERE tv_station = ? ORDER BY uploaded_at DESC"
        params = (tv_station,)
    else:
        query = "SELECT * FROM processed_videos ORDER BY uploaded_at DESC"
        params = ()

    # closing() ensures the connection is released even if the query raises.
    with closing(sqlite3.connect(str(DEDUP_DB))) as conn:
        conn.row_factory = sqlite3.Row
        items = [dict(r) for r in conn.execute(query, params).fetchall()]

    return {"count": len(items), "items": items}
|
||||||
|
|
||||||
|
|
||||||
# ────────────────────────────────────────────────────────────────
|
# ────────────────────────────────────────────────────────────────
|
||||||
# Upload (file)
|
# Upload (file)
|
||||||
# ────────────────────────────────────────────────────────────────
|
# ────────────────────────────────────────────────────────────────
|
||||||
@ -1654,6 +1805,13 @@ async def upload_nextcloud(job_id: str, user: str = Depends(check_auth)):
|
|||||||
if success:
|
if success:
|
||||||
update_job(job_id, nextcloud_status="uploaded", nextcloud_url=result, nextcloud_error=None)
|
update_job(job_id, nextcloud_status="uploaded", nextcloud_url=result, nextcloud_error=None)
|
||||||
print(f"☁️ Upload OK: /{target_subdir}/{download_name}", flush=True)
|
print(f"☁️ Upload OK: /{target_subdir}/{download_name}", flush=True)
|
||||||
|
# Zabeleži v dedup
|
||||||
|
try:
|
||||||
|
orig_filename = job.get("filename") or download_name
|
||||||
|
file_mb = job.get("output_size_mb") or job.get("size_mb")
|
||||||
|
dedup_record(orig_filename, tv_station, job_id, nextcloud_url=result, file_size_mb=file_mb)
|
||||||
|
except Exception as e:
|
||||||
|
print(f"⚠️ Dedup record failed: {e}", flush=True)
|
||||||
return {"ok": True, "url": result, "filename": download_name, "tv_station": tv_station}
|
return {"ok": True, "url": result, "filename": download_name, "tv_station": tv_station}
|
||||||
else:
|
else:
|
||||||
update_job(job_id, nextcloud_status="error", nextcloud_error=result)
|
update_job(job_id, nextcloud_status="error", nextcloud_error=result)
|
||||||
|
|||||||
@ -547,11 +547,35 @@
|
|||||||
return [null, null];
|
return [null, null];
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add the selected files to the pending queue. Before queuing, the server is
// asked which of them were already processed for the currently selected TV
// station; matching items carry the stored record in `dedup`.
async function addFilesToQueue(files) {
    const queued = [...files].map((file) => {
        const [artist, title] = parseArtistTitle(file.name);
        return { file, artist, title, dedup: null };
    });

    // Dedup check before the items enter the queue
    const tvStation = $("#tv-station-input").value || "FOLX SLOVENIJA";
    try {
        const names = queued.map((entry) => entry.file.name);
        const response = await fetch("/api/dedup/check", {
            method: "POST",
            headers: { "Content-Type": "application/json" },
            body: JSON.stringify({ filenames: names, tv_station: tvStation }),
        });
        if (response.ok) {
            const data = await response.json();
            for (const entry of queued) {
                // record shape: {normalized_name, tv_station, filename_orig, job_id, nextcloud_url, file_size_mb, uploaded_at}
                const found = data.results[entry.file.name];
                if (found) {
                    entry.dedup = found;
                }
            }
        }
    } catch (e) {
        // Best effort: a failed check must not block queuing.
        console.warn("Dedup check failed:", e);
    }

    pendingFiles.push(...queued);
    renderFileQueue();
}
|
||||||
|
|
||||||
@ -560,6 +584,26 @@
|
|||||||
renderFileQueue();
|
renderFileQueue();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// The user wants to re-process a song even though it was already uploaded.
// Deletes the server-side dedup record (best effort), then clears the local
// dedup flag so the item is no longer filtered out on submit.
window.forceReprocess = async function(idx) {
    const item = pendingFiles[idx];
    if (!item || !item.dedup) return;
    const tvStation = item.dedup.tv_station;
    // Delete the dedup record
    try {
        const r = await fetch("/api/dedup/remove", {
            method: "POST",
            headers: { "Content-Type": "application/json" },
            body: JSON.stringify({ filenames: [item.file.name], tv_station: tvStation }),
        });
        // fetch() only rejects on network errors; HTTP 4xx/5xx must be checked explicitly.
        if (!r.ok) {
            console.warn("Dedup remove failed: HTTP " + r.status);
        }
    } catch (e) {
        console.warn("Dedup remove failed:", e);
    }
    item.dedup = null;
    item.forceReprocess = true;
    renderFileQueue();
};
|
||||||
|
|
||||||
function renderFileQueue() {
|
function renderFileQueue() {
|
||||||
const q = $("#file-queue");
|
const q = $("#file-queue");
|
||||||
if (!q) return;
|
if (!q) return;
|
||||||
@ -589,11 +633,23 @@
|
|||||||
nameHtml = `${escapeHtml(item.file.name)}` +
|
nameHtml = `${escapeHtml(item.file.name)}` +
|
||||||
`<div class="warn">⚠ Brez razvidnega imena — ACR bo poskusil prepoznati</div>`;
|
`<div class="warn">⚠ Brez razvidnega imena — ACR bo poskusil prepoznati</div>`;
|
||||||
}
|
}
|
||||||
|
// Dedup warning
|
||||||
|
if (item.dedup) {
|
||||||
|
const date = new Date(item.dedup.uploaded_at * 1000).toLocaleDateString("sl-SI");
|
||||||
|
nameHtml += `<div style="margin-top:4px; padding:4px 6px; background:rgba(239,68,68,0.15); border-left:3px solid #ef4444; border-radius:3px; font-size:11px; color:#fca5a5;">
|
||||||
|
⚠ <b>Že naložen na ${escapeHtml(item.dedup.tv_station)}</b> (${date}) — <a href="#" onclick="forceReprocess(${idx}); return false;" style="color:#ffd700; text-decoration:underline;">Re-process</a>
|
||||||
|
</div>`;
|
||||||
|
}
|
||||||
div.innerHTML = `
|
div.innerHTML = `
|
||||||
<div class="name">${nameHtml}</div>
|
<div class="name">${nameHtml}</div>
|
||||||
<div class="size">${sizeMB} MB</div>
|
<div class="size">${sizeMB} MB</div>
|
||||||
<button class="remove" data-idx="${idx}" title="Odstrani">×</button>
|
<button class="remove" data-idx="${idx}" title="Odstrani">×</button>
|
||||||
`;
|
`;
|
||||||
|
if (item.dedup && !item.forceReprocess) {
|
||||||
|
div.style.opacity = "0.6";
|
||||||
|
}
|
||||||
|
q.appendChild(div);
|
||||||
|
});
|
||||||
q.appendChild(div);
|
q.appendChild(div);
|
||||||
});
|
});
|
||||||
|
|
||||||
@ -769,11 +825,23 @@
|
|||||||
|
|
||||||
// Generate batch ID za skupinsko sledenje (Telegram summary)
|
// Generate batch ID za skupinsko sledenje (Telegram summary)
|
||||||
const batchId = "batch-" + Date.now().toString(36) + "-" + Math.random().toString(36).slice(2, 8);
|
const batchId = "batch-" + Date.now().toString(36) + "-" + Math.random().toString(36).slice(2, 8);
|
||||||
const totalFiles = pendingFiles.length;
|
|
||||||
|
// Filtriraj ven dedup-ed items (uporabnik mora kliknili Re-process)
|
||||||
|
const filesToProcess = pendingFiles.filter(item => !item.dedup);
|
||||||
|
if (filesToProcess.length === 0) {
|
||||||
|
alert("Vsi izbrani komadi so že naloženi. Klikni 'Re-process' za ponovno obdelavo.");
|
||||||
|
$("#submit-btn").disabled = false;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (filesToProcess.length < pendingFiles.length) {
|
||||||
|
const skipped = pendingFiles.length - filesToProcess.length;
|
||||||
|
console.log(`Preskočil ${skipped} že obdelanih komadov`);
|
||||||
|
}
|
||||||
|
const totalFiles = filesToProcess.length;
|
||||||
|
|
||||||
// Upload + queue all files SEQUENTIALLY (1 hkrati za stabilnost)
|
// Upload + queue all files SEQUENTIALLY (1 hkrati za stabilnost)
|
||||||
for (let i = 0; i < pendingFiles.length; i++) {
|
for (let i = 0; i < filesToProcess.length; i++) {
|
||||||
const item = pendingFiles[i];
|
const item = filesToProcess[i];
|
||||||
const f = item.file;
|
const f = item.file;
|
||||||
const sizeMB = (f.size / 1024 / 1024).toFixed(1);
|
const sizeMB = (f.size / 1024 / 1024).toFixed(1);
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user