From 157e6b781e5900be39944172ab3542e799bb521c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastjan=20Arti=C4=8D?= Date: Wed, 29 Apr 2026 15:04:18 +0000 Subject: [PATCH] =?UTF-8?q?Fix=20'=C5=BDena'=20word=20still=20cut:=20word-?= =?UTF-8?q?level=20start=20extension=20instead=20of=20segment-level?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previous fix used segment boundaries — required segments <3s for type 1 or <4s for type 2. But Žena was in a 4.3s segment ('saj še doma mi več noč'jo verjet'. Žena me'), so the condition wasn't met and clip start stayed at 77.7s, exactly at end of word 'Žena' (76.88-77.70s). New approach: scan word-level timestamps directly: 1. If clip start falls MID-WORD → extend back to word start - 0.15s 2. If a word ends 0-0.5s BEFORE clip start AND next word is at clip start → that word is suspect (may be first word of chorus that Scribe put in previous segment), extend back to its start - 0.15s Word-level timestamps are always available from Scribe (timestamps_granularity=word). Falls back to segment-level for local Whisper without word timing. This handles arbitrary segment lengths and is universal — works for any language where the chorus starts on a word that the STT placed in the previous segment. --- scripts/analyze.py | 89 ++++++++++++++++++++++++++++---------------- templates/index.html | 49 +++++++++++++++++++++++- 2 files changed, 103 insertions(+), 35 deletions(-) diff --git a/scripts/analyze.py b/scripts/analyze.py index 3b8b320..1739961 100644 --- a/scripts/analyze.py +++ b/scripts/analyze.py @@ -1363,42 +1363,65 @@ def main(): # # Strategija: če clip start pade SREDI segmenta (ne tik na začetku), # razširi nazaj na začetek tega segmenta + 0.2s buffer. + # ── EXTEND clip START nazaj če Claude začne sredi besede ali tik za njo ── + # Pesem se pogosto začne na isti besedi v transkriptu, ampak Scribe lahko + # zazna mejo med segmenti **PO** prvi besedi (npr. "Žena me tepe" — "Žena" + # je v prejšnjem segmentu pri 76.88-77.70s, novi segment začne 78.30). + # Claude reže tipično na začetku novega segmenta = odrezana prva beseda. + # + # Strategija: **na ravni besed** — najdi besedo katere konec je + # blizu clip start (±0.5s) IN preveri ali se lahko ta beseda + # "naslanja" na clip (z malo pavze do naslednje besede). current_start = clip_range["start"] + + # Zberi VSE besede z njihovimi timestampi + all_words = [] for seg in corrected_segs: - seg_start = float(seg.get("start", 0)) - seg_end = float(seg.get("end", 0)) - # Segment ki se prekriva s clip start (start je MED njim) - if seg_start < current_start < seg_end: - # Razširi nazaj — vendar samo če je segment kratek (<3s) in - # se "naslanja" na clip (zadnja beseda lahko vodi v refren) - if (seg_end - seg_start) < 3.0: - new_start = max(0, seg_start - 0.2) # 0.2s buffer pred prvo besedo - if new_start < current_start: - print(f" 🎵 Razširim clip začetek {current_start:.1f}s → {new_start:.1f}s " - f"(prva beseda refrena je v prejšnjem segmentu)", file=sys.stderr) + for w in seg.get("words", []): + if w.get("start") is not None and w.get("end") is not None: + all_words.append({ + "start": float(w["start"]), + "end": float(w["end"]), + "text": w.get("text", ""), + }) + + if all_words: + # Najdi "rob" — beseda kjer končanje zelo blizu clip start + # ALI clip start je sredi besede (besedo bi odrezali) + for i, w in enumerate(all_words): + # Beseda zaobsega clip start (clip reže sredi besede) + if w["start"] < current_start < w["end"]: + new_start = max(0, w["start"] - 0.15) + print(f" 🎵 Razširim clip začetek {current_start:.2f}s → {new_start:.2f}s " + f"(clip rezal sredi besede '{w['text'].strip()}')", file=sys.stderr) + current_start = new_start + break + # Beseda končana TIK pred clip start (do 0.5s pred) + # IN je naslednja beseda PO/blizu clip start + if 0 < (current_start - w["end"]) <= 0.5: + # Preveri naslednjo besedo + next_w = all_words[i + 1] if i + 1 < len(all_words) else None + if next_w and next_w["start"] >= current_start - 0.1: + # Razdalja od te besede do naslednje > 0.3s pomeni mogoče prelom verz/refren + gap_to_next = next_w["start"] - w["end"] + # Razširi nazaj na začetek te besede - 0.15s buffer + new_start = max(0, w["start"] - 0.15) + print(f" 🎵 Razširim clip začetek {current_start:.2f}s → {new_start:.2f}s " + f"(beseda '{w['text'].strip()}' končana {current_start - w['end']:.2f}s pred clip start, " + f"morda začne refren; gap do '{next_w['text'].strip()}' = {gap_to_next:.2f}s)", file=sys.stderr) current_start = new_start - break - # Segment ki se konča TOČNO ali tik pred clip start (lahko zadnja - # beseda refrena = "Žena" se konča na 78.2 ko clip začne 78.3) - elif current_start - 0.5 <= seg_end <= current_start + 0.1: - # Preveri ali zadnja beseda v segmentu morda **začne refren** - seg_text = seg.get("text", "").strip() - # Če segment kaže novo frazo (z veliko začetnico po pavzi) ali - # vsebuje znake interpunkcije, morda zadnja beseda res začne refren - if seg_text and (seg_end - seg_start) < 4.0: - # Razširi nazaj na začetek POSLEDJE besede tega segmenta - words = seg.get("words", []) - if words: - last_word = words[-1] - new_start = max(0, float(last_word.get("start", seg_start)) - 0.15) - else: - # Brez word-level: vzemi 0.5s nazaj - new_start = max(0, current_start - 0.5) - if new_start < current_start: - print(f" 🎵 Razširim clip začetek {current_start:.1f}s → {new_start:.1f}s " - f"(zadnja beseda prejšnjega segmenta morda začne refren)", file=sys.stderr) - current_start = new_start - break + break + else: + # Fallback: če ni word-level (npr. local Whisper), uporabi segmente kot prej + for seg in corrected_segs: + seg_start = float(seg.get("start", 0)) + seg_end = float(seg.get("end", 0)) + if seg_start < current_start < seg_end: + new_start = max(0, current_start - 0.5) + print(f" 🎵 Razširim clip začetek {current_start:.2f}s → {new_start:.2f}s " + f"(brez word-level, fallback -0.5s)", file=sys.stderr) + current_start = new_start + break if current_start < clip_range["start"]: clip_range["start"] = round(current_start, 2) diff --git a/templates/index.html b/templates/index.html index 6cafa24..fb6a6fc 100644 --- a/templates/index.html +++ b/templates/index.html @@ -263,6 +263,50 @@ } .modal-actions button:hover { background: var(--panel-2); } .modal-actions button.primary:hover { background: var(--accent-2); } + + /* ─── Multi-file queue ─── */ + .file-queue { + margin-top: 12px; + display: flex; + flex-direction: column; + gap: 6px; + } + .file-queue-item { + background: var(--panel-2); + border: 1px solid var(--border); + border-radius: 6px; + padding: 8px 10px; + display: flex; + align-items: center; + gap: 10px; + font-size: 13px; + } + .file-queue-item .name { + flex: 1; + overflow: hidden; + text-overflow: ellipsis; + white-space: nowrap; + } + .file-queue-item .name b { color: var(--accent-2); } + .file-queue-item .size { + color: var(--muted); + font-size: 11px; + flex-shrink: 0; + } + .file-queue-item .remove { + background: transparent; + border: none; + color: var(--muted); + cursor: pointer; + font-size: 16px; + padding: 0 4px; + line-height: 1; + } + .file-queue-item .remove:hover { color: var(--error); } + .file-queue-item .warn { + color: var(--warn); + font-size: 10px; + } @@ -289,9 +333,10 @@
Klikni ali povleci video sem
-
.mp4, .mov, .webm — do 2 GB
- +
.mp4, .mov, .webm, .mxf, .mpg — do 10 GB · Lahko izberete več datotek hkrati
+ +