Fix 'Žena' word still cut: word-level start extension instead of segment-level
The previous fix used segment boundaries — it required segments <3s for type 1
or <4s for type 2. But "Žena" was in a 4.3s segment ("saj še doma mi več
noč'jo verjet'. Žena me"), so the condition wasn't met and the clip start
stayed at 77.7s, exactly at the end of the word "Žena" (76.88-77.70s).
New approach: scan word-level timestamps directly:
1. If the clip start falls MID-WORD → extend back to that word's start - 0.15s
2. If a word ends up to 0.5s BEFORE the clip start AND the next word starts at
the clip start or later (with 0.1s tolerance) → that word is suspect (it may
be the first word of the chorus that Scribe put in the previous segment),
so extend back to its start - 0.15s
Word-level timestamps are always available from Scribe (timestamps_granularity=word).
For local Whisper without word timing, it falls back to a segment-level check:
if the clip start falls inside a segment, the start is moved back by a fixed 0.5s.
This handles arbitrary segment lengths and is universal — works for any
language where the chorus starts on a word that the STT placed in the
previous segment.
This commit is contained in:
parent
a5097c5acc
commit
157e6b781e
@ -1363,42 +1363,65 @@ def main():
|
||||
#
|
||||
# Strategija: če clip start pade SREDI segmenta (ne tik na začetku),
|
||||
# razširi nazaj na začetek tega segmenta + 0.2s buffer.
|
||||
# ── EXTEND clip START nazaj če Claude začne sredi besede ali tik za njo ──
|
||||
# Pesem se pogosto začne na isti besedi v transkriptu, ampak Scribe lahko
|
||||
# zazna mejo med segmenti **PO** prvi besedi (npr. "Žena me tepe" — "Žena"
|
||||
# je v prejšnjem segmentu pri 76.88-77.70s, novi segment začne 78.30).
|
||||
# Claude reže tipično na začetku novega segmenta = odrezana prva beseda.
|
||||
#
|
||||
# Strategija: **na ravni besed** — najdi besedo katere konec je
|
||||
# blizu clip start (±0.5s) IN preveri ali se lahko ta beseda
|
||||
# "naslanja" na clip (z malo pavze do naslednje besede).
|
||||
current_start = clip_range["start"]
|
||||
|
||||
# Zberi VSE besede z njihovimi timestampi
|
||||
all_words = []
|
||||
for seg in corrected_segs:
|
||||
seg_start = float(seg.get("start", 0))
|
||||
seg_end = float(seg.get("end", 0))
|
||||
# Segment ki se prekriva s clip start (start je MED njim)
|
||||
if seg_start < current_start < seg_end:
|
||||
# Razširi nazaj — vendar samo če je segment kratek (<3s) in
|
||||
# se "naslanja" na clip (zadnja beseda lahko vodi v refren)
|
||||
if (seg_end - seg_start) < 3.0:
|
||||
new_start = max(0, seg_start - 0.2) # 0.2s buffer pred prvo besedo
|
||||
if new_start < current_start:
|
||||
print(f" 🎵 Razširim clip začetek {current_start:.1f}s → {new_start:.1f}s "
|
||||
f"(prva beseda refrena je v prejšnjem segmentu)", file=sys.stderr)
|
||||
for w in seg.get("words", []):
|
||||
if w.get("start") is not None and w.get("end") is not None:
|
||||
all_words.append({
|
||||
"start": float(w["start"]),
|
||||
"end": float(w["end"]),
|
||||
"text": w.get("text", ""),
|
||||
})
|
||||
|
||||
if all_words:
|
||||
# Najdi "rob" — beseda kjer končanje zelo blizu clip start
|
||||
# ALI clip start je sredi besede (besedo bi odrezali)
|
||||
for i, w in enumerate(all_words):
|
||||
# Beseda zaobsega clip start (clip reže sredi besede)
|
||||
if w["start"] < current_start < w["end"]:
|
||||
new_start = max(0, w["start"] - 0.15)
|
||||
print(f" 🎵 Razširim clip začetek {current_start:.2f}s → {new_start:.2f}s "
|
||||
f"(clip rezal sredi besede '{w['text'].strip()}')", file=sys.stderr)
|
||||
current_start = new_start
|
||||
break
|
||||
# Beseda končana TIK pred clip start (do 0.5s pred)
|
||||
# IN je naslednja beseda PO/blizu clip start
|
||||
if 0 < (current_start - w["end"]) <= 0.5:
|
||||
# Preveri naslednjo besedo
|
||||
next_w = all_words[i + 1] if i + 1 < len(all_words) else None
|
||||
if next_w and next_w["start"] >= current_start - 0.1:
|
||||
# Razdalja od te besede do naslednje > 0.3s pomeni mogoče prelom verz/refren
|
||||
gap_to_next = next_w["start"] - w["end"]
|
||||
# Razširi nazaj na začetek te besede - 0.15s buffer
|
||||
new_start = max(0, w["start"] - 0.15)
|
||||
print(f" 🎵 Razširim clip začetek {current_start:.2f}s → {new_start:.2f}s "
|
||||
f"(beseda '{w['text'].strip()}' končana {current_start - w['end']:.2f}s pred clip start, "
|
||||
f"morda začne refren; gap do '{next_w['text'].strip()}' = {gap_to_next:.2f}s)", file=sys.stderr)
|
||||
current_start = new_start
|
||||
break
|
||||
# Segment ki se konča TOČNO ali tik pred clip start (lahko zadnja
|
||||
# beseda refrena = "Žena" se konča na 78.2 ko clip začne 78.3)
|
||||
elif current_start - 0.5 <= seg_end <= current_start + 0.1:
|
||||
# Preveri ali zadnja beseda v segmentu morda **začne refren**
|
||||
seg_text = seg.get("text", "").strip()
|
||||
# Če segment kaže novo frazo (z veliko začetnico po pavzi) ali
|
||||
# vsebuje znake interpunkcije, morda zadnja beseda res začne refren
|
||||
if seg_text and (seg_end - seg_start) < 4.0:
|
||||
# Razširi nazaj na začetek POSLEDJE besede tega segmenta
|
||||
words = seg.get("words", [])
|
||||
if words:
|
||||
last_word = words[-1]
|
||||
new_start = max(0, float(last_word.get("start", seg_start)) - 0.15)
|
||||
else:
|
||||
# Brez word-level: vzemi 0.5s nazaj
|
||||
new_start = max(0, current_start - 0.5)
|
||||
if new_start < current_start:
|
||||
print(f" 🎵 Razširim clip začetek {current_start:.1f}s → {new_start:.1f}s "
|
||||
f"(zadnja beseda prejšnjega segmenta morda začne refren)", file=sys.stderr)
|
||||
current_start = new_start
|
||||
break
|
||||
break
|
||||
else:
|
||||
# Fallback: če ni word-level (npr. local Whisper), uporabi segmente kot prej
|
||||
for seg in corrected_segs:
|
||||
seg_start = float(seg.get("start", 0))
|
||||
seg_end = float(seg.get("end", 0))
|
||||
if seg_start < current_start < seg_end:
|
||||
new_start = max(0, current_start - 0.5)
|
||||
print(f" 🎵 Razširim clip začetek {current_start:.2f}s → {new_start:.2f}s "
|
||||
f"(brez word-level, fallback -0.5s)", file=sys.stderr)
|
||||
current_start = new_start
|
||||
break
|
||||
|
||||
if current_start < clip_range["start"]:
|
||||
clip_range["start"] = round(current_start, 2)
|
||||
|
||||
@ -263,6 +263,50 @@
|
||||
}
|
||||
.modal-actions button:hover { background: var(--panel-2); }
|
||||
.modal-actions button.primary:hover { background: var(--accent-2); }
|
||||
|
||||
/* ─── Multi-file queue ─── */
|
||||
.file-queue {
|
||||
margin-top: 12px;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
gap: 6px;
|
||||
}
|
||||
.file-queue-item {
|
||||
background: var(--panel-2);
|
||||
border: 1px solid var(--border);
|
||||
border-radius: 6px;
|
||||
padding: 8px 10px;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 10px;
|
||||
font-size: 13px;
|
||||
}
|
||||
.file-queue-item .name {
|
||||
flex: 1;
|
||||
overflow: hidden;
|
||||
text-overflow: ellipsis;
|
||||
white-space: nowrap;
|
||||
}
|
||||
.file-queue-item .name b { color: var(--accent-2); }
|
||||
.file-queue-item .size {
|
||||
color: var(--muted);
|
||||
font-size: 11px;
|
||||
flex-shrink: 0;
|
||||
}
|
||||
.file-queue-item .remove {
|
||||
background: transparent;
|
||||
border: none;
|
||||
color: var(--muted);
|
||||
cursor: pointer;
|
||||
font-size: 16px;
|
||||
padding: 0 4px;
|
||||
line-height: 1;
|
||||
}
|
||||
.file-queue-item .remove:hover { color: var(--error); }
|
||||
.file-queue-item .warn {
|
||||
color: var(--warn);
|
||||
font-size: 10px;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
@ -289,9 +333,10 @@
|
||||
<line x1="12" y1="3" x2="12" y2="15"/>
|
||||
</svg>
|
||||
<div>Klikni ali povleci video sem</div>
|
||||
<div class="small">.mp4, .mov, .webm — do 2 GB</div>
|
||||
<input type="file" id="file-input" accept="video/*,.mxf,.mpg,.mpeg,.ts,.m2ts,.mts" style="display:none">
|
||||
<div class="small">.mp4, .mov, .webm, .mxf, .mpg — do 10 GB · <b>Lahko izberete več datotek hkrati</b></div>
|
||||
<input type="file" id="file-input" accept="video/*,.mxf,.mpg,.mpeg,.ts,.m2ts,.mts" multiple style="display:none">
|
||||
</div>
|
||||
<div id="file-queue" class="file-queue"></div>
|
||||
</div>
|
||||
|
||||
<div id="tab-youtube" class="hidden">
|
||||
|
||||
Loading…
Reference in New Issue
Block a user