Word-level extension: lookback to capture full phrase before clip
Bug: Claude picked the clip start at 78.19s (0.3s before segment 'tepe' at 78.4s). The word-level extension then found the word 'me' (77.88-78.16s) right before the clip start and extended the start to 77.73s (the start of 'me' minus the 0.15s buffer). But the FULL phrase was 'Žena me', where 'Žena' [76.88-77.74] precedes 'me' [77.88-78.16] in the same breath/speech burst (gap 0.14s, not a real pause), so 'Žena' was cut off. Fix: when extending back via word-level timestamps, look back through the earlier words and stop only at a real pause (gap >= 0.5s between consecutive words). This captures the entire connected phrase before the clip start. Now: clip start 78.19s → finds 'me' ending at 78.16s → looks back: 'Žena' ends at 77.74s (gap to 'me' = 0.14s, < 0.5s) → continue. The earlier word 'verjet.' ends at 76.78s (gap to 'Žena' = 0.10s, < 0.5s), so it is captured as well, even though it belongs to the previous verse; the lookback keeps going until it reaches a pause >= 0.5s. For the 'Žena' case the anchor will therefore be at 'Žena', or earlier if there is no big pause directly before it. This makes the extension MUCH more robust for cases where multiple words of the chorus opening fall in the previous transcript segment.
This commit is contained in:
parent
d73453fe50
commit
49a80599e1
@ -1436,27 +1436,50 @@ def main():
|
|||||||
if all_words:
|
if all_words:
|
||||||
# Najdi "rob" — beseda kjer končanje zelo blizu clip start
|
# Najdi "rob" — beseda kjer končanje zelo blizu clip start
|
||||||
# ALI clip start je sredi besede (besedo bi odrezali)
|
# ALI clip start je sredi besede (besedo bi odrezali)
|
||||||
|
# ALI prejšnje besede so del istega govora pred clip start
|
||||||
|
|
||||||
|
# Strategija: poišči prvo besedo PRED clip start ki je dovolj blizu
|
||||||
|
# (gap < 0.5s do naslednje), nato razširi nazaj na **vse povezane besede**
|
||||||
|
# do prve "prave" pavze (>= 0.5s tihota med besedami).
|
||||||
|
|
||||||
for i, w in enumerate(all_words):
|
for i, w in enumerate(all_words):
|
||||||
# Beseda zaobsega clip start (clip reže sredi besede)
|
# Beseda zaobsega clip start (clip reže sredi besede)
|
||||||
if w["start"] < current_start < w["end"]:
|
if w["start"] < current_start < w["end"]:
|
||||||
new_start = max(0, w["start"] - 0.15)
|
# Razširi nazaj na začetek te besede in vse predhodne povezane
|
||||||
|
anchor_idx = i
|
||||||
|
# Najdi najbolj zgodnjo povezano besedo (lookback)
|
||||||
|
for j in range(i, 0, -1):
|
||||||
|
prev = all_words[j - 1]
|
||||||
|
curr = all_words[j]
|
||||||
|
gap = curr["start"] - prev["end"]
|
||||||
|
if gap >= 0.5:
|
||||||
|
break # našli pavzo, prejšnji ne sodi sem
|
||||||
|
anchor_idx = j - 1
|
||||||
|
new_start = max(0, all_words[anchor_idx]["start"] - 0.15)
|
||||||
|
captured = " ".join(w2["text"].strip() for w2 in all_words[anchor_idx:i+1])
|
||||||
print(f" 🎵 Razširim clip začetek {current_start:.2f}s → {new_start:.2f}s "
|
print(f" 🎵 Razširim clip začetek {current_start:.2f}s → {new_start:.2f}s "
|
||||||
f"(clip rezal sredi besede '{w['text'].strip()}')", file=sys.stderr)
|
f"(clip sredi besede; ujamem '{captured}')", file=sys.stderr)
|
||||||
current_start = new_start
|
current_start = new_start
|
||||||
break
|
break
|
||||||
# Beseda končana TIK pred clip start (do 0.5s pred)
|
# Beseda končana TIK pred clip start (do 0.5s pred)
|
||||||
# IN je naslednja beseda PO/blizu clip start
|
|
||||||
if 0 < (current_start - w["end"]) <= 0.5:
|
if 0 < (current_start - w["end"]) <= 0.5:
|
||||||
# Preveri naslednjo besedo
|
# Preveri naslednjo besedo
|
||||||
next_w = all_words[i + 1] if i + 1 < len(all_words) else None
|
next_w = all_words[i + 1] if i + 1 < len(all_words) else None
|
||||||
if next_w and next_w["start"] >= current_start - 0.1:
|
if next_w and next_w["start"] >= current_start - 0.1:
|
||||||
# Razdalja od te besede do naslednje > 0.3s pomeni mogoče prelom verz/refren
|
# Najdi najbolj zgodnjo povezano besedo (lookback)
|
||||||
gap_to_next = next_w["start"] - w["end"]
|
anchor_idx = i
|
||||||
# Razširi nazaj na začetek te besede - 0.15s buffer
|
for j in range(i, 0, -1):
|
||||||
new_start = max(0, w["start"] - 0.15)
|
prev = all_words[j - 1]
|
||||||
|
curr = all_words[j]
|
||||||
|
gap = curr["start"] - prev["end"]
|
||||||
|
if gap >= 0.5:
|
||||||
|
break
|
||||||
|
anchor_idx = j - 1
|
||||||
|
new_start = max(0, all_words[anchor_idx]["start"] - 0.15)
|
||||||
|
captured = " ".join(w2["text"].strip() for w2 in all_words[anchor_idx:i+1])
|
||||||
print(f" 🎵 Razširim clip začetek {current_start:.2f}s → {new_start:.2f}s "
|
print(f" 🎵 Razširim clip začetek {current_start:.2f}s → {new_start:.2f}s "
|
||||||
f"(beseda '{w['text'].strip()}' končana {current_start - w['end']:.2f}s pred clip start, "
|
f"(beseda '{w['text'].strip()}' tik pred clip start; "
|
||||||
f"morda začne refren; gap do '{next_w['text'].strip()}' = {gap_to_next:.2f}s)", file=sys.stderr)
|
f"ujamem celo frazo '{captured}')", file=sys.stderr)
|
||||||
current_start = new_start
|
current_start = new_start
|
||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user