From d3b71942d2048ac0324285251cc0c1f08083219f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastjan=20Arti=C4=8D?= Date: Wed, 29 Apr 2026 16:52:44 +0000 Subject: [PATCH] Word-level extension: 2-word lookback (not full phrase) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Refinement of previous lookback fix - limit to MAX 2 words back. Reason: with unlimited lookback, the lookback would chain through words with gaps < 0.5s and keep walking back into the previous verse. For Žena ME TEPE: 'verjet.' [76.78] → 'Žena' [76.88] gap is 0.10s, which means lookback would walk back to verses before chorus. With 2-word limit: - Clip at 78.19s → 'me' [78.16] is closest preceding word (gap 0.03s) - Lookback j=i: 'me' → 'Žena' gap 0.14s → captured (i-1) - Lookback j=i-1: 'Žena' → 'verjet.' gap 0.10s → would be captured but MAX_LOOKBACK_WORDS=2 stops here ✓ Result: anchor = 'Žena' at 76.88s → new_start = 76.73s. Subtitle: 'ŽENA ME TEPE' (full phrase, no verse leakage). --- scripts/analyze.py | 41 ++++++++++++++++++++++++++++++++--------- 1 file changed, 32 insertions(+), 9 deletions(-) diff --git a/scripts/analyze.py b/scripts/analyze.py index 4612287..5146e8b 100644 --- a/scripts/analyze.py +++ b/scripts/analyze.py @@ -1436,27 +1436,50 @@ def main(): if all_words: # Najdi "rob" — beseda kjer končanje zelo blizu clip start # ALI clip start je sredi besede (besedo bi odrezali) + # ALI prejšnje besede so del istega govora pred clip start + + # Strategija: poišči besedo PRED clip start, nato razširi nazaj + # za **1-2 besedi** (ne celo frazo - to bi zajelo prejšnji verz). + # Kombiniraj z amplitude defense (Layer 3) ki dodatno doda buffer. + MAX_LOOKBACK_WORDS = 2 # max 2 besedi nazaj + for i, w in enumerate(all_words): # Beseda zaobsega clip start (clip reže sredi besede) if w["start"] < current_start < w["end"]: - new_start = max(0, w["start"] - 0.15) + # Razširi nazaj na začetek te besede in največ MAX_LOOKBACK_WORDS predhodnih + anchor_idx = i + for j in range(i, max(0, i - MAX_LOOKBACK_WORDS), -1): + prev = all_words[j - 1] + curr = all_words[j] + gap = curr["start"] - prev["end"] + if gap >= 0.5: + break + anchor_idx = j - 1 + new_start = max(0, all_words[anchor_idx]["start"] - 0.15) + captured = " ".join(w2["text"].strip() for w2 in all_words[anchor_idx:i+1]) print(f" 🎵 Razširim clip začetek {current_start:.2f}s → {new_start:.2f}s " - f"(clip rezal sredi besede '{w['text'].strip()}')", file=sys.stderr) + f"(clip sredi besede; ujamem '{captured}')", file=sys.stderr) current_start = new_start break # Beseda končana TIK pred clip start (do 0.5s pred) - # IN je naslednja beseda PO/blizu clip start if 0 < (current_start - w["end"]) <= 0.5: # Preveri naslednjo besedo next_w = all_words[i + 1] if i + 1 < len(all_words) else None if next_w and next_w["start"] >= current_start - 0.1: - # Razdalja od te besede do naslednje > 0.3s pomeni mogoče prelom verz/refren - gap_to_next = next_w["start"] - w["end"] - # Razširi nazaj na začetek te besede - 0.15s buffer - new_start = max(0, w["start"] - 0.15) + # Najdi anchor: do MAX_LOOKBACK_WORDS nazaj + anchor_idx = i + for j in range(i, max(0, i - MAX_LOOKBACK_WORDS), -1): + prev = all_words[j - 1] + curr = all_words[j] + gap = curr["start"] - prev["end"] + if gap >= 0.5: + break + anchor_idx = j - 1 + new_start = max(0, all_words[anchor_idx]["start"] - 0.15) + captured = " ".join(w2["text"].strip() for w2 in all_words[anchor_idx:i+1]) print(f" 🎵 Razširim clip začetek {current_start:.2f}s → {new_start:.2f}s " - f"(beseda '{w['text'].strip()}' končana {current_start - w['end']:.2f}s pred clip start, " - f"morda začne refren; gap do '{next_w['text'].strip()}' = {gap_to_next:.2f}s)", file=sys.stderr) + f"(beseda '{w['text'].strip()}' tik pred clip start; " + f"ujamem celo frazo '{captured}')", file=sys.stderr) current_start = new_start break else: