From 4efd7261769588c45a1130cc8bfa9b0f8944e9c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastjan=20Arti=C4=8D?= Date: Wed, 29 Apr 2026 13:12:28 +0000 Subject: [PATCH] Extend clip end past chorus to capture outro/sustained notes Problem: Claude was cutting the clip exactly at the last transcribed word of the chorus, but in real songs: - The singer holds the last note 1-3s longer (still meaningful) - An outro 'ej-ej-ej' / 'oh' / 'yeah' may not be transcribed as words - The result felt like an 'incomplete chorus' even though the SRT was correct The fix has two parts: 1. Prompt enhancement: - Ask Claude to add 1-2s of padding AFTER the last chorus word - Include an explicit example with timing math - Mention outro fillers (ej-ej-ej, oh, yeah) 2. Post-LLM extension logic: - After Claude returns the clip range, scan corrected_segments (falling back to the transcript segments) for segments overlapping or starting just after the current end - If the next segment starts after a pause of less than 1s and ends within the extension limit, extend the clip to include it (with 0.3s breathing room) - Hard cap at clip start + max_duration + 5s, and never past the end of the audio, to prevent unbounded extension This ensures the chorus naturally trails off rather than being cut mid-emotional-peak. 
--- scripts/analyze.py | 53 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 50 insertions(+), 3 deletions(-) diff --git a/scripts/analyze.py b/scripts/analyze.py index 88ddc2c..6c86fe2 100644 --- a/scripts/analyze.py +++ b/scripts/analyze.py @@ -816,11 +816,13 @@ PROSIM: - Refren = ves prvi nastop refrena (običajno 10-20s) - SKUPAJ: 20-35 sekund - Začni na začetku build-up verza (ne sredi besede) - - Končaj na zadnji besedi refrena - - Primer: če refren začne na 32s in je dolg 16s → izberi 20-48s (12s build-up + 16s refren) + - **Končaj 1-2 sekunde PO zadnji besedi refrena** — pevec ima outro / drži ton, naj clip zajame celoten emocionalni vrh + - **NE reži clip točno na zadnjo besedo** — refren naj se naravno izteče (vključno s "ej-ej-ej", "oh", "yeah" outroji) + - Primer: če refren začne na 32s in zadnja beseda konča na 48s → izberi 20-50s (12s build-up + 16s refren + 2s outro padding) 🥈 **DRUGA IZBIRA** (samo če pre-chorus ni dovolj močan): samo cel **PRVI** refren - - Brez build-upa, samo refren z malo dihanja okoli (1-2s padding) + - Brez build-upa, samo refren z 1-2s padding pred in 1-2s padding po + - Ne sekaj ravno na zadnji besedi 🥉 **TRETJA IZBIRA** (samo če pesem nima jasnega refrena): najbolj dramatičen/zaključen del @@ -1277,6 +1279,51 @@ def main(): clip_range["end"] = clip_range["start"] + args.max_duration clip_range["duration"] = args.max_duration clip_range["reason"] += " (capped at max_duration)" + + # ── EXTEND clip end do naslednje naravne pavze ── + # LLM pogosto reže točno na zadnji besedi refrena, ampak zadnja + # beseda ima še "ej-ej-ej" outro / pevec drži zadnji ton 1-3s. + # Razširimo clip do naslednje >= 1s pavze ali instrumentalnega bridg-a, + # ampak ne čez max_duration + 5s. 
+ corrected_segs = claude_result.get("corrected_segments") or transcript["segments"] + current_end = clip_range["end"] + extension_limit = min( + clip_range["start"] + args.max_duration + 5, # max 5s nad max_duration + duration # ne čez celoten audio + ) + + # Najdi vse segmente ki se začnejo PO trenutnem clip end + for seg in corrected_segs: + seg_start = float(seg.get("start", 0)) + seg_end = float(seg.get("end", 0)) + # Segment začnemo po trenutnem clip end + if seg_start <= current_end: + # Segment se prekriva s clip — če konča pred extension_limit, podaljšaj + if seg_end > current_end and seg_end <= extension_limit: + # Podaljšaj clip do konca tega segmenta + 0.3s diha + new_end = min(seg_end + 0.3, extension_limit) + if new_end > current_end: + print(f" 🎵 Podaljšam clip {current_end:.1f}s → {new_end:.1f}s " + f"(zadnji segment refrena se zaključi)", file=sys.stderr) + current_end = new_end + else: + # Segment je popolnoma za clip end — preverim ali je v dosegu in ali nima dolge pavze pred njim + pause = seg_start - current_end + if pause < 1.0 and seg_end <= extension_limit: + # Še vedno povezano s clipom (kratko pavzo, naprej outro/echo) + new_end = min(seg_end + 0.3, extension_limit) + if new_end > current_end: + print(f" 🎵 Podaljšam clip {current_end:.1f}s → {new_end:.1f}s " + f"(outro segment z {pause:.1f}s pavzo)", file=sys.stderr) + current_end = new_end + else: + # Daljša pavza — ustavi se tu + break + + if current_end > clip_range["end"]: + clip_range["end"] = round(current_end, 2) + clip_range["duration"] = round(current_end - clip_range["start"], 2) + clip_range["reason"] += f" (extended to natural pause)" else: clip_range = smart_clip_range( chorus, transcript, duration,