Add audio amplitude defense (Layer 3) for first-word cut prevention
Žena problem persists: even after word-level extension, there are still cases where Scribe doesn't transcribe the very first word, so the clip cuts off the vocal start.

Layer 3 defense: after the word-level start extension, probe the FIRST 150 ms of audio at the clip start with ffmpeg volumedetect. If mean_volume > -35 dB (threshold separating vocal/music from silence), extend the clip start back by 0.5 s as a safety buffer.

This catches cases where:
- Scribe missed the word entirely (no word-level timestamp to extend to)
- the LLM picked a start that is already inside vocal energy
- word-level extension didn't trigger because no nearby word matched

The check is fast (<100 ms) and conservative (it only triggers if the audio is clearly NOT silent). If it's a true musical break (silence before the chorus), mean_volume will be < -40 dB and the extension is skipped.

Three layers of defense now:
1. Claude prompt: 'start ~0.3s before first chorus word'
2. Word-level boundary detection (Scribe word timestamps)
3. Audio amplitude check (catches cases that layers 1-2 missed)
This commit is contained in:
parent
9df58212b2
commit
e06c3efb8e
@ -1428,6 +1428,51 @@ def main():
|
|||||||
clip_range["duration"] = round(clip_range["end"] - current_start, 2)
|
clip_range["duration"] = round(clip_range["end"] - current_start, 2)
|
||||||
clip_range["reason"] += f" (start extended back)"
|
clip_range["reason"] += f" (start extended back)"
|
||||||
|
|
||||||
|
# ── SLOJ 3: AUDIO AMPLITUDE CHECK na samem začetku clipa ──
|
||||||
|
# Tudi po word-level extension lahko clip začne sredi vokala (npr. če
|
||||||
|
# Scribe ni zaznal besede). Kot zadnja obramba: preveri RMS audio
|
||||||
|
# amplitudo v prvih 150ms clipa. Če je > silence threshold = vokal je
|
||||||
|
# že tam, dodaj 0.5s buffer nazaj.
|
||||||
|
try:
|
||||||
|
import subprocess as _sp
|
||||||
|
# ffmpeg lahko prebere kratek segment in vrne RMS volume
|
||||||
|
probe_start = clip_range["start"]
|
||||||
|
probe_dur = 0.15 # prvih 150ms
|
||||||
|
if probe_start >= 0.5: # samo če imamo prostor za buffer
|
||||||
|
cmd_probe = [
|
||||||
|
"ffmpeg", "-hide_banner", "-loglevel", "error",
|
||||||
|
"-ss", str(probe_start), "-t", str(probe_dur),
|
||||||
|
"-i", str(args.video),
|
||||||
|
"-af", "volumedetect",
|
||||||
|
"-f", "null", "-"
|
||||||
|
]
|
||||||
|
pr = _sp.run(cmd_probe, capture_output=True, text=True, timeout=10)
|
||||||
|
output = pr.stderr or ""
|
||||||
|
# Iščemo "mean_volume: -XX.X dB"
|
||||||
|
import re as _re_amp
|
||||||
|
m = _re_amp.search(r'mean_volume:\s*(-?\d+\.?\d*)\s*dB', output)
|
||||||
|
if m:
|
||||||
|
mean_db = float(m.group(1))
|
||||||
|
# Silence threshold: pod -40 dB = tihota
|
||||||
|
# Vokal/glasba je običajno -30 do -10 dB
|
||||||
|
if mean_db > -35:
|
||||||
|
# Audio je že "glasen" na začetku clipa = vokal/glasba
|
||||||
|
# Dodaj 0.5s buffer nazaj (varno, ne prepogosto)
|
||||||
|
old_start = clip_range["start"]
|
||||||
|
new_start = max(0, old_start - 0.5)
|
||||||
|
if new_start < old_start:
|
||||||
|
print(f" 🎵 Audio amplitude check: prvih {probe_dur}s "
|
||||||
|
f"ima mean_volume {mean_db:.1f} dB (> -35 dB = vokal/glasba). "
|
||||||
|
f"Razširim clip {old_start:.2f}s → {new_start:.2f}s.", file=sys.stderr)
|
||||||
|
clip_range["start"] = round(new_start, 2)
|
||||||
|
clip_range["duration"] = round(clip_range["end"] - new_start, 2)
|
||||||
|
clip_range["reason"] += " (amplitude defense -0.5s)"
|
||||||
|
else:
|
||||||
|
print(f" 🎵 Audio amplitude check: prvih {probe_dur}s "
|
||||||
|
f"ima mean_volume {mean_db:.1f} dB (≤ -35 dB = tiho). OK.", file=sys.stderr)
|
||||||
|
except Exception as _e:
|
||||||
|
print(f" ⚠️ Audio amplitude check skipped: {_e}", file=sys.stderr)
|
||||||
|
|
||||||
# Najdi vse segmente ki se začnejo PO trenutnem clip end
|
# Najdi vse segmente ki se začnejo PO trenutnem clip end
|
||||||
# STROŽJA pravila: ne podaljšuj v naslednji refren / verz / instrumental.
|
# STROŽJA pravila: ne podaljšuj v naslednji refren / verz / instrumental.
|
||||||
# Razširjamo SAMO če zadnji segment se prekriva s clip (klesti iz njega) ALI
|
# Razširjamo SAMO če zadnji segment se prekriva s clip (klesti iz njega) ALI
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user