Fix Scribe stopping mid-song: enable tag_audio_events=true + filter events out
ROOT CAUSE FOUND: with tag_audio_events=false, Scribe stopped transcribing once instrumental music dominated (e.g. a polka harmonica taking over from the vocals).
Real-world test on Avseniki - Ena bolha za pomoč (186s polka):
- tag_audio_events=false: 20% coverage (37s only) — fails
- tag_audio_events=true: 100% coverage (186s full) — works
When tag_audio_events=true, Scribe inserts placeholder markers like '(glasba)' / '(plesalna glasba)' for instrumental sections instead of giving up. We then filter these markers out so they don't appear in subtitles.
Filtering logic:
- Skip any token whose word.type != 'word' (i.e. audio_event entries)
- As a legacy fallback, skip fully parenthesized text such as '(music)' or '(applause)'
This is the core fix — transcription completeness no longer relies on the filename. Even untitled files like '12345.mp4' now get full coverage.
This commit is contained in:
parent
7d00730051
commit
81bae81401
@ -206,7 +206,11 @@ def transcribe_with_elevenlabs(audio_path, lang=None, model="scribe_v1", filenam
|
||||
|
||||
add_text("model_id", model)
|
||||
add_text("timestamps_granularity", "word")
|
||||
add_text("tag_audio_events", "false")
|
||||
# tag_audio_events=true je kritično: brez tega Scribe predčasno preneha s transkripcijo
|
||||
# ko zazna instrumentalni del (npr. polka harmonika prevzame). Z true vstavi oznake
|
||||
# kot "(glasba)" in nadaljuje transkripcijo do konca audia.
|
||||
# Te oznake potem post-processing odstrani iz besedila.
|
||||
add_text("tag_audio_events", "true")
|
||||
if lang:
|
||||
scribe_lang = LANG_1_TO_3.get(lang, lang)
|
||||
add_text("language_code", scribe_lang)
|
||||
@ -252,8 +256,22 @@ def transcribe_with_elevenlabs(audio_path, lang=None, model="scribe_v1", filenam
|
||||
segments = []
|
||||
|
||||
if words:
|
||||
# Filter out whitespace tokens
|
||||
real_words = [w for w in words if w.get("text", "").strip()]
|
||||
# Filter out:
|
||||
# 1. whitespace tokens
|
||||
# 2. audio event tags type='audio_event' or text in (parenthesis) like "(glasba)", "(music)"
|
||||
real_words = []
|
||||
for w in words:
|
||||
t = w.get("text", "").strip()
|
||||
wtype = w.get("type", "word")
|
||||
# Skip non-word events
|
||||
if wtype != "word":
|
||||
continue
|
||||
if not t:
|
||||
continue
|
||||
# Skip parenthesized audio events (legacy fallback)
|
||||
if t.startswith("(") and t.endswith(")"):
|
||||
continue
|
||||
real_words.append(w)
|
||||
|
||||
if real_words:
|
||||
current_seg_words = []
|
||||
|
||||
Loading…
Reference in New Issue
Block a user