diff --git a/scripts/analyze.py b/scripts/analyze.py
index f5fa742..88ddc2c 100644
--- a/scripts/analyze.py
+++ b/scripts/analyze.py
@@ -206,7 +206,11 @@ def transcribe_with_elevenlabs(audio_path, lang=None, model="scribe_v1", filenam
 
     add_text("model_id", model)
     add_text("timestamps_granularity", "word")
-    add_text("tag_audio_events", "false")
+    # tag_audio_events=true is critical: without it Scribe stops transcribing early
+    # when it detects an instrumental section (e.g. a polka accordion takes over). With
+    # true it inserts tags such as "(glasba)" and keeps transcribing to the end of the audio.
+    # Post-processing then removes these tags from the text.
+    add_text("tag_audio_events", "true")
     if lang:
         scribe_lang = LANG_1_TO_3.get(lang, lang)
         add_text("language_code", scribe_lang)
@@ -252,8 +256,22 @@ def transcribe_with_elevenlabs(audio_path, lang=None, model="scribe_v1", filenam
     segments = []
 
     if words:
-        # Filter out whitespace tokens
-        real_words = [w for w in words if w.get("text", "").strip()]
+        # Filter out:
+        # 1. whitespace tokens
+        # 2. audio event tags: tokens whose type is not 'word' (e.g. 'audio_event'), or text wrapped in parentheses like "(glasba)", "(music)"
+        real_words = []
+        for w in words:
+            t = w.get("text", "").strip()
+            wtype = w.get("type", "word")
+            # Skip non-word events
+            if wtype != "word":
+                continue
+            if not t:
+                continue
+            # Skip parenthesized audio events (legacy fallback)
+            if t.startswith("(") and t.endswith(")"):
+                continue
+            real_words.append(w)
         
         if real_words:
             current_seg_words = []