From 5d5e169f9dc8b1116e34feff5efc93c5360a5ca5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastjan=20Arti=C4=8D?= Date: Wed, 29 Apr 2026 07:07:29 +0000 Subject: [PATCH] =?UTF-8?q?Disable=20Whisper=20VAD=20filter=20=E2=80=94=20?= =?UTF-8?q?was=20dropping=20vocal=20segments=20in=20songs=20creating=20gap?= =?UTF-8?q?s=20in=20subtitles?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/analyze.py | 3 ++- scripts/subtitle.py | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/scripts/analyze.py b/scripts/analyze.py index d31026e..85c5cec 100644 --- a/scripts/analyze.py +++ b/scripts/analyze.py @@ -59,7 +59,8 @@ def transcribe_full(audio_path, lang=None, model_size="small"): audio_path, language=lang, word_timestamps=True, - vad_filter=True, + # VAD filter kdaj izpusti vokal med glasbo — pri pesmi bolje brez + vad_filter=False, ) detected_lang = info.language detected_prob = float(info.language_probability) diff --git a/scripts/subtitle.py b/scripts/subtitle.py index 8efdb07..0ff8e2d 100644 --- a/scripts/subtitle.py +++ b/scripts/subtitle.py @@ -27,7 +27,8 @@ def transcribe(video, lang=None, model_size="small"): str(video), language=lang, word_timestamps=True, - vad_filter=True, + # VAD filter kdaj izpusti vokal med glasbo — pri pesmi bolje brez + vad_filter=False, ) print(f" Detekcija: {info.language} (p={info.language_probability:.2f})")