diff --git a/scripts/analyze.py b/scripts/analyze.py index 1411526..bf2ac63 100644 --- a/scripts/analyze.py +++ b/scripts/analyze.py @@ -47,19 +47,31 @@ def extract_audio(video_path): def transcribe_full(audio_path, lang=None, model_size="small"): - """Whisper transcript celega avdia. lang=None → auto-detect.""" + """Whisper transcript celega avdia. lang=None → auto-detect. + + Vrne empty transcript če Whisper ne najde govora (popolnoma instrumental).""" from faster_whisper import WhisperModel print(f"🧠 Whisper {model_size}, lang={lang or 'auto'}", file=sys.stderr) m = WhisperModel(model_size, device="cpu", compute_type="int8") - segs, info = m.transcribe( - audio_path, - language=lang, - word_timestamps=True, - vad_filter=True, - ) - detected_lang = info.language - detected_prob = float(info.language_probability) + try: + segs, info = m.transcribe( + audio_path, + language=lang, + word_timestamps=True, + vad_filter=True, + ) + detected_lang = info.language + detected_prob = float(info.language_probability) + except (ValueError, RuntimeError) as e: + # Whisper failure (npr. pri popolnoma instrumentalnih datotekah z VAD) + print(f" ⚠️ Whisper transcribe failed: {e}", file=sys.stderr) + return { + "language": "unknown", + "language_probability": 0.0, + "segments": [], + } + print(f" Detekcija: {detected_lang} (p={detected_prob:.2f})", file=sys.stderr) segments = []