Fix: handle Whisper transcribe failure for instrumental-only audio (fallback to empty transcript)

This commit is contained in:
Sebastjan Artič 2026-04-29 06:33:52 +00:00
parent 33a138af9e
commit e072eec362

View File

@ -47,19 +47,31 @@ def extract_audio(video_path):
def transcribe_full(audio_path, lang=None, model_size="small"):
"""Whisper transcript celega avdia. lang=None → auto-detect."""
"""Whisper transcript celega avdia. lang=None → auto-detect.
Vrne empty transcript če Whisper ne najde govora (popolnoma instrumental)."""
from faster_whisper import WhisperModel
print(f"🧠 Whisper {model_size}, lang={lang or 'auto'}", file=sys.stderr)
m = WhisperModel(model_size, device="cpu", compute_type="int8")
segs, info = m.transcribe(
audio_path,
language=lang,
word_timestamps=True,
vad_filter=True,
)
detected_lang = info.language
detected_prob = float(info.language_probability)
try:
segs, info = m.transcribe(
audio_path,
language=lang,
word_timestamps=True,
vad_filter=True,
)
detected_lang = info.language
detected_prob = float(info.language_probability)
except (ValueError, RuntimeError) as e:
# Whisper failure (npr. pri popolnoma instrumentalnih datotekah z VAD)
print(f" ⚠️ Whisper transcribe failed: {e}", file=sys.stderr)
return {
"language": "unknown",
"language_probability": 0.0,
"segments": [],
}
print(f" Detekcija: {detected_lang} (p={detected_prob:.2f})", file=sys.stderr)
segments = []