Upgrade default Whisper model: small/medium → large-v3 for much better Slovenian/Slavic transcription accuracy

This commit is contained in:
Sebastjan Artič 2026-04-29 08:20:18 +00:00
parent 4bc5ac6756
commit 69fb2f5ce8
5 changed files with 8 additions and 8 deletions

View File

@ -240,7 +240,7 @@ def process_job(job_id):
# lang: če None ali 'auto', pusti analyze.py auto-detect
if job.get("lang") and job["lang"] not in ("auto", ""):
cmd += ["--lang", job["lang"]]
cmd += ["--model", job.get("whisper_model", "small")]
cmd += ["--model", job.get("whisper_model", "large-v3")]
proc = subprocess.run(cmd, capture_output=True, text=True)
srt_from_claude = None # Pot do SRT iz Claude-popravljenega transcript-a
@ -334,7 +334,7 @@ def process_job(job_id):
cmd += ["--lang", chosen_lang]
if job.get("no_subs"):
cmd += ["--no-subs"]
cmd += ["--model", job.get("whisper_model", "small")]
cmd += ["--model", job.get("whisper_model", "large-v3")]
# DEBUG: zapiši natanko kakšen ukaz se izvede
update_job(job_id, debug_clip_cmd=" ".join(cmd))
@ -391,7 +391,7 @@ class YouTubeJobIn(BaseModel):
duration: Optional[float] = 30
no_subs: bool = False
subtitle_style: str = "reels"
whisper_model: str = "small"
whisper_model: str = "large-v3"
quality: str = "medium"
@ -407,7 +407,7 @@ class StartJobIn(BaseModel):
min_duration: Optional[float] = 20
no_subs: bool = False
subtitle_style: str = "reels"
whisper_model: str = "small"
whisper_model: str = "large-v3"
quality: str = "medium"

View File

@ -592,7 +592,7 @@ def main():
ap = argparse.ArgumentParser()
ap.add_argument("video", help="Vhod video file")
ap.add_argument("--lang", default=None, help="ISO 639-1 ali 'auto' (default: auto)")
ap.add_argument("--model", default="small", help="Whisper model")
ap.add_argument("--model", default="large-v3", help="Whisper model")
ap.add_argument("--target-duration", type=float, default=30.0)
ap.add_argument("--max-duration", type=float, default=45.0)
ap.add_argument("--min-duration", type=float, default=20.0)

View File

@ -112,7 +112,7 @@ def main():
help="Več klipov: '0:30-1:00,2:15-2:45'")
ap.add_argument("--mode", default="track", choices=["track", "center", "blur"])
ap.add_argument("--lang", default=None, help="sl, de, en, ... (privzeto auto)")
ap.add_argument("--model", default="small",
ap.add_argument("--model", default="large-v3",
choices=["tiny", "base", "small", "medium", "large-v3"])
ap.add_argument("--style", default="reels", choices=["reels", "yellow", "minimal"])
ap.add_argument("--no-subs", action="store_true")

View File

@ -279,7 +279,7 @@ def main():
ap.add_argument("input")
ap.add_argument("output")
ap.add_argument("--lang", default=None, help="Jezik (sl, de, en, ...) ali auto")
ap.add_argument("--model", default="small", choices=["tiny", "base", "small", "medium", "large-v3"])
ap.add_argument("--model", default="large-v3", choices=["tiny", "base", "small", "medium", "large-v3"])
ap.add_argument("--style", default="reels", choices=list(SUBTITLE_STYLES.keys()))
ap.add_argument("--keep-srt", action="store_true", help="Ohrani .srt poleg output")
ap.add_argument("--srt", default=None, help="Že-pripravljen SRT (preskoči Whisper transkripcijo)")

View File

@ -214,7 +214,7 @@
<!-- Skrita polja: jezik in model sta avto. Vrednosti uporabljene v JS submit. -->
<input type="hidden" id="lang" value="">
<input type="hidden" id="model" value="medium">
<input type="hidden" id="model" value="large-v3">
<div style="font-size: 12px; color: var(--text-dim); margin-top: 8px;">
🤖 Jezik: avtomatsko zaznan (Whisper, 3-sample voting) · Model: large-v3 · LLM analiza: Claude