#!/usr/bin/env python3
"""
subtitle.py — Generate subtitles from a video and burn them into the output.

Uses faster-whisper for transcription and FFmpeg for the burn-in.

Examples:
    python3 subtitle.py video.mp4 video_sub.mp4
    python3 subtitle.py video.mp4 video_sub.mp4 --lang sl --model small
    python3 subtitle.py video.mp4 video_sub.mp4 --style reels  # large white centred text
"""

import argparse
import contextlib
import os
import shutil
import subprocess
import sys
import tempfile
from pathlib import Path

# A cue is flushed after this many words, or earlier at sentence punctuation.
_MAX_WORDS_PER_CUE = 4
_SENTENCE_ENDINGS = (".", "?", "!")


def format_timestamp(seconds):
    """Return *seconds* as an SRT timestamp ``HH:MM:SS,mmm``.

    The value is rounded to whole milliseconds first and then split with
    integer arithmetic, so a value such as 59.9996 carries into the minutes
    field instead of producing an invalid ``00:00:60,000``.
    Negative inputs are clamped to zero.
    """
    total_ms = max(0, int(round(seconds * 1000)))
    hours, rest = divmod(total_ms, 3_600_000)
    minutes, rest = divmod(rest, 60_000)
    secs, millis = divmod(rest, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"


def _cue_from_words(words):
    """Build one ``(start, end, text)`` cue from a non-empty list of words."""
    text = "".join(w.word for w in words).strip()
    return words[0].start, words[-1].end, text


def iter_cues(segments, max_words=_MAX_WORDS_PER_CUE):
    """Yield ``(start, end, text)`` subtitle cues from transcription segments.

    Each segment must expose ``start``, ``end``, ``text`` and ``words``; each
    word must expose ``word``, ``start`` and ``end``.  A segment without word
    timestamps becomes a single cue.  Otherwise its words are chunked into
    short cues: a cue is closed once it holds *max_words* words or when a
    word ends with sentence punctuation.  Cues never span two segments.
    """
    for seg in segments:
        words = seg.words or []
        if not words:
            yield seg.start, seg.end, seg.text.strip()
            continue

        pending = []
        for word in words:
            pending.append(word)
            if len(pending) >= max_words or word.word.strip().endswith(_SENTENCE_ENDINGS):
                yield _cue_from_words(pending)
                pending = []
        if pending:
            # Trailing words that neither filled a cue nor ended a sentence.
            yield _cue_from_words(pending)


def transcribe(video, lang=None, model_size="small"):
    """Transcribe *video* and return the path of a temporary ``.srt`` file.

    Args:
        video: Path of the media file to transcribe.
        lang: Language code handed to the model, or ``None`` for
            auto-detection.
        model_size: Whisper model name passed to ``WhisperModel``.

    Returns:
        The file name (``str``) of the generated SRT file.  The file is
        created with ``delete=False``; the caller is responsible for
        removing it.

    Raises:
        Any exception raised by faster-whisper or by writing the file; the
        partially written temporary file is removed before re-raising.
    """
    # Imported lazily so the module can be imported without the dependency.
    from faster_whisper import WhisperModel

    print(f"🧠 Whisper model: {model_size}, lang={lang or 'auto'}")
    model = WhisperModel(model_size, device="cpu", compute_type="int8")
    segments, info = model.transcribe(
        str(video),
        language=lang,
        word_timestamps=True,
        vad_filter=True,
    )
    print(f" Detekcija: {info.language} (p={info.language_probability:.2f})")

    srt_file = tempfile.NamedTemporaryFile(
        suffix=".srt", delete=False, mode="w", encoding="utf-8"
    )
    cue_count = 0
    try:
        with srt_file:
            for cue_count, (start, end, text) in enumerate(iter_cues(segments), start=1):
                srt_file.write(
                    f"{cue_count}\n"
                    f"{format_timestamp(start)} --> {format_timestamp(end)}\n"
                    f"{text}\n\n"
                )
    except BaseException:
        # The file was created with delete=False, so a failure while
        # consuming the segments would otherwise leave it behind.
        with contextlib.suppress(OSError):
            os.unlink(srt_file.name)
        raise

    print(f"📝 SRT: {srt_file.name} ({cue_count} segmentov)")
    return srt_file.name


# force_style strings (ASS style overrides) for the FFmpeg subtitles filter.
SUBTITLE_STYLES = {
    "reels": (
        "FontName=Arial,FontSize=18,Bold=1,"
        "PrimaryColour=&H00FFFFFF,OutlineColour=&H00000000,BackColour=&H80000000,"
        "Outline=2,Shadow=0,Alignment=2,MarginV=180,BorderStyle=1"
    ),
    "yellow": (
        "FontName=Arial,FontSize=20,Bold=1,"
        "PrimaryColour=&H0000FFFF,OutlineColour=&H00000000,"
        "Outline=3,Shadow=0,Alignment=2,MarginV=200,BorderStyle=1"
    ),
    "minimal": (
        "FontName=Arial,FontSize=14,"
        "PrimaryColour=&H00FFFFFF,OutlineColour=&H80000000,"
        "Outline=1,Shadow=0,Alignment=2,MarginV=80,BorderStyle=1"
    ),
}


def escape_filter_path(path):
    """Escape *path* for use inside a single-quoted FFmpeg filter argument.

    The value is parsed twice by FFmpeg: once as part of the filtergraph
    (where the surrounding single quotes make everything literal) and once
    as a filter option (where ``\\``, ``:`` and ``'`` are special).
    Backslashes and colons therefore get an option-level backslash.  A
    single quote cannot appear inside a quoted string at all, so it is
    written as an option-level ``\\'`` whose quote character is emitted by
    closing the quote, adding an escaped quote and reopening: ``\\'\\''``.
    """
    escaped = str(path).replace("\\", "\\\\").replace(":", "\\:")
    return escaped.replace("'", r"\'\''")


def burn_subtitles(video, srt, output, style="reels"):
    """Render *srt* onto *video* with FFmpeg and write the result to *output*.

    Args:
        video: Input video path.
        srt: Path of the SRT file to burn in.
        output: Destination path; overwritten if it exists (``-y``).
        style: Key of ``SUBTITLE_STYLES``; unknown keys fall back to
            ``"reels"``.

    The video is re-encoded with libx264, the audio stream is copied.
    On a non-zero FFmpeg exit code the tail of its stderr is printed and the
    process exits with status 1.
    """
    style_str = SUBTITLE_STYLES.get(style, SUBTITLE_STYLES["reels"])
    srt_escaped = escape_filter_path(srt)
    vf = f"subtitles='{srt_escaped}':force_style='{style_str}'"

    cmd = [
        "ffmpeg", "-y",
        "-i", str(video),
        "-vf", vf,
        "-c:v", "libx264", "-preset", "medium", "-crf", "21",
        "-c:a", "copy",
        "-movflags", "+faststart",
        str(output),
    ]
    print("🔥 Burn-in podnapisov...")
    result = subprocess.run(cmd, capture_output=True, text=True)
    if result.returncode != 0:
        print("❌ FFmpeg napaka:", file=sys.stderr)
        # Only the tail of stderr: the beginning is FFmpeg's banner.
        print(result.stderr[-2000:], file=sys.stderr)
        sys.exit(1)
    print(f"✅ {output}")


def main():
    """Command-line entry point: transcribe the input and burn in subtitles."""
    ap = argparse.ArgumentParser()
    ap.add_argument("input")
    ap.add_argument("output")
    ap.add_argument("--lang", default=None, help="Jezik (sl, de, en, ...) ali auto")
    ap.add_argument("--model", default="small",
                    choices=["tiny", "base", "small", "medium", "large-v3"])
    ap.add_argument("--style", default="reels", choices=list(SUBTITLE_STYLES.keys()))
    ap.add_argument("--keep-srt", action="store_true", help="Ohrani .srt poleg output")
    args = ap.parse_args()

    src = Path(args.input)
    if not src.exists():
        print(f"❌ {src} ne obstaja", file=sys.stderr)
        sys.exit(1)

    # The help text advertises "auto", but transcribe() expects None for
    # auto-detection; without this the literal string would be handed to the
    # model as a language code.
    lang = None if args.lang == "auto" else args.lang

    srt = transcribe(src, lang=lang, model_size=args.model)
    try:
        burn_subtitles(src, srt, args.output, style=args.style)
        if args.keep_srt:
            keep_path = Path(args.output).with_suffix(".srt")
            # shutil.move instead of os.rename: the temp directory and the
            # output directory may live on different filesystems.
            shutil.move(srt, str(keep_path))
            print(f"💾 SRT shranjen: {keep_path}")
    finally:
        # Runs on success, on error and on sys.exit() from burn_subtitles.
        # After a successful move the file is already gone.
        with contextlib.suppress(FileNotFoundError):
            os.unlink(srt)


if __name__ == "__main__":
    main()