reels-app/scripts/subtitle.py

205 lines
7.0 KiB
Python

#!/usr/bin/env python3
"""
subtitle.py — Generate subtitles from a video and burn them into the output.

Uses faster-whisper for transcription and FFmpeg for the burn-in.

Examples:
    python3 subtitle.py video.mp4 video_sub.mp4
    python3 subtitle.py video.mp4 video_sub.mp4 --lang sl --model small
    python3 subtitle.py video.mp4 video_sub.mp4 --style reels  # large white centered text
"""
import argparse
import subprocess
import sys
import tempfile
import os
from pathlib import Path
def _srt_timestamp(seconds):
    """Format a time offset in seconds as an SRT timestamp (``HH:MM:SS,mmm``)."""
    # Round to whole milliseconds *before* splitting into fields so the carry
    # propagates: 59.9996 s must become 00:01:00,000, not 00:00:60,000.
    total_ms = max(0, int(round(seconds * 1000)))
    hours, rest = divmod(total_ms, 3_600_000)
    minutes, rest = divmod(rest, 60_000)
    secs, millis = divmod(rest, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"


def _iter_cues(segments, max_words=4, sentence_end=(".", "?", "!")):
    """Yield ``(start, end, text)`` cues: short upper-cased word groups per segment."""
    for seg in segments:
        words = seg.words or []
        if not words:
            # No word-level timing available: fall back to the whole segment.
            text = seg.text.strip().upper()
            if text:
                yield seg.start, seg.end, text
            continue

        group = []
        for word in words:
            group.append(word)
            # Flush after ``max_words`` words or at the end of a sentence.
            if len(group) >= max_words or word.word.strip().endswith(sentence_end):
                text = "".join(w.word for w in group).strip().upper()
                if text:
                    yield group[0].start, group[-1].end, text
                group = []
        if group:
            # Words left over at the end of the segment.
            text = "".join(w.word for w in group).strip().upper()
            if text:
                yield group[0].start, group[-1].end, text


def transcribe(video, lang=None, model_size="small"):
    """Transcribe *video* with faster-whisper and write the result as an SRT file.

    Subtitles are emitted as word-level chunks of about four words, all in
    upper case (reels style).

    Args:
        video: Path to the input media file (anything ``str()`` turns into a path).
        lang: Language code passed to Whisper, or ``None`` for auto-detection.
        model_size: Whisper model name passed to ``WhisperModel``.

    Returns:
        Path (``str``) of a temporary ``.srt`` file. The file is not deleted
        automatically; the caller is responsible for removing it.
    """
    from faster_whisper import WhisperModel

    print(f"🧠 Whisper model: {model_size}, lang={lang or 'auto'}")
    model = WhisperModel(model_size, device="cpu", compute_type="int8")
    segments, info = model.transcribe(
        str(video),
        language=lang,
        word_timestamps=True,
        vad_filter=True,
    )
    print(f" Detekcija: {info.language} (p={info.language_probability:.2f})")

    srt_file = tempfile.NamedTemporaryFile(
        suffix=".srt", delete=False, mode="w", encoding="utf-8"
    )
    count = 0
    try:
        with srt_file:
            # NOTE: faster-whisper documents ``segments`` as a lazy generator,
            # so the actual transcription (and any failure) happens in this loop.
            for count, (start, end, text) in enumerate(_iter_cues(segments), start=1):
                srt_file.write(
                    f"{count}\n{_srt_timestamp(start)} --> {_srt_timestamp(end)}\n{text}\n\n"
                )
    except BaseException:
        # Do not leave a half-written temp file behind (also on Ctrl-C).
        os.unlink(srt_file.name)
        raise

    print(f"📝 SRT: {srt_file.name} ({count} segmentov)")
    return srt_file.name
# Named subtitle looks as comma-separated ``Key=Value`` style strings.
# The keys of this mapping are also used as the valid ``--style`` choices.
SUBTITLE_STYLES = {
    style_name: ",".join(style_fields)
    for style_name, style_fields in (
        (
            # Large white letters with a thick black outline, in the lower third.
            "reels",
            (
                "FontName=Arial", "FontSize=42", "Bold=1",
                "PrimaryColour=&HFFFFFF", "OutlineColour=&H000000",
                "Outline=4", "Shadow=1", "Alignment=2", "MarginV=120", "BorderStyle=1",
            ),
        ),
        (
            # Same layout as "reels" but with a different primary colour.
            "yellow",
            (
                "FontName=Arial", "FontSize=42", "Bold=1",
                "PrimaryColour=&H00FFFF", "OutlineColour=&H000000",
                "Outline=4", "Shadow=1", "Alignment=2", "MarginV=120", "BorderStyle=1",
            ),
        ),
        (
            # Smaller, non-bold text with a thin outline and no shadow.
            "minimal",
            (
                "FontName=Arial", "FontSize=28",
                "PrimaryColour=&HFFFFFF", "OutlineColour=&H000000",
                "Outline=2", "Shadow=0", "Alignment=2", "MarginV=80", "BorderStyle=1",
            ),
        ),
    )
}
def _ass_header(primary):
    """Return the ASS script header with one ``Default`` style using *primary* colour."""
    # With PlayResY set to 1920 the MarginV value is intended to map 1:1 to
    # pixels (presumably for 1080x1920 vertical video).
    return f"""[Script Info]
ScriptType: v4.00+
PlayResX: 1080
PlayResY: 1920
WrapStyle: 2
ScaledBorderAndShadow: yes
[V4+ Styles]
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
Style: Default,Arial,84,{primary},&H00FFFFFF,&H00000000,&H00000000,1,0,0,0,100,100,0,0,1,8,0,2,40,40,200,1
[Events]
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
"""


def _srt_time_to_ass(stamp):
    """Convert an SRT timestamp (``00:00:09,520``) to ASS form (``0:00:09.52``)."""
    hours, minutes, rest = stamp.split(":")
    # Tolerate "." as the millisecond separator as well as the standard ",".
    seconds, millis = rest.replace(".", ",").split(",")
    return f"{int(hours)}:{int(minutes):02d}:{int(seconds):02d}.{int(millis) // 10:02d}"


def _srt_to_ass_dialogues(srt_content):
    """Turn SRT text into a list of ASS ``Dialogue:`` lines (malformed blocks are skipped)."""
    dialogues = []
    for block in srt_content.strip().split("\n\n"):
        # lines[0] = index, lines[1] = timecode, lines[2:] = text
        lines = block.strip().split("\n")
        if len(lines) < 3 or " --> " not in lines[1]:
            continue
        start_raw, _, end_raw = lines[1].partition(" --> ")
        # Keep only the timestamp itself; SRT allows position data after it.
        ass_start = _srt_time_to_ass(start_raw.strip())
        ass_end = _srt_time_to_ass(end_raw.split()[0])
        text = " ".join(lines[2:])
        dialogues.append(f"Dialogue: 0,{ass_start},{ass_end},Default,,0,0,0,,{text}")
    return dialogues


def burn_subtitles(video, srt, output, style="reels"):
    """Burn the subtitles from *srt* into *video* and write the result to *output*.

    The SRT file is first converted to a temporary ASS file with an explicit
    style; the original author found FFmpeg's ``force_style`` unreliable and
    often silently ignored. The temporary ASS file is removed afterwards.

    Args:
        video: Path to the input video.
        srt: Path to the UTF-8 encoded SRT file.
        output: Path of the video file to create (overwritten if it exists).
        style: Style name. Only ``"yellow"`` changes the result (yellow text);
            every other value renders white text.

    Exits the process with status 1 when FFmpeg is missing or fails.
    """
    # NOTE(review): "minimal" currently looks the same as "reels" because only
    # the primary colour depends on *style*.
    primary = "&H0000FFFF" if style == "yellow" else "&H00FFFFFF"  # yellow / white

    with open(srt, "r", encoding="utf-8") as f:
        dialogue_lines = _srt_to_ass_dialogues(f.read())

    # Use a dedicated temp file instead of deriving the name from *srt*: that
    # avoids mangling paths that contain ".srt" elsewhere and never clobbers an
    # existing .ass file sitting next to the SRT.
    ass_file = tempfile.NamedTemporaryFile(
        suffix=".ass", delete=False, mode="w", encoding="utf-8"
    )
    ass_path = ass_file.name
    try:
        with ass_file:
            ass_file.write(_ass_header(primary))
            ass_file.write("\n".join(dialogue_lines))
            ass_file.write("\n")
        print(f"📝 ASS: {ass_path} ({len(dialogue_lines)} dialogov)")

        # Burn in with the ``ass`` filter; the path is escaped for the
        # filtergraph syntax (backslash, colon and quote are special there).
        ass_escaped = ass_path.replace("\\", "\\\\").replace(":", "\\:").replace("'", r"\'")
        vf = f"ass='{ass_escaped}'"
        cmd = [
            "ffmpeg", "-y", "-i", str(video),
            "-vf", vf,
            "-c:v", "libx264", "-preset", "medium", "-crf", "21",
            "-c:a", "copy",
            "-movflags", "+faststart",
            str(output),
        ]
        print("🔥 Burn-in podnapisov...")
        try:
            # errors="replace": FFmpeg logs may contain bytes that are not
            # valid in the locale encoding; that must not crash the run.
            result = subprocess.run(cmd, capture_output=True, text=True, errors="replace")
        except FileNotFoundError:
            print("❌ FFmpeg napaka:", file=sys.stderr)
            print("ffmpeg ni bil najden (ni nameščen ali ni v PATH)", file=sys.stderr)
            sys.exit(1)
        if result.returncode != 0:
            print("❌ FFmpeg napaka:", file=sys.stderr)
            print(result.stderr[-2000:], file=sys.stderr)
            sys.exit(1)
    finally:
        # Always remove the intermediate ASS file, also on failure/exit.
        if os.path.exists(ass_path):
            os.unlink(ass_path)
    print(f"{output}")
def main():
    """Command-line entry point: transcribe the input video and burn in subtitles.

    Exits with status 1 when the input file does not exist. The temporary SRT
    produced by the transcription is always disposed of afterwards: moved next
    to the output file when ``--keep-srt`` is given, deleted otherwise.
    """
    # Local import: only needed here, for moving the SRT across filesystems.
    import shutil

    ap = argparse.ArgumentParser()
    ap.add_argument("input")
    ap.add_argument("output")
    ap.add_argument("--lang", default=None, help="Jezik (sl, de, en, ...) ali auto")
    ap.add_argument("--model", default="small", choices=["tiny", "base", "small", "medium", "large-v3"])
    ap.add_argument("--style", default="reels", choices=list(SUBTITLE_STYLES.keys()))
    ap.add_argument("--keep-srt", action="store_true", help="Ohrani .srt poleg output")
    args = ap.parse_args()

    src = Path(args.input)
    if not src.exists():
        print(f"{src} ne obstaja", file=sys.stderr)
        sys.exit(1)

    # The help text advertises "auto", but the transcriber expects ``None`` for
    # auto-detection; a literal "auto" is not a language code.
    lang = args.lang
    if lang is not None and lang.strip().lower() == "auto":
        lang = None

    srt = transcribe(src, lang=lang, model_size=args.model)
    try:
        burn_subtitles(src, srt, args.output, style=args.style)
    finally:
        # Runs even when the burn-in exits via sys.exit(), so the temporary
        # SRT is never left behind.
        if args.keep_srt:
            keep_path = Path(args.output).with_suffix(".srt")
            # shutil.move (unlike os.rename) also works when the temp
            # directory and the output are on different filesystems.
            shutil.move(srt, str(keep_path))
            print(f"💾 SRT shranjen: {keep_path}")
        else:
            os.unlink(srt)
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()