- FastAPI backend (auth, jobs, SSE, download) - Frontend: drag&drop + YouTube URL + jobs panel - Pipeline: yt_download → find_chorus → reframe → subtitle - Modes: track (face follow), center, blur - Whisper for SI/DE/EN subtitles - Auto-chorus detection via Whisper + RMS energy - Docker + Coolify ready
144 lines
4.7 KiB
Python
144 lines
4.7 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
subtitle.py — Generiraj podnapise iz videa in jih burn-in v output.
|
|
|
|
Uporablja faster-whisper za transkripcijo, FFmpeg za burn-in.
|
|
|
|
Primer:
|
|
python3 subtitle.py video.mp4 video_sub.mp4
|
|
python3 subtitle.py video.mp4 video_sub.mp4 --lang sl --model small
|
|
python3 subtitle.py video.mp4 video_sub.mp4 --style reels # velik beli centriran tekst
|
|
"""
|
|
import argparse
|
|
import subprocess
|
|
import sys
|
|
import tempfile
|
|
import os
|
|
from pathlib import Path
|
|
|
|
|
|
def transcribe(video, lang=None, model_size="small"):
    """Transcribe *video* with faster-whisper and write a temporary SRT file.

    Words are grouped into short cues: a cue is closed after four words or as
    soon as a word ends with ".", "?" or "!". A segment that carries no word
    timestamps is written as a single cue using the segment's own text.

    Args:
        video: Path of the media file (converted with ``str()``).
        lang: Language code handed to Whisper, or ``None`` to auto-detect.
        model_size: Name of the Whisper model to load.

    Returns:
        Path (``str``) of the generated ``.srt`` file. The file is created
        with ``delete=False``, so the caller must remove it when done.

    Raises:
        Any exception raised while transcribing is propagated; the partially
        written temporary file is removed first.
    """
    from faster_whisper import WhisperModel

    max_words_per_cue = 4
    sentence_endings = (".", "?", "!")

    def fmt_ts(seconds):
        """Format *seconds* as an SRT timestamp ``HH:MM:SS,mmm``."""
        # Work in whole milliseconds so rounding carries into the higher
        # fields. Formatting the seconds field as a float could round up to
        # "60,000" (e.g. for 59.9996 s), which is not a valid timestamp.
        total_ms = max(0, int(round(seconds * 1000)))
        hours, remainder = divmod(total_ms, 3_600_000)
        minutes, remainder = divmod(remainder, 60_000)
        secs, millis = divmod(remainder, 1000)
        return f"{hours:02d}:{minutes:02d}:{secs:02d},{millis:03d}"

    print(f"🧠 Whisper model: {model_size}, lang={lang or 'auto'}")
    model = WhisperModel(model_size, device="cpu", compute_type="int8")
    segments, info = model.transcribe(
        str(video),
        language=lang,
        word_timestamps=True,
        vad_filter=True,
    )
    print(f" Detekcija: {info.language} (p={info.language_probability:.2f})")

    srt_file = tempfile.NamedTemporaryFile(
        suffix=".srt", delete=False, mode="w", encoding="utf-8"
    )
    cue_count = 0

    def write_cue(start, end, text):
        """Append one numbered SRT cue to the output file."""
        nonlocal cue_count
        cue_count += 1
        srt_file.write(f"{cue_count}\n{fmt_ts(start)} --> {fmt_ts(end)}\n{text}\n\n")

    def write_group(group):
        """Write the collected words as a single cue."""
        text = "".join(word.word for word in group).strip()
        write_cue(group[0].start, group[-1].end, text)

    try:
        with srt_file:
            # NOTE: per the faster-whisper documentation ``segments`` is a
            # lazy generator, so decoding errors surface inside this loop.
            for seg in segments:
                words = seg.words or []
                if not words:
                    # No word-level timing available: one cue per segment.
                    write_cue(seg.start, seg.end, seg.text.strip())
                    continue

                # Chunk the words into cues of at most four words, breaking
                # early at the end of a sentence.
                group = []
                for word in words:
                    group.append(word)
                    ends_sentence = word.word.strip().endswith(sentence_endings)
                    if len(group) >= max_words_per_cue or ends_sentence:
                        write_group(group)
                        group = []
                if group:
                    write_group(group)
    except BaseException:
        # Do not leave a half-written temp file behind when transcription
        # fails or is interrupted.
        try:
            os.unlink(srt_file.name)
        except OSError:
            pass
        raise

    print(f"📝 SRT: {srt_file.name} ({cue_count} segmentov)")
    return srt_file.name
|
|
|
|
|
|
# Named subtitle looks. Each value is a comma-separated list of style
# overrides that is inserted verbatim into the ``force_style`` option of
# FFmpeg's ``subtitles`` filter. The key order is also the order in which the
# styles are offered as ``--style`` choices on the command line.
SUBTITLE_STYLES = {
    # Bold white text with a black outline.
    "reels": (
        "FontName=Arial,FontSize=18,Bold=1,"
        "PrimaryColour=&H00FFFFFF,OutlineColour=&H00000000,BackColour=&H80000000,"
        "Outline=2,Shadow=0,Alignment=2,MarginV=180,BorderStyle=1"
    ),
    # Larger bold text with a thicker outline.
    "yellow": (
        "FontName=Arial,FontSize=20,Bold=1,"
        "PrimaryColour=&H0000FFFF,OutlineColour=&H00000000,"
        "Outline=3,Shadow=0,Alignment=2,MarginV=200,BorderStyle=1"
    ),
    # Smaller, non-bold text with a thin outline and a smaller margin.
    "minimal": (
        "FontName=Arial,FontSize=14,"
        "PrimaryColour=&H00FFFFFF,OutlineColour=&H80000000,"
        "Outline=1,Shadow=0,Alignment=2,MarginV=80,BorderStyle=1"
    ),
}
|
|
|
|
|
|
def burn_subtitles(video, srt, output, style="reels"):
    """Re-encode *video* with the subtitles from *srt* rendered into the picture.

    Args:
        video: Path of the source video.
        srt: Path of the SRT file (``str`` or path-like).
        output: Path of the file to write; an existing file is overwritten
            because FFmpeg is invoked with ``-y``.
        style: Key into ``SUBTITLE_STYLES``; an unknown key falls back to
            the "reels" style.

    On FFmpeg failure the last 2000 characters of its stderr are printed to
    stderr and the process terminates via ``sys.exit(1)``.
    """
    style_str = SUBTITLE_STYLES.get(style, SUBTITLE_STYLES["reels"])

    # Escape the SRT path for use inside the FFmpeg ``subtitles`` filter
    # argument. ``str()`` lets callers pass a ``Path`` as they can for
    # ``video`` and ``output``.
    srt_escaped = (
        str(srt)
        .replace("\\", "\\\\")
        .replace(":", "\\:")
        .replace("'", r"\'")
    )
    vf = f"subtitles='{srt_escaped}':force_style='{style_str}'"

    cmd = [
        "ffmpeg", "-y", "-i", str(video),
        "-vf", vf,
        "-c:v", "libx264", "-preset", "medium", "-crf", "21",
        "-c:a", "copy",
        "-movflags", "+faststart",
        str(output),
    ]
    print("🔥 Burn-in podnapisov...")
    # errors="replace": FFmpeg's log may contain bytes that are not valid in
    # the locale encoding (e.g. stream metadata); strict decoding would raise
    # UnicodeDecodeError even though the encode itself succeeded.
    result = subprocess.run(cmd, capture_output=True, text=True, errors="replace")
    if result.returncode != 0:
        print("❌ FFmpeg napaka:", file=sys.stderr)
        print(result.stderr[-2000:], file=sys.stderr)
        sys.exit(1)
    print(f"✅ {output}")
|
|
|
|
|
|
def main():
    """Command-line entry point: transcribe the input and burn the subtitles in.

    Positional arguments are the input video and the output file. Options:
    ``--lang`` (language code, or "auto"/omitted for auto-detection),
    ``--model`` (Whisper model name), ``--style`` (key of
    ``SUBTITLE_STYLES``) and ``--keep-srt`` (store the generated SRT next to
    the output, with the output's suffix replaced by ``.srt``).

    Exits with status 1 when the input file does not exist.
    """
    # Local import so this function does not depend on the file's import block.
    import shutil

    ap = argparse.ArgumentParser()
    ap.add_argument("input")
    ap.add_argument("output")
    ap.add_argument("--lang", default=None, help="Jezik (sl, de, en, ...) ali auto")
    ap.add_argument("--model", default="small", choices=["tiny", "base", "small", "medium", "large-v3"])
    ap.add_argument("--style", default="reels", choices=list(SUBTITLE_STYLES.keys()))
    ap.add_argument("--keep-srt", action="store_true", help="Ohrani .srt poleg output")
    args = ap.parse_args()

    src = Path(args.input)
    if not src.exists():
        print(f"❌ {src} ne obstaja", file=sys.stderr)
        sys.exit(1)

    # The help text advertises "auto", but transcribe() expects None for
    # auto-detection; passing the literal string would be treated as a
    # language code.
    lang = args.lang
    if lang is not None and lang.lower() == "auto":
        lang = None

    srt = transcribe(src, lang=lang, model_size=args.model)
    try:
        burn_subtitles(src, srt, args.output, style=args.style)

        if args.keep_srt:
            keep_path = Path(args.output).with_suffix(".srt")
            # shutil.move instead of os.rename: the temp directory and the
            # output directory may live on different filesystems, where a
            # plain rename fails.
            shutil.move(srt, str(keep_path))
            print(f"💾 SRT shranjen: {keep_path}")
    finally:
        # burn_subtitles() leaves via sys.exit() on failure; SystemExit still
        # runs this block, so the temporary SRT is never left behind. After a
        # successful move the source path no longer exists and is skipped.
        if os.path.exists(srt):
            os.unlink(srt)


if __name__ == "__main__":
    main()
|