diff --git a/scripts/subtitle.py b/scripts/subtitle.py
index 865178a..1293d14 100644
--- a/scripts/subtitle.py
+++ b/scripts/subtitle.py
@@ -39,14 +39,40 @@ def transcribe(video, lang=None, model_size="small"):
         sec = s % 60
         return f"{h:02d}:{m:02d}:{sec:06.3f}".replace(".", ",")
 
-    # Generiramo word-level chunked podnapise: 3-5 besed naenkrat
-    # Vse v VELIKIH TISKANIH ČRKAH za reels stil
+    # Emit word-level chunked subtitles: 3-4 words at a time,
+    # at most 2.5 s per chunk for fast reels-style pacing.
+    # All text is upper-cased.
+    MAX_CHUNK_DURATION = 2.5  # seconds
+
+    def write_chunk(idx, start, end, text):
+        """Write `text` as one SRT cue, or as several equal-length cues
+        repeating it when the span exceeds MAX_CHUNK_DURATION.
+
+        Returns the next unused cue index.
+        """
+        duration = end - start
+        if duration <= MAX_CHUNK_DURATION:
+            srt_path.write(f"{idx}\n{fmt_ts(start)} --> {fmt_ts(end)}\n{text}\n\n")
+            return idx + 1
+        # Ceiling division: the smallest part count that keeps every part
+        # within the limit. int(x) + 1 over-split exact multiples (5.0 s -> 3).
+        n_parts = int(duration / MAX_CHUNK_DURATION)
+        if n_parts * MAX_CHUNK_DURATION < duration:
+            n_parts += 1
+        chunk_dur = duration / n_parts
+        for i in range(n_parts):
+            chunk_start = start + i * chunk_dur
+            # Pin the last boundary to `end` so float error cannot shift it.
+            chunk_end = end if i == n_parts - 1 else start + (i + 1) * chunk_dur
+            srt_path.write(f"{idx}\n{fmt_ts(chunk_start)} --> {fmt_ts(chunk_end)}\n{text}\n\n")
+            idx += 1
+        return idx
+
     idx = 1
     for seg in segments:
         words = seg.words or []
         if not words:
-            srt_path.write(f"{idx}\n{fmt_ts(seg.start)} --> {fmt_ts(seg.end)}\n{seg.text.strip().upper()}\n\n")
-            idx += 1
+            idx = write_chunk(idx, seg.start, seg.end, seg.text.strip().upper())
             continue
 
         # Združi v skupine po ~4 besede
@@ -57,15 +83,13 @@ def transcribe(video, lang=None, model_size="small"):
                 start = group[0].start
                 end = group[-1].end
                 text = "".join(g.word for g in group).strip().upper()
-                srt_path.write(f"{idx}\n{fmt_ts(start)} --> {fmt_ts(end)}\n{text}\n\n")
-                idx += 1
+                idx = write_chunk(idx, start, end, text)
                 group = []
         if group:
             start = group[0].start
             end = group[-1].end
             text = "".join(g.word for g in group).strip().upper()
-            srt_path.write(f"{idx}\n{fmt_ts(start)} --> {fmt_ts(end)}\n{text}\n\n")
-            idx += 1
+            idx = write_chunk(idx, start, end, text)
 
     srt_path.close()
     print(f"📝 SRT: {srt_path.name} ({idx - 1} segmentov)")