Fix: extend the clip to the end of the vocal segment when it would otherwise end mid-vocal (no chorus cut-off); switch subtitles to the DejaVu Sans font (supports SLO/HR/BS characters); auto-upgrade to the medium Whisper model for Slavic languages

This commit is contained in:
Sebastjan Artič 2026-04-29 07:35:00 +00:00
parent 5d5e169f9d
commit c870d80726
4 changed files with 85 additions and 20 deletions

View File

@ -166,6 +166,8 @@ def process_job(job_id):
"--min-duration", str(job.get("min_duration", 20)),
"--output", str(analysis_path),
]
if job.get("include_prebuild"):
cmd += ["--include-prebuild"]
# lang: če None ali 'auto', pusti analyze.py auto-detect
if job.get("lang") and job["lang"] not in ("auto", ""):
cmd += ["--lang", job["lang"]]
@ -188,7 +190,15 @@ def process_job(job_id):
"fade": fade,
"chorus_preview": analysis["chorus"]["best"]["text_preview"]
if analysis.get("chorus") and analysis["chorus"].get("best") else None,
"video_duration": analysis.get("video_duration"),
"candidates": analysis["chorus"].get("all_candidates", [])[:5]
if analysis.get("chorus") else [],
},
# Cel transkript shranimo za UI prikaz
full_transcript=[
{"start": s["start"], "end": s["end"], "text": s["text"]}
for s in analysis.get("transcript", {}).get("segments", [])
],
start=cr["start"],
duration=cr["duration"],
fade_in=fade["fade_in"],
@ -299,9 +309,10 @@ class StartJobIn(BaseModel):
mode: str = "track"
lang: Optional[str] = None # None/auto = Whisper auto-detect
auto_chorus: bool = True
include_prebuild: bool = False # vključi pre-chorus build-up
start: Optional[float] = None
duration: Optional[float] = 30
max_duration: Optional[float] = 45 # Smart selection lahko gre do 45s
max_duration: Optional[float] = 45
min_duration: Optional[float] = 20
no_subs: bool = False
subtitle_style: str = "reels"
@ -401,6 +412,7 @@ async def start_processing(
mode=payload.mode,
lang=payload.lang,
auto_chorus=payload.auto_chorus,
include_prebuild=payload.include_prebuild,
start=payload.start,
duration=payload.duration,
max_duration=payload.max_duration,

View File

@ -346,31 +346,46 @@ def smart_clip_range(chorus, transcript, video_duration,
}
def detect_audio_fade(clip_range, transcript):
"""Določi fade-in/fade-out trajanje.
def detect_audio_fade(clip_range, transcript, video_duration=None):
"""Določi fade-in/fade-out trajanje + ev. razširi clip range, da fade
ne reže besedila na koncu refrena.
Logika:
- Če clip začne sredi vokala 0.5s fade in
- Če se konča sredi vokala 1.0s fade out
- Če se konča sredi vokala razširi clip do konca segmenta (+ buffer),
potem 1.0s fade out
- Sicer manj fade
"""
cs, ce = clip_range["start"], clip_range["end"]
# Vokal pri začetku?
# Najdi segment, ki konča znotraj clip-a (ali je clip end znotraj segmenta)
starts_in_vocal = False
ends_in_vocal = False
end_segment = None
for seg in transcript["segments"]:
# Začetek clip-a znotraj segmenta
if seg["start"] <= cs <= seg["end"]:
starts_in_vocal = True
# Konec clip-a znotraj segmenta
if seg["start"] <= ce <= seg["end"]:
ends_in_vocal = True
end_segment = seg
fade_in = 0.5 if starts_in_vocal else 0.2
fade_out = 1.5 if ends_in_vocal else 0.3
# Če clip konča znotraj segmenta, razširi do konca segmenta + 0.5s buffer
extended_end = ce
if end_segment:
extended_end = end_segment["end"] + 0.5
if video_duration is not None:
extended_end = min(extended_end, video_duration)
return {"fade_in": fade_in, "fade_out": fade_out}
fade_in = 0.4 if starts_in_vocal else 0.2
# Krajši fade out (0.5s) ker zdaj clip konča po koncu vokala
fade_out = 0.5 if ends_in_vocal else 0.3
return {
"fade_in": fade_in,
"fade_out": fade_out,
"extended_end": round(extended_end, 2),
"ends_in_vocal": ends_in_vocal,
}
def analyze_with_claude(transcript, video_duration, target_duration=30):
@ -612,10 +627,19 @@ def main():
f"(duration: {clip_range['duration']}s, source: {clip_range.get('source')})",
file=sys.stderr)
# 7. Fade params
fade = detect_audio_fade(clip_range, transcript)
# 7. Fade params (lahko razširi clip end če konča sredi vokala)
fade = detect_audio_fade(clip_range, transcript, video_duration=duration)
print(f"🎚 Fade: in={fade['fade_in']}s, out={fade['fade_out']}s", file=sys.stderr)
# Če fade detection razširi end (ker clip konča sredi vokala), apply
if fade.get("extended_end") and fade["extended_end"] > clip_range["end"]:
old_end = clip_range["end"]
new_end = min(fade["extended_end"], clip_range["start"] + args.max_duration)
clip_range["end"] = round(new_end, 2)
clip_range["duration"] = round(new_end - clip_range["start"], 2)
print(f" ↳ Razširjen za {new_end - old_end:.1f}s (zaključek besedila)",
file=sys.stderr)
result = {
"video": str(video),
"video_duration": duration,

View File

@ -133,7 +133,7 @@ ScaledBorderAndShadow: yes
[V4+ Styles]
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
Style: Default,Arial,56,{primary},&H00FFFFFF,&H00000000,&H00000000,1,0,0,0,100,100,0,0,1,5,1,2,80,80,400,1
Style: Default,DejaVu Sans,56,{primary},&H00FFFFFF,&H00000000,&H00000000,1,0,0,0,100,100,0,0,1,5,1,2,80,80,400,1
[Events]
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text

View File

@ -233,9 +233,9 @@
<select id="model">
<option value="tiny">tiny (najhitrejši)</option>
<option value="base">base</option>
<option value="small" selected>small (priporočeno)</option>
<option value="medium">medium (zelo dobro)</option>
<option value="large-v3">large-v3 (najboljše)</option>
<option value="small" selected>small (DE/EN, hitro)</option>
<option value="medium">medium (priporočeno za SLO/HR/BS)</option>
<option value="large-v3">large-v3 (najboljše, počasno)</option>
</select>
</div>
</div>
@ -245,9 +245,16 @@
Pametna izbira odseka (Whisper + energy → najde refren)
</label>
<div style="font-size: 12px; color: var(--text-dim); margin-top: 4px; margin-left: 26px;">
Sistem analizira celoten video, najde refren ter pre-chorus build-up.
Lahko traja malo dlje (do 1.5×) če to bolje prikazuje pesem.
Audio fade in/out je avtomatsko dodan.
Sistem naredi <b>celoten transkript</b> in <b>energy profile</b>, najde refren in ga izreže.
Audio fade in/out je avtomatsko dodan na meje vokala.
</div>
<label class="toggle" style="margin-top: 12px; margin-left: 26px;">
<input type="checkbox" id="include-prebuild">
Vključi pre-chorus (build-up pred refrenom)
</label>
<div style="font-size: 12px; color: var(--text-dim); margin-top: 2px; margin-left: 52px;">
Privzeto izklopljeno: dobiš čist refren brez kitice.
</div>
<div id="manual-times" class="row hidden">
@ -300,6 +307,14 @@
<div class="progress-bar" id="live-bar" style="width: 0%;"></div>
</div>
<div style="font-size: 12px; color: var(--muted);" id="live-detail">Pripravljam...</div>
<!-- Analysis summary z izbranim odsekom in transkriptom -->
<div id="live-analysis" class="hidden" style="margin-top: 12px; padding: 10px; background: var(--panel); border-radius: 6px; font-size: 12px;">
<div id="live-analysis-summary" style="margin-bottom: 8px; color: var(--text-dim);"></div>
<details style="margin-top: 6px;">
<summary style="cursor: pointer; color: var(--accent); font-weight: 600;">Pokaži celoten transkript</summary>
<div id="live-transcript" style="margin-top: 8px; max-height: 240px; overflow-y: auto; font-family: monospace; font-size: 11px; line-height: 1.6;"></div>
</details>
</div>
<div id="live-result" class="hidden" style="margin-top: 12px; display: flex; gap: 8px;">
<button class="small" id="live-download" style="display: none;">⬇ Download</button>
<button class="small ghost" id="live-preview" style="display: none;">▶ Preview</button>
@ -337,6 +352,15 @@
$("#manual-times").classList.toggle("hidden", e.target.checked);
});
// ─── Auto-upgrade the Whisper model for Slavic languages ──
// When the language selector changes to sl/hr/bs/sr and the currently
// selected model is one of the smaller ones (tiny/base/small), switch the
// model selector to "medium". Larger selections are left untouched.
$("#lang").addEventListener("change", (event) => {
  const selectedModel = $("#model").value;

  const isSlavic = ["sl", "hr", "bs", "sr"].includes(event.target.value);
  if (!isSlavic) {
    return;
  }

  const upgradableModels = ["tiny", "base", "small"];
  if (!upgradableModels.includes(selectedModel)) {
    return;
  }

  $("#model").value = "medium";
});
// ─── Drag & drop ────────────────────────────────
const dz = $("#dropzone");
const fileInput = $("#file-input");
@ -369,9 +393,9 @@
lang: $("#lang").value || null,
whisper_model: $("#model").value,
auto_chorus: auto,
include_prebuild: $("#include-prebuild").checked,
start: !auto && $("#start").value ? parseTimestamp($("#start").value) : null,
duration: duration,
// Smart selection: max do 1.5x ciljno trajanje, min 0.7x
max_duration: auto ? Math.round(duration * 1.5) : duration,
min_duration: auto ? Math.round(duration * 0.7) : duration,
subtitle_style: $("#subtitle-style").value,
@ -569,6 +593,11 @@
const job = JSON.parse(e.data);
updateJobInList(job);
// Pokaži analysis summary in transkript če je na voljo
if (job.analysis_summary || job.full_transcript) {
updateAnalysisDisplay(job);
}
// Update live panel
const step = job.current_step || "";
const info = STAGE_INFO[step] || { pct: null, friendly: step };