Fix: extend the clip if it ends mid-vocal (so the chorus is not cut off), switch subtitles to the DejaVu Sans font (supports SLO/HR/BS characters), and auto-upgrade to the medium Whisper model for Slavic languages
This commit is contained in:
parent
5d5e169f9d
commit
c870d80726
14
app/main.py
14
app/main.py
@ -166,6 +166,8 @@ def process_job(job_id):
|
||||
"--min-duration", str(job.get("min_duration", 20)),
|
||||
"--output", str(analysis_path),
|
||||
]
|
||||
if job.get("include_prebuild"):
|
||||
cmd += ["--include-prebuild"]
|
||||
# lang: če None ali 'auto', pusti analyze.py auto-detect
|
||||
if job.get("lang") and job["lang"] not in ("auto", ""):
|
||||
cmd += ["--lang", job["lang"]]
|
||||
@ -188,7 +190,15 @@ def process_job(job_id):
|
||||
"fade": fade,
|
||||
"chorus_preview": analysis["chorus"]["best"]["text_preview"]
|
||||
if analysis.get("chorus") and analysis["chorus"].get("best") else None,
|
||||
"video_duration": analysis.get("video_duration"),
|
||||
"candidates": analysis["chorus"].get("all_candidates", [])[:5]
|
||||
if analysis.get("chorus") else [],
|
||||
},
|
||||
# Cel transkript shranimo za UI prikaz
|
||||
full_transcript=[
|
||||
{"start": s["start"], "end": s["end"], "text": s["text"]}
|
||||
for s in analysis.get("transcript", {}).get("segments", [])
|
||||
],
|
||||
start=cr["start"],
|
||||
duration=cr["duration"],
|
||||
fade_in=fade["fade_in"],
|
||||
@ -299,9 +309,10 @@ class StartJobIn(BaseModel):
|
||||
mode: str = "track"
|
||||
lang: Optional[str] = None # None/auto = Whisper auto-detect
|
||||
auto_chorus: bool = True
|
||||
include_prebuild: bool = False # vključi pre-chorus build-up
|
||||
start: Optional[float] = None
|
||||
duration: Optional[float] = 30
|
||||
max_duration: Optional[float] = 45 # Smart selection lahko gre do 45s
|
||||
max_duration: Optional[float] = 45
|
||||
min_duration: Optional[float] = 20
|
||||
no_subs: bool = False
|
||||
subtitle_style: str = "reels"
|
||||
@ -401,6 +412,7 @@ async def start_processing(
|
||||
mode=payload.mode,
|
||||
lang=payload.lang,
|
||||
auto_chorus=payload.auto_chorus,
|
||||
include_prebuild=payload.include_prebuild,
|
||||
start=payload.start,
|
||||
duration=payload.duration,
|
||||
max_duration=payload.max_duration,
|
||||
|
||||
@ -346,31 +346,46 @@ def smart_clip_range(chorus, transcript, video_duration,
|
||||
}
|
||||
|
||||
|
||||
def detect_audio_fade(clip_range, transcript, video_duration=None):
    """Choose fade-in/fade-out lengths and propose an extended clip end.

    A clip boundary is considered "in a vocal" when it falls inside a
    transcript segment (segment start <= boundary <= segment end).

    Logic:
    - Clip starts inside a vocal segment -> 0.4s fade in, otherwise 0.2s.
    - Clip ends inside a vocal segment -> propose extending the clip to the
      end of that segment plus a 0.5s buffer (capped at ``video_duration``
      when given) and use a 0.5s fade out, otherwise 0.3s.

    Args:
        clip_range: dict with numeric "start" and "end" keys (same time unit
            as the segment boundaries).
        transcript: dict with a "segments" list whose items carry "start"
            and "end"; ``None`` or a missing/empty list is treated as
            "no vocals".
        video_duration: optional upper bound for the proposed end.

    Returns:
        dict with "fade_in", "fade_out", "extended_end" (rounded to two
        decimals, never earlier than the original clip end) and
        "ends_in_vocal".
    """
    end_buffer = 0.5  # breathing room appended after the last sung segment

    clip_start, clip_end = clip_range["start"], clip_range["end"]
    segments = (transcript or {}).get("segments") or []

    starts_in_vocal = any(
        seg["start"] <= clip_start <= seg["end"] for seg in segments
    )

    # When several segments overlap the clip end, extend to the one that
    # finishes last; picking whichever comes last in list order could still
    # cut a longer overlapping vocal.
    vocal_end = max(
        (seg["end"] for seg in segments if seg["start"] <= clip_end <= seg["end"]),
        default=None,
    )
    ends_in_vocal = vocal_end is not None

    extended_end = clip_end
    if ends_in_vocal:
        extended_end = vocal_end + end_buffer
        if video_duration is not None:
            extended_end = min(extended_end, video_duration)
        # An "extension" must never move the end backwards, even if
        # video_duration is smaller than the current clip end.
        extended_end = max(extended_end, clip_end)

    fade_in = 0.4 if starts_in_vocal else 0.2
    # The fade out stays short because the proposed end lies after the vocal.
    # NOTE(review): if the extension is capped (here or by the caller), the
    # clip may still end mid-vocal with only a 0.5s fade.
    fade_out = 0.5 if ends_in_vocal else 0.3

    return {
        "fade_in": fade_in,
        "fade_out": fade_out,
        "extended_end": round(extended_end, 2),
        "ends_in_vocal": ends_in_vocal,
    }
|
||||
|
||||
|
||||
def analyze_with_claude(transcript, video_duration, target_duration=30):
|
||||
@ -612,10 +627,19 @@ def main():
|
||||
f"(duration: {clip_range['duration']}s, source: {clip_range.get('source')})",
|
||||
file=sys.stderr)
|
||||
|
||||
# 7. Fade params
|
||||
fade = detect_audio_fade(clip_range, transcript)
|
||||
# 7. Fade params (lahko razširi clip end če konča sredi vokala)
|
||||
fade = detect_audio_fade(clip_range, transcript, video_duration=duration)
|
||||
print(f"🎚 Fade: in={fade['fade_in']}s, out={fade['fade_out']}s", file=sys.stderr)
|
||||
|
||||
# Če fade detection razširi end (ker clip konča sredi vokala), apply
|
||||
if fade.get("extended_end") and fade["extended_end"] > clip_range["end"]:
|
||||
old_end = clip_range["end"]
|
||||
new_end = min(fade["extended_end"], clip_range["start"] + args.max_duration)
|
||||
clip_range["end"] = round(new_end, 2)
|
||||
clip_range["duration"] = round(new_end - clip_range["start"], 2)
|
||||
print(f" ↳ Razširjen za {new_end - old_end:.1f}s (zaključek besedila)",
|
||||
file=sys.stderr)
|
||||
|
||||
result = {
|
||||
"video": str(video),
|
||||
"video_duration": duration,
|
||||
|
||||
@ -133,7 +133,7 @@ ScaledBorderAndShadow: yes
|
||||
|
||||
[V4+ Styles]
|
||||
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
|
||||
Style: Default,Arial,56,{primary},&H00FFFFFF,&H00000000,&H00000000,1,0,0,0,100,100,0,0,1,5,1,2,80,80,400,1
|
||||
Style: Default,DejaVu Sans,56,{primary},&H00FFFFFF,&H00000000,&H00000000,1,0,0,0,100,100,0,0,1,5,1,2,80,80,400,1
|
||||
|
||||
[Events]
|
||||
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||
|
||||
@ -233,9 +233,9 @@
|
||||
<select id="model">
|
||||
<option value="tiny">tiny (najhitrejši)</option>
|
||||
<option value="base">base</option>
|
||||
<option value="small" selected>small (priporočeno)</option>
|
||||
<option value="medium">medium (zelo dobro)</option>
|
||||
<option value="large-v3">large-v3 (najboljše)</option>
|
||||
<option value="small" selected>small (DE/EN, hitro)</option>
|
||||
<option value="medium">medium (priporočeno za SLO/HR/BS)</option>
|
||||
<option value="large-v3">large-v3 (najboljše, počasno)</option>
|
||||
</select>
|
||||
</div>
|
||||
</div>
|
||||
@ -245,9 +245,16 @@
|
||||
Pametna izbira odseka (Whisper + energy → najde refren)
|
||||
</label>
|
||||
<div style="font-size: 12px; color: var(--text-dim); margin-top: 4px; margin-left: 26px;">
|
||||
Sistem analizira celoten video, najde refren ter pre-chorus build-up.
|
||||
Lahko traja malo dlje (do 1.5×) če to bolje prikazuje pesem.
|
||||
Audio fade in/out je avtomatsko dodan.
|
||||
Sistem naredi <b>celoten transkript</b> in <b>energy profile</b>, najde refren in ga izreže.
|
||||
Audio fade in/out je avtomatsko dodan na meje vokala.
|
||||
</div>
|
||||
|
||||
<label class="toggle" style="margin-top: 12px; margin-left: 26px;">
|
||||
<input type="checkbox" id="include-prebuild">
|
||||
Vključi pre-chorus (build-up pred refrenom)
|
||||
</label>
|
||||
<div style="font-size: 12px; color: var(--text-dim); margin-top: 2px; margin-left: 52px;">
|
||||
Privzeto izklopljeno: dobiš čist refren brez kitice.
|
||||
</div>
|
||||
|
||||
<div id="manual-times" class="row hidden">
|
||||
@ -300,6 +307,14 @@
|
||||
<div class="progress-bar" id="live-bar" style="width: 0%;"></div>
|
||||
</div>
|
||||
<div style="font-size: 12px; color: var(--muted);" id="live-detail">Pripravljam...</div>
|
||||
<!-- Analysis summary z izbranim odsekom in transkriptom -->
|
||||
<div id="live-analysis" class="hidden" style="margin-top: 12px; padding: 10px; background: var(--panel); border-radius: 6px; font-size: 12px;">
|
||||
<div id="live-analysis-summary" style="margin-bottom: 8px; color: var(--text-dim);"></div>
|
||||
<details style="margin-top: 6px;">
|
||||
<summary style="cursor: pointer; color: var(--accent); font-weight: 600;">Pokaži celoten transkript</summary>
|
||||
<div id="live-transcript" style="margin-top: 8px; max-height: 240px; overflow-y: auto; font-family: monospace; font-size: 11px; line-height: 1.6;"></div>
|
||||
</details>
|
||||
</div>
|
||||
<div id="live-result" class="hidden" style="margin-top: 12px; display: flex; gap: 8px;">
|
||||
<button class="small" id="live-download" style="display: none;">⬇ Download</button>
|
||||
<button class="small ghost" id="live-preview" style="display: none;">▶ Preview</button>
|
||||
@ -337,6 +352,15 @@
|
||||
$("#manual-times").classList.toggle("hidden", e.target.checked);
|
||||
});
|
||||
|
||||
// ─── Auto-upgrade the Whisper model for Slavic languages ──
// When sl/hr/bs/sr is picked while the model is tiny, base or small,
// switch the model selector to "medium"; any other model choice is kept.
$("#lang").addEventListener("change", (event) => {
  const modelSelect = $("#model");
  const isSlavic = ["sl", "hr", "bs", "sr"].includes(event.target.value);
  const isSmallModel = ["tiny", "base", "small"].includes(modelSelect.value);
  if (isSlavic && isSmallModel) {
    modelSelect.value = "medium";
  }
});
|
||||
|
||||
// ─── Drag & drop ────────────────────────────────
|
||||
const dz = $("#dropzone");
|
||||
const fileInput = $("#file-input");
|
||||
@ -369,9 +393,9 @@
|
||||
lang: $("#lang").value || null,
|
||||
whisper_model: $("#model").value,
|
||||
auto_chorus: auto,
|
||||
include_prebuild: $("#include-prebuild").checked,
|
||||
start: !auto && $("#start").value ? parseTimestamp($("#start").value) : null,
|
||||
duration: duration,
|
||||
// Smart selection: max do 1.5x ciljno trajanje, min 0.7x
|
||||
max_duration: auto ? Math.round(duration * 1.5) : duration,
|
||||
min_duration: auto ? Math.round(duration * 0.7) : duration,
|
||||
subtitle_style: $("#subtitle-style").value,
|
||||
@ -569,6 +593,11 @@
|
||||
const job = JSON.parse(e.data);
|
||||
updateJobInList(job);
|
||||
|
||||
// Pokaži analysis summary in transkript če je na voljo
|
||||
if (job.analysis_summary || job.full_transcript) {
|
||||
updateAnalysisDisplay(job);
|
||||
}
|
||||
|
||||
// Update live panel
|
||||
const step = job.current_step || "";
|
||||
const info = STAGE_INFO[step] || { pct: null, friendly: step };
|
||||
|
||||
Loading…
Reference in New Issue
Block a user