Fix: extend clip if ends mid-vocal (no chorus cut-off), DejaVu Sans font (supports SLO/HR/BS chars), auto-upgrade to medium Whisper model for Slavic languages
This commit is contained in:
parent
5d5e169f9d
commit
c870d80726
14
app/main.py
14
app/main.py
@ -166,6 +166,8 @@ def process_job(job_id):
|
|||||||
"--min-duration", str(job.get("min_duration", 20)),
|
"--min-duration", str(job.get("min_duration", 20)),
|
||||||
"--output", str(analysis_path),
|
"--output", str(analysis_path),
|
||||||
]
|
]
|
||||||
|
if job.get("include_prebuild"):
|
||||||
|
cmd += ["--include-prebuild"]
|
||||||
# lang: če None ali 'auto', pusti analyze.py auto-detect
|
# lang: če None ali 'auto', pusti analyze.py auto-detect
|
||||||
if job.get("lang") and job["lang"] not in ("auto", ""):
|
if job.get("lang") and job["lang"] not in ("auto", ""):
|
||||||
cmd += ["--lang", job["lang"]]
|
cmd += ["--lang", job["lang"]]
|
||||||
@ -188,7 +190,15 @@ def process_job(job_id):
|
|||||||
"fade": fade,
|
"fade": fade,
|
||||||
"chorus_preview": analysis["chorus"]["best"]["text_preview"]
|
"chorus_preview": analysis["chorus"]["best"]["text_preview"]
|
||||||
if analysis.get("chorus") and analysis["chorus"].get("best") else None,
|
if analysis.get("chorus") and analysis["chorus"].get("best") else None,
|
||||||
|
"video_duration": analysis.get("video_duration"),
|
||||||
|
"candidates": analysis["chorus"].get("all_candidates", [])[:5]
|
||||||
|
if analysis.get("chorus") else [],
|
||||||
},
|
},
|
||||||
|
# Cel transkript shranimo za UI prikaz
|
||||||
|
full_transcript=[
|
||||||
|
{"start": s["start"], "end": s["end"], "text": s["text"]}
|
||||||
|
for s in analysis.get("transcript", {}).get("segments", [])
|
||||||
|
],
|
||||||
start=cr["start"],
|
start=cr["start"],
|
||||||
duration=cr["duration"],
|
duration=cr["duration"],
|
||||||
fade_in=fade["fade_in"],
|
fade_in=fade["fade_in"],
|
||||||
@ -299,9 +309,10 @@ class StartJobIn(BaseModel):
|
|||||||
mode: str = "track"
|
mode: str = "track"
|
||||||
lang: Optional[str] = None # None/auto = Whisper auto-detect
|
lang: Optional[str] = None # None/auto = Whisper auto-detect
|
||||||
auto_chorus: bool = True
|
auto_chorus: bool = True
|
||||||
|
include_prebuild: bool = False # vključi pre-chorus build-up
|
||||||
start: Optional[float] = None
|
start: Optional[float] = None
|
||||||
duration: Optional[float] = 30
|
duration: Optional[float] = 30
|
||||||
max_duration: Optional[float] = 45 # Smart selection lahko gre do 45s
|
max_duration: Optional[float] = 45
|
||||||
min_duration: Optional[float] = 20
|
min_duration: Optional[float] = 20
|
||||||
no_subs: bool = False
|
no_subs: bool = False
|
||||||
subtitle_style: str = "reels"
|
subtitle_style: str = "reels"
|
||||||
@ -401,6 +412,7 @@ async def start_processing(
|
|||||||
mode=payload.mode,
|
mode=payload.mode,
|
||||||
lang=payload.lang,
|
lang=payload.lang,
|
||||||
auto_chorus=payload.auto_chorus,
|
auto_chorus=payload.auto_chorus,
|
||||||
|
include_prebuild=payload.include_prebuild,
|
||||||
start=payload.start,
|
start=payload.start,
|
||||||
duration=payload.duration,
|
duration=payload.duration,
|
||||||
max_duration=payload.max_duration,
|
max_duration=payload.max_duration,
|
||||||
|
|||||||
@ -346,31 +346,46 @@ def smart_clip_range(chorus, transcript, video_duration,
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def detect_audio_fade(clip_range, transcript):
|
def detect_audio_fade(clip_range, transcript, video_duration=None):
|
||||||
"""Določi fade-in/fade-out trajanje.
|
"""Določi fade-in/fade-out trajanje + ev. razširi clip range, da fade
|
||||||
|
ne reže besedila na koncu refrena.
|
||||||
|
|
||||||
Logika:
|
Logika:
|
||||||
- Če clip začne sredi vokala → 0.5s fade in
|
- Če clip začne sredi vokala → 0.5s fade in
|
||||||
- Če se konča sredi vokala → 1.0s fade out
|
- Če se konča sredi vokala → razširi clip do konca segmenta (+ buffer),
|
||||||
|
potem 1.0s fade out
|
||||||
- Sicer manj fade
|
- Sicer manj fade
|
||||||
"""
|
"""
|
||||||
cs, ce = clip_range["start"], clip_range["end"]
|
cs, ce = clip_range["start"], clip_range["end"]
|
||||||
|
|
||||||
# Vokal pri začetku?
|
# Najdi segment, ki konča znotraj clip-a (ali je clip end znotraj segmenta)
|
||||||
starts_in_vocal = False
|
starts_in_vocal = False
|
||||||
ends_in_vocal = False
|
ends_in_vocal = False
|
||||||
|
end_segment = None
|
||||||
for seg in transcript["segments"]:
|
for seg in transcript["segments"]:
|
||||||
# Začetek clip-a znotraj segmenta
|
|
||||||
if seg["start"] <= cs <= seg["end"]:
|
if seg["start"] <= cs <= seg["end"]:
|
||||||
starts_in_vocal = True
|
starts_in_vocal = True
|
||||||
# Konec clip-a znotraj segmenta
|
|
||||||
if seg["start"] <= ce <= seg["end"]:
|
if seg["start"] <= ce <= seg["end"]:
|
||||||
ends_in_vocal = True
|
ends_in_vocal = True
|
||||||
|
end_segment = seg
|
||||||
|
|
||||||
fade_in = 0.5 if starts_in_vocal else 0.2
|
# Če clip konča znotraj segmenta, razširi do konca segmenta + 0.5s buffer
|
||||||
fade_out = 1.5 if ends_in_vocal else 0.3
|
extended_end = ce
|
||||||
|
if end_segment:
|
||||||
|
extended_end = end_segment["end"] + 0.5
|
||||||
|
if video_duration is not None:
|
||||||
|
extended_end = min(extended_end, video_duration)
|
||||||
|
|
||||||
return {"fade_in": fade_in, "fade_out": fade_out}
|
fade_in = 0.4 if starts_in_vocal else 0.2
|
||||||
|
# Krajši fade out (0.5s) ker zdaj clip konča po koncu vokala
|
||||||
|
fade_out = 0.5 if ends_in_vocal else 0.3
|
||||||
|
|
||||||
|
return {
|
||||||
|
"fade_in": fade_in,
|
||||||
|
"fade_out": fade_out,
|
||||||
|
"extended_end": round(extended_end, 2),
|
||||||
|
"ends_in_vocal": ends_in_vocal,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
def analyze_with_claude(transcript, video_duration, target_duration=30):
|
def analyze_with_claude(transcript, video_duration, target_duration=30):
|
||||||
@ -612,10 +627,19 @@ def main():
|
|||||||
f"(duration: {clip_range['duration']}s, source: {clip_range.get('source')})",
|
f"(duration: {clip_range['duration']}s, source: {clip_range.get('source')})",
|
||||||
file=sys.stderr)
|
file=sys.stderr)
|
||||||
|
|
||||||
# 7. Fade params
|
# 7. Fade params (lahko razširi clip end če konča sredi vokala)
|
||||||
fade = detect_audio_fade(clip_range, transcript)
|
fade = detect_audio_fade(clip_range, transcript, video_duration=duration)
|
||||||
print(f"🎚 Fade: in={fade['fade_in']}s, out={fade['fade_out']}s", file=sys.stderr)
|
print(f"🎚 Fade: in={fade['fade_in']}s, out={fade['fade_out']}s", file=sys.stderr)
|
||||||
|
|
||||||
|
# Če fade detection razširi end (ker clip konča sredi vokala), apply
|
||||||
|
if fade.get("extended_end") and fade["extended_end"] > clip_range["end"]:
|
||||||
|
old_end = clip_range["end"]
|
||||||
|
new_end = min(fade["extended_end"], clip_range["start"] + args.max_duration)
|
||||||
|
clip_range["end"] = round(new_end, 2)
|
||||||
|
clip_range["duration"] = round(new_end - clip_range["start"], 2)
|
||||||
|
print(f" ↳ Razširjen za {new_end - old_end:.1f}s (zaključek besedila)",
|
||||||
|
file=sys.stderr)
|
||||||
|
|
||||||
result = {
|
result = {
|
||||||
"video": str(video),
|
"video": str(video),
|
||||||
"video_duration": duration,
|
"video_duration": duration,
|
||||||
|
|||||||
@ -133,7 +133,7 @@ ScaledBorderAndShadow: yes
|
|||||||
|
|
||||||
[V4+ Styles]
|
[V4+ Styles]
|
||||||
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
|
Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
|
||||||
Style: Default,Arial,56,{primary},&H00FFFFFF,&H00000000,&H00000000,1,0,0,0,100,100,0,0,1,5,1,2,80,80,400,1
|
Style: Default,DejaVu Sans,56,{primary},&H00FFFFFF,&H00000000,&H00000000,1,0,0,0,100,100,0,0,1,5,1,2,80,80,400,1
|
||||||
|
|
||||||
[Events]
|
[Events]
|
||||||
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
|
||||||
|
|||||||
@ -233,9 +233,9 @@
|
|||||||
<select id="model">
|
<select id="model">
|
||||||
<option value="tiny">tiny (najhitrejši)</option>
|
<option value="tiny">tiny (najhitrejši)</option>
|
||||||
<option value="base">base</option>
|
<option value="base">base</option>
|
||||||
<option value="small" selected>small (priporočeno)</option>
|
<option value="small" selected>small (DE/EN, hitro)</option>
|
||||||
<option value="medium">medium (zelo dobro)</option>
|
<option value="medium">medium (priporočeno za SLO/HR/BS)</option>
|
||||||
<option value="large-v3">large-v3 (najboljše)</option>
|
<option value="large-v3">large-v3 (najboljše, počasno)</option>
|
||||||
</select>
|
</select>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
@ -245,9 +245,16 @@
|
|||||||
Pametna izbira odseka (Whisper + energy → najde refren)
|
Pametna izbira odseka (Whisper + energy → najde refren)
|
||||||
</label>
|
</label>
|
||||||
<div style="font-size: 12px; color: var(--text-dim); margin-top: 4px; margin-left: 26px;">
|
<div style="font-size: 12px; color: var(--text-dim); margin-top: 4px; margin-left: 26px;">
|
||||||
Sistem analizira celoten video, najde refren ter pre-chorus build-up.
|
Sistem naredi <b>celoten transkript</b> in <b>energy profile</b>, najde refren in ga izreže.
|
||||||
Lahko traja malo dlje (do 1.5×) če to bolje prikazuje pesem.
|
Audio fade in/out je avtomatsko dodan na meje vokala.
|
||||||
Audio fade in/out je avtomatsko dodan.
|
</div>
|
||||||
|
|
||||||
|
<label class="toggle" style="margin-top: 12px; margin-left: 26px;">
|
||||||
|
<input type="checkbox" id="include-prebuild">
|
||||||
|
Vključi pre-chorus (build-up pred refrenom)
|
||||||
|
</label>
|
||||||
|
<div style="font-size: 12px; color: var(--text-dim); margin-top: 2px; margin-left: 52px;">
|
||||||
|
Privzeto izklopljeno: dobiš čist refren brez kitice.
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div id="manual-times" class="row hidden">
|
<div id="manual-times" class="row hidden">
|
||||||
@ -300,6 +307,14 @@
|
|||||||
<div class="progress-bar" id="live-bar" style="width: 0%;"></div>
|
<div class="progress-bar" id="live-bar" style="width: 0%;"></div>
|
||||||
</div>
|
</div>
|
||||||
<div style="font-size: 12px; color: var(--muted);" id="live-detail">Pripravljam...</div>
|
<div style="font-size: 12px; color: var(--muted);" id="live-detail">Pripravljam...</div>
|
||||||
|
<!-- Analysis summary z izbranim odsekom in transkriptom -->
|
||||||
|
<div id="live-analysis" class="hidden" style="margin-top: 12px; padding: 10px; background: var(--panel); border-radius: 6px; font-size: 12px;">
|
||||||
|
<div id="live-analysis-summary" style="margin-bottom: 8px; color: var(--text-dim);"></div>
|
||||||
|
<details style="margin-top: 6px;">
|
||||||
|
<summary style="cursor: pointer; color: var(--accent); font-weight: 600;">Pokaži celoten transkript</summary>
|
||||||
|
<div id="live-transcript" style="margin-top: 8px; max-height: 240px; overflow-y: auto; font-family: monospace; font-size: 11px; line-height: 1.6;"></div>
|
||||||
|
</details>
|
||||||
|
</div>
|
||||||
<div id="live-result" class="hidden" style="margin-top: 12px; display: flex; gap: 8px;">
|
<div id="live-result" class="hidden" style="margin-top: 12px; display: flex; gap: 8px;">
|
||||||
<button class="small" id="live-download" style="display: none;">⬇ Download</button>
|
<button class="small" id="live-download" style="display: none;">⬇ Download</button>
|
||||||
<button class="small ghost" id="live-preview" style="display: none;">▶ Preview</button>
|
<button class="small ghost" id="live-preview" style="display: none;">▶ Preview</button>
|
||||||
@ -337,6 +352,15 @@
|
|||||||
$("#manual-times").classList.toggle("hidden", e.target.checked);
|
$("#manual-times").classList.toggle("hidden", e.target.checked);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// ─── Auto-upgrade Whisper model za slovanske jezike ──
|
||||||
|
$("#lang").addEventListener("change", e => {
|
||||||
|
const slavicLangs = ["sl", "hr", "bs", "sr"];
|
||||||
|
const currentModel = $("#model").value;
|
||||||
|
if (slavicLangs.includes(e.target.value) && (currentModel === "tiny" || currentModel === "base" || currentModel === "small")) {
|
||||||
|
$("#model").value = "medium";
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
// ─── Drag & drop ────────────────────────────────
|
// ─── Drag & drop ────────────────────────────────
|
||||||
const dz = $("#dropzone");
|
const dz = $("#dropzone");
|
||||||
const fileInput = $("#file-input");
|
const fileInput = $("#file-input");
|
||||||
@ -369,9 +393,9 @@
|
|||||||
lang: $("#lang").value || null,
|
lang: $("#lang").value || null,
|
||||||
whisper_model: $("#model").value,
|
whisper_model: $("#model").value,
|
||||||
auto_chorus: auto,
|
auto_chorus: auto,
|
||||||
|
include_prebuild: $("#include-prebuild").checked,
|
||||||
start: !auto && $("#start").value ? parseTimestamp($("#start").value) : null,
|
start: !auto && $("#start").value ? parseTimestamp($("#start").value) : null,
|
||||||
duration: duration,
|
duration: duration,
|
||||||
// Smart selection: max do 1.5x ciljno trajanje, min 0.7x
|
|
||||||
max_duration: auto ? Math.round(duration * 1.5) : duration,
|
max_duration: auto ? Math.round(duration * 1.5) : duration,
|
||||||
min_duration: auto ? Math.round(duration * 0.7) : duration,
|
min_duration: auto ? Math.round(duration * 0.7) : duration,
|
||||||
subtitle_style: $("#subtitle-style").value,
|
subtitle_style: $("#subtitle-style").value,
|
||||||
@ -569,6 +593,11 @@
|
|||||||
const job = JSON.parse(e.data);
|
const job = JSON.parse(e.data);
|
||||||
updateJobInList(job);
|
updateJobInList(job);
|
||||||
|
|
||||||
|
// Pokaži analysis summary in transkript če je na voljo
|
||||||
|
if (job.analysis_summary || job.full_transcript) {
|
||||||
|
updateAnalysisDisplay(job);
|
||||||
|
}
|
||||||
|
|
||||||
// Update live panel
|
// Update live panel
|
||||||
const step = job.current_step || "";
|
const step = job.current_step || "";
|
||||||
const info = STAGE_INFO[step] || { pct: null, friendly: step };
|
const info = STAGE_INFO[step] || { pct: null, friendly: step };
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user