Fix: extend clip if it ends mid-vocal (no chorus cut-off), use DejaVu Sans font (supports SLO/HR/BS chars), auto-upgrade to medium Whisper model for Slavic languages

2026-04-29 07:35:00 +00:00 · 2026-04-29 07:35:00 +00:00 · c870d80726
commit c870d80726
parent 5d5e169f9d
4 changed files with 85 additions and 20 deletions
--- a/app/main.py
+++ b/app/main.py
@ -166,6 +166,8 @@ def process_job(job_id):
                "--min-duration", str(job.get("min_duration", 20)),
                "--output", str(analysis_path),
            ]
+            if job.get("include_prebuild"):
+                cmd += ["--include-prebuild"]
            # lang: če None ali 'auto', pusti analyze.py auto-detect
            if job.get("lang") and job["lang"] not in ("auto", ""):
                cmd += ["--lang", job["lang"]]
@ -188,7 +190,15 @@ def process_job(job_id):
                            "fade": fade,
                            "chorus_preview": analysis["chorus"]["best"]["text_preview"]
                                if analysis.get("chorus") and analysis["chorus"].get("best") else None,
+                            "video_duration": analysis.get("video_duration"),
+                            "candidates": analysis["chorus"].get("all_candidates", [])[:5]
+                                if analysis.get("chorus") else [],
                        },
+                        # Cel transkript shranimo za UI prikaz
+                        full_transcript=[
+                            {"start": s["start"], "end": s["end"], "text": s["text"]}
+                            for s in analysis.get("transcript", {}).get("segments", [])
+                        ],
                        start=cr["start"],
                        duration=cr["duration"],
                        fade_in=fade["fade_in"],
@ -299,9 +309,10 @@ class StartJobIn(BaseModel):
    mode: str = "track"
    lang: Optional[str] = None  # None/auto = Whisper auto-detect
    auto_chorus: bool = True
+    include_prebuild: bool = False  # vključi pre-chorus build-up
    start: Optional[float] = None
    duration: Optional[float] = 30
-    max_duration: Optional[float] = 45  # Smart selection lahko gre do 45s
+    max_duration: Optional[float] = 45
    min_duration: Optional[float] = 20
    no_subs: bool = False
    subtitle_style: str = "reels"
@ -401,6 +412,7 @@ async def start_processing(
        mode=payload.mode,
        lang=payload.lang,
        auto_chorus=payload.auto_chorus,
+        include_prebuild=payload.include_prebuild,
        start=payload.start,
        duration=payload.duration,
        max_duration=payload.max_duration,
--- a/scripts/analyze.py
+++ b/scripts/analyze.py
@ -346,31 +346,46 @@ def smart_clip_range(chorus, transcript, video_duration,
    }


-def detect_audio_fade(clip_range, transcript):
-    """Določi fade-in/fade-out trajanje.
+def detect_audio_fade(clip_range, transcript, video_duration=None):
+    """Določi fade-in/fade-out trajanje + ev. razširi clip range, da fade
+    ne reže besedila na koncu refrena.

    Logika:
    - Če clip začne sredi vokala → 0.5s fade in
-    - Če se konča sredi vokala → 1.0s fade out
+    - Če se konča sredi vokala → razširi clip do konca segmenta (+ buffer),
+      potem 0.5s fade out
    - Sicer manj fade
    """
    cs, ce = clip_range["start"], clip_range["end"]

-    # Vokal pri začetku?
+    # Preveri, ali se clip začne oz. konča znotraj segmenta; zapomni si segment, v katerem se konča
    starts_in_vocal = False
    ends_in_vocal = False
+    end_segment = None
    for seg in transcript["segments"]:
-        # Začetek clip-a znotraj segmenta
        if seg["start"] <= cs <= seg["end"]:
            starts_in_vocal = True
-        # Konec clip-a znotraj segmenta
        if seg["start"] <= ce <= seg["end"]:
            ends_in_vocal = True
+            end_segment = seg

-    fade_in = 0.5 if starts_in_vocal else 0.2
-    fade_out = 1.5 if ends_in_vocal else 0.3
+    # Če clip konča znotraj segmenta, razširi do konca segmenta + 0.5s buffer
+    extended_end = ce
+    if end_segment:
+        extended_end = end_segment["end"] + 0.5
+        if video_duration is not None:
+            extended_end = min(extended_end, video_duration)

-    return {"fade_in": fade_in, "fade_out": fade_out}
+    fade_in = 0.4 if starts_in_vocal else 0.2
+    # Krajši fade out (0.5s) ker zdaj clip konča po koncu vokala
+    fade_out = 0.5 if ends_in_vocal else 0.3
+
+    return {
+        "fade_in": fade_in,
+        "fade_out": fade_out,
+        "extended_end": round(extended_end, 2),
+        "ends_in_vocal": ends_in_vocal,
+    }


 def analyze_with_claude(transcript, video_duration, target_duration=30):
@ -612,10 +627,19 @@ def main():
              f"(duration: {clip_range['duration']}s, source: {clip_range.get('source')})",
              file=sys.stderr)

-        # 7. Fade params
-        fade = detect_audio_fade(clip_range, transcript)
+        # 7. Fade params (lahko razširi clip end če konča sredi vokala)
+        fade = detect_audio_fade(clip_range, transcript, video_duration=duration)
        print(f"🎚  Fade: in={fade['fade_in']}s, out={fade['fade_out']}s", file=sys.stderr)

+        # Če fade detection razširi end (ker clip konča sredi vokala), apply
+        if fade.get("extended_end") and fade["extended_end"] > clip_range["end"]:
+            old_end = clip_range["end"]
+            new_end = min(fade["extended_end"], clip_range["start"] + args.max_duration)
+            clip_range["end"] = round(new_end, 2)
+            clip_range["duration"] = round(new_end - clip_range["start"], 2)
+            print(f"   ↳ Razširjen za {new_end - old_end:.1f}s (zaključek besedila)",
+                  file=sys.stderr)
+
        result = {
            "video": str(video),
            "video_duration": duration,
--- a/scripts/subtitle.py
+++ b/scripts/subtitle.py
@ -133,7 +133,7 @@ ScaledBorderAndShadow: yes

 [V4+ Styles]
 Format: Name, Fontname, Fontsize, PrimaryColour, SecondaryColour, OutlineColour, BackColour, Bold, Italic, Underline, StrikeOut, ScaleX, ScaleY, Spacing, Angle, BorderStyle, Outline, Shadow, Alignment, MarginL, MarginR, MarginV, Encoding
-Style: Default,Arial,56,{primary},&H00FFFFFF,&H00000000,&H00000000,1,0,0,0,100,100,0,0,1,5,1,2,80,80,400,1
+Style: Default,DejaVu Sans,56,{primary},&H00FFFFFF,&H00000000,&H00000000,1,0,0,0,100,100,0,0,1,5,1,2,80,80,400,1

 [Events]
 Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
--- a/templates/index.html
+++ b/templates/index.html
@ -233,9 +233,9 @@
          <select id="model">
            <option value="tiny">tiny (najhitrejši)</option>
            <option value="base">base</option>
-            <option value="small" selected>small (priporočeno)</option>
-            <option value="medium">medium (zelo dobro)</option>
-            <option value="large-v3">large-v3 (najboljše)</option>
+            <option value="small" selected>small (DE/EN, hitro)</option>
+            <option value="medium">medium (priporočeno za SLO/HR/BS)</option>
+            <option value="large-v3">large-v3 (najboljše, počasno)</option>
          </select>
        </div>
      </div>
@ -245,9 +245,16 @@
        Pametna izbira odseka (Whisper + energy → najde refren)
      </label>
      <div style="font-size: 12px; color: var(--text-dim); margin-top: 4px; margin-left: 26px;">
-        Sistem analizira celoten video, najde refren ter pre-chorus build-up.
-        Lahko traja malo dlje (do 1.5×) če to bolje prikazuje pesem.
-        Audio fade in/out je avtomatsko dodan.
+        Sistem naredi <b>celoten transkript</b> in <b>energy profile</b>, najde refren in ga izreže.
+        Audio fade in/out je avtomatsko dodan na meje vokala.
+      </div>
+
+      <label class="toggle" style="margin-top: 12px; margin-left: 26px;">
+        <input type="checkbox" id="include-prebuild">
+        Vključi pre-chorus (build-up pred refrenom)
+      </label>
+      <div style="font-size: 12px; color: var(--text-dim); margin-top: 2px; margin-left: 52px;">
+        Privzeto izklopljeno: dobiš čist refren brez kitice.
      </div>

      <div id="manual-times" class="row hidden">
@ -300,6 +307,14 @@
          <div class="progress-bar" id="live-bar" style="width: 0%;"></div>
        </div>
        <div style="font-size: 12px; color: var(--muted);" id="live-detail">Pripravljam...</div>
+        <!-- Analysis summary z izbranim odsekom in transkriptom -->
+        <div id="live-analysis" class="hidden" style="margin-top: 12px; padding: 10px; background: var(--panel); border-radius: 6px; font-size: 12px;">
+          <div id="live-analysis-summary" style="margin-bottom: 8px; color: var(--text-dim);"></div>
+          <details style="margin-top: 6px;">
+            <summary style="cursor: pointer; color: var(--accent); font-weight: 600;">Pokaži celoten transkript</summary>
+            <div id="live-transcript" style="margin-top: 8px; max-height: 240px; overflow-y: auto; font-family: monospace; font-size: 11px; line-height: 1.6;"></div>
+          </details>
+        </div>
        <div id="live-result" class="hidden" style="margin-top: 12px; display: flex; gap: 8px;">
          <button class="small" id="live-download" style="display: none;">⬇ Download</button>
          <button class="small ghost" id="live-preview" style="display: none;">▶ Preview</button>
@ -337,6 +352,15 @@
      $("#manual-times").classList.toggle("hidden", e.target.checked);
    });

+    // ─── Auto-upgrade Whisper model za slovanske jezike ──
+    $("#lang").addEventListener("change", e => {
+      const slavicLangs = ["sl", "hr", "bs", "sr"];
+      const currentModel = $("#model").value;
+      if (slavicLangs.includes(e.target.value) && (currentModel === "tiny" || currentModel === "base" || currentModel === "small")) {
+        $("#model").value = "medium";
+      }
+    });
+
    // ─── Drag & drop ────────────────────────────────
    const dz = $("#dropzone");
    const fileInput = $("#file-input");
@ -369,9 +393,9 @@
        lang: $("#lang").value || null,
        whisper_model: $("#model").value,
        auto_chorus: auto,
+        include_prebuild: $("#include-prebuild").checked,
        start: !auto && $("#start").value ? parseTimestamp($("#start").value) : null,
        duration: duration,
-        // Smart selection: max do 1.5x ciljno trajanje, min 0.7x
        max_duration: auto ? Math.round(duration * 1.5) : duration,
        min_duration: auto ? Math.round(duration * 0.7) : duration,
        subtitle_style: $("#subtitle-style").value,
@ -569,6 +593,11 @@
          const job = JSON.parse(e.data);
          updateJobInList(job);

+          // Pokaži analysis summary in transkript če je na voljo
+          if (job.analysis_summary || job.full_transcript) {
+            updateAnalysisDisplay(job);
+          }
+
          // Update live panel
          const step = job.current_step || "";
          const info = STAGE_INFO[step] || { pct: null, friendly: step };