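Fix find_chorus: RMS energy parser was broken (no pts_time available), now synthesizes timestamps from sample order; energy weight raised from 2 to 10 and repetition weight lowered from 10 to 5 (the chorus is usually louder than the verse)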

2026-04-28 16:55:51 +00:00 · 2026-04-28 16:55:51 +00:00 · c17578521a
commit c17578521a
parent 64e8854cea
1 changed files with 28 additions and 18 deletions
--- a/scripts/find_chorus.py
+++ b/scripts/find_chorus.py
@ -119,33 +119,42 @@ def find_repeated_lines(lines, similarity_threshold=0.5):
 def compute_energy(audio_path, window_sec=1.0):
    """
-    Vrni list (timestamp, rms_db) preko FFmpeg ebur128 filter.
+    Vrni list (timestamp, rms_db) preko FFmpeg astats filter.
    Vsako okno window_sec sekund vrne en RMS sample.
    """
    # Uporabi ebur128 ali astats za RMS
    cmd = [
        "ffmpeg", "-i", audio_path,
        "-af", f"asetnsamples=n={int(16000 * window_sec)}:p=0,astats=metadata=1:reset={window_sec},"
-               "ametadata=print:key=lavfi.astats.Overall.RMS_level",
+               "ametadata=print:key=lavfi.astats.Overall.RMS_level:file=-",
        "-f", "null", "-",
    ]
    result = subprocess.run(cmd, capture_output=True, text=True)
-    output = result.stderr
+    # ametadata file=- pošilja na stdout
    output = result.stdout + "\n" + result.stderr
    energies = []
    current_pts = None
    for line in output.split("\n"):
        line = line.strip()
-        if line.startswith("frame:"):
+        # Format A: "frame:N pts:X pts_time:Y"
-            # frame:N pts:X pts_time:Y
+        m = re.search(r"pts_time:(\S+)", line)
-            m = re.search(r"pts_time:(\S+)", line)
+        if m:
-            if m:
+            try:
                current_pts = float(m.group(1))
-        elif line.startswith("lavfi.astats.Overall.RMS_level="):
+            except ValueError:
-            val = line.split("=")[1]
+                pass
            continue
        # Format B: lavfi.astats.Overall.RMS_level=-15.123
        if "RMS_level=" in line:
            val = line.split("RMS_level=")[-1].strip()
            try:
                rms = float(val)
-                if current_pts is not None:
+                # Če nimamo timestamp-a, sintetiziraj na podlagi vrstnega reda
-                    energies.append((current_pts, rms))
+                if current_pts is None:
                    current_pts = len(energies) * window_sec
                energies.append((current_pts, rms))
                # Increment za naslednji vzorec, če FFmpeg ne pošilja pts
                current_pts += window_sec
            except ValueError:
                pass
@ -210,11 +219,12 @@ def find_chorus(video, lang=None, model_size="small", target_duration=30.0):
                avg_e = avg_energy_in_range(energies, start, start + target_duration)
                energy_score = max(0, avg_e - avg_overall)  # koliko nad povprečjem
-                # Score: število ponovitev + energy + dolžina vrstice
+                # Score: ponovitve + energija + dolžina vrstice
                # Refren je navadno glasnejši kot verz — energija je močnejši signal
                score = (
-                    len(cluster) * 10  # repetition weight
+                    len(cluster) * 5  # repetition weight (zmanjšano)
-                    + energy_score * 2  # energy weight
+                    + energy_score * 10  # energy weight (povečano — refren je glasnejši)
-                    + min(len(rep_text.split()), 10)  # text richness
+                    + min(len(rep_text.split()), 10)
                )
                candidates.append({
@ -229,11 +239,11 @@ def find_chorus(video, lang=None, model_size="small", target_duration=30.0):
                    "cluster_id": cluster_idx,
                })
-        # Sort by score, dedupe close candidates
+        # Sort by score, dedupe close candidates (vsaj 20s narazen)
        candidates.sort(key=lambda c: -c["score"])
        deduped = []
        for c in candidates:
-            if all(abs(c["start"] - d["start"]) > 5 for d in deduped):
+            if all(abs(c["start"] - d["start"]) > 20 for d in deduped):
                deduped.append(c)
            if len(deduped) >= 5:
                break