diff --git a/scripts/find_chorus.py b/scripts/find_chorus.py index e1d24b6..32fdc10 100644 --- a/scripts/find_chorus.py +++ b/scripts/find_chorus.py @@ -119,33 +119,42 @@ def find_repeated_lines(lines, similarity_threshold=0.5): def compute_energy(audio_path, window_sec=1.0): """ - Vrni list (timestamp, rms_db) preko FFmpeg ebur128 filter. + Vrni list (timestamp, rms_db) preko FFmpeg astats filter. + Vsako okno window_sec sekund vrne en RMS sample. """ - # Uporabi ebur128 ali astats za RMS cmd = [ "ffmpeg", "-i", audio_path, "-af", f"asetnsamples=n={int(16000 * window_sec)}:p=0,astats=metadata=1:reset={window_sec}," - "ametadata=print:key=lavfi.astats.Overall.RMS_level", + "ametadata=print:key=lavfi.astats.Overall.RMS_level:file=-", "-f", "null", "-", ] result = subprocess.run(cmd, capture_output=True, text=True) - output = result.stderr + # ametadata file=- pošilja na stdout + output = result.stdout + "\n" + result.stderr energies = [] current_pts = None for line in output.split("\n"): line = line.strip() - if line.startswith("frame:"): - # frame:N pts:X pts_time:Y - m = re.search(r"pts_time:(\S+)", line) - if m: + # Format A: "frame:N pts:X pts_time:Y" + m = re.search(r"pts_time:(\S+)", line) + if m: + try: current_pts = float(m.group(1)) - elif line.startswith("lavfi.astats.Overall.RMS_level="): - val = line.split("=")[1] + except ValueError: + pass + continue + # Format B: lavfi.astats.Overall.RMS_level=-15.123 + if "RMS_level=" in line: + val = line.split("RMS_level=")[-1].strip() try: rms = float(val) - if current_pts is not None: - energies.append((current_pts, rms)) + # Če nimamo timestamp-a, sintetiziraj na podlagi vrstnega reda + if current_pts is None: + current_pts = len(energies) * window_sec + energies.append((current_pts, rms)) + # Increment za naslednji vzorec, če FFmpeg ne pošilja pts + current_pts += window_sec except ValueError: pass @@ -210,11 +219,12 @@ def find_chorus(video, lang=None, model_size="small", target_duration=30.0): avg_e = avg_energy_in_range(energies, start, start + target_duration) energy_score = max(0, avg_e - avg_overall) # koliko nad povprečjem - # Score: število ponovitev + energy + dolžina vrstice + # Score: ponovitve + energija + dolžina vrstice + # Refren je navadno glasnejši kot verz — energija je močnejši signal score = ( - len(cluster) * 10 # repetition weight - + energy_score * 2 # energy weight - + min(len(rep_text.split()), 10) # text richness + len(cluster) * 5 # repetition weight (zmanjšano) + + energy_score * 10 # energy weight (povečano — refren je glasnejši) + + min(len(rep_text.split()), 10) ) candidates.append({ @@ -229,11 +239,11 @@ def find_chorus(video, lang=None, model_size="small", target_duration=30.0): "cluster_id": cluster_idx, }) - # Sort by score, dedupe close candidates + # Sort by score, dedupe close candidates (vsaj 20s narazen) candidates.sort(key=lambda c: -c["score"]) deduped = [] for c in candidates: - if all(abs(c["start"] - d["start"]) > 5 for d in deduped): + if all(abs(c["start"] - d["start"]) > 20 for d in deduped): deduped.append(c) if len(deduped) >= 5: break