Fix find_chorus: the RMS energy parser was broken (no pts_time available), so it now synthesizes timestamps; energy weight raised from 2 to 10 and repetition weight lowered from 10 to 5 (refren je glasnejši — the chorus is louder)

This commit is contained in:
Sebastjan Artič 2026-04-28 16:55:51 +00:00
parent 64e8854cea
commit c17578521a

View File

@ -119,33 +119,42 @@ def find_repeated_lines(lines, similarity_threshold=0.5):
def compute_energy(audio_path, window_sec=1.0): def compute_energy(audio_path, window_sec=1.0):
""" """
Vrni list (timestamp, rms_db) preko FFmpeg ebur128 filter. Vrni list (timestamp, rms_db) preko FFmpeg astats filter.
Vsako okno window_sec sekund vrne en RMS sample.
""" """
# Uporabi ebur128 ali astats za RMS
cmd = [ cmd = [
"ffmpeg", "-i", audio_path, "ffmpeg", "-i", audio_path,
"-af", f"asetnsamples=n={int(16000 * window_sec)}:p=0,astats=metadata=1:reset={window_sec}," "-af", f"asetnsamples=n={int(16000 * window_sec)}:p=0,astats=metadata=1:reset={window_sec},"
"ametadata=print:key=lavfi.astats.Overall.RMS_level", "ametadata=print:key=lavfi.astats.Overall.RMS_level:file=-",
"-f", "null", "-", "-f", "null", "-",
] ]
result = subprocess.run(cmd, capture_output=True, text=True) result = subprocess.run(cmd, capture_output=True, text=True)
output = result.stderr # ametadata file=- pošilja na stdout
output = result.stdout + "\n" + result.stderr
energies = [] energies = []
current_pts = None current_pts = None
for line in output.split("\n"): for line in output.split("\n"):
line = line.strip() line = line.strip()
if line.startswith("frame:"): # Format A: "frame:N pts:X pts_time:Y"
# frame:N pts:X pts_time:Y m = re.search(r"pts_time:(\S+)", line)
m = re.search(r"pts_time:(\S+)", line) if m:
if m: try:
current_pts = float(m.group(1)) current_pts = float(m.group(1))
elif line.startswith("lavfi.astats.Overall.RMS_level="): except ValueError:
val = line.split("=")[1] pass
continue
# Format B: lavfi.astats.Overall.RMS_level=-15.123
if "RMS_level=" in line:
val = line.split("RMS_level=")[-1].strip()
try: try:
rms = float(val) rms = float(val)
if current_pts is not None: # Če nimamo timestamp-a, sintetiziraj na podlagi vrstnega reda
energies.append((current_pts, rms)) if current_pts is None:
current_pts = len(energies) * window_sec
energies.append((current_pts, rms))
# Increment za naslednji vzorec, če FFmpeg ne pošilja pts
current_pts += window_sec
except ValueError: except ValueError:
pass pass
@ -210,11 +219,12 @@ def find_chorus(video, lang=None, model_size="small", target_duration=30.0):
avg_e = avg_energy_in_range(energies, start, start + target_duration) avg_e = avg_energy_in_range(energies, start, start + target_duration)
energy_score = max(0, avg_e - avg_overall) # koliko nad povprečjem energy_score = max(0, avg_e - avg_overall) # koliko nad povprečjem
# Score: število ponovitev + energy + dolžina vrstice # Score: ponovitve + energija + dolžina vrstice
# Refren je navadno glasnejši kot verz — energija je močnejši signal
score = ( score = (
len(cluster) * 10 # repetition weight len(cluster) * 5 # repetition weight (zmanjšano)
+ energy_score * 2 # energy weight + energy_score * 10 # energy weight (povečano — refren je glasnejši)
+ min(len(rep_text.split()), 10) # text richness + min(len(rep_text.split()), 10)
) )
candidates.append({ candidates.append({
@ -229,11 +239,11 @@ def find_chorus(video, lang=None, model_size="small", target_duration=30.0):
"cluster_id": cluster_idx, "cluster_id": cluster_idx,
}) })
# Sort by score, dedupe close candidates # Sort by score, dedupe close candidates (vsaj 20s narazen)
candidates.sort(key=lambda c: -c["score"]) candidates.sort(key=lambda c: -c["score"])
deduped = [] deduped = []
for c in candidates: for c in candidates:
if all(abs(c["start"] - d["start"]) > 5 for d in deduped): if all(abs(c["start"] - d["start"]) > 20 for d in deduped):
deduped.append(c) deduped.append(c)
if len(deduped) >= 5: if len(deduped) >= 5:
break break