Fix find_chorus: RMS energy parser was broken (no pts_time available), now synthesizes timestamps; energy weight x10 (refren je glasnejši)
This commit is contained in:
parent
64e8854cea
commit
c17578521a
@ -119,33 +119,42 @@ def find_repeated_lines(lines, similarity_threshold=0.5):
|
||||
|
||||
def compute_energy(audio_path, window_sec=1.0):
|
||||
"""
|
||||
Vrni list (timestamp, rms_db) preko FFmpeg ebur128 filter.
|
||||
Vrni list (timestamp, rms_db) preko FFmpeg astats filter.
|
||||
Vsako okno window_sec sekund vrne en RMS sample.
|
||||
"""
|
||||
# Uporabi ebur128 ali astats za RMS
|
||||
cmd = [
|
||||
"ffmpeg", "-i", audio_path,
|
||||
"-af", f"asetnsamples=n={int(16000 * window_sec)}:p=0,astats=metadata=1:reset={window_sec},"
|
||||
"ametadata=print:key=lavfi.astats.Overall.RMS_level",
|
||||
"ametadata=print:key=lavfi.astats.Overall.RMS_level:file=-",
|
||||
"-f", "null", "-",
|
||||
]
|
||||
result = subprocess.run(cmd, capture_output=True, text=True)
|
||||
output = result.stderr
|
||||
# ametadata file=- pošilja na stdout
|
||||
output = result.stdout + "\n" + result.stderr
|
||||
|
||||
energies = []
|
||||
current_pts = None
|
||||
for line in output.split("\n"):
|
||||
line = line.strip()
|
||||
if line.startswith("frame:"):
|
||||
# frame:N pts:X pts_time:Y
|
||||
m = re.search(r"pts_time:(\S+)", line)
|
||||
if m:
|
||||
# Format A: "frame:N pts:X pts_time:Y"
|
||||
m = re.search(r"pts_time:(\S+)", line)
|
||||
if m:
|
||||
try:
|
||||
current_pts = float(m.group(1))
|
||||
elif line.startswith("lavfi.astats.Overall.RMS_level="):
|
||||
val = line.split("=")[1]
|
||||
except ValueError:
|
||||
pass
|
||||
continue
|
||||
# Format B: lavfi.astats.Overall.RMS_level=-15.123
|
||||
if "RMS_level=" in line:
|
||||
val = line.split("RMS_level=")[-1].strip()
|
||||
try:
|
||||
rms = float(val)
|
||||
if current_pts is not None:
|
||||
energies.append((current_pts, rms))
|
||||
# Če nimamo timestamp-a, sintetiziraj na podlagi vrstnega reda
|
||||
if current_pts is None:
|
||||
current_pts = len(energies) * window_sec
|
||||
energies.append((current_pts, rms))
|
||||
# Increment za naslednji vzorec, če FFmpeg ne pošilja pts
|
||||
current_pts += window_sec
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
@ -210,11 +219,12 @@ def find_chorus(video, lang=None, model_size="small", target_duration=30.0):
|
||||
avg_e = avg_energy_in_range(energies, start, start + target_duration)
|
||||
energy_score = max(0, avg_e - avg_overall) # koliko nad povprečjem
|
||||
|
||||
# Score: število ponovitev + energy + dolžina vrstice
|
||||
# Score: ponovitve + energija + dolžina vrstice
|
||||
# Refren je navadno glasnejši kot verz — energija je močnejši signal
|
||||
score = (
|
||||
len(cluster) * 10 # repetition weight
|
||||
+ energy_score * 2 # energy weight
|
||||
+ min(len(rep_text.split()), 10) # text richness
|
||||
len(cluster) * 5 # repetition weight (zmanjšano)
|
||||
+ energy_score * 10 # energy weight (povečano — refren je glasnejši)
|
||||
+ min(len(rep_text.split()), 10)
|
||||
)
|
||||
|
||||
candidates.append({
|
||||
@ -229,11 +239,11 @@ def find_chorus(video, lang=None, model_size="small", target_duration=30.0):
|
||||
"cluster_id": cluster_idx,
|
||||
})
|
||||
|
||||
# Sort by score, dedupe close candidates
|
||||
# Sort by score, dedupe close candidates (vsaj 20s narazen)
|
||||
candidates.sort(key=lambda c: -c["score"])
|
||||
deduped = []
|
||||
for c in candidates:
|
||||
if all(abs(c["start"] - d["start"]) > 5 for d in deduped):
|
||||
if all(abs(c["start"] - d["start"]) > 20 for d in deduped):
|
||||
deduped.append(c)
|
||||
if len(deduped) >= 5:
|
||||
break
|
||||
|
||||
Loading…
Reference in New Issue
Block a user