Fix find_chorus: RMS energy parser was broken (no pts_time available), now synthesizes timestamps; energy weight x10 (refren je glasnejši)
This commit is contained in:
parent
64e8854cea
commit
c17578521a
@ -119,33 +119,42 @@ def find_repeated_lines(lines, similarity_threshold=0.5):
|
|||||||
|
|
||||||
def compute_energy(audio_path, window_sec=1.0):
|
def compute_energy(audio_path, window_sec=1.0):
|
||||||
"""
|
"""
|
||||||
Vrni list (timestamp, rms_db) preko FFmpeg ebur128 filter.
|
Vrni list (timestamp, rms_db) preko FFmpeg astats filter.
|
||||||
|
Vsako okno window_sec sekund vrne en RMS sample.
|
||||||
"""
|
"""
|
||||||
# Uporabi ebur128 ali astats za RMS
|
|
||||||
cmd = [
|
cmd = [
|
||||||
"ffmpeg", "-i", audio_path,
|
"ffmpeg", "-i", audio_path,
|
||||||
"-af", f"asetnsamples=n={int(16000 * window_sec)}:p=0,astats=metadata=1:reset={window_sec},"
|
"-af", f"asetnsamples=n={int(16000 * window_sec)}:p=0,astats=metadata=1:reset={window_sec},"
|
||||||
"ametadata=print:key=lavfi.astats.Overall.RMS_level",
|
"ametadata=print:key=lavfi.astats.Overall.RMS_level:file=-",
|
||||||
"-f", "null", "-",
|
"-f", "null", "-",
|
||||||
]
|
]
|
||||||
result = subprocess.run(cmd, capture_output=True, text=True)
|
result = subprocess.run(cmd, capture_output=True, text=True)
|
||||||
output = result.stderr
|
# ametadata file=- pošilja na stdout
|
||||||
|
output = result.stdout + "\n" + result.stderr
|
||||||
|
|
||||||
energies = []
|
energies = []
|
||||||
current_pts = None
|
current_pts = None
|
||||||
for line in output.split("\n"):
|
for line in output.split("\n"):
|
||||||
line = line.strip()
|
line = line.strip()
|
||||||
if line.startswith("frame:"):
|
# Format A: "frame:N pts:X pts_time:Y"
|
||||||
# frame:N pts:X pts_time:Y
|
m = re.search(r"pts_time:(\S+)", line)
|
||||||
m = re.search(r"pts_time:(\S+)", line)
|
if m:
|
||||||
if m:
|
try:
|
||||||
current_pts = float(m.group(1))
|
current_pts = float(m.group(1))
|
||||||
elif line.startswith("lavfi.astats.Overall.RMS_level="):
|
except ValueError:
|
||||||
val = line.split("=")[1]
|
pass
|
||||||
|
continue
|
||||||
|
# Format B: lavfi.astats.Overall.RMS_level=-15.123
|
||||||
|
if "RMS_level=" in line:
|
||||||
|
val = line.split("RMS_level=")[-1].strip()
|
||||||
try:
|
try:
|
||||||
rms = float(val)
|
rms = float(val)
|
||||||
if current_pts is not None:
|
# Če nimamo timestamp-a, sintetiziraj na podlagi vrstnega reda
|
||||||
energies.append((current_pts, rms))
|
if current_pts is None:
|
||||||
|
current_pts = len(energies) * window_sec
|
||||||
|
energies.append((current_pts, rms))
|
||||||
|
# Increment za naslednji vzorec, če FFmpeg ne pošilja pts
|
||||||
|
current_pts += window_sec
|
||||||
except ValueError:
|
except ValueError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@ -210,11 +219,12 @@ def find_chorus(video, lang=None, model_size="small", target_duration=30.0):
|
|||||||
avg_e = avg_energy_in_range(energies, start, start + target_duration)
|
avg_e = avg_energy_in_range(energies, start, start + target_duration)
|
||||||
energy_score = max(0, avg_e - avg_overall) # koliko nad povprečjem
|
energy_score = max(0, avg_e - avg_overall) # koliko nad povprečjem
|
||||||
|
|
||||||
# Score: število ponovitev + energy + dolžina vrstice
|
# Score: ponovitve + energija + dolžina vrstice
|
||||||
|
# Refren je navadno glasnejši kot verz — energija je močnejši signal
|
||||||
score = (
|
score = (
|
||||||
len(cluster) * 10 # repetition weight
|
len(cluster) * 5 # repetition weight (zmanjšano)
|
||||||
+ energy_score * 2 # energy weight
|
+ energy_score * 10 # energy weight (povečano — refren je glasnejši)
|
||||||
+ min(len(rep_text.split()), 10) # text richness
|
+ min(len(rep_text.split()), 10)
|
||||||
)
|
)
|
||||||
|
|
||||||
candidates.append({
|
candidates.append({
|
||||||
@ -229,11 +239,11 @@ def find_chorus(video, lang=None, model_size="small", target_duration=30.0):
|
|||||||
"cluster_id": cluster_idx,
|
"cluster_id": cluster_idx,
|
||||||
})
|
})
|
||||||
|
|
||||||
# Sort by score, dedupe close candidates
|
# Sort by score, dedupe close candidates (vsaj 20s narazen)
|
||||||
candidates.sort(key=lambda c: -c["score"])
|
candidates.sort(key=lambda c: -c["score"])
|
||||||
deduped = []
|
deduped = []
|
||||||
for c in candidates:
|
for c in candidates:
|
||||||
if all(abs(c["start"] - d["start"]) > 5 for d in deduped):
|
if all(abs(c["start"] - d["start"]) > 20 for d in deduped):
|
||||||
deduped.append(c)
|
deduped.append(c)
|
||||||
if len(deduped) >= 5:
|
if len(deduped) >= 5:
|
||||||
break
|
break
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user