ACRCloud auto-recognition: never block uploads, fall back to fingerprinting

Changes:

1. UI: removed the blocking prompt() that asked for artist+title when the
   filename didn't match the 'Artist - Title' pattern. Upload always proceeds.
   Instead, a yellow warning is shown saying 'server will try to recognize'.

2. Backend: added scripts/acr_recognize.py — extracts 20s audio sample
   from video (at 15s and 60s offsets for robustness), computes ACRCloud
   fingerprint via native binary (3KB payload), sends to identify API.

3. Pipeline: process_job() now runs ACR recognition step before analysis
   IF parsed_artist or parsed_title is missing. Result is saved to job
   metadata and used for download filename + Scribe/Claude filename hint.

4. Credentials: ACR_HOST + ACR_ACCESS_KEY + ACR_SECRET_KEY env vars
   added to Coolify (using existing keys from openclaw fb-agent metka).

5. requirements.txt: added pyacrcloud==1.0.11 for native fingerprinting.

This unblocks future automation/cron upload pipelines — files don't need
to be perfectly named, ACRCloud will identify them automatically.

Fallback chain:
1. Filename parsing (Artist - Title.mp4)
2. ACRCloud audio fingerprint (works even for '12345.mp4', 'IMG_001.mp4')
3. If both fail: download filename uses 'reel_<id>.mp4' (still works)
This commit is contained in:
Sebastjan Artič 2026-04-29 14:24:53 +00:00
parent 3877b822ff
commit b543057cee
4 changed files with 241 additions and 32 deletions

View File

@ -338,6 +338,35 @@ def process_job(job_id):
else:
input_path = Path(job["input_path"])
# ── 1b. Music recognition (ACRCloud) — če nimamo artist+title ─────
# Tudi za YouTube jobs lahko naslov ni razviden (npr. iz playliste, "Track 5")
if not (job.get("parsed_artist") and job.get("parsed_title")):
update_job(job_id, current_step="Avto-prepoznavam pesem (ACRCloud)")
try:
acr_cmd = [
"python3", str(SCRIPTS_DIR / "acr_recognize.py"),
str(input_path),
]
proc = subprocess.run(acr_cmd, capture_output=True, text=True, timeout=120)
if proc.returncode == 0 and proc.stdout:
data = json.loads(proc.stdout)
a, t = data.get("artist"), data.get("title")
if a and t:
update_job(
job_id,
parsed_artist=a, parsed_title=t,
has_clean_name=True,
recognized_via="acrcloud",
)
job = load_job(job_id)
print(f"✅ ACR prepoznal: {a} - {t}", flush=True)
else:
print(f"⚠️ ACR ni prepoznal pesmi", flush=True)
else:
print(f"⚠️ ACR exit {proc.returncode}: {proc.stderr[:200]}", flush=True)
except Exception as e:
print(f"⚠️ ACR error: {e}", flush=True)
# ── 2. Smart analysis (če auto_chorus) ──────────────────────────
if job.get("auto_chorus"):
update_job(job_id, current_step="Analiza pesmi (transkript + energija)")

View File

@ -6,3 +6,4 @@ faster-whisper==1.0.3
opencv-python-headless==4.10.0.84
numpy==1.26.4
yt-dlp>=2025.10.0
pyacrcloud==1.0.11

196
scripts/acr_recognize.py Normal file
View File

@ -0,0 +1,196 @@
"""
acr_recognize.py: audio recognition via the ACRCloud Audio Fingerprinting API.
Uses the native binary for fingerprinting (a ~3KB sample instead of the whole audio).
Returns (artist, title), or (None, None) if the song is not recognized.
Credentials are read from env vars (ACR_ACCESS_KEY, ACR_SECRET_KEY, ACR_HOST).
"""
import os
import sys
import json
import hmac
import base64
import time
import subprocess
import tempfile
import urllib.request
import urllib.parse
import uuid
def _sign(string_to_sign, secret):
    """Return the base64-encoded HMAC-SHA1 signature of ``string_to_sign``.

    Both arguments are encoded as ASCII before hashing, so a non-ASCII
    character in either one raises ``UnicodeEncodeError``.
    """
    key_bytes = secret.encode('ascii')
    message_bytes = string_to_sign.encode('ascii')
    raw_digest = hmac.new(key_bytes, message_bytes, digestmod='sha1').digest()
    return str(base64.b64encode(raw_digest), 'ascii')
def _build_multipart(fields, files):
    """Assemble a multipart/form-data request body.

    fields: mapping of form field name -> value (formatted into the body
        with an f-string).
    files: mapping of form field name -> (filename, content, content_type),
        where ``content`` is the bytes payload of the part.

    Returns a ``(body, boundary)`` tuple: the encoded body as bytes and the
    randomly generated boundary string the caller must put in the
    Content-Type header.
    """
    boundary = uuid.uuid4().hex
    delimiter = f"--{boundary}\r\n"

    # Plain text fields come first, in mapping order.
    field_chunks = [
        (
            f"{delimiter}Content-Disposition: form-data; name=\"{name}\"\r\n\r\n"
            f"{value}\r\n"
        ).encode()
        for name, value in fields.items()
    ]
    # File parts carry a filename and content type ahead of the raw payload.
    file_chunks = [
        (
            f"{delimiter}Content-Disposition: form-data; "
            f"name=\"{name}\"; filename=\"{filename}\"\r\n"
            f"Content-Type: {content_type}\r\n\r\n"
        ).encode() + payload + b"\r\n"
        for name, (filename, payload, content_type) in files.items()
    ]
    closing = f"--{boundary}--\r\n".encode()

    return b"".join([*field_chunks, *file_chunks, closing]), boundary
def recognize_audio_file(audio_path, timeout=30):
    """Send an audio file (or its fingerprint) to ACRCloud and return the raw response.

    audio_path: path to the audio file to identify.
    timeout: timeout in seconds passed to ``urllib.request.urlopen``.

    Credentials come from the ACR_ACCESS_KEY / ACR_SECRET_KEY env vars and the
    host from ACR_HOST. Returns the decoded JSON response, or None when the
    credentials are missing, the file does not exist, or the request fails.
    """
    acr_host = os.environ.get("ACR_HOST", "identify-eu-west-1.acrcloud.com")
    access_key = os.environ.get("ACR_ACCESS_KEY")
    secret_key = os.environ.get("ACR_SECRET_KEY")

    if not (access_key and secret_key):
        print("⚠️ ACR_ACCESS_KEY/SECRET_KEY nista nastavljena", file=sys.stderr)
        return None
    if not os.path.exists(audio_path):
        print(f"⚠️ Audio file ne obstaja: {audio_path}", file=sys.stderr)
        return None

    # Prefer the native fingerprint extractor (smaller payload); any failure,
    # including the package not being installed, falls back to raw audio.
    try:
        from acrcloud import acrcloud_extr_tool as extr_tool
        fingerprint = extr_tool.create_fingerprint_by_file(audio_path, 0, 30, 0)
    except Exception as exc:
        fingerprint = None
        print(f" Fingerprint binary ni na voljo ({exc}), uporabljam audio direktno", file=sys.stderr)

    timestamp = str(int(time.time()))
    signature_version = "1"

    if fingerprint:
        data_type, sample_data = "fingerprint", fingerprint
        sample_name, sample_ctype = "sample.fp", "application/octet-stream"
    else:
        data_type = "audio"
        with open(audio_path, "rb") as audio_file:
            sample_data = audio_file.read()
        sample_name, sample_ctype = "sample.mp3", "audio/mpeg"

    # The signed string is the newline-joined method, URI, key, data type,
    # signature version and timestamp, in that order.
    string_to_sign = "\n".join(
        ["POST", "/v1/identify", access_key, data_type, signature_version, timestamp]
    )
    form_fields = {
        "access_key": access_key,
        "sample_bytes": str(len(sample_data)),
        "timestamp": timestamp,
        "signature": _sign(string_to_sign, secret_key),
        "data_type": data_type,
        "signature_version": signature_version,
    }
    body, boundary = _build_multipart(
        form_fields,
        {"sample": (sample_name, sample_data, sample_ctype)},
    )

    request = urllib.request.Request(
        f"https://{acr_host}/v1/identify",
        data=body,
        headers={"Content-Type": f"multipart/form-data; boundary={boundary}"},
        method="POST",
    )
    try:
        with urllib.request.urlopen(request, timeout=timeout) as response:
            raw = response.read()
        return json.loads(raw.decode())
    except Exception as exc:
        print(f"⚠️ ACR API napaka: {exc}", file=sys.stderr)
        return None
def extract_short_audio(video_path, duration=20, start_offset=15):
    """Extract a short mono MP3 excerpt from a video for ACR recognition.

    video_path: path to the input media file (str or path-like).
    duration: length of the excerpt in seconds (ffmpeg ``-t``).
    start_offset: where the excerpt starts, in seconds (ffmpeg ``-ss``);
        the default skips the first 15 seconds of the video.

    Returns the path to a temporary MP3 file that the caller must delete,
    or None if ffmpeg could not be started or exited with an error. On
    failure the temporary file is removed before returning.
    """
    tmp_fd, tmp_path = tempfile.mkstemp(suffix=".mp3")
    os.close(tmp_fd)
    cmd = [
        # Global options go first. "-y" is required because mkstemp already
        # created the output file; "-loglevel error" (rather than "quiet")
        # keeps ffmpeg's error text on stderr so it can be reported below.
        "ffmpeg", "-y", "-loglevel", "error",
        "-i", str(video_path),
        "-ss", str(start_offset),
        "-t", str(duration),
        "-vn", "-ar", "44100", "-ac", "1", "-b:a", "128k",
        "-f", "mp3", tmp_path,
    ]
    try:
        proc = subprocess.run(cmd, capture_output=True, text=True, errors="replace")
    except OSError as e:
        # ffmpeg is missing or not executable: report it like any other
        # ffmpeg failure instead of leaking the temp file and crashing.
        error_text = str(e)
    else:
        if proc.returncode == 0:
            return tmp_path
        error_text = proc.stderr[:200]

    try:
        os.unlink(tmp_path)
    except OSError:
        pass
    print(f"⚠️ ffmpeg napaka: {error_text}", file=sys.stderr)
    return None
def recognize_video(video_path):
    """Main entry point: identify the song in a video.

    Tries two positions in the video (15s and 60s) so that an intro or
    instrumental section at one of them does not prevent recognition.

    Returns ``(artist, title)`` from the first music match that has both a
    non-empty artist name and title, or ``(None, None)`` when ACR_ACCESS_KEY
    is not set or neither sample yields such a match.
    """
    not_found = (None, None)
    if not os.environ.get("ACR_ACCESS_KEY"):
        return not_found

    for offset in (15, 60):
        sample_path = extract_short_audio(video_path, duration=20, start_offset=offset)
        if not sample_path:
            continue

        # The temp sample is removed whether or not recognition raised.
        try:
            response = recognize_audio_file(sample_path)
        finally:
            try:
                os.unlink(sample_path)
            except OSError:
                pass
        if not response:
            continue

        status_info = response.get("status", {})
        code = status_info.get("code")
        if code != 0:
            # 1001 = no result, anything else = error
            message = status_info.get("msg", "")
            print(f" ACR @{offset}s: status={code} ({message})", file=sys.stderr)
            continue

        # Success: only the first music match is considered.
        matches = response.get("metadata", {}).get("music", [])
        if not matches:
            continue
        best = matches[0]
        title = best.get("title", "").strip()
        artist_entries = best.get("artists", [])
        if artist_entries:
            artist = artist_entries[0].get("name", "").strip()
        else:
            artist = ""

        if artist and title:
            print(f" ✅ ACR @{offset}s prepoznal: {artist} - {title}", file=sys.stderr)
            return (artist, title)

    print(" ⚠️ ACR pesem ni prepoznana", file=sys.stderr)
    return not_found
if __name__ == "__main__":
    # CLI entry point: takes exactly one argument (the video path), prints a
    # JSON object with "artist" and "title" to stdout, and exits 0 when the
    # song was recognized, 1 otherwise (including wrong usage).
    if len(sys.argv) != 2:
        print("Uporaba: python3 acr_recognize.py <video.mp4>")
        sys.exit(1)

    artist, title = recognize_video(sys.argv[1])
    found = bool(artist and title)
    payload = {
        "artist": artist if found else None,
        "title": title if found else None,
    }
    print(json.dumps(payload, ensure_ascii=False))
    sys.exit(0 if found else 1)

View File

@ -500,39 +500,22 @@
function handleFileSelected(f) {
const [artist, title] = parseArtistTitle(f.name);
if (!artist || !title) {
// Ni razvidno ime — vprašaj uporabnika
const userArtist = prompt(
`❗ Iz imena datoteke ni razviden izvajalec in naslov.\n\n` +
`Datoteka: "${f.name}"\n\n` +
`Vpiši IZVAJALCA (npr. "Lady Gaga"):`,
""
);
if (!userArtist || !userArtist.trim()) {
alert("⛔ Brez izvajalca ne morem nadaljevati.\n\nPoimenuj datoteko v formatu:\n Izvajalec - Naslov.mp4");
fileInput.value = "";
return;
}
const userTitle = prompt(
`Vpiši NASLOV pesmi (npr. "Abracadabra"):`,
""
);
if (!userTitle || !userTitle.trim()) {
alert("⛔ Brez naslova ne morem nadaljevati.");
fileInput.value = "";
return;
}
pendingArtist = userArtist.trim();
pendingTitle = userTitle.trim();
} else {
pendingArtist = artist;
pendingTitle = title;
}
pendingFile = f;
dz.querySelector("div").innerHTML =
`📹 <b>${pendingArtist} — ${pendingTitle}</b>` +
`<div style="font-size: 11px; color: var(--muted); margin-top: 4px;">${f.name} (${(f.size/1024/1024).toFixed(1)} MB)</div>`;
pendingArtist = artist;
pendingTitle = title;
if (artist && title) {
// Razvidno iz filename
dz.querySelector("div").innerHTML =
`📹 <b>${pendingArtist} — ${pendingTitle}</b>` +
`<div style="font-size: 11px; color: var(--muted); margin-top: 4px;">${f.name} (${(f.size/1024/1024).toFixed(1)} MB)</div>`;
} else {
// Ni razvidno — pokaži opozorilo, ampak NE blokiraj (server bo poskusil ACR auto-recognize)
dz.querySelector("div").innerHTML =
`📹 ${f.name}` +
`<div style="font-size: 11px; color: var(--warn); margin-top: 4px;">⚠ Iz imena ni razviden izvajalec — server bo poskusil avto-prepoznati pesem (ACRCloud)</div>` +
`<div style="font-size: 11px; color: var(--muted); margin-top: 2px;">${(f.size/1024/1024).toFixed(1)} MB</div>`;
}
}
// ─── Settings collector ─────────────────────────