From b543057cee790772598f5913f0f375d4e71b53c6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sebastjan=20Arti=C4=8D?=
Date: Wed, 29 Apr 2026 14:24:53 +0000
Subject: [PATCH] ACRCloud auto-recognition: never block uploads, fall back to fingerprinting
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Changes:

1. UI: removed blocking prompt() that asked for artist+title on filename
   that didn't match 'Artist - Title' pattern. Upload always proceeds.
   Instead shows yellow warning saying 'server will try to recognize'.

2. Backend: added scripts/acr_recognize.py — extracts 20s audio sample
   from video (at 15s and 60s offsets for robustness), computes ACRCloud
   fingerprint via native binary (3KB payload), sends to identify API.

3. Pipeline: process_job() now runs ACR recognition step before analysis
   IF parsed_artist or parsed_title is missing. Result is saved to job
   metadata and used for download filename + Scribe/Claude filename hint.

4. Credentials: ACR_HOST + ACR_ACCESS_KEY + ACR_SECRET_KEY env vars added
   to Coolify (using existing keys from openclaw fb-agent metka).

5. requirements.txt: added pyacrcloud==1.0.11 for native fingerprinting.

This unblocks future automation/cron upload pipelines — files don't need
to be perfectly named, ACRCloud will identify them automatically.

Fallback chain:
1. Filename parsing (Artist - Title.mp4)
2. ACRCloud audio fingerprint (works even for '12345.mp4', 'IMG_001.mp4')
3. If both fail: download filename uses 'reel_.mp4' (still works)
---
 app/main.py              |  29 ++++++
 requirements.txt         |   1 +
 scripts/acr_recognize.py | 215 +++++++++++++++++++++++++++++++++++++++
 templates/index.html     |  47 +++-------
 4 files changed, 260 insertions(+), 32 deletions(-)
 create mode 100644 scripts/acr_recognize.py

diff --git a/app/main.py b/app/main.py
index b6fa38e..95471c1 100644
--- a/app/main.py
+++ b/app/main.py
@@ -338,6 +338,35 @@ def process_job(job_id):
     else:
         input_path = Path(job["input_path"])
 
+    # ── 1b. Music recognition (ACRCloud) — when artist+title are missing ─────
+    # For YouTube jobs too, the title may not be evident (e.g. from a playlist, "Track 5")
+    if not (job.get("parsed_artist") and job.get("parsed_title")):
+        update_job(job_id, current_step="Avto-prepoznavam pesem (ACRCloud)")
+        try:
+            acr_cmd = [
+                "python3", str(SCRIPTS_DIR / "acr_recognize.py"),
+                str(input_path),
+            ]
+            proc = subprocess.run(acr_cmd, capture_output=True, text=True, timeout=120)
+            if proc.returncode == 0 and proc.stdout:
+                data = json.loads(proc.stdout)
+                a, t = data.get("artist"), data.get("title")
+                if a and t:
+                    update_job(
+                        job_id,
+                        parsed_artist=a, parsed_title=t,
+                        has_clean_name=True,
+                        recognized_via="acrcloud",
+                    )
+                    job = load_job(job_id)
+                    print(f"✅ ACR prepoznal: {a} - {t}", flush=True)
+                else:
+                    print(f"⚠️ ACR ni prepoznal pesmi", flush=True)
+            else:
+                print(f"⚠️ ACR exit {proc.returncode}: {proc.stderr[:200]}", flush=True)
+        except Exception as e:
+            print(f"⚠️ ACR error: {e}", flush=True)
+
     # ── 2. Smart analysis (če auto_chorus) ──────────────────────────
     if job.get("auto_chorus"):
         update_job(job_id, current_step="Analiza pesmi (transkript + energija)")
diff --git a/requirements.txt b/requirements.txt
index 99bd29b..a105403 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,3 +6,4 @@ faster-whisper==1.0.3
 opencv-python-headless==4.10.0.84
 numpy==1.26.4
 yt-dlp>=2025.10.0
+pyacrcloud==1.0.11
diff --git a/scripts/acr_recognize.py b/scripts/acr_recognize.py
new file mode 100644
index 0000000..64e0319
--- /dev/null
+++ b/scripts/acr_recognize.py
@@ -0,0 +1,215 @@
+"""
+acr_recognize.py — Audio recognition via the ACRCloud Audio Fingerprinting API.
+
+Uses the native binary for fingerprinting (3KB sample instead of the whole audio).
+Returns (artist, title), or (None, None) when the song is not recognized.
+
+Credentials are read from env vars (ACR_ACCESS_KEY, ACR_SECRET_KEY, ACR_HOST).
+"""
+import os
+import sys
+import json
+import hmac
+import base64
+import time
+import subprocess
+import tempfile
+import urllib.request
+import urllib.parse
+import uuid
+
+
+def _sign(string_to_sign, secret):
+    """Return the base64-encoded HMAC-SHA1 of string_to_sign keyed with secret."""
+    h = hmac.new(secret.encode('ascii'), string_to_sign.encode('ascii'), digestmod='sha1')
+    return base64.b64encode(h.digest()).decode('ascii')
+
+
+def _build_multipart(fields, files):
+    """Build a multipart/form-data body.
+
+    fields: dict mapping a form field name to its text value.
+    files: dict mapping a form field name to (filename, content bytes, content type).
+    Returns (body bytes, boundary string).
+    """
+    boundary = uuid.uuid4().hex
+    parts = []
+    for k, v in fields.items():
+        parts.append(f"--{boundary}\r\nContent-Disposition: form-data; name=\"{k}\"\r\n\r\n{v}\r\n".encode())
+    for k, (fname, content, ctype) in files.items():
+        parts.append(
+            f"--{boundary}\r\nContent-Disposition: form-data; name=\"{k}\"; filename=\"{fname}\"\r\n"
+            f"Content-Type: {ctype}\r\n\r\n".encode() + content + b"\r\n"
+        )
+    parts.append(f"--{boundary}--\r\n".encode())
+    body = b"".join(parts)
+    return body, boundary
+
+
+def recognize_audio_file(audio_path, timeout=30):
+    """Send an audio file (or its fingerprint) to ACRCloud and return the raw response.
+
+    audio_path: path to an MP3/WAV/M4A file.
+    timeout: seconds allowed for the HTTP request.
+    Returns the decoded JSON response as a dict, or None on error.
+    """
+    host = os.environ.get("ACR_HOST", "identify-eu-west-1.acrcloud.com")
+    access_key = os.environ.get("ACR_ACCESS_KEY")
+    secret_key = os.environ.get("ACR_SECRET_KEY")
+
+    if not access_key or not secret_key:
+        print("⚠️ ACR_ACCESS_KEY/SECRET_KEY nista nastavljena", file=sys.stderr)
+        return None
+
+    if not os.path.exists(audio_path):
+        print(f"⚠️ Audio file ne obstaja: {audio_path}", file=sys.stderr)
+        return None
+
+    # Try the native binary for a fingerprint (smaller payload)
+    fingerprint = None
+    try:
+        from acrcloud import acrcloud_extr_tool as acr
+        fingerprint = acr.create_fingerprint_by_file(audio_path, 0, 30, 0)
+    except Exception as e:  # covers ImportError too; fall back to sending raw audio
+        print(f" ℹ️ Fingerprint binary ni na voljo ({e}), uporabljam audio direktno", file=sys.stderr)
+
+    timestamp = str(int(time.time()))
+    http_method = "POST"
+    http_uri = "/v1/identify"
+    signature_version = "1"
+
+    if fingerprint:
+        data_type = "fingerprint"
+        sample_data = fingerprint
+        sample_name = "sample.fp"
+        sample_ctype = "application/octet-stream"
+    else:
+        data_type = "audio"
+        with open(audio_path, "rb") as f:
+            sample_data = f.read()
+        sample_name = "sample.mp3"
+        sample_ctype = "audio/mpeg"
+
+    string_to_sign = f"{http_method}\n{http_uri}\n{access_key}\n{data_type}\n{signature_version}\n{timestamp}"
+    signature = _sign(string_to_sign, secret_key)
+
+    fields = {
+        "access_key": access_key,
+        "sample_bytes": str(len(sample_data)),
+        "timestamp": timestamp,
+        "signature": signature,
+        "data_type": data_type,
+        "signature_version": signature_version,
+    }
+    files = {"sample": (sample_name, sample_data, sample_ctype)}
+    body, boundary = _build_multipart(fields, files)
+
+    # Reuse the signed URI and method so the request cannot drift from the signature
+    url = f"https://{host}{http_uri}"
+    req = urllib.request.Request(
+        url, data=body,
+        headers={"Content-Type": f"multipart/form-data; boundary={boundary}"},
+        method=http_method,
+    )
+    try:
+        with urllib.request.urlopen(req, timeout=timeout) as resp:
+            return json.loads(resp.read().decode())
+    except Exception as e:
+        print(f"⚠️ ACR API napaka: {e}", file=sys.stderr)
+        return None
+
+
+def extract_short_audio(video_path, duration=20, start_offset=15, timeout=30):
+    """Extract a short audio clip from a video for ACR fingerprinting.
+
+    By default starts 15s into the video (skips the intro) and takes 20s.
+    timeout: seconds ffmpeg may run before it is killed.
+    Returns the path to a temporary MP3 (the caller deletes it), or None on failure.
+    """
+    tmp_fd, tmp_path = tempfile.mkstemp(suffix=".mp3")
+    os.close(tmp_fd)
+    cmd = [
+        "ffmpeg", "-i", video_path,
+        "-ss", str(start_offset),
+        "-t", str(duration),
+        "-vn", "-ar", "44100", "-ac", "1", "-b:a", "128k",
+        # "error" rather than "quiet": stderr has to carry the reason printed below
+        "-f", "mp3", tmp_path, "-y", "-loglevel", "error"
+    ]
+    try:
+        proc = subprocess.run(cmd, capture_output=True, text=True, errors="replace", timeout=timeout)
+    except (OSError, subprocess.TimeoutExpired) as e:
+        # ffmpeg is missing or hung: do not leak the temp file
+        os.unlink(tmp_path)
+        print(f"⚠️ ffmpeg napaka: {e}", file=sys.stderr)
+        return None
+    if proc.returncode != 0:
+        os.unlink(tmp_path)
+        print(f"⚠️ ffmpeg napaka: {proc.stderr[:200]}", file=sys.stderr)
+        return None
+    return tmp_path
+
+
+def recognize_video(video_path):
+    """Main entry point: recognize the song in a video.
+
+    Tries two points in the video (15s and 60s) for robustness, in case one
+    of them falls on an intro or an instrumental section.
+    Returns (artist, title), or (None, None) when nothing was recognized.
+    """
+    if not os.environ.get("ACR_ACCESS_KEY"):
+        return (None, None)
+
+    for start_offset in [15, 60]:
+        audio = extract_short_audio(video_path, duration=20, start_offset=start_offset)
+        if not audio:
+            continue
+        try:
+            result = recognize_audio_file(audio)
+        finally:
+            try:
+                os.unlink(audio)
+            except OSError:
+                pass
+
+        if not result:
+            continue
+
+        status = result.get("status", {}).get("code")
+        if status != 0:
+            # 1001 = no result, anything else = error
+            msg = result.get("status", {}).get("msg", "")
+            print(f" ℹ️ ACR @{start_offset}s: status={status} ({msg})", file=sys.stderr)
+            continue
+
+        # Success — take the first music match
+        music = result.get("metadata", {}).get("music", [])
+        if not music:
+            continue
+
+        first = music[0]
+        # "or" guards: a JSON null here would otherwise crash on .strip()
+        title = (first.get("title") or "").strip()
+        artists = first.get("artists") or []
+        artist = (artists[0].get("name") or "").strip() if artists else ""
+
+        if artist and title:
+            print(f" ✅ ACR @{start_offset}s prepoznal: {artist} - {title}", file=sys.stderr)
+            return (artist, title)
+
+    print(" ⚠️ ACR pesem ni prepoznana", file=sys.stderr)
+    return (None, None)
+
+
+if __name__ == "__main__":
+    if len(sys.argv) != 2:
+        # stderr: the caller parses stdout only as JSON and logs stderr on failure
+        print("Uporaba: python3 acr_recognize.py VIDEO_PATH", file=sys.stderr)
+        sys.exit(1)
+
+    artist, title = recognize_video(sys.argv[1])
+    if artist and title:
+        print(json.dumps({"artist": artist, "title": title}, ensure_ascii=False))
+        sys.exit(0)
+    else:
+        print(json.dumps({"artist": None, "title": None}))
+        sys.exit(1)
diff --git a/templates/index.html b/templates/index.html
index 0f5fd80..34db1c3 100644
--- a/templates/index.html
+++ b/templates/index.html
@@ -500,39 +500,22 @@ function handleFileSelected(f) {
   const [artist, title] = parseArtistTitle(f.name);
-  if (!artist || !title) {
-    // Ni razvidno ime — vprašaj uporabnika
-    const userArtist = prompt(
-      `❗ Iz imena datoteke ni razviden izvajalec in naslov.\n\n` +
-      `Datoteka: "${f.name}"\n\n` +
-      `Vpiši IZVAJALCA (npr. "Lady Gaga"):`,
-      ""
-    );
-    if (!userArtist || !userArtist.trim()) {
-      alert("⛔ Brez izvajalca ne morem nadaljevati.\n\nPoimenuj datoteko v formatu:\n Izvajalec - Naslov.mp4");
-      fileInput.value = "";
-      return;
-    }
-    const userTitle = prompt(
-      `Vpiši NASLOV pesmi (npr. "Abracadabra"):`,
-      ""
-    );
-    if (!userTitle || !userTitle.trim()) {
-      alert("⛔ Brez naslova ne morem nadaljevati.");
-      fileInput.value = "";
-      return;
-    }
-    pendingArtist = userArtist.trim();
-    pendingTitle = userTitle.trim();
-  } else {
-    pendingArtist = artist;
-    pendingTitle = title;
-  }
   pendingFile = f;
-  dz.querySelector("div").innerHTML =
-    `📹 ${pendingArtist} — ${pendingTitle}` +
-    `${f.name} (${(f.size/1024/1024).toFixed(1)} MB)`;
+  pendingArtist = artist;
+  pendingTitle = title;
+
+  if (artist && title) {
+    // Evident from the filename
+    dz.querySelector("div").innerHTML =
+      `📹 ${pendingArtist} — ${pendingTitle}` +
+      `${f.name} (${(f.size/1024/1024).toFixed(1)} MB)`;
+  } else {
+    // Not evident — show a warning, but do NOT block (the server will try ACR auto-recognition)
+    dz.querySelector("div").innerHTML =
+      `📹 ${f.name}` +
+      `⚠ Iz imena ni razviden izvajalec — server bo poskusil avto-prepoznati pesem (ACRCloud)` +
+      `${(f.size/1024/1024).toFixed(1)} MB`;
+  }
 }
 
 // ─── Settings collector ─────────────────────────