ACRCloud auto-recognition: never block uploads, fall back to fingerprinting

Changes: 1. UI: removed the blocking prompt() that asked for artist+title when the filename didn't match the 'Artist - Title' pattern. The upload now always proceeds; instead, a yellow warning says 'server will try to recognize'. 2. Backend: added scripts/acr_recognize.py — extracts a 20s audio sample from the video (at the 15s offset first, then at 60s if that is not recognized, for robustness), computes an ACRCloud fingerprint via the native binary (3KB payload), and sends it to the identify API. 3. Pipeline: process_job() now runs an ACR recognition step before analysis IF parsed_artist or parsed_title is missing. The result is saved to the job metadata and used for the download filename + the Scribe/Claude filename hint. 4. Credentials: ACR_HOST + ACR_ACCESS_KEY + ACR_SECRET_KEY env vars added to Coolify (using existing keys from openclaw fb-agent metka). 5. requirements.txt: added pyacrcloud==1.0.11 for native fingerprinting. This unblocks future automation/cron upload pipelines — files don't need to be perfectly named; ACRCloud will identify them automatically. Fallback chain: 1. Filename parsing (Artist - Title.mp4) 2. ACRCloud audio fingerprint (works even for '12345.mp4', 'IMG_001.mp4') 3. If both fail: the download filename uses 'reel_<id>.mp4' (still works)
2026-04-29 14:24:53 +00:00 · 2026-04-29 14:24:53 +00:00 · b543057cee
commit b543057cee
parent 3877b822ff
4 changed files with 241 additions and 32 deletions
--- a/app/main.py
+++ b/app/main.py
@ -338,6 +338,35 @@ def process_job(job_id):
        else:
            input_path = Path(job["input_path"])

+        # ── 1b. Music recognition (ACRCloud) — če nimamo artist+title ─────
+        # Tudi za YouTube jobs lahko naslov ni razviden (npr. iz playliste, "Track 5")
+        if not (job.get("parsed_artist") and job.get("parsed_title")):
+            update_job(job_id, current_step="Avto-prepoznavam pesem (ACRCloud)")
+            try:
+                acr_cmd = [
+                    "python3", str(SCRIPTS_DIR / "acr_recognize.py"),
+                    str(input_path),
+                ]
+                proc = subprocess.run(acr_cmd, capture_output=True, text=True, timeout=120)
+                if proc.returncode == 0 and proc.stdout:
+                    data = json.loads(proc.stdout)
+                    a, t = data.get("artist"), data.get("title")
+                    if a and t:
+                        update_job(
+                            job_id,
+                            parsed_artist=a, parsed_title=t,
+                            has_clean_name=True,
+                            recognized_via="acrcloud",
+                        )
+                        job = load_job(job_id)
+                        print(f"✅ ACR prepoznal: {a} - {t}", flush=True)
+                    else:
+                        print(f"⚠️ ACR ni prepoznal pesmi", flush=True)
+                else:
+                    print(f"⚠️ ACR exit {proc.returncode}: {proc.stderr[:200]}", flush=True)
+            except Exception as e:
+                print(f"⚠️ ACR error: {e}", flush=True)
+
        # ── 2. Smart analysis (če auto_chorus) ──────────────────────────
        if job.get("auto_chorus"):
            update_job(job_id, current_step="Analiza pesmi (transkript + energija)")
--- a/requirements.txt
+++ b/requirements.txt
@ -6,3 +6,4 @@ faster-whisper==1.0.3
 opencv-python-headless==4.10.0.84
 numpy==1.26.4
 yt-dlp>=2025.10.0
+pyacrcloud==1.0.11
--- a/scripts/acr_recognize.py
+++ b/scripts/acr_recognize.py
@ -0,0 +1,196 @@
+"""
+acr_recognize.py — Audio recognition prek ACRCloud Audio Fingerprinting API.
+
+Uporabi native binary za fingerprinting (3KB sample namesto cel audio).
+Vrne (artist, title) ali (None, None) če pesem ni prepoznana.
+
+Credentials se preberejo iz env vars (ACR_ACCESS_KEY, ACR_SECRET_KEY, ACR_HOST).
+"""
+import os
+import sys
+import json
+import hmac
+import base64
+import time
+import subprocess
+import tempfile
+import urllib.request
+import urllib.parse
+import uuid
+
+
+def _sign(string_to_sign, secret):
+    """Return the Base64-encoded HMAC-SHA1 of *string_to_sign* keyed by *secret*.
+
+    Both arguments are encoded as ASCII before signing, so non-ASCII input
+    raises UnicodeEncodeError.
+    """
+    key_bytes = secret.encode('ascii')
+    message_bytes = string_to_sign.encode('ascii')
+    mac = hmac.new(key_bytes, message_bytes, digestmod='sha1')
+    encoded = base64.b64encode(mac.digest())
+    return encoded.decode('ascii')
+
+
+def _build_multipart(fields, files):
+    """Assemble a multipart/form-data request body.
+
+    fields: mapping of form-field name -> value (formatted into the body as text).
+    files:  mapping of form-field name -> (filename, content_bytes, content_type).
+
+    Returns (body, boundary): the encoded body as bytes and the randomly
+    generated boundary string to put in the Content-Type header.
+    """
+    boundary = uuid.uuid4().hex
+    delimiter = f"--{boundary}\r\n"
+    body = bytearray()
+
+    # Plain text fields come first, in mapping order.
+    for name, value in fields.items():
+        header = f"{delimiter}Content-Disposition: form-data; name=\"{name}\"\r\n\r\n"
+        body += f"{header}{value}\r\n".encode()
+
+    # File parts carry a filename and their own Content-Type header.
+    for name, (filename, payload, content_type) in files.items():
+        header = (
+            f"{delimiter}Content-Disposition: form-data; name=\"{name}\"; filename=\"{filename}\"\r\n"
+            f"Content-Type: {content_type}\r\n\r\n"
+        )
+        body += header.encode()
+        body += payload
+        body += b"\r\n"
+
+    # Closing delimiter terminates the body.
+    body += f"--{boundary}--\r\n".encode()
+    return bytes(body), boundary
+
+
+def recognize_audio_file(audio_path, timeout=30):
+    """Send an audio file (or its fingerprint) to ACRCloud and return the raw response.
+
+    audio_path: path to an MP3/WAV/M4A file.
+    timeout:    timeout in seconds passed to the HTTP request.
+
+    Credentials come from the ACR_ACCESS_KEY and ACR_SECRET_KEY environment
+    variables; the host comes from ACR_HOST (with a built-in default).
+
+    Returns the decoded JSON response, or None when credentials are missing,
+    the file does not exist, or the request fails.
+    """
+    env = os.environ
+    host = env.get("ACR_HOST", "identify-eu-west-1.acrcloud.com")
+    access_key = env.get("ACR_ACCESS_KEY")
+    secret_key = env.get("ACR_SECRET_KEY")
+
+    if not (access_key and secret_key):
+        print("⚠️ ACR_ACCESS_KEY/SECRET_KEY nista nastavljena", file=sys.stderr)
+        return None
+
+    if not os.path.exists(audio_path):
+        print(f"⚠️ Audio file ne obstaja: {audio_path}", file=sys.stderr)
+        return None
+
+    # Prefer the native fingerprint extractor (much smaller payload); any
+    # failure, including a missing package, falls back to uploading the audio.
+    fingerprint = None
+    try:
+        from acrcloud import acrcloud_extr_tool as extractor
+        fingerprint = extractor.create_fingerprint_by_file(audio_path, 0, 30, 0)
+    except Exception as e:
+        print(f"   ℹ️ Fingerprint binary ni na voljo ({e}), uporabljam audio direktno", file=sys.stderr)
+
+    timestamp = str(int(time.time()))
+    signature_version = "1"
+
+    if fingerprint:
+        data_type, sample_name, sample_ctype = "fingerprint", "sample.fp", "application/octet-stream"
+        sample_data = fingerprint
+    else:
+        # The raw upload is always labelled as MP3, whatever the input format.
+        data_type, sample_name, sample_ctype = "audio", "sample.mp3", "audio/mpeg"
+        with open(audio_path, "rb") as audio_file:
+            sample_data = audio_file.read()
+
+    # The signed string is the newline-joined method, URI, key, data type,
+    # signature version and timestamp.
+    string_to_sign = "\n".join(
+        ["POST", "/v1/identify", access_key, data_type, signature_version, timestamp]
+    )
+
+    fields = {
+        "access_key": access_key,
+        "sample_bytes": str(len(sample_data)),
+        "timestamp": timestamp,
+        "signature": _sign(string_to_sign, secret_key),
+        "data_type": data_type,
+        "signature_version": signature_version,
+    }
+    body, boundary = _build_multipart(
+        fields, {"sample": (sample_name, sample_data, sample_ctype)}
+    )
+
+    request = urllib.request.Request(
+        f"https://{host}/v1/identify",
+        data=body,
+        headers={"Content-Type": f"multipart/form-data; boundary={boundary}"},
+        method="POST",
+    )
+    try:
+        with urllib.request.urlopen(request, timeout=timeout) as response:
+            raw = response.read()
+            return json.loads(raw.decode())
+    except Exception as e:
+        print(f"⚠️ ACR API napaka: {e}", file=sys.stderr)
+        return None
+
+
+def extract_short_audio(video_path, duration=20, start_offset=15):
+    """Extract a short mono MP3 clip from a video for ACR fingerprinting.
+
+    By default the clip starts 15 s into the video (to skip an intro) and
+    lasts 20 s.
+
+    Returns the path of a temporary MP3 file that the caller must delete, or
+    None when ffmpeg cannot be launched, exits with a non-zero status, or
+    leaves an empty output file. The temporary file is removed on every
+    failure path.
+    """
+    tmp_fd, tmp_path = tempfile.mkstemp(suffix=".mp3")
+    os.close(tmp_fd)
+    cmd = [
+        # Global options go first. "error" (rather than "quiet") keeps
+        # ffmpeg's diagnostics on stderr so the failure message is not empty.
+        "ffmpeg", "-y", "-loglevel", "error",
+        # -ss before -i seeks in the input instead of decoding and
+        # discarding everything up to the offset.
+        "-ss", str(start_offset),
+        "-i", video_path,
+        "-t", str(duration),
+        "-vn", "-ar", "44100", "-ac", "1", "-b:a", "128k",
+        "-f", "mp3", tmp_path,
+    ]
+
+    error = None
+    try:
+        proc = subprocess.run(cmd, capture_output=True, text=True, errors="replace")
+        if proc.returncode != 0:
+            error = proc.stderr[:200]
+        elif os.path.getsize(tmp_path) == 0:
+            # ffmpeg reported success but wrote nothing — presumably the
+            # offset is past the end of the audio; there is nothing to send.
+            error = f"prazen izhod @{start_offset}s"
+    except OSError as e:
+        # ffmpeg missing / not executable, or the temp file vanished.
+        error = str(e)
+
+    if error is None:
+        return tmp_path
+
+    try:
+        os.unlink(tmp_path)
+    except OSError:
+        pass
+    print(f"⚠️ ffmpeg napaka: {error}", file=sys.stderr)
+    return None
+
+
+def recognize_video(video_path, offsets=(15, 60)):
+    """Main entry point: identify the song in a video.
+
+    video_path: path to the video file to sample.
+    offsets:    start offsets (in seconds) to try, in order; a 20 s sample is
+                taken at each. Two points are tried by default in case the
+                first one falls on an intro or instrumental part.
+
+    Returns (artist, title) for the first offset that yields a music match
+    with both fields non-empty, otherwise (None, None). Also returns
+    (None, None) immediately when ACR_ACCESS_KEY is not set.
+    """
+    if not os.environ.get("ACR_ACCESS_KEY"):
+        return (None, None)
+
+    for start_offset in offsets:
+        audio = extract_short_audio(video_path, duration=20, start_offset=start_offset)
+        if not audio:
+            continue
+        try:
+            result = recognize_audio_file(audio)
+        finally:
+            # The sample is a temp file; remove it whatever the outcome.
+            try:
+                os.unlink(audio)
+            except OSError:
+                pass
+
+        if not result or not isinstance(result, dict):
+            continue
+
+        # "or {}" / "or []" guards cover keys that are present but JSON null.
+        status_info = result.get("status") or {}
+        status = status_info.get("code")
+        if status != 0:
+            # 1001 = no result, anything else = error
+            msg = status_info.get("msg", "")
+            print(f"   ℹ️ ACR @{start_offset}s: status={status} ({msg})", file=sys.stderr)
+            continue
+
+        # Success — take the first music match
+        music = (result.get("metadata") or {}).get("music") or []
+        if not music:
+            continue
+
+        first = music[0]
+        title = (first.get("title") or "").strip()
+        artists = first.get("artists") or []
+        artist = (artists[0].get("name") or "").strip() if artists else ""
+
+        if artist and title:
+            print(f"   ✅ ACR @{start_offset}s prepoznal: {artist} - {title}", file=sys.stderr)
+            return (artist, title)
+
+    print("   ⚠️ ACR pesem ni prepoznana", file=sys.stderr)
+    return (None, None)
+
+
+if __name__ == "__main__":
+    # CLI contract: exactly one argument (the video path). Stdout carries a
+    # single JSON object; the exit code is 0 only when a song was recognized.
+    cli_args = sys.argv[1:]
+    if len(cli_args) != 1:
+        print("Uporaba: python3 acr_recognize.py <video.mp4>")
+        sys.exit(1)
+
+    artist, title = recognize_video(cli_args[0])
+    recognized = bool(artist and title)
+    if recognized:
+        payload = {"artist": artist, "title": title}
+    else:
+        payload = {"artist": None, "title": None}
+    print(json.dumps(payload, ensure_ascii=False))
+    sys.exit(0 if recognized else 1)
--- a/templates/index.html
+++ b/templates/index.html
@ -500,39 +500,22 @@
    function handleFileSelected(f) {
      const [artist, title] = parseArtistTitle(f.name);
      
-      if (!artist || !title) {
-        // Ni razvidno ime — vprašaj uporabnika
-        const userArtist = prompt(
-          `❗ Iz imena datoteke ni razviden izvajalec in naslov.\n\n` +
-          `Datoteka: "${f.name}"\n\n` +
-          `Vpiši IZVAJALCA (npr. "Lady Gaga"):`,
-          ""
-        );
-        if (!userArtist || !userArtist.trim()) {
-          alert("⛔ Brez izvajalca ne morem nadaljevati.\n\nPoimenuj datoteko v formatu:\n  Izvajalec - Naslov.mp4");
-          fileInput.value = "";
-          return;
-        }
-        const userTitle = prompt(
-          `Vpiši NASLOV pesmi (npr. "Abracadabra"):`,
-          ""
-        );
-        if (!userTitle || !userTitle.trim()) {
-          alert("⛔ Brez naslova ne morem nadaljevati.");
-          fileInput.value = "";
-          return;
-        }
-        pendingArtist = userArtist.trim();
-        pendingTitle = userTitle.trim();
-      } else {
-        pendingArtist = artist;
-        pendingTitle = title;
-      }
-      
      pendingFile = f;
-      dz.querySelector("div").innerHTML = 
-        `📹 <b>${pendingArtist} — ${pendingTitle}</b>` +
-        `<div style="font-size: 11px; color: var(--muted); margin-top: 4px;">${f.name} (${(f.size/1024/1024).toFixed(1)} MB)</div>`;
+      pendingArtist = artist;
+      pendingTitle = title;
+      
+      if (artist && title) {
+        // Razvidno iz filename
+        dz.querySelector("div").innerHTML = 
+          `📹 <b>${pendingArtist} — ${pendingTitle}</b>` +
+          `<div style="font-size: 11px; color: var(--muted); margin-top: 4px;">${f.name} (${(f.size/1024/1024).toFixed(1)} MB)</div>`;
+      } else {
+        // Ni razvidno — pokaži opozorilo, ampak NE blokiraj (server bo poskusil ACR auto-recognize)
+        dz.querySelector("div").innerHTML = 
+          `📹 ${f.name}` +
+          `<div style="font-size: 11px; color: var(--warn); margin-top: 4px;">⚠ Iz imena ni razviden izvajalec — server bo poskusil avto-prepoznati pesem (ACRCloud)</div>` +
+          `<div style="font-size: 11px; color: var(--muted); margin-top: 2px;">${(f.size/1024/1024).toFixed(1)} MB</div>`;
+      }
    }

    // ─── Settings collector ─────────────────────────