ACRCloud auto-recognition: never block uploads, fall back to fingerprinting
Changes: 1. UI: removed the blocking prompt() that asked for artist and title when the filename didn't match the 'Artist - Title' pattern. Upload always proceeds; instead, a yellow warning says 'server will try to recognize'. 2. Backend: added scripts/acr_recognize.py — extracts a 20s audio sample from the video (at 15s and 60s offsets for robustness), computes an ACRCloud fingerprint via the native binary (3KB payload), and sends it to the identify API. 3. Pipeline: process_job() now runs the ACR recognition step before analysis if parsed_artist or parsed_title is missing. The result is saved to job metadata and used for the download filename and the Scribe/Claude filename hint. 4. Credentials: ACR_HOST + ACR_ACCESS_KEY + ACR_SECRET_KEY env vars added to Coolify (using existing keys from openclaw fb-agent metka). 5. requirements.txt: added pyacrcloud==1.0.11 for native fingerprinting. This unblocks future automation/cron upload pipelines — files don't need to be perfectly named; ACRCloud will identify them automatically. Fallback chain: 1. Filename parsing (Artist - Title.mp4) 2. ACRCloud audio fingerprint (works even for '12345.mp4', 'IMG_001.mp4') 3. If both fail: download filename uses 'reel_<id>.mp4' (still works)
This commit is contained in:
parent
3877b822ff
commit
b543057cee
29
app/main.py
29
app/main.py
@ -338,6 +338,35 @@ def process_job(job_id):
|
||||
else:
|
||||
input_path = Path(job["input_path"])
|
||||
|
||||
# ── 1b. Music recognition (ACRCloud) — če nimamo artist+title ─────
|
||||
# Tudi za YouTube jobs lahko naslov ni razviden (npr. iz playliste, "Track 5")
|
||||
if not (job.get("parsed_artist") and job.get("parsed_title")):
|
||||
update_job(job_id, current_step="Avto-prepoznavam pesem (ACRCloud)")
|
||||
try:
|
||||
acr_cmd = [
|
||||
"python3", str(SCRIPTS_DIR / "acr_recognize.py"),
|
||||
str(input_path),
|
||||
]
|
||||
proc = subprocess.run(acr_cmd, capture_output=True, text=True, timeout=120)
|
||||
if proc.returncode == 0 and proc.stdout:
|
||||
data = json.loads(proc.stdout)
|
||||
a, t = data.get("artist"), data.get("title")
|
||||
if a and t:
|
||||
update_job(
|
||||
job_id,
|
||||
parsed_artist=a, parsed_title=t,
|
||||
has_clean_name=True,
|
||||
recognized_via="acrcloud",
|
||||
)
|
||||
job = load_job(job_id)
|
||||
print(f"✅ ACR prepoznal: {a} - {t}", flush=True)
|
||||
else:
|
||||
print(f"⚠️ ACR ni prepoznal pesmi", flush=True)
|
||||
else:
|
||||
print(f"⚠️ ACR exit {proc.returncode}: {proc.stderr[:200]}", flush=True)
|
||||
except Exception as e:
|
||||
print(f"⚠️ ACR error: {e}", flush=True)
|
||||
|
||||
# ── 2. Smart analysis (če auto_chorus) ──────────────────────────
|
||||
if job.get("auto_chorus"):
|
||||
update_job(job_id, current_step="Analiza pesmi (transkript + energija)")
|
||||
|
||||
@ -6,3 +6,4 @@ faster-whisper==1.0.3
|
||||
opencv-python-headless==4.10.0.84
|
||||
numpy==1.26.4
|
||||
yt-dlp>=2025.10.0
|
||||
pyacrcloud==1.0.11
|
||||
|
||||
196
scripts/acr_recognize.py
Normal file
196
scripts/acr_recognize.py
Normal file
@ -0,0 +1,196 @@
|
||||
"""
|
||||
acr_recognize.py — Audio recognition prek ACRCloud Audio Fingerprinting API.
|
||||
|
||||
Uporabi native binary za fingerprinting (3KB sample namesto cel audio).
|
||||
Vrne (artist, title) ali (None, None) če pesem ni prepoznana.
|
||||
|
||||
Credentials se preberejo iz env vars (ACR_ACCESS_KEY, ACR_SECRET_KEY, ACR_HOST).
|
||||
"""
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import hmac
|
||||
import base64
|
||||
import time
|
||||
import subprocess
|
||||
import tempfile
|
||||
import urllib.request
|
||||
import urllib.parse
|
||||
import uuid
|
||||
|
||||
|
||||
def _sign(string_to_sign, secret):
|
||||
h = hmac.new(secret.encode('ascii'), string_to_sign.encode('ascii'), digestmod='sha1')
|
||||
return base64.b64encode(h.digest()).decode('ascii')
|
||||
|
||||
|
||||
def _build_multipart(fields, files):
|
||||
"""Sestavi multipart/form-data body."""
|
||||
boundary = uuid.uuid4().hex
|
||||
parts = []
|
||||
for k, v in fields.items():
|
||||
parts.append(f"--{boundary}\r\nContent-Disposition: form-data; name=\"{k}\"\r\n\r\n{v}\r\n".encode())
|
||||
for k, (fname, content, ctype) in files.items():
|
||||
parts.append(
|
||||
f"--{boundary}\r\nContent-Disposition: form-data; name=\"{k}\"; filename=\"{fname}\"\r\n"
|
||||
f"Content-Type: {ctype}\r\n\r\n".encode() + content + b"\r\n"
|
||||
)
|
||||
parts.append(f"--{boundary}--\r\n".encode())
|
||||
body = b"".join(parts)
|
||||
return body, boundary
|
||||
|
||||
|
||||
def recognize_audio_file(audio_path, timeout=30):
    """Send an audio file (or its fingerprint) to ACRCloud and return the raw response.

    A fingerprint computed by the optional ``acrcloud`` native extension is
    preferred because it is a smaller payload; when the extension is missing
    or fails, the raw file bytes are uploaded instead.

    Credentials are read from the ``ACR_ACCESS_KEY`` / ``ACR_SECRET_KEY``
    environment variables and the endpoint host from ``ACR_HOST``.

    Args:
        audio_path: Path to an MP3/WAV/M4A file.
        timeout: Seconds to wait for the HTTP request.

    Returns:
        The decoded JSON response as a ``dict``, or ``None`` when credentials
        are missing, the file is missing or unreadable, or the request fails.
    """
    host = os.environ.get("ACR_HOST", "identify-eu-west-1.acrcloud.com")
    access_key = os.environ.get("ACR_ACCESS_KEY")
    secret_key = os.environ.get("ACR_SECRET_KEY")

    if not access_key or not secret_key:
        print("⚠️ ACR_ACCESS_KEY/SECRET_KEY nista nastavljena", file=sys.stderr)
        return None

    if not os.path.exists(audio_path):
        print(f"⚠️ Audio file ne obstaja: {audio_path}", file=sys.stderr)
        return None

    # Try the native binary first (smaller payload). Any failure here is
    # non-fatal: we fall back to uploading the audio itself.
    fingerprint = None
    try:
        from acrcloud import acrcloud_extr_tool as acr
        fingerprint = acr.create_fingerprint_by_file(audio_path, 0, 30, 0)
    except Exception as e:  # ImportError included; best-effort by design
        print(f" ℹ️ Fingerprint binary ni na voljo ({e}), uporabljam audio direktno", file=sys.stderr)

    if fingerprint:
        data_type = "fingerprint"
        sample_data = fingerprint
        sample_name = "sample.fp"
        sample_ctype = "application/octet-stream"
    else:
        data_type = "audio"
        try:
            with open(audio_path, "rb") as f:
                sample_data = f.read()
        except OSError as e:
            # Honour the "None on error" contract instead of raising.
            print(f"⚠️ Audio file ni berljiv: {audio_path} ({e})", file=sys.stderr)
            return None
        sample_name = "sample.mp3"
        sample_ctype = "audio/mpeg"

    timestamp = str(int(time.time()))
    http_method = "POST"
    http_uri = "/v1/identify"
    signature_version = "1"

    # The method and URI are part of the signed string, so the request below
    # is built from these same variables rather than from repeated literals.
    string_to_sign = "\n".join(
        [http_method, http_uri, access_key, data_type, signature_version, timestamp]
    )
    signature = _sign(string_to_sign, secret_key)

    fields = {
        "access_key": access_key,
        "sample_bytes": str(len(sample_data)),
        "timestamp": timestamp,
        "signature": signature,
        "data_type": data_type,
        "signature_version": signature_version,
    }
    files = {"sample": (sample_name, sample_data, sample_ctype)}
    body, boundary = _build_multipart(fields, files)

    req = urllib.request.Request(
        f"https://{host}{http_uri}",
        data=body,
        headers={"Content-Type": f"multipart/form-data; boundary={boundary}"},
        method=http_method,
    )
    try:
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            payload = json.loads(resp.read().decode())
    except Exception as e:
        print(f"⚠️ ACR API napaka: {e}", file=sys.stderr)
        return None

    if not isinstance(payload, dict):
        # Callers use dict methods on the result; reject any other JSON shape.
        print(
            f"⚠️ ACR API napaka: nepričakovan odgovor ({type(payload).__name__})",
            file=sys.stderr,
        )
        return None
    return payload
|
||||
|
||||
|
||||
def extract_short_audio(video_path, duration=20, start_offset=15):
    """Extract a short mono MP3 excerpt from a video for ACR fingerprinting.

    By default the excerpt starts 15 s into the video (to skip the intro)
    and lasts 20 s.

    Args:
        video_path: Path to the input video.
        duration: Length of the excerpt in seconds.
        start_offset: Position in seconds at which the excerpt starts.

    Returns:
        Path to a temporary MP3 file that the caller must delete, or ``None``
        when ffmpeg is unavailable, fails, or produces no audio.
    """
    tmp_fd, tmp_path = tempfile.mkstemp(suffix=".mp3")
    os.close(tmp_fd)

    def _fail(reason):
        # Never leave the temp file behind on a failed extraction.
        try:
            os.unlink(tmp_path)
        except OSError:
            pass
        print(f"⚠️ ffmpeg napaka: {reason}", file=sys.stderr)
        return None

    cmd = [
        # "error" rather than "quiet": with "quiet" ffmpeg prints nothing, so
        # the stderr excerpt logged on failure was always empty.
        "ffmpeg", "-y", "-loglevel", "error",
        "-i", video_path,
        "-ss", str(start_offset),
        "-t", str(duration),
        "-vn", "-ar", "44100", "-ac", "1", "-b:a", "128k",
        "-f", "mp3", tmp_path,
    ]
    try:
        proc = subprocess.run(cmd, capture_output=True, text=True, errors="replace")
    except OSError as e:
        # e.g. the ffmpeg binary is not installed / not executable.
        return _fail(e)

    if proc.returncode != 0:
        return _fail(proc.stderr[:200])

    # NOTE(review): when the seek point lies past the end of the input, ffmpeg
    # may exit 0 with a header-only file; the 1 KiB floor is a heuristic for
    # "no audio frames" — confirm it suits the inputs seen in production.
    if os.path.getsize(tmp_path) < 1024:
        return _fail(f"prazen izhod @{start_offset}s")

    return tmp_path
|
||||
|
||||
|
||||
def _first_music_match(result):
    """Return ``(artist, title)`` from the first music entry of an ACRCloud response, or ``None``."""
    music = (result.get("metadata") or {}).get("music") or []
    if not music:
        return None
    first = music[0]
    # ``or ""`` guards against JSON nulls, which ``.get(key, "")`` lets through.
    title = (first.get("title") or "").strip()
    artists = first.get("artists") or []
    artist = (artists[0].get("name") or "").strip() if artists else ""
    if artist and title:
        return (artist, title)
    return None


def recognize_video(video_path):
    """Recognize the song in a video.

    Two points in the video (15 s and 60 s) are sampled for robustness, in
    case the first one falls on an intro or instrumental passage.

    Args:
        video_path: Path to the video file.

    Returns:
        ``(artist, title)`` on success, otherwise ``(None, None)``.
    """
    # Both keys are required to sign a request; without them there is no
    # point in running ffmpeg at all.
    if not (os.environ.get("ACR_ACCESS_KEY") and os.environ.get("ACR_SECRET_KEY")):
        return (None, None)

    for start_offset in (15, 60):
        audio = extract_short_audio(video_path, duration=20, start_offset=start_offset)
        if not audio:
            continue
        try:
            result = recognize_audio_file(audio)
        finally:
            # The excerpt is a temp file; remove it whatever the outcome.
            try:
                os.unlink(audio)
            except OSError:
                pass

        if not result:
            continue

        status_info = result.get("status") or {}
        status = status_info.get("code")
        if status != 0:
            # 1001 = no result, anything else = error
            msg = status_info.get("msg", "")
            print(f" ℹ️ ACR @{start_offset}s: status={status} ({msg})", file=sys.stderr)
            continue

        # Success — take the first music match.
        match = _first_music_match(result)
        if match:
            artist, title = match
            print(f" ✅ ACR @{start_offset}s prepoznal: {artist} - {title}", file=sys.stderr)
            return match

    print(" ⚠️ ACR pesem ni prepoznana", file=sys.stderr)
    return (None, None)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # CLI contract: stdout carries exactly one JSON object
    # ({"artist": ..., "title": ...}); everything else goes to stderr.
    # Exit status is 0 only when both artist and title were recognized.
    if len(sys.argv) != 2:
        # Usage text goes to stderr so stdout stays JSON-only.
        print("Uporaba: python3 acr_recognize.py <video.mp4>", file=sys.stderr)
        sys.exit(1)

    artist, title = recognize_video(sys.argv[1])
    if artist and title:
        print(json.dumps({"artist": artist, "title": title}, ensure_ascii=False))
        sys.exit(0)

    print(json.dumps({"artist": None, "title": None}))
    sys.exit(1)
|
||||
@ -500,39 +500,22 @@
|
||||
// Handles a file chosen for upload: parses artist/title from the filename via
// parseArtistTitle(), stores the file and the parsed values in pendingFile /
// pendingArtist / pendingTitle, and renders a summary into the `dz` element.
// NOTE(review): this span is rendered from a diff, so the prompt()-based branch
// and the non-blocking branch both appear below — confirm which lines belong
// to the current revision before relying on the control flow.
// NOTE(review): f.name is interpolated into innerHTML without escaping —
// confirm whether filenames need HTML-escaping here.
function handleFileSelected(f) {
|
||||
const [artist, title] = parseArtistTitle(f.name);
|
||||
|
||||
if (!artist || !title) {
|
||||
// Name is not evident from the filename — ask the user
|
||||
const userArtist = prompt(
|
||||
`❗ Iz imena datoteke ni razviden izvajalec in naslov.\n\n` +
|
||||
`Datoteka: "${f.name}"\n\n` +
|
||||
`Vpiši IZVAJALCA (npr. "Lady Gaga"):`,
|
||||
""
|
||||
);
|
||||
if (!userArtist || !userArtist.trim()) {
|
||||
alert("⛔ Brez izvajalca ne morem nadaljevati.\n\nPoimenuj datoteko v formatu:\n Izvajalec - Naslov.mp4");
|
||||
fileInput.value = "";
|
||||
return;
|
||||
}
|
||||
const userTitle = prompt(
|
||||
`Vpiši NASLOV pesmi (npr. "Abracadabra"):`,
|
||||
""
|
||||
);
|
||||
if (!userTitle || !userTitle.trim()) {
|
||||
alert("⛔ Brez naslova ne morem nadaljevati.");
|
||||
fileInput.value = "";
|
||||
return;
|
||||
}
|
||||
pendingArtist = userArtist.trim();
|
||||
pendingTitle = userTitle.trim();
|
||||
} else {
|
||||
pendingArtist = artist;
|
||||
pendingTitle = title;
|
||||
}
|
||||
|
||||
pendingFile = f;
|
||||
dz.querySelector("div").innerHTML =
|
||||
`📹 <b>${pendingArtist} — ${pendingTitle}</b>` +
|
||||
`<div style="font-size: 11px; color: var(--muted); margin-top: 4px;">${f.name} (${(f.size/1024/1024).toFixed(1)} MB)</div>`;
|
||||
pendingArtist = artist;
|
||||
pendingTitle = title;
|
||||
|
||||
if (artist && title) {
|
||||
// Artist and title are evident from the filename
|
||||
dz.querySelector("div").innerHTML =
|
||||
`📹 <b>${pendingArtist} — ${pendingTitle}</b>` +
|
||||
`<div style="font-size: 11px; color: var(--muted); margin-top: 4px;">${f.name} (${(f.size/1024/1024).toFixed(1)} MB)</div>`;
|
||||
} else {
|
||||
// Not evident — show a warning but do NOT block (the server will attempt ACR auto-recognition)
|
||||
dz.querySelector("div").innerHTML =
|
||||
`📹 ${f.name}` +
|
||||
`<div style="font-size: 11px; color: var(--warn); margin-top: 4px;">⚠ Iz imena ni razviden izvajalec — server bo poskusil avto-prepoznati pesem (ACRCloud)</div>` +
|
||||
`<div style="font-size: 11px; color: var(--muted); margin-top: 2px;">${(f.size/1024/1024).toFixed(1)} MB</div>`;
|
||||
}
|
||||
}
|
||||
|
||||
// ─── Settings collector ─────────────────────────
|
||||
|
||||
Loading…
Reference in New Issue
Block a user