ACRCloud auto-recognition: never block uploads, fall back to fingerprinting
Changes: 1. UI: removed the blocking prompt() that asked for artist and title whenever the filename didn't match the 'Artist - Title' pattern. The upload now always proceeds; instead, a yellow warning says the server will try to recognize the song. 2. Backend: added scripts/acr_recognize.py — extracts a 20s audio sample from the video (at 15s and 60s offsets for robustness), computes an ACRCloud fingerprint via the native binary (3KB payload), and sends it to the identify API. 3. Pipeline: process_job() now runs the ACR recognition step before analysis if parsed_artist or parsed_title is missing. The result is saved to job metadata and used for the download filename and the Scribe/Claude filename hint. 4. Credentials: ACR_HOST, ACR_ACCESS_KEY and ACR_SECRET_KEY env vars added to Coolify (using existing keys from openclaw fb-agent metka). 5. requirements.txt: added pyacrcloud==1.0.11 for native fingerprinting. This unblocks future automation/cron upload pipelines — files don't need to be perfectly named; ACRCloud will identify them automatically. Fallback chain: 1. Filename parsing (Artist - Title.mp4). 2. ACRCloud audio fingerprint (works even for '12345.mp4', 'IMG_001.mp4'). 3. If both fail, the download filename uses 'reel_<id>.mp4' (still works).
This commit is contained in:
parent
3877b822ff
commit
b543057cee
29
app/main.py
29
app/main.py
@ -338,6 +338,35 @@ def process_job(job_id):
|
|||||||
else:
|
else:
|
||||||
input_path = Path(job["input_path"])
|
input_path = Path(job["input_path"])
|
||||||
|
|
||||||
|
# ── 1b. Music recognition (ACRCloud) — če nimamo artist+title ─────
|
||||||
|
# Tudi za YouTube jobs lahko naslov ni razviden (npr. iz playliste, "Track 5")
|
||||||
|
if not (job.get("parsed_artist") and job.get("parsed_title")):
|
||||||
|
update_job(job_id, current_step="Avto-prepoznavam pesem (ACRCloud)")
|
||||||
|
try:
|
||||||
|
acr_cmd = [
|
||||||
|
"python3", str(SCRIPTS_DIR / "acr_recognize.py"),
|
||||||
|
str(input_path),
|
||||||
|
]
|
||||||
|
proc = subprocess.run(acr_cmd, capture_output=True, text=True, timeout=120)
|
||||||
|
if proc.returncode == 0 and proc.stdout:
|
||||||
|
data = json.loads(proc.stdout)
|
||||||
|
a, t = data.get("artist"), data.get("title")
|
||||||
|
if a and t:
|
||||||
|
update_job(
|
||||||
|
job_id,
|
||||||
|
parsed_artist=a, parsed_title=t,
|
||||||
|
has_clean_name=True,
|
||||||
|
recognized_via="acrcloud",
|
||||||
|
)
|
||||||
|
job = load_job(job_id)
|
||||||
|
print(f"✅ ACR prepoznal: {a} - {t}", flush=True)
|
||||||
|
else:
|
||||||
|
print(f"⚠️ ACR ni prepoznal pesmi", flush=True)
|
||||||
|
else:
|
||||||
|
print(f"⚠️ ACR exit {proc.returncode}: {proc.stderr[:200]}", flush=True)
|
||||||
|
except Exception as e:
|
||||||
|
print(f"⚠️ ACR error: {e}", flush=True)
|
||||||
|
|
||||||
# ── 2. Smart analysis (če auto_chorus) ──────────────────────────
|
# ── 2. Smart analysis (če auto_chorus) ──────────────────────────
|
||||||
if job.get("auto_chorus"):
|
if job.get("auto_chorus"):
|
||||||
update_job(job_id, current_step="Analiza pesmi (transkript + energija)")
|
update_job(job_id, current_step="Analiza pesmi (transkript + energija)")
|
||||||
|
|||||||
@ -6,3 +6,4 @@ faster-whisper==1.0.3
|
|||||||
opencv-python-headless==4.10.0.84
|
opencv-python-headless==4.10.0.84
|
||||||
numpy==1.26.4
|
numpy==1.26.4
|
||||||
yt-dlp>=2025.10.0
|
yt-dlp>=2025.10.0
|
||||||
|
pyacrcloud==1.0.11
|
||||||
|
|||||||
196
scripts/acr_recognize.py
Normal file
196
scripts/acr_recognize.py
Normal file
@ -0,0 +1,196 @@
|
|||||||
|
"""
|
||||||
|
acr_recognize.py — Audio recognition prek ACRCloud Audio Fingerprinting API.
|
||||||
|
|
||||||
|
Uporabi native binary za fingerprinting (3KB sample namesto cel audio).
|
||||||
|
Vrne (artist, title) ali (None, None) če pesem ni prepoznana.
|
||||||
|
|
||||||
|
Credentials se preberejo iz env vars (ACR_ACCESS_KEY, ACR_SECRET_KEY, ACR_HOST).
|
||||||
|
"""
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
import json
|
||||||
|
import hmac
|
||||||
|
import base64
|
||||||
|
import time
|
||||||
|
import subprocess
|
||||||
|
import tempfile
|
||||||
|
import urllib.request
|
||||||
|
import urllib.parse
|
||||||
|
import uuid
|
||||||
|
|
||||||
|
|
||||||
|
def _sign(string_to_sign, secret):
    """Return the base64-encoded HMAC-SHA1 of *string_to_sign* keyed with *secret*.

    Both arguments are encoded as ASCII before signing; the result is an
    ASCII string.
    """
    key_bytes = secret.encode('ascii')
    message = string_to_sign.encode('ascii')
    digest = hmac.new(key_bytes, message, digestmod='sha1').digest()
    return base64.b64encode(digest).decode('ascii')
|
||||||
|
|
||||||
|
|
||||||
|
def _build_multipart(fields, files):
    """Assemble a multipart/form-data request body.

    fields: mapping of form field name -> value (formatted with str()).
    files: mapping of form field name -> (filename, content bytes, content type).

    Returns a (body, boundary) tuple; the boundary is a fresh random hex
    string and must be echoed in the request's Content-Type header.
    """
    boundary = uuid.uuid4().hex
    delimiter = f"--{boundary}\r\n"

    # Plain text fields first, in the mapping's iteration order.
    chunks = [
        f'{delimiter}Content-Disposition: form-data; name="{name}"\r\n\r\n{value}\r\n'.encode()
        for name, value in fields.items()
    ]

    # File parts: text header, raw bytes payload, trailing CRLF.
    for name, (filename, payload, content_type) in files.items():
        header = (
            f'{delimiter}Content-Disposition: form-data; name="{name}"; filename="{filename}"\r\n'
            f"Content-Type: {content_type}\r\n\r\n"
        )
        chunks.append(header.encode() + payload + b"\r\n")

    # Closing delimiter.
    chunks.append(f"--{boundary}--\r\n".encode())
    return b"".join(chunks), boundary
|
||||||
|
|
||||||
|
|
||||||
|
def recognize_audio_file(audio_path, timeout=30):
    """Send an audio sample (or its fingerprint) to ACRCloud and return the raw response.

    audio_path: path to the audio file to identify.
    timeout: timeout in seconds passed to the HTTP request.

    Credentials and host are read from the ACR_ACCESS_KEY, ACR_SECRET_KEY and
    ACR_HOST environment variables. Returns the parsed JSON response, or None
    when credentials are missing, the file does not exist, or the request fails.
    """
    host = os.environ.get("ACR_HOST", "identify-eu-west-1.acrcloud.com")
    access_key = os.environ.get("ACR_ACCESS_KEY")
    secret_key = os.environ.get("ACR_SECRET_KEY")

    if not (access_key and secret_key):
        print("⚠️ ACR_ACCESS_KEY/SECRET_KEY nista nastavljena", file=sys.stderr)
        return None

    if not os.path.exists(audio_path):
        print(f"⚠️ Audio file ne obstaja: {audio_path}", file=sys.stderr)
        return None

    # Prefer the native fingerprint extractor (smaller payload); on any
    # failure fall back to uploading the audio bytes themselves.
    fingerprint = None
    try:
        from acrcloud import acrcloud_extr_tool as acr
        fingerprint = acr.create_fingerprint_by_file(audio_path, 0, 30, 0)
    except Exception as e:
        print(f" ℹ️ Fingerprint binary ni na voljo ({e}), uporabljam audio direktno", file=sys.stderr)

    timestamp = str(int(time.time()))
    http_method = "POST"
    http_uri = "/v1/identify"
    signature_version = "1"

    if fingerprint:
        data_type, sample_data = "fingerprint", fingerprint
        sample_name, sample_ctype = "sample.fp", "application/octet-stream"
    else:
        data_type = "audio"
        with open(audio_path, "rb") as f:
            sample_data = f.read()
        sample_name, sample_ctype = "sample.mp3", "audio/mpeg"

    # The signed string is the newline-joined request description.
    string_to_sign = "\n".join(
        [http_method, http_uri, access_key, data_type, signature_version, timestamp]
    )
    signature = _sign(string_to_sign, secret_key)

    fields = {
        "access_key": access_key,
        "sample_bytes": str(len(sample_data)),
        "timestamp": timestamp,
        "signature": signature,
        "data_type": data_type,
        "signature_version": signature_version,
    }
    files = {"sample": (sample_name, sample_data, sample_ctype)}
    body, boundary = _build_multipart(fields, files)

    req = urllib.request.Request(
        f"https://{host}{http_uri}",
        data=body,
        headers={"Content-Type": f"multipart/form-data; boundary={boundary}"},
        method="POST",
    )
    try:
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            raw = resp.read()
            return json.loads(raw.decode())
    except Exception as e:
        print(f"⚠️ ACR API napaka: {e}", file=sys.stderr)
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def extract_short_audio(video_path, duration=20, start_offset=15):
    """Extract a short mono MP3 clip from a video for ACR fingerprinting.

    video_path: path of the input media file handed to ffmpeg.
    duration: clip length in seconds.
    start_offset: position in seconds where the clip starts (the default
        skips the first 15s).

    Returns the path of a temporary MP3 file that the caller must delete, or
    None when ffmpeg could not be started or exited with an error. The
    temporary file is removed on every failure path.
    """
    tmp_fd, tmp_path = tempfile.mkstemp(suffix=".mp3")
    os.close(tmp_fd)

    cmd = [
        # Global options go first. "-y" is required because mkstemp already
        # created the output file; "error" (not "quiet") keeps ffmpeg's
        # diagnostics on stderr so the message logged below is not empty.
        "ffmpeg", "-y", "-loglevel", "error",
        "-i", video_path,
        "-ss", str(start_offset),
        "-t", str(duration),
        "-vn", "-ar", "44100", "-ac", "1", "-b:a", "128k",
        "-f", "mp3", tmp_path,
    ]

    failure = None
    try:
        # errors="replace": ffmpeg diagnostics may contain bytes that are not
        # valid in the locale encoding (e.g. odd filenames).
        proc = subprocess.run(cmd, capture_output=True, text=True, errors="replace")
        if proc.returncode != 0:
            failure = proc.stderr[:200]
    except OSError as e:
        # ffmpeg missing or not executable: report it instead of crashing.
        failure = str(e)

    if failure is None:
        return tmp_path

    try:
        os.unlink(tmp_path)
    except OSError:
        pass
    print(f"⚠️ ffmpeg napaka: {failure}", file=sys.stderr)
    return None
|
||||||
|
|
||||||
|
|
||||||
|
def recognize_video(video_path):
    """Main entry point: identify the song in a video.

    Samples are taken at two positions (15s and 60s) so that an intro or an
    instrumental passage at one of them does not prevent a match. The first
    sample yielding both an artist and a title wins.

    Returns (artist, title), or (None, None) when ACR_ACCESS_KEY is not set
    or no sample was recognized.
    """
    if not os.environ.get("ACR_ACCESS_KEY"):
        return (None, None)

    for start_offset in (15, 60):
        sample_path = extract_short_audio(video_path, duration=20, start_offset=start_offset)
        if not sample_path:
            continue

        # Always remove the temporary sample, whatever the lookup does.
        try:
            response = recognize_audio_file(sample_path)
        finally:
            try:
                os.unlink(sample_path)
            except OSError:
                pass

        if not response:
            continue

        code = response.get("status", {}).get("code")
        if code != 0:
            # 1001 = no result, anything else = error
            detail = response.get("status", {}).get("msg", "")
            print(f" ℹ️ ACR @{start_offset}s: status={code} ({detail})", file=sys.stderr)
            continue

        # Success: use the first music match.
        matches = response.get("metadata", {}).get("music", [])
        if not matches:
            continue

        best = matches[0]
        title = best.get("title", "").strip()
        performers = best.get("artists", [])
        if performers:
            artist = performers[0].get("name", "").strip()
        else:
            artist = ""

        if artist and title:
            print(f" ✅ ACR @{start_offset}s prepoznal: {artist} - {title}", file=sys.stderr)
            return (artist, title)

    print(" ⚠️ ACR pesem ni prepoznana", file=sys.stderr)
    return (None, None)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # CLI: takes exactly one argument, the video path. Always prints one JSON
    # object with "artist" and "title" to stdout; exit code 0 means both were
    # recognized, 1 means usage error or no match.
    if len(sys.argv) != 2:
        print("Uporaba: python3 acr_recognize.py <video.mp4>")
        sys.exit(1)

    artist, title = recognize_video(sys.argv[1])
    recognized = bool(artist and title)
    if recognized:
        payload = {"artist": artist, "title": title}
    else:
        payload = {"artist": None, "title": None}
    print(json.dumps(payload, ensure_ascii=False))
    sys.exit(0 if recognized else 1)
|
||||||
@ -500,39 +500,22 @@
|
|||||||
function handleFileSelected(f) {
|
function handleFileSelected(f) {
|
||||||
const [artist, title] = parseArtistTitle(f.name);
|
const [artist, title] = parseArtistTitle(f.name);
|
||||||
|
|
||||||
if (!artist || !title) {
|
|
||||||
// Ni razvidno ime — vprašaj uporabnika
|
|
||||||
const userArtist = prompt(
|
|
||||||
`❗ Iz imena datoteke ni razviden izvajalec in naslov.\n\n` +
|
|
||||||
`Datoteka: "${f.name}"\n\n` +
|
|
||||||
`Vpiši IZVAJALCA (npr. "Lady Gaga"):`,
|
|
||||||
""
|
|
||||||
);
|
|
||||||
if (!userArtist || !userArtist.trim()) {
|
|
||||||
alert("⛔ Brez izvajalca ne morem nadaljevati.\n\nPoimenuj datoteko v formatu:\n Izvajalec - Naslov.mp4");
|
|
||||||
fileInput.value = "";
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
const userTitle = prompt(
|
|
||||||
`Vpiši NASLOV pesmi (npr. "Abracadabra"):`,
|
|
||||||
""
|
|
||||||
);
|
|
||||||
if (!userTitle || !userTitle.trim()) {
|
|
||||||
alert("⛔ Brez naslova ne morem nadaljevati.");
|
|
||||||
fileInput.value = "";
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
pendingArtist = userArtist.trim();
|
|
||||||
pendingTitle = userTitle.trim();
|
|
||||||
} else {
|
|
||||||
pendingArtist = artist;
|
|
||||||
pendingTitle = title;
|
|
||||||
}
|
|
||||||
|
|
||||||
pendingFile = f;
|
pendingFile = f;
|
||||||
dz.querySelector("div").innerHTML =
|
pendingArtist = artist;
|
||||||
`📹 <b>${pendingArtist} — ${pendingTitle}</b>` +
|
pendingTitle = title;
|
||||||
`<div style="font-size: 11px; color: var(--muted); margin-top: 4px;">${f.name} (${(f.size/1024/1024).toFixed(1)} MB)</div>`;
|
|
||||||
|
if (artist && title) {
|
||||||
|
// Razvidno iz filename
|
||||||
|
dz.querySelector("div").innerHTML =
|
||||||
|
`📹 <b>${pendingArtist} — ${pendingTitle}</b>` +
|
||||||
|
`<div style="font-size: 11px; color: var(--muted); margin-top: 4px;">${f.name} (${(f.size/1024/1024).toFixed(1)} MB)</div>`;
|
||||||
|
} else {
|
||||||
|
// Ni razvidno — pokaži opozorilo, ampak NE blokiraj (server bo poskusil ACR auto-recognize)
|
||||||
|
dz.querySelector("div").innerHTML =
|
||||||
|
`📹 ${f.name}` +
|
||||||
|
`<div style="font-size: 11px; color: var(--warn); margin-top: 4px;">⚠ Iz imena ni razviden izvajalec — server bo poskusil avto-prepoznati pesem (ACRCloud)</div>` +
|
||||||
|
`<div style="font-size: 11px; color: var(--muted); margin-top: 2px;">${(f.size/1024/1024).toFixed(1)} MB</div>`;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// ─── Settings collector ─────────────────────────
|
// ─── Settings collector ─────────────────────────
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user