Problem: MXF and MPG files (TV broadcast formats) often contain:
- Multiple audio streams (4-8 streams for different language tracks)
- Multichannel layouts (5.1, 7.1) instead of stereo
- Default ffmpeg behavior was -c:a aac without channel limit, which meant multichannel got transcoded as multichannel AAC, overwriting what should have been clean stereo

Solution:
1. get_audio_streams() helper probes all audio streams with ffprobe
   - Returns codec, channels, sample_rate, language, layout for each
2. build_audio_args() picks best stream + downmix:
   - Prefers first 2-channel stereo stream (usually main mix)
   - Falls back to first stream if none are 2-ch
   - Always: -ac 2 (force stereo downmix), -ar 48000, -c:a aac, -b:a 192k
   - Bitrate raised from 128k to 192k for music quality
3. Smart trim path now detects broadcast formats:
   - .mxf, .mpg, .mpeg, .ts, .m2ts, .mts → transcode (not stream copy)
   - Standard MP4/MOV → stream copy (faster, lossless)
4. Pre-conversion step for broadcast files without trim:
   - Even without --start/--duration, MXF/MPG get converted to MP4
   - Same audio handling as trim path
5. Main render adds explicit -map 0:v:0 -map 0:a:0? -ac 2 to ensure only first video and first audio stream get encoded, with stereo
6. ACR recognize also gets -map 0:a:0 -ac 2 for MXF compatibility
7. UI accepts: video/*,.mxf,.mpg,.mpeg,.ts,.m2ts,.mts
8. Upload limit raised: 2GB → 10GB (MXF files are large)

This means a TV broadcast MXF with [SLO/EN/DE language tracks] now correctly outputs stereo MP4 with the main language track preserved.
203 lines
6.7 KiB
Python
203 lines
6.7 KiB
Python
"""
|
||
acr_recognize.py — Audio recognition via the ACRCloud Audio Fingerprinting API.

Uses a native binary for fingerprinting (a 3 KB sample instead of the whole audio).
Returns (artist, title), or (None, None) if the song is not recognized.

Credentials are read from environment variables (ACR_ACCESS_KEY, ACR_SECRET_KEY, ACR_HOST).
|
||
"""
|
||
import os
|
||
import sys
|
||
import json
|
||
import hmac
|
||
import base64
|
||
import time
|
||
import subprocess
|
||
import tempfile
|
||
import urllib.request
|
||
import urllib.parse
|
||
import uuid
|
||
|
||
|
||
def _sign(string_to_sign, secret):
|
||
h = hmac.new(secret.encode('ascii'), string_to_sign.encode('ascii'), digestmod='sha1')
|
||
return base64.b64encode(h.digest()).decode('ascii')
|
||
|
||
|
||
def _build_multipart(fields, files):
|
||
"""Sestavi multipart/form-data body."""
|
||
boundary = uuid.uuid4().hex
|
||
parts = []
|
||
for k, v in fields.items():
|
||
parts.append(f"--{boundary}\r\nContent-Disposition: form-data; name=\"{k}\"\r\n\r\n{v}\r\n".encode())
|
||
for k, (fname, content, ctype) in files.items():
|
||
parts.append(
|
||
f"--{boundary}\r\nContent-Disposition: form-data; name=\"{k}\"; filename=\"{fname}\"\r\n"
|
||
f"Content-Type: {ctype}\r\n\r\n".encode() + content + b"\r\n"
|
||
)
|
||
parts.append(f"--{boundary}--\r\n".encode())
|
||
body = b"".join(parts)
|
||
return body, boundary
|
||
|
||
|
||
def recognize_audio_file(audio_path, timeout=30):
    """Send an audio file (or its fingerprint) to ACRCloud and return the raw response.

    If the optional ``acrcloud`` extraction tool can be imported and yields a
    fingerprint, the fingerprint is uploaded (smaller payload); otherwise the
    file's bytes are uploaded directly.

    Args:
        audio_path: Path to the audio file to identify (MP3/WAV/M4A).
        timeout: Timeout in seconds passed to the HTTP request.

    Returns:
        The decoded JSON response as a dict, or None when the credentials are
        not configured, ``audio_path`` is not a readable regular file, or the
        HTTP request / response decoding fails.
    """
    host = os.environ.get("ACR_HOST", "identify-eu-west-1.acrcloud.com")
    access_key = os.environ.get("ACR_ACCESS_KEY")
    secret_key = os.environ.get("ACR_SECRET_KEY")

    if not access_key or not secret_key:
        print("⚠️ ACR_ACCESS_KEY/SECRET_KEY nista nastavljena", file=sys.stderr)
        return None

    # isfile rather than exists: a directory passes an existence check but
    # would make open() below raise instead of returning None as documented.
    if not os.path.isfile(audio_path):
        print(f"⚠️ Audio file ne obstaja: {audio_path}", file=sys.stderr)
        return None

    # Try the native fingerprint tool first (smaller payload). Any failure
    # here is non-fatal; we fall back to uploading the audio itself.
    fingerprint = None
    try:
        from acrcloud import acrcloud_extr_tool as acr
        # NOTE(review): the meaning of the (0, 30, 0) arguments is defined by
        # the acrcloud tool — confirm against its documentation.
        fingerprint = acr.create_fingerprint_by_file(audio_path, 0, 30, 0)
    except Exception as e:
        print(f" ℹ️ Fingerprint binary ni na voljo ({e}), uporabljam audio direktno", file=sys.stderr)

    timestamp = str(int(time.time()))
    http_method = "POST"
    http_uri = "/v1/identify"
    signature_version = "1"

    if fingerprint:
        data_type = "fingerprint"
        sample_data = fingerprint
        sample_name = "sample.fp"
        sample_ctype = "application/octet-stream"
    else:
        data_type = "audio"
        try:
            with open(audio_path, "rb") as f:
                sample_data = f.read()
        except OSError as e:
            # Unreadable file (permissions, vanished in the meantime, ...):
            # honor the "None on error" contract instead of crashing.
            print(f"⚠️ Audio file ni berljiv: {audio_path} ({e})", file=sys.stderr)
            return None
        sample_name = "sample.mp3"
        sample_ctype = "audio/mpeg"

    # The signature covers method, path, key, data type, version and timestamp,
    # joined by newlines in exactly this order.
    string_to_sign = "\n".join(
        [http_method, http_uri, access_key, data_type, signature_version, timestamp]
    )
    signature = _sign(string_to_sign, secret_key)

    fields = {
        "access_key": access_key,
        "sample_bytes": str(len(sample_data)),
        "timestamp": timestamp,
        "signature": signature,
        "data_type": data_type,
        "signature_version": signature_version,
    }
    files = {"sample": (sample_name, sample_data, sample_ctype)}
    body, boundary = _build_multipart(fields, files)

    # Build the URL from the same path that was signed so the two cannot drift apart.
    url = f"https://{host}{http_uri}"
    req = urllib.request.Request(
        url,
        data=body,
        headers={"Content-Type": f"multipart/form-data; boundary={boundary}"},
        method=http_method,
    )
    try:
        with urllib.request.urlopen(req, timeout=timeout) as resp:
            return json.loads(resp.read().decode())
    except Exception as e:
        # Best-effort boundary: network, HTTP and JSON errors all become None.
        print(f"⚠️ ACR API napaka: {e}", file=sys.stderr)
        return None
|
||
|
||
|
||
def extract_short_audio(video_path, duration=20, start_offset=15):
    """Extract a short audio clip from a video for ACR fingerprinting.

    By default the clip starts 15 s into the video (to skip an intro) and is
    20 s long. For MXF/multichannel sources only the first audio stream is
    used and it is downmixed to stereo.

    Args:
        video_path: Path to the input video.
        duration: Clip length in seconds.
        start_offset: Position in seconds at which the clip starts.

    Returns:
        Path to a temporary MP3 file (the caller is responsible for deleting
        it), or None if ffmpeg could not be started or exited with an error.
    """
    tmp_fd, tmp_path = tempfile.mkstemp(suffix=".mp3")
    os.close(tmp_fd)

    cmd = [
        # Global options go first, as ffmpeg documents; -y is required because
        # mkstemp has already created the output file.
        "ffmpeg", "-y", "-loglevel", "error",
        "-i", video_path,
        "-ss", str(start_offset),
        "-t", str(duration),
        "-map", "0:a:0",   # first audio stream only (safe for MXF with several streams)
        "-vn",
        "-ac", "2",        # downmix to stereo (if multichannel)
        "-ar", "44100",
        "-b:a", "128k",
        "-f", "mp3", tmp_path,
    ]

    error = None
    try:
        proc = subprocess.run(cmd, capture_output=True, text=True)
    except OSError as e:
        # ffmpeg missing or not executable: report it like any other ffmpeg
        # failure instead of propagating and leaking the temp file.
        error = str(e)
    else:
        if proc.returncode != 0:
            error = proc.stderr[:200]

    if error is not None:
        try:
            os.unlink(tmp_path)
        except OSError:
            pass
        print(f"⚠️ ffmpeg napaka: {error}", file=sys.stderr)
        return None
    return tmp_path
|
||
|
||
|
||
def recognize_video(video_path):
    """Main entry point: identify the song in a video.

    Two positions in the video (15 s and 60 s) are sampled for 20 s each, for
    robustness against an intro or instrumental passage. The first sample
    whose top match has both an artist and a title wins.

    Returns ``(artist, title)``, or ``(None, None)`` when ACR_ACCESS_KEY is
    not set or neither sample produced a usable match.
    """
    if not os.environ.get("ACR_ACCESS_KEY"):
        return (None, None)

    for offset in [15, 60]:
        sample_path = extract_short_audio(video_path, duration=20, start_offset=offset)
        if not sample_path:
            continue

        # The temporary sample is removed whether or not recognition succeeds.
        try:
            response = recognize_audio_file(sample_path)
        finally:
            try:
                os.unlink(sample_path)
            except OSError:
                pass

        if not response:
            continue

        code = response.get("status", {}).get("code")
        if code != 0:
            # 1001 = no result, anything else = error
            message = response.get("status", {}).get("msg", "")
            print(f" ℹ️ ACR @{offset}s: status={code} ({message})", file=sys.stderr)
            continue

        # Success: take the first music match.
        matches = response.get("metadata", {}).get("music", [])
        if not matches:
            continue

        top = matches[0]
        title = top.get("title", "").strip()
        performers = top.get("artists", [])
        artist = performers[0].get("name", "").strip() if performers else ""

        if artist and title:
            print(f" ✅ ACR @{offset}s prepoznal: {artist} - {title}", file=sys.stderr)
            return (artist, title)

    print(" ⚠️ ACR pesem ni prepoznana", file=sys.stderr)
    return (None, None)
|
||
|
||
|
||
if __name__ == "__main__":
    # Command-line entry point: takes exactly one argument, the video path.
    # Prints a JSON object with "artist" and "title" to stdout and exits with
    # status 0 on a match, 1 on a usage error or when nothing was recognized.
    if len(sys.argv) != 2:
        print("Uporaba: python3 acr_recognize.py <video.mp4>")
        sys.exit(1)

    artist, title = recognize_video(sys.argv[1])
    if not (artist and title):
        print(json.dumps({"artist": None, "title": None}))
        sys.exit(1)

    print(json.dumps({"artist": artist, "title": title}, ensure_ascii=False))
    sys.exit(0)
|