def _coerce_int(value, default):
    """Return *value* as an int, or *default* when it is missing or not numeric."""
    try:
        return int(value)
    except (TypeError, ValueError):
        return default


def get_audio_streams(path):
    """Return metadata for every audio stream that ffprobe reports for *path*.

    MXF files frequently carry 4-8 audio streams, or a single stream with
    8 channels, so callers need this information to pick a stream to map.

    Args:
        path: Path (str or Path-like) of the media file to probe.

    Returns:
        A list of dicts, one per audio stream, in ffprobe order, e.g.
        ``[{'index': 1, 'channels': 2, 'codec': 'pcm_s24le',
        'sample_rate': 48000, 'language': 'eng', 'channel_layout': 'stereo'}]``.
        An empty list is returned when the probe itself fails (ffprobe missing,
        non-zero exit, unparsable output); that failure is reported on stderr.
    """
    cmd = [
        "ffprobe", "-v", "quiet", "-print_format", "json",
        "-show_streams", "-select_streams", "a", str(path)
    ]
    try:
        data = json.loads(subprocess.check_output(cmd))
    except (OSError, subprocess.SubprocessError, ValueError) as exc:
        # Best effort: callers treat "no streams" as "no audio", so make the
        # reason visible instead of silently dropping the audio track.
        print(f"⚠ Audio probe failed for {path}: {exc}", file=sys.stderr)
        return []

    if not isinstance(data, dict):
        return []

    streams = []
    for s in data.get("streams") or []:
        if not isinstance(s, dict):
            continue
        # "tags" may be absent or null; either way fall back to no tags.
        tags = s.get("tags") or {}
        streams.append({
            "index": s.get("index"),
            # A single odd field must not discard the whole stream list, so
            # numeric fields fall back to their defaults when unparsable.
            "channels": _coerce_int(s.get("channels"), 2),
            "codec": s.get("codec_name", ""),
            "sample_rate": _coerce_int(s.get("sample_rate"), 48000),
            "language": tags.get("language", ""),
            "channel_layout": s.get("channel_layout", ""),
        })
    return streams


def build_audio_args(audio_streams):
    """Build the FFmpeg audio arguments: pick one stream and downmix to stereo.

    Strategy:
      - No streams: disable audio in the output (``-an``).
      - Otherwise prefer the first 2-channel stream (usually the main mix in
        multi-stream MXF files); if there is none, use the first stream.
      - Always emit 2 channels @ 48 kHz, AAC at 192k.

    Args:
        audio_streams: List of stream dicts as returned by
            ``get_audio_streams`` (``index`` and ``channels`` keys are read).

    Returns:
        A list of FFmpeg command-line arguments.
    """
    if not audio_streams:
        return ["-an"]

    # First stereo stream wins; otherwise fall back to the very first stream.
    position, chosen = next(
        ((i, s) for i, s in enumerate(audio_streams) if s.get("channels") == 2),
        (0, audio_streams[0]),
    )

    # NOTE(review): when the source carries only mono streams, the first one
    # is upmixed by "-ac 2"; pairing separate L/R mono tracks would need a
    # filter graph -- confirm whether such sources occur.
    index = chosen.get("index")
    if index is not None:
        specifier = f"0:{index}"          # absolute stream index from ffprobe
    else:
        specifier = f"0:a:{position}"     # audio-relative fallback

    return [
        "-map", specifier,   # only the chosen stream
        "-ac", "2",          # downmix to stereo (if multichannel)
        "-ar", "48000",
        "-c:a", "aac",
        "-b:a", "192k",      # 192k = better quality for music (previously 128k)
    ]
@@ -227,6 +289,23 @@ def main(): # Če imamo --start/--duration, najprej trim z FFmpeg v temp file (hitreje) work_input = src tmp = None + + # Probe audio streams iz originala + audio_streams = get_audio_streams(src) + src_ext = src.suffix.lower() + is_broadcast = src_ext in (".mxf", ".mpg", ".mpeg", ".ts", ".m2ts", ".mts") + has_complex_audio = ( + len(audio_streams) > 1 or + (audio_streams and audio_streams[0].get("channels", 2) > 2) or + is_broadcast + ) + + if has_complex_audio: + print(f"🎚 Broadcast format ({src_ext}) — {len(audio_streams)} audio stream(s):", file=sys.stderr) + for s in audio_streams: + print(f" #{s['index']}: {s['codec']} {s['channels']}ch " + f"{s.get('channel_layout', '')} lang={s.get('language', '?')}", file=sys.stderr) + if args.start is not None or args.duration is not None: tmp = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) tmp.close() @@ -236,21 +315,49 @@ def main(): cmd += ["-i", str(src)] if args.duration is not None: cmd += ["-t", str(args.duration)] - cmd += ["-c", "copy", tmp.name] + + if has_complex_audio: + # Broadcast format ali multichannel: ne kopiraj, transkodiraj v stereo MP4 + audio_args = build_audio_args(audio_streams) + cmd += [ + "-map", "0:v:0", # samo prvi video stream + "-c:v", "libx264", "-preset", "veryfast", "-crf", "20", + ] + cmd += audio_args + else: + # MP4/MOV s standardnim audiom — stream copy je OK in hitrejši + cmd += ["-c", "copy"] + cmd += [tmp.name] + print(f"🔧 TRIM CMD: {' '.join(cmd)}", file=sys.stderr) result = subprocess.run(cmd, capture_output=True, text=True) if result.returncode != 0: - print(f"❌ TRIM FAILED: {result.stderr[-500:]}", file=sys.stderr) + print(f"❌ TRIM FAILED: {result.stderr[-1000:]}", file=sys.stderr) sys.exit(1) work_input = Path(tmp.name) print(f"✂ Trim → {work_input}") - # Verify trim output duration verify = subprocess.run( ["ffprobe", "-v", "error", "-show_entries", "format=duration", "-of", "default=nw=1:nokey=1", str(work_input)], capture_output=True, text=True 
) print(f"🔍 TRIMMED FILE DURATION: {verify.stdout.strip()}s (expected ~{args.duration}s)", file=sys.stderr) + elif has_complex_audio: + # Brez trim-a, ampak MXF/MPG → še vedno transkodiraj v MP4 z 2-channel audio + tmp = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) + tmp.close() + audio_args = build_audio_args(audio_streams) + cmd = ["ffmpeg", "-y", "-i", str(src), + "-map", "0:v:0", + "-c:v", "libx264", "-preset", "veryfast", "-crf", "20"] + cmd += audio_args + [tmp.name] + print(f"🔧 PRE-CONVERT CMD: {' '.join(cmd)}", file=sys.stderr) + result = subprocess.run(cmd, capture_output=True, text=True) + if result.returncode != 0: + print(f"❌ PRE-CONVERT FAILED: {result.stderr[-1000:]}", file=sys.stderr) + sys.exit(1) + work_input = Path(tmp.name) + print(f"🔄 Pre-converted → {work_input}") info = get_video_info(work_input) print(f"📹 Vhod: {info['width']}x{info['height']} @ {info['fps']:.2f}fps, {info['duration']:.1f}s") @@ -286,7 +393,9 @@ def main(): "ffmpeg", "-y", "-i", str(work_input), "-filter_complex", vfilter, "-c:v", "libx264", "-preset", preset, "-crf", crf, - "-c:a", "aac", "-b:a", "128k", + "-map", "0:a:0?", # samo prvi audio stream (če obstaja) + "-ac", "2", # force stereo + "-c:a", "aac", "-b:a", "192k", ] if audio_filter_str: cmd += ["-af", audio_filter_str] @@ -296,7 +405,9 @@ def main(): "ffmpeg", "-y", "-i", str(work_input), "-vf", vfilter, "-c:v", "libx264", "-preset", preset, "-crf", crf, - "-c:a", "aac", "-b:a", "128k", + "-map", "0:v:0", "-map", "0:a:0?", # video + samo prvi audio + "-ac", "2", # force stereo + "-c:a", "aac", "-b:a", "192k", ] if audio_filter_str: cmd += ["-af", audio_filter_str] diff --git a/templates/index.html b/templates/index.html index 34db1c3..6cafa24 100644 --- a/templates/index.html +++ b/templates/index.html @@ -290,7 +290,7 @@
Klikni ali povleci video sem
.mp4, .mov, .webm — do 2 GB
- +