MXF/MPG broadcast format support: handle multichannel audio properly

Problem: MXF and MPG files (TV broadcast formats) often contain:
- Multiple audio streams (4-8 streams for different language tracks)
- Multichannel layouts (5.1, 7.1) instead of stereo
- The previous ffmpeg invocation used -c:a aac with no channel limit, so
  multichannel sources were transcoded to multichannel AAC instead of
  the clean stereo output we want

Solution:

1. get_audio_streams() helper probes all audio streams with ffprobe
   - Returns codec, channels, sample_rate, language, layout for each

2. build_audio_args() picks best stream + downmix:
   - Prefers first 2-channel stereo stream (usually main mix)
   - Falls back to first stream if none are 2-ch
   - Always: -ac 2 (force stereo downmix), -ar 48000, -c:a aac, -b:a 192k
   - Bitrate raised from 128k to 192k for music quality

3. Smart trim path now detects broadcast formats:
   - .mxf, .mpg, .mpeg, .ts, .m2ts, .mts → transcode (not stream copy)
   - Standard MP4/MOV → stream copy (faster, lossless)

4. Pre-conversion step for broadcast files without trim:
   - Even without --start/--duration, MXF/MPG get converted to MP4
   - Same audio handling as trim path

5. Main render adds explicit -map 0:v:0 -map 0:a:0? -ac 2 to ensure
   only first video and first audio stream get encoded, with stereo

6. ACR recognize also gets -map 0:a:0 -ac 2 for MXF compatibility

7. UI accepts: video/*,.mxf,.mpg,.mpeg,.ts,.m2ts,.mts

8. Upload limit raised: 2GB → 10GB (MXF files are large)

This means a TV broadcast MXF with [SLO/EN/DE language tracks] now
correctly outputs stereo MP4 with the main language track preserved.
This commit is contained in:
Sebastjan Artič 2026-04-29 14:38:48 +00:00
parent b543057cee
commit 1cc8e8be35
3 changed files with 126 additions and 9 deletions

View File

@ -114,7 +114,8 @@ def recognize_audio_file(audio_path, timeout=30):
def extract_short_audio(video_path, duration=20, start_offset=15): def extract_short_audio(video_path, duration=20, start_offset=15):
"""Izloči kratek audio iz videa za ACR fingerprint. """Izloči kratek audio iz videa za ACR fingerprint.
Ponavadi začnemo 15s v video (preskoči intro) in vzamemo 20s. Začnemo 15s v video (preskoči intro) in vzamemo 20s.
Za MXF/multichannel: izberemo prvi audio stream, downmix v stereo.
Vrne pot do tmp MP3 ali None. Vrne pot do tmp MP3 ali None.
""" """
tmp_fd, tmp_path = tempfile.mkstemp(suffix=".mp3") tmp_fd, tmp_path = tempfile.mkstemp(suffix=".mp3")
@ -123,8 +124,13 @@ def extract_short_audio(video_path, duration=20, start_offset=15):
"ffmpeg", "-i", video_path, "ffmpeg", "-i", video_path,
"-ss", str(start_offset), "-ss", str(start_offset),
"-t", str(duration), "-t", str(duration),
"-vn", "-ar", "44100", "-ac", "1", "-b:a", "128k", "-map", "0:a:0", # samo prvi audio stream (varno za MXF z več streami)
"-f", "mp3", tmp_path, "-y", "-loglevel", "quiet" "-vn",
"-ac", "2", # downmix v stereo (če multichannel)
"-ar", "44100",
"-b:a", "128k",
"-f", "mp3", tmp_path,
"-y", "-loglevel", "error"
] ]
proc = subprocess.run(cmd, capture_output=True, text=True) proc = subprocess.run(cmd, capture_output=True, text=True)
if proc.returncode != 0: if proc.returncode != 0:

View File

@ -43,6 +43,68 @@ def get_video_info(path):
} }
def get_audio_streams(path):
    """Probe *path* with ffprobe and describe every audio stream it contains.

    MXF files often carry 4-8 separate audio streams, or a single stream
    with 8 channels.

    Returns a list with one dict per audio stream, for example
    ``[{'index': 1, 'channels': 2, 'codec': 'pcm_s24le', 'language': 'eng'}, ...]``.
    Each dict has the keys ``index``, ``channels``, ``codec``,
    ``sample_rate``, ``language`` and ``channel_layout``; fields missing
    from the ffprobe output fall back to 2 channels, 48000 Hz and empty
    strings. Any failure while probing or parsing yields an empty list.
    """
    probe_cmd = [
        "ffprobe",
        "-v", "quiet",
        "-print_format", "json",
        "-show_streams",
        "-select_streams", "a",
        str(path),
    ]
    try:
        probe = json.loads(subprocess.check_output(probe_cmd))
        return [
            {
                "index": stream.get("index"),
                "channels": int(stream.get("channels", 2)),
                "codec": stream.get("codec_name", ""),
                "sample_rate": int(stream.get("sample_rate", 48000)),
                "language": stream.get("tags", {}).get("language", ""),
                "channel_layout": stream.get("channel_layout", ""),
            }
            for stream in probe.get("streams", [])
        ]
    except Exception:
        # Best-effort probe: every error is reported as "no streams found".
        return []
def build_audio_args(audio_streams):
    """Build the FFmpeg audio arguments: select one stream and downmix to stereo.

    Selection strategy:
      - no streams at all: return ``["-an"]`` (output without audio);
      - otherwise prefer the first stream that has exactly 2 channels
        (usually the main stereo mix in a multi-language MXF);
      - if no stream is 2-channel, use the first stream.

    The selected stream is always mapped by its ``index`` and encoded as
    2-channel, 48 kHz AAC at 192k.

    ``audio_streams`` is a list of dicts carrying at least the keys
    ``index`` and ``channels``.
    """
    if not audio_streams:
        # Nothing to map: tell FFmpeg to drop audio entirely.
        return ["-an"]

    # First stereo stream wins; the first stream overall is the fallback.
    picked = next(
        (stream for stream in audio_streams if stream["channels"] == 2),
        audio_streams[0],
    )

    return [
        "-map", f"0:{picked['index']}",  # only the selected stream
        "-ac", "2",                      # downmix to stereo if multichannel
        "-ar", "48000",
        "-c:a", "aac",
        "-b:a", "192k",                  # 192k for better music quality (was 128k)
    ]
def detect_face_centers(video_path, sample_fps=5): def detect_face_centers(video_path, sample_fps=5):
""" """
Vzorči video pri sample_fps in vrni seznam (timestamp, x_center_normalized). Vzorči video pri sample_fps in vrni seznam (timestamp, x_center_normalized).
@ -227,6 +289,23 @@ def main():
# Če imamo --start/--duration, najprej trim z FFmpeg v temp file (hitreje) # Če imamo --start/--duration, najprej trim z FFmpeg v temp file (hitreje)
work_input = src work_input = src
tmp = None tmp = None
# Probe audio streams iz originala
audio_streams = get_audio_streams(src)
src_ext = src.suffix.lower()
is_broadcast = src_ext in (".mxf", ".mpg", ".mpeg", ".ts", ".m2ts", ".mts")
has_complex_audio = (
len(audio_streams) > 1 or
(audio_streams and audio_streams[0].get("channels", 2) > 2) or
is_broadcast
)
if has_complex_audio:
print(f"🎚 Broadcast format ({src_ext}) — {len(audio_streams)} audio stream(s):", file=sys.stderr)
for s in audio_streams:
print(f" #{s['index']}: {s['codec']} {s['channels']}ch "
f"{s.get('channel_layout', '')} lang={s.get('language', '?')}", file=sys.stderr)
if args.start is not None or args.duration is not None: if args.start is not None or args.duration is not None:
tmp = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) tmp = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
tmp.close() tmp.close()
@ -236,21 +315,49 @@ def main():
cmd += ["-i", str(src)] cmd += ["-i", str(src)]
if args.duration is not None: if args.duration is not None:
cmd += ["-t", str(args.duration)] cmd += ["-t", str(args.duration)]
cmd += ["-c", "copy", tmp.name]
if has_complex_audio:
# Broadcast format ali multichannel: ne kopiraj, transkodiraj v stereo MP4
audio_args = build_audio_args(audio_streams)
cmd += [
"-map", "0:v:0", # samo prvi video stream
"-c:v", "libx264", "-preset", "veryfast", "-crf", "20",
]
cmd += audio_args
else:
# MP4/MOV s standardnim audiom — stream copy je OK in hitrejši
cmd += ["-c", "copy"]
cmd += [tmp.name]
print(f"🔧 TRIM CMD: {' '.join(cmd)}", file=sys.stderr) print(f"🔧 TRIM CMD: {' '.join(cmd)}", file=sys.stderr)
result = subprocess.run(cmd, capture_output=True, text=True) result = subprocess.run(cmd, capture_output=True, text=True)
if result.returncode != 0: if result.returncode != 0:
print(f"❌ TRIM FAILED: {result.stderr[-500:]}", file=sys.stderr) print(f"❌ TRIM FAILED: {result.stderr[-1000:]}", file=sys.stderr)
sys.exit(1) sys.exit(1)
work_input = Path(tmp.name) work_input = Path(tmp.name)
print(f"✂ Trim → {work_input}") print(f"✂ Trim → {work_input}")
# Verify trim output duration
verify = subprocess.run( verify = subprocess.run(
["ffprobe", "-v", "error", "-show_entries", "format=duration", ["ffprobe", "-v", "error", "-show_entries", "format=duration",
"-of", "default=nw=1:nokey=1", str(work_input)], "-of", "default=nw=1:nokey=1", str(work_input)],
capture_output=True, text=True capture_output=True, text=True
) )
print(f"🔍 TRIMMED FILE DURATION: {verify.stdout.strip()}s (expected ~{args.duration}s)", file=sys.stderr) print(f"🔍 TRIMMED FILE DURATION: {verify.stdout.strip()}s (expected ~{args.duration}s)", file=sys.stderr)
elif has_complex_audio:
# Brez trim-a, ampak MXF/MPG → še vedno transkodiraj v MP4 z 2-channel audio
tmp = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
tmp.close()
audio_args = build_audio_args(audio_streams)
cmd = ["ffmpeg", "-y", "-i", str(src),
"-map", "0:v:0",
"-c:v", "libx264", "-preset", "veryfast", "-crf", "20"]
cmd += audio_args + [tmp.name]
print(f"🔧 PRE-CONVERT CMD: {' '.join(cmd)}", file=sys.stderr)
result = subprocess.run(cmd, capture_output=True, text=True)
if result.returncode != 0:
print(f"❌ PRE-CONVERT FAILED: {result.stderr[-1000:]}", file=sys.stderr)
sys.exit(1)
work_input = Path(tmp.name)
print(f"🔄 Pre-converted → {work_input}")
info = get_video_info(work_input) info = get_video_info(work_input)
print(f"📹 Vhod: {info['width']}x{info['height']} @ {info['fps']:.2f}fps, {info['duration']:.1f}s") print(f"📹 Vhod: {info['width']}x{info['height']} @ {info['fps']:.2f}fps, {info['duration']:.1f}s")
@ -286,7 +393,9 @@ def main():
"ffmpeg", "-y", "-i", str(work_input), "ffmpeg", "-y", "-i", str(work_input),
"-filter_complex", vfilter, "-filter_complex", vfilter,
"-c:v", "libx264", "-preset", preset, "-crf", crf, "-c:v", "libx264", "-preset", preset, "-crf", crf,
"-c:a", "aac", "-b:a", "128k", "-map", "0:a:0?", # samo prvi audio stream (če obstaja)
"-ac", "2", # force stereo
"-c:a", "aac", "-b:a", "192k",
] ]
if audio_filter_str: if audio_filter_str:
cmd += ["-af", audio_filter_str] cmd += ["-af", audio_filter_str]
@ -296,7 +405,9 @@ def main():
"ffmpeg", "-y", "-i", str(work_input), "ffmpeg", "-y", "-i", str(work_input),
"-vf", vfilter, "-vf", vfilter,
"-c:v", "libx264", "-preset", preset, "-crf", crf, "-c:v", "libx264", "-preset", preset, "-crf", crf,
"-c:a", "aac", "-b:a", "128k", "-map", "0:v:0", "-map", "0:a:0?", # video + samo prvi audio
"-ac", "2", # force stereo
"-c:a", "aac", "-b:a", "192k",
] ]
if audio_filter_str: if audio_filter_str:
cmd += ["-af", audio_filter_str] cmd += ["-af", audio_filter_str]

View File

@ -290,7 +290,7 @@
</svg> </svg>
<div>Klikni ali povleci video sem</div> <div>Klikni ali povleci video sem</div>
<div class="small">.mp4, .mov, .webm — do 2 GB</div> <div class="small">.mp4, .mov, .webm — do 2 GB</div>
<input type="file" id="file-input" accept="video/*" style="display:none"> <input type="file" id="file-input" accept="video/*,.mxf,.mpg,.mpeg,.ts,.m2ts,.mts" style="display:none">
</div> </div>
</div> </div>