#!/usr/bin/env python3 """ reframe.py — Pretvori 16:9 video v 9:16 (reels/shorts/tiktok format). Modi: --mode track : Pametno sledi obrazu/osebi (MediaPipe face detection) Crop okno se gladko premika za subjektom. --mode center : Statični center crop (najhitrejše) --mode blur : 9:16 platno z blur ozadjem + 16:9 video v sredini Primer: python3 reframe.py input.mp4 output.mp4 --mode track python3 reframe.py input.mp4 output.mp4 --mode track --start 10 --duration 30 """ import argparse import subprocess import sys import os import json import tempfile from pathlib import Path import cv2 import numpy as np def get_video_info(path): """Vrni dict z width, height, fps, duration.""" cmd = [ "ffprobe", "-v", "quiet", "-print_format", "json", "-show_streams", "-show_format", str(path) ] data = json.loads(subprocess.check_output(cmd)) vstream = next(s for s in data["streams"] if s["codec_type"] == "video") fps_str = vstream["r_frame_rate"] num, den = fps_str.split("/") fps = float(num) / float(den) return { "width": int(vstream["width"]), "height": int(vstream["height"]), "fps": fps, "duration": float(data["format"]["duration"]), } def detect_face_centers(video_path, sample_fps=5): """ Vzorči video pri sample_fps in vrni seznam (timestamp, x_center_normalized). x_center_normalized je 0..1 (0 = levi rob, 1 = desni rob). Če obraza ni, vrne None za to vzorčenje. Uporablja OpenCV Haar cascade (frontalface_alt2) — robustno, brez external modela. """ cap = cv2.VideoCapture(str(video_path)) src_fps = cap.get(cv2.CAP_PROP_FPS) total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)) height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)) step = max(1, int(src_fps / sample_fps)) cascade_path = cv2.data.haarcascades + "haarcascade_frontalface_alt2.xml" face_cascade = cv2.CascadeClassifier(cascade_path) samples = [] frame_idx = 0 while True: ret, frame = cap.read() if not ret: break if frame_idx % step == 0: ts = frame_idx / src_fps gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) faces = face_cascade.detectMultiScale( gray, scaleFactor=1.2, minNeighbors=5, minSize=(60, 60) ) if len(faces) > 0: # Vzemi največji obraz x, y, w, h = max(faces, key=lambda f: f[2] * f[3]) x_center = (x + w / 2) / width samples.append((ts, x_center)) else: samples.append((ts, None)) frame_idx += 1 cap.release() return samples, width, height, src_fps, total_frames def smooth_track(samples, total_duration, smoothing_window=2.0): """ Iz seznama (ts, x) naredi gladko krivuljo x(t) za vsako sekundo videa. - None vrednosti se zapolni z zadnjo znano (ali 0.5 default). - Drsno povprečje preko smoothing_window sekund. """ # Zapolni manjkajoče last = 0.5 filled = [] for ts, x in samples: if x is None: x = last else: last = x filled.append((ts, x)) if not filled: return lambda t: 0.5 # Drsno povprečje timestamps = np.array([t for t, _ in filled]) values = np.array([v for _, v in filled]) smoothed = np.zeros_like(values) for i, t in enumerate(timestamps): mask = np.abs(timestamps - t) <= smoothing_window / 2 smoothed[i] = np.mean(values[mask]) def x_at(t): if t <= timestamps[0]: return float(smoothed[0]) if t >= timestamps[-1]: return float(smoothed[-1]) return float(np.interp(t, timestamps, smoothed)) return x_at def build_track_filter(info, x_at, target_w, target_h, fps): """ Sestavi FFmpeg filter za track mode. Generiramo crop expression, ki se premika z x(t). Ker FFmpeg ne podpira poljubne funkcije časa, vzorčimo x(t) in sestavimo piecewise linearno funkcijo prek `if(...)`. Bolj robustno: pre-scale na ciljno višino, potem crop x = f(t). """ src_w = info["width"] src_h = info["height"] # Najprej scale: višina = target_h, širina proporcionalno scale_h = target_h scale_w = int(src_w * (target_h / src_h)) # Po skaliranju je crop širina = target_w # x_center v skaliranem prostoru max_x = scale_w - target_w # max levo-zgornji x # Vzorčimo x(t) na ~5 fps (dovolj gladko po smoothingu) duration = info["duration"] n_samples = max(2, int(duration * 5)) times = np.linspace(0, duration, n_samples) x_centers_norm = [x_at(t) for t in times] # Pretvori normaliziran center v dejanski levi-zgornji x v skaliranem oknu x_lefts = [] for xc in x_centers_norm: x_left = xc * scale_w - target_w / 2 x_left = max(0, min(max_x, x_left)) x_lefts.append(x_left) # Sestavi piecewise expression: če (t < t1, x1, če (t < t2, x2, ...)) # FFmpeg ima omejitev na dolžino expression-a, zato uporabimo drugačen pristop: # Generiramo CSV in uporabimo `sendcmd` filter ali pa preprosto # nizkofrekvenčno linearno interpolacijo prek `if/lerp`. # Pragmatično: zgradimo nested if. Pri 5 fps in 60s = 300 vej; deluje. # Za daljše videe rebajzamo na 2 fps. if duration > 120: n_samples = int(duration * 2) times = np.linspace(0, duration, n_samples) x_lefts_resampled = [] for t in times: x_lefts_resampled.append(np.interp(t, np.linspace(0, duration, len(x_lefts)), x_lefts)) x_lefts = x_lefts_resampled # Linearna interpolacija med vzorci znotraj FFmpeg expression # Format: če(t