videofolxtv/server/face-detection.py
sebastjanartic d321b4f384 Add face detection and thumbnail centering for videos
This commit introduces face detection capabilities to the video platform, enabling automatic identification of faces in video thumbnails. It integrates face-api.js and sharp for image analysis, allowing for face-centered thumbnail crops and dynamic object-positioning. New API endpoints are added to process thumbnails individually and in batches. The database schema is updated to store face detection data, and the storage layer is modified to support these updates and cache face data. The project's dependencies are also updated to include necessary libraries for these new features.

Replit-Commit-Author: Agent
Replit-Commit-Session-Id: 2eb1084e-b728-4449-9231-f1665924c8d5
Replit-Commit-Checkpoint-Type: full_checkpoint
Replit-Commit-Screenshot-Url: https://storage.googleapis.com/screenshot-production-us-central1/8cc42625-c1f5-4e43-99bd-77f2c4dedee2/2eb1084e-b728-4449-9231-f1665924c8d5/xF0EUqR
2025-08-29 07:34:08 +00:00

234 lines
8.3 KiB
Python

#!/usr/bin/env python3
"""
Face Detection Service for go4.video platform
Automatically detects faces in thumbnails and creates centered crops
"""
import cv2
import mediapipe as mp
import numpy as np
from PIL import Image, ImageOps
import io
import base64
import sys
import json
import requests
from typing import Tuple, Optional, Dict, Any
import logging
# Configure logging
# basicConfig() without a stream/filename argument installs a stderr handler on
# the root logger, so log lines do not mix with the JSON that main() prints to
# stdout.
logging.basicConfig(level=logging.INFO)
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
class FaceDetectionService:
    """Detect faces with MediaPipe and compute face-centered thumbnail crops."""

    def __init__(self):
        """Initialize MediaPipe face detection"""
        self.mp_face_detection = mp.solutions.face_detection
        self.mp_drawing = mp.solutions.drawing_utils
        self.face_detection = self.mp_face_detection.FaceDetection(
            model_selection=0,  # 0 for short-range (2 meters), 1 for full-range (5 meters)
            min_detection_confidence=0.5
        )

    def detect_faces(self, image_array: np.ndarray) -> list:
        """
        Detect faces in image and return bounding boxes

        Args:
            image_array: OpenCV image array (BGR format)

        Returns:
            List of dicts, one per detected face, sorted by confidence
            (highest first). Each dict has the pixel-space keys
            'x', 'y', 'width', 'height', 'center_x', 'center_y' plus
            'confidence'. An empty list is returned when no face is found
            or when detection raises (the error is logged, not propagated).
        """
        try:
            # Convert BGR to RGB for MediaPipe
            rgb_image = cv2.cvtColor(image_array, cv2.COLOR_BGR2RGB)

            # Perform face detection
            results = self.face_detection.process(rgb_image)
            if not results.detections:
                return []

            height, width = image_array.shape[:2]
            faces = []
            for detection in results.detections:
                bbox = detection.location_data.relative_bounding_box

                # Convert relative coordinates to absolute
                x = int(bbox.xmin * width)
                y = int(bbox.ymin * height)
                w = int(bbox.width * width)
                h = int(bbox.height * height)

                faces.append({
                    'x': x,
                    'y': y,
                    'width': w,
                    'height': h,
                    # Cast to a plain float so the result is always
                    # serializable with json.dumps().
                    'confidence': float(detection.score[0]),
                    'center_x': x + w // 2,
                    'center_y': y + h // 2
                })

            # Sort by confidence (highest first)
            faces.sort(key=lambda f: f['confidence'], reverse=True)
            return faces
        except Exception as e:
            # Deliberate best-effort: a detection failure degrades to
            # "no faces", which callers treat as a plain center crop.
            logger.error(f"Error detecting faces: {e}")
            return []

    def get_face_centered_crop_position(self, image_width: int, image_height: int,
                                        faces: list, target_aspect: float = 9/16) -> Dict[str, int]:
        """
        Calculate optimal crop position to center the most prominent face

        Args:
            image_width: Original image width
            image_height: Original image height
            faces: List of detected faces; only the first entry is used and
                it must provide 'center_x' and 'center_y'
            target_aspect: Target aspect ratio expressed as width / height
                (default 9:16 for portrait)

        Returns:
            Dict with crop coordinates: {x, y, width, height}. The crop is
            the largest rectangle of the target aspect that fits the image;
            it is centered on the first face (clamped to the image bounds)
            or on the image itself when `faces` is empty.
        """
        primary_face = faces[0] if faces else None

        if image_width / image_height > target_aspect:
            # Image is wider than target: keep full height, slide horizontally
            crop_width = int(image_height * target_aspect)
            crop_height = image_height
            crop_y = 0
            if primary_face is None:
                crop_x = (image_width - crop_width) // 2
            else:
                crop_x = primary_face['center_x'] - crop_width // 2
                # Keep the window inside the image
                crop_x = max(0, min(crop_x, image_width - crop_width))
        else:
            # Image is taller than target: keep full width, slide vertically
            crop_width = image_width
            crop_height = int(image_width / target_aspect)
            crop_x = 0
            if primary_face is None:
                crop_y = (image_height - crop_height) // 2
            else:
                crop_y = primary_face['center_y'] - crop_height // 2
                # Keep the window inside the image
                crop_y = max(0, min(crop_y, image_height - crop_height))

        return {
            'x': crop_x,
            'y': crop_y,
            'width': crop_width,
            'height': crop_height
        }

    def _crop_for_target(self, image_width: int, image_height: int, faces: list,
                         target_width: int, target_height: int) -> Dict[str, int]:
        """Return the crop rectangle whose shape matches target_width x target_height."""
        # get_face_centered_crop_position() expects width / height; passing
        # height / width would crop a landscape window for a portrait target.
        return self.get_face_centered_crop_position(
            image_width, image_height, faces, target_width / target_height
        )

    def process_thumbnail_url(self, thumbnail_url: str, target_width: int = 300,
                              target_height: int = 533) -> Optional[Dict[str, Any]]:
        """
        Download, process thumbnail URL and return face detection results

        Args:
            thumbnail_url: URL of the thumbnail image
            target_width: Target width for the processed image
            target_height: Target height for the processed image

        Returns:
            On success, a dict with 'success': True, 'faces_detected',
            'primary_face_confidence', 'crop_info', 'original_dimensions',
            'processed_image' (JPEG data URL) and 'processing_strategy'.
            On a download/processing error or invalid target size, a dict
            with 'success': False, 'error' and 'faces_detected': 0.
            None when the downloaded bytes cannot be decoded as an image.
        """
        if target_width <= 0 or target_height <= 0:
            message = f"Invalid target dimensions: {target_width}x{target_height}"
            logger.error(f"Error processing thumbnail URL {thumbnail_url}: {message}")
            return {
                'success': False,
                'error': message,
                'faces_detected': 0
            }

        try:
            # Download image
            # NOTE(review): the URL is fetched as-is. If it can originate from
            # user-supplied data, restrict allowed hosts/schemes and cap the
            # response size upstream to avoid SSRF and oversized downloads.
            response = requests.get(thumbnail_url, timeout=10)
            response.raise_for_status()

            # Convert to OpenCV format
            image_array = np.frombuffer(response.content, np.uint8)
            image = cv2.imdecode(image_array, cv2.IMREAD_COLOR)
            if image is None:
                logger.error(f"Failed to decode image from URL: {thumbnail_url}")
                return None

            height, width = image.shape[:2]

            # Detect faces
            faces = self.detect_faces(image)

            # Get optimal crop position for face centering
            crop_info = self._crop_for_target(
                width, height, faces, target_width, target_height
            )

            # Create face-centered crop
            top, left = crop_info['y'], crop_info['x']
            cropped_image = image[
                top:top + crop_info['height'],
                left:left + crop_info['width']
            ]

            # Resize to target dimensions
            resized_image = cv2.resize(cropped_image, (target_width, target_height))

            # Convert to base64 for web usage
            _, buffer = cv2.imencode('.jpg', resized_image, [cv2.IMWRITE_JPEG_QUALITY, 85])
            processed_image_b64 = base64.b64encode(buffer).decode('utf-8')

            return {
                'success': True,
                'faces_detected': len(faces),
                'primary_face_confidence': faces[0]['confidence'] if faces else 0,
                'crop_info': crop_info,
                'original_dimensions': {'width': width, 'height': height},
                'processed_image': f"data:image/jpeg;base64,{processed_image_b64}",
                'processing_strategy': 'face_centered' if faces else 'center_crop'
            }
        except Exception as e:
            # Boundary of the service: report the failure in the result
            # instead of raising, so the CLI always emits JSON.
            logger.error(f"Error processing thumbnail URL {thumbnail_url}: {e}")
            return {
                'success': False,
                'error': str(e),
                'faces_detected': 0
            }
def main():
    """Command-line entry point: analyse one thumbnail URL and print JSON."""
    cli_args = sys.argv[1:]
    if not cli_args:
        # No URL supplied: show usage and signal failure to the caller
        print("Usage: python3 face-detection.py <thumbnail_url>")
        sys.exit(1)

    detector = FaceDetectionService()
    outcome = detector.process_thumbnail_url(cli_args[0])

    # Pretty-print whatever the service returned on stdout
    print(json.dumps(outcome, indent=2))
# Run the CLI only when this file is executed directly, not when it is imported.
if __name__ == "__main__":
    main()