This commit introduces face detection capabilities to the video platform, enabling automatic identification of faces in video thumbnails. It integrates face-api.js and sharp for image analysis, allowing for face-centered thumbnail crops and dynamic object-positioning. New API endpoints are added to process thumbnails individually and in batches. The database schema is updated to store face detection data, and the storage layer is modified to support these updates and cache face data. The project's dependencies are also updated to include necessary libraries for these new features. Replit-Commit-Author: Agent Replit-Commit-Session-Id: 2eb1084e-b728-4449-9231-f1665924c8d5 Replit-Commit-Checkpoint-Type: full_checkpoint Replit-Commit-Screenshot-Url: https://storage.googleapis.com/screenshot-production-us-central1/8cc42625-c1f5-4e43-99bd-77f2c4dedee2/2eb1084e-b728-4449-9231-f1665924c8d5/xF0EUqR
234 lines
8.3 KiB
Python
234 lines
8.3 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Face Detection Service for go4.video platform
|
|
Automatically detects faces in thumbnails and creates centered crops
|
|
"""
|
|
|
|
import cv2
|
|
import mediapipe as mp
|
|
import numpy as np
|
|
from PIL import Image, ImageOps
|
|
import io
|
|
import base64
|
|
import sys
|
|
import json
|
|
import requests
|
|
from typing import Tuple, Optional, Dict, Any
|
|
import logging
|
|
|
|
# Configure logging
# basicConfig is invoked at import time, so importing this module sets up the
# root logger at INFO level (it has no effect if the root logger already has
# handlers configured).
logging.basicConfig(level=logging.INFO)
# Module-level logger, named after this module, used by the code below.
logger = logging.getLogger(__name__)
|
|
|
|
class FaceDetectionService:
    """Detect faces in thumbnails and compute face-centered crops.

    Wraps a MediaPipe ``FaceDetection`` instance. Typical use is
    ``process_thumbnail_url``, which downloads an image, detects faces,
    crops around the most confident face (or the image center when no face
    is found), resizes the crop and returns it as a base64 JPEG data URL.
    """

    def __init__(self):
        """Initialize MediaPipe face detection"""
        self.mp_face_detection = mp.solutions.face_detection
        self.mp_drawing = mp.solutions.drawing_utils
        self.face_detection = self.mp_face_detection.FaceDetection(
            model_selection=0,  # 0 for short-range (2 meters), 1 for full-range (5 meters)
            min_detection_confidence=0.5
        )

    @staticmethod
    def _target_aspect(target_width: int, target_height: int) -> float:
        """Return the width/height ratio of the requested output size.

        Raises:
            ValueError: If either dimension is not strictly positive.
        """
        if target_width <= 0 or target_height <= 0:
            raise ValueError(
                f"Target dimensions must be positive, got {target_width}x{target_height}"
            )
        return target_width / target_height

    @staticmethod
    def _failure_result(message: str) -> Dict[str, Any]:
        """Build the dict returned by process_thumbnail_url on any failure."""
        return {
            'success': False,
            'error': message,
            'faces_detected': 0
        }

    def detect_faces(self, image_array: np.ndarray) -> list:
        """
        Detect faces in image and return bounding boxes

        Args:
            image_array: OpenCV image array (BGR format)

        Returns:
            List of dicts sorted by confidence (highest first). Each dict has
            the pixel-space keys 'x', 'y', 'width', 'height', 'center_x',
            'center_y' (clamped to the image bounds) and a float
            'confidence'. An empty list is returned when nothing is detected
            or when detection fails (the error is logged).
        """
        try:
            # Convert BGR to RGB for MediaPipe
            rgb_image = cv2.cvtColor(image_array, cv2.COLOR_BGR2RGB)

            # Perform face detection
            results = self.face_detection.process(rgb_image)
            if not results.detections:
                return []

            height, width = image_array.shape[:2]
            faces = []

            for detection in results.detections:
                bbox = detection.location_data.relative_bounding_box

                # Convert relative coordinates to absolute
                raw_x = int(bbox.xmin * width)
                raw_y = int(bbox.ymin * height)
                raw_w = int(bbox.width * width)
                raw_h = int(bbox.height * height)

                # Relative boxes may extend past the frame; clamp so every
                # reported coordinate refers to a pixel inside the image.
                left = max(0, raw_x)
                top = max(0, raw_y)
                right = min(width, raw_x + raw_w)
                bottom = min(height, raw_y + raw_h)
                box_w = right - left
                box_h = bottom - top
                if box_w <= 0 or box_h <= 0:
                    # Nothing of this detection is visible in the frame.
                    continue

                faces.append({
                    'x': left,
                    'y': top,
                    'width': box_w,
                    'height': box_h,
                    # Plain float so the result is always JSON-serializable.
                    'confidence': float(detection.score[0]),
                    'center_x': left + box_w // 2,
                    'center_y': top + box_h // 2
                })

            # Sort by confidence (highest first)
            faces.sort(key=lambda f: f['confidence'], reverse=True)
            return faces

        except Exception as e:
            # Best-effort: a detection failure degrades to "no faces".
            logger.error("Error detecting faces: %s", e)
            return []

    def get_face_centered_crop_position(self, image_width: int, image_height: int,
                                        faces: list, target_aspect: float = 9/16) -> Dict[str, int]:
        """
        Calculate optimal crop position to center the most prominent face

        Args:
            image_width: Original image width
            image_height: Original image height
            faces: List of detected faces; the first entry is treated as the
                primary face (detect_faces returns them sorted by confidence)
            target_aspect: Target aspect ratio expressed as width / height
                (default 9:16 for portrait)

        Returns:
            Dict with crop coordinates: {x, y, width, height}. The crop is the
            largest rectangle of the target aspect that fits in the image,
            centered on the primary face when one exists (otherwise on the
            image center) and clamped to stay inside the image.

        Raises:
            ValueError: If the image dimensions or target_aspect are not
                strictly positive.
        """
        if image_width <= 0 or image_height <= 0:
            raise ValueError(
                f"Image dimensions must be positive, got {image_width}x{image_height}"
            )
        if target_aspect <= 0:
            raise ValueError(f"target_aspect must be positive, got {target_aspect}")

        primary_face = faces[0] if faces else None

        if image_width / image_height > target_aspect:
            # Image is wider than target: keep full height, crop horizontally.
            crop_height = image_height
            # At least one pixel so downstream resizing never sees an empty crop.
            crop_width = max(1, min(image_width, int(image_height * target_aspect)))

            if primary_face is None:
                crop_x = (image_width - crop_width) // 2
            else:
                crop_x = int(primary_face['center_x']) - crop_width // 2
                crop_x = max(0, min(crop_x, image_width - crop_width))
            crop_y = 0
        else:
            # Image is taller than (or equal to) target: keep full width, crop vertically.
            crop_width = image_width
            crop_height = max(1, min(image_height, int(image_width / target_aspect)))

            if primary_face is None:
                crop_y = (image_height - crop_height) // 2
            else:
                crop_y = int(primary_face['center_y']) - crop_height // 2
                crop_y = max(0, min(crop_y, image_height - crop_height))
            crop_x = 0

        return {
            'x': crop_x,
            'y': crop_y,
            'width': crop_width,
            'height': crop_height
        }

    def process_thumbnail_url(self, thumbnail_url: str, target_width: int = 300,
                              target_height: int = 533) -> Optional[Dict[str, Any]]:
        """
        Download, process thumbnail URL and return face detection results

        Args:
            thumbnail_url: URL of the thumbnail image
            target_width: Target width for the processed image
            target_height: Target height for the processed image

        Returns:
            On success, a dict with 'success' True, 'faces_detected',
            'primary_face_confidence', 'crop_info', 'original_dimensions',
            'processed_image' (base64 JPEG data URL) and
            'processing_strategy'. On any failure (download, decode,
            encode, invalid arguments), a dict with 'success' False, an
            'error' message and 'faces_detected' 0.
        """
        try:
            # The crop helper expects width / height; validate before any I/O.
            target_aspect = self._target_aspect(target_width, target_height)

            # NOTE(security): the URL is fetched exactly as given and the
            # response size is not bounded. If thumbnail_url can come from
            # untrusted users, validate the host and limit the download size
            # before calling this method.
            response = requests.get(thumbnail_url, timeout=10)
            response.raise_for_status()

            # Convert to OpenCV format
            image_array = np.frombuffer(response.content, np.uint8)
            image = cv2.imdecode(image_array, cv2.IMREAD_COLOR)

            if image is None:
                logger.error(f"Failed to decode image from URL: {thumbnail_url}")
                # Same failure shape as every other error path.
                return self._failure_result(
                    f"Failed to decode image from URL: {thumbnail_url}"
                )

            height, width = image.shape[:2]

            # Detect faces
            faces = self.detect_faces(image)

            # Get optimal crop position for face centering
            crop_info = self.get_face_centered_crop_position(
                width, height, faces, target_aspect
            )

            # Create face-centered crop
            top = crop_info['y']
            left = crop_info['x']
            cropped_image = image[
                top:top + crop_info['height'],
                left:left + crop_info['width']
            ]

            # Resize to target dimensions
            resized_image = cv2.resize(cropped_image, (target_width, target_height))

            # Convert to base64 for web usage
            encoded_ok, buffer = cv2.imencode(
                '.jpg', resized_image, [cv2.IMWRITE_JPEG_QUALITY, 85]
            )
            if not encoded_ok:
                raise RuntimeError("Failed to encode processed image as JPEG")
            processed_image_b64 = base64.b64encode(buffer).decode('utf-8')

            return {
                'success': True,
                'faces_detected': len(faces),
                'primary_face_confidence': faces[0]['confidence'] if faces else 0,
                'crop_info': crop_info,
                'original_dimensions': {'width': width, 'height': height},
                'processed_image': f"data:image/jpeg;base64,{processed_image_b64}",
                'processing_strategy': 'face_centered' if faces else 'center_crop'
            }

        except Exception as e:
            # Top-level boundary: report the failure to the caller as data.
            logger.error("Error processing thumbnail URL %s: %s", thumbnail_url, e)
            return self._failure_result(str(e))
|
|
|
|
def main():
    """Command-line entry point.

    Expects the thumbnail URL as the first argument, runs it through
    FaceDetectionService.process_thumbnail_url and prints the result to
    stdout as indented JSON. Prints a usage line and exits with status 1
    when the argument is missing.
    """
    cli_args = sys.argv[1:]
    if not cli_args:
        print("Usage: python3 face-detection.py <thumbnail_url>")
        sys.exit(1)

    detector = FaceDetectionService()
    outcome = detector.process_thumbnail_url(cli_args[0])

    # Emit the result as JSON on stdout
    print(json.dumps(outcome, indent=2))


if __name__ == "__main__":
    main()