# videofolxtv/server/face-detection.py

#!/usr/bin/env python3
"""
Face Detection Service for go4.video platform
Automatically detects faces in thumbnails and creates centered crops
"""

import cv2
import mediapipe as mp
import numpy as np
from PIL import Image, ImageOps
import io
import base64
import sys
import json
import requests
from typing import Tuple, Optional, Dict, Any
import logging

# Configure logging: basicConfig sets up the root logger at INFO level, and
# `logger` is the module-level logger used by the code below.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class FaceDetectionService:
    """Detect faces in thumbnails with MediaPipe and build face-centered crops."""

    def __init__(self):
        """Initialize MediaPipe face detection"""
        self.mp_face_detection = mp.solutions.face_detection
        self.mp_drawing = mp.solutions.drawing_utils
        self.face_detection = self.mp_face_detection.FaceDetection(
            model_selection=0,  # 0 for short-range (2 meters), 1 for full-range (5 meters)
            min_detection_confidence=0.5
        )

    def detect_faces(self, image_array: np.ndarray) -> list:
        """
        Detect faces in image and return bounding boxes

        Args:
            image_array: OpenCV image array (BGR format)

        Returns:
            List of dicts, one per detected face, sorted by confidence
            (highest first). Each dict has the pixel-space keys
            'x', 'y', 'width', 'height', 'center_x', 'center_y' plus
            'confidence'. Returns an empty list when nothing is detected or
            when detection raises (the error is logged, not propagated).
        """
        try:
            # Convert BGR to RGB for MediaPipe
            rgb_image = cv2.cvtColor(image_array, cv2.COLOR_BGR2RGB)

            # Perform face detection
            results = self.face_detection.process(rgb_image)

            if not results.detections:
                return []

            faces = []
            height, width = image_array.shape[:2]

            for detection in results.detections:
                bbox = detection.location_data.relative_bounding_box
                confidence = detection.score[0]

                # Convert relative coordinates to absolute pixels.
                # The values are not clamped to the image bounds; the crop
                # calculation clamps the final crop window instead.
                x = int(bbox.xmin * width)
                y = int(bbox.ymin * height)
                w = int(bbox.width * width)
                h = int(bbox.height * height)

                faces.append({
                    'x': x,
                    'y': y,
                    'width': w,
                    'height': h,
                    'confidence': confidence,
                    'center_x': x + w // 2,
                    'center_y': y + h // 2
                })

            # Sort by confidence (highest first)
            faces.sort(key=lambda f: f['confidence'], reverse=True)
            return faces

        except Exception as e:
            # Deliberate best-effort: a detection failure degrades to
            # "no faces", which makes callers fall back to a center crop.
            logger.error("Error detecting faces: %s", e)
            return []

    def get_face_centered_crop_position(self, image_width: int, image_height: int,
                                      faces: list, target_aspect: float = 9/16) -> Dict[str, int]:
        """
        Calculate the crop window that centers the most confident face

        The crop always spans the full image along one axis and is trimmed
        along the other so that its shape matches ``target_aspect``. When
        faces are given, the window is centered on ``faces[0]`` and then
        clamped to stay inside the image; without faces it is centered on
        the image.

        Args:
            image_width: Original image width
            image_height: Original image height
            faces: List of detected faces (first entry is used); may be empty
            target_aspect: Target aspect ratio expressed as width / height
                (default 9/16 for portrait)

        Returns:
            Dict with crop coordinates: {x, y, width, height}
        """
        # Only the first (most confident) face drives the crop position.
        focus = faces[0] if faces else None

        if image_width / image_height > target_aspect:
            # Image is wider than the target: keep full height, trim width.
            crop_width = int(image_height * target_aspect)
            crop_height = image_height
            crop_y = 0
            if focus is None:
                crop_x = (image_width - crop_width) // 2
            else:
                crop_x = focus['center_x'] - crop_width // 2
                # Keep the window inside the image.
                crop_x = max(0, min(crop_x, image_width - crop_width))
        else:
            # Image is taller than (or equal to) the target: keep full
            # width, trim height.
            crop_width = image_width
            crop_height = int(image_width / target_aspect)
            crop_x = 0
            if focus is None:
                crop_y = (image_height - crop_height) // 2
            else:
                crop_y = focus['center_y'] - crop_height // 2
                # Keep the window inside the image.
                crop_y = max(0, min(crop_y, image_height - crop_height))

        return {
            'x': crop_x,
            'y': crop_y,
            'width': crop_width,
            'height': crop_height
        }

    def process_thumbnail_url(self, thumbnail_url: str, target_width: int = 300,
                            target_height: int = 533) -> Optional[Dict[str, Any]]:
        """
        Download, process thumbnail URL and return face detection results

        Args:
            thumbnail_url: URL of the thumbnail image
            target_width: Target width for the processed image
            target_height: Target height for the processed image

        Returns:
            On success, a dict with 'success': True, the number of faces,
            the primary face confidence, the crop window, the original
            dimensions, the processed image as a base64 JPEG data URI and
            the strategy used ('face_centered' or 'center_crop').
            None when the downloaded bytes cannot be decoded as an image.
            On any other failure, a dict with 'success': False, 'error'
            and 'faces_detected': 0 (the exception is logged, not raised).
        """
        try:
            # Download image.
            # NOTE(review): the URL is fetched exactly as given. If it can
            # originate from untrusted users, consider restricting schemes
            # and hosts to avoid server-side request forgery.
            response = requests.get(thumbnail_url, timeout=10)
            response.raise_for_status()

            # Convert to OpenCV format
            image_array = np.frombuffer(response.content, np.uint8)
            image = cv2.imdecode(image_array, cv2.IMREAD_COLOR)

            if image is None:
                logger.error("Failed to decode image from URL: %s", thumbnail_url)
                return None

            height, width = image.shape[:2]

            # Detect faces
            faces = self.detect_faces(image)

            # Get optimal crop position for face centering.
            # The aspect ratio must be width / height, matching how
            # get_face_centered_crop_position interprets target_aspect;
            # passing height / width would yield a landscape crop that the
            # resize below then squashes into the portrait target.
            crop_info = self.get_face_centered_crop_position(
                width, height, faces, target_width / target_height
            )

            # Create face-centered crop
            top = crop_info['y']
            left = crop_info['x']
            cropped_image = image[
                top:top + crop_info['height'],
                left:left + crop_info['width']
            ]

            # Resize to target dimensions
            resized_image = cv2.resize(cropped_image, (target_width, target_height))

            # Convert to base64 for web usage
            encoded_ok, buffer = cv2.imencode(
                '.jpg', resized_image, [cv2.IMWRITE_JPEG_QUALITY, 85]
            )
            if not encoded_ok:
                raise RuntimeError("Failed to encode processed image as JPEG")
            processed_image_b64 = base64.b64encode(buffer).decode('utf-8')

            return {
                'success': True,
                'faces_detected': len(faces),
                'primary_face_confidence': faces[0]['confidence'] if faces else 0,
                'crop_info': crop_info,
                'original_dimensions': {'width': width, 'height': height},
                'processed_image': f"data:image/jpeg;base64,{processed_image_b64}",
                'processing_strategy': 'face_centered' if faces else 'center_crop'
            }

        except Exception as e:
            # Top-level boundary for the CLI: report the failure in the
            # result payload instead of raising.
            logger.error("Error processing thumbnail URL %s: %s", thumbnail_url, e)
            return {
                'success': False,
                'error': str(e),
                'faces_detected': 0
            }

def main():
    """CLI entry point: process one thumbnail URL and print the result as JSON.

    Expects the thumbnail URL as the first command-line argument. Prints a
    usage line and exits with status 1 when it is missing.
    """
    cli_args = sys.argv[1:]
    if not cli_args:
        print("Usage: python3 face-detection.py <thumbnail_url>")
        sys.exit(1)

    # Run the full download / detect / crop pipeline for the given URL.
    outcome = FaceDetectionService().process_thumbnail_url(cli_args[0])

    # Emit the result (dict or None) as pretty-printed JSON on stdout.
    print(json.dumps(outcome, indent=2))

# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()