// folx-tv/server/focal-point.ts

import OpenAI from "openai";
import fs from "fs";
import path from "path";

// OpenAI client used by analyzeFocalPoint for the vision request. The API key and
// base URL are read from the AI_INTEGRATIONS_OPENAI_* environment variables.
const openai = new OpenAI({
  apiKey: process.env.AI_INTEGRATIONS_OPENAI_API_KEY,
  baseURL: process.env.AI_INTEGRATIONS_OPENAI_BASE_URL,
});

// In-memory focal-point cache keyed by the image path passed to analyzeFocalPoint.
// Values are x/y percentages (0-100). Nothing in this file evicts entries, and
// fallback results are stored here as well.
const cache = new Map<string, { x: number; y: number }>();

// Hand-picked focal points, keyed by image path. analyzeFocalPoint returns these
// without calling the AI model.
const manualOverrides: Record<string, { x: number; y: number }> = {
  "/uploads/70-jahre-oberkrainer.webp": { x: 50, y: 55 },
};

// System prompt for the vision model. It asks for the center of the primary
// face(s) as x/y percentages (0 = left/top edge, 100 = right/bottom edge) and for a
// bare JSON reply such as {"x":42,"y":38}, which analyzeFocalPoint extracts with a regex.
// NOTE(review): "Faces are rarely below y=20" presumably means "numerically smaller
// than 20" (i.e. closer to the top edge), since y grows downward — confirm the
// wording is not read the other way round by the model.
const SYSTEM_PROMPT = `You are an image analysis tool that detects where faces and people are located in photographs.
Analyze the image and find the PRIMARY person or group of people. Report the CENTER of their face(s) as x,y percentages.
- x=0 means far left edge, x=100 means far right edge
- y=0 means very top edge, y=100 means very bottom edge
- Think carefully: divide the image into a 10x10 grid and locate faces precisely
- For a typical portrait where a person is standing, faces are usually at y=25-40
- For a group photo with people standing, the faces are usually at y=30-45
- For a close-up headshot, the face center is usually at y=40-55
- For people sitting at a table, faces are usually at y=35-50
- Do NOT assume faces are at the very top. Faces are rarely below y=20 unless it's a very tight crop.
- Be PRECISE. Actually look at where the eyes/faces are in the image.
- If there are multiple people, find the center point of ALL their faces.
Return ONLY a JSON object like {"x":42,"y":38} with no other text.`;

/** Shape of the image part sent to the chat-completions endpoint. */
type ImageUrlContent = {
  type: "image_url";
  image_url: { url: string; detail: "auto" | "low" | "high" };
};

/**
 * Clamps a model-reported coordinate to the 0-100 range.
 * Returns `fallback` when the value is missing or not a finite number.
 */
function clampPercent(value: unknown, fallback: number): number {
  // Accept numbers and non-blank numeric strings only. A plain `Number(v) || fallback`
  // would discard a legitimate 0 and would coerce null / "" to 0.
  let numeric = NaN;
  if (typeof value === "number") {
    numeric = value;
  } else if (typeof value === "string" && value.trim() !== "") {
    numeric = Number(value);
  }
  return Number.isFinite(numeric) ? Math.max(0, Math.min(100, numeric)) : fallback;
}

/**
 * Extracts the first `{...}` JSON object from the model reply and converts it to a
 * focal point. Returns null when the reply contains no JSON object. Throws if the
 * extracted text is not valid JSON.
 */
function parseFocalPoint(text: string): { x: number; y: number } | null {
  const match = text.match(/\{[^}]+\}/);
  if (!match) return null;
  const parsed: unknown = JSON.parse(match[0]);
  if (typeof parsed !== "object" || parsed === null) return null;
  const { x, y } = parsed as { x?: unknown; y?: unknown };
  return { x: clampPercent(x, 50), y: clampPercent(y, 30) };
}

/**
 * Builds the image part of the request: a base64 data URL for files under
 * `/uploads/`, or the URL itself for http(s) images. Throws for any other path,
 * for paths escaping the uploads directory, and when the file cannot be read.
 */
async function buildImageContent(imagePath: string): Promise<ImageUrlContent> {
  if (imagePath.startsWith("/uploads/")) {
    const publicDir = path.join(process.cwd(), "client/public");
    const uploadsDir = path.join(publicDir, "uploads");
    // path.join collapses ".." segments, so a path such as "/uploads/../../secret"
    // would otherwise resolve outside the uploads directory and be sent to the API.
    const localPath = path.join(publicDir, imagePath);
    if (!localPath.startsWith(uploadsDir + path.sep)) {
      throw new Error(`Unsupported path: ${imagePath}`);
    }
    // Async read: avoids blocking the event loop and the exists-then-read race.
    // A missing file rejects with ENOENT and is handled by the caller.
    const imageData = (await fs.promises.readFile(localPath)).toString("base64");
    const mimeByExtension: Record<string, string> = {
      ".jpg": "image/jpeg",
      ".jpeg": "image/jpeg",
      ".png": "image/png",
      ".gif": "image/gif",
    };
    // Extension match is case-insensitive; anything unknown is treated as WebP.
    const mimeType = mimeByExtension[path.extname(localPath).toLowerCase()] ?? "image/webp";
    return { type: "image_url", image_url: { url: `data:${mimeType};base64,${imageData}`, detail: "auto" } };
  }
  if (/^https?:\/\//i.test(imagePath)) {
    return { type: "image_url", image_url: { url: imagePath, detail: "auto" } };
  }
  throw new Error(`Unsupported path: ${imagePath}`);
}

/**
 * Returns the focal point (x/y percentages, 0-100) of the faces in an image.
 *
 * Lookup order: manual override, then the in-memory cache, then a vision request
 * to the AI model. Every result, including the fallback, is stored in the cache,
 * so each path triggers at most one request per process.
 *
 * @param imagePath Either a site-relative `/uploads/...` path (read from
 *   `client/public`) or an absolute http(s) URL.
 * @returns The detected focal point, or `{ x: 50, y: 30 }` when the path is
 *   unsupported, the file cannot be read, the request fails, or the reply cannot
 *   be parsed. This function never rejects for those cases.
 */
export async function analyzeFocalPoint(imagePath: string): Promise<{ x: number; y: number }> {
  // Own-property check so that paths like "constructor" do not resolve to
  // Object.prototype members.
  const override = Object.prototype.hasOwnProperty.call(manualOverrides, imagePath)
    ? manualOverrides[imagePath]
    : undefined;
  if (override) {
    cache.set(imagePath, override);
    return override;
  }

  const cached = cache.get(imagePath);
  if (cached) {
    return cached;
  }

  try {
    const imageContent = await buildImageContent(imagePath);

    const response = await openai.chat.completions.create({
      model: "gpt-4o-mini",
      messages: [
        { role: "system", content: SYSTEM_PROMPT },
        {
          role: "user",
          content: [
            { type: "text", text: "Where exactly are the faces/people in this image? Be precise with coordinates. Return only JSON." },
            imageContent
          ]
        }
      ],
      max_completion_tokens: 60,
    });

    const text = response.choices[0]?.message?.content?.trim() ?? "";
    console.log(`[focal-point] ${imagePath}: AI response = ${text}`);
    const point = parseFocalPoint(text);
    if (point) {
      cache.set(imagePath, point);
      return point;
    }
  } catch (e: unknown) {
    const message = e instanceof Error ? e.message : String(e);
    console.log("[focal-point] AI analysis failed for", imagePath, ":", message);
  }

  // Best-effort default (upper-middle of the frame). Cached so a failing image is
  // not re-requested on every call.
  const fallback = { x: 50, y: 30 };
  cache.set(imagePath, fallback);
  return fallback;
}

/**
 * Runs focal-point analysis for the cover image of every article, one image at a
 * time and in list order. Articles without a cover image (null or empty string)
 * are skipped. Logs the total number of cached focal points when done.
 */
export async function analyzeAllArticleImages(articles: Array<{ coverImage: string | null }>) {
  for (const { coverImage } of articles) {
    // Awaited inside the loop on purpose: requests are issued sequentially.
    if (coverImage) {
      await analyzeFocalPoint(coverImage);
    }
  }
  console.log("[focal-point] Analyzed", cache.size, "images");
}

/**
 * Returns a plain-object snapshot of the focal-point cache, keyed by image path.
 * The object is newly created on each call; the point values are the same
 * object references held by the cache.
 */
export function getCachedFocalPoints(): Record<string, { x: number; y: number }> {
  const snapshot: Record<string, { x: number; y: number }> = {};
  cache.forEach((point, imagePath) => {
    snapshot[imagePath] = point;
  });
  return snapshot;
}