folx-tv/server/focal-point.ts
sebastjanartic 4046bafaab Improve image focus by centering on people and faces
Update AI prompts for more accurate focal point detection and adjust image rendering in various components (MediumCard, SideCard, SingleImageCarousel) to utilize these focal points, enhancing visual composition.

Replit-Commit-Author: Agent
Replit-Commit-Session-Id: 517dfa7b-26ac-463d-a6e1-a58c6df97188
Replit-Commit-Checkpoint-Type: full_checkpoint
Replit-Commit-Event-Id: 4e108b3d-fbcd-43ec-8d2f-0a6ed15b2c47
Replit-Commit-Screenshot-Url: https://storage.googleapis.com/screenshot-production-us-central1/f209e72a-0939-48fa-84fc-57854de71967/517dfa7b-26ac-463d-a6e1-a58c6df97188/drGbo1a
Replit-Helium-Checkpoint-Created: true
2026-02-28 21:16:53 +00:00

100 lines
3.8 KiB
TypeScript

import OpenAI from "openai";
import fs from "fs";
import path from "path";
// OpenAI client used for the vision request in analyzeFocalPoint. The API key
// and base URL are read from the AI_INTEGRATIONS_OPENAI_* environment variables.
const openai = new OpenAI({
apiKey: process.env.AI_INTEGRATIONS_OPENAI_API_KEY,
baseURL: process.env.AI_INTEGRATIONS_OPENAI_BASE_URL,
});
// In-memory cache of focal points, keyed by the image path string passed to
// analyzeFocalPoint. Values are percentages: x from the left edge, y from the top.
const cache = new Map<string, { x: number; y: number }>();
// System message sent with every analysis request. It tells the model to report
// the center of the primary face(s) as x,y percentages and to answer with a bare
// JSON object, which analyzeFocalPoint then extracts from the reply text.
const SYSTEM_PROMPT = `You are an image analysis tool that detects where faces and people are located in photographs.
Analyze the image and find the PRIMARY person or group of people. Report the CENTER of their face(s) as x,y percentages.
- x=0 means far left edge, x=100 means far right edge
- y=0 means very top edge, y=100 means very bottom edge
- For a person's face in the upper third, y should be around 15-35
- For a person standing centered, x should be around 40-60
- For a group photo, find the center of the group's faces
- Be PRECISE, do NOT default to 50,50. Actually look at where faces are.
- If there are multiple people, find the most prominent face or group center.
Return ONLY a JSON object like {"x":42,"y":28} with no other text.`;
/**
 * Coerces one coordinate from the model's JSON reply into a percentage in [0, 100].
 *
 * Only finite numbers (or numeric strings) are accepted; anything else yields
 * `fallback`. A genuine 0 is preserved rather than being treated as "missing".
 */
function clampFocalPercent(raw: unknown, fallback: number): number {
  if (typeof raw !== "number" && typeof raw !== "string") {
    return fallback;
  }
  // Number("") is 0, which would masquerade as a real coordinate.
  if (typeof raw === "string" && raw.trim() === "") {
    return fallback;
  }
  const value = Number(raw);
  if (!Number.isFinite(value)) {
    return fallback;
  }
  return Math.max(0, Math.min(100, value));
}

/** Picks the data-URL MIME type from a file extension (case-insensitive); defaults to WebP. */
function focalImageMimeType(filePath: string): string {
  switch (path.extname(filePath).toLowerCase()) {
    case ".jpg":
    case ".jpeg":
      return "image/jpeg";
    case ".png":
      return "image/png";
    case ".gif":
      return "image/gif";
    default:
      return "image/webp";
  }
}

/**
 * Reads a file referenced by an "/uploads/..." path from client/public and
 * returns it as a base64 data URL.
 *
 * @throws Error if the path resolves outside client/public/uploads or the file cannot be read.
 */
async function readUploadAsDataUrl(imagePath: string): Promise<string> {
  const publicDir = path.join(process.cwd(), "client/public");
  const uploadsDir = path.join(publicDir, "uploads");
  // path.join normalizes ".." segments, so the containment check below sees the real target.
  const localPath = path.join(publicDir, imagePath);
  if (!localPath.startsWith(uploadsDir + path.sep)) {
    throw new Error(`Unsupported path: ${imagePath}`);
  }
  const buffer = await fs.promises.readFile(localPath);
  return `data:${focalImageMimeType(localPath)};base64,${buffer.toString("base64")}`;
}

/** Extracts the first {...} JSON object from the model reply; returns null when none is present. */
function parseFocalPointReply(reply: string): { x: number; y: number } | null {
  const match = reply.match(/\{[^}]+\}/);
  if (!match) {
    return null;
  }
  // JSON.parse may throw on malformed output; the caller's try/catch handles that.
  const parsed = JSON.parse(match[0]) as { x?: unknown; y?: unknown };
  return {
    x: clampFocalPercent(parsed.x, 50),
    y: clampFocalPercent(parsed.y, 30),
  };
}

/**
 * Asks the vision model where the primary face(s) are in an image and returns
 * that focal point as percentages (x from the left edge, y from the top edge).
 *
 * Results are memoized in the module-level cache by `imagePath`.
 *
 * @param imagePath Either an "/uploads/..." path (read from client/public and
 *   sent inline as a data URL) or an http(s) URL (passed through to the API).
 * @returns The detected focal point clamped to [0, 100], or `{ x: 50, y: 30 }`
 *   when the path is unsupported, the request fails, or the reply has no JSON object.
 *   This function does not throw; failures are logged.
 */
export async function analyzeFocalPoint(imagePath: string): Promise<{ x: number; y: number }> {
  const cached = cache.get(imagePath);
  if (cached) {
    return cached;
  }

  try {
    let imageUrl: string;
    if (imagePath.startsWith("/uploads/")) {
      imageUrl = await readUploadAsDataUrl(imagePath);
    } else if (/^https?:\/\//i.test(imagePath)) {
      imageUrl = imagePath;
    } else {
      throw new Error(`Unsupported path: ${imagePath}`);
    }

    const response = await openai.chat.completions.create({
      model: "gpt-4o-mini",
      messages: [
        { role: "system", content: SYSTEM_PROMPT },
        {
          role: "user",
          content: [
            { type: "text", text: "Where exactly are the faces/people in this image? Be precise with coordinates. Return only JSON." },
            { type: "image_url", image_url: { url: imageUrl, detail: "auto" } },
          ],
        },
      ],
      max_tokens: 60,
    });

    const text = response.choices[0]?.message?.content?.trim() ?? "";
    console.log(`[focal-point] ${imagePath}: AI response = ${text}`);

    const point = parseFocalPointReply(text);
    if (point) {
      cache.set(imagePath, point);
      return point;
    }
  } catch (e) {
    const reason = e instanceof Error ? e.message : String(e);
    console.log("[focal-point] AI analysis failed for", imagePath, ":", reason);
  }

  // NOTE(review): the fallback is cached as well, so a failed analysis is not
  // retried while this entry stays in the in-memory cache.
  const fallback = { x: 50, y: 30 };
  cache.set(imagePath, fallback);
  return fallback;
}
/**
 * Runs focal-point analysis for every article that has a cover image.
 *
 * Images are processed one after another, so a cover shared by several
 * articles is served from the cache after its first analysis. Once all
 * articles are handled, the current cache size is logged.
 *
 * @param articles Articles to scan; entries without a cover image are skipped.
 */
export async function analyzeAllArticleImages(articles: Array<{ coverImage: string | null }>) {
  for (const { coverImage } of articles) {
    if (coverImage) {
      await analyzeFocalPoint(coverImage);
    }
  }
  console.log("[focal-point] Analyzed", cache.size, "images");
}
/**
 * Returns a plain-object snapshot of every focal point currently in the cache,
 * keyed by the image path it was stored under.
 *
 * The returned object is new on each call, but the point objects themselves are
 * the same instances held by the cache.
 */
export function getCachedFocalPoints(): Record<string, { x: number; y: number }> {
  // Object.fromEntries defines own properties, so a key such as "__proto__"
  // becomes a regular entry instead of replacing the result's prototype.
  return Object.fromEntries(cache);
}