137 lines
5.0 KiB
TypeScript
137 lines
5.0 KiB
TypeScript
import express, { type Express, type Request, type Response } from "express";
|
|
import { chatStorage } from "../chat/storage";
|
|
import { openai, speechToText, ensureCompatibleFormat } from "./client";
|
|
|
|
// Upper bound on the JSON request body; audio arrives base64-encoded inside
// the JSON payload, so the limit is set to 50MB.
const AUDIO_JSON_BODY_LIMIT = "50mb";

// JSON body parser configured with the enlarged limit for audio payloads.
const audioBodyParser = express.json({ limit: AUDIO_JSON_BODY_LIMIT });
|
|
|
|
/** Audio-related fields read from a streamed chat-completion delta. */
interface AudioDelta {
  audio?: { transcript?: string; data?: string };
}

/**
 * Parses a `:id` route parameter into a conversation id.
 *
 * Only a plain run of decimal digits that fits in a safe integer is accepted,
 * so inputs such as "abc", "", or "12abc" are rejected instead of turning into
 * NaN or a silently truncated number.
 *
 * @param raw - The raw route parameter value.
 * @returns The numeric id, or `null` when the value is not a valid id.
 */
function parseConversationId(raw: unknown): number | null {
  if (typeof raw !== "string" || !/^\d+$/.test(raw)) {
    return null;
  }
  const id = Number.parseInt(raw, 10);
  return Number.isSafeInteger(id) ? id : null;
}

/** Writes one Server-Sent Events `data:` frame holding the JSON-encoded payload. */
function writeSseEvent(res: Response, payload: Record<string, unknown>): void {
  res.write(`data: ${JSON.stringify(payload)}\n\n`);
}

/**
 * Registers the conversation and voice-message HTTP routes on the given app.
 *
 * Routes:
 * - `GET    /api/conversations`               list all conversations
 * - `GET    /api/conversations/:id`           one conversation plus its messages
 * - `POST   /api/conversations`               create a conversation
 * - `DELETE /api/conversations/:id`           delete a conversation
 * - `POST   /api/conversations/:id/messages`  send a voice message, stream the reply
 *
 * Every `:id` route answers 400 when the id is not a valid integer. Unexpected
 * failures are logged and reported as 500 (or as an SSE `error` event once the
 * event stream has started).
 *
 * @param app - The Express application to attach the routes to.
 */
export function registerAudioRoutes(app: Express): void {
  // Get all conversations
  app.get("/api/conversations", async (_req: Request, res: Response) => {
    try {
      const conversations = await chatStorage.getAllConversations();
      res.json(conversations);
    } catch (error) {
      console.error("Error fetching conversations:", error);
      res.status(500).json({ error: "Failed to fetch conversations" });
    }
  });

  // Get single conversation with messages
  app.get("/api/conversations/:id", async (req: Request, res: Response) => {
    try {
      const id = parseConversationId(req.params.id);
      if (id === null) {
        res.status(400).json({ error: "Invalid conversation id" });
        return;
      }

      const conversation = await chatStorage.getConversation(id);
      if (!conversation) {
        res.status(404).json({ error: "Conversation not found" });
        return;
      }

      const messages = await chatStorage.getMessagesByConversation(id);
      res.json({ ...conversation, messages });
    } catch (error) {
      console.error("Error fetching conversation:", error);
      res.status(500).json({ error: "Failed to fetch conversation" });
    }
  });

  // Create new conversation
  app.post("/api/conversations", async (req: Request, res: Response) => {
    try {
      // req.body may be undefined when no body parser handled the request.
      const { title } = req.body ?? {};
      // Only a non-blank string is used as the title; anything else gets the default.
      const safeTitle = typeof title === "string" && title.trim() !== "" ? title : "New Chat";
      const conversation = await chatStorage.createConversation(safeTitle);
      res.status(201).json(conversation);
    } catch (error) {
      console.error("Error creating conversation:", error);
      res.status(500).json({ error: "Failed to create conversation" });
    }
  });

  // Delete conversation
  app.delete("/api/conversations/:id", async (req: Request, res: Response) => {
    try {
      const id = parseConversationId(req.params.id);
      if (id === null) {
        res.status(400).json({ error: "Invalid conversation id" });
        return;
      }

      await chatStorage.deleteConversation(id);
      res.status(204).send();
    } catch (error) {
      console.error("Error deleting conversation:", error);
      res.status(500).json({ error: "Failed to delete conversation" });
    }
  });

  // Send voice message and get streaming audio response.
  // The uploaded audio is passed through ensureCompatibleFormat before
  // transcription (original note: auto-detects the format and converts
  // WebM/MP4/OGG to WAV; gpt-4o-mini-transcribe for STT, gpt-audio for the reply).
  //
  // SSE events emitted, in order: `user_transcript`, then interleaved
  // `transcript` / `audio` chunks, then `done` (or `error` on failure).
  app.post("/api/conversations/:id/messages", audioBodyParser, async (req: Request, res: Response) => {
    try {
      const conversationId = parseConversationId(req.params.id);
      if (conversationId === null) {
        res.status(400).json({ error: "Invalid conversation id" });
        return;
      }

      // `voice` defaults to "alloy" when omitted and is otherwise forwarded as given.
      const { audio, voice = "alloy" } = req.body ?? {};
      if (typeof audio !== "string" || audio.length === 0) {
        res.status(400).json({ error: "Audio data (base64) is required" });
        return;
      }

      // Buffer.from skips characters that are not valid base64, so junk input
      // decodes to an empty buffer; reject it before calling transcription.
      const rawBuffer = Buffer.from(audio, "base64");
      if (rawBuffer.length === 0) {
        res.status(400).json({ error: "Audio data (base64) is required" });
        return;
      }

      // Fail fast on unknown conversations so no transcription is performed and
      // no message is stored against a conversation that does not exist.
      const conversation = await chatStorage.getConversation(conversationId);
      if (!conversation) {
        res.status(404).json({ error: "Conversation not found" });
        return;
      }

      // 1. Convert the upload to a format accepted by the transcription step
      const { buffer: audioBuffer, format: inputFormat } = await ensureCompatibleFormat(rawBuffer);

      // 2. Transcribe user audio
      const userTranscript = await speechToText(audioBuffer, inputFormat);

      // 3. Save user message
      await chatStorage.createMessage(conversationId, "user", userTranscript);

      // 4. Get conversation history (read after the save, so it includes the new user message)
      const existingMessages = await chatStorage.getMessagesByConversation(conversationId);
      const chatHistory = existingMessages.map((m) => ({
        role: m.role as "user" | "assistant",
        content: m.content,
      }));

      // 5. Set up SSE
      res.setHeader("Content-Type", "text/event-stream");
      res.setHeader("Cache-Control", "no-cache");
      res.setHeader("Connection", "keep-alive");

      writeSseEvent(res, { type: "user_transcript", data: userTranscript });

      // 6. Stream audio response from gpt-audio
      const stream = await openai.chat.completions.create({
        model: "gpt-audio",
        modalities: ["text", "audio"],
        audio: { voice, format: "pcm16" },
        messages: chatHistory,
        stream: true,
      });

      let assistantTranscript = "";

      for await (const chunk of stream) {
        // Only the audio fields of the delta are read here.
        const delta = chunk.choices?.[0]?.delta as AudioDelta | undefined;
        if (!delta) continue;

        const transcriptPart = delta.audio?.transcript;
        if (transcriptPart) {
          assistantTranscript += transcriptPart;
          writeSseEvent(res, { type: "transcript", data: transcriptPart });
        }

        const audioPart = delta.audio?.data;
        if (audioPart) {
          writeSseEvent(res, { type: "audio", data: audioPart });
        }
      }

      // 7. Save assistant message
      await chatStorage.createMessage(conversationId, "assistant", assistantTranscript);

      writeSseEvent(res, { type: "done", transcript: assistantTranscript });
      res.end();
    } catch (error) {
      console.error("Error processing voice message:", error);
      if (res.headersSent) {
        // The event stream has already started, so the status code can no longer
        // change; report the failure in-band and close the stream.
        writeSseEvent(res, { type: "error", error: "Failed to process voice message" });
        res.end();
      } else {
        res.status(500).json({ error: "Failed to process voice message" });
      }
    }
  });
}
|