import { NextResponse } from "next/server";
import { withAuth } from "@workos-inc/authkit-nextjs";
import { stepCountIs, type UIMessage } from "ai";
import { RuntimeContext } from "@mastra/core/runtime-context";
// App-local modules; these import paths are assumed for illustration.
import { mastra } from "@/mastra";
import { getThreadWithMetadata, updateThreadLanguage } from "@/lib/threads";
import { detectLanguageFromText, getLanguageName } from "@/lib/language";

export async function POST(req: Request) {
  const {
    messages,
    threadId,
    model,
    duration,
    withAudio,
  }: {
    messages: UIMessage[];
    threadId: string;
    model?: string;
    duration?: number;
    withAudio?: boolean;
  } = await req.json();

  const { user } = await withAuth({ ensureSignedIn: true });
  if (!user) {
    return NextResponse.json({ error: "Unauthorized" }, { status: 401 });
  }

  if (!threadId) {
    return NextResponse.json({ error: "Thread ID is required" }, { status: 400 });
  }

  // Check thread ownership and fetch its metadata
  const thread = await getThreadWithMetadata({ threadId, userId: user.id });
  if (!thread) {
    return NextResponse.json(
      { error: "Thread not found or unauthorized" },
      { status: 404 },
    );
  }

  // Detect and store the thread language on the first message
  let threadLanguage = thread.metadata?.language as string | undefined;
  if (!threadLanguage && messages.length > 0) {
    // No stored language yet, so this is likely the first message
    const firstUserMessage = messages.find((m) => m.role === "user");
    if (firstUserMessage?.parts) {
      // Extract the plain text from the message parts
      const messageText = firstUserMessage.parts
        .filter((p) => p.type === "text" && "text" in p)
        .map((p) => (p as { text: string }).text)
        .join(" ");

      if (messageText.trim()) {
        // Detect language (falls back through the detection chain if needed)
        threadLanguage = detectLanguageFromText(messageText);
        // Persist the language in the thread metadata
        await updateThreadLanguage({ threadId, language: threadLanguage });
        console.log(
          `🌍 Thread ${threadId} language detected and stored: ${threadLanguage}`,
        );
      }
    }
  }

  // Default to English if no language could be determined
  if (!threadLanguage) {
    threadLanguage = "en";
  }

  // Extract file information from the last user message to pass to the agent
  let fileContextMessage: { role: "system"; content: string } | null = null;
  const lastUserMessage = [...messages].reverse().find((m) => m.role === "user");

  type ImagesContext = {
    productImageUrl: string;
  };
  const runtimeContext = new RuntimeContext<ImagesContext>();

  if (lastUserMessage?.parts) {
    const fileUrls: Array<{ url: string; mediaType: string; filename?: string }> = [];
    for (const part of lastUserMessage.parts) {
      if (part.type === "file" && "url" in part && part.url) {
        fileUrls.push({
          url: part.url,
          mediaType: part.mediaType || "image/png",
          filename: part.filename,
        });
      }
    }

    // Build a system message with file context if any files are present
    if (fileUrls.length > 0) {
      const fileContext = fileUrls
        .map((file, idx) => {
          const type = file.mediaType?.startsWith("image/") ? "Image" : "File";
          return `${type} ${idx + 1}: ${file.url}${file.filename ? ` (${file.filename})` : ""}`;
        })
        .join("\n");

      if (fileUrls[0].mediaType.startsWith("image/")) {
        runtimeContext.set("productImageUrl", fileUrls[0].url);
      }

      fileContextMessage = {
        role: "system" as const,
        content: `[IMAGE CONTEXT - User uploaded files]
${fileContext}

CRITICAL INSTRUCTIONS FOR TOOL CALLS:
- When calling generateActorOptionsWorkflowTool, you MUST use the exact Vercel Blob URL listed above for the productImageUrl parameter
- Extract the full URL from the list above (starts with https://...blob.vercel-storage.com/)
- DO NOT use any other URL format (especially not https://generativelanguage.googleapis.com URLs)
- The URL is publicly accessible and must be passed exactly as shown above
- Example: If the URL above is "https://example.blob.vercel-storage.com/file.webp", use that EXACT string

These URLs are the ONLY valid image URLs for tool parameters. Use them verbatim.`,
      };
    }
  }

  // Convert UI messages to text-only messages WITHOUT converting images to image parts.
  // This prevents Gemini from uploading images to its file API and losing the original URLs.
  const processedMessages = messages.map((msg) => {
    if (msg.role === "user" && msg.parts) {
      const textParts: string[] = [];
      // Keep text parts only - images are passed via the system context above
      for (const part of msg.parts) {
        if (part.type === "text" && "text" in part) {
          textParts.push(part.text);
        }
        // Skip file parts - they are handled via fileContextMessage
      }
      // Return the message with text content only
      return {
        ...msg,
        content: textParts.join("\n") || "",
      };
    }
    return msg;
  });

  const agent = mastra.getAgent("video-generator");

  // Build the messages array for the agent, appending context messages
  // that only the agent sees (they are not stored in the DB)
  const agentMessages = [...processedMessages];

  // File context system message, if files were uploaded
  if (fileContextMessage) {
    agentMessages.push(fileContextMessage as any);
  }

  // Language directive system message
  const languageName = getLanguageName(threadLanguage);
  const languageDirectiveMessage = {
    role: "system" as const,
    content: `[LANGUAGE DIRECTIVE]
You MUST respond strictly in ${languageName} (${threadLanguage}). All text, scripts, descriptions, and prompts must be in ${languageName}. This includes:
- Your conversational responses
- Video scripts
- Actor descriptions
- Video prompts
- All generated content

Thread language: ${threadLanguage}`,
  };
  agentMessages.push(languageDirectiveMessage as any);

  // Video settings system message, if settings were provided
  if (model && duration) {
    const videoSettingsMessage = {
      role: "system" as const,
      content: `[VIDEO GENERATION SETTINGS]\nModel: ${model}\nDuration: ${duration}s\nAudio: ${withAudio ? "Yes" : "No"}\nLanguage: ${threadLanguage}\n\nUse these settings when generating videos. Ensure all prompts are in ${languageName}.`,
    };
    agentMessages.push(videoSettingsMessage as any);
  }

  const stream = await agent.streamVNext(agentMessages, {
    format: "aisdk",
    maxSteps: 10,
    stopWhen: stepCountIs(10), // Allow up to 10 steps for tool execution
    memory: {
      resource: user.id,
      thread: threadId,
      options: {
        lastMessages: 50,
      },
    },
    runtimeContext,
    abortSignal: req.signal,
  });

  return stream.toUIMessageStreamResponse();
}
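
// ---------------------------------------------------------------------------
// Sketch of the language helpers imported above (assumed to live in
// "@/lib/language"). This is a minimal illustration, not the project's actual
// implementation: detection is a tiny stopword heuristic standing in for the
// real "fallback chain" mentioned in the handler, and the name lookup uses
// the standard Intl.DisplayNames API.
// ---------------------------------------------------------------------------

// Hypothetical stopword table; a production detector would use a library.
const STOPWORDS: Record<string, string[]> = {
  en: ["the", "and", "is", "to", "of"],
  es: ["el", "la", "que", "de", "y"],
  fr: ["le", "la", "et", "les", "des"],
  de: ["der", "die", "und", "das", "ist"],
};

export function detectLanguageFromText(text: string): string {
  const words = text.toLowerCase().split(/\s+/);
  let best = "en"; // fallback: default to English when nothing matches
  let bestHits = 0;
  for (const [lang, stops] of Object.entries(STOPWORDS)) {
    const hits = words.filter((w) => stops.includes(w)).length;
    if (hits > bestHits) {
      best = lang;
      bestHits = hits;
    }
  }
  return best;
}

export function getLanguageName(code: string): string {
  // Intl.DisplayNames is built into modern Node and browsers.
  return new Intl.DisplayNames(["en"], { type: "language" }).of(code) ?? "English";
}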
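
// ---------------------------------------------------------------------------
// Client-side pairing (sketch). `toUIMessageStreamResponse()` speaks the AI
// SDK UI message stream protocol, which `useChat` consumes. Inside a client
// component, the extra body fields mirror the request body destructured at
// the top of the handler; the "/api/chat" route path is an assumption.
// ---------------------------------------------------------------------------
// import { useChat } from "@ai-sdk/react";
// import { DefaultChatTransport } from "ai";
//
// const { messages, sendMessage, status } = useChat({
//   id: threadId, // scope the hook instance to this thread
//   transport: new DefaultChatTransport({
//     api: "/api/chat", // assumed path for this route handler
//     body: { threadId, model, duration, withAudio },
//   }),
// });
//
// await sendMessage({ text: "Generate a 15-second product video" });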