// fix: Update Gemini AI to use server-side API key and gemini-flash-latest model (commit 8c79bdb, Reubencf)
import { NextRequest, NextResponse } from 'next/server'
// Note: For audio transcription, we'll use Gemini's multimodal capabilities
// In production, you might want to use Google Cloud Speech-to-Text API for better accuracy
// Gemini API key read from the server environment. It is undefined when the
// variable is unset, in which case the POST handler below responds with a 500.
const GEMINI_API_KEY = process.env.GEMINI_API_KEY
/**
 * Handles `POST` requests that upload an audio clip and responds with its
 * transcription.
 *
 * The request body must be form data containing an `audio` file field. The
 * file is base64-encoded and sent inline to the Gemini `generateContent`
 * endpoint together with a transcription prompt.
 *
 * Responses:
 * - 200 `{ transcription }` on success. When Gemini rejects the request with
 *   an error message mentioning "audio" or "unsupported", a 200 with a
 *   placeholder `transcription` and a `warning` is returned instead.
 * - 400 `{ error }` when the body is not parseable form data, or the `audio`
 *   field is missing, is not a file, or is empty.
 * - 500 `{ error }` when `GEMINI_API_KEY` is not configured, and
 *   `{ transcription: '', error }` for any other failure.
 *
 * @param request - Incoming request carrying the form data.
 * @returns A JSON response as described above.
 */
export async function POST(request: NextRequest) {
  try {
    // Fail fast on server misconfiguration before reading the upload.
    if (!GEMINI_API_KEY) {
      return NextResponse.json(
        { error: 'Gemini API key not configured on server. Please set GEMINI_API_KEY environment variable.' },
        { status: 500 }
      )
    }

    // A body that is not form data is a client error, not a server failure.
    const formData = await request.formData().catch(() => null)
    if (!formData) {
      return NextResponse.json(
        { error: 'Request body must be form data with an "audio" file field' },
        { status: 400 }
      )
    }

    // `FormData.get` yields `File | string | null`; narrow at runtime instead
    // of asserting, so a text field named "audio" is rejected with a 400
    // rather than crashing later on `.arrayBuffer()`.
    const audioFile = formData.get('audio')
    if (audioFile === null || typeof audioFile === 'string' || audioFile.size === 0) {
      return NextResponse.json(
        { error: 'Audio file is required' },
        { status: 400 }
      )
    }

    // Gemini accepts inline media as base64 text.
    const base64Audio = Buffer.from(await audioFile.arrayBuffer()).toString('base64')

    const GEMINI_API_URL = 'https://generativelanguage.googleapis.com/v1beta/models/gemini-flash-latest:generateContent'
    const requestBody = {
      contents: [{
        role: 'user',
        parts: [
          {
            text: 'Please transcribe the following audio accurately. Only return the transcription text, nothing else.'
          },
          {
            inline_data: {
              // `||` (not `??`) on purpose: an empty-string type must also fall back.
              mime_type: audioFile.type || 'audio/wav',
              data: base64Audio
            }
          }
        ]
      }],
      generationConfig: {
        temperature: 0.1,
        topK: 1,
        topP: 1,
        maxOutputTokens: 2048,
      }
    }

    // Send the key in a header rather than the query string so it does not
    // end up in URLs captured by logs, proxies or error traces.
    const response = await fetch(GEMINI_API_URL, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        'x-goog-api-key': GEMINI_API_KEY,
      },
      body: JSON.stringify(requestBody)
    })

    if (!response.ok) {
      // The error body may not be JSON (e.g. a gateway error page); do not let
      // a parse failure replace the real error.
      const errorBody = (await response.json().catch(() => null)) as
        | { error?: { message?: unknown } }
        | null
      const rawMessage = errorBody?.error?.message
      const upstreamMessage = typeof rawMessage === 'string' ? rawMessage : ''

      // If Gemini doesn't support audio, provide alternative solution
      if (upstreamMessage.includes('audio') || upstreamMessage.includes('unsupported')) {
        return NextResponse.json({
          transcription: '[Audio transcription requires Gemini 1.5 Pro or Google Cloud Speech-to-Text API. Please upgrade your API access or use the chat feature with text input.]',
          warning: 'Audio transcription not fully supported with current model'
        })
      }

      throw new Error(upstreamMessage || 'Failed to transcribe audio')
    }

    const data = await response.json()
    // Use the text of the first part of the first candidate; fall back to a
    // fixed message when the response carries no text.
    const transcription = data.candidates?.[0]?.content?.parts?.[0]?.text || 'Could not transcribe audio'

    return NextResponse.json({ transcription })
  } catch (error) {
    console.error('Transcription error:', error)
    // Provide a helpful fallback message
    return NextResponse.json({
      transcription: '',
      error: error instanceof Error ? error.message : 'Transcription failed. Note: Audio transcription requires Gemini 1.5 Pro or a dedicated speech-to-text API.'
    }, { status: 500 })
  }
}