// app/api/transcribe/route.ts — Next.js API route: POST handler that
// transcribes an uploaded audio file via the Gemini generateContent API.
| import { NextRequest, NextResponse } from 'next/server' | |
| // Note: For audio transcription, we'll use Gemini's multimodal capabilities | |
| // In production, you might want to use Google Cloud Speech-to-Text API for better accuracy | |
| const GEMINI_API_KEY = process.env.GEMINI_API_KEY | |
| export async function POST(request: NextRequest) { | |
| try { | |
| const formData = await request.formData() | |
| const audioFile = formData.get('audio') as File | |
| if (!GEMINI_API_KEY) { | |
| return NextResponse.json( | |
| { error: 'Gemini API key not configured on server. Please set GEMINI_API_KEY environment variable.' }, | |
| { status: 500 } | |
| ) | |
| } | |
| if (!audioFile) { | |
| return NextResponse.json( | |
| { error: 'Audio file is required' }, | |
| { status: 400 } | |
| ) | |
| } | |
| // Convert audio file to base64 | |
| const bytes = await audioFile.arrayBuffer() | |
| const buffer = Buffer.from(bytes) | |
| const base64Audio = buffer.toString('base64') | |
| // Use Gemini API to transcribe | |
| // Note: Gemini 1.5 Pro supports audio, but Flash might have limitations | |
| const GEMINI_API_URL = 'https://generativelanguage.googleapis.com/v1beta/models/gemini-flash-latest:generateContent' | |
| const requestBody = { | |
| contents: [{ | |
| role: 'user', | |
| parts: [ | |
| { | |
| text: 'Please transcribe the following audio accurately. Only return the transcription text, nothing else.' | |
| }, | |
| { | |
| inline_data: { | |
| mime_type: audioFile.type || 'audio/wav', | |
| data: base64Audio | |
| } | |
| } | |
| ] | |
| }], | |
| generationConfig: { | |
| temperature: 0.1, | |
| topK: 1, | |
| topP: 1, | |
| maxOutputTokens: 2048, | |
| } | |
| } | |
| const response = await fetch(`${GEMINI_API_URL}?key=${GEMINI_API_KEY}`, { | |
| method: 'POST', | |
| headers: { | |
| 'Content-Type': 'application/json', | |
| }, | |
| body: JSON.stringify(requestBody) | |
| }) | |
| if (!response.ok) { | |
| const error = await response.json() | |
| // If Gemini doesn't support audio, provide alternative solution | |
| if (error.error?.message?.includes('audio') || error.error?.message?.includes('unsupported')) { | |
| return NextResponse.json({ | |
| transcription: '[Audio transcription requires Gemini 1.5 Pro or Google Cloud Speech-to-Text API. Please upgrade your API access or use the chat feature with text input.]', | |
| warning: 'Audio transcription not fully supported with current model' | |
| }) | |
| } | |
| throw new Error(error.error?.message || 'Failed to transcribe audio') | |
| } | |
| const data = await response.json() | |
| const transcription = data.candidates?.[0]?.content?.parts?.[0]?.text || 'Could not transcribe audio' | |
| return NextResponse.json({ transcription }) | |
| } catch (error) { | |
| console.error('Transcription error:', error) | |
| // Provide a helpful fallback message | |
| return NextResponse.json({ | |
| transcription: '', | |
| error: error instanceof Error ? error.message : 'Transcription failed. Note: Audio transcription requires Gemini 1.5 Pro or a dedicated speech-to-text API.' | |
| }, { status: 500 }) | |
| } | |
| } |