LifeAdmin-AI / utils /audio_utils.py
Maheen001's picture
Update utils/audio_utils.py
ca3501a verified
raw
history blame
3.17 kB
import os
from pathlib import Path
import asyncio
async def text_to_speech(text: str, output_path: str = None) -> str:
    """
    Convert text to speech using ElevenLabs and save the audio to a file.

    Args:
        text: Text to convert
        output_path: Output audio file path. When None, a new file named
            ``speech_<nanosecond wall-clock timestamp>.mp3`` is created
            under ``data/outputs/``.

    Returns:
        Path to generated audio file, or "" if generation failed or no
        audio data was written.

    Raises:
        ValueError: If ELEVENLABS_API_KEY is not set (or is empty).
    """
    api_key = os.getenv('ELEVENLABS_API_KEY')
    if not api_key:
        raise ValueError("ELEVENLABS_API_KEY not set")

    # Set only once this call has opened the file for writing, so the
    # failure path below never deletes a file this call did not touch.
    written_file = None

    try:
        # Imported lazily so the module can be loaded without the SDK.
        from elevenlabs.client import AsyncElevenLabs
        from elevenlabs import VoiceSettings

        client = AsyncElevenLabs(api_key=api_key)

        # Generate audio
        audio_generator = client.text_to_speech.convert(
            text=text,
            voice_id="21m00Tcm4TlvDq8ikWAM",  # Rachel voice
            model_id="eleven_monolingual_v1",
            voice_settings=VoiceSettings(
                stability=0.5,
                similarity_boost=0.75,
                style=0.5,
                use_speaker_boost=True,
            ),
        )

        if output_path is None:
            import time

            # Wall-clock nanoseconds: the event loop clock is monotonic
            # (not comparable across runs) and whole seconds collide when
            # two requests arrive close together.
            output_path = f"data/outputs/speech_{time.time_ns()}.mp3"

        destination = Path(output_path)
        destination.parent.mkdir(parents=True, exist_ok=True)

        # Stream the audio chunks to disk as they arrive.
        with open(destination, 'wb') as f:
            written_file = destination
            async for chunk in audio_generator:
                if chunk:
                    f.write(chunk)

        # Only report success when the file exists and is not empty.
        if destination.is_file() and destination.stat().st_size > 0:
            return output_path
    except Exception as e:
        # Best-effort API: report the problem and signal failure with "".
        print(f"TTS Error: {e}")

    # Failure path: do not leave a partial or empty audio file behind.
    if written_file is not None:
        try:
            written_file.unlink()
        except OSError:
            pass  # cleanup is best-effort; the caller still gets ""
    return ""
async def speech_to_text(audio_path: str) -> str:
    """
    Transcribe an audio file with Groq's Whisper model.

    Args:
        audio_path: Path to the audio file to transcribe

    Returns:
        The value returned by the Groq transcription call (requested with
        response_format="text"), or "" if anything goes wrong.

    Raises:
        ValueError: If GROQ_API_KEY is not set (or is empty).
    """
    groq_key = os.getenv('GROQ_API_KEY')
    if not groq_key:
        raise ValueError("GROQ_API_KEY not set for STT")

    try:
        # Imported lazily so the module can be loaded without the SDK.
        from groq import AsyncGroq

        whisper_client = AsyncGroq(api_key=groq_key)

        # The file must stay open until the upload has completed.
        with open(audio_path, 'rb') as recording:
            result = await whisper_client.audio.transcriptions.create(
                file=recording,
                model="whisper-large-v3",
                response_format="text",
            )
    except Exception as err:
        # Best-effort API: report the problem and signal failure with "".
        print(f"STT Error: {err}")
        return ""

    return result
async def process_audio_input(audio_data: bytes) -> str:
    """
    Process audio input from microphone.

    The bytes are written to a uniquely named temporary ``.wav`` file,
    passed to ``speech_to_text``, and the file is removed afterwards even
    if transcription raises.

    Args:
        audio_data: Raw audio bytes

    Returns:
        Transcribed text, as returned by ``speech_to_text``
    """
    import tempfile

    # A unique file per call: a fixed shared path would let concurrent
    # calls overwrite or delete each other's audio. delete=False because
    # the file is reopened by path after this handle is closed.
    tmp = tempfile.NamedTemporaryFile(
        prefix="voice_input_", suffix=".wav", delete=False
    )
    temp_path = tmp.name

    try:
        # Save temp audio file (closed before transcription reopens it)
        with tmp:
            tmp.write(audio_data)

        # Transcribe
        return await speech_to_text(temp_path)
    finally:
        # Cleanup runs on success and on failure alike.
        try:
            os.remove(temp_path)
        except FileNotFoundError:
            pass  # already gone; nothing left to clean up