Spaces:

MCP-1st-Birthday
/

LifeAdmin-AI

Running

File size: 3,165 Bytes

import os
from pathlib import Path
import asyncio


async def text_to_speech(text: str, output_path: str = None) -> str:
    """
    Convert text to speech using ElevenLabs and save the audio to disk.

    Args:
        text: Text to convert
        output_path: Output audio file path. When None, a uniquely
            timestamped .mp3 under data/outputs/ is used.

    Returns:
        Path to generated audio file, or "" if synthesis failed or
        produced no audio (the error is printed, not raised).

    Raises:
        ValueError: If ELEVENLABS_API_KEY is not set.
    """
    import time

    api_key = os.getenv('ELEVENLABS_API_KEY')
    if not api_key:
        raise ValueError("ELEVENLABS_API_KEY not set")

    # Set only once this call has opened the output file, so the cleanup
    # below never deletes a file this call did not touch.
    written_file = None

    def _discard_output() -> None:
        """Best-effort removal of an empty or partially written output file."""
        if written_file is None:
            return
        try:
            written_file.unlink()
        except OSError:
            pass

    try:
        from elevenlabs.client import AsyncElevenLabs
        from elevenlabs import VoiceSettings

        client = AsyncElevenLabs(api_key=api_key)

        # Request the audio; the result is consumed with `async for` below.
        audio_stream = client.text_to_speech.convert(
            text=text,
            voice_id="21m00Tcm4TlvDq8ikWAM",  # Rachel voice
            model_id="eleven_monolingual_v1",
            voice_settings=VoiceSettings(
                stability=0.5,
                similarity_boost=0.75,
                style=0.5,
                use_speaker_boost=True
            )
        )

        if output_path is None:
            # Wall-clock nanoseconds: unlike the event loop's monotonic
            # clock truncated to seconds, this does not repeat across
            # restarts or for calls made within the same second.
            output_path = f"data/outputs/speech_{time.time_ns()}.mp3"

        destination = Path(output_path)
        destination.parent.mkdir(parents=True, exist_ok=True)

        with open(destination, 'wb') as f:
            written_file = destination
            async for chunk in audio_stream:
                if chunk:
                    f.write(chunk)

        # Only report success when real audio bytes reached the disk.
        if destination.is_file() and destination.stat().st_size > 0:
            return output_path

        _discard_output()
        return ""

    except Exception as e:
        print(f"TTS Error: {e}")
        # Do not leave a truncated file behind for callers to mistake
        # for valid audio; return empty path on error.
        _discard_output()
        return ""


async def speech_to_text(audio_path: str) -> str:
    """
    Transcribe an audio file with Groq's hosted Whisper model.

    Args:
        audio_path: Path to the audio file to transcribe

    Returns:
        The transcription returned by the API, or "" if anything fails
        (the error is printed rather than raised)

    Raises:
        ValueError: If GROQ_API_KEY is not set
    """
    groq_key = os.getenv('GROQ_API_KEY')
    if not groq_key:
        raise ValueError("GROQ_API_KEY not set for STT")

    try:
        from groq import AsyncGroq

        whisper = AsyncGroq(api_key=groq_key)
        request_options = {
            "model": "whisper-large-v3",
            "response_format": "text",
        }

        # Keep the file open for the duration of the upload.
        with open(audio_path, 'rb') as recording:
            result = await whisper.audio.transcriptions.create(
                file=recording,
                **request_options,
            )
    except Exception as err:
        print(f"STT Error: {err}")
        return ""
    else:
        return result


async def process_audio_input(audio_data: bytes) -> str:
    """
    Process audio input from microphone

    The bytes are written to a uniquely named temporary .wav file,
    passed to speech_to_text, and the file is removed afterwards even
    if transcription raises.

    Args:
        audio_data: Raw audio bytes

    Returns:
        Transcribed text
    """
    import tempfile

    # A unique file per call: a fixed shared path would let concurrent
    # calls overwrite each other's audio before it is transcribed.
    tmp = tempfile.NamedTemporaryFile(
        prefix="voice_input_", suffix=".wav", delete=False
    )
    temp_path = tmp.name
    try:
        # Close the handle before transcribing so the data is flushed
        # and the file can be reopened by path.
        with tmp:
            tmp.write(audio_data)

        return await speech_to_text(temp_path)
    finally:
        # Cleanup is best-effort: a failed delete must not discard a
        # successful transcription or mask the original exception.
        try:
            os.remove(temp_path)
        except OSError:
            pass