File size: 3,165 Bytes
791ca54
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ca3501a
791ca54
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ca3501a
791ca54
 
ca3501a
 
791ca54
ca3501a
 
 
 
 
791ca54
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ca3501a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
import os
from pathlib import Path
import asyncio


async def text_to_speech(text: str, output_path: str = None) -> str:
    """
    Convert text to speech using ElevenLabs and save the audio to disk.

    Args:
        text: Text to convert
        output_path: Output audio file path. When omitted, a unique file
            name under ``data/outputs/`` is generated for this call.

    Returns:
        Path to the generated audio file, or an empty string when the text
        is blank, the request fails, or no audio data was written.

    Raises:
        ValueError: If the ELEVENLABS_API_KEY environment variable is not set.
    """
    import time
    import uuid

    api_key = os.getenv('ELEVENLABS_API_KEY')
    if not api_key:
        raise ValueError("ELEVENLABS_API_KEY not set")

    # Nothing to synthesize: skip the API round trip entirely.
    if not text or not text.strip():
        return ""

    target = None
    wrote_file = False

    try:
        from elevenlabs.client import AsyncElevenLabs
        from elevenlabs import VoiceSettings

        client = AsyncElevenLabs(api_key=api_key)

        # Generate audio
        audio_generator = client.text_to_speech.convert(
            text=text,
            voice_id="21m00Tcm4TlvDq8ikWAM",  # Rachel voice
            model_id="eleven_monolingual_v1",
            voice_settings=VoiceSettings(
                stability=0.5,
                similarity_boost=0.75,
                style=0.5,
                use_speaker_boost=True
            )
        )

        if output_path is None:
            # Wall-clock timestamp plus a random suffix: the event-loop clock
            # is monotonic (not unique across restarts) and truncating it to
            # seconds lets calls made within the same second share one file.
            output_path = (
                f"data/outputs/speech_{int(time.time())}_{uuid.uuid4().hex[:8]}.mp3"
            )

        target = Path(output_path)
        target.parent.mkdir(parents=True, exist_ok=True)

        # Stream the audio chunks to disk as they arrive
        with open(target, 'wb') as f:
            wrote_file = True
            async for chunk in audio_generator:
                if chunk:
                    f.write(chunk)

        # Only report success when a non-empty file actually exists
        if target.is_file() and target.stat().st_size > 0:
            return output_path

    except Exception as e:
        # Best-effort API: report the problem and fall through to the
        # failure path instead of propagating.
        print(f"TTS Error: {e}")

    # Failure path: do not leave a truncated or empty file behind. Only
    # remove the file if this call opened (and therefore truncated) it.
    if wrote_file:
        try:
            target.unlink()
        except OSError:
            pass

    return ""


async def speech_to_text(audio_path: str) -> str:
    """
    Transcribe an audio file using Groq Whisper

    Args:
        audio_path: Location of the audio file to transcribe

    Returns:
        The transcription, or an empty string if any step fails

    Raises:
        ValueError: When the GROQ_API_KEY environment variable is not set
    """
    groq_key = os.getenv('GROQ_API_KEY')
    if not groq_key:
        raise ValueError("GROQ_API_KEY not set for STT")

    try:
        # Imported lazily so a missing SDK is reported like any other failure
        from groq import AsyncGroq

        whisper = AsyncGroq(api_key=groq_key)

        with open(audio_path, 'rb') as source:
            result = await whisper.audio.transcriptions.create(
                file=source,
                model="whisper-large-v3",
                response_format="text"
            )
    except Exception as e:
        print(f"STT Error: {e}")
        return ""
    else:
        return result


async def process_audio_input(audio_data: bytes) -> str:
    """
    Process audio input from microphone

    The bytes are written to a temporary ``.wav`` file, passed to
    ``speech_to_text``, and the temporary file is removed afterwards.

    Args:
        audio_data: Raw audio bytes

    Returns:
        Transcribed text (empty string if transcription failed)

    Raises:
        ValueError: Propagated from ``speech_to_text`` when GROQ_API_KEY
            is not set.
    """
    import tempfile

    # A unique file per call: a fixed shared path would let concurrent
    # calls overwrite or delete each other's audio mid-transcription.
    tmp = tempfile.NamedTemporaryFile(
        prefix="voice_input_", suffix=".wav", delete=False
    )
    temp_path = tmp.name

    try:
        # Close the file before transcribing so it can be reopened by path
        with tmp:
            tmp.write(audio_data)

        # Transcribe
        return await speech_to_text(temp_path)
    finally:
        # Cleanup runs even when writing or transcription raises
        if os.path.exists(temp_path):
            os.remove(temp_path)