Maheen001 committed on
Commit
791ca54
·
verified ·
1 Parent(s): bf47268

Create utils/audio_utils.py

Browse files
Files changed (1) hide show
  1. utils/audio_utils.py +112 -0
utils/audio_utils.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from pathlib import Path
3
+ import asyncio
4
+
5
+
6
async def text_to_speech(text: str, output_path: str = None) -> str:
    """
    Convert text to speech using ElevenLabs and save the audio to disk.

    Args:
        text: Text to convert
        output_path: Output audio file path. When omitted, a timestamped
            file under ``data/outputs/`` is used.

    Returns:
        Path to generated audio file, or an empty string if synthesis or
        writing failed (the error is printed).

    Raises:
        ValueError: If ELEVENLABS_API_KEY is not set (or is empty).
    """
    import inspect
    import time

    # Read the key once so the check and the client use the same value.
    api_key = os.getenv('ELEVENLABS_API_KEY')
    if not api_key:
        raise ValueError("ELEVENLABS_API_KEY not set")

    try:
        from elevenlabs.client import AsyncElevenLabs
        from elevenlabs import VoiceSettings

        client = AsyncElevenLabs(api_key=api_key)

        # Generate audio
        audio_stream = client.text_to_speech.convert(
            text=text,
            voice_id="21m00Tcm4TlvDq8ikWAM",  # Rachel voice
            model_id="eleven_monolingual_v1",
            voice_settings=VoiceSettings(
                stability=0.5,
                similarity_boost=0.75,
                style=0.5,
                use_speaker_boost=True,
            ),
        )
        # Depending on the SDK version, convert() hands back either an async
        # iterator directly or an awaitable resolving to one. Awaiting an
        # async generator raises TypeError, so only await when it is valid.
        if inspect.isawaitable(audio_stream):
            audio_stream = await audio_stream

        # Save audio
        if output_path is None:
            # Wall-clock nanoseconds: the event-loop clock is monotonic (not
            # a real timestamp) and truncating it to whole seconds made two
            # calls within the same second overwrite each other's output.
            output_path = f"data/outputs/speech_{time.time_ns()}.mp3"

        Path(output_path).parent.mkdir(parents=True, exist_ok=True)

        # Write audio chunks
        try:
            with open(output_path, 'wb') as f:
                async for chunk in audio_stream:
                    f.write(chunk)
        except Exception:
            # Do not leave a truncated audio file behind when the caller is
            # going to be told (via "") that nothing was produced.
            try:
                os.remove(output_path)
            except OSError:
                pass
            raise

        return output_path

    except Exception as e:
        # Best-effort contract: report the problem and return an empty path.
        print(f"TTS Error: {e}")
        return ""
56
+
57
+
58
async def speech_to_text(audio_path: str) -> str:
    """
    Convert speech to text using Groq Whisper

    Args:
        audio_path: Path to audio file

    Returns:
        Transcribed text, or an empty string if the file could not be read
        or the transcription request failed (the error is printed).

    Raises:
        ValueError: If GROQ_API_KEY is not set (or is empty).
    """
    # Read the key once so the check and the client use the same value.
    api_key = os.getenv('GROQ_API_KEY')
    if not api_key:
        raise ValueError("GROQ_API_KEY not set for STT")

    try:
        from groq import AsyncGroq

        client = AsyncGroq(api_key=api_key)

        # Keep the file open for the duration of the upload only.
        with open(audio_path, 'rb') as audio_file:
            transcription = await client.audio.transcriptions.create(
                file=audio_file,
                model="whisper-large-v3",
                response_format="text",
            )

        # Honour the declared ``-> str`` return type: pass plain text through
        # unchanged, and unwrap a response object that carries ``.text``.
        if isinstance(transcription, str):
            return transcription
        return str(getattr(transcription, "text", transcription))

    except Exception as e:
        # Best-effort contract: report the problem and return empty text.
        print(f"STT Error: {e}")
        return ""
88
+
89
+
90
async def process_audio_input(audio_data: bytes) -> str:
    """
    Process audio input from microphone

    The bytes are written to a uniquely named temporary ``.wav`` file,
    transcribed with ``speech_to_text``, and the file is removed afterwards.

    Args:
        audio_data: Raw audio bytes

    Returns:
        Transcribed text
    """
    import tempfile

    # A unique path per call: a single fixed file name would let concurrent
    # calls overwrite or delete each other's audio. delete=False because the
    # file is re-opened by name after this handle is closed.
    tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    temp_path = tmp.name
    try:
        # Save temp audio file
        with tmp:
            tmp.write(audio_data)

        # Transcribe
        return await speech_to_text(temp_path)
    finally:
        # Cleanup runs even when writing or transcription raises.
        if os.path.exists(temp_path):
            os.remove(temp_path)