Spaces:

MCP-1st-Birthday
/

OmniMind-Orchestrator

Running

App Files Files Community

mgbam committed 17 days ago

Commit

8d35b9c

verified ·

1 Parent(s): 90848ed

Upload voice_interface.py

Browse files

Files changed (1) hide show

ui/voice_interface.py +168 -0

ui/voice_interface.py ADDED Viewed

	@@ -0,0 +1,168 @@

+"""
+ElevenLabs Voice Interface - For $2K + AirPods Pro Prize
+Voice-first enterprise AI interaction.
+"""
+import os
+from typing import Optional, AsyncGenerator
+import asyncio
+try:
+    from elevenlabs import ElevenLabs, VoiceSettings
+    from elevenlabs.client import AsyncElevenLabs
+    ELEVENLABS_AVAILABLE = True
+except ImportError:
+    ELEVENLABS_AVAILABLE = False
+    print("[WARNING]  ElevenLabs not installed")
+class VoiceInterface:
+    """
+    Voice-first interface for OmniMind using ElevenLabs.
+    Prize Integration: ElevenLabs Category Award ($2K + AirPods Pro)
+    - Natural conversational AI
+    - Streaming voice responses
+    - Enterprise-grade voice quality
+    """
+    def __init__(self):
+        self.api_key = os.getenv("ELEVENLABS_API_KEY")
+        if not ELEVENLABS_AVAILABLE or not self.api_key:
+            self.client = None
+            print("[WARNING]  ElevenLabs not configured")
+            return
+        self.client = AsyncElevenLabs(api_key=self.api_key)
+        # Voice configurations for different personas
+        self.voices = {
+            "professional": "ErXwobaYiN019PkySvjV",  # Antoni - professional male
+            "friendly": "EXAVITQu4vr4xnSDxMaL",  # Sarah - friendly female
+            "executive": "VR6AewLTigWG4xSOukaG",  # Arnold - authoritative male
+        }
+        self.current_voice = "professional"
+    async def text_to_speech(
+        self,
+        text: str,
+        voice: str = "professional",
+        stream: bool = True
+    ) -> AsyncGenerator[bytes, None]:
+        """
+        Convert text to speech with streaming support.
+        Args:
+            text: Text to convert
+            voice: Voice persona (professional, friendly, executive)
+            stream: Stream audio chunks for real-time playback
+        Yields:
+            Audio chunks (bytes)
+        """
+        if not self.client:
+            # Return empty generator if not configured
+            return
+            yield
+        voice_id = self.voices.get(voice, self.voices["professional"])
+        if stream:
+            # Streaming for real-time responses
+            audio_stream = await self.client.text_to_speech.convert_as_stream(
+                text=text,
+                voice_id=voice_id,
+                model_id="eleven_turbo_v2_5",  # Fastest model
+                voice_settings=VoiceSettings(
+                    stability=0.5,
+                    similarity_boost=0.75,
+                    style=0.5,
+                    use_speaker_boost=True
+                )
+            )
+            async for chunk in audio_stream:
+                yield chunk
+        else:
+            # Non-streaming for complete audio
+            audio = await self.client.text_to_speech.convert(
+                text=text,
+                voice_id=voice_id,
+                model_id="eleven_turbo_v2_5",
+                voice_settings=VoiceSettings(
+                    stability=0.5,
+                    similarity_boost=0.75,
+                    style=0.5,
+                    use_speaker_boost=True
+                )
+            )
+            yield audio
+    async def speech_to_text(self, audio_data: bytes) -> str:
+        """
+        Convert speech to text (using OpenAI Whisper as ElevenLabs doesn't have STT).
+        Args:
+            audio_data: Audio bytes (WAV format)
+        Returns:
+            Transcribed text
+        """
+        # ElevenLabs doesn't have STT, so we use OpenAI Whisper
+        from openai import AsyncOpenAI
+        openai_client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+        # Save audio temporarily
+        import tempfile
+        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
+            f.write(audio_data)
+            audio_path = f.name
+        try:
+            with open(audio_path, "rb") as audio_file:
+                transcript = await openai_client.audio.transcriptions.create(
+                    model="whisper-1",
+                    file=audio_file
+                )
+            return transcript.text
+        finally:
+            # Cleanup
+            import os
+            os.unlink(audio_path)
+    async def get_available_voices(self):
+        """Get list of available voices"""
+        if not self.client:
+            return {"status": "unavailable", "voices": []}
+        voices = await self.client.voices.get_all()
+        return {
+            "status": "success",
+            "voices": [
+                {
+                    "voice_id": voice.voice_id,
+                    "name": voice.name,
+                    "category": voice.category
+                }
+                for voice in voices.voices
+            ]
+        }
+    def set_voice(self, voice_name: str):
+        """Set the current voice persona"""
+        if voice_name in self.voices:
+            self.current_voice = voice_name
+            return True
+        return False
+# Global voice interface: a single module-level VoiceInterface instance,
+# constructed when this module is imported.
+voice = VoiceInterface()