"""
ElevenLabs Voice Integration for FocusFlow.

Provides optional voice feedback for focus agent and Pomodoro timer.
Gracefully falls back to text-only mode if API key is missing or quota exceeded.
"""

import os
import tempfile
from typing import Any, Dict, Optional
from pathlib import Path


class VoiceGenerator:
    """
    Handles text-to-speech generation using ElevenLabs API.

    Designed for graceful degradation - never crashes if voice unavailable.
    Every public method returns None (or an "unavailable"/"error" status
    dict) instead of raising when audio cannot be produced.
    """

    def __init__(self):
        """Initialize ElevenLabs client if API key available."""
        self.initialize()

    def initialize(self):
        """
        Initialize or re-initialize the client.

        Resets ``client`` and ``available`` first, so calling this again
        after the environment changed picks up the new state. Failures are
        printed and leave the generator in text-only mode.
        """
        self.client = None
        self.available = False
        self.voice_id = "JBFqnCBsd6RMkjVDRZzb"  # George - friendly, clear voice
        self.model_id = "eleven_turbo_v2_5"  # Fast, low-latency model

        try:
            # Check for API key (demo key first, then user key). The demo
            # value is read once so the reported key type always matches
            # the key that was actually used.
            demo_key = os.getenv("DEMO_ELEVEN_API_KEY")
            api_key = demo_key or os.getenv("ELEVEN_API_KEY")

            if not api_key:
                print("ℹ️ ElevenLabs: No API key found. Voice feedback disabled (text-only mode).")
                return

            # Imported lazily so the module still loads when the optional
            # package is not installed.
            from elevenlabs.client import ElevenLabs

            self.client = ElevenLabs(api_key=api_key)
            self.available = True

            key_type = "demo" if demo_key else "user"
            print(f"✅ ElevenLabs voice initialized ({key_type} key)")

        except ImportError:
            print("⚠️ ElevenLabs: Package not installed. Run: pip install elevenlabs")
        except Exception as e:
            print(f"⚠️ ElevenLabs: Initialization failed: {e}")

    @staticmethod
    def _write_temp_audio(audio_bytes: bytes) -> str:
        """Write audio bytes to a persistent temporary MP3 file and return its path."""
        temp_file = tempfile.NamedTemporaryFile(
            delete=False,
            suffix=".mp3",
            prefix="focusflow_voice_"
        )
        try:
            # The context manager guarantees the handle is closed even if
            # the write fails.
            with temp_file:
                temp_file.write(audio_bytes)
        except Exception:
            # Do not leave a partial file behind; the original error is the
            # one worth reporting, so a failed cleanup is ignored.
            try:
                os.unlink(temp_file.name)
            except OSError:
                pass
            raise
        return temp_file.name

    def text_to_speech(self, text: str, emotion: str = "neutral") -> Optional[str]:
        """
        Convert text to speech and return path to temporary audio file.

        Args:
            text: Text to convert to speech
            emotion: Emotion hint (not used in current implementation)

        Returns:
            Path to temporary MP3 file, or None if voice is disabled or
            unavailable, the text is empty, or generation failed
        """
        # Check if voice is enabled globally
        if os.getenv("VOICE_ENABLED", "true").lower() == "false":
            return None

        if not self.available or not self.client:
            return None

        # Nothing to say - skip the API call instead of spending quota.
        if not text or not text.strip():
            return None

        try:
            # Generate audio using ElevenLabs API
            audio = self.client.text_to_speech.convert(
                text=text,
                voice_id=self.voice_id,
                model_id=self.model_id,
                output_format="mp3_44100_128"
            )

            # Convert generator/stream to bytes. A raw bytes object is
            # accepted too, since joining it chunk-wise would raise TypeError.
            if isinstance(audio, (bytes, bytearray)):
                audio_bytes = bytes(audio)
            else:
                audio_bytes = b"".join(audio)

            if not audio_bytes:
                # An empty file would look like success to the caller.
                print("⚠️ ElevenLabs: TTS returned no audio data")
                return None

            # Save to temporary file (Gradio expects file path, not data URL)
            return self._write_temp_audio(audio_bytes)

        except Exception as e:
            # Graceful degradation - log error but don't crash
            print(f"⚠️ ElevenLabs: TTS failed: {e}")
            return None

    def get_focus_message_audio(self, verdict: str, message: str) -> Optional[str]:
        """
        Generate voice feedback for focus check results.

        Args:
            verdict: "On Track", "Distracted", or "Idle"
            message: Text message to speak

        Returns:
            Path to temporary audio file or None
        """
        if not self.available:
            return None

        # Add emotion/tone based on verdict (for future voice modulation)
        emotion_map = {
            "On Track": "cheerful",
            "Distracted": "concerned",
            "Idle": "motivating"
        }
        emotion = emotion_map.get(verdict, "neutral")

        return self.text_to_speech(message, emotion=emotion)

    def get_pomodoro_audio(self, event_type: str) -> Optional[str]:
        """
        Generate voice alerts for Pomodoro timer events.

        Args:
            event_type: "work_complete" or "break_complete"; any other
                value falls back to a generic "Timer complete!" message

        Returns:
            Path to temporary audio file or None
        """
        if not self.available:
            return None

        messages = {
            "work_complete": "Great work! Time for a 5-minute break. You've earned it!",
            "break_complete": "Break's over! Let's get back to work and stay focused!"
        }
        message = messages.get(event_type, "Timer complete!")

        return self.text_to_speech(message, emotion="cheerful")

    def test_voice(self) -> Dict[str, Any]:
        """
        Test voice generation (for setup/debugging).

        Returns:
            Dict with "status" ("success", "error" or "unavailable"),
            a human-readable "message", and "audio" (path to the generated
            file, or None)
        """
        if not self.available:
            return {
                "status": "unavailable",
                "message": "Voice not available (no API key or initialization failed)",
                "audio": None
            }

        try:
            test_message = "Hello! FocusFlow voice is working perfectly!"
            audio = self.text_to_speech(test_message)

            if audio:
                return {
                    "status": "success",
                    "message": "Voice test successful!",
                    "audio": audio
                }
            else:
                return {
                    "status": "error",
                    "message": "Voice generation failed",
                    "audio": None
                }
        except Exception as e:
            return {
                "status": "error",
                "message": f"Voice test failed: {str(e)}",
                "audio": None
            }


# Global voice generator instance
voice_generator = VoiceGenerator()


def get_voice_status() -> str:
    """
    Get human-readable voice status for UI display.

    Returns:
        Status string like "✅ ElevenLabs Voice Enabled" or "ℹ️ Voice Disabled"
    """
    if voice_generator.available:
        return "✅ ElevenLabs Voice Enabled"
    else:
        return "ℹ️ Voice Disabled (text-only mode)"