"""Kokoro TTS API: a small FastAPI service that turns text into WAV audio.

The Kokoro ONNX model is loaded at import time when its files are already on
disk; otherwise they are downloaded and loaded in the FastAPI startup hook.
"""

import os
import tempfile
import urllib.request
from datetime import datetime

import soundfile as sf
from fastapi import FastAPI, HTTPException, Form, BackgroundTasks
from fastapi.responses import FileResponse
from kokoro_onnx import Kokoro

# ============== CONFIG ==============
MAX_CHARS = 4500  # ~5 minutes of audio (speaking rate: ~900 chars/min)
MIN_CHARS = 5
MAX_AUDIO_DURATION = 300  # 5 minutes of audio

# Accepted range for the `speed` form field of /api/generate.
MIN_SPEED = 0.5
MAX_SPEED = 2.0

# Model files: one set of paths shared by the import-time load and the
# startup download, so both always look at the same files.
MODEL_FILE = "kokoro-v0_19.onnx"
VOICES_DIR = "voices"
VOICES_FILE = "voices/voices.bin"
MODEL_URL = "https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files/kokoro-v0_19.onnx"
VOICES_URL = "https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files/voices.bin"

# Voices advertised by the API. This list is informational only: the `voice`
# value is passed to Kokoro unchanged and is not validated against it.
AVAILABLE_VOICES = ["af_heart", "af_bella", "am_adam", "am_michael", "bf_emma", "bf_isabella"]

# ============== KOKORO TTS MODEL ==============
print("🎤 Loading Kokoro TTS model...")
try:
    kokoro = Kokoro(MODEL_FILE, VOICES_FILE)
    print("✅ Kokoro TTS loaded successfully!")
except Exception:
    # Best-effort: any load failure defers to the startup hook, which
    # downloads missing files and retries the load.
    print("⚠️ Kokoro not found locally. Will download on first use.")
    kokoro = None

app = FastAPI(
    title="Kokoro TTS API - Fast & Simple",
    description="High-speed text-to-speech with emotional voices",
    version="2.0"
)


# ============== HELPERS ==============
def cleanup_file(path: str) -> None:
    """Delete a temporary file, best-effort.

    A missing file is not an error. Any other OS-level failure is reported
    on stdout and swallowed, so a cleanup problem never breaks a response.
    """
    try:
        os.unlink(path)
    except FileNotFoundError:
        pass
    except OSError as exc:
        print(f"⚠️ Could not delete temporary file {path}: {exc}")


def _download_if_missing(url: str, dest: str, start_msg: str, done_msg: str) -> None:
    """Download `url` to `dest` unless `dest` already exists.

    The data is written to `dest + ".part"` and renamed into place only after
    the download completes, so an interrupted download is never mistaken for
    a complete file on the next start.
    """
    if os.path.exists(dest):
        return
    print(start_msg)
    partial = dest + ".part"
    try:
        urllib.request.urlretrieve(url, partial)
        os.replace(partial, dest)
    finally:
        # After a successful rename the .part file is gone and this is a no-op.
        cleanup_file(partial)
    print(done_msg)


@app.on_event("startup")
def startup() -> None:
    """Download the model files if needed and load Kokoro.

    Does nothing when the model was already loaded at import time.
    """
    global kokoro
    if kokoro is not None:
        return

    print("📥 Downloading Kokoro TTS model files...")

    # Create directory for voices
    os.makedirs(VOICES_DIR, exist_ok=True)

    _download_if_missing(
        VOICES_URL, VOICES_FILE,
        "Downloading voices.bin...", "✅ Voices downloaded!",
    )
    _download_if_missing(
        MODEL_URL, MODEL_FILE,
        "Downloading kokoro-v0_19.onnx...", "✅ Model downloaded!",
    )

    print("🎤 Initializing Kokoro TTS...")
    kokoro = Kokoro(MODEL_FILE, VOICES_FILE)
    print("✅ Kokoro TTS loaded!")


def generate_speech(text: str, voice: str = "bf_isabella", speed: float = 1.0) -> str:
    """
    Generate speech using Kokoro TTS and write it to a temporary WAV file.

    Available voices: af_heart, af_bella, am_adam, am_michael, bf_emma, bf_isabella

    Args:
        text: Text to synthesize; its length must be within
            [MIN_CHARS, MAX_CHARS].
        voice: Voice name passed through to Kokoro.
        speed: Speed value passed through to Kokoro.

    Returns:
        Path of the generated ``.wav`` file. The caller owns the file and is
        responsible for deleting it.

    Raises:
        ValueError: If `text` is shorter than MIN_CHARS or longer than MAX_CHARS.
        RuntimeError: If the Kokoro model has not been loaded.
    """
    if len(text) < MIN_CHARS:
        raise ValueError(f"Text too short. Minimum {MIN_CHARS} characters.")

    if len(text) > MAX_CHARS:
        raise ValueError(f"Text too long. Maximum {MAX_CHARS} characters (~5 min audio).")

    if kokoro is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise RuntimeError("Kokoro TTS model is not loaded")

    # Generate audio samples
    samples, sample_rate = kokoro.create(
        text=text,
        voice=voice,
        speed=speed,
        lang="en-us"
    )

    # Reserve a unique path and close the handle before soundfile opens the
    # file, so the file is never held open twice.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        output_path = tmp.name
    try:
        sf.write(output_path, samples, sample_rate)
    except Exception:
        # Do not leave an orphaned temp file behind when writing fails.
        cleanup_file(output_path)
        raise
    return output_path


# ============== API ENDPOINTS ==============
@app.get("/")
def root():
    return {
        "service": "Kokoro TTS API",
        "status": "running",
        "model": "Kokoro-82M",
        "version": "2.0",
        "features": {
            "speed": "10x faster than XTTS",
            "voices": len(AVAILABLE_VOICES),
            "max_chars": MAX_CHARS,
            "emotional": True
        },
        "endpoints": {
            "health": "/health",
            "generate": "/api/generate (POST)",
            "docs": "/docs"
        }
    }


@app.get("/health")
def health():
    return {
        "status": "healthy",
        "model": "Kokoro TTS 82M",
        "speed": "10x faster than XTTS",
        "max_chars": MAX_CHARS,
        "voices": list(AVAILABLE_VOICES)
    }


@app.post("/api/generate")
async def generate_tts(
    background_tasks: BackgroundTasks,
    text: str = Form(..., description="Text to convert to speech"),
    voice: str = Form("bf_isabella", description="Voice to use"),
    speed: float = Form(1.0, description="Speech speed (0.5-2.0)")
):
    """
    Generate TTS with Kokoro (Fast & Emotional)

    **Performance:**
    - Max audio: 5 minutes (4500 chars)
    - Generation: ~20-30 seconds on CPU
    - Speech rate: ~900 chars/minute

    **Available Voices:**
    - `af_heart`: American Female (warm)
    - `af_bella`: American Female (professional)
    - `am_adam`: American Male (confident)
    - `am_michael`: American Male (friendly)
    - `bf_emma`: British Female (elegant)
    - `bf_isabella`: British Female (storytelling) ⭐ Best for long content

    **Example:**
    ```bash
    curl -X POST https://your-space.hf.space/api/generate \\
      -F "text=Hello world, this is Kokoro TTS!" \\
      -F "voice=bf_isabella" \\
      -F "speed=1.0" \\
      --output audio.wav
    ```
    """
    # Validate speed before entering the try block: raised inside it, this
    # HTTPException would be caught by `except Exception` below and turned
    # into a 500. The chained comparison also rejects NaN, which a pair of
    # `<` / `>` checks would let through.
    if not (MIN_SPEED <= speed <= MAX_SPEED):
        raise HTTPException(status_code=400, detail="Speed must be between 0.5 and 2.0")

    # NOTE(review): generate_speech is synchronous and is called directly
    # from this coroutine, so the event loop is blocked while audio is
    # generated. Consider a plain `def` endpoint or a worker thread once
    # Kokoro's thread-safety is confirmed.
    try:
        # Generate speech
        output_path = generate_speech(text.strip(), voice, speed)

        # Schedule cleanup after response is sent
        background_tasks.add_task(cleanup_file, output_path)

        # Return audio file
        response = FileResponse(
            output_path,
            media_type="audio/wav",
            filename=f"kokoro_{datetime.now().strftime('%Y%m%d_%H%M%S')}.wav"
        )
        response.headers["X-Character-Count"] = str(len(text))
        response.headers["X-Voice-Used"] = voice

        return response

    except HTTPException:
        # Never re-wrap a deliberate HTTP error as a 500.
        raise
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"TTS generation failed: {str(e)}") from e


if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)