root-tts / app.py
Hexa06's picture
Deploy TTS service with failover support
84f6936
from fastapi import FastAPI, HTTPException, Form, BackgroundTasks
from fastapi.responses import FileResponse
from kokoro_onnx import Kokoro
import tempfile
import os
from datetime import datetime
import soundfile as sf
# ============== CONFIG ==============
# Shortest input accepted for synthesis.
MIN_CHARS = 5
# Upper bound on generated audio: 5 minutes, expressed in seconds.
MAX_AUDIO_DURATION = 5 * 60
# Longest input accepted: ~5 minutes of audio at ~900 chars/min.
MAX_CHARS = 900 * 5
# ============== KOKORO TTS MODEL ==============
# Eagerly try to load the model from files already on disk. On any failure
# `kokoro` is left as None, which the startup handler treats as the signal to
# download the files and initialise the model itself.
print("🎤 Loading Kokoro TTS model...")
try:
    # Use the same voices *file* path the startup handler downloads to and
    # loads from ("voices" alone is only the containing directory).
    kokoro = Kokoro("kokoro-v0_19.onnx", "voices/voices.bin")
    print("✅ Kokoro TTS loaded successfully!")
except Exception as e:
    # Best-effort: a missing or unreadable model must not abort the import.
    print("⚠️ Kokoro not found locally. Will download on first use.")
    # Surface the actual cause so a corrupt file is distinguishable from a
    # missing one in the logs.
    print(f"   Reason: {e}")
    kokoro = None
# The ASGI application object; every route and event handler in this file
# is registered on it.
app = FastAPI(
    version="2.0",
    description="High-speed text-to-speech with emotional voices",
    title="Kokoro TTS API - Fast & Simple",
)
# Base URL of the release assets the model files are fetched from.
_MODEL_FILES_URL = "https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files"


def _download_if_missing(url: str, dest: str, done_message: str) -> None:
    """Download *url* to *dest* unless *dest* already exists.

    The payload is written to ``dest + ".part"`` and renamed into place only
    after the download finishes, so an interrupted transfer never leaves a
    truncated file at *dest* that a later existence check would accept.
    """
    import urllib.request

    if os.path.exists(dest):
        return

    print(f"Downloading {os.path.basename(dest)}...")
    partial = dest + ".part"
    try:
        urllib.request.urlretrieve(url, partial)
        os.replace(partial, dest)
    finally:
        # Only present here if the download or the rename failed.
        if os.path.exists(partial):
            os.unlink(partial)
    print(done_message)


@app.on_event("startup")
def startup():
    """Ensure the global ``kokoro`` model is loaded when the app starts.

    Does nothing if the module-level load already succeeded. Otherwise it
    downloads whichever of the voices/model files are missing and
    constructs the ``Kokoro`` instance. Download or initialisation errors
    propagate to the caller.
    """
    global kokoro
    if kokoro is not None:
        return

    print("📥 Downloading Kokoro TTS model files...")

    # Create directory for voices
    os.makedirs("voices", exist_ok=True)

    voices_file = "voices/voices.bin"
    _download_if_missing(
        f"{_MODEL_FILES_URL}/voices.bin",
        voices_file,
        "✅ Voices downloaded!",
    )

    model_file = "kokoro-v0_19.onnx"
    _download_if_missing(
        f"{_MODEL_FILES_URL}/kokoro-v0_19.onnx",
        model_file,
        "✅ Model downloaded!",
    )

    print("🎤 Initializing Kokoro TTS...")
    kokoro = Kokoro(model_file, voices_file)
    print("✅ Kokoro TTS loaded!")
# ============== HELPERS ==============
def cleanup_file(path: str):
    """Delete temporary file after response is sent.

    Best-effort: this never raises for ordinary failures. A file that is
    already gone is ignored silently; any other failure is reported on
    stdout so leaked temp files are visible in the logs.

    Args:
        path: Filesystem path of the file to remove.
    """
    try:
        # Unlink directly instead of checking existence first, which avoids
        # a race between the check and the delete.
        os.unlink(path)
    except FileNotFoundError:
        pass
    except Exception as exc:
        # Unlike a bare `except:`, this lets KeyboardInterrupt/SystemExit
        # propagate while keeping cleanup non-fatal.
        print(f"⚠️ Could not delete temporary file {path}: {exc}")
def generate_speech(text: str, voice: str = "bf_isabella", speed: float = 1.0) -> str:
    """
    Generate speech using Kokoro TTS and write it to a temporary WAV file.

    Available voices: af_heart, af_bella, am_adam, am_michael, bf_emma, bf_isabella

    Args:
        text: Text to synthesize; its length must be within
            [MIN_CHARS, MAX_CHARS].
        voice: Voice identifier passed through to ``kokoro.create``.
        speed: Speed factor passed through to ``kokoro.create``.

    Returns:
        Path of the WAV file that was written. The file is not deleted
        here; the caller is responsible for removing it.

    Raises:
        ValueError: If ``text`` is shorter than MIN_CHARS or longer than
            MAX_CHARS.
        RuntimeError: If the global ``kokoro`` model has not been loaded.
    """
    if len(text) < MIN_CHARS:
        raise ValueError(f"Text too short. Minimum {MIN_CHARS} characters.")
    if len(text) > MAX_CHARS:
        raise ValueError(f"Text too long. Maximum {MAX_CHARS} characters (~5 min audio).")

    # Fail with a clear message instead of an AttributeError on None.
    if kokoro is None:
        raise RuntimeError("Kokoro TTS model is not loaded")

    # Generate audio samples
    samples, sample_rate = kokoro.create(
        text=text,
        voice=voice,
        speed=speed,
        lang="en-us"
    )

    # Reserve a unique path, then close the handle before the file is written
    # by name; writing to a path that is still open fails on some platforms.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        wav_path = tmp.name

    try:
        sf.write(wav_path, samples, sample_rate)
    except Exception:
        # Don't leak the reserved temp file when writing fails.
        try:
            os.unlink(wav_path)
        except OSError:
            pass
        raise

    return wav_path
# ============== API ENDPOINTS ==============
@app.get("/")
def root():
    """Return a static description of the service, its features and endpoints."""
    feature_summary = {
        "speed": "10x faster than XTTS",
        "voices": 6,
        "max_chars": MAX_CHARS,
        "emotional": True,
    }
    endpoint_index = {
        "health": "/health",
        "generate": "/api/generate (POST)",
        "docs": "/docs",
    }
    # Key order is kept as-is so the serialized response is unchanged.
    return dict(
        service="Kokoro TTS API",
        status="running",
        model="Kokoro-82M",
        version="2.0",
        features=feature_summary,
        endpoints=endpoint_index,
    )
@app.get("/health")
def health():
    """Return a fixed health payload with the model name, limits and voice ids."""
    voice_ids = [
        "af_heart",
        "af_bella",
        "am_adam",
        "am_michael",
        "bf_emma",
        "bf_isabella",
    ]
    payload = {"status": "healthy"}
    payload["model"] = "Kokoro TTS 82M"
    payload["speed"] = "10x faster than XTTS"
    payload["max_chars"] = MAX_CHARS
    payload["voices"] = voice_ids
    return payload
@app.post("/api/generate")
async def generate_tts(
    background_tasks: BackgroundTasks,
    text: str = Form(..., description="Text to convert to speech"),
    voice: str = Form("bf_isabella", description="Voice to use"),
    speed: float = Form(1.0, description="Speech speed (0.5-2.0)")
):
    """
    Generate TTS with Kokoro (Fast & Emotional)

    **Performance:**
    - Max audio: 5 minutes (4500 chars)
    - Generation: ~20-30 seconds on CPU
    - Speech rate: ~900 chars/minute

    **Available Voices:**
    - `af_heart`: American Female (warm)
    - `af_bella`: American Female (professional)
    - `am_adam`: American Male (confident)
    - `am_michael`: American Male (friendly)
    - `bf_emma`: British Female (elegant)
    - `bf_isabella`: British Female (storytelling) ⭐ Best for long content

    **Example:**
    ```bash
    curl -X POST https://your-space.hf.space/api/generate \\
    -F "text=Hello world, this is Kokoro TTS!" \\
    -F "voice=bf_isabella" \\
    -F "speed=1.0" \\
    --output audio.wav
    ```
    """
    # Responds with the generated WAV file. Errors map to:
    #   400 - speed outside [0.5, 2.0], or text rejected by generate_speech
    #   500 - any other failure during generation or response construction
    try:
        # Validate speed. The chained comparison also rejects NaN, which
        # would slip through separate `<` / `>` checks.
        if not (0.5 <= speed <= 2.0):
            raise HTTPException(status_code=400, detail="Speed must be between 0.5 and 2.0")

        # The stripped text is what gets validated and synthesized.
        clean_text = text.strip()

        # Generate speech
        output_path = generate_speech(clean_text, voice, speed)

        # Schedule cleanup after response is sent
        background_tasks.add_task(cleanup_file, output_path)

        # Return audio file
        response = FileResponse(
            output_path,
            media_type="audio/wav",
            filename=f"kokoro_{datetime.now().strftime('%Y%m%d_%H%M%S')}.wav"
        )
        # Report the length of the text actually synthesized.
        response.headers["X-Character-Count"] = str(len(clean_text))
        response.headers["X-Voice-Used"] = voice
        return response
    except HTTPException:
        # Re-raise deliberate HTTP errors untouched; otherwise the generic
        # handler below would turn the 400 above into a 500.
        raise
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"TTS generation failed: {str(e)}") from e
if __name__ == "__main__":
    # Executed as a script: serve the app with uvicorn on all interfaces,
    # port 7860.
    from uvicorn import run as serve

    serve(app, port=7860, host="0.0.0.0")