# Source: Hugging Face Spaces - Hexa09/root-tts (Space status: Sleeping)
# File size: 6,043 Bytes - revision 84f6936

from fastapi import FastAPI, HTTPException, Form, BackgroundTasks
from fastapi.responses import FileResponse
from kokoro_onnx import Kokoro
import tempfile
import os
from datetime import datetime
import soundfile as sf

# ============== CONFIG ==============
# Accepted text-length window for one request. At a speaking rate of roughly
# 900 characters per minute, 4500 characters is about five minutes of audio.
MIN_CHARS, MAX_CHARS = 5, 4500
# Audio-length ceiling in seconds (five minutes).
MAX_AUDIO_DURATION = 5 * 60

# ============== KOKORO TTS MODEL ==============
# Try to load the model from files that are already on disk. The voices path
# is the same "voices/voices.bin" file that the startup hook downloads to and
# loads from, so a model fetched on an earlier run is picked up right here
# instead of always falling through to the startup hook.
print("🎤 Loading Kokoro TTS model...")
try:
    kokoro = Kokoro("kokoro-v0_19.onnx", "voices/voices.bin")
    print("✅ Kokoro TTS loaded successfully!")
except Exception as e:
    # Deliberately broad: any failure here (missing files, unreadable model)
    # is recoverable because the startup hook downloads and loads the model.
    # The reason is printed so a real problem is not hidden behind the
    # generic "not found" message.
    print("⚠️  Kokoro not found locally. Will download on first use.")
    print(f"   Reason: {e}")
    kokoro = None

# The FastAPI application object; every route and the startup hook in this
# module are registered on it. The keyword arguments are the API metadata.
app = FastAPI(
    version="2.0",
    description="High-speed text-to-speech with emotional voices",
    title="Kokoro TTS API - Fast & Simple",
)

def _download_if_missing(url: str, dest: str, done_message: str) -> None:
    """Download *url* to *dest* unless *dest* already exists.

    The payload is written to ``dest + ".part"`` and moved into place only
    after the transfer has finished, so an interrupted download never leaves
    a truncated file at *dest* that a later start would treat as complete.

    Args:
        url: Remote location of the file.
        dest: Final local path of the file.
        done_message: Line printed once the file is in place.

    Raises:
        OSError: If the download or the final rename fails (this includes
            ``urllib.error.URLError``, which is an ``OSError`` subclass).
    """
    import urllib.request

    if os.path.exists(dest):
        return

    print(f"Downloading {os.path.basename(dest)}...")
    partial = dest + ".part"
    try:
        urllib.request.urlretrieve(url, partial)
        os.replace(partial, dest)
    finally:
        # After a successful os.replace the partial file is gone and this is
        # a no-op; after a failure it removes the incomplete download.
        if os.path.exists(partial):
            os.unlink(partial)
    print(done_message)


@app.on_event("startup")
def startup():
    """Make sure the Kokoro model is loaded before requests are served.

    Does nothing when the module-level load already succeeded. Otherwise it
    downloads any missing model files and assigns the loaded model to the
    module-level ``kokoro`` variable.
    """
    # NOTE(review): FastAPI documents on_event as deprecated in favour of
    # lifespan handlers; migrating would also touch the FastAPI(...) call.
    global kokoro
    if kokoro is not None:
        return

    print("📥 Downloading Kokoro TTS model files...")

    # Create directory for voices
    os.makedirs("voices", exist_ok=True)

    # Download voices file
    voices_file = "voices/voices.bin"
    _download_if_missing(
        "https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files/voices.bin",
        voices_file,
        "✅ Voices downloaded!",
    )

    # Download ONNX model
    model_file = "kokoro-v0_19.onnx"
    _download_if_missing(
        "https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files/kokoro-v0_19.onnx",
        model_file,
        "✅ Model downloaded!",
    )

    print("🎤 Initializing Kokoro TTS...")
    kokoro = Kokoro(model_file, voices_file)
    print("✅ Kokoro TTS loaded!")

# ============== HELPERS ==============
def cleanup_file(path: str):
    """Delete temporary file after response is sent.

    Best-effort: a file that is already gone is ignored, and any other
    filesystem error is reported on stdout instead of being raised, so a
    failed cleanup never breaks the caller.

    Args:
        path: Path of the file to remove.
    """
    try:
        os.unlink(path)
    except FileNotFoundError:
        # Already removed; nothing left to clean up.
        pass
    except OSError as exc:
        # Still non-fatal, but no longer silent: a leftover temp file is
        # worth knowing about.
        print(f"⚠️  Could not delete temporary file {path}: {exc}")

def generate_speech(text: str, voice: str = "bf_isabella", speed: float = 1.0) -> str:
    """
    Generate speech using Kokoro TTS and write it to a temporary WAV file.

    Available voices: af_heart, af_bella, am_adam, am_michael, bf_emma, bf_isabella

    Args:
        text: Text to synthesize; its length must lie within
            [MIN_CHARS, MAX_CHARS].
        voice: Voice identifier passed through to the model unchanged.
        speed: Speed value passed through to the model unchanged.

    Returns:
        Path of the WAV file that was written. The file is not deleted
        automatically; the caller is responsible for removing it.

    Raises:
        ValueError: If the text is shorter than MIN_CHARS or longer than
            MAX_CHARS.
        RuntimeError: If the Kokoro model has not been loaded.
    """
    if len(text) < MIN_CHARS:
        raise ValueError(f"Text too short. Minimum {MIN_CHARS} characters.")
    if len(text) > MAX_CHARS:
        raise ValueError(f"Text too long. Maximum {MAX_CHARS} characters (~5 min audio).")
    if kokoro is None:
        # Fail with a clear message rather than an AttributeError on None.
        raise RuntimeError("Kokoro TTS model is not loaded.")

    # Generate audio samples
    samples, sample_rate = kokoro.create(
        text=text,
        voice=voice,
        speed=speed,
        lang="en-us"
    )

    # Reserve a unique path, then close the handle before soundfile opens the
    # same path by name; writing while the handle is still open is not
    # portable across platforms.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        output_path = tmp.name
    try:
        sf.write(output_path, samples, sample_rate)
    except Exception:
        # delete=False means nothing else removes the file if the write fails.
        try:
            os.unlink(output_path)
        except OSError:
            pass
        raise
    return output_path

# ============== API ENDPOINTS ==============
@app.get("/")
def root():
    """Return a static description of the service, its features and endpoints."""
    feature_info = {
        "speed": "10x faster than XTTS",
        "voices": 6,
        "max_chars": MAX_CHARS,
        "emotional": True,
    }
    endpoint_info = {
        "health": "/health",
        "generate": "/api/generate (POST)",
        "docs": "/docs",
    }

    # Keys are inserted in the same order as the response has always had.
    payload = {
        "service": "Kokoro TTS API",
        "status": "running",
        "model": "Kokoro-82M",
        "version": "2.0",
    }
    payload["features"] = feature_info
    payload["endpoints"] = endpoint_info
    return payload

@app.get("/health")
def health():
    """Return a fixed health payload with the model name, limits and voice list."""
    voice_ids = [
        "af_heart",
        "af_bella",
        "am_adam",
        "am_michael",
        "bf_emma",
        "bf_isabella",
    ]
    return dict(
        status="healthy",
        model="Kokoro TTS 82M",
        speed="10x faster than XTTS",
        max_chars=MAX_CHARS,
        voices=voice_ids,
    )

@app.post("/api/generate")
async def generate_tts(
    background_tasks: BackgroundTasks,
    text: str = Form(..., description="Text to convert to speech"),
    voice: str = Form("bf_isabella", description="Voice to use"),
    speed: float = Form(1.0, description="Speech speed (0.5-2.0)")
):
    """
    Generate TTS with Kokoro (Fast & Emotional)
    
    **Performance:**
    - Max audio: 5 minutes (4500 chars)
    - Generation: ~20-30 seconds on CPU
    - Speech rate: ~900 chars/minute
    
    **Available Voices:**
    - `af_heart`: American Female (warm)
    - `af_bella`: American Female (professional)
    - `am_adam`: American Male (confident)
    - `am_michael`: American Male (friendly)
    - `bf_emma`: British Female (elegant)
    - `bf_isabella`: British Female (storytelling) ⭐ Best for long content
    
    **Errors:**
    - `400`: speed outside 0.5-2.0, or text too short / too long
    - `500`: speech generation failed
    
    **Example:**
    ```bash
    curl -X POST https://your-space.hf.space/api/generate \\
      -F "text=Hello world, this is Kokoro TTS!" \\
      -F "voice=bf_isabella" \\
      -F "speed=1.0" \\
      --output audio.wav
    ```
    """
    # Validate speed before entering the try block: HTTPException is itself an
    # Exception, so raising it inside the block let the generic handler below
    # re-wrap this 400 as a 500. The chained comparison also rejects NaN,
    # which slips through separate "<" / ">" checks.
    if not (0.5 <= speed <= 2.0):
        raise HTTPException(status_code=400, detail="Speed must be between 0.5 and 2.0")

    # Strip once so the synthesized text and the reported character count
    # refer to the same string.
    clean_text = text.strip()

    # NOTE(review): generate_speech is a blocking call inside an async
    # handler, so other requests wait while audio is generated. Moving it to
    # a worker thread needs confirmation that the model is safe to call
    # concurrently, so it is left as is.
    try:
        # Generate speech
        output_path = generate_speech(clean_text, voice, speed)

        # Schedule cleanup after response is sent
        background_tasks.add_task(cleanup_file, output_path)

        # Return audio file
        response = FileResponse(
            output_path,
            media_type="audio/wav",
            filename=f"kokoro_{datetime.now().strftime('%Y%m%d_%H%M%S')}.wav"
        )
        response.headers["X-Character-Count"] = str(len(clean_text))
        response.headers["X-Voice-Used"] = voice

        return response

    except ValueError as e:
        # Length validation failures from generate_speech are client errors.
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"TTS generation failed: {str(e)}") from e

# Script entry point: serve the app with uvicorn on every interface, port 7860.
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, port=7860, host="0.0.0.0")