Spaces:

Hexa09
/

root-tts

Sleeping

App Files Files Community

root-tts / app.py

Hexa06

Deploy TTS service with failover support

84f6936 20 days ago

raw

history blame contribute delete

6.04 kB

	from fastapi import FastAPI, HTTPException, Form, BackgroundTasks
	from fastapi.responses import FileResponse
	from kokoro_onnx import Kokoro
	import tempfile
	import os
	from datetime import datetime
	import soundfile as sf

	# ============== CONFIG ==============
	# Bounds on the accepted input text length, in characters.
	MIN_CHARS = 5
	MAX_CHARS = 5 * 900  # about 5 minutes of speech at ~900 chars/minute
	# Ceiling on audio length: 5 minutes, expressed in seconds.
	# NOTE(review): not referenced by any code visible in this file.
	MAX_AUDIO_DURATION = 5 * 60

	# ============== KOKORO TTS MODEL ==============
	# Eagerly try to load the model from files already on disk. Failure is not
	# fatal: `kokoro` is left as None and the FastAPI startup hook downloads the
	# files and retries.
	print("🎤 Loading Kokoro TTS model...")
	try:
	    # Pass the voices *file* (the same "voices/voices.bin" path the startup
	    # hook downloads to), not its parent directory.
	    kokoro = Kokoro("kokoro-v0_19.onnx", "voices/voices.bin")
	    print("✅ Kokoro TTS loaded successfully!")
	except Exception as e:
	    # Deliberately broad: any load failure falls back to the startup
	    # download. The cause is reported so that a real problem (corrupt file,
	    # missing dependency) is not mistaken for a simple "file not found".
	    print(f"⚠️ Kokoro not loaded from local files ({e}). Will download on startup.")
	    kokoro = None

	# The ASGI application; every route below is registered on this object.
	app = FastAPI(
	    version="2.0",
	    title="Kokoro TTS API - Fast & Simple",
	    description="High-speed text-to-speech with emotional voices",
	)

	def _download_if_missing(url: str, dest: str, done_message: str) -> None:
	    """Fetch ``url`` into ``dest`` unless ``dest`` already exists.

	    The payload is first written to ``dest + ".part"`` and only renamed onto
	    ``dest`` once the transfer has finished, so an interrupted download can
	    never leave a truncated file that later passes the existence check.

	    Args:
	        url: Location to download from.
	        dest: Final path of the downloaded file.
	        done_message: Text printed after a successful download.

	    Raises:
	        Whatever ``urllib.request.urlretrieve`` or ``os.replace`` raises; the
	        partial file is removed before the exception propagates.
	    """
	    import urllib.request

	    if os.path.exists(dest):
	        return

	    print(f"Downloading {os.path.basename(dest)}...")
	    partial = dest + ".part"
	    try:
	        urllib.request.urlretrieve(url, partial)
	        os.replace(partial, dest)
	    finally:
	        # No-op after a successful rename; removes the leftover on failure.
	        if os.path.exists(partial):
	            os.unlink(partial)
	    print(done_message)


	@app.on_event("startup")
	def startup():
	    """Load the Kokoro model at application startup if it is not loaded yet.

	    Does nothing when the module-level eager load already succeeded.
	    Otherwise downloads any missing model files and rebinds the global
	    ``kokoro`` to a freshly initialised model.
	    """
	    global kokoro
	    if kokoro is not None:
	        return

	    print("📥 Downloading Kokoro TTS model files...")

	    # Create directory for voices
	    os.makedirs("voices", exist_ok=True)

	    # Download voices file
	    voices_file = "voices/voices.bin"
	    _download_if_missing(
	        "https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files/voices.bin",
	        voices_file,
	        "✅ Voices downloaded!",
	    )

	    # Download ONNX model
	    model_file = "kokoro-v0_19.onnx"
	    _download_if_missing(
	        "https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files/kokoro-v0_19.onnx",
	        model_file,
	        "✅ Model downloaded!",
	    )

	    print("🎤 Initializing Kokoro TTS...")
	    kokoro = Kokoro(model_file, voices_file)
	    print("✅ Kokoro TTS loaded!")

	# ============== HELPERS ==============
	def cleanup_file(path: str):
	    """Delete temporary file after response is sent.

	    Best-effort: a file that is already gone is ignored silently, and any
	    other filesystem error is reported on stdout instead of being raised, so
	    a failed cleanup never turns into a request error.

	    Args:
	        path: Path of the file to remove.
	    """
	    try:
	        os.unlink(path)
	    except FileNotFoundError:
	        # Already removed; nothing left to do.
	        pass
	    except OSError as exc:
	        print(f"⚠️ Could not delete temporary file {path}: {exc}")

	def generate_speech(text: str, voice: str = "bf_isabella", speed: float = 1.0) -> str:
	    """
	    Generate speech using Kokoro TTS and write it to a temporary WAV file.

	    Available voices: af_heart, af_bella, am_adam, am_michael, bf_emma, bf_isabella

	    Args:
	        text: Text to synthesize; its length must be within
	            [MIN_CHARS, MAX_CHARS].
	        voice: Voice identifier passed through to the model.
	        speed: Speed value passed through to the model.

	    Returns:
	        Path of the generated ``.wav`` file. The file is not deleted here;
	        the caller is responsible for removing it.

	    Raises:
	        ValueError: If ``text`` is shorter than MIN_CHARS or longer than
	            MAX_CHARS.
	        RuntimeError: If the Kokoro model has not been loaded.
	    """
	    if len(text) < MIN_CHARS:
	        raise ValueError(f"Text too short. Minimum {MIN_CHARS} characters.")
	    if len(text) > MAX_CHARS:
	        raise ValueError(f"Text too long. Maximum {MAX_CHARS} characters (~5 min audio).")

	    # Fail with a clear message instead of an AttributeError on None.
	    if kokoro is None:
	        raise RuntimeError("Kokoro TTS model is not loaded")

	    # Generate audio samples
	    samples, sample_rate = kokoro.create(
	        text=text,
	        voice=voice,
	        speed=speed,
	        lang="en-us",
	    )

	    # Reserve a unique file name, then close the handle before writing by
	    # path: some platforms (notably Windows) refuse a second open of a
	    # NamedTemporaryFile while the first handle is still open.
	    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
	        output_path = tmp.name

	    try:
	        sf.write(output_path, samples, sample_rate)
	    except Exception:
	        # delete=False means nothing else removes the file; don't leak it.
	        try:
	            os.unlink(output_path)
	        except OSError:
	            pass
	        raise
	    return output_path

	# ============== API ENDPOINTS ==============
	@app.get("/")
	def root():
	    # Static service descriptor: identity, headline features and a list of
	    # the available endpoints. (Kept as comments, not a docstring, so the
	    # published route description is unchanged.)
	    feature_summary = {
	        "speed": "10x faster than XTTS",
	        "voices": 6,
	        "max_chars": MAX_CHARS,
	        "emotional": True,
	    }
	    endpoint_index = {
	        "health": "/health",
	        "generate": "/api/generate (POST)",
	        "docs": "/docs",
	    }
	    return {
	        "service": "Kokoro TTS API",
	        "status": "running",
	        "model": "Kokoro-82M",
	        "version": "2.0",
	        "features": feature_summary,
	        "endpoints": endpoint_index,
	    }

	@app.get("/health")
	def health():
	    # Fixed health payload; it does not inspect whether the model is loaded.
	    voice_ids = [
	        "af_heart",
	        "af_bella",
	        "am_adam",
	        "am_michael",
	        "bf_emma",
	        "bf_isabella",
	    ]
	    payload = {
	        "status": "healthy",
	        "model": "Kokoro TTS 82M",
	        "speed": "10x faster than XTTS",
	        "max_chars": MAX_CHARS,
	        "voices": voice_ids,
	    }
	    return payload

	@app.post("/api/generate")
	async def generate_tts(
	    background_tasks: BackgroundTasks,
	    text: str = Form(..., description="Text to convert to speech"),
	    voice: str = Form("bf_isabella", description="Voice to use"),
	    speed: float = Form(1.0, description="Speech speed (0.5-2.0)")
	):
	    """
	    Generate TTS with Kokoro (Fast & Emotional)

	    Performance:
	    - Max audio: 5 minutes (4500 chars)
	    - Generation: ~20-30 seconds on CPU
	    - Speech rate: ~900 chars/minute

	    Available Voices:
	    - `af_heart`: American Female (warm)
	    - `af_bella`: American Female (professional)
	    - `am_adam`: American Male (confident)
	    - `am_michael`: American Male (friendly)
	    - `bf_emma`: British Female (elegant)
	    - `bf_isabella`: British Female (storytelling) ⭐ Best for long content

	    Example:
	    ```bash
	    curl -X POST https://your-space.hf.space/api/generate \\
	    -F "text=Hello world, this is Kokoro TTS!" \\
	    -F "voice=bf_isabella" \\
	    -F "speed=1.0" \\
	    --output audio.wav
	    ```
	    """
	    # Validate speed before entering the try block: HTTPException is itself
	    # an Exception, so raising it inside would be caught by the generic
	    # handler below and returned as a 500 instead of a 400. The chained
	    # comparison also rejects NaN, which the two-sided `<`/`>` test lets
	    # through.
	    if not (0.5 <= speed <= 2.0):
	        raise HTTPException(status_code=400, detail="Speed must be between 0.5 and 2.0")

	    # Strip once so the reported character count matches the text that is
	    # actually validated and synthesized.
	    clean_text = text.strip()

	    try:
	        # Generate speech
	        # NOTE(review): generate_speech is a synchronous call inside an
	        # async handler, so it occupies the event loop while it runs —
	        # confirm this is acceptable for concurrent /health checks.
	        output_path = generate_speech(clean_text, voice, speed)

	        # Schedule cleanup after response is sent
	        background_tasks.add_task(cleanup_file, output_path)

	        # Return audio file
	        response = FileResponse(
	            output_path,
	            media_type="audio/wav",
	            filename=f"kokoro_{datetime.now().strftime('%Y%m%d_%H%M%S')}.wav"
	        )
	        response.headers["X-Character-Count"] = str(len(clean_text))
	        response.headers["X-Voice-Used"] = voice

	        return response

	    except ValueError as e:
	        # Length validation failures from generate_speech are client errors.
	        raise HTTPException(status_code=400, detail=str(e)) from e
	    except Exception as e:
	        raise HTTPException(status_code=500, detail=f"TTS generation failed: {str(e)}") from e

	# Direct-execution entry point: serve the app on all interfaces, port 7860.
	if __name__ == "__main__":
	    import uvicorn

	    uvicorn.run(
	        app,
	        port=7860,
	        host="0.0.0.0",
	    )