Hexa06 committed on
Commit
84f6936
·
1 Parent(s): eddc745

Deploy TTS service with failover support

Browse files
Files changed (3) hide show
  1. app.py +188 -0
  2. packages.txt +1 -0
  3. requirements.txt +6 -0
app.py ADDED
@@ -0,0 +1,188 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, HTTPException, Form, BackgroundTasks
2
+ from fastapi.responses import FileResponse
3
+ from kokoro_onnx import Kokoro
4
+ import tempfile
5
+ import os
6
+ from datetime import datetime
7
+ import soundfile as sf
8
+
9
# ============== CONFIG ==============
MAX_CHARS = 4500  # ~5 minutes of audio (speaking rate: ~900 chars/min)
MIN_CHARS = 5
MAX_AUDIO_DURATION = 300  # 5 minutes of audio

# ============== KOKORO TTS MODEL ==============
# Try to load the model eagerly from files already on disk. The paths are the
# same ones the startup hook downloads to ("kokoro-v0_19.onnx" and
# "voices/voices.bin"), so a previously downloaded model is picked up here.
# Any failure leaves `kokoro` as None; the startup hook then downloads whatever
# is missing and retries.
print("🎤 Loading Kokoro TTS model...")
try:
    kokoro = Kokoro("kokoro-v0_19.onnx", "voices/voices.bin")
    print("✅ Kokoro TTS loaded successfully!")
except Exception as e:
    # Broad catch is deliberate: this is a best-effort attempt at import time
    # and must never prevent the app object from being created. The reason is
    # printed so a corrupt file is distinguishable from a missing one.
    print(f"⚠️ Kokoro could not be loaded locally ({e}). Missing files will be downloaded at startup.")
    kokoro = None

app = FastAPI(
    title="Kokoro TTS API - Fast & Simple",
    description="High-speed text-to-speech with emotional voices",
    version="2.0",
)
28
+
29
@app.on_event("startup")
def startup():
    """Download any missing Kokoro model files and initialise the global model.

    Registered as a FastAPI startup handler. Does nothing when the
    module-level load already succeeded (``kokoro`` is not None). Otherwise it
    fetches ``voices/voices.bin`` and ``kokoro-v0_19.onnx`` if they are not on
    disk and rebinds the module-level ``kokoro``.

    Raises:
        Whatever ``urllib.request.urlretrieve`` or ``Kokoro(...)`` raise; the
        error propagates so the application does not start without a model.
    """
    global kokoro
    if kokoro is not None:
        return

    import urllib.request

    def _download(url: str, dest: str) -> None:
        """Fetch *url* into *dest* via a ``.part`` file and an atomic rename.

        Writing directly to *dest* would leave a truncated file behind if the
        transfer is interrupted, and the ``os.path.exists`` checks below would
        then treat that broken file as a finished download on the next boot.
        """
        partial = dest + ".part"
        try:
            urllib.request.urlretrieve(url, partial)
            os.replace(partial, dest)
        finally:
            # Only present here when the download or rename failed.
            if os.path.exists(partial):
                os.unlink(partial)

    print("📥 Downloading Kokoro TTS model files...")

    # Create directory for voices
    os.makedirs("voices", exist_ok=True)

    # Download voices file
    voices_file = "voices/voices.bin"
    if not os.path.exists(voices_file):
        print("Downloading voices.bin...")
        _download(
            "https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files/voices.bin",
            voices_file,
        )
        print("✅ Voices downloaded!")

    # Download ONNX model
    model_file = "kokoro-v0_19.onnx"
    if not os.path.exists(model_file):
        print("Downloading kokoro-v0_19.onnx...")
        _download(
            "https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files/kokoro-v0_19.onnx",
            model_file,
        )
        print("✅ Model downloaded!")

    print("🎤 Initializing Kokoro TTS...")
    kokoro = Kokoro(model_file, voices_file)
    print("✅ Kokoro TTS loaded!")
63
+
64
+ # ============== HELPERS ==============
65
def cleanup_file(path: str):
    """Delete a temporary file on a best-effort basis.

    A file that is already gone is not an error. Any other filesystem failure
    is reported on stdout instead of being raised, so a failed cleanup never
    breaks the caller.

    Args:
        path: Filesystem path of the file to remove.
    """
    try:
        # Unlink directly rather than checking existence first: the check is
        # racy and the missing-file case is handled below anyway.
        os.unlink(path)
    except FileNotFoundError:
        pass
    except (OSError, ValueError) as exc:
        # ValueError covers malformed paths (e.g. an embedded NUL byte).
        # Unlike a bare `except:`, this lets KeyboardInterrupt and SystemExit
        # propagate.
        print(f"⚠️ Could not delete temporary file {path}: {exc}")
72
+
73
def generate_speech(text: str, voice: str = "bf_isabella", speed: float = 1.0) -> str:
    """Synthesise *text* with Kokoro and return the path of a temporary WAV file.

    Available voices: af_heart, af_bella, am_adam, am_michael, bf_emma, bf_isabella

    Args:
        text: Text to speak; its length must be within MIN_CHARS..MAX_CHARS.
        voice: Voice identifier passed through to ``kokoro.create``.
        speed: Speed value passed through to ``kokoro.create``.

    Returns:
        Path of a ``.wav`` file created with ``delete=False``. The caller is
        responsible for deleting it.

    Raises:
        ValueError: If *text* is shorter than MIN_CHARS or longer than MAX_CHARS.
        RuntimeError: If the Kokoro model has not been loaded.
    """
    if len(text) < MIN_CHARS:
        raise ValueError(f"Text too short. Minimum {MIN_CHARS} characters.")
    if len(text) > MAX_CHARS:
        raise ValueError(f"Text too long. Maximum {MAX_CHARS} characters (~5 min audio).")
    if kokoro is None:
        # Without this guard the failure is an opaque AttributeError on None.
        raise RuntimeError("Kokoro TTS model is not loaded.")

    # Generate audio samples
    samples, sample_rate = kokoro.create(
        text=text,
        voice=voice,
        speed=speed,
        lang="en-us",
    )

    # Reserve a unique path, then close the handle before writing: writing by
    # name while the handle is still open fails on platforms that lock open
    # files.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        wav_path = tmp.name
    try:
        sf.write(wav_path, samples, sample_rate)
    except Exception:
        # Do not leak the (empty or partial) temp file when encoding fails.
        try:
            os.unlink(wav_path)
        except OSError:
            pass
        raise
    return wav_path
95
+
96
+ # ============== API ENDPOINTS ==============
97
@app.get("/")
def root():
    """Return a static description of the service, its features and endpoints."""
    feature_summary = {
        "speed": "10x faster than XTTS",
        "voices": 6,
        "max_chars": MAX_CHARS,
        "emotional": True,
    }
    endpoint_map = {
        "health": "/health",
        "generate": "/api/generate (POST)",
        "docs": "/docs",
    }

    info = {
        "service": "Kokoro TTS API",
        "status": "running",
        "model": "Kokoro-82M",
        "version": "2.0",
    }
    info["features"] = feature_summary
    info["endpoints"] = endpoint_map
    return info
116
+
117
@app.get("/health")
def health():
    """Return a static health payload with the model name, limits and voice ids."""
    voice_ids = [
        "af_heart",
        "af_bella",
        "am_adam",
        "am_michael",
        "bf_emma",
        "bf_isabella",
    ]
    payload = {
        "status": "healthy",
        "model": "Kokoro TTS 82M",
        "speed": "10x faster than XTTS",
        "max_chars": MAX_CHARS,
    }
    payload["voices"] = voice_ids
    return payload
126
+
127
@app.post("/api/generate")
async def generate_tts(
    background_tasks: BackgroundTasks,
    text: str = Form(..., description="Text to convert to speech"),
    voice: str = Form("bf_isabella", description="Voice to use"),
    speed: float = Form(1.0, description="Speech speed (0.5-2.0)")
):
    """
    Generate TTS with Kokoro (Fast & Emotional)

    **Performance:**
    - Max audio: 5 minutes (4500 chars)
    - Generation: ~20-30 seconds on CPU
    - Speech rate: ~900 chars/minute

    **Available Voices:**
    - `af_heart`: American Female (warm)
    - `af_bella`: American Female (professional)
    - `am_adam`: American Male (confident)
    - `am_michael`: American Male (friendly)
    - `bf_emma`: British Female (elegant)
    - `bf_isabella`: British Female (storytelling) ⭐ Best for long content

    **Example:**
    ```bash
    curl -X POST https://your-space.hf.space/api/generate \\
      -F "text=Hello world, this is Kokoro TTS!" \\
      -F "voice=bf_isabella" \\
      -F "speed=1.0" \\
      --output audio.wav
    ```
    """
    # Maintainer notes are kept in comments: the docstring above is published
    # verbatim as the endpoint description in the generated API docs.
    #
    # Flow: validate speed -> synthesise to a temp WAV -> schedule the temp
    # file for deletion via a background task -> return the file.
    # Error mapping: ValueError (text length) -> 400, anything else -> 500.

    # Validate outside the try block. Inside it, this HTTPException was caught
    # by the generic `except Exception` handler and turned into a 500. The
    # chained comparison also rejects NaN, which passes separate `<` and `>`
    # checks.
    if not 0.5 <= speed <= 2.0:
        raise HTTPException(status_code=400, detail="Speed must be between 0.5 and 2.0")

    try:
        # Generate speech
        output_path = generate_speech(text.strip(), voice, speed)

        # Schedule cleanup after response is sent
        background_tasks.add_task(cleanup_file, output_path)

        # Return audio file
        response = FileResponse(
            output_path,
            media_type="audio/wav",
            filename=f"kokoro_{datetime.now().strftime('%Y%m%d_%H%M%S')}.wav"
        )
        response.headers["X-Character-Count"] = str(len(text))
        response.headers["X-Voice-Used"] = voice

        return response

    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"TTS generation failed: {str(e)}") from e
185
+
186
# Script entry point: serve the app on all interfaces, port 7860.
if __name__ == "__main__":
    import uvicorn

    bind_host, bind_port = "0.0.0.0", 7860
    uvicorn.run(app, host=bind_host, port=bind_port)
packages.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ libsndfile1
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ fastapi==0.115.0
2
+ uvicorn[standard]==0.32.0
3
+ kokoro-onnx
4
+ soundfile
5
+ python-multipart
6
+ numpy