Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Ko-TTS-Arena Contributors
committed on
Commit
·
f54c69b
1
Parent(s):
515ea95
feat: Upgrade Typecast to v3.0 API (ssfm-v30), add Jaesun & Jain voices, deactivate legacy GeumHee
Browse files
- models.py +20 -3
- requirements.txt +0 -1
- tts.py +37 -27
models.py
CHANGED
|
@@ -641,15 +641,32 @@ def insert_initial_models():
|
|
| 641 |
is_active=has_humelo,
|
| 642 |
model_url="https://humelo.com/",
|
| 643 |
),
|
| 644 |
-
# Typecast TTS (한국어 특화) - API 키 있을 때만 활성화
|
| 645 |
Model(
|
| 646 |
-
id="typecast-
|
| 647 |
-
name="Typecast (
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 648 |
model_type=ModelType.TTS,
|
| 649 |
is_open=False,
|
| 650 |
is_active=has_typecast,
|
| 651 |
model_url="https://typecast.ai/",
|
| 652 |
),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 653 |
# Gemini TTS (Google Cloud - 다국어 지원) - 서비스 계정 JSON 필요
|
| 654 |
Model(
|
| 655 |
id="gemini-tts-aoede",
|
|
|
|
| 641 |
is_active=has_humelo,
|
| 642 |
model_url="https://humelo.com/",
|
| 643 |
),
|
| 644 |
+
# Typecast TTS v3.0 (한국어 특화) - API 키 있을 때만 활성화
|
| 645 |
Model(
|
| 646 |
+
id="typecast-jaesun",
|
| 647 |
+
name="Typecast v3 (재선)",
|
| 648 |
+
model_type=ModelType.TTS,
|
| 649 |
+
is_open=False,
|
| 650 |
+
is_active=has_typecast,
|
| 651 |
+
model_url="https://typecast.ai/",
|
| 652 |
+
),
|
| 653 |
+
Model(
|
| 654 |
+
id="typecast-jain",
|
| 655 |
+
name="Typecast v3 (자인)",
|
| 656 |
model_type=ModelType.TTS,
|
| 657 |
is_open=False,
|
| 658 |
is_active=has_typecast,
|
| 659 |
model_url="https://typecast.ai/",
|
| 660 |
),
|
| 661 |
+
# Legacy Typecast - 비활성화 (v3.0으로 대체)
|
| 662 |
+
Model(
|
| 663 |
+
id="typecast-geumhee",
|
| 664 |
+
name="Typecast (GeumHee) [Legacy]",
|
| 665 |
+
model_type=ModelType.TTS,
|
| 666 |
+
is_open=False,
|
| 667 |
+
is_active=False, # 비활성화
|
| 668 |
+
model_url="https://typecast.ai/",
|
| 669 |
+
),
|
| 670 |
# Gemini TTS (Google Cloud - 다국어 지원) - 서비스 계정 JSON 필요
|
| 671 |
Model(
|
| 672 |
id="gemini-tts-aoede",
|
requirements.txt
CHANGED
|
@@ -14,5 +14,4 @@ huggingface-hub
|
|
| 14 |
scipy
|
| 15 |
numpy
|
| 16 |
pydub
|
| 17 |
-
typecast-python
|
| 18 |
google-cloud-texttospeech
|
|
|
|
| 14 |
scipy
|
| 15 |
numpy
|
| 16 |
pydub
|
|
|
|
| 17 |
google-cloud-texttospeech
|
tts.py
CHANGED
|
@@ -219,17 +219,27 @@ model_mapping = {
|
|
| 219 |
"voice": "리아",
|
| 220 |
"emotion": "neutral",
|
| 221 |
},
|
| 222 |
-
# Typecast TTS (한국어 특화)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 223 |
"typecast-geumhee": {
|
| 224 |
"provider": "typecast",
|
| 225 |
-
"voice_id": "
|
| 226 |
-
"model": "ssfm-
|
| 227 |
},
|
| 228 |
-
# Legacy Typecast ID kept for backward compatibility (routes to GeumHee)
|
| 229 |
"typecast-jaeyi": {
|
| 230 |
"provider": "typecast",
|
| 231 |
-
"voice_id": "
|
| 232 |
-
"model": "ssfm-
|
| 233 |
},
|
| 234 |
# Gemini TTS (Google Cloud - 다국어 지원)
|
| 235 |
"gemini-tts-aoede": {
|
|
@@ -435,31 +445,31 @@ def predict_humelo_tts(text: str, voice: str = "리아", emotion: str = "neutral
|
|
| 435 |
return f.name
|
| 436 |
|
| 437 |
|
| 438 |
-
def predict_typecast_tts(text: str, voice_id: str
|
| 439 |
-
"""Typecast TTS API 호출"""
|
| 440 |
api_key = TYPECAST_API_KEY
|
| 441 |
if not api_key:
|
| 442 |
raise ValueError("TYPECAST_API_KEY 환경 변수가 설정되지 않았습니다.")
|
| 443 |
|
| 444 |
-
|
| 445 |
-
|
| 446 |
-
|
| 447 |
-
|
| 448 |
-
|
| 449 |
-
|
| 450 |
-
|
| 451 |
-
|
| 452 |
-
|
| 453 |
-
|
| 454 |
-
|
| 455 |
-
|
| 456 |
-
|
| 457 |
-
|
| 458 |
-
|
| 459 |
-
|
| 460 |
-
|
| 461 |
-
|
| 462 |
-
|
| 463 |
|
| 464 |
|
| 465 |
def predict_gemini_tts(text: str, voice: str = "Aoede", model: str = "gemini-2.5-flash-tts") -> str:
|
|
|
|
| 219 |
"voice": "리아",
|
| 220 |
"emotion": "neutral",
|
| 221 |
},
|
| 222 |
+
# Typecast TTS v3.0 (한국어 특화) - 새 보이스
|
| 223 |
+
"typecast-jaesun": {
|
| 224 |
+
"provider": "typecast",
|
| 225 |
+
"voice_id": "tc_684a7a1446e2a628b5b07230", # 재선
|
| 226 |
+
"model": "ssfm-v30",
|
| 227 |
+
},
|
| 228 |
+
"typecast-jain": {
|
| 229 |
+
"provider": "typecast",
|
| 230 |
+
"voice_id": "tc_6809c111e5e8c73f8a0237b2", # 자인
|
| 231 |
+
"model": "ssfm-v30",
|
| 232 |
+
},
|
| 233 |
+
# Legacy Typecast IDs - routes to new Jaesun voice
|
| 234 |
"typecast-geumhee": {
|
| 235 |
"provider": "typecast",
|
| 236 |
+
"voice_id": "tc_684a7a1446e2a628b5b07230",
|
| 237 |
+
"model": "ssfm-v30",
|
| 238 |
},
|
|
|
|
| 239 |
"typecast-jaeyi": {
|
| 240 |
"provider": "typecast",
|
| 241 |
+
"voice_id": "tc_684a7a1446e2a628b5b07230",
|
| 242 |
+
"model": "ssfm-v30",
|
| 243 |
},
|
| 244 |
# Gemini TTS (Google Cloud - 다국어 지원)
|
| 245 |
"gemini-tts-aoede": {
|
|
|
|
| 445 |
return f.name
|
| 446 |
|
| 447 |
|
| 448 |
+
def predict_typecast_tts(text: str, voice_id: str, model: str = "ssfm-v30") -> str:
|
| 449 |
+
"""Typecast TTS API 호출 (v3.0 experiment API)"""
|
| 450 |
api_key = TYPECAST_API_KEY
|
| 451 |
if not api_key:
|
| 452 |
raise ValueError("TYPECAST_API_KEY 환경 변수가 설정되지 않았습니다.")
|
| 453 |
|
| 454 |
+
url = "https://api.typecast.ai/experiment/text-to-speech"
|
| 455 |
+
|
| 456 |
+
payload = {
|
| 457 |
+
"voice_id": voice_id,
|
| 458 |
+
"text": text,
|
| 459 |
+
"model": model
|
| 460 |
+
}
|
| 461 |
+
|
| 462 |
+
headers = {
|
| 463 |
+
"X-API-KEY": api_key,
|
| 464 |
+
"Content-Type": "application/json"
|
| 465 |
+
}
|
| 466 |
+
|
| 467 |
+
response = requests.post(url, headers=headers, json=payload, timeout=60)
|
| 468 |
+
response.raise_for_status()
|
| 469 |
+
|
| 470 |
+
with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
|
| 471 |
+
f.write(response.content)
|
| 472 |
+
return f.name
|
| 473 |
|
| 474 |
|
| 475 |
def predict_gemini_tts(text: str, voice: str = "Aoede", model: str = "gemini-2.5-flash-tts") -> str:
|