Spaces:
Sleeping
Sleeping
Commit
·
0084d0b
1
Parent(s):
c7ac7fd
feat: add Humelo DIVE TTS (시아 voice)
Browse files
- Add Humelo DIVE TTS provider with 시아 voice
- Downloads audio from URL returned by API
- Supports emotion parameter (neutral default)
- Auto-converts to 16kHz WAV for fair comparison
models.py
CHANGED
|
@@ -572,6 +572,7 @@ def insert_initial_models():
|
|
| 572 |
has_google = bool(os.getenv("GOOGLE_API_KEY"))
|
| 573 |
has_supertone = bool(os.getenv("SUPERTONE_API_KEY"))
|
| 574 |
has_clova = bool(os.getenv("CLOVA_CLIENT_ID") and os.getenv("CLOVA_API_KEY"))
|
|
|
|
| 575 |
|
| 576 |
tts_models = [
|
| 577 |
# 채널톡 TTS (한국어 특화) - 항상 활성화
|
|
@@ -636,6 +637,15 @@ def insert_initial_models():
|
|
| 636 |
is_active=has_supertone,
|
| 637 |
model_url="https://supertone.ai/",
|
| 638 |
),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 639 |
]
|
| 640 |
|
| 641 |
for model in tts_models:
|
|
|
|
| 572 |
has_google = bool(os.getenv("GOOGLE_API_KEY"))
|
| 573 |
has_supertone = bool(os.getenv("SUPERTONE_API_KEY"))
|
| 574 |
has_clova = bool(os.getenv("CLOVA_CLIENT_ID") and os.getenv("CLOVA_API_KEY"))
|
| 575 |
+
has_humelo = bool(os.getenv("HUMELO_API_KEY"))
|
| 576 |
|
| 577 |
tts_models = [
|
| 578 |
# 채널톡 TTS (한국어 특화) - 항상 활성화
|
|
|
|
| 637 |
is_active=has_supertone,
|
| 638 |
model_url="https://supertone.ai/",
|
| 639 |
),
|
| 640 |
+
# Humelo DIVE TTS (한국어 특화) - API 키 있을 때만 활성화
|
| 641 |
+
Model(
|
| 642 |
+
id="humelo-sia",
|
| 643 |
+
name="Humelo DIVE (시아)",
|
| 644 |
+
model_type=ModelType.TTS,
|
| 645 |
+
is_open=False,
|
| 646 |
+
is_active=has_humelo,
|
| 647 |
+
model_url="https://humelo.com/",
|
| 648 |
+
),
|
| 649 |
]
|
| 650 |
|
| 651 |
for model in tts_models:
|
tts.py
CHANGED
|
@@ -48,6 +48,10 @@ SUPERTONE_VOICE_ID = os.getenv("SUPERTONE_VOICE_ID", "91992bbd4758bdcf9c9b01")
|
|
| 48 |
CLOVA_CLIENT_ID = os.getenv("CLOVA_CLIENT_ID")
|
| 49 |
CLOVA_API_KEY = os.getenv("CLOVA_API_KEY")
|
| 50 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 51 |
def resample_wav_to_16khz(input_path: str) -> str:
|
| 52 |
"""
|
| 53 |
Resample a WAV file to 16kHz for fair comparison.
|
|
@@ -194,6 +198,12 @@ model_mapping = {
|
|
| 194 |
"provider": "supertone",
|
| 195 |
"model": "sona_speech_1",
|
| 196 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 197 |
}
|
| 198 |
|
| 199 |
|
|
@@ -347,6 +357,51 @@ def predict_supertone_tts(text: str, model: str = "sona_speech_1") -> str:
|
|
| 347 |
return f.name
|
| 348 |
|
| 349 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 350 |
def predict_google_tts(text: str, voice: str = "ko-KR-Wavenet-A") -> str:
|
| 351 |
"""Google Cloud TTS API 호출"""
|
| 352 |
api_key = os.getenv("GOOGLE_API_KEY")
|
|
@@ -430,6 +485,15 @@ def predict_tts(text: str, model: str) -> str:
|
|
| 430 |
audio_path = predict_clova_tts(text, config.get("speaker", "nara"))
|
| 431 |
is_mp3 = True # CLOVA returns MP3
|
| 432 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 433 |
else:
|
| 434 |
raise ValueError(f"알 수 없는 provider: {provider}")
|
| 435 |
|
|
|
|
| 48 |
CLOVA_CLIENT_ID = os.getenv("CLOVA_CLIENT_ID")
|
| 49 |
CLOVA_API_KEY = os.getenv("CLOVA_API_KEY")
|
| 50 |
|
| 51 |
+
# Humelo DIVE TTS
|
| 52 |
+
HUMELO_API_KEY = os.getenv("HUMELO_API_KEY")
|
| 53 |
+
HUMELO_API_URL = "https://agitvxptajouhvoatxio.supabase.co/functions/v1/dive-synthesize-v1"
|
| 54 |
+
|
| 55 |
def resample_wav_to_16khz(input_path: str) -> str:
|
| 56 |
"""
|
| 57 |
Resample a WAV file to 16kHz for fair comparison.
|
|
|
|
| 198 |
"provider": "supertone",
|
| 199 |
"model": "sona_speech_1",
|
| 200 |
},
|
| 201 |
+
# Humelo DIVE TTS (한국어 특화)
|
| 202 |
+
"humelo-sia": {
|
| 203 |
+
"provider": "humelo",
|
| 204 |
+
"voice": "시아",
|
| 205 |
+
"emotion": "neutral",
|
| 206 |
+
},
|
| 207 |
}
|
| 208 |
|
| 209 |
|
|
|
|
| 357 |
return f.name
|
| 358 |
|
| 359 |
|
| 360 |
+
def predict_humelo_tts(text: str, voice: str = "시아", emotion: str = "neutral") -> str:
    """Synthesize speech with the Humelo DIVE TTS API and save it to a temp file.

    The API is called with a JSON payload and is expected to answer with a JSON
    object containing an ``audio_url``. The audio is then downloaded from that
    URL and written to a temporary file created with ``delete=False``, so the
    caller is responsible for removing it.

    Args:
        text: Text to synthesize.
        voice: Preset voice name, sent as ``voiceName``.
        emotion: Emotion preset, sent as ``emotion``.

    Returns:
        Path of the temporary audio file. The suffix is ``.mp3`` when the
        download looks like MP3 and ``.wav`` otherwise.

    Raises:
        ValueError: If ``HUMELO_API_KEY`` is not set, or the API response does
            not contain an ``audio_url``.
        requests.HTTPError: If either HTTP request returns an error status.
    """
    # Local import keeps this block self-contained regardless of file imports.
    from urllib.parse import urlparse

    api_key = HUMELO_API_KEY
    if not api_key:
        raise ValueError("HUMELO_API_KEY 환경 변수가 설정되지 않았습니다.")

    response = requests.post(
        HUMELO_API_URL,
        headers={
            "Content-Type": "application/json",
            "X-API-Key": api_key,
        },
        json={
            "text": text,
            "mode": "preset",
            "voiceName": voice,
            "emotion": emotion,
            "lang": "ko",
        },
        timeout=60,
    )
    response.raise_for_status()

    data = response.json()
    # Guard against a non-object JSON body (e.g. a bare list or string).
    audio_url = data.get("audio_url") if isinstance(data, dict) else None

    if not audio_url:
        raise ValueError("Humelo API가 오디오 URL을 반환하지 않았습니다.")

    # Download audio from URL
    audio_response = requests.get(audio_url, timeout=60)
    audio_response.raise_for_status()
    payload = audio_response.content

    # Decide the container format. The file's magic bytes are the most reliable
    # signal; fall back to the Content-Type header and the URL path otherwise.
    # - MP3 is normally served as "audio/mpeg", so matching only "mp3" in the
    #   header would misclassify it as WAV.
    # - The URL may carry a query string (signed URLs), so the extension must
    #   be read from the parsed path rather than the raw URL.
    content_type = audio_response.headers.get("Content-Type", "").lower()
    url_path = urlparse(audio_url).path.lower()
    if payload[:4] == b"RIFF":
        suffix = ".wav"
    elif payload[:3] == b"ID3":
        suffix = ".mp3"
    elif "mpeg" in content_type or "mp3" in content_type or url_path.endswith(".mp3"):
        suffix = ".mp3"
    else:
        suffix = ".wav"

    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as f:
        f.write(payload)
        return f.name
|
| 403 |
+
|
| 404 |
+
|
| 405 |
def predict_google_tts(text: str, voice: str = "ko-KR-Wavenet-A") -> str:
|
| 406 |
"""Google Cloud TTS API 호출"""
|
| 407 |
api_key = os.getenv("GOOGLE_API_KEY")
|
|
|
|
| 485 |
audio_path = predict_clova_tts(text, config.get("speaker", "nara"))
|
| 486 |
is_mp3 = True # CLOVA returns MP3
|
| 487 |
|
| 488 |
+
elif provider == "humelo":
|
| 489 |
+
audio_path = predict_humelo_tts(
|
| 490 |
+
text,
|
| 491 |
+
config.get("voice", "시아"),
|
| 492 |
+
config.get("emotion", "neutral"),
|
| 493 |
+
)
|
| 494 |
+
# Humelo might return MP3 or WAV, check extension
|
| 495 |
+
is_mp3 = audio_path.endswith(".mp3")
|
| 496 |
+
|
| 497 |
else:
|
| 498 |
raise ValueError(f"알 수 없는 provider: {provider}")
|
| 499 |
|