blackhole1218 committed on
Commit
0084d0b
·
1 Parent(s): c7ac7fd

feat: add Humelo DIVE TTS (시아 voice)

Browse files

- Add Humelo DIVE TTS provider with 시아 voice
- Downloads audio from URL returned by API
- Supports emotion parameter (neutral default)
- Auto-converts to 16kHz WAV for fair comparison

Files changed (2) hide show
  1. models.py +10 -0
  2. tts.py +64 -0
models.py CHANGED
@@ -572,6 +572,7 @@ def insert_initial_models():
572
  has_google = bool(os.getenv("GOOGLE_API_KEY"))
573
  has_supertone = bool(os.getenv("SUPERTONE_API_KEY"))
574
  has_clova = bool(os.getenv("CLOVA_CLIENT_ID") and os.getenv("CLOVA_API_KEY"))
 
575
 
576
  tts_models = [
577
  # 채널톡 TTS (한국어 특화) - 항상 활성화
@@ -636,6 +637,15 @@ def insert_initial_models():
636
  is_active=has_supertone,
637
  model_url="https://supertone.ai/",
638
  ),
 
 
 
 
 
 
 
 
 
639
  ]
640
 
641
  for model in tts_models:
 
572
  has_google = bool(os.getenv("GOOGLE_API_KEY"))
573
  has_supertone = bool(os.getenv("SUPERTONE_API_KEY"))
574
  has_clova = bool(os.getenv("CLOVA_CLIENT_ID") and os.getenv("CLOVA_API_KEY"))
575
+ has_humelo = bool(os.getenv("HUMELO_API_KEY"))
576
 
577
  tts_models = [
578
  # 채널톡 TTS (한국어 특화) - 항상 활성화
 
637
  is_active=has_supertone,
638
  model_url="https://supertone.ai/",
639
  ),
640
+ # Humelo DIVE TTS (한국어 특화) - API 키 있을 때만 활성화
641
+ Model(
642
+ id="humelo-sia",
643
+ name="Humelo DIVE (시아)",
644
+ model_type=ModelType.TTS,
645
+ is_open=False,
646
+ is_active=has_humelo,
647
+ model_url="https://humelo.com/",
648
+ ),
649
  ]
650
 
651
  for model in tts_models:
tts.py CHANGED
@@ -48,6 +48,10 @@ SUPERTONE_VOICE_ID = os.getenv("SUPERTONE_VOICE_ID", "91992bbd4758bdcf9c9b01")
48
  CLOVA_CLIENT_ID = os.getenv("CLOVA_CLIENT_ID")
49
  CLOVA_API_KEY = os.getenv("CLOVA_API_KEY")
50
 
 
 
 
 
51
  def resample_wav_to_16khz(input_path: str) -> str:
52
  """
53
  Resample a WAV file to 16kHz for fair comparison.
@@ -194,6 +198,12 @@ model_mapping = {
194
  "provider": "supertone",
195
  "model": "sona_speech_1",
196
  },
 
 
 
 
 
 
197
  }
198
 
199
 
@@ -347,6 +357,51 @@ def predict_supertone_tts(text: str, model: str = "sona_speech_1") -> str:
347
  return f.name
348
 
349
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
350
  def predict_google_tts(text: str, voice: str = "ko-KR-Wavenet-A") -> str:
351
  """Google Cloud TTS API 호출"""
352
  api_key = os.getenv("GOOGLE_API_KEY")
@@ -430,6 +485,15 @@ def predict_tts(text: str, model: str) -> str:
430
  audio_path = predict_clova_tts(text, config.get("speaker", "nara"))
431
  is_mp3 = True # CLOVA returns MP3
432
 
 
 
 
 
 
 
 
 
 
433
  else:
434
  raise ValueError(f"알 수 없는 provider: {provider}")
435
 
 
48
  CLOVA_CLIENT_ID = os.getenv("CLOVA_CLIENT_ID")
49
  CLOVA_API_KEY = os.getenv("CLOVA_API_KEY")
50
 
51
+ # Humelo DIVE TTS
52
+ HUMELO_API_KEY = os.getenv("HUMELO_API_KEY")
53
+ HUMELO_API_URL = "https://agitvxptajouhvoatxio.supabase.co/functions/v1/dive-synthesize-v1"
54
+
55
  def resample_wav_to_16khz(input_path: str) -> str:
56
  """
57
  Resample a WAV file to 16kHz for fair comparison.
 
198
  "provider": "supertone",
199
  "model": "sona_speech_1",
200
  },
201
+ # Humelo DIVE TTS (한국어 특화)
202
+ "humelo-sia": {
203
+ "provider": "humelo",
204
+ "voice": "시아",
205
+ "emotion": "neutral",
206
+ },
207
  }
208
 
209
 
 
357
  return f.name
358
 
359
 
360
def predict_humelo_tts(text: str, voice: str = "시아", emotion: str = "neutral") -> str:
    """Synthesize speech with the Humelo DIVE TTS API and save it to a temp file.

    The API is called with a JSON payload and is expected to answer with a
    JSON object containing an ``audio_url``; the audio is then downloaded
    from that URL and written to a temporary file.

    Args:
        text: Text to synthesize.
        voice: Preset voice name, sent as ``voiceName``.
        emotion: Emotion preset, sent as ``emotion``.

    Returns:
        Path of the temporary audio file (not deleted automatically). The
        suffix is ``.mp3`` when the download looks like MP3, else ``.wav``;
        callers rely on the suffix to decide how to decode the file.

    Raises:
        ValueError: If ``HUMELO_API_KEY`` is not set, or the API response
            does not contain an ``audio_url``.
        requests.HTTPError: If either HTTP request returns an error status.
    """
    # Local import keeps this block self-contained without touching the
    # file-level import section.
    from urllib.parse import urlparse

    api_key = HUMELO_API_KEY
    if not api_key:
        raise ValueError("HUMELO_API_KEY 환경 변수가 설정되지 않았습니다.")

    response = requests.post(
        HUMELO_API_URL,
        headers={
            "Content-Type": "application/json",
            "X-API-Key": api_key,
        },
        json={
            "text": text,
            "mode": "preset",
            "voiceName": voice,
            "emotion": emotion,
            "lang": "ko",
        },
        timeout=60,
    )
    response.raise_for_status()

    data = response.json()
    # A non-object JSON body (list, string, null) has no "audio_url" either;
    # report it through the same ValueError instead of an AttributeError.
    audio_url = data.get("audio_url") if isinstance(data, dict) else None

    if not audio_url:
        raise ValueError("Humelo API가 오디오 URL을 반환하지 않았습니다.")

    # Download audio from URL
    audio_response = requests.get(audio_url, timeout=60)
    audio_response.raise_for_status()

    # Determine the file extension from the content type or the URL.
    # - The registered MIME type for MP3 is "audio/mpeg", which does not
    #   contain the substring "mp3", so compare the media type explicitly
    #   (parameters such as "; charset=..." are stripped first).
    # - Only the URL *path* is inspected so that a query string
    #   (e.g. a signed URL "...file.mp3?token=...") does not hide the suffix.
    content_type = audio_response.headers.get("Content-Type", "").lower()
    media_type = content_type.split(";", 1)[0].strip()
    url_path = urlparse(audio_url).path.lower()
    looks_like_mp3 = (
        media_type == "audio/mpeg"
        or "mp3" in content_type
        or url_path.endswith(".mp3")
    )
    suffix = ".mp3" if looks_like_mp3 else ".wav"

    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as f:
        f.write(audio_response.content)
        return f.name
403
+
404
+
405
  def predict_google_tts(text: str, voice: str = "ko-KR-Wavenet-A") -> str:
406
  """Google Cloud TTS API 호출"""
407
  api_key = os.getenv("GOOGLE_API_KEY")
 
485
  audio_path = predict_clova_tts(text, config.get("speaker", "nara"))
486
  is_mp3 = True # CLOVA returns MP3
487
 
488
+ elif provider == "humelo":
489
+ audio_path = predict_humelo_tts(
490
+ text,
491
+ config.get("voice", "시아"),
492
+ config.get("emotion", "neutral"),
493
+ )
494
+ # Humelo might return MP3 or WAV, check extension
495
+ is_mp3 = audio_path.endswith(".mp3")
496
+
497
  else:
498
  raise ValueError(f"알 수 없는 provider: {provider}")
499