Ko-TTS-Arena Contributors committed on
Commit
515ea95
·
1 Parent(s): c82ad44

fix: Gemini TTS requires service account JSON (API Key not supported), disable until JSON provided

Browse files
Files changed (3) hide show
  1. models.py +2 -1
  2. requirements.txt +2 -1
  3. tts.py +38 -32
models.py CHANGED
@@ -566,7 +566,8 @@ def insert_initial_models():
566
  has_clova = bool(os.getenv("CLOVA_CLIENT_ID") and os.getenv("CLOVA_API_KEY"))
567
  has_humelo = bool(os.getenv("HUMELO_API_KEY"))
568
  has_typecast = bool(os.getenv("TYPECAST_API_KEY"))
569
- has_gemini_tts = bool(os.getenv("GEMINI_TTS_API_KEY"))
 
570
 
571
  tts_models = [
572
  # 채널톡 TTS (한국어 특화) - 항상 활성화
 
566
  has_clova = bool(os.getenv("CLOVA_CLIENT_ID") and os.getenv("CLOVA_API_KEY"))
567
  has_humelo = bool(os.getenv("HUMELO_API_KEY"))
568
  has_typecast = bool(os.getenv("TYPECAST_API_KEY"))
569
+ # Gemini TTS는 서비스 계정 JSON이 필요 (API Key 미지원)
570
+ has_gemini_tts = bool(os.getenv("GOOGLE_APPLICATION_CREDENTIALS_JSON"))
571
 
572
  tts_models = [
573
  # 채널톡 TTS (한국어 특화) - 항상 활성화
requirements.txt CHANGED
@@ -14,4 +14,5 @@ huggingface-hub
14
  scipy
15
  numpy
16
  pydub
17
- typecast-python
 
 
14
  scipy
15
  numpy
16
  pydub
17
+ typecast-python
18
+ google-cloud-texttospeech
tts.py CHANGED
@@ -55,10 +55,17 @@ HUMELO_API_URL = "https://agitvxptajouhvoatxio.supabase.co/functions/v1/dive-syn
55
  # Typecast TTS
56
  TYPECAST_API_KEY = os.getenv("TYPECAST_API_KEY")
57
 
58
- # Gemini TTS (Google Cloud) - REST API v1beta1 with API Key
59
- GEMINI_TTS_API_KEY = os.getenv("GEMINI_TTS_API_KEY")
60
- if GEMINI_TTS_API_KEY:
61
- print("[Gemini TTS] API Key loaded")
 
 
 
 
 
 
 
62
 
63
  def resample_wav_to_16khz(input_path: str) -> str:
64
  """
@@ -456,43 +463,42 @@ def predict_typecast_tts(text: str, voice_id: str = "tc_612ed01c7eb720fddd3ddedf
456
 
457
 
458
  def predict_gemini_tts(text: str, voice: str = "Aoede", model: str = "gemini-2.5-flash-tts") -> str:
459
- """Gemini TTS API 호출 (REST API v1beta1 with API Key)"""
460
- if not GEMINI_TTS_API_KEY:
461
- raise ValueError("GEMINI_TTS_API_KEY 환경 변수가 설정되지 않았습니다.")
462
 
463
  try:
464
- url = f"https://texttospeech.googleapis.com/v1beta1/text:synthesize?key={GEMINI_TTS_API_KEY}"
 
465
 
466
- payload = {
467
- "input": {
468
- "text": text,
469
- "prompt": "친절하고 자연스러운 톤으로 말해주세요"
470
- },
471
- "voice": {
472
- "languageCode": "ko-kr",
473
- "name": voice,
474
- "modelName": model
475
- },
476
- "audioConfig": {
477
- "audioEncoding": "LINEAR16",
478
- "sampleRateHertz": 24000
479
- }
480
- }
481
 
482
- response = requests.post(url, json=payload, timeout=60)
483
- response.raise_for_status()
 
 
 
484
 
485
- audio_content = response.json().get("audioContent")
486
- if not audio_content:
487
- raise ValueError("Gemini TTS API가 오디오를 반환하지 않았습니다.")
 
 
 
 
 
 
 
 
488
 
489
- audio_bytes = base64.b64decode(audio_content)
490
  with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
491
- f.write(audio_bytes)
492
  return f.name
493
 
494
- except requests.exceptions.RequestException as e:
495
- raise ValueError(f"Gemini TTS API 요청 오류: {str(e)}")
496
  except Exception as e:
497
  raise ValueError(f"Gemini TTS API 오류: {str(e)}")
498
 
 
55
  # Typecast TTS
56
  TYPECAST_API_KEY = os.getenv("TYPECAST_API_KEY")
57
 
58
+ # Gemini TTS (Google Cloud) - 서비스 계정 JSON 필요 (API Key 미지원)
59
+ GOOGLE_APPLICATION_CREDENTIALS_JSON = os.getenv("GOOGLE_APPLICATION_CREDENTIALS_JSON")
60
+ if GOOGLE_APPLICATION_CREDENTIALS_JSON:
61
+ _credentials_path = os.path.join(tempfile.gettempdir(), "google_credentials.json")
62
+ try:
63
+ with open(_credentials_path, "w") as f:
64
+ f.write(GOOGLE_APPLICATION_CREDENTIALS_JSON)
65
+ os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = _credentials_path
66
+ print("[Gemini TTS] Service account credentials loaded")
67
+ except Exception as e:
68
+ print(f"[Gemini TTS] Failed to save credentials: {e}")
69
 
70
  def resample_wav_to_16khz(input_path: str) -> str:
71
  """
 
463
 
464
 
465
  def predict_gemini_tts(text: str, voice: str = "Aoede", model: str = "gemini-2.5-flash-tts") -> str:
466
+ """Gemini TTS API 호출 (서비스 계정 JSON 필요)"""
467
+ if not GOOGLE_APPLICATION_CREDENTIALS_JSON:
468
+ raise ValueError("GOOGLE_APPLICATION_CREDENTIALS_JSON 환경 변수가 설정되지 않았습니다.")
469
 
470
  try:
471
+ from google.api_core.client_options import ClientOptions
472
+ from google.cloud import texttospeech_v1beta1 as texttospeech
473
 
474
+ client = texttospeech.TextToSpeechClient(
475
+ client_options=ClientOptions(api_endpoint="texttospeech.googleapis.com")
476
+ )
 
 
 
 
 
 
 
 
 
 
 
 
477
 
478
+ voice_params = texttospeech.VoiceSelectionParams(
479
+ name=voice,
480
+ language_code="ko-kr",
481
+ model_name=model,
482
+ )
483
 
484
+ response = client.synthesize_speech(
485
+ input=texttospeech.SynthesisInput(
486
+ text=text,
487
+ prompt="친절하고 자연스러운 톤으로 말해주세요",
488
+ ),
489
+ voice=voice_params,
490
+ audio_config=texttospeech.AudioConfig(
491
+ audio_encoding=texttospeech.AudioEncoding.LINEAR16,
492
+ sample_rate_hertz=24000,
493
+ ),
494
+ )
495
 
 
496
  with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
497
+ f.write(response.audio_content)
498
  return f.name
499
 
500
+ except ImportError:
501
+ raise ValueError("google-cloud-texttospeech 패키지가 설치되지 않았습니다.")
502
  except Exception as e:
503
  raise ValueError(f"Gemini TTS API 오류: {str(e)}")
504