Spaces:
Sleeping
Sleeping
Ko-TTS-Arena Contributors
committed on
Commit
·
515ea95
1
Parent(s):
c82ad44
fix: Gemini TTS requires service account JSON (API Key not supported), disable until JSON provided
Browse files
- models.py +2 -1
- requirements.txt +2 -1
- tts.py +38 -32
models.py
CHANGED
|
@@ -566,7 +566,8 @@ def insert_initial_models():
|
|
| 566 |
has_clova = bool(os.getenv("CLOVA_CLIENT_ID") and os.getenv("CLOVA_API_KEY"))
|
| 567 |
has_humelo = bool(os.getenv("HUMELO_API_KEY"))
|
| 568 |
has_typecast = bool(os.getenv("TYPECAST_API_KEY"))
|
| 569 |
-
|
|
|
|
| 570 |
|
| 571 |
tts_models = [
|
| 572 |
# 채널톡 TTS (한국어 특화) - 항상 활성화
|
|
|
|
| 566 |
has_clova = bool(os.getenv("CLOVA_CLIENT_ID") and os.getenv("CLOVA_API_KEY"))
|
| 567 |
has_humelo = bool(os.getenv("HUMELO_API_KEY"))
|
| 568 |
has_typecast = bool(os.getenv("TYPECAST_API_KEY"))
|
| 569 |
+
# Gemini TTS는 서비스 계정 JSON이 필요 (API Key 미지원)
|
| 570 |
+
has_gemini_tts = bool(os.getenv("GOOGLE_APPLICATION_CREDENTIALS_JSON"))
|
| 571 |
|
| 572 |
tts_models = [
|
| 573 |
# 채널톡 TTS (한국어 특화) - 항상 활성화
|
requirements.txt
CHANGED
|
@@ -14,4 +14,5 @@ huggingface-hub
|
|
| 14 |
scipy
|
| 15 |
numpy
|
| 16 |
pydub
|
| 17 |
-
typecast-python
|
|
|
|
|
|
| 14 |
scipy
|
| 15 |
numpy
|
| 16 |
pydub
|
| 17 |
+
typecast-python
|
| 18 |
+
google-cloud-texttospeech
|
tts.py
CHANGED
|
@@ -55,10 +55,17 @@ HUMELO_API_URL = "https://agitvxptajouhvoatxio.supabase.co/functions/v1/dive-syn
|
|
| 55 |
# Typecast TTS
|
| 56 |
TYPECAST_API_KEY = os.getenv("TYPECAST_API_KEY")
|
| 57 |
|
| 58 |
-
# Gemini TTS (Google Cloud) -
|
| 59 |
-
|
| 60 |
-
if
|
| 61 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
|
| 63 |
def resample_wav_to_16khz(input_path: str) -> str:
|
| 64 |
"""
|
|
@@ -456,43 +463,42 @@ def predict_typecast_tts(text: str, voice_id: str = "tc_612ed01c7eb720fddd3ddedf
|
|
| 456 |
|
| 457 |
|
| 458 |
def predict_gemini_tts(text: str, voice: str = "Aoede", model: str = "gemini-2.5-flash-tts") -> str:
|
| 459 |
-
"""Gemini TTS API 호출 (
|
| 460 |
-
if not
|
| 461 |
-
raise ValueError("
|
| 462 |
|
| 463 |
try:
|
| 464 |
-
|
|
|
|
| 465 |
|
| 466 |
-
|
| 467 |
-
"
|
| 468 |
-
|
| 469 |
-
"prompt": "친절하고 자연스러운 톤으로 말해주세요"
|
| 470 |
-
},
|
| 471 |
-
"voice": {
|
| 472 |
-
"languageCode": "ko-kr",
|
| 473 |
-
"name": voice,
|
| 474 |
-
"modelName": model
|
| 475 |
-
},
|
| 476 |
-
"audioConfig": {
|
| 477 |
-
"audioEncoding": "LINEAR16",
|
| 478 |
-
"sampleRateHertz": 24000
|
| 479 |
-
}
|
| 480 |
-
}
|
| 481 |
|
| 482 |
-
|
| 483 |
-
|
|
|
|
|
|
|
|
|
|
| 484 |
|
| 485 |
-
|
| 486 |
-
|
| 487 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 488 |
|
| 489 |
-
audio_bytes = base64.b64decode(audio_content)
|
| 490 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
|
| 491 |
-
f.write(
|
| 492 |
return f.name
|
| 493 |
|
| 494 |
-
except
|
| 495 |
-
raise ValueError(
|
| 496 |
except Exception as e:
|
| 497 |
raise ValueError(f"Gemini TTS API 오류: {str(e)}")
|
| 498 |
|
|
|
|
| 55 |
# Typecast TTS
|
| 56 |
TYPECAST_API_KEY = os.getenv("TYPECAST_API_KEY")
|
| 57 |
|
| 58 |
+
# Gemini TTS (Google Cloud) - 서비스 계정 JSON 필요 (API Key 미지원)
|
| 59 |
+
GOOGLE_APPLICATION_CREDENTIALS_JSON = os.getenv("GOOGLE_APPLICATION_CREDENTIALS_JSON")
|
| 60 |
+
if GOOGLE_APPLICATION_CREDENTIALS_JSON:
|
| 61 |
+
_credentials_path = os.path.join(tempfile.gettempdir(), "google_credentials.json")
|
| 62 |
+
try:
|
| 63 |
+
with open(_credentials_path, "w") as f:
|
| 64 |
+
f.write(GOOGLE_APPLICATION_CREDENTIALS_JSON)
|
| 65 |
+
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = _credentials_path
|
| 66 |
+
print("[Gemini TTS] Service account credentials loaded")
|
| 67 |
+
except Exception as e:
|
| 68 |
+
print(f"[Gemini TTS] Failed to save credentials: {e}")
|
| 69 |
|
| 70 |
def resample_wav_to_16khz(input_path: str) -> str:
|
| 71 |
"""
|
|
|
|
| 463 |
|
| 464 |
|
| 465 |
def predict_gemini_tts(text: str, voice: str = "Aoede", model: str = "gemini-2.5-flash-tts") -> str:
|
| 466 |
+
"""Gemini TTS API 호출 (서비스 계정 JSON 필요)"""
|
| 467 |
+
if not GOOGLE_APPLICATION_CREDENTIALS_JSON:
|
| 468 |
+
raise ValueError("GOOGLE_APPLICATION_CREDENTIALS_JSON 환경 변수가 설정되지 않았습니다.")
|
| 469 |
|
| 470 |
try:
|
| 471 |
+
from google.api_core.client_options import ClientOptions
|
| 472 |
+
from google.cloud import texttospeech_v1beta1 as texttospeech
|
| 473 |
|
| 474 |
+
client = texttospeech.TextToSpeechClient(
|
| 475 |
+
client_options=ClientOptions(api_endpoint="texttospeech.googleapis.com")
|
| 476 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 477 |
|
| 478 |
+
voice_params = texttospeech.VoiceSelectionParams(
|
| 479 |
+
name=voice,
|
| 480 |
+
language_code="ko-kr",
|
| 481 |
+
model_name=model,
|
| 482 |
+
)
|
| 483 |
|
| 484 |
+
response = client.synthesize_speech(
|
| 485 |
+
input=texttospeech.SynthesisInput(
|
| 486 |
+
text=text,
|
| 487 |
+
prompt="친절하고 자연스러운 톤으로 말해주세요",
|
| 488 |
+
),
|
| 489 |
+
voice=voice_params,
|
| 490 |
+
audio_config=texttospeech.AudioConfig(
|
| 491 |
+
audio_encoding=texttospeech.AudioEncoding.LINEAR16,
|
| 492 |
+
sample_rate_hertz=24000,
|
| 493 |
+
),
|
| 494 |
+
)
|
| 495 |
|
|
|
|
| 496 |
with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
|
| 497 |
+
f.write(response.audio_content)
|
| 498 |
return f.name
|
| 499 |
|
| 500 |
+
except ImportError:
|
| 501 |
+
raise ValueError("google-cloud-texttospeech 패키지가 설치되지 않았습니다.")
|
| 502 |
except Exception as e:
|
| 503 |
raise ValueError(f"Gemini TTS API 오류: {str(e)}")
|
| 504 |
|