Spaces:
Sleeping
Sleeping
Commit
·
62f57ec
1
Parent(s):
6eee52e
한국어 TTS 아레나 - Docker Space 배포
Browse files
- 채널톡 TTS API 통합
- 한국어 UI/UX
- Conversational 기능 제거, TTS 전용
- Docker 배포 설정 추가
- About 페이지 한국어 TTS 벤치마크 설명 추가
- .dockerignore +34 -0
- Dockerfile +33 -0
- README.md +34 -9
- app.py +15 -368
- ko_prompts.json +55 -0
- models.py +36 -201
- requirements.txt +1 -4
- static/channeltalk-logo-kr.svg +19 -0
- templates/about.html +255 -240
- templates/arena.html +57 -1144
- templates/base.html +45 -5
- tts.py +188 -268
.dockerignore
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Git
|
| 2 |
+
.git
|
| 3 |
+
.gitignore
|
| 4 |
+
|
| 5 |
+
# Python
|
| 6 |
+
__pycache__
|
| 7 |
+
*.py[cod]
|
| 8 |
+
*$py.class
|
| 9 |
+
*.so
|
| 10 |
+
.Python
|
| 11 |
+
env/
|
| 12 |
+
venv/
|
| 13 |
+
.env
|
| 14 |
+
*.egg-info/
|
| 15 |
+
dist/
|
| 16 |
+
build/
|
| 17 |
+
|
| 18 |
+
# IDE
|
| 19 |
+
.vscode/
|
| 20 |
+
.idea/
|
| 21 |
+
*.swp
|
| 22 |
+
*.swo
|
| 23 |
+
|
| 24 |
+
# Local files
|
| 25 |
+
instance/
|
| 26 |
+
*.db
|
| 27 |
+
*.sqlite
|
| 28 |
+
tts_cache/
|
| 29 |
+
audio_cache/
|
| 30 |
+
|
| 31 |
+
# Misc
|
| 32 |
+
.DS_Store
|
| 33 |
+
*.log
|
| 34 |
+
|
Dockerfile
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Hugging Face Spaces Docker
|
| 2 |
+
FROM python:3.11-slim
|
| 3 |
+
|
| 4 |
+
# Create non-root user
|
| 5 |
+
RUN useradd -m -u 1000 user
|
| 6 |
+
USER user
|
| 7 |
+
|
| 8 |
+
ENV PATH="/home/user/.local/bin:$PATH"
|
| 9 |
+
ENV HOME="/home/user"
|
| 10 |
+
|
| 11 |
+
WORKDIR /app
|
| 12 |
+
|
| 13 |
+
# Copy requirements first for better caching
|
| 14 |
+
COPY --chown=user ./requirements.txt requirements.txt
|
| 15 |
+
RUN pip install --no-cache-dir --upgrade -r requirements.txt
|
| 16 |
+
|
| 17 |
+
# Copy application files
|
| 18 |
+
COPY --chown=user . /app
|
| 19 |
+
|
| 20 |
+
# Create necessary directories
|
| 21 |
+
RUN mkdir -p /app/instance /app/tts_cache /app/audio_cache
|
| 22 |
+
|
| 23 |
+
# Set environment variables for HF Spaces
|
| 24 |
+
ENV FLASK_ENV=production
|
| 25 |
+
ENV IS_SPACES=true
|
| 26 |
+
ENV PORT=7860
|
| 27 |
+
|
| 28 |
+
# Expose port
|
| 29 |
+
EXPOSE 7860
|
| 30 |
+
|
| 31 |
+
# Launch app.py, which starts the Waitress server itself (waitress is already in requirements.txt)
|
| 32 |
+
CMD ["python", "app.py"]
|
| 33 |
+
|
README.md
CHANGED
|
@@ -1,16 +1,41 @@
|
|
| 1 |
---
|
| 2 |
-
title: TTS
|
| 3 |
-
emoji:
|
| 4 |
-
colorFrom:
|
| 5 |
colorTo: blue
|
| 6 |
-
sdk:
|
| 7 |
-
|
| 8 |
-
short_description:
|
| 9 |
pinned: true
|
| 10 |
-
|
| 11 |
hf_oauth: true
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
---
|
| 13 |
|
| 14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
-
|
|
|
|
| 1 |
---
|
| 2 |
+
title: 한국어 TTS 아레나
|
| 3 |
+
emoji: 🎤
|
| 4 |
+
colorFrom: purple
|
| 5 |
colorTo: blue
|
| 6 |
+
sdk: docker
|
| 7 |
+
app_port: 7860
|
| 8 |
+
short_description: 한국어 TTS 모델을 블라인드 테스트로 비교 평가하세요!
|
| 9 |
pinned: true
|
|
|
|
| 10 |
hf_oauth: true
|
| 11 |
+
hf_oauth_scopes:
|
| 12 |
+
- read-repos
|
| 13 |
+
- write-repos
|
| 14 |
+
- manage-repos
|
| 15 |
+
- inference-api
|
| 16 |
---
|
| 17 |
|
| 18 |
+
# 🎤 한국어 TTS 아레나
|
| 19 |
+
|
| 20 |
+
한국어 TTS 모델을 블라인드 테스트로 비교 평가하는 커뮤니티 기반 플랫폼입니다.
|
| 21 |
+
|
| 22 |
+
## 왜 한국어 TTS 벤치마크가 필요한가?
|
| 23 |
+
|
| 24 |
+
- **WER (Word Error Rate)**: 한국어의 복잡한 발화 패턴을 제대로 반영하지 못함
|
| 25 |
+
- **MOS (Mean Opinion Score)**: 소규모 참가자 대상의 주관적 평가로 한계 존재
|
| 26 |
+
- **글로벌 TTS 모델의 한국어 한계**: 운율(Prosody) 부자연스러움, 숫자/날짜/전화번호 발화 취약
|
| 27 |
+
|
| 28 |
+
## 사용 방법
|
| 29 |
+
|
| 30 |
+
1. 텍스트를 입력하거나 랜덤 문장을 선택
|
| 31 |
+
2. 두 TTS 모델의 음성을 듣고 비교
|
| 32 |
+
3. 더 자연스러운 음성에 투표
|
| 33 |
+
4. 리더보드에서 모델 순위 확인
|
| 34 |
+
|
| 35 |
+
## Supported by
|
| 36 |
+
|
| 37 |
+
[채널톡](https://channel.io/ko) AI Team
|
| 38 |
+
|
| 39 |
+
## 참고 자료
|
| 40 |
|
| 41 |
+
- [Channel TTS: Towards Real-World Prosody for Conversational Agents](https://tts.ch.dev/)
|
app.py
CHANGED
|
@@ -5,12 +5,11 @@ from concurrent.futures import ThreadPoolExecutor
|
|
| 5 |
from datetime import datetime
|
| 6 |
import threading # Added for locking
|
| 7 |
from sqlalchemy import or_ # Added for vote counting query
|
| 8 |
-
from datasets import load_dataset
|
| 9 |
|
| 10 |
year = datetime.now().year
|
| 11 |
month = datetime.now().month
|
| 12 |
|
| 13 |
-
# Check if running in a
|
| 14 |
IS_SPACES = False
|
| 15 |
if os.getenv("SPACE_REPO_NAME"):
|
| 16 |
print("Running in a Hugging Face Space 🤗")
|
|
@@ -22,7 +21,7 @@ if os.getenv("SPACE_REPO_NAME"):
|
|
| 22 |
try:
|
| 23 |
print("Database not found, downloading from HF dataset...")
|
| 24 |
hf_hub_download(
|
| 25 |
-
repo_id="
|
| 26 |
filename="tts_arena.db",
|
| 27 |
repo_type="dataset",
|
| 28 |
local_dir="instance",
|
|
@@ -68,29 +67,6 @@ from flask_migrate import Migrate
|
|
| 68 |
import requests
|
| 69 |
import functools
|
| 70 |
import time # Added for potential retries
|
| 71 |
-
from langdetect import detect, DetectorFactory
|
| 72 |
-
|
| 73 |
-
# Set random seed for consistent language detection results
|
| 74 |
-
DetectorFactory.seed = 0
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
def is_english_text(text):
|
| 78 |
-
"""
|
| 79 |
-
Detect if the given text is in English.
|
| 80 |
-
Returns True if English, False otherwise.
|
| 81 |
-
"""
|
| 82 |
-
try:
|
| 83 |
-
# Remove leading/trailing whitespace and check if text is not empty
|
| 84 |
-
text = text.strip()
|
| 85 |
-
if not text:
|
| 86 |
-
return False
|
| 87 |
-
|
| 88 |
-
# Detect language
|
| 89 |
-
detected_language = detect(text)
|
| 90 |
-
return detected_language == 'en'
|
| 91 |
-
except Exception:
|
| 92 |
-
# If detection fails, assume it's not English for safety
|
| 93 |
-
return False
|
| 94 |
|
| 95 |
|
| 96 |
def get_client_ip():
|
|
@@ -177,10 +153,6 @@ os.makedirs(CACHE_AUDIO_DIR, exist_ok=True) # Ensure cache subdir exists
|
|
| 177 |
app.tts_sessions = {}
|
| 178 |
tts_sessions = app.tts_sessions
|
| 179 |
|
| 180 |
-
# Store active conversational sessions
|
| 181 |
-
app.conversational_sessions = {}
|
| 182 |
-
conversational_sessions = app.conversational_sessions
|
| 183 |
-
|
| 184 |
# Register blueprints
|
| 185 |
app.register_blueprint(auth, url_prefix="/auth")
|
| 186 |
app.register_blueprint(admin)
|
|
@@ -332,12 +304,13 @@ def verify_turnstile():
|
|
| 332 |
# Otherwise redirect back to turnstile page
|
| 333 |
return redirect(url_for("turnstile_page", redirect_url=redirect_url))
|
| 334 |
|
| 335 |
-
# Load
|
| 336 |
-
print("Loading TTS
|
| 337 |
-
|
| 338 |
-
|
| 339 |
-
|
| 340 |
-
|
|
|
|
| 341 |
|
| 342 |
# Initialize initial_sentences as empty - will be populated with unconsumed sentences only
|
| 343 |
initial_sentences = []
|
|
@@ -351,42 +324,29 @@ def arena():
|
|
| 351 |
@app.route("/leaderboard")
|
| 352 |
def leaderboard():
|
| 353 |
tts_leaderboard = get_leaderboard_data(ModelType.TTS)
|
| 354 |
-
conversational_leaderboard = get_leaderboard_data(ModelType.CONVERSATIONAL)
|
| 355 |
top_voters = get_top_voters(10) # Get top 10 voters
|
| 356 |
|
| 357 |
# Initialize personal leaderboard data
|
| 358 |
tts_personal_leaderboard = None
|
| 359 |
-
conversational_personal_leaderboard = None
|
| 360 |
user_leaderboard_visibility = None
|
| 361 |
|
| 362 |
# If user is logged in, get their personal leaderboard and visibility setting
|
| 363 |
if current_user.is_authenticated:
|
| 364 |
tts_personal_leaderboard = get_user_leaderboard(current_user.id, ModelType.TTS)
|
| 365 |
-
conversational_personal_leaderboard = get_user_leaderboard(
|
| 366 |
-
current_user.id, ModelType.CONVERSATIONAL
|
| 367 |
-
)
|
| 368 |
user_leaderboard_visibility = current_user.show_in_leaderboard
|
| 369 |
|
| 370 |
# Get key dates for the timeline
|
| 371 |
tts_key_dates = get_key_historical_dates(ModelType.TTS)
|
| 372 |
-
conversational_key_dates = get_key_historical_dates(ModelType.CONVERSATIONAL)
|
| 373 |
|
| 374 |
# Format dates for display in the dropdown
|
| 375 |
formatted_tts_dates = [date.strftime("%B %Y") for date in tts_key_dates]
|
| 376 |
-
formatted_conversational_dates = [
|
| 377 |
-
date.strftime("%B %Y") for date in conversational_key_dates
|
| 378 |
-
]
|
| 379 |
|
| 380 |
return render_template(
|
| 381 |
"leaderboard.html",
|
| 382 |
tts_leaderboard=tts_leaderboard,
|
| 383 |
-
conversational_leaderboard=conversational_leaderboard,
|
| 384 |
tts_personal_leaderboard=tts_personal_leaderboard,
|
| 385 |
-
conversational_personal_leaderboard=conversational_personal_leaderboard,
|
| 386 |
tts_key_dates=tts_key_dates,
|
| 387 |
-
conversational_key_dates=conversational_key_dates,
|
| 388 |
formatted_tts_dates=formatted_tts_dates,
|
| 389 |
-
formatted_conversational_dates=formatted_conversational_dates,
|
| 390 |
top_voters=top_voters,
|
| 391 |
user_leaderboard_visibility=user_leaderboard_visibility
|
| 392 |
)
|
|
@@ -395,7 +355,7 @@ def leaderboard():
|
|
| 395 |
@app.route("/api/historical-leaderboard/<model_type>")
|
| 396 |
def historical_leaderboard(model_type):
|
| 397 |
"""Get historical leaderboard data for a specific date"""
|
| 398 |
-
if model_type
|
| 399 |
return jsonify({"error": "Invalid model type"}), 400
|
| 400 |
|
| 401 |
# Get date from query parameter
|
|
@@ -939,303 +899,6 @@ def cleanup_session(session_id):
|
|
| 939 |
del app.tts_sessions[session_id]
|
| 940 |
|
| 941 |
|
| 942 |
-
@app.route("/api/conversational/generate", methods=["POST"])
|
| 943 |
-
@limiter.limit("5 per minute")
|
| 944 |
-
def generate_podcast():
|
| 945 |
-
# If verification not setup, handle it first
|
| 946 |
-
if app.config["TURNSTILE_ENABLED"] and not session.get("turnstile_verified"):
|
| 947 |
-
return jsonify({"error": "Turnstile verification required"}), 403
|
| 948 |
-
|
| 949 |
-
# Require user to be logged in to generate audio
|
| 950 |
-
if not current_user.is_authenticated:
|
| 951 |
-
return jsonify({"error": "You must be logged in to generate audio"}), 401
|
| 952 |
-
|
| 953 |
-
data = request.json
|
| 954 |
-
script = data.get("script")
|
| 955 |
-
|
| 956 |
-
if not script or not isinstance(script, list) or len(script) < 2:
|
| 957 |
-
return jsonify({"error": "Invalid script format or too short"}), 400
|
| 958 |
-
|
| 959 |
-
# Validate script format
|
| 960 |
-
for line in script:
|
| 961 |
-
if not isinstance(line, dict) or "text" not in line or "speaker_id" not in line:
|
| 962 |
-
return (
|
| 963 |
-
jsonify(
|
| 964 |
-
{
|
| 965 |
-
"error": "Invalid script line format. Each line must have text and speaker_id"
|
| 966 |
-
}
|
| 967 |
-
),
|
| 968 |
-
400,
|
| 969 |
-
)
|
| 970 |
-
if (
|
| 971 |
-
not line["text"]
|
| 972 |
-
or not isinstance(line["speaker_id"], int)
|
| 973 |
-
or line["speaker_id"] not in [0, 1]
|
| 974 |
-
):
|
| 975 |
-
return (
|
| 976 |
-
jsonify({"error": "Invalid script content. Speaker ID must be 0 or 1"}),
|
| 977 |
-
400,
|
| 978 |
-
)
|
| 979 |
-
|
| 980 |
-
# Get two conversational models (currently only CSM and PlayDialog)
|
| 981 |
-
available_models = Model.query.filter_by(
|
| 982 |
-
model_type=ModelType.CONVERSATIONAL, is_active=True
|
| 983 |
-
).all()
|
| 984 |
-
|
| 985 |
-
if len(available_models) < 2:
|
| 986 |
-
return jsonify({"error": "Not enough conversational models available"}), 500
|
| 987 |
-
|
| 988 |
-
selected_models = get_weighted_random_models(available_models, 2, ModelType.CONVERSATIONAL)
|
| 989 |
-
|
| 990 |
-
try:
|
| 991 |
-
# Generate audio for both models concurrently
|
| 992 |
-
audio_files = []
|
| 993 |
-
model_ids = []
|
| 994 |
-
|
| 995 |
-
# Function to process a single model
|
| 996 |
-
def process_model(model):
|
| 997 |
-
# Call conversational TTS service
|
| 998 |
-
audio_content = predict_tts(script, model.id)
|
| 999 |
-
|
| 1000 |
-
# Save to temp file with unique name
|
| 1001 |
-
file_uuid = str(uuid.uuid4())
|
| 1002 |
-
dest_path = os.path.join(TEMP_AUDIO_DIR, f"{file_uuid}.wav")
|
| 1003 |
-
|
| 1004 |
-
with open(dest_path, "wb") as f:
|
| 1005 |
-
f.write(audio_content)
|
| 1006 |
-
|
| 1007 |
-
return {"model_id": model.id, "audio_path": dest_path}
|
| 1008 |
-
|
| 1009 |
-
# Use ThreadPoolExecutor to process models concurrently
|
| 1010 |
-
with ThreadPoolExecutor(max_workers=2) as executor:
|
| 1011 |
-
results = list(executor.map(process_model, selected_models))
|
| 1012 |
-
|
| 1013 |
-
# Extract results
|
| 1014 |
-
for result in results:
|
| 1015 |
-
model_ids.append(result["model_id"])
|
| 1016 |
-
audio_files.append(result["audio_path"])
|
| 1017 |
-
|
| 1018 |
-
# Create session
|
| 1019 |
-
session_id = str(uuid.uuid4())
|
| 1020 |
-
script_text = " ".join([line["text"] for line in script])
|
| 1021 |
-
app.conversational_sessions[session_id] = {
|
| 1022 |
-
"model_a": model_ids[0],
|
| 1023 |
-
"model_b": model_ids[1],
|
| 1024 |
-
"audio_a": audio_files[0],
|
| 1025 |
-
"audio_b": audio_files[1],
|
| 1026 |
-
"text": script_text[:1000], # Limit text length
|
| 1027 |
-
"created_at": datetime.utcnow(),
|
| 1028 |
-
"expires_at": datetime.utcnow() + timedelta(minutes=30),
|
| 1029 |
-
"voted": False,
|
| 1030 |
-
"script": script,
|
| 1031 |
-
"cache_hit": False, # Conversational is always generated on-demand
|
| 1032 |
-
}
|
| 1033 |
-
|
| 1034 |
-
# Return audio file paths and session
|
| 1035 |
-
return jsonify(
|
| 1036 |
-
{
|
| 1037 |
-
"session_id": session_id,
|
| 1038 |
-
"audio_a": f"/api/conversational/audio/{session_id}/a",
|
| 1039 |
-
"audio_b": f"/api/conversational/audio/{session_id}/b",
|
| 1040 |
-
"expires_in": 1800, # 30 minutes in seconds
|
| 1041 |
-
}
|
| 1042 |
-
)
|
| 1043 |
-
|
| 1044 |
-
except Exception as e:
|
| 1045 |
-
app.logger.error(f"Conversational generation error: {str(e)}")
|
| 1046 |
-
return jsonify({"error": f"Failed to generate podcast: {str(e)}"}), 500
|
| 1047 |
-
|
| 1048 |
-
|
| 1049 |
-
@app.route("/api/conversational/audio/<session_id>/<model_key>")
|
| 1050 |
-
def get_podcast_audio(session_id, model_key):
|
| 1051 |
-
# If verification not setup, handle it first
|
| 1052 |
-
if app.config["TURNSTILE_ENABLED"] and not session.get("turnstile_verified"):
|
| 1053 |
-
return jsonify({"error": "Turnstile verification required"}), 403
|
| 1054 |
-
|
| 1055 |
-
if session_id not in app.conversational_sessions:
|
| 1056 |
-
return jsonify({"error": "Invalid or expired session"}), 404
|
| 1057 |
-
|
| 1058 |
-
session_data = app.conversational_sessions[session_id]
|
| 1059 |
-
|
| 1060 |
-
# Check if session expired
|
| 1061 |
-
if datetime.utcnow() > session_data["expires_at"]:
|
| 1062 |
-
cleanup_conversational_session(session_id)
|
| 1063 |
-
return jsonify({"error": "Session expired"}), 410
|
| 1064 |
-
|
| 1065 |
-
if model_key == "a":
|
| 1066 |
-
audio_path = session_data["audio_a"]
|
| 1067 |
-
elif model_key == "b":
|
| 1068 |
-
audio_path = session_data["audio_b"]
|
| 1069 |
-
else:
|
| 1070 |
-
return jsonify({"error": "Invalid model key"}), 400
|
| 1071 |
-
|
| 1072 |
-
# Check if file exists
|
| 1073 |
-
if not os.path.exists(audio_path):
|
| 1074 |
-
return jsonify({"error": "Audio file not found"}), 404
|
| 1075 |
-
|
| 1076 |
-
return send_file(audio_path, mimetype="audio/wav")
|
| 1077 |
-
|
| 1078 |
-
|
| 1079 |
-
@app.route("/api/conversational/vote", methods=["POST"])
|
| 1080 |
-
@limiter.limit("30 per minute")
|
| 1081 |
-
def submit_podcast_vote():
|
| 1082 |
-
# If verification not setup, handle it first
|
| 1083 |
-
if app.config["TURNSTILE_ENABLED"] and not session.get("turnstile_verified"):
|
| 1084 |
-
return jsonify({"error": "Turnstile verification required"}), 403
|
| 1085 |
-
|
| 1086 |
-
# Require user to be logged in to vote
|
| 1087 |
-
if not current_user.is_authenticated:
|
| 1088 |
-
return jsonify({"error": "You must be logged in to vote"}), 401
|
| 1089 |
-
|
| 1090 |
-
# Security checks for vote manipulation prevention
|
| 1091 |
-
client_ip = get_client_ip()
|
| 1092 |
-
vote_allowed, security_reason, security_score = is_vote_allowed(current_user.id, client_ip)
|
| 1093 |
-
|
| 1094 |
-
if not vote_allowed:
|
| 1095 |
-
app.logger.warning(f"Conversational vote blocked for user {current_user.username} (ID: {current_user.id}): {security_reason} (Score: {security_score})")
|
| 1096 |
-
return jsonify({"error": f"Vote not allowed: {security_reason}"}), 403
|
| 1097 |
-
|
| 1098 |
-
data = request.json
|
| 1099 |
-
session_id = data.get("session_id")
|
| 1100 |
-
chosen_model_key = data.get("chosen_model") # "a" or "b"
|
| 1101 |
-
|
| 1102 |
-
if not session_id or session_id not in app.conversational_sessions:
|
| 1103 |
-
return jsonify({"error": "Invalid or expired session"}), 404
|
| 1104 |
-
|
| 1105 |
-
if not chosen_model_key or chosen_model_key not in ["a", "b"]:
|
| 1106 |
-
return jsonify({"error": "Invalid chosen model"}), 400
|
| 1107 |
-
|
| 1108 |
-
session_data = app.conversational_sessions[session_id]
|
| 1109 |
-
|
| 1110 |
-
# Check if session expired
|
| 1111 |
-
if datetime.utcnow() > session_data["expires_at"]:
|
| 1112 |
-
cleanup_conversational_session(session_id)
|
| 1113 |
-
return jsonify({"error": "Session expired"}), 410
|
| 1114 |
-
|
| 1115 |
-
# Check if already voted
|
| 1116 |
-
if session_data["voted"]:
|
| 1117 |
-
return jsonify({"error": "Vote already submitted for this session"}), 400
|
| 1118 |
-
|
| 1119 |
-
# Get model IDs and audio paths
|
| 1120 |
-
chosen_id = (
|
| 1121 |
-
session_data["model_a"] if chosen_model_key == "a" else session_data["model_b"]
|
| 1122 |
-
)
|
| 1123 |
-
rejected_id = (
|
| 1124 |
-
session_data["model_b"] if chosen_model_key == "a" else session_data["model_a"]
|
| 1125 |
-
)
|
| 1126 |
-
chosen_audio_path = (
|
| 1127 |
-
session_data["audio_a"] if chosen_model_key == "a" else session_data["audio_b"]
|
| 1128 |
-
)
|
| 1129 |
-
rejected_audio_path = (
|
| 1130 |
-
session_data["audio_b"] if chosen_model_key == "a" else session_data["audio_a"]
|
| 1131 |
-
)
|
| 1132 |
-
|
| 1133 |
-
# Calculate session duration and gather analytics data
|
| 1134 |
-
vote_time = datetime.utcnow()
|
| 1135 |
-
session_duration = (vote_time - session_data["created_at"]).total_seconds()
|
| 1136 |
-
client_ip = get_client_ip()
|
| 1137 |
-
user_agent = request.headers.get('User-Agent')
|
| 1138 |
-
cache_hit = session_data.get("cache_hit", False)
|
| 1139 |
-
|
| 1140 |
-
# Record vote in database with analytics data
|
| 1141 |
-
vote, error = record_vote(
|
| 1142 |
-
current_user.id,
|
| 1143 |
-
session_data["text"],
|
| 1144 |
-
chosen_id,
|
| 1145 |
-
rejected_id,
|
| 1146 |
-
ModelType.CONVERSATIONAL,
|
| 1147 |
-
session_duration=session_duration,
|
| 1148 |
-
ip_address=client_ip,
|
| 1149 |
-
user_agent=user_agent,
|
| 1150 |
-
generation_date=session_data["created_at"],
|
| 1151 |
-
cache_hit=cache_hit,
|
| 1152 |
-
all_dataset_sentences=all_harvard_sentences # Note: conversational uses scripts, not sentences
|
| 1153 |
-
)
|
| 1154 |
-
|
| 1155 |
-
if error:
|
| 1156 |
-
return jsonify({"error": error}), 500
|
| 1157 |
-
|
| 1158 |
-
# Sentence consumption is now handled within record_vote function
|
| 1159 |
-
|
| 1160 |
-
# --- Save preference data ---\
|
| 1161 |
-
try:
|
| 1162 |
-
vote_uuid = str(uuid.uuid4())
|
| 1163 |
-
vote_dir = os.path.join("./votes", vote_uuid)
|
| 1164 |
-
os.makedirs(vote_dir, exist_ok=True)
|
| 1165 |
-
|
| 1166 |
-
# Copy audio files
|
| 1167 |
-
shutil.copy(chosen_audio_path, os.path.join(vote_dir, "chosen.wav"))
|
| 1168 |
-
shutil.copy(rejected_audio_path, os.path.join(vote_dir, "rejected.wav"))
|
| 1169 |
-
|
| 1170 |
-
# Create metadata
|
| 1171 |
-
chosen_model_obj = Model.query.get(chosen_id)
|
| 1172 |
-
rejected_model_obj = Model.query.get(rejected_id)
|
| 1173 |
-
metadata = {
|
| 1174 |
-
"script": session_data["script"], # Save the full script
|
| 1175 |
-
"chosen_model": chosen_model_obj.name if chosen_model_obj else "Unknown",
|
| 1176 |
-
"chosen_model_id": chosen_model_obj.id if chosen_model_obj else "Unknown",
|
| 1177 |
-
"rejected_model": rejected_model_obj.name if rejected_model_obj else "Unknown",
|
| 1178 |
-
"rejected_model_id": rejected_model_obj.id if rejected_model_obj else "Unknown",
|
| 1179 |
-
"session_id": session_id,
|
| 1180 |
-
"timestamp": datetime.utcnow().isoformat(),
|
| 1181 |
-
"username": current_user.username,
|
| 1182 |
-
"model_type": "CONVERSATIONAL"
|
| 1183 |
-
}
|
| 1184 |
-
with open(os.path.join(vote_dir, "metadata.json"), "w") as f:
|
| 1185 |
-
json.dump(metadata, f, indent=2)
|
| 1186 |
-
|
| 1187 |
-
except Exception as e:
|
| 1188 |
-
app.logger.error(f"Error saving preference data for conversational vote {session_id}: {str(e)}")
|
| 1189 |
-
# Continue even if saving preference data fails, vote is already recorded
|
| 1190 |
-
|
| 1191 |
-
# Mark session as voted
|
| 1192 |
-
session_data["voted"] = True
|
| 1193 |
-
|
| 1194 |
-
# Check for coordinated voting campaigns (async to not slow down response)
|
| 1195 |
-
try:
|
| 1196 |
-
from threading import Thread
|
| 1197 |
-
campaign_check_thread = Thread(target=check_for_coordinated_campaigns)
|
| 1198 |
-
campaign_check_thread.daemon = True
|
| 1199 |
-
campaign_check_thread.start()
|
| 1200 |
-
except Exception as e:
|
| 1201 |
-
app.logger.error(f"Error starting coordinated campaign check thread: {str(e)}")
|
| 1202 |
-
|
| 1203 |
-
# Return updated models (use previously fetched objects)
|
| 1204 |
-
return jsonify(
|
| 1205 |
-
{
|
| 1206 |
-
"success": True,
|
| 1207 |
-
"chosen_model": {"id": chosen_id, "name": chosen_model_obj.name if chosen_model_obj else "Unknown"},
|
| 1208 |
-
"rejected_model": {
|
| 1209 |
-
"id": rejected_id,
|
| 1210 |
-
"name": rejected_model_obj.name if rejected_model_obj else "Unknown",
|
| 1211 |
-
},
|
| 1212 |
-
"names": {
|
| 1213 |
-
"a": Model.query.get(session_data["model_a"]).name,
|
| 1214 |
-
"b": Model.query.get(session_data["model_b"]).name,
|
| 1215 |
-
},
|
| 1216 |
-
}
|
| 1217 |
-
)
|
| 1218 |
-
|
| 1219 |
-
|
| 1220 |
-
def cleanup_conversational_session(session_id):
|
| 1221 |
-
"""Remove conversational session and its audio files"""
|
| 1222 |
-
if session_id in app.conversational_sessions:
|
| 1223 |
-
session = app.conversational_sessions[session_id]
|
| 1224 |
-
|
| 1225 |
-
# Remove audio files
|
| 1226 |
-
for audio_file in [session["audio_a"], session["audio_b"]]:
|
| 1227 |
-
if os.path.exists(audio_file):
|
| 1228 |
-
try:
|
| 1229 |
-
os.remove(audio_file)
|
| 1230 |
-
except Exception as e:
|
| 1231 |
-
app.logger.error(
|
| 1232 |
-
f"Error removing conversational audio file: {str(e)}"
|
| 1233 |
-
)
|
| 1234 |
-
|
| 1235 |
-
# Remove session
|
| 1236 |
-
del app.conversational_sessions[session_id]
|
| 1237 |
-
|
| 1238 |
-
|
| 1239 |
# Schedule periodic cleanup
|
| 1240 |
def setup_cleanup():
|
| 1241 |
def cleanup_expired_sessions():
|
|
@@ -1249,16 +912,7 @@ def setup_cleanup():
|
|
| 1249 |
]
|
| 1250 |
for sid in expired_tts_sessions:
|
| 1251 |
cleanup_session(sid)
|
| 1252 |
-
|
| 1253 |
-
# Cleanup conversational sessions
|
| 1254 |
-
expired_conv_sessions = [
|
| 1255 |
-
sid
|
| 1256 |
-
for sid, session_data in app.conversational_sessions.items()
|
| 1257 |
-
if current_time > session_data["expires_at"]
|
| 1258 |
-
]
|
| 1259 |
-
for sid in expired_conv_sessions:
|
| 1260 |
-
cleanup_conversational_session(sid)
|
| 1261 |
-
app.logger.info(f"Cleaned up {len(expired_tts_sessions)} TTS and {len(expired_conv_sessions)} conversational sessions.")
|
| 1262 |
|
| 1263 |
# Also cleanup potentially expired cache entries (e.g., > 1 hour old)
|
| 1264 |
# This prevents stale cache entries if generation is slow or failing
|
|
@@ -1593,14 +1247,6 @@ def check_for_coordinated_campaigns():
|
|
| 1593 |
detect_coordinated_voting(model.id)
|
| 1594 |
except Exception as e:
|
| 1595 |
app.logger.error(f"Error checking coordinated voting for TTS model {model.id}: {str(e)}")
|
| 1596 |
-
|
| 1597 |
-
# Check conversational models
|
| 1598 |
-
conv_models = Model.query.filter_by(model_type=ModelType.CONVERSATIONAL, is_active=True).all()
|
| 1599 |
-
for model in conv_models:
|
| 1600 |
-
try:
|
| 1601 |
-
detect_coordinated_voting(model.id)
|
| 1602 |
-
except Exception as e:
|
| 1603 |
-
app.logger.error(f"Error checking coordinated voting for conversational model {model.id}: {str(e)}")
|
| 1604 |
|
| 1605 |
except Exception as e:
|
| 1606 |
app.logger.error(f"Error in coordinated campaign check: {str(e)}")
|
|
@@ -1682,13 +1328,14 @@ if __name__ == "__main__":
|
|
| 1682 |
url_scheme='https'
|
| 1683 |
)
|
| 1684 |
else:
|
| 1685 |
-
|
|
|
|
| 1686 |
serve(
|
| 1687 |
app,
|
| 1688 |
host="0.0.0.0",
|
| 1689 |
-
port=
|
| 1690 |
threads=threads,
|
| 1691 |
connection_limit=100,
|
| 1692 |
channel_timeout=30,
|
| 1693 |
-
url_scheme='
|
| 1694 |
)
|
|
|
|
| 5 |
from datetime import datetime
|
| 6 |
import threading # Added for locking
|
| 7 |
from sqlalchemy import or_ # Added for vote counting query
|
|
|
|
| 8 |
|
| 9 |
year = datetime.now().year
|
| 10 |
month = datetime.now().month
|
| 11 |
|
| 12 |
+
# Check if running in a Hugging Face Space
|
| 13 |
IS_SPACES = False
|
| 14 |
if os.getenv("SPACE_REPO_NAME"):
|
| 15 |
print("Running in a Hugging Face Space 🤗")
|
|
|
|
| 21 |
try:
|
| 22 |
print("Database not found, downloading from HF dataset...")
|
| 23 |
hf_hub_download(
|
| 24 |
+
repo_id="channelcorp/ko-tts-arena-db",
|
| 25 |
filename="tts_arena.db",
|
| 26 |
repo_type="dataset",
|
| 27 |
local_dir="instance",
|
|
|
|
| 67 |
import requests
|
| 68 |
import functools
|
| 69 |
import time # Added for potential retries
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
|
| 71 |
|
| 72 |
def get_client_ip():
|
|
|
|
| 153 |
app.tts_sessions = {}
|
| 154 |
tts_sessions = app.tts_sessions
|
| 155 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 156 |
# Register blueprints
|
| 157 |
app.register_blueprint(auth, url_prefix="/auth")
|
| 158 |
app.register_blueprint(admin)
|
|
|
|
| 304 |
# Otherwise redirect back to turnstile page
|
| 305 |
return redirect(url_for("turnstile_page", redirect_url=redirect_url))
|
| 306 |
|
| 307 |
+
# Load Korean prompts from local JSON file
|
| 308 |
+
print("Loading Korean TTS prompts from ko_prompts.json...")
|
| 309 |
+
_prompts_path = os.path.join(os.path.dirname(__file__), "ko_prompts.json")
|
| 310 |
+
with open(_prompts_path, "r", encoding="utf-8") as f:
|
| 311 |
+
_prompts_data = json.load(f)
|
| 312 |
+
all_harvard_sentences = _prompts_data.get("prompts", [])
|
| 313 |
+
print(f"Loaded {len(all_harvard_sentences)} Korean prompts")
|
| 314 |
|
| 315 |
# Initialize initial_sentences as empty - will be populated with unconsumed sentences only
|
| 316 |
initial_sentences = []
|
|
|
|
| 324 |
@app.route("/leaderboard")
|
| 325 |
def leaderboard():
|
| 326 |
tts_leaderboard = get_leaderboard_data(ModelType.TTS)
|
|
|
|
| 327 |
top_voters = get_top_voters(10) # Get top 10 voters
|
| 328 |
|
| 329 |
# Initialize personal leaderboard data
|
| 330 |
tts_personal_leaderboard = None
|
|
|
|
| 331 |
user_leaderboard_visibility = None
|
| 332 |
|
| 333 |
# If user is logged in, get their personal leaderboard and visibility setting
|
| 334 |
if current_user.is_authenticated:
|
| 335 |
tts_personal_leaderboard = get_user_leaderboard(current_user.id, ModelType.TTS)
|
|
|
|
|
|
|
|
|
|
| 336 |
user_leaderboard_visibility = current_user.show_in_leaderboard
|
| 337 |
|
| 338 |
# Get key dates for the timeline
|
| 339 |
tts_key_dates = get_key_historical_dates(ModelType.TTS)
|
|
|
|
| 340 |
|
| 341 |
# Format dates for display in the dropdown
|
| 342 |
formatted_tts_dates = [date.strftime("%B %Y") for date in tts_key_dates]
|
|
|
|
|
|
|
|
|
|
| 343 |
|
| 344 |
return render_template(
|
| 345 |
"leaderboard.html",
|
| 346 |
tts_leaderboard=tts_leaderboard,
|
|
|
|
| 347 |
tts_personal_leaderboard=tts_personal_leaderboard,
|
|
|
|
| 348 |
tts_key_dates=tts_key_dates,
|
|
|
|
| 349 |
formatted_tts_dates=formatted_tts_dates,
|
|
|
|
| 350 |
top_voters=top_voters,
|
| 351 |
user_leaderboard_visibility=user_leaderboard_visibility
|
| 352 |
)
|
|
|
|
| 355 |
@app.route("/api/historical-leaderboard/<model_type>")
|
| 356 |
def historical_leaderboard(model_type):
|
| 357 |
"""Get historical leaderboard data for a specific date"""
|
| 358 |
+
if model_type != ModelType.TTS:
|
| 359 |
return jsonify({"error": "Invalid model type"}), 400
|
| 360 |
|
| 361 |
# Get date from query parameter
|
|
|
|
| 899 |
del app.tts_sessions[session_id]
|
| 900 |
|
| 901 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 902 |
# Schedule periodic cleanup
|
| 903 |
def setup_cleanup():
|
| 904 |
def cleanup_expired_sessions():
|
|
|
|
| 912 |
]
|
| 913 |
for sid in expired_tts_sessions:
|
| 914 |
cleanup_session(sid)
|
| 915 |
+
app.logger.info(f"Cleaned up {len(expired_tts_sessions)} TTS sessions.")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 916 |
|
| 917 |
# Also cleanup potentially expired cache entries (e.g., > 1 hour old)
|
| 918 |
# This prevents stale cache entries if generation is slow or failing
|
|
|
|
| 1247 |
detect_coordinated_voting(model.id)
|
| 1248 |
except Exception as e:
|
| 1249 |
app.logger.error(f"Error checking coordinated voting for TTS model {model.id}: {str(e)}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1250 |
|
| 1251 |
except Exception as e:
|
| 1252 |
app.logger.error(f"Error in coordinated campaign check: {str(e)}")
|
|
|
|
| 1328 |
url_scheme='https'
|
| 1329 |
)
|
| 1330 |
else:
|
| 1331 |
+
port = int(os.environ.get("PORT", 5001))
|
| 1332 |
+
print(f"Starting Waitress server with {threads} threads on port {port}")
|
| 1333 |
serve(
|
| 1334 |
app,
|
| 1335 |
host="0.0.0.0",
|
| 1336 |
+
port=port,
|
| 1337 |
threads=threads,
|
| 1338 |
connection_limit=100,
|
| 1339 |
channel_timeout=30,
|
| 1340 |
+
url_scheme='http' # Local dev uses http
|
| 1341 |
)
|
ko_prompts.json
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"prompts": [
|
| 3 |
+
"안녕하세요, 오늘 날씨가 정말 좋네요.",
|
| 4 |
+
"지금 몇 시예요? 약속 시간에 늦을 것 같아요.",
|
| 5 |
+
"오늘 저녁에 뭐 먹을까요? 치킨이 땡기는데.",
|
| 6 |
+
"주말에 시간 되시면 같이 영화 보러 갈래요?",
|
| 7 |
+
"커피 한 잔 하실래요? 제가 살게요.",
|
| 8 |
+
"회의는 오후 세 시에 시작합니다. 자료 준비해 주세요.",
|
| 9 |
+
"이번 분기 매출이 전년 대비 이십 퍼센트 증가했습니다.",
|
| 10 |
+
"고객님, 문의하신 내용 확인 후 답변드리겠습니다.",
|
| 11 |
+
"프로젝트 마감일이 다음 주 금요일입니다.",
|
| 12 |
+
"미팅 일정을 조율하고 싶은데, 언제가 편하신가요?",
|
| 13 |
+
"채널톡 고객센터입니다. 무엇을 도와드릴까요?",
|
| 14 |
+
"주문하신 상품은 내일 오전 중으로 배송될 예정입니다.",
|
| 15 |
+
"불편을 드려 죄송합니다. 바로 처리해 드리겠습니다.",
|
| 16 |
+
"결제가 정상적으로 완료되었습니다. 감사합니다.",
|
| 17 |
+
"반품 신청이 접수되었습니다. 삼 영업일 내에 처리됩니다.",
|
| 18 |
+
"서울의 현재 기온은 섭씨 이십오 도입니다.",
|
| 19 |
+
"다음 정류장은 강남역입니다. 내리실 분은 준비해 주세요.",
|
| 20 |
+
"오늘의 환율은 달러당 천삼백원입니다.",
|
| 21 |
+
"이 제품의 가격은 삼만구천원입니다.",
|
| 22 |
+
"영업시간은 오전 아홉 시부터 오후 여섯 시까지입니다.",
|
| 23 |
+
"정말 기쁜 소식이에요! 축하드려요!",
|
| 24 |
+
"걱정하지 마세요, 다 잘 될 거예요.",
|
| 25 |
+
"오랜만이에요! 그동안 잘 지내셨어요?",
|
| 26 |
+
"정말 감사합니다. 덕분에 큰 도움이 됐어요.",
|
| 27 |
+
"아쉽지만 다음 기회에 뵙겠습니다.",
|
| 28 |
+
"문을 열려면 버튼을 눌러주세요.",
|
| 29 |
+
"왼쪽으로 돌아서 직진하시면 됩니다.",
|
| 30 |
+
"앱을 설치하고 회원가입을 진행해 주세요.",
|
| 31 |
+
"비밀번호는 여덟 자리 이상으로 설정해 주세요.",
|
| 32 |
+
"첨부파일을 확인하시고 서명해 주세요.",
|
| 33 |
+
"오늘 주요 뉴스를 전해드리겠습니다.",
|
| 34 |
+
"정부가 새로운 정책을 발표했습니다.",
|
| 35 |
+
"국내 반도체 수출이 사상 최대치를 기록했습니다.",
|
| 36 |
+
"내일 전국적으로 비가 내릴 예정입니다.",
|
| 37 |
+
"올해 출생률이 역대 최저를 기록했습니다.",
|
| 38 |
+
"오늘 수업에서는 인공지능의 기초를 배워보겠습니다.",
|
| 39 |
+
"이 문제의 정답은 삼번입니다.",
|
| 40 |
+
"다음 시간까지 과제를 제출해 주세요.",
|
| 41 |
+
"질문이 있으시면 언제든지 물어보세요.",
|
| 42 |
+
"복습은 학습의 가장 중요한 부분입니다.",
|
| 43 |
+
"이번 주 인기 영화 순위를 알려드릴게요.",
|
| 44 |
+
"새 앨범이 음원 차트 일위를 차지했습니다.",
|
| 45 |
+
"오늘 경기에서 한국팀이 이겼습니다!",
|
| 46 |
+
"다음 에피소드가 정말 기대돼요.",
|
| 47 |
+
"이 노래 가사가 정말 마음에 들어요.",
|
| 48 |
+
"인공지능 기술이 빠르게 발전하고 있습니다.",
|
| 49 |
+
"스마트폰 배터리를 절약하는 방법을 알려드릴게요.",
|
| 50 |
+
"이 앱은 무료로 다운로드할 수 있습니다.",
|
| 51 |
+
"시스템 업데이트가 완료되었습니다.",
|
| 52 |
+
"클라우드에 파일이 자동으로 저장됩니다."
|
| 53 |
+
]
|
| 54 |
+
}
|
| 55 |
+
|
models.py
CHANGED
|
@@ -566,235 +566,70 @@ def get_key_historical_dates(model_type):
|
|
| 566 |
|
| 567 |
|
| 568 |
def insert_initial_models():
|
| 569 |
-
"""Insert initial models into the database."""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 570 |
tts_models = [
|
|
|
|
| 571 |
Model(
|
| 572 |
-
id="
|
| 573 |
-
name="
|
| 574 |
-
model_type=ModelType.TTS,
|
| 575 |
-
is_open=False,
|
| 576 |
-
model_url="https://elevenlabs.io/",
|
| 577 |
-
),
|
| 578 |
-
Model(
|
| 579 |
-
id="eleven-turbo-v2.5",
|
| 580 |
-
name="Eleven Turbo v2.5",
|
| 581 |
-
model_type=ModelType.TTS,
|
| 582 |
-
is_open=False,
|
| 583 |
-
model_url="https://elevenlabs.io/",
|
| 584 |
-
),
|
| 585 |
-
Model(
|
| 586 |
-
id="eleven-flash-v2.5",
|
| 587 |
-
name="Eleven Flash v2.5",
|
| 588 |
-
model_type=ModelType.TTS,
|
| 589 |
-
is_open=False,
|
| 590 |
-
model_url="https://elevenlabs.io/",
|
| 591 |
-
),
|
| 592 |
-
Model(
|
| 593 |
-
id="cartesia-sonic-2",
|
| 594 |
-
name="Cartesia Sonic 2",
|
| 595 |
-
model_type=ModelType.TTS,
|
| 596 |
-
is_open=False,
|
| 597 |
-
is_active=False, # ran out of credits
|
| 598 |
-
model_url="https://cartesia.ai/",
|
| 599 |
-
),
|
| 600 |
-
Model(
|
| 601 |
-
id="spark-tts",
|
| 602 |
-
name="Spark TTS",
|
| 603 |
-
model_type=ModelType.TTS,
|
| 604 |
-
is_open=False,
|
| 605 |
-
is_active=False, # API stopped working
|
| 606 |
-
model_url="https://github.com/SparkAudio/Spark-TTS",
|
| 607 |
-
),
|
| 608 |
-
Model(
|
| 609 |
-
id="playht-2.0",
|
| 610 |
-
name="PlayHT 2.0",
|
| 611 |
-
model_type=ModelType.TTS,
|
| 612 |
-
is_open=False,
|
| 613 |
-
is_active=False,
|
| 614 |
-
model_url="https://play.ht/",
|
| 615 |
-
),
|
| 616 |
-
Model(
|
| 617 |
-
id="styletts2",
|
| 618 |
-
name="StyleTTS 2",
|
| 619 |
-
model_type=ModelType.TTS,
|
| 620 |
-
is_open=False,
|
| 621 |
-
is_active=False,
|
| 622 |
-
model_url="https://github.com/yl4579/StyleTTS2",
|
| 623 |
-
),
|
| 624 |
-
Model(
|
| 625 |
-
id="kokoro-v1",
|
| 626 |
-
name="Kokoro v1.0",
|
| 627 |
-
model_type=ModelType.TTS,
|
| 628 |
-
is_open=True,
|
| 629 |
-
model_url="https://huggingface.co/hexgrad/Kokoro-82M",
|
| 630 |
-
),
|
| 631 |
-
Model(
|
| 632 |
-
id="cosyvoice-2.0",
|
| 633 |
-
name="CosyVoice 2.0",
|
| 634 |
-
model_type=ModelType.TTS,
|
| 635 |
-
is_open=True,
|
| 636 |
-
model_url="https://github.com/FunAudioLLM/CosyVoice",
|
| 637 |
-
),
|
| 638 |
-
Model(
|
| 639 |
-
id="papla-p1",
|
| 640 |
-
name="Papla P1",
|
| 641 |
-
model_type=ModelType.TTS,
|
| 642 |
-
is_open=False,
|
| 643 |
-
model_url="https://papla.media/",
|
| 644 |
-
),
|
| 645 |
-
Model(
|
| 646 |
-
id="hume-octave",
|
| 647 |
-
name="Hume Octave",
|
| 648 |
-
model_type=ModelType.TTS,
|
| 649 |
-
is_open=False,
|
| 650 |
-
model_url="https://hume.ai/",
|
| 651 |
-
),
|
| 652 |
-
Model(
|
| 653 |
-
id="megatts3",
|
| 654 |
-
name="MegaTTS 3",
|
| 655 |
-
model_type=ModelType.TTS,
|
| 656 |
-
is_active=False,
|
| 657 |
-
is_open=True,
|
| 658 |
-
model_url="https://github.com/bytedance/MegaTTS3",
|
| 659 |
-
),
|
| 660 |
-
Model(
|
| 661 |
-
id="minimax-02-hd",
|
| 662 |
-
name="MiniMax Speech-02-HD",
|
| 663 |
-
model_type=ModelType.TTS,
|
| 664 |
-
is_open=False,
|
| 665 |
-
model_url="http://minimax.io/",
|
| 666 |
-
),
|
| 667 |
-
Model(
|
| 668 |
-
id="minimax-02-turbo",
|
| 669 |
-
name="MiniMax Speech-02-Turbo",
|
| 670 |
-
model_type=ModelType.TTS,
|
| 671 |
-
is_open=False,
|
| 672 |
-
model_url="http://minimax.io/",
|
| 673 |
-
),
|
| 674 |
-
Model(
|
| 675 |
-
id="lanternfish-1",
|
| 676 |
-
name="OpenAudio S1",
|
| 677 |
-
model_type=ModelType.TTS,
|
| 678 |
-
is_open=False,
|
| 679 |
-
is_active=False, # NOTE: Waiting to receive a pool of voices
|
| 680 |
-
model_url="https://fish.audio/",
|
| 681 |
-
),
|
| 682 |
-
Model(
|
| 683 |
-
id="chatterbox",
|
| 684 |
-
name="Chatterbox",
|
| 685 |
-
model_type=ModelType.TTS,
|
| 686 |
-
is_open=False,
|
| 687 |
-
is_active=True,
|
| 688 |
-
model_url="https://www.resemble.ai/chatterbox/",
|
| 689 |
-
),
|
| 690 |
-
Model(
|
| 691 |
-
id="inworld",
|
| 692 |
-
name="Inworld TTS",
|
| 693 |
-
model_type=ModelType.TTS,
|
| 694 |
-
is_open=False,
|
| 695 |
-
is_active=True,
|
| 696 |
-
model_url="https://inworld.ai/tts",
|
| 697 |
-
),
|
| 698 |
-
Model(
|
| 699 |
-
id="inworld-max",
|
| 700 |
-
name="Inworld TTS MAX",
|
| 701 |
-
model_type=ModelType.TTS,
|
| 702 |
-
is_open=False,
|
| 703 |
-
is_active=True,
|
| 704 |
-
model_url="https://inworld.ai/tts",
|
| 705 |
-
),
|
| 706 |
-
Model(
|
| 707 |
-
id="async-1",
|
| 708 |
-
name="CastleFlow v1.0",
|
| 709 |
model_type=ModelType.TTS,
|
| 710 |
is_open=False,
|
| 711 |
is_active=True,
|
| 712 |
-
model_url="https://
|
| 713 |
),
|
|
|
|
| 714 |
Model(
|
| 715 |
-
id="
|
| 716 |
-
name="
|
| 717 |
model_type=ModelType.TTS,
|
| 718 |
is_open=False,
|
| 719 |
-
is_active=
|
| 720 |
-
model_url="https://
|
| 721 |
),
|
|
|
|
| 722 |
Model(
|
| 723 |
-
id="
|
| 724 |
-
name="
|
| 725 |
model_type=ModelType.TTS,
|
| 726 |
is_open=False,
|
| 727 |
-
is_active=
|
| 728 |
-
model_url="https://
|
| 729 |
),
|
| 730 |
Model(
|
| 731 |
-
id="
|
| 732 |
-
name="
|
| 733 |
-
model_type=ModelType.TTS,
|
| 734 |
-
is_open=True,
|
| 735 |
-
is_active=True,
|
| 736 |
-
model_url="https://mayaresearch.ai/",
|
| 737 |
-
),
|
| 738 |
-
Model(
|
| 739 |
-
id="maya1",
|
| 740 |
-
name="Maya 1",
|
| 741 |
model_type=ModelType.TTS,
|
| 742 |
is_open=False,
|
| 743 |
-
is_active=
|
| 744 |
-
model_url="https://
|
| 745 |
),
|
|
|
|
| 746 |
Model(
|
| 747 |
-
id="
|
| 748 |
-
name="
|
| 749 |
model_type=ModelType.TTS,
|
| 750 |
is_open=False,
|
| 751 |
-
is_active=
|
| 752 |
-
model_url="https://
|
| 753 |
),
|
| 754 |
Model(
|
| 755 |
-
id="
|
| 756 |
-
name="
|
| 757 |
model_type=ModelType.TTS,
|
| 758 |
is_open=False,
|
| 759 |
-
is_active=
|
| 760 |
-
model_url="https://
|
| 761 |
-
),
|
| 762 |
-
Model(
|
| 763 |
-
id="vocu",
|
| 764 |
-
name="Vocu V3.0",
|
| 765 |
-
model_type=ModelType.TTS,
|
| 766 |
-
is_open=False,
|
| 767 |
-
is_active=True,
|
| 768 |
-
model_url="https://vocu.ai/",
|
| 769 |
-
),
|
| 770 |
-
]
|
| 771 |
-
conversational_models = [
|
| 772 |
-
Model(
|
| 773 |
-
id="csm-1b",
|
| 774 |
-
name="CSM 1B",
|
| 775 |
-
model_type=ModelType.CONVERSATIONAL,
|
| 776 |
-
is_open=True,
|
| 777 |
-
model_url="https://huggingface.co/sesame/csm-1b",
|
| 778 |
-
),
|
| 779 |
-
Model(
|
| 780 |
-
id="playdialog-1.0",
|
| 781 |
-
name="PlayDialog 1.0",
|
| 782 |
-
model_type=ModelType.CONVERSATIONAL,
|
| 783 |
-
is_open=False,
|
| 784 |
-
model_url="https://play.ht/",
|
| 785 |
-
),
|
| 786 |
-
Model(
|
| 787 |
-
id="dia-1.6b",
|
| 788 |
-
name="Dia 1.6B",
|
| 789 |
-
model_type=ModelType.CONVERSATIONAL,
|
| 790 |
-
is_open=True,
|
| 791 |
-
model_url="https://huggingface.co/nari-labs/Dia-1.6B",
|
| 792 |
),
|
| 793 |
]
|
| 794 |
|
| 795 |
-
|
| 796 |
-
|
| 797 |
-
for model in all_models:
|
| 798 |
existing = Model.query.filter_by(
|
| 799 |
id=model.id, model_type=model.model_type
|
| 800 |
).first()
|
|
|
|
| 566 |
|
| 567 |
|
| 568 |
def insert_initial_models():
|
| 569 |
+
"""Insert initial models into the database (한국어 TTS 전용)."""
|
| 570 |
+
import os
|
| 571 |
+
|
| 572 |
+
# 환경 변수로 API 키 확인하여 활성화 여부 결정
|
| 573 |
+
has_openai = bool(os.getenv("OPENAI_API_KEY"))
|
| 574 |
+
has_elevenlabs = bool(os.getenv("ELEVENLABS_API_KEY"))
|
| 575 |
+
has_google = bool(os.getenv("GOOGLE_API_KEY"))
|
| 576 |
+
|
| 577 |
tts_models = [
|
| 578 |
+
# 채널톡 TTS (한국어 특화) - 항상 활성화
|
| 579 |
Model(
|
| 580 |
+
id="channel-hana",
|
| 581 |
+
name="채널톡 하나",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 582 |
model_type=ModelType.TTS,
|
| 583 |
is_open=False,
|
| 584 |
is_active=True,
|
| 585 |
+
model_url="https://channel.io/",
|
| 586 |
),
|
| 587 |
+
# ElevenLabs (다국어 지원) - API 키 있을 때만 활성화
|
| 588 |
Model(
|
| 589 |
+
id="eleven-multilingual-v2",
|
| 590 |
+
name="ElevenLabs Multilingual v2",
|
| 591 |
model_type=ModelType.TTS,
|
| 592 |
is_open=False,
|
| 593 |
+
is_active=has_elevenlabs,
|
| 594 |
+
model_url="https://elevenlabs.io/",
|
| 595 |
),
|
| 596 |
+
# OpenAI TTS - API 키 있을 때만 활성화
|
| 597 |
Model(
|
| 598 |
+
id="openai-tts-1",
|
| 599 |
+
name="OpenAI TTS-1",
|
| 600 |
model_type=ModelType.TTS,
|
| 601 |
is_open=False,
|
| 602 |
+
is_active=has_openai,
|
| 603 |
+
model_url="https://platform.openai.com/docs/guides/text-to-speech",
|
| 604 |
),
|
| 605 |
Model(
|
| 606 |
+
id="openai-tts-1-hd",
|
| 607 |
+
name="OpenAI TTS-1-HD",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 608 |
model_type=ModelType.TTS,
|
| 609 |
is_open=False,
|
| 610 |
+
is_active=has_openai,
|
| 611 |
+
model_url="https://platform.openai.com/docs/guides/text-to-speech",
|
| 612 |
),
|
| 613 |
+
# Google Cloud TTS - API 키 있을 때만 활성화
|
| 614 |
Model(
|
| 615 |
+
id="google-wavenet",
|
| 616 |
+
name="Google Wavenet (ko-KR)",
|
| 617 |
model_type=ModelType.TTS,
|
| 618 |
is_open=False,
|
| 619 |
+
is_active=has_google,
|
| 620 |
+
model_url="https://cloud.google.com/text-to-speech",
|
| 621 |
),
|
| 622 |
Model(
|
| 623 |
+
id="google-neural2",
|
| 624 |
+
name="Google Neural2 (ko-KR)",
|
| 625 |
model_type=ModelType.TTS,
|
| 626 |
is_open=False,
|
| 627 |
+
is_active=has_google,
|
| 628 |
+
model_url="https://cloud.google.com/text-to-speech",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 629 |
),
|
| 630 |
]
|
| 631 |
|
| 632 |
+
for model in tts_models:
|
|
|
|
|
|
|
| 633 |
existing = Model.query.filter_by(
|
| 634 |
id=model.id, model_type=model.model_type
|
| 635 |
).first()
|
requirements.txt
CHANGED
|
@@ -10,7 +10,4 @@ apscheduler
|
|
| 10 |
flask-migrate
|
| 11 |
gunicorn
|
| 12 |
waitress
|
| 13 |
-
|
| 14 |
-
git+https://github.com/playht/pyht
|
| 15 |
-
datasets
|
| 16 |
-
langdetect
|
|
|
|
| 10 |
flask-migrate
|
| 11 |
gunicorn
|
| 12 |
waitress
|
| 13 |
+
huggingface-hub
|
|
|
|
|
|
|
|
|
static/channeltalk-logo-kr.svg
ADDED
|
|
templates/about.html
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{% extends "base.html" %}
|
| 2 |
|
| 3 |
-
{% block title %}About - TTS
|
| 4 |
|
| 5 |
{% block current_page %}About{% endblock %}
|
| 6 |
|
|
@@ -25,9 +25,16 @@
|
|
| 25 |
font-size: 24px;
|
| 26 |
}
|
| 27 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
.about-section p {
|
| 29 |
margin-bottom: 16px;
|
| 30 |
-
line-height: 1.
|
| 31 |
color: #444;
|
| 32 |
}
|
| 33 |
|
|
@@ -35,6 +42,40 @@
|
|
| 35 |
margin-bottom: 0;
|
| 36 |
}
|
| 37 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
.feature-list {
|
| 39 |
list-style: none;
|
| 40 |
padding: 0;
|
|
@@ -44,86 +85,111 @@
|
|
| 44 |
margin-bottom: 12px;
|
| 45 |
padding-left: 28px;
|
| 46 |
position: relative;
|
|
|
|
| 47 |
}
|
| 48 |
|
| 49 |
.feature-list li::before {
|
| 50 |
-
content: "
|
| 51 |
color: var(--primary-color);
|
| 52 |
-
font-
|
| 53 |
position: absolute;
|
| 54 |
left: 8px;
|
| 55 |
-
top:
|
| 56 |
}
|
| 57 |
|
| 58 |
-
.
|
| 59 |
display: grid;
|
| 60 |
-
grid-template-columns: repeat(auto-
|
| 61 |
-
gap:
|
| 62 |
-
margin
|
| 63 |
}
|
| 64 |
|
| 65 |
-
.
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
border-bottom: 1px solid var(--border-color);
|
| 71 |
}
|
| 72 |
|
| 73 |
-
.
|
| 74 |
color: var(--primary-color);
|
| 75 |
-
|
|
|
|
| 76 |
}
|
| 77 |
|
| 78 |
-
.
|
| 79 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
}
|
| 81 |
|
| 82 |
-
.
|
| 83 |
-
|
| 84 |
-
|
| 85 |
}
|
| 86 |
-
|
| 87 |
-
.
|
| 88 |
-
|
| 89 |
-
|
| 90 |
}
|
| 91 |
|
| 92 |
-
.
|
| 93 |
-
background-color: var(--light-gray);
|
| 94 |
-
border-radius: var(--radius);
|
| 95 |
-
padding: 16px;
|
| 96 |
-
margin-top: 16px;
|
| 97 |
-
position: relative;
|
| 98 |
-
font-family: monospace;
|
| 99 |
-
white-space: pre-wrap;
|
| 100 |
-
word-break: break-word;
|
| 101 |
font-size: 14px;
|
| 102 |
-
|
|
|
|
| 103 |
}
|
| 104 |
|
| 105 |
-
.
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
|
| 114 |
-
display: flex;
|
| 115 |
-
align-items: center;
|
| 116 |
-
justify-content: center;
|
| 117 |
-
cursor: pointer;
|
| 118 |
-
transition: background-color 0.2s;
|
| 119 |
}
|
| 120 |
|
| 121 |
-
.
|
| 122 |
-
background
|
|
|
|
|
|
|
|
|
|
|
|
|
| 123 |
}
|
| 124 |
|
| 125 |
-
.
|
|
|
|
| 126 |
color: var(--text-color);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 127 |
}
|
| 128 |
|
| 129 |
.faq-item {
|
|
@@ -139,6 +205,7 @@
|
|
| 139 |
.faq-answer {
|
| 140 |
line-height: 1.6;
|
| 141 |
}
|
|
|
|
| 142 |
/* Dark mode styles */
|
| 143 |
@media (prefers-color-scheme: dark) {
|
| 144 |
.about-section {
|
|
@@ -150,266 +217,214 @@
|
|
| 150 |
color: var(--text-color);
|
| 151 |
}
|
| 152 |
|
| 153 |
-
.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 154 |
background-color: var(--secondary-color);
|
| 155 |
-
border-color: var(--border-color);
|
| 156 |
}
|
| 157 |
|
| 158 |
-
.
|
| 159 |
-
|
| 160 |
-
border-color: var(--border-color);
|
| 161 |
}
|
| 162 |
|
| 163 |
-
.
|
| 164 |
-
background-color:
|
| 165 |
}
|
| 166 |
|
| 167 |
-
.
|
| 168 |
-
color:
|
| 169 |
}
|
| 170 |
|
| 171 |
-
.
|
| 172 |
-
color: var(--
|
| 173 |
}
|
| 174 |
|
| 175 |
-
.
|
| 176 |
-
|
| 177 |
}
|
| 178 |
}
|
| 179 |
-
|
| 180 |
</style>
|
| 181 |
{% endblock %}
|
| 182 |
|
| 183 |
{% block content %}
|
| 184 |
<div class="about-container">
|
| 185 |
<div class="about-section">
|
| 186 |
-
<h2
|
| 187 |
<p>
|
| 188 |
-
TTS
|
| 189 |
-
|
| 190 |
-
a
|
| 191 |
-
|
| 192 |
-
<p>
|
| 193 |
-
Our second version now supports conversational models for podcast-like content generation, expanding the arena's scope to reflect the diverse applications of modern speech synthesis.
|
| 194 |
</p>
|
|
|
|
|
|
|
|
|
|
| 195 |
</div>
|
| 196 |
|
| 197 |
<div class="about-section">
|
| 198 |
-
<h2
|
| 199 |
<p>
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
measures such as MOS (mean opinion score) typically involve small-scale experiments with limited participants.
|
| 203 |
</p>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 204 |
<p>
|
| 205 |
-
|
| 206 |
-
|
|
|
|
| 207 |
</p>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 208 |
</div>
|
| 209 |
|
| 210 |
<div class="about-section">
|
| 211 |
-
<h2
|
| 212 |
<p>
|
| 213 |
-
|
| 214 |
-
listening to both samples, vote for the one that sounds more natural and engaging. To prevent bias,
|
| 215 |
-
model names are revealed only after your vote is submitted.
|
| 216 |
</p>
|
| 217 |
<ul class="feature-list">
|
| 218 |
-
<li>
|
| 219 |
-
<li>
|
| 220 |
-
<li>
|
| 221 |
-
<li>
|
| 222 |
-
<li>Track model rankings on our leaderboard</li>
|
| 223 |
</ul>
|
|
|
|
|
|
|
|
|
|
|
|
|
| 224 |
</div>
|
| 225 |
|
| 226 |
<div class="about-section">
|
| 227 |
-
<h2
|
| 228 |
-
<div class="faq-item">
|
| 229 |
-
<div class="faq-question">What happened to the TTS Arena V1 leaderboard?</div>
|
| 230 |
-
<div class="faq-answer">
|
| 231 |
-
The TTS Arena V1 leaderboard is now deprecated. While you can no longer vote on it, the results and leaderboard are still available for reference at <a href="https://huggingface.co/spaces/TTS-AGI/TTS-Arena" target="_blank" rel="noopener">TTS Arena V1</a>. The leaderboard is static and will not change.
|
| 232 |
-
</div>
|
| 233 |
-
</div>
|
| 234 |
-
<div class="faq-item">
|
| 235 |
-
<div class="faq-question">How are models ranked in TTS Arena?</div>
|
| 236 |
-
<div class="faq-answer">
|
| 237 |
-
Models are ranked using an Elo rating system, similar to chess rankings. When you vote for a model, its rating increases while the other model's rating decreases. The amount of change depends on the current ratings of both models.
|
| 238 |
-
</div>
|
| 239 |
-
</div>
|
| 240 |
<div class="faq-item">
|
| 241 |
-
<div class="faq-question"
|
| 242 |
<div class="faq-answer">
|
| 243 |
-
|
|
|
|
| 244 |
</div>
|
| 245 |
</div>
|
| 246 |
<div class="faq-item">
|
| 247 |
-
<div class="faq-question"
|
| 248 |
<div class="faq-answer">
|
| 249 |
-
|
|
|
|
| 250 |
</div>
|
| 251 |
</div>
|
| 252 |
<div class="faq-item">
|
| 253 |
-
<div class="faq-question"
|
| 254 |
<div class="faq-answer">
|
| 255 |
-
|
|
|
|
| 256 |
</div>
|
| 257 |
</div>
|
| 258 |
<div class="faq-item">
|
| 259 |
-
<div class="faq-question"
|
| 260 |
<div class="faq-answer">
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
</div>
|
| 264 |
-
<div class="faq-item">
|
| 265 |
-
<div class="faq-question">Do I need to login to use TTS Arena?</div>
|
| 266 |
-
<div class="faq-answer">
|
| 267 |
-
Login is optional and not required to vote. If you choose to login (with Hugging Face), texts you enter will be associated with your account, and you'll have access to a personal leaderboard showing the models you favor the most.
|
| 268 |
</div>
|
| 269 |
</div>
|
| 270 |
</div>
|
| 271 |
|
| 272 |
<div class="about-section">
|
| 273 |
-
<h2
|
| 274 |
<p>
|
| 275 |
-
|
| 276 |
</p>
|
| 277 |
-
<
|
| 278 |
-
|
| 279 |
-
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
function copyToClipboard() {
|
| 286 |
-
const text = document.getElementById('citation-text').innerText;
|
| 287 |
-
navigator.clipboard.writeText(text).then(() => {
|
| 288 |
-
const btn = document.querySelector('.copy-citation');
|
| 289 |
-
const originalContent = btn.innerHTML;
|
| 290 |
-
btn.innerHTML = '<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M20 6 9 17l-5-5"/></svg>';
|
| 291 |
-
setTimeout(() => {
|
| 292 |
-
btn.innerHTML = originalContent;
|
| 293 |
-
}, 2000);
|
| 294 |
-
});
|
| 295 |
-
}
|
| 296 |
-
</script>
|
| 297 |
</div>
|
| 298 |
|
| 299 |
<div class="about-section">
|
| 300 |
-
<h2
|
| 301 |
<p>
|
| 302 |
-
|
| 303 |
</p>
|
| 304 |
-
<div class="
|
| 305 |
-
<div class="
|
| 306 |
-
<
|
| 307 |
-
<div class="
|
| 308 |
-
<a href="https://twitter.com/reach_vb" target="_blank" rel="noopener" title="Twitter">
|
| 309 |
-
<img src="{{ url_for('static', filename='twitter.svg') }}" alt="Twitter" class="social-icon icon-x">
|
| 310 |
-
</a>
|
| 311 |
-
<a href="https://huggingface.co/reach-vb" target="_blank" rel="noopener" title="Hugging Face">
|
| 312 |
-
<img src="{{ url_for('static', filename='huggingface.svg') }}" alt="Hugging Face" class="social-icon">
|
| 313 |
-
</a>
|
| 314 |
-
</div>
|
| 315 |
-
</div>
|
| 316 |
-
<div class="credit-item">
|
| 317 |
-
<span>Clémentine Fourrier</span>
|
| 318 |
-
<div class="social-links">
|
| 319 |
-
<a href="https://twitter.com/clefourrier" target="_blank" rel="noopener" title="Twitter">
|
| 320 |
-
<img src="{{ url_for('static', filename='twitter.svg') }}" alt="Twitter" class="social-icon icon-x">
|
| 321 |
-
</a>
|
| 322 |
-
<a href="https://huggingface.co/clefourrier" target="_blank" rel="noopener" title="Hugging Face">
|
| 323 |
-
<img src="{{ url_for('static', filename='huggingface.svg') }}" alt="Hugging Face" class="social-icon">
|
| 324 |
-
</a>
|
| 325 |
-
</div>
|
| 326 |
-
</div>
|
| 327 |
-
<div class="credit-item">
|
| 328 |
-
<span>Lucain Pouget</span>
|
| 329 |
-
<div class="social-links">
|
| 330 |
-
<a href="https://twitter.com/Wauplin" target="_blank" rel="noopener" title="Twitter">
|
| 331 |
-
<img src="{{ url_for('static', filename='twitter.svg') }}" alt="Twitter" class="social-icon icon-x">
|
| 332 |
-
</a>
|
| 333 |
-
<a href="https://huggingface.co/Wauplin" target="_blank" rel="noopener" title="Hugging Face">
|
| 334 |
-
<img src="{{ url_for('static', filename='huggingface.svg') }}" alt="Hugging Face" class="social-icon">
|
| 335 |
-
</a>
|
| 336 |
-
</div>
|
| 337 |
-
</div>
|
| 338 |
-
<div class="credit-item">
|
| 339 |
-
<span>Yoach Lacombe</span>
|
| 340 |
-
<div class="social-links">
|
| 341 |
-
<a href="https://twitter.com/yoachlacombe" target="_blank" rel="noopener" title="Twitter">
|
| 342 |
-
<img src="{{ url_for('static', filename='twitter.svg') }}" alt="Twitter" class="social-icon icon-x">
|
| 343 |
-
</a>
|
| 344 |
-
<a href="https://huggingface.co/ylacombe" target="_blank" rel="noopener" title="Hugging Face">
|
| 345 |
-
<img src="{{ url_for('static', filename='huggingface.svg') }}" alt="Hugging Face" class="social-icon">
|
| 346 |
-
</a>
|
| 347 |
-
</div>
|
| 348 |
</div>
|
| 349 |
-
<div class="
|
| 350 |
-
<
|
| 351 |
-
<div class="
|
| 352 |
-
<a href="https://twitter.com/main_horse" target="_blank" rel="noopener" title="Twitter">
|
| 353 |
-
<img src="{{ url_for('static', filename='twitter.svg') }}" alt="Twitter" class="social-icon icon-x">
|
| 354 |
-
</a>
|
| 355 |
-
<a href="https://huggingface.co/main-horse" target="_blank" rel="noopener" title="Hugging Face">
|
| 356 |
-
<img src="{{ url_for('static', filename='huggingface.svg') }}" alt="Hugging Face" class="social-icon">
|
| 357 |
-
</a>
|
| 358 |
-
</div>
|
| 359 |
-
</div>
|
| 360 |
-
<div class="credit-item">
|
| 361 |
-
<span>Sanchit Gandhi</span>
|
| 362 |
-
<div class="social-links">
|
| 363 |
-
<a href="https://twitter.com/sanchitgandhi99" target="_blank" rel="noopener" title="Twitter">
|
| 364 |
-
<img src="{{ url_for('static', filename='twitter.svg') }}" alt="Twitter" class="social-icon icon-x">
|
| 365 |
-
</a>
|
| 366 |
-
<a href="https://huggingface.co/sanchit-gandhi" target="_blank" rel="noopener" title="Hugging Face">
|
| 367 |
-
<img src="{{ url_for('static', filename='huggingface.svg') }}" alt="Hugging Face" class="social-icon">
|
| 368 |
-
</a>
|
| 369 |
-
</div>
|
| 370 |
-
</div>
|
| 371 |
-
<div class="credit-item">
|
| 372 |
-
<span>Apolinário Passos</span>
|
| 373 |
-
<div class="social-links">
|
| 374 |
-
<a href="https://twitter.com/multimodalart" target="_blank" rel="noopener" title="Twitter">
|
| 375 |
-
<img src="{{ url_for('static', filename='twitter.svg') }}" alt="Twitter" class="social-icon icon-x">
|
| 376 |
-
</a>
|
| 377 |
-
<a href="https://huggingface.co/multimodalart" target="_blank" rel="noopener" title="Hugging Face">
|
| 378 |
-
<img src="{{ url_for('static', filename='huggingface.svg') }}" alt="Hugging Face" class="social-icon">
|
| 379 |
-
</a>
|
| 380 |
-
</div>
|
| 381 |
-
</div>
|
| 382 |
-
<div class="credit-item">
|
| 383 |
-
<span>Pedro Cuenca</span>
|
| 384 |
-
<div class="social-links">
|
| 385 |
-
<a href="https://twitter.com/pcuenq" target="_blank" rel="noopener" title="Twitter">
|
| 386 |
-
<img src="{{ url_for('static', filename='twitter.svg') }}" alt="Twitter" class="social-icon icon-x">
|
| 387 |
-
</a>
|
| 388 |
-
<a href="https://huggingface.co/pcuenq" target="_blank" rel="noopener" title="Hugging Face">
|
| 389 |
-
<img src="{{ url_for('static', filename='huggingface.svg') }}" alt="Hugging Face" class="social-icon">
|
| 390 |
-
</a>
|
| 391 |
-
</div>
|
| 392 |
</div>
|
| 393 |
</div>
|
| 394 |
</div>
|
| 395 |
|
| 396 |
<div class="about-section">
|
| 397 |
-
<h2
|
| 398 |
<p>
|
| 399 |
-
|
| 400 |
-
|
| 401 |
-
commercial purposes.
|
| 402 |
</p>
|
| 403 |
-
</div>
|
| 404 |
-
|
| 405 |
-
<div class="about-section">
|
| 406 |
-
<h2>License</h2>
|
| 407 |
<p>
|
| 408 |
-
|
| 409 |
-
The code for the Arena is licensed under the Zlib license.
|
| 410 |
-
Random sentences are sourced from a filtered subset of the
|
| 411 |
-
<a href="https://www.cs.columbia.edu/~hgs/audio/harvard.html" target="_blank" rel="noopener">Harvard Sentences</a>.
|
| 412 |
</p>
|
| 413 |
</div>
|
| 414 |
</div>
|
| 415 |
-
{% endblock %}
|
|
|
|
| 1 |
{% extends "base.html" %}
|
| 2 |
|
| 3 |
+
{% block title %}About - 한국어 TTS 아레나{% endblock %}
|
| 4 |
|
| 5 |
{% block current_page %}About{% endblock %}
|
| 6 |
|
|
|
|
| 25 |
font-size: 24px;
|
| 26 |
}
|
| 27 |
|
| 28 |
+
.about-section h3 {
|
| 29 |
+
color: var(--text-color);
|
| 30 |
+
margin-top: 20px;
|
| 31 |
+
margin-bottom: 12px;
|
| 32 |
+
font-size: 18px;
|
| 33 |
+
}
|
| 34 |
+
|
| 35 |
.about-section p {
|
| 36 |
margin-bottom: 16px;
|
| 37 |
+
line-height: 1.7;
|
| 38 |
color: #444;
|
| 39 |
}
|
| 40 |
|
|
|
|
| 42 |
margin-bottom: 0;
|
| 43 |
}
|
| 44 |
|
| 45 |
+
.highlight-box {
|
| 46 |
+
background: linear-gradient(135deg, #f5f3ff 0%, #ede9fe 100%);
|
| 47 |
+
border-left: 4px solid var(--primary-color);
|
| 48 |
+
padding: 16px 20px;
|
| 49 |
+
border-radius: 0 var(--radius) var(--radius) 0;
|
| 50 |
+
margin: 20px 0;
|
| 51 |
+
}
|
| 52 |
+
|
| 53 |
+
.highlight-box p {
|
| 54 |
+
margin: 0;
|
| 55 |
+
color: #4c1d95;
|
| 56 |
+
font-weight: 500;
|
| 57 |
+
}
|
| 58 |
+
|
| 59 |
+
.problem-list {
|
| 60 |
+
list-style: none;
|
| 61 |
+
padding: 0;
|
| 62 |
+
margin: 16px 0;
|
| 63 |
+
}
|
| 64 |
+
|
| 65 |
+
.problem-list li {
|
| 66 |
+
margin-bottom: 16px;
|
| 67 |
+
padding-left: 32px;
|
| 68 |
+
position: relative;
|
| 69 |
+
line-height: 1.6;
|
| 70 |
+
}
|
| 71 |
+
|
| 72 |
+
.problem-list li::before {
|
| 73 |
+
content: "⚠️";
|
| 74 |
+
position: absolute;
|
| 75 |
+
left: 0;
|
| 76 |
+
top: 0;
|
| 77 |
+
}
|
| 78 |
+
|
| 79 |
.feature-list {
|
| 80 |
list-style: none;
|
| 81 |
padding: 0;
|
|
|
|
| 85 |
margin-bottom: 12px;
|
| 86 |
padding-left: 28px;
|
| 87 |
position: relative;
|
| 88 |
+
line-height: 1.6;
|
| 89 |
}
|
| 90 |
|
| 91 |
.feature-list li::before {
|
| 92 |
+
content: "✓";
|
| 93 |
color: var(--primary-color);
|
| 94 |
+
font-weight: bold;
|
| 95 |
position: absolute;
|
| 96 |
left: 8px;
|
| 97 |
+
top: 0;
|
| 98 |
}
|
| 99 |
|
| 100 |
+
.metric-comparison {
|
| 101 |
display: grid;
|
| 102 |
+
grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
|
| 103 |
+
gap: 16px;
|
| 104 |
+
margin: 20px 0;
|
| 105 |
}
|
| 106 |
|
| 107 |
+
.metric-card {
|
| 108 |
+
background: var(--light-gray);
|
| 109 |
+
border-radius: var(--radius);
|
| 110 |
+
padding: 20px;
|
| 111 |
+
border: 1px solid var(--border-color);
|
|
|
|
| 112 |
}
|
| 113 |
|
| 114 |
+
.metric-card h4 {
|
| 115 |
color: var(--primary-color);
|
| 116 |
+
margin-bottom: 8px;
|
| 117 |
+
font-size: 16px;
|
| 118 |
}
|
| 119 |
|
| 120 |
+
.metric-card .status {
|
| 121 |
+
font-size: 12px;
|
| 122 |
+
padding: 4px 8px;
|
| 123 |
+
border-radius: 4px;
|
| 124 |
+
display: inline-block;
|
| 125 |
+
margin-bottom: 8px;
|
| 126 |
}
|
| 127 |
|
| 128 |
+
.metric-card .status.problem {
|
| 129 |
+
background: #fee2e2;
|
| 130 |
+
color: #dc2626;
|
| 131 |
}
|
| 132 |
+
|
| 133 |
+
.metric-card .status.solution {
|
| 134 |
+
background: #dcfce7;
|
| 135 |
+
color: #16a34a;
|
| 136 |
}
|
| 137 |
|
| 138 |
+
.metric-card p {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 139 |
font-size: 14px;
|
| 140 |
+
margin: 0;
|
| 141 |
+
color: #666;
|
| 142 |
}
|
| 143 |
|
| 144 |
+
.team-section {
|
| 145 |
+
margin-top: 20px;
|
| 146 |
+
}
|
| 147 |
+
|
| 148 |
+
.team-grid {
|
| 149 |
+
display: grid;
|
| 150 |
+
grid-template-columns: repeat(auto-fill, minmax(200px, 1fr));
|
| 151 |
+
gap: 16px;
|
| 152 |
+
margin-top: 16px;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 153 |
}
|
| 154 |
|
| 155 |
+
.team-member {
|
| 156 |
+
background: var(--light-gray);
|
| 157 |
+
border-radius: var(--radius);
|
| 158 |
+
padding: 16px;
|
| 159 |
+
text-align: center;
|
| 160 |
+
border: 1px solid var(--border-color);
|
| 161 |
}
|
| 162 |
|
| 163 |
+
.team-member .name {
|
| 164 |
+
font-weight: 600;
|
| 165 |
color: var(--text-color);
|
| 166 |
+
margin-bottom: 4px;
|
| 167 |
+
}
|
| 168 |
+
|
| 169 |
+
.team-member .role {
|
| 170 |
+
font-size: 13px;
|
| 171 |
+
color: #666;
|
| 172 |
+
}
|
| 173 |
+
|
| 174 |
+
.reference-link {
|
| 175 |
+
display: inline-flex;
|
| 176 |
+
align-items: center;
|
| 177 |
+
gap: 8px;
|
| 178 |
+
background: var(--light-gray);
|
| 179 |
+
padding: 12px 20px;
|
| 180 |
+
border-radius: var(--radius);
|
| 181 |
+
text-decoration: none;
|
| 182 |
+
color: var(--primary-color);
|
| 183 |
+
font-weight: 500;
|
| 184 |
+
border: 1px solid var(--border-color);
|
| 185 |
+
transition: all 0.2s;
|
| 186 |
+
margin-top: 12px;
|
| 187 |
+
}
|
| 188 |
+
|
| 189 |
+
.reference-link:hover {
|
| 190 |
+
background: var(--primary-color);
|
| 191 |
+
color: white;
|
| 192 |
+
border-color: var(--primary-color);
|
| 193 |
}
|
| 194 |
|
| 195 |
.faq-item {
|
|
|
|
| 205 |
.faq-answer {
|
| 206 |
line-height: 1.6;
|
| 207 |
}
|
| 208 |
+
|
| 209 |
/* Dark mode styles */
|
| 210 |
@media (prefers-color-scheme: dark) {
|
| 211 |
.about-section {
|
|
|
|
| 217 |
color: var(--text-color);
|
| 218 |
}
|
| 219 |
|
| 220 |
+
.highlight-box {
|
| 221 |
+
background: linear-gradient(135deg, rgba(91, 94, 255, 0.1) 0%, rgba(91, 94, 255, 0.05) 100%);
|
| 222 |
+
}
|
| 223 |
+
|
| 224 |
+
.highlight-box p {
|
| 225 |
+
color: #a5b4fc;
|
| 226 |
+
}
|
| 227 |
+
|
| 228 |
+
.metric-card {
|
| 229 |
background-color: var(--secondary-color);
|
|
|
|
| 230 |
}
|
| 231 |
|
| 232 |
+
.metric-card p {
|
| 233 |
+
color: #aaa;
|
|
|
|
| 234 |
}
|
| 235 |
|
| 236 |
+
.team-member {
|
| 237 |
+
background-color: var(--secondary-color);
|
| 238 |
}
|
| 239 |
|
| 240 |
+
.team-member .role {
|
| 241 |
+
color: #aaa;
|
| 242 |
}
|
| 243 |
|
| 244 |
+
.reference-link {
|
| 245 |
+
background-color: var(--secondary-color);
|
| 246 |
}
|
| 247 |
|
| 248 |
+
.faq-question {
|
| 249 |
+
color: var(--primary-color);
|
| 250 |
}
|
| 251 |
}
|
|
|
|
| 252 |
</style>
|
| 253 |
{% endblock %}
|
| 254 |
|
| 255 |
{% block content %}
|
| 256 |
<div class="about-container">
|
| 257 |
<div class="about-section">
|
| 258 |
+
<h2>🎤 한국어 TTS 아레나에 오신 것을 환영합니다</h2>
|
| 259 |
<p>
|
| 260 |
+
한국어 TTS 아레나는 다양한 음성 합성(TTS) 모델을 <strong>블라인드 테스트</strong>로 비교 평가하는
|
| 261 |
+
커뮤니티 기반 플랫폼입니다. LMsys의
|
| 262 |
+
<a href="https://chat.lmsys.org/" target="_blank" rel="noopener">Chatbot Arena</a>에서 영감을 받아,
|
| 263 |
+
누구나 한국어 TTS 모델의 품질을 직접 비교하고 평가할 수 있는 공간을 만들었습니다.
|
|
|
|
|
|
|
| 264 |
</p>
|
| 265 |
+
<div class="highlight-box">
|
| 266 |
+
<p>💡 두 모델의 음성을 듣고 더 자연스러운 쪽에 투표하세요. 모델 이름은 투표 후에 공개됩니다.</p>
|
| 267 |
+
</div>
|
| 268 |
</div>
|
| 269 |
|
| 270 |
<div class="about-section">
|
| 271 |
+
<h2>🤔 왜 한국어 TTS 벤치마크가 필요한가?</h2>
|
| 272 |
<p>
|
| 273 |
+
여러 상용 TTS가 이미 존재하지만, <strong>한국어에 특화된 신뢰할 수 있는 벤치마크</strong>는
|
| 274 |
+
부재한 상황입니다. 글로벌 TTS 모델들은 한국어 처리에서 여러 한계를 보이고 있습니다.
|
|
|
|
| 275 |
</p>
|
| 276 |
+
|
| 277 |
+
<h3>기존 평가 방식의 한계</h3>
|
| 278 |
+
<div class="metric-comparison">
|
| 279 |
+
<div class="metric-card">
|
| 280 |
+
<h4>WER (Word Error Rate)</h4>
|
| 281 |
+
<span class="status problem">문제 있음</span>
|
| 282 |
+
<p>한국어의 복잡한 발화 패턴(숫자, 날짜, 전화번호, 주문번호 등)을 STT로 평가할 때
|
| 283 |
+
정확도가 떨어져 실제 발화 품질을 제대로 반영하지 못합니다.</p>
|
| 284 |
+
</div>
|
| 285 |
+
<div class="metric-card">
|
| 286 |
+
<h4>MOS (Mean Opinion Score)</h4>
|
| 287 |
+
<span class="status problem">한계 존재</span>
|
| 288 |
+
<p>소규모 참가자를 대상으로 한 주관적 평가로, 비용이 많이 들고
|
| 289 |
+
대규모 커뮤니티의 다양한 의견을 반영하기 어렵습니다.</p>
|
| 290 |
+
</div>
|
| 291 |
+
<div class="metric-card">
|
| 292 |
+
<h4>Arena 방식</h4>
|
| 293 |
+
<span class="status solution">해결책</span>
|
| 294 |
+
<p>커뮤니티 전체가 참여하는 블라인드 A/B 테스트로,
|
| 295 |
+
Elo 레이팅 시스템을 통해 객관적인 순위를 도출합니다.</p>
|
| 296 |
+
</div>
|
| 297 |
+
</div>
|
| 298 |
+
|
| 299 |
+
<h3>글로벌 TTS 모델의 한국어 한계</h3>
|
| 300 |
+
<ul class="problem-list">
|
| 301 |
+
<li>
|
| 302 |
+
<strong>운율(Prosody)의 부자연스러움</strong><br>
|
| 303 |
+
상담사처럼 자연스러운 억양과 톤을 구현하지 못하고, 단조로운(monotone) 발화가 생성됩니다.
|
| 304 |
+
</li>
|
| 305 |
+
<li>
|
| 306 |
+
<strong>한국어 상식 기반 발화 처리 취약</strong><br>
|
| 307 |
+
한·영 혼용, 날짜·시간, 주문/고유번호, URL·이메일 등 한국어 특유의 발화 패턴을
|
| 308 |
+
제대로 처리하지 못합니다.
|
| 309 |
+
</li>
|
| 310 |
+
<li>
|
| 311 |
+
<strong>숫자 발화의 어려움</strong><br>
|
| 312 |
+
"19,992원"을 "만 구천 구백 구십 이원"으로 자연스럽게 읽거나,
|
| 313 |
+
전화번호 형식(011-1234-1234)을 올바르게 발화하는 것이 어렵습니다.
|
| 314 |
+
</li>
|
| 315 |
+
<li>
|
| 316 |
+
<strong>전문 용어 및 약어 처리</strong><br>
|
| 317 |
+
"%p"를 "퍼센트포인트"로 읽는 등의 상식 기반 추론이 필요한 발화에 취약합니다.
|
| 318 |
+
</li>
|
| 319 |
+
</ul>
|
| 320 |
+
</div>
|
| 321 |
+
|
| 322 |
+
<div class="about-section">
|
| 323 |
+
<h2>⚙️ 아레나 작동 방식</h2>
|
| 324 |
<p>
|
| 325 |
+
평가 방식은 간단합니다. 텍스트를 입력하면 두 개의 TTS 모델이 각각 음성을 생성합니다.
|
| 326 |
+
두 샘플을 듣고 더 자연스러운 쪽에 투표하세요. 편향을 방지하기 위해 모델 이름은
|
| 327 |
+
투표 후에만 공개됩니다.
|
| 328 |
</p>
|
| 329 |
+
<ul class="feature-list">
|
| 330 |
+
<li>직접 텍스트를 입력하거나 랜덤 문장을 선택할 수 있습니다</li>
|
| 331 |
+
<li>동일한 텍스트로 생성된 두 TTS 모델의 음성을 비교합니다</li>
|
| 332 |
+
<li>더 자연스럽고, 명확하며, 표현력 있는 음성에 투표합니다</li>
|
| 333 |
+
<li>리더보드에서 모델 순위를 확인할 수 있습니다</li>
|
| 334 |
+
<li>Elo 레이팅 시스템으로 객관적인 순위가 산출됩니다</li>
|
| 335 |
+
</ul>
|
| 336 |
</div>
|
| 337 |
|
| 338 |
<div class="about-section">
|
| 339 |
+
<h2>📊 평가 대상 모델</h2>
|
| 340 |
<p>
|
| 341 |
+
현재 아레나에서는 다음과 같은 한국어 지원 TTS 모델들을 평가하고 있습니다:
|
|
|
|
|
|
|
| 342 |
</p>
|
| 343 |
<ul class="feature-list">
|
| 344 |
+
<li><strong>채널톡 TTS</strong> - 상담사향 프로소디에 최적화된 한국어 TTS</li>
|
| 345 |
+
<li><strong>OpenAI TTS</strong> - GPT 기반 다국어 TTS</li>
|
| 346 |
+
<li><strong>ElevenLabs</strong> - Multilingual v2 모델</li>
|
| 347 |
+
<li><strong>Google Cloud TTS</strong> - WaveNet/Neural2 한국어 음성</li>
|
|
|
|
| 348 |
</ul>
|
| 349 |
+
<p>
|
| 350 |
+
더 많은 모델이 지속적으로 추가될 예정입니다.
|
| 351 |
+
새로운 모델 추가를 원하시면 문의해 주세요.
|
| 352 |
+
</p>
|
| 353 |
</div>
|
| 354 |
|
| 355 |
<div class="about-section">
|
| 356 |
+
<h2>❓ 자주 묻는 질문</h2>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 357 |
<div class="faq-item">
|
| 358 |
+
<div class="faq-question">모델 순위는 어떻게 결정되나요?</div>
|
| 359 |
<div class="faq-answer">
|
| 360 |
+
체스 랭킹과 유사한 Elo 레이팅 시스템을 사용합니다. 투표를 받은 모델의 점수가 올라가고,
|
| 361 |
+
상대 모델의 점수는 내려갑니다. 변동 폭은 두 모델의 현재 레이팅에 따라 달라집니다.
|
| 362 |
</div>
|
| 363 |
</div>
|
| 364 |
<div class="faq-item">
|
| 365 |
+
<div class="faq-question">로그인이 필요한가요?</div>
|
| 366 |
<div class="faq-answer">
|
| 367 |
+
투표를 위해서는 Hugging Face 로그인이 필요합니다. 로그인하면 투표 기록을 추적하고
|
| 368 |
+
개인 리더보드에서 선호하는 모델을 확인할 수 있습니다.
|
| 369 |
</div>
|
| 370 |
</div>
|
| 371 |
<div class="faq-item">
|
| 372 |
+
<div class="faq-question">새로운 모델을 추가하고 싶어요.</div>
|
| 373 |
<div class="faq-answer">
|
| 374 |
+
새로운 TTS 모델 추가 요청은 언제든 환영합니다.
|
| 375 |
+
출시 전 익명 평가를 원하시는 경우에도 문의해 주세요.
|
| 376 |
</div>
|
| 377 |
</div>
|
| 378 |
<div class="faq-item">
|
| 379 |
+
<div class="faq-question">어떤 기준으로 투표해야 하나요?</div>
|
| 380 |
<div class="faq-answer">
|
| 381 |
+
자연스러움, 발음 정확도, 억양, 감정 표현 등을 종합적으로 고려해서
|
| 382 |
+
더 "사람 같은" 음성에 투표해 주세요.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 383 |
</div>
|
| 384 |
</div>
|
| 385 |
</div>
|
| 386 |
|
| 387 |
<div class="about-section">
|
| 388 |
+
<h2>🔗 참고 자료</h2>
|
| 389 |
<p>
|
| 390 |
+
채널톡 TTS 팀의 연구 내용과 기술적 접근 방식에 대해 더 알아보세요:
|
| 391 |
</p>
|
| 392 |
+
<a href="https://tts.ch.dev/" target="_blank" rel="noopener" class="reference-link">
|
| 393 |
+
<svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
|
| 394 |
+
<path d="M18 13v6a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2V8a2 2 0 0 1 2-2h6"/>
|
| 395 |
+
<polyline points="15 3 21 3 21 9"/>
|
| 396 |
+
<line x1="10" y1="14" x2="21" y2="3"/>
|
| 397 |
+
</svg>
|
| 398 |
+
Channel TTS: Towards Real-World Prosody for Conversational Agents
|
| 399 |
+
</a>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 400 |
</div>
|
| 401 |
|
| 402 |
<div class="about-section">
|
| 403 |
+
<h2>👥 만든 사람들</h2>
|
| 404 |
<p>
|
| 405 |
+
이 프로젝트는 <a href="https://channel.io/ko" target="_blank" rel="noopener">채널톡</a> AI팀에서 제작했습니다.
|
| 406 |
</p>
|
| 407 |
+
<div class="team-grid">
|
| 408 |
+
<div class="team-member">
|
| 409 |
+
<div class="name">Robin (신승윤)</div>
|
| 410 |
+
<div class="role">AI Team - Speech</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 411 |
</div>
|
| 412 |
+
<div class="team-member">
|
| 413 |
+
<div class="name">Jake (황정인)</div>
|
| 414 |
+
<div class="role">AI Team Lead</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 415 |
</div>
|
| 416 |
</div>
|
| 417 |
</div>
|
| 418 |
|
| 419 |
<div class="about-section">
|
| 420 |
+
<h2>📜 개인정보 및 라이선스</h2>
|
| 421 |
<p>
|
| 422 |
+
입력하신 텍스트와 생성된 오디오는 연구 목적으로 저장될 수 있습니다.
|
| 423 |
+
로그인한 경우 투표 기록이 계정과 연결됩니다.
|
|
|
|
| 424 |
</p>
|
|
|
|
|
|
|
|
|
|
|
|
|
| 425 |
<p>
|
| 426 |
+
생성된 오디오 클립은 개인적, 비상업적 용도로만 사용할 수 있으며 재배포할 수 없습니다.
|
|
|
|
|
|
|
|
|
|
| 427 |
</p>
|
| 428 |
</div>
|
| 429 |
</div>
|
| 430 |
+
{% endblock %}
|
templates/arena.html
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{% extends "base.html" %}
|
| 2 |
|
| 3 |
-
{% block title %}
|
| 4 |
|
| 5 |
{% block current_page %}Arena{% endblock %}
|
| 6 |
|
|
@@ -12,25 +12,20 @@
|
|
| 12 |
<!-- Login prompt overlay -->
|
| 13 |
<div id="login-prompt-overlay" class="login-prompt-overlay" style="display: none;">
|
| 14 |
<div class="login-prompt-content">
|
| 15 |
-
<h3
|
| 16 |
-
<p>
|
| 17 |
<div class="login-prompt-actions">
|
| 18 |
-
<button class="login-prompt-close"
|
| 19 |
-
<a href="{{ url_for('auth.login', next=request.path) }}" class="login-prompt-btn">
|
| 20 |
</div>
|
| 21 |
</div>
|
| 22 |
</div>
|
| 23 |
{% endif %}
|
| 24 |
|
| 25 |
-
<div class="tabs">
|
| 26 |
-
<div class="tab active" data-tab="tts">TTS</div>
|
| 27 |
-
<div class="tab" data-tab="conversational">Conversational</div>
|
| 28 |
-
</div>
|
| 29 |
-
|
| 30 |
<div id="tts-tab" class="tab-content active">
|
| 31 |
<form class="input-container">
|
| 32 |
<div class="input-group">
|
| 33 |
-
<button type="button" class="segmented-btn random-btn" title="
|
| 34 |
<svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-shuffle-icon lucide-shuffle">
|
| 35 |
<path d="m18 14 4 4-4 4" />
|
| 36 |
<path d="m18 2 4 4-4 4" />
|
|
@@ -39,14 +34,14 @@
|
|
| 39 |
<path d="M22 18h-6.041a4 4 0 0 1-3.3-1.8l-.359-.45" />
|
| 40 |
</svg>
|
| 41 |
</button>
|
| 42 |
-
<input type="text" class="text-input" placeholder="
|
| 43 |
-
<button type="submit" class="segmented-btn synth-btn"
|
| 44 |
</div>
|
| 45 |
-
<button type="submit" class="mobile-synth-btn"
|
| 46 |
</form>
|
| 47 |
|
| 48 |
<div id="initial-keyboard-hint" class="keyboard-hint">
|
| 49 |
-
|
| 50 |
</div>
|
| 51 |
|
| 52 |
<div class="loading-container" style="display: none;">
|
|
@@ -61,18 +56,18 @@
|
|
| 61 |
<span></span>
|
| 62 |
</div>
|
| 63 |
</div>
|
| 64 |
-
<div class="loader-text"
|
| 65 |
-
<div class="loader-subtext"
|
| 66 |
</div>
|
| 67 |
</div>
|
| 68 |
|
| 69 |
<div class="players-container" style="display: none;">
|
| 70 |
<div class="players-row">
|
| 71 |
<div class="player">
|
| 72 |
-
<div class="player-label"
|
| 73 |
<div class="wave-player-container" data-model="a"></div>
|
| 74 |
<button class="vote-btn" data-model="a" disabled>
|
| 75 |
-
|
| 76 |
<span class="shortcut-key">A</span>
|
| 77 |
<span class="vote-loader" style="display: none;">
|
| 78 |
<div class="vote-spinner"></div>
|
|
@@ -81,10 +76,10 @@
|
|
| 81 |
</div>
|
| 82 |
|
| 83 |
<div class="player">
|
| 84 |
-
<div class="player-label"
|
| 85 |
<div class="wave-player-container" data-model="b"></div>
|
| 86 |
<button class="vote-btn" data-model="b" disabled>
|
| 87 |
-
|
| 88 |
<span class="shortcut-key">B</span>
|
| 89 |
<span class="vote-loader" style="display: none;">
|
| 90 |
<div class="vote-spinner"></div>
|
|
@@ -95,114 +90,23 @@
|
|
| 95 |
</div>
|
| 96 |
|
| 97 |
<div class="vote-results" style="display: none;">
|
| 98 |
-
<h3 class="results-heading"
|
| 99 |
<div class="results-content">
|
| 100 |
<div class="chosen-model">
|
| 101 |
-
<strong
|
| 102 |
</div>
|
| 103 |
<div class="rejected-model">
|
| 104 |
-
<strong
|
| 105 |
</div>
|
| 106 |
</div>
|
| 107 |
</div>
|
| 108 |
|
| 109 |
<div class="next-round-container" style="display: none;">
|
| 110 |
-
<button class="next-round-btn"
|
| 111 |
</div>
|
| 112 |
|
| 113 |
<div id="playback-keyboard-hint" class="keyboard-hint" style="display: none;">
|
| 114 |
-
|
| 115 |
-
</div>
|
| 116 |
-
</div>
|
| 117 |
-
|
| 118 |
-
<div id="conversational-tab" class="tab-content">
|
| 119 |
-
<div class="podcast-container">
|
| 120 |
-
<div class="podcast-controls">
|
| 121 |
-
<button type="button" class="segmented-btn random-script-btn" title="Load random script">
|
| 122 |
-
<svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-shuffle-icon lucide-shuffle">
|
| 123 |
-
<path d="m18 14 4 4-4 4" />
|
| 124 |
-
<path d="m18 2 4 4-4 4" />
|
| 125 |
-
<path d="M2 18h1.973a4 4 0 0 0 3.3-1.7l5.454-8.6a4 4 0 0 1 3.3-1.7H22" />
|
| 126 |
-
<path d="M2 6h1.972a4 4 0 0 1 3.6 2.2" />
|
| 127 |
-
<path d="M22 18h-6.041a4 4 0 0 1-3.3-1.8l-.359-.45" />
|
| 128 |
-
</svg>
|
| 129 |
-
Random Script
|
| 130 |
-
</button>
|
| 131 |
-
<button type="button" class="podcast-synth-btn">Generate Podcast</button>
|
| 132 |
-
</div>
|
| 133 |
-
|
| 134 |
-
<div class="podcast-script-container">
|
| 135 |
-
<div class="podcast-lines">
|
| 136 |
-
<!-- Script lines will be added here -->
|
| 137 |
-
</div>
|
| 138 |
-
|
| 139 |
-
<button type="button" class="add-line-btn">+ Add Line</button>
|
| 140 |
-
|
| 141 |
-
<div class="keyboard-hint podcast-keyboard-hint">
|
| 142 |
-
Press <kbd>Ctrl</kbd>+<kbd>Enter</kbd> or <kbd>Alt</kbd>+<kbd>Enter</kbd> to add a new line
|
| 143 |
-
</div>
|
| 144 |
-
</div>
|
| 145 |
-
|
| 146 |
-
<div class="podcast-loading-container" style="display: none;">
|
| 147 |
-
<div class="loader-wrapper">
|
| 148 |
-
<div class="loader-animation">
|
| 149 |
-
<div class="sound-wave">
|
| 150 |
-
<span></span>
|
| 151 |
-
<span></span>
|
| 152 |
-
<span></span>
|
| 153 |
-
<span></span>
|
| 154 |
-
<span></span>
|
| 155 |
-
<span></span>
|
| 156 |
-
</div>
|
| 157 |
-
</div>
|
| 158 |
-
<div class="loader-text">Generating podcast...</div>
|
| 159 |
-
<div class="loader-subtext">This may take up to a minute</div>
|
| 160 |
-
</div>
|
| 161 |
-
</div>
|
| 162 |
-
|
| 163 |
-
<div class="podcast-player-container" style="display: none;">
|
| 164 |
-
<div class="players-row">
|
| 165 |
-
<div class="player">
|
| 166 |
-
<div class="player-label">Model A <span class="model-name-display"></span></div>
|
| 167 |
-
<div class="podcast-wave-player-a"></div>
|
| 168 |
-
<button class="vote-btn" data-model="a" disabled>
|
| 169 |
-
Vote for A
|
| 170 |
-
<span class="shortcut-key">A</span>
|
| 171 |
-
<span class="vote-loader" style="display: none;">
|
| 172 |
-
<div class="vote-spinner"></div>
|
| 173 |
-
</span>
|
| 174 |
-
</button>
|
| 175 |
-
</div>
|
| 176 |
-
|
| 177 |
-
<div class="player">
|
| 178 |
-
<div class="player-label">Model B <span class="model-name-display"></span></div>
|
| 179 |
-
<div class="podcast-wave-player-b"></div>
|
| 180 |
-
<button class="vote-btn" data-model="b" disabled>
|
| 181 |
-
Vote for B
|
| 182 |
-
<span class="shortcut-key">B</span>
|
| 183 |
-
<span class="vote-loader" style="display: none;">
|
| 184 |
-
<div class="vote-spinner"></div>
|
| 185 |
-
</span>
|
| 186 |
-
</button>
|
| 187 |
-
</div>
|
| 188 |
-
</div>
|
| 189 |
-
|
| 190 |
-
<div class="podcast-vote-results vote-results" style="display: none;">
|
| 191 |
-
<h3 class="results-heading">Vote Recorded!</h3>
|
| 192 |
-
<div class="results-content">
|
| 193 |
-
<div class="chosen-model">
|
| 194 |
-
<strong>You chose:</strong> <span class="chosen-model-name"></span>
|
| 195 |
-
</div>
|
| 196 |
-
<div class="rejected-model">
|
| 197 |
-
<strong>Over:</strong> <span class="rejected-model-name"></span>
|
| 198 |
-
</div>
|
| 199 |
-
</div>
|
| 200 |
-
</div>
|
| 201 |
-
|
| 202 |
-
<div class="podcast-next-round-container next-round-container" style="display: none;">
|
| 203 |
-
<button class="podcast-next-round-btn next-round-btn">Next Round <span class="shortcut-key">N</span></button>
|
| 204 |
-
</div>
|
| 205 |
-
</div>
|
| 206 |
</div>
|
| 207 |
</div>
|
| 208 |
|
|
@@ -455,34 +359,6 @@
|
|
| 455 |
}
|
| 456 |
}
|
| 457 |
|
| 458 |
-
/* Tab styling */
|
| 459 |
-
.tabs {
|
| 460 |
-
display: flex;
|
| 461 |
-
border-bottom: 1px solid var(--border-color);
|
| 462 |
-
margin-bottom: 24px;
|
| 463 |
-
}
|
| 464 |
-
|
| 465 |
-
.tab {
|
| 466 |
-
padding: 12px 24px;
|
| 467 |
-
cursor: pointer;
|
| 468 |
-
position: relative;
|
| 469 |
-
font-weight: 500;
|
| 470 |
-
}
|
| 471 |
-
|
| 472 |
-
.tab.active {
|
| 473 |
-
color: var(--primary-color);
|
| 474 |
-
}
|
| 475 |
-
|
| 476 |
-
.tab.active::after {
|
| 477 |
-
content: '';
|
| 478 |
-
position: absolute;
|
| 479 |
-
bottom: -1px;
|
| 480 |
-
left: 0;
|
| 481 |
-
width: 100%;
|
| 482 |
-
height: 2px;
|
| 483 |
-
background-color: var(--primary-color);
|
| 484 |
-
}
|
| 485 |
-
|
| 486 |
.tab-content {
|
| 487 |
display: none;
|
| 488 |
}
|
|
@@ -491,38 +367,6 @@
|
|
| 491 |
display: block;
|
| 492 |
}
|
| 493 |
|
| 494 |
-
/* Coming soon styling */
|
| 495 |
-
.coming-soon-container {
|
| 496 |
-
display: flex;
|
| 497 |
-
flex-direction: column;
|
| 498 |
-
align-items: center;
|
| 499 |
-
justify-content: center;
|
| 500 |
-
text-align: center;
|
| 501 |
-
padding: 60px 20px;
|
| 502 |
-
background-color: var(--light-gray);
|
| 503 |
-
border-radius: var(--radius);
|
| 504 |
-
margin: 20px 0;
|
| 505 |
-
}
|
| 506 |
-
|
| 507 |
-
.coming-soon-icon {
|
| 508 |
-
color: var(--primary-color);
|
| 509 |
-
margin-bottom: 20px;
|
| 510 |
-
}
|
| 511 |
-
|
| 512 |
-
.coming-soon-title {
|
| 513 |
-
font-size: 24px;
|
| 514 |
-
font-weight: 600;
|
| 515 |
-
margin-bottom: 16px;
|
| 516 |
-
color: var(--text-color);
|
| 517 |
-
}
|
| 518 |
-
|
| 519 |
-
.coming-soon-text {
|
| 520 |
-
font-size: 16px;
|
| 521 |
-
color: #666;
|
| 522 |
-
max-width: 500px;
|
| 523 |
-
line-height: 1.5;
|
| 524 |
-
}
|
| 525 |
-
|
| 526 |
.model-name-display {
|
| 527 |
font-size: 0.9em;
|
| 528 |
color: #666;
|
|
@@ -581,14 +425,6 @@
|
|
| 581 |
}
|
| 582 |
/* Dark mode styles */
|
| 583 |
@media (prefers-color-scheme: dark) {
|
| 584 |
-
.coming-soon-container {
|
| 585 |
-
background-color: var(--light-gray);
|
| 586 |
-
}
|
| 587 |
-
|
| 588 |
-
.coming-soon-text {
|
| 589 |
-
color: #aaa;
|
| 590 |
-
}
|
| 591 |
-
|
| 592 |
.model-name-display {
|
| 593 |
color: #aaa;
|
| 594 |
}
|
|
@@ -658,347 +494,30 @@
|
|
| 658 |
}
|
| 659 |
|
| 660 |
.random-btn:hover {
|
| 661 |
-
background-color: rgba(255, 255, 255, 0.1);
|
| 662 |
-
}
|
| 663 |
-
|
| 664 |
-
.vote-recorded {
|
| 665 |
-
background-color: var(--light-gray);
|
| 666 |
-
border-color: var(--border-color);
|
| 667 |
-
}
|
| 668 |
-
|
| 669 |
-
/* Ensure border-radius is maintained during loading state */
|
| 670 |
-
.vote-btn.loading {
|
| 671 |
-
border-radius: var(--radius);
|
| 672 |
-
}
|
| 673 |
-
|
| 674 |
-
/* Dark mode keyboard hint */
|
| 675 |
-
.keyboard-hint {
|
| 676 |
-
color: #aaa;
|
| 677 |
-
}
|
| 678 |
-
|
| 679 |
-
.keyboard-hint kbd {
|
| 680 |
-
color: #ddd;
|
| 681 |
-
background-color: #333;
|
| 682 |
-
border-color: #555;
|
| 683 |
-
box-shadow: 0 1px 0 rgba(255,255,255,0.1);
|
| 684 |
-
}
|
| 685 |
-
}
|
| 686 |
-
|
| 687 |
-
/* Podcast UI styles */
|
| 688 |
-
.podcast-container {
|
| 689 |
-
width: 100%;
|
| 690 |
-
}
|
| 691 |
-
|
| 692 |
-
.podcast-controls {
|
| 693 |
-
display: flex;
|
| 694 |
-
gap: 12px;
|
| 695 |
-
margin-bottom: 24px;
|
| 696 |
-
}
|
| 697 |
-
|
| 698 |
-
.random-script-btn {
|
| 699 |
-
display: flex;
|
| 700 |
-
align-items: center;
|
| 701 |
-
gap: 8px;
|
| 702 |
-
padding: 0 16px;
|
| 703 |
-
height: 40px;
|
| 704 |
-
background-color: white;
|
| 705 |
-
border: 1px solid var(--border-color);
|
| 706 |
-
border-radius: var(--radius);
|
| 707 |
-
cursor: pointer;
|
| 708 |
-
transition: background-color 0.2s;
|
| 709 |
-
}
|
| 710 |
-
|
| 711 |
-
.random-script-btn:hover {
|
| 712 |
-
background-color: var(--light-gray);
|
| 713 |
-
}
|
| 714 |
-
|
| 715 |
-
.podcast-synth-btn {
|
| 716 |
-
padding: 0 24px;
|
| 717 |
-
height: 40px;
|
| 718 |
-
background-color: var(--primary-color);
|
| 719 |
-
color: white;
|
| 720 |
-
border: none;
|
| 721 |
-
border-radius: var(--radius);
|
| 722 |
-
font-weight: 500;
|
| 723 |
-
cursor: pointer;
|
| 724 |
-
transition: background-color 0.2s;
|
| 725 |
-
}
|
| 726 |
-
|
| 727 |
-
.podcast-synth-btn:hover {
|
| 728 |
-
background-color: #4038c7;
|
| 729 |
-
}
|
| 730 |
-
|
| 731 |
-
.podcast-script-container {
|
| 732 |
-
border: 1px solid var(--border-color);
|
| 733 |
-
border-radius: var(--radius);
|
| 734 |
-
overflow: hidden;
|
| 735 |
-
margin-bottom: 24px;
|
| 736 |
-
}
|
| 737 |
-
|
| 738 |
-
.podcast-lines {
|
| 739 |
-
max-height: 500px;
|
| 740 |
-
overflow-y: auto;
|
| 741 |
-
}
|
| 742 |
-
|
| 743 |
-
.podcast-line {
|
| 744 |
-
display: flex;
|
| 745 |
-
border-bottom: 1px solid var(--border-color);
|
| 746 |
-
}
|
| 747 |
-
|
| 748 |
-
.speaker-label {
|
| 749 |
-
width: 120px;
|
| 750 |
-
padding: 12px;
|
| 751 |
-
display: flex;
|
| 752 |
-
align-items: center;
|
| 753 |
-
justify-content: center;
|
| 754 |
-
font-weight: 500;
|
| 755 |
-
border-right: 1px solid var(--border-color);
|
| 756 |
-
background-color: var(--light-gray);
|
| 757 |
-
white-space: nowrap;
|
| 758 |
-
}
|
| 759 |
-
|
| 760 |
-
.speaker-1 {
|
| 761 |
-
color: #3b82f6;
|
| 762 |
-
}
|
| 763 |
-
|
| 764 |
-
.speaker-2 {
|
| 765 |
-
color: #ef4444;
|
| 766 |
-
}
|
| 767 |
-
|
| 768 |
-
.line-input {
|
| 769 |
-
flex: 1;
|
| 770 |
-
padding: 12px;
|
| 771 |
-
border: none;
|
| 772 |
-
outline: none;
|
| 773 |
-
font-size: 1em;
|
| 774 |
-
}
|
| 775 |
-
|
| 776 |
-
.line-input:focus {
|
| 777 |
-
background-color: rgba(80, 70, 229, 0.03);
|
| 778 |
-
}
|
| 779 |
-
|
| 780 |
-
.remove-line-btn {
|
| 781 |
-
width: 40px;
|
| 782 |
-
display: flex;
|
| 783 |
-
align-items: center;
|
| 784 |
-
justify-content: center;
|
| 785 |
-
background: none;
|
| 786 |
-
border: none;
|
| 787 |
-
border-left: 1px solid var(--border-color);
|
| 788 |
-
cursor: pointer;
|
| 789 |
-
color: #888;
|
| 790 |
-
transition: color 0.2s, background-color 0.2s;
|
| 791 |
-
}
|
| 792 |
-
|
| 793 |
-
.remove-line-btn:hover {
|
| 794 |
-
color: #ef4444;
|
| 795 |
-
background-color: rgba(239, 68, 68, 0.1);
|
| 796 |
-
}
|
| 797 |
-
|
| 798 |
-
.add-line-btn {
|
| 799 |
-
width: 100%;
|
| 800 |
-
padding: 12px;
|
| 801 |
-
border: none;
|
| 802 |
-
background-color: var(--light-gray);
|
| 803 |
-
cursor: pointer;
|
| 804 |
-
font-weight: 500;
|
| 805 |
-
transition: background-color 0.2s;
|
| 806 |
-
margin-bottom: 0;
|
| 807 |
-
border-bottom: 1px solid var(--border-color);
|
| 808 |
-
}
|
| 809 |
-
|
| 810 |
-
.add-line-btn:hover {
|
| 811 |
-
background-color: rgba(80, 70, 229, 0.1);
|
| 812 |
-
}
|
| 813 |
-
|
| 814 |
-
.podcast-keyboard-hint {
|
| 815 |
-
padding: 10px;
|
| 816 |
-
text-align: center;
|
| 817 |
-
background-color: var(--light-gray);
|
| 818 |
-
border-top: 1px solid var(--border-color);
|
| 819 |
-
margin-top: 0;
|
| 820 |
-
font-size: 13px;
|
| 821 |
-
}
|
| 822 |
-
|
| 823 |
-
.podcast-player {
|
| 824 |
-
border: 1px solid var(--border-color);
|
| 825 |
-
border-radius: var(--radius);
|
| 826 |
-
padding: 20px;
|
| 827 |
-
margin-bottom: 24px;
|
| 828 |
-
}
|
| 829 |
-
|
| 830 |
-
.podcast-wave-player {
|
| 831 |
-
margin: 20px 0;
|
| 832 |
-
}
|
| 833 |
-
|
| 834 |
-
.podcast-transcript-container {
|
| 835 |
-
margin-top: 20px;
|
| 836 |
-
padding-top: 20px;
|
| 837 |
-
border-top: 1px solid var(--border-color);
|
| 838 |
-
}
|
| 839 |
-
|
| 840 |
-
.podcast-transcript {
|
| 841 |
-
margin-top: 12px;
|
| 842 |
-
line-height: 1.6;
|
| 843 |
-
}
|
| 844 |
-
|
| 845 |
-
.transcript-line {
|
| 846 |
-
margin-bottom: 12px;
|
| 847 |
-
}
|
| 848 |
-
|
| 849 |
-
.transcript-speaker {
|
| 850 |
-
font-weight: 600;
|
| 851 |
-
margin-right: 8px;
|
| 852 |
-
}
|
| 853 |
-
|
| 854 |
-
.transcript-speaker.speaker-1 {
|
| 855 |
-
color: #3b82f6;
|
| 856 |
-
}
|
| 857 |
-
|
| 858 |
-
.transcript-speaker.speaker-2 {
|
| 859 |
-
color: #ef4444;
|
| 860 |
-
}
|
| 861 |
-
|
| 862 |
-
/* Responsive styles for podcast UI */
|
| 863 |
-
@media (max-width: 768px) {
|
| 864 |
-
.podcast-controls {
|
| 865 |
-
flex-direction: column;
|
| 866 |
-
}
|
| 867 |
-
|
| 868 |
-
.random-script-btn,
|
| 869 |
-
.podcast-synth-btn {
|
| 870 |
-
width: 100%;
|
| 871 |
-
height: 48px;
|
| 872 |
-
}
|
| 873 |
-
|
| 874 |
-
/* Stack podcast players vertically on mobile */
|
| 875 |
-
.podcast-player-container .players-row {
|
| 876 |
-
flex-direction: column;
|
| 877 |
-
gap: 16px;
|
| 878 |
-
}
|
| 879 |
-
|
| 880 |
-
.podcast-line {
|
| 881 |
-
flex-direction: column;
|
| 882 |
-
padding-bottom: 0;
|
| 883 |
-
margin-bottom: 0;
|
| 884 |
-
}
|
| 885 |
-
|
| 886 |
-
.speaker-label {
|
| 887 |
-
width: 100%;
|
| 888 |
-
border-right: none;
|
| 889 |
-
border-bottom: 1px solid var(--border-color);
|
| 890 |
-
padding: 8px 10px;
|
| 891 |
-
justify-content: flex-start;
|
| 892 |
-
}
|
| 893 |
-
|
| 894 |
-
.line-input {
|
| 895 |
-
width: 100%;
|
| 896 |
-
padding: 8px 10px;
|
| 897 |
-
}
|
| 898 |
-
|
| 899 |
-
.remove-line-btn {
|
| 900 |
-
position: absolute;
|
| 901 |
-
top: 6px;
|
| 902 |
-
right: 10px;
|
| 903 |
-
border-left: none;
|
| 904 |
-
background-color: rgba(255, 255, 255, 0.5);
|
| 905 |
-
border-radius: 4px;
|
| 906 |
-
width: 30px;
|
| 907 |
-
height: 30px;
|
| 908 |
-
}
|
| 909 |
-
|
| 910 |
-
.podcast-line {
|
| 911 |
-
position: relative;
|
| 912 |
-
}
|
| 913 |
-
|
| 914 |
-
/* Dark mode adjustments for mobile */
|
| 915 |
-
@media (prefers-color-scheme: dark) {
|
| 916 |
-
.remove-line-btn {
|
| 917 |
-
background-color: rgba(50, 50, 60, 0.7);
|
| 918 |
-
}
|
| 919 |
-
}
|
| 920 |
-
}
|
| 921 |
-
|
| 922 |
-
/* Dark mode styles for podcast UI */
|
| 923 |
-
@media (prefers-color-scheme: dark) {
|
| 924 |
-
.random-script-btn {
|
| 925 |
-
background-color: var(--light-gray);
|
| 926 |
-
color: var(--text-color);
|
| 927 |
-
border-color: var(--border-color);
|
| 928 |
-
}
|
| 929 |
-
|
| 930 |
-
.add-line-btn {
|
| 931 |
-
background-color: var(--light-gray);
|
| 932 |
-
color: var(--text-color);
|
| 933 |
-
border-color: var(--border-color);
|
| 934 |
-
}
|
| 935 |
-
|
| 936 |
-
.line-input {
|
| 937 |
-
background-color: var(--light-gray);
|
| 938 |
-
color: var(--text-color);
|
| 939 |
-
}
|
| 940 |
-
|
| 941 |
-
.line-input:focus {
|
| 942 |
-
background-color: rgba(108, 99, 255, 0.1);
|
| 943 |
-
}
|
| 944 |
-
}
|
| 945 |
-
|
| 946 |
-
.podcast-loading-container {
|
| 947 |
-
display: flex;
|
| 948 |
-
justify-content: center;
|
| 949 |
-
align-items: center;
|
| 950 |
-
position: fixed;
|
| 951 |
-
top: 0;
|
| 952 |
-
left: 0;
|
| 953 |
-
width: 100%;
|
| 954 |
-
height: 100vh;
|
| 955 |
-
background-color: rgba(255, 255, 255, 0.9);
|
| 956 |
-
z-index: 1000;
|
| 957 |
-
}
|
| 958 |
-
|
| 959 |
-
@media (prefers-color-scheme: dark) {
|
| 960 |
-
.podcast-loading-container {
|
| 961 |
-
background-color: rgba(18, 18, 24, 0.9);
|
| 962 |
-
}
|
| 963 |
-
}
|
| 964 |
-
|
| 965 |
-
.podcast-vote-results {
|
| 966 |
-
background-color: #f0f4ff;
|
| 967 |
-
border: 1px solid #d0d7f7;
|
| 968 |
-
border-radius: var(--radius);
|
| 969 |
-
padding: 16px;
|
| 970 |
-
margin: 24px 0;
|
| 971 |
-
}
|
| 972 |
-
|
| 973 |
-
.podcast-next-round-container {
|
| 974 |
-
margin-top: 24px;
|
| 975 |
-
text-align: center;
|
| 976 |
-
}
|
| 977 |
-
|
| 978 |
-
.podcast-next-round-btn {
|
| 979 |
-
padding: 12px 24px;
|
| 980 |
-
background-color: var(--primary-color);
|
| 981 |
-
color: white;
|
| 982 |
-
border: none;
|
| 983 |
-
border-radius: var(--radius);
|
| 984 |
-
font-weight: 500;
|
| 985 |
-
cursor: pointer;
|
| 986 |
-
position: relative;
|
| 987 |
-
width: 100%;
|
| 988 |
-
font-size: 1rem;
|
| 989 |
-
transition: background-color 0.2s;
|
| 990 |
-
}
|
| 991 |
-
|
| 992 |
-
.podcast-next-round-btn:hover {
|
| 993 |
-
background-color: #4038c7;
|
| 994 |
-
}
|
| 995 |
-
|
| 996 |
-
/* Dark mode adjustments */
|
| 997 |
-
@media (prefers-color-scheme: dark) {
|
| 998 |
-
.podcast-vote-results {
|
| 999 |
background-color: var(--light-gray);
|
| 1000 |
border-color: var(--border-color);
|
| 1001 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1002 |
}
|
| 1003 |
|
| 1004 |
/* Login prompt overlay styles */
|
|
@@ -1134,8 +653,6 @@
|
|
| 1134 |
const nextRoundBtn = document.querySelector('.next-round-btn');
|
| 1135 |
const nextRoundContainer = document.querySelector('.next-round-container');
|
| 1136 |
const randomBtn = document.querySelector('.random-btn');
|
| 1137 |
-
const tabs = document.querySelectorAll('.tab');
|
| 1138 |
-
const tabContents = document.querySelectorAll('.tab-content');
|
| 1139 |
const voteResultsContainer = document.querySelector('.vote-results');
|
| 1140 |
const chosenModelNameElement = document.querySelector('.chosen-model-name');
|
| 1141 |
const rejectedModelNameElement = document.querySelector('.rejected-model-name');
|
|
@@ -1182,55 +699,6 @@
|
|
| 1182 |
});
|
| 1183 |
}
|
| 1184 |
|
| 1185 |
-
// Check URL hash for direct tab access
|
| 1186 |
-
function checkHashAndSetTab() {
|
| 1187 |
-
const hash = window.location.hash.toLowerCase();
|
| 1188 |
-
if (hash === '#conversational') {
|
| 1189 |
-
// Switch to conversational tab
|
| 1190 |
-
tabs.forEach(t => t.classList.remove('active'));
|
| 1191 |
-
tabContents.forEach(c => c.classList.remove('active'));
|
| 1192 |
-
|
| 1193 |
-
document.querySelector('.tab[data-tab="conversational"]').classList.add('active');
|
| 1194 |
-
document.getElementById('conversational-tab').classList.add('active');
|
| 1195 |
-
} else if (hash === '#tts') {
|
| 1196 |
-
// Switch to TTS tab (explicit)
|
| 1197 |
-
tabs.forEach(t => t.classList.remove('active'));
|
| 1198 |
-
tabContents.forEach(c => c.classList.remove('active'));
|
| 1199 |
-
|
| 1200 |
-
document.querySelector('.tab[data-tab="tts"]').classList.add('active');
|
| 1201 |
-
document.getElementById('tts-tab').classList.add('active');
|
| 1202 |
-
}
|
| 1203 |
-
}
|
| 1204 |
-
|
| 1205 |
-
// Check hash on page load
|
| 1206 |
-
checkHashAndSetTab();
|
| 1207 |
-
|
| 1208 |
-
// Listen for hash changes
|
| 1209 |
-
window.addEventListener('hashchange', checkHashAndSetTab);
|
| 1210 |
-
|
| 1211 |
-
// Tab switching functionality
|
| 1212 |
-
tabs.forEach(tab => {
|
| 1213 |
-
tab.addEventListener('click', function() {
|
| 1214 |
-
const tabId = this.dataset.tab;
|
| 1215 |
-
|
| 1216 |
-
// Update URL hash without page reload
|
| 1217 |
-
history.replaceState(null, null, `#${tabId}`);
|
| 1218 |
-
|
| 1219 |
-
// Remove active class from all tabs and contents
|
| 1220 |
-
tabs.forEach(t => t.classList.remove('active'));
|
| 1221 |
-
tabContents.forEach(c => c.classList.remove('active'));
|
| 1222 |
-
|
| 1223 |
-
// Add active class to clicked tab and corresponding content
|
| 1224 |
-
this.classList.add('active');
|
| 1225 |
-
document.getElementById(`${tabId}-tab`).classList.add('active');
|
| 1226 |
-
|
| 1227 |
-
// Reset TTS tab state if switching away from it
|
| 1228 |
-
if (tabId !== 'tts') {
|
| 1229 |
-
resetToInitialState();
|
| 1230 |
-
}
|
| 1231 |
-
});
|
| 1232 |
-
});
|
| 1233 |
-
|
| 1234 |
function handleSynthesize(e) {
|
| 1235 |
if (e) {
|
| 1236 |
e.preventDefault();
|
|
@@ -1244,12 +712,12 @@
|
|
| 1244 |
|
| 1245 |
const text = textInput.value.trim();
|
| 1246 |
if (!text) {
|
| 1247 |
-
openToast("
|
| 1248 |
return;
|
| 1249 |
}
|
| 1250 |
|
| 1251 |
if (text.length > 1000) {
|
| 1252 |
-
openToast("
|
| 1253 |
return;
|
| 1254 |
}
|
| 1255 |
|
|
@@ -1289,7 +757,7 @@
|
|
| 1289 |
.then(response => {
|
| 1290 |
if (!response.ok) {
|
| 1291 |
return response.json().then(err => {
|
| 1292 |
-
throw new Error(err.error || '
|
| 1293 |
});
|
| 1294 |
}
|
| 1295 |
return response.json();
|
|
@@ -1336,7 +804,7 @@
|
|
| 1336 |
|
| 1337 |
// Handle authentication errors specially
|
| 1338 |
if (error.message.includes('logged in to generate') || error.message.includes('logged in to vote')) {
|
| 1339 |
-
openToast("
|
| 1340 |
} else {
|
| 1341 |
openToast(error.message, "error");
|
| 1342 |
}
|
|
@@ -1367,7 +835,7 @@
|
|
| 1367 |
.then(response => {
|
| 1368 |
if (!response.ok) {
|
| 1369 |
return response.json().then(err => {
|
| 1370 |
-
throw new Error(err.error || '
|
| 1371 |
});
|
| 1372 |
}
|
| 1373 |
return response.json();
|
|
@@ -1403,7 +871,7 @@
|
|
| 1403 |
nextRoundContainer.style.display = 'block';
|
| 1404 |
|
| 1405 |
// Show success toast
|
| 1406 |
-
openToast("
|
| 1407 |
})
|
| 1408 |
.catch(error => {
|
| 1409 |
// Re-enable vote buttons
|
|
@@ -1414,7 +882,7 @@
|
|
| 1414 |
|
| 1415 |
// Handle authentication errors specially
|
| 1416 |
if (error.message.includes('logged in to vote')) {
|
| 1417 |
-
openToast("
|
| 1418 |
} else {
|
| 1419 |
openToast(error.message, "error");
|
| 1420 |
}
|
|
@@ -1470,10 +938,13 @@
|
|
| 1470 |
// Select a random text from the unconsumed sentences
|
| 1471 |
selectedText = cachedSentences[Math.floor(Math.random() * cachedSentences.length)];
|
| 1472 |
console.log("Using random sentence from unconsumed sentences.");
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1473 |
} else {
|
| 1474 |
-
|
| 1475 |
-
|
| 1476 |
-
openToast("No unused sentences available. All sentences from the dataset may have been consumed.", "error");
|
| 1477 |
return;
|
| 1478 |
}
|
| 1479 |
textInput.value = selectedText;
|
|
@@ -1481,7 +952,7 @@
|
|
| 1481 |
}
|
| 1482 |
|
| 1483 |
function showListenToastMessage() {
|
| 1484 |
-
openToast("
|
| 1485 |
}
|
| 1486 |
|
| 1487 |
// New function for N shortcut: Random + Synthesize
|
|
@@ -1589,562 +1060,4 @@
|
|
| 1589 |
fetchCachedSentences();
|
| 1590 |
});
|
| 1591 |
</script>
|
| 1592 |
-
|
| 1593 |
-
<script>
|
| 1594 |
-
document.addEventListener('DOMContentLoaded', function() {
|
| 1595 |
-
// Variables for podcast UI
|
| 1596 |
-
const podcastContainer = document.querySelector('.podcast-container');
|
| 1597 |
-
const podcastLinesContainer = document.querySelector('.podcast-lines');
|
| 1598 |
-
const addLineBtn = document.querySelector('.add-line-btn');
|
| 1599 |
-
const randomScriptBtn = document.querySelector('.random-script-btn');
|
| 1600 |
-
const podcastSynthBtn = document.querySelector('.podcast-synth-btn');
|
| 1601 |
-
const podcastLoadingContainer = document.querySelector('.podcast-loading-container');
|
| 1602 |
-
const podcastPlayerContainer = document.querySelector('.podcast-player-container');
|
| 1603 |
-
const podcastWavePlayerA = document.querySelector('.podcast-wave-player-a');
|
| 1604 |
-
const podcastWavePlayerB = document.querySelector('.podcast-wave-player-b');
|
| 1605 |
-
const podcastVoteButtons = podcastPlayerContainer.querySelectorAll('.vote-btn');
|
| 1606 |
-
const podcastVoteResults = podcastPlayerContainer.querySelector('.vote-results');
|
| 1607 |
-
const podcastNextRoundContainer = podcastPlayerContainer.querySelector('.next-round-container');
|
| 1608 |
-
const podcastNextRoundBtn = podcastPlayerContainer.querySelector('.next-round-btn');
|
| 1609 |
-
const chosenModelNameElement = podcastVoteResults.querySelector('.chosen-model-name');
|
| 1610 |
-
const rejectedModelNameElement = podcastVoteResults.querySelector('.rejected-model-name');
|
| 1611 |
-
|
| 1612 |
-
let podcastWavePlayers = { a: null, b: null };
|
| 1613 |
-
let bothPodcastSamplesPlayed = false;
|
| 1614 |
-
let currentPodcastSessionId = null;
|
| 1615 |
-
let podcastModelNames = { a: 'Model A', b: 'Model B' };
|
| 1616 |
-
|
| 1617 |
-
// Sample random scripts for the podcast
|
| 1618 |
-
const randomScripts = [
|
| 1619 |
-
[
|
| 1620 |
-
{ speaker: 1, text: "Welcome to our podcast about artificial intelligence. Today we're discussing the latest advances in text-to-speech technology." },
|
| 1621 |
-
{ speaker: 2, text: "That's right! Text-to-speech has come a long way in recent years. The voices sound increasingly natural." },
|
| 1622 |
-
{ speaker: 1, text: "What do you think are the most impressive recent developments?" },
|
| 1623 |
-
{ speaker: 2, text: "I'd say the emotion and inflection that modern TTS systems can convey is truly remarkable." }
|
| 1624 |
-
],
|
| 1625 |
-
[
|
| 1626 |
-
{ speaker: 1, text: "So today we're talking about climate change and its effects on our planet." },
|
| 1627 |
-
{ speaker: 2, text: "It's such an important topic. We're seeing more extreme weather events every year." },
|
| 1628 |
-
{ speaker: 1, text: "Absolutely. And the science is clear that human activity is the primary driver." },
|
| 1629 |
-
{ speaker: 2, text: "What can individuals do to help address this global challenge?" }
|
| 1630 |
-
],
|
| 1631 |
-
[
|
| 1632 |
-
{ speaker: 1, text: "In today's episode, we're exploring the world of modern cinema." },
|
| 1633 |
-
{ speaker: 2, text: "Film has evolved so much since its early days. What's your favorite era of movies?" },
|
| 1634 |
-
{ speaker: 1, text: "I'm particularly fond of the 1970s New Hollywood movement. Films like The Godfather and Taxi Driver really pushed boundaries." },
|
| 1635 |
-
{ speaker: 2, text: "Interesting choice! I'm more drawn to contemporary international cinema, especially from directors like Bong Joon-ho and Park Chan-wook." }
|
| 1636 |
-
],
|
| 1637 |
-
[
|
| 1638 |
-
{ speaker: 1, text: "Today we're discussing the future of remote work. How do you think it's changed the workplace?" },
|
| 1639 |
-
{ speaker: 2, text: "I believe it's revolutionized how we think about productivity and work-life balance." },
|
| 1640 |
-
{ speaker: 1, text: "Do you think companies will continue to offer remote options post-pandemic?" },
|
| 1641 |
-
{ speaker: 2, text: "Absolutely. Companies that don't embrace flexibility will struggle to attract top talent." }
|
| 1642 |
-
],
|
| 1643 |
-
[
|
| 1644 |
-
{ speaker: 1, text: "Let's talk about the latest developments in renewable energy." },
|
| 1645 |
-
{ speaker: 2, text: "Solar and wind have become increasingly cost-effective in recent years." },
|
| 1646 |
-
{ speaker: 1, text: "What about emerging technologies like green hydrogen?" },
|
| 1647 |
-
{ speaker: 2, text: "That's a fascinating area with huge potential, especially for industries that are difficult to electrify." }
|
| 1648 |
-
],
|
| 1649 |
-
[
|
| 1650 |
-
{ speaker: 1, text: "The world of cryptocurrency has seen massive changes lately. What's your take?" },
|
| 1651 |
-
{ speaker: 2, text: "It's certainly volatile, but I think blockchain technology has applications beyond just digital currency." },
|
| 1652 |
-
{ speaker: 1, text: "Do you see it becoming mainstream in the financial sector?" },
|
| 1653 |
-
{ speaker: 2, text: "Parts of it already are. Central banks are exploring digital currencies, and major companies are investing in blockchain." }
|
| 1654 |
-
],
|
| 1655 |
-
[
|
| 1656 |
-
{ speaker: 1, text: "Mental health awareness has grown significantly in recent years." },
|
| 1657 |
-
{ speaker: 2, text: "Yes, and it's about time. The stigma around seeking help is finally starting to diminish." },
|
| 1658 |
-
{ speaker: 1, text: "What do you think has driven this change?" },
|
| 1659 |
-
{ speaker: 2, text: "I think social media has played a role, with more people openly sharing their experiences." }
|
| 1660 |
-
],
|
| 1661 |
-
[
|
| 1662 |
-
{ speaker: 1, text: "Space exploration is entering an exciting new era with private companies leading the charge." },
|
| 1663 |
-
{ speaker: 2, text: "The commercialization of space has definitely accelerated innovation in the field." },
|
| 1664 |
-
{ speaker: 1, text: "Do you think we'll see humans on Mars in our lifetime?" },
|
| 1665 |
-
{ speaker: 2, text: "I'm optimistic. The technology is advancing rapidly, and there's strong motivation from both public and private sectors." }
|
| 1666 |
-
],
|
| 1667 |
-
[
|
| 1668 |
-
{ speaker: 1, text: "Today's topic is sustainable fashion. How can consumers make more ethical choices?" },
|
| 1669 |
-
{ speaker: 2, text: "It starts with buying less and choosing quality items that last longer." },
|
| 1670 |
-
{ speaker: 1, text: "What about the responsibility of fashion brands themselves?" },
|
| 1671 |
-
{ speaker: 2, text: "They need to be transparent about their supply chains and commit to reducing their environmental impact." }
|
| 1672 |
-
],
|
| 1673 |
-
[
|
| 1674 |
-
{ speaker: 1, text: "Let's discuss the evolution of social media and its impact on society." },
|
| 1675 |
-
{ speaker: 2, text: "It's transformed how we connect, but also created new challenges like misinformation and privacy concerns." },
|
| 1676 |
-
{ speaker: 1, text: "Do you think regulation is the answer?" },
|
| 1677 |
-
{ speaker: 2, text: "Partly, but digital literacy education is equally important so people can navigate these platforms responsibly." }
|
| 1678 |
-
],
|
| 1679 |
-
[
|
| 1680 |
-
{ speaker: 1, text: "The field of genomics has seen remarkable progress. What excites you most about it?" },
|
| 1681 |
-
{ speaker: 2, text: "Personalized medicine is fascinating - the idea that treatments can be tailored to an individual's genetic makeup." },
|
| 1682 |
-
{ speaker: 1, text: "What about the ethical considerations?" },
|
| 1683 |
-
{ speaker: 2, text: "Those are crucial. We need robust frameworks to ensure these technologies are used responsibly." }
|
| 1684 |
-
],
|
| 1685 |
-
[
|
| 1686 |
-
{ speaker: 1, text: "Urban planning is facing new challenges in the 21st century. What trends are you seeing?" },
|
| 1687 |
-
{ speaker: 2, text: "There's a growing focus on creating walkable, mixed-use neighborhoods that reduce car dependency." },
|
| 1688 |
-
{ speaker: 1, text: "How are cities adapting to climate change?" },
|
| 1689 |
-
{ speaker: 2, text: "Many are implementing green infrastructure like parks and permeable surfaces to manage flooding and reduce heat islands." }
|
| 1690 |
-
],
|
| 1691 |
-
[
|
| 1692 |
-
{ speaker: 1, text: "The gaming industry has grown enormously in recent years. What's driving this expansion?" },
|
| 1693 |
-
{ speaker: 2, text: "Gaming has become much more accessible across different platforms, and the pandemic certainly accelerated adoption." },
|
| 1694 |
-
{ speaker: 1, text: "What do you think about the rise of esports?" },
|
| 1695 |
-
{ speaker: 2, text: "It's fascinating to see competitive gaming achieve mainstream recognition and create new career opportunities." }
|
| 1696 |
-
],
|
| 1697 |
-
[
|
| 1698 |
-
{ speaker: 1, text: "Let's talk about the future of transportation. How will we get around in 20 years?" },
|
| 1699 |
-
{ speaker: 2, text: "Electric vehicles will be dominant, and autonomous driving technology will be much more widespread." },
|
| 1700 |
-
{ speaker: 1, text: "What about public transit and alternative modes?" },
|
| 1701 |
-
{ speaker: 2, text: "I think we'll see more integrated systems where bikes, scooters, and public transit work seamlessly together." }
|
| 1702 |
-
]
|
| 1703 |
-
];
|
| 1704 |
-
|
| 1705 |
-
// Initialize with 2 empty lines
|
| 1706 |
-
function initializePodcastLines() {
|
| 1707 |
-
podcastLinesContainer.innerHTML = '';
|
| 1708 |
-
addPodcastLine(1);
|
| 1709 |
-
addPodcastLine(2);
|
| 1710 |
-
}
|
| 1711 |
-
|
| 1712 |
-
// Add a new podcast line
|
| 1713 |
-
function addPodcastLine(speakerNum = null) {
|
| 1714 |
-
const lineCount = podcastLinesContainer.querySelectorAll('.podcast-line').length;
|
| 1715 |
-
|
| 1716 |
-
// If speaker number isn't specified, alternate between 1 and 2
|
| 1717 |
-
if (speakerNum === null) {
|
| 1718 |
-
speakerNum = (lineCount % 2) + 1;
|
| 1719 |
-
}
|
| 1720 |
-
|
| 1721 |
-
const lineElement = document.createElement('div');
|
| 1722 |
-
lineElement.className = 'podcast-line';
|
| 1723 |
-
|
| 1724 |
-
lineElement.innerHTML = `
|
| 1725 |
-
<div class="speaker-label speaker-${speakerNum}">Speaker ${speakerNum}</div>
|
| 1726 |
-
<input type="text" class="line-input" placeholder="Enter dialog...">
|
| 1727 |
-
<button type="button" class="remove-line-btn" tabindex="-1">
|
| 1728 |
-
<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none"
|
| 1729 |
-
stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
|
| 1730 |
-
<line x1="18" y1="6" x2="6" y2="18"></line>
|
| 1731 |
-
<line x1="6" y1="6" x2="18" y2="18"></line>
|
| 1732 |
-
</svg>
|
| 1733 |
-
</button>
|
| 1734 |
-
`;
|
| 1735 |
-
|
| 1736 |
-
podcastLinesContainer.appendChild(lineElement);
|
| 1737 |
-
|
| 1738 |
-
// Add event listener to remove button
|
| 1739 |
-
const removeBtn = lineElement.querySelector('.remove-line-btn');
|
| 1740 |
-
removeBtn.addEventListener('click', function() {
|
| 1741 |
-
// Don't allow removing if there are only 2 lines
|
| 1742 |
-
if (podcastLinesContainer.querySelectorAll('.podcast-line').length > 2) {
|
| 1743 |
-
lineElement.remove();
|
| 1744 |
-
} else {
|
| 1745 |
-
openToast("At least 2 lines are required", "warning");
|
| 1746 |
-
}
|
| 1747 |
-
});
|
| 1748 |
-
|
| 1749 |
-
// Add event listener for keyboard navigation in the input field
|
| 1750 |
-
const inputField = lineElement.querySelector('.line-input');
|
| 1751 |
-
inputField.addEventListener('keydown', function(e) {
|
| 1752 |
-
// Alt+Enter or Ctrl+Enter to add new line
|
| 1753 |
-
if (e.key === 'Enter' && (e.altKey || e.ctrlKey)) {
|
| 1754 |
-
e.preventDefault();
|
| 1755 |
-
addPodcastLine();
|
| 1756 |
-
|
| 1757 |
-
// Focus the new line's input field
|
| 1758 |
-
setTimeout(() => {
|
| 1759 |
-
const inputs = podcastLinesContainer.querySelectorAll('.line-input');
|
| 1760 |
-
inputs[inputs.length - 1].focus();
|
| 1761 |
-
}, 10);
|
| 1762 |
-
}
|
| 1763 |
-
});
|
| 1764 |
-
|
| 1765 |
-
return lineElement;
|
| 1766 |
-
}
|
| 1767 |
-
|
| 1768 |
-
// Load a random script
|
| 1769 |
-
function loadRandomScript() {
|
| 1770 |
-
// Clear existing lines
|
| 1771 |
-
podcastLinesContainer.innerHTML = '';
|
| 1772 |
-
|
| 1773 |
-
// Select a random script
|
| 1774 |
-
const randomScript = randomScripts[Math.floor(Math.random() * randomScripts.length)];
|
| 1775 |
-
|
| 1776 |
-
// Add each line from the script
|
| 1777 |
-
randomScript.forEach(line => {
|
| 1778 |
-
const lineElement = addPodcastLine(line.speaker);
|
| 1779 |
-
lineElement.querySelector('.line-input').value = line.text;
|
| 1780 |
-
});
|
| 1781 |
-
}
|
| 1782 |
-
|
| 1783 |
-
// Generate podcast (mock functionality)
|
| 1784 |
-
function generatePodcast() {
|
| 1785 |
-
// Get all lines
|
| 1786 |
-
const lines = [];
|
| 1787 |
-
podcastLinesContainer.querySelectorAll('.podcast-line').forEach(line => {
|
| 1788 |
-
const speaker_id = line.querySelector('.speaker-label').textContent.includes('1') ? 0 : 1;
|
| 1789 |
-
const text = line.querySelector('.line-input').value.trim();
|
| 1790 |
-
|
| 1791 |
-
if (text) {
|
| 1792 |
-
lines.push({ speaker_id, text });
|
| 1793 |
-
}
|
| 1794 |
-
});
|
| 1795 |
-
|
| 1796 |
-
// Validate that we have at least 2 lines with content
|
| 1797 |
-
if (lines.length < 2) {
|
| 1798 |
-
openToast("Please enter at least 2 lines of dialog", "warning");
|
| 1799 |
-
return;
|
| 1800 |
-
}
|
| 1801 |
-
|
| 1802 |
-
// Reset vote buttons and hide results
|
| 1803 |
-
podcastVoteButtons.forEach(btn => {
|
| 1804 |
-
btn.disabled = true;
|
| 1805 |
-
btn.classList.remove('selected');
|
| 1806 |
-
btn.querySelector('.vote-loader').style.display = 'none';
|
| 1807 |
-
});
|
| 1808 |
-
|
| 1809 |
-
// Clear model name displays
|
| 1810 |
-
const modelNameDisplays = podcastPlayerContainer.querySelectorAll('.model-name-display');
|
| 1811 |
-
modelNameDisplays.forEach(display => {
|
| 1812 |
-
display.textContent = '';
|
| 1813 |
-
});
|
| 1814 |
-
|
| 1815 |
-
podcastVoteResults.style.display = 'none';
|
| 1816 |
-
podcastNextRoundContainer.style.display = 'none';
|
| 1817 |
-
|
| 1818 |
-
// Reset the flag for both samples played
|
| 1819 |
-
bothPodcastSamplesPlayed = false;
|
| 1820 |
-
|
| 1821 |
-
// Show loading animation
|
| 1822 |
-
podcastLoadingContainer.style.display = 'flex';
|
| 1823 |
-
podcastPlayerContainer.style.display = 'none';
|
| 1824 |
-
|
| 1825 |
-
// Call API to generate podcast
|
| 1826 |
-
fetch('/api/conversational/generate', {
|
| 1827 |
-
method: 'POST',
|
| 1828 |
-
headers: {
|
| 1829 |
-
'Content-Type': 'application/json',
|
| 1830 |
-
},
|
| 1831 |
-
body: JSON.stringify({ script: lines }),
|
| 1832 |
-
})
|
| 1833 |
-
.then(response => {
|
| 1834 |
-
if (!response.ok) {
|
| 1835 |
-
return response.json().then(err => {
|
| 1836 |
-
throw new Error(err.error || 'Failed to generate podcast');
|
| 1837 |
-
});
|
| 1838 |
-
}
|
| 1839 |
-
return response.json();
|
| 1840 |
-
})
|
| 1841 |
-
.then(data => {
|
| 1842 |
-
currentPodcastSessionId = data.session_id;
|
| 1843 |
-
|
| 1844 |
-
// Hide loading
|
| 1845 |
-
podcastLoadingContainer.style.display = 'none';
|
| 1846 |
-
|
| 1847 |
-
// Show player
|
| 1848 |
-
podcastPlayerContainer.style.display = 'block';
|
| 1849 |
-
|
| 1850 |
-
// Initialize WavePlayers if not already done
|
| 1851 |
-
if (!podcastWavePlayers.a) {
|
| 1852 |
-
podcastWavePlayers.a = new WavePlayer(podcastWavePlayerA, {
|
| 1853 |
-
// Add mobile-friendly options but hide native controls
|
| 1854 |
-
backend: 'MediaElement',
|
| 1855 |
-
mediaControls: false // Hide native audio controls
|
| 1856 |
-
});
|
| 1857 |
-
podcastWavePlayers.b = new WavePlayer(podcastWavePlayerB, {
|
| 1858 |
-
// Add mobile-friendly options but hide native controls
|
| 1859 |
-
backend: 'MediaElement',
|
| 1860 |
-
mediaControls: false // Hide native audio controls
|
| 1861 |
-
});
|
| 1862 |
-
|
| 1863 |
-
// Load audio in waveplayers
|
| 1864 |
-
podcastWavePlayers.a.loadAudio(data.audio_a);
|
| 1865 |
-
podcastWavePlayers.b.loadAudio(data.audio_b);
|
| 1866 |
-
|
| 1867 |
-
// Force hide loading indicators after 5 seconds as a fallback
|
| 1868 |
-
setTimeout(() => {
|
| 1869 |
-
if (podcastWavePlayers.a && podcastWavePlayers.a.hideLoading) {
|
| 1870 |
-
podcastWavePlayers.a.hideLoading();
|
| 1871 |
-
}
|
| 1872 |
-
if (podcastWavePlayers.b && podcastWavePlayers.b.hideLoading) {
|
| 1873 |
-
podcastWavePlayers.b.hideLoading();
|
| 1874 |
-
}
|
| 1875 |
-
console.log('Forced hiding of podcast loading indicators (safety timeout - existing players)');
|
| 1876 |
-
}, 5000);
|
| 1877 |
-
} else {
|
| 1878 |
-
// Reset and reload for existing players
|
| 1879 |
-
try {
|
| 1880 |
-
podcastWavePlayers.a.wavesurfer.empty();
|
| 1881 |
-
podcastWavePlayers.b.wavesurfer.empty();
|
| 1882 |
-
|
| 1883 |
-
// Make sure loading indicators are reset
|
| 1884 |
-
podcastWavePlayers.a.hideLoading();
|
| 1885 |
-
podcastWavePlayers.b.hideLoading();
|
| 1886 |
-
|
| 1887 |
-
podcastWavePlayers.a.loadAudio(data.audio_a);
|
| 1888 |
-
podcastWavePlayers.b.loadAudio(data.audio_b);
|
| 1889 |
-
|
| 1890 |
-
// Force hide loading indicators after 5 seconds as a fallback
|
| 1891 |
-
setTimeout(() => {
|
| 1892 |
-
if (podcastWavePlayers.a && podcastWavePlayers.a.hideLoading) {
|
| 1893 |
-
podcastWavePlayers.a.hideLoading();
|
| 1894 |
-
}
|
| 1895 |
-
if (podcastWavePlayers.b && podcastWavePlayers.b.hideLoading) {
|
| 1896 |
-
podcastWavePlayers.b.hideLoading();
|
| 1897 |
-
}
|
| 1898 |
-
console.log('Forced hiding of podcast loading indicators (safety timeout - existing players)');
|
| 1899 |
-
}, 5000);
|
| 1900 |
-
} catch (err) {
|
| 1901 |
-
console.error('Error resetting podcast waveplayers:', err);
|
| 1902 |
-
|
| 1903 |
-
// Recreate the players if there was an error
|
| 1904 |
-
podcastWavePlayers.a = new WavePlayer(podcastWavePlayerA, {
|
| 1905 |
-
backend: 'MediaElement',
|
| 1906 |
-
mediaControls: false
|
| 1907 |
-
});
|
| 1908 |
-
podcastWavePlayers.b = new WavePlayer(podcastWavePlayerB, {
|
| 1909 |
-
backend: 'MediaElement',
|
| 1910 |
-
mediaControls: false
|
| 1911 |
-
});
|
| 1912 |
-
|
| 1913 |
-
podcastWavePlayers.a.loadAudio(data.audio_a);
|
| 1914 |
-
podcastWavePlayers.b.loadAudio(data.audio_b);
|
| 1915 |
-
|
| 1916 |
-
// Force hide loading indicators after 5 seconds as a fallback
|
| 1917 |
-
setTimeout(() => {
|
| 1918 |
-
if (podcastWavePlayers.a && podcastWavePlayers.a.hideLoading) {
|
| 1919 |
-
podcastWavePlayers.a.hideLoading();
|
| 1920 |
-
}
|
| 1921 |
-
if (podcastWavePlayers.b && podcastWavePlayers.b.hideLoading) {
|
| 1922 |
-
podcastWavePlayers.b.hideLoading();
|
| 1923 |
-
}
|
| 1924 |
-
console.log('Forced hiding of podcast loading indicators (fallback case)');
|
| 1925 |
-
}, 5000);
|
| 1926 |
-
}
|
| 1927 |
-
}
|
| 1928 |
-
|
| 1929 |
-
// Setup automatic sequential playback
|
| 1930 |
-
podcastWavePlayers.a.wavesurfer.once('ready', function() {
|
| 1931 |
-
podcastWavePlayers.a.play();
|
| 1932 |
-
|
| 1933 |
-
// When audio A ends, play audio B
|
| 1934 |
-
podcastWavePlayers.a.wavesurfer.once('finish', function() {
|
| 1935 |
-
// Wait a short moment before playing B
|
| 1936 |
-
setTimeout(() => {
|
| 1937 |
-
podcastWavePlayers.b.play();
|
| 1938 |
-
|
| 1939 |
-
// When audio B ends, enable voting
|
| 1940 |
-
podcastWavePlayers.b.wavesurfer.once('finish', function() {
|
| 1941 |
-
bothPodcastSamplesPlayed = true;
|
| 1942 |
-
podcastVoteButtons.forEach(btn => {
|
| 1943 |
-
btn.disabled = false;
|
| 1944 |
-
});
|
| 1945 |
-
});
|
| 1946 |
-
}, 500);
|
| 1947 |
-
});
|
| 1948 |
-
});
|
| 1949 |
-
})
|
| 1950 |
-
.catch(error => {
|
| 1951 |
-
podcastLoadingContainer.style.display = 'none';
|
| 1952 |
-
|
| 1953 |
-
// Handle authentication errors specially
|
| 1954 |
-
if (error.message.includes('logged in to generate') || error.message.includes('logged in to vote')) {
|
| 1955 |
-
openToast("Please log in to use TTS Arena. <a href='{{ url_for('auth.login', next=request.path) }}' style='color: white; text-decoration: underline;'>Login now</a>", "error");
|
| 1956 |
-
} else {
|
| 1957 |
-
openToast(error.message, "error");
|
| 1958 |
-
}
|
| 1959 |
-
console.error('Error:', error);
|
| 1960 |
-
});
|
| 1961 |
-
}
|
| 1962 |
-
|
| 1963 |
-
// Handle vote for a podcast model
|
| 1964 |
-
function handlePodcastVote(model) {
|
| 1965 |
-
// Disable both vote buttons
|
| 1966 |
-
podcastVoteButtons.forEach(btn => {
|
| 1967 |
-
btn.disabled = true;
|
| 1968 |
-
if (btn.dataset.model === model) {
|
| 1969 |
-
btn.querySelector('.vote-loader').style.display = 'flex';
|
| 1970 |
-
}
|
| 1971 |
-
});
|
| 1972 |
-
|
| 1973 |
-
// Send vote to server
|
| 1974 |
-
fetch('/api/conversational/vote', {
|
| 1975 |
-
method: 'POST',
|
| 1976 |
-
headers: {
|
| 1977 |
-
'Content-Type': 'application/json',
|
| 1978 |
-
},
|
| 1979 |
-
body: JSON.stringify({
|
| 1980 |
-
session_id: currentPodcastSessionId,
|
| 1981 |
-
chosen_model: model
|
| 1982 |
-
}),
|
| 1983 |
-
})
|
| 1984 |
-
.then(response => {
|
| 1985 |
-
if (!response.ok) {
|
| 1986 |
-
return response.json().then(err => {
|
| 1987 |
-
throw new Error(err.error || 'Failed to submit vote');
|
| 1988 |
-
});
|
| 1989 |
-
}
|
| 1990 |
-
return response.json();
|
| 1991 |
-
})
|
| 1992 |
-
.then(data => {
|
| 1993 |
-
// Hide loaders
|
| 1994 |
-
podcastVoteButtons.forEach(btn => {
|
| 1995 |
-
btn.querySelector('.vote-loader').style.display = 'none';
|
| 1996 |
-
|
| 1997 |
-
// Highlight the selected button
|
| 1998 |
-
if (btn.dataset.model === model) {
|
| 1999 |
-
btn.classList.add('selected');
|
| 2000 |
-
}
|
| 2001 |
-
});
|
| 2002 |
-
|
| 2003 |
-
// Store model names from vote response
|
| 2004 |
-
podcastModelNames.a = data.names.a;
|
| 2005 |
-
podcastModelNames.b = data.names.b;
|
| 2006 |
-
|
| 2007 |
-
// Show model names after voting
|
| 2008 |
-
const modelNameDisplays = podcastPlayerContainer.querySelectorAll('.model-name-display');
|
| 2009 |
-
modelNameDisplays[0].textContent = data.names.a ? `(${data.names.a})` : '';
|
| 2010 |
-
modelNameDisplays[1].textContent = data.names.b ? `(${data.names.b})` : '';
|
| 2011 |
-
|
| 2012 |
-
// Show vote results
|
| 2013 |
-
chosenModelNameElement.textContent = data.chosen_model.name;
|
| 2014 |
-
rejectedModelNameElement.textContent = data.rejected_model.name;
|
| 2015 |
-
podcastVoteResults.style.display = 'block';
|
| 2016 |
-
|
| 2017 |
-
// Show next round button
|
| 2018 |
-
podcastNextRoundContainer.style.display = 'block';
|
| 2019 |
-
|
| 2020 |
-
// Show success toast
|
| 2021 |
-
openToast("Vote recorded successfully!", "success");
|
| 2022 |
-
})
|
| 2023 |
-
.catch(error => {
|
| 2024 |
-
// Re-enable vote buttons
|
| 2025 |
-
podcastVoteButtons.forEach(btn => {
|
| 2026 |
-
btn.disabled = false;
|
| 2027 |
-
btn.querySelector('.vote-loader').style.display = 'none';
|
| 2028 |
-
});
|
| 2029 |
-
|
| 2030 |
-
// Handle authentication errors specially
|
| 2031 |
-
if (error.message.includes('logged in to vote')) {
|
| 2032 |
-
openToast("Please log in to vote. <a href='{{ url_for('auth.login', next=request.path) }}' style='color: white; text-decoration: underline;'>Login now</a>", "error");
|
| 2033 |
-
} else {
|
| 2034 |
-
openToast(error.message, "error");
|
| 2035 |
-
}
|
| 2036 |
-
console.error('Error:', error);
|
| 2037 |
-
});
|
| 2038 |
-
}
|
| 2039 |
-
|
| 2040 |
-
// Reset podcast UI to initial state
|
| 2041 |
-
function resetPodcastState() {
|
| 2042 |
-
// Hide players, results, and next round button
|
| 2043 |
-
podcastPlayerContainer.style.display = 'none';
|
| 2044 |
-
podcastVoteResults.style.display = 'none';
|
| 2045 |
-
podcastNextRoundContainer.style.display = 'none';
|
| 2046 |
-
|
| 2047 |
-
// Reset vote buttons
|
| 2048 |
-
podcastVoteButtons.forEach(btn => {
|
| 2049 |
-
btn.disabled = true;
|
| 2050 |
-
btn.classList.remove('selected');
|
| 2051 |
-
btn.querySelector('.vote-loader').style.display = 'none';
|
| 2052 |
-
});
|
| 2053 |
-
|
| 2054 |
-
// Clear model name displays
|
| 2055 |
-
const modelNameDisplays = podcastPlayerContainer.querySelectorAll('.model-name-display');
|
| 2056 |
-
modelNameDisplays.forEach(display => {
|
| 2057 |
-
display.textContent = '';
|
| 2058 |
-
});
|
| 2059 |
-
|
| 2060 |
-
// Stop any playing audio
|
| 2061 |
-
if (podcastWavePlayers.a) podcastWavePlayers.a.stop();
|
| 2062 |
-
if (podcastWavePlayers.b) podcastWavePlayers.b.stop();
|
| 2063 |
-
|
| 2064 |
-
// Reset session
|
| 2065 |
-
currentPodcastSessionId = null;
|
| 2066 |
-
|
| 2067 |
-
// Reset the flag for both samples played
|
| 2068 |
-
bothPodcastSamplesPlayed = false;
|
| 2069 |
-
}
|
| 2070 |
-
|
| 2071 |
-
// Add keyboard shortcut listeners for podcast voting
|
| 2072 |
-
document.addEventListener('keydown', function(e) {
|
| 2073 |
-
// Check if we're in the podcast tab and it's active
|
| 2074 |
-
const podcastTab = document.getElementById('conversational-tab');
|
| 2075 |
-
if (!podcastTab.classList.contains('active')) return;
|
| 2076 |
-
|
| 2077 |
-
// Only process if input fields are not focused
|
| 2078 |
-
if (document.activeElement.tagName === 'INPUT' ||
|
| 2079 |
-
document.activeElement.tagName === 'TEXTAREA') {
|
| 2080 |
-
return;
|
| 2081 |
-
}
|
| 2082 |
-
|
| 2083 |
-
if (e.key.toLowerCase() === 'a') {
|
| 2084 |
-
if (bothPodcastSamplesPlayed && !podcastVoteButtons[0].disabled) {
|
| 2085 |
-
handlePodcastVote('a');
|
| 2086 |
-
} else if (podcastPlayerContainer.style.display !== 'none' && !bothPodcastSamplesPlayed) {
|
| 2087 |
-
openToast("Please listen to both audio samples before voting", "info");
|
| 2088 |
-
}
|
| 2089 |
-
} else if (e.key.toLowerCase() === 'b') {
|
| 2090 |
-
if (bothPodcastSamplesPlayed && !podcastVoteButtons[1].disabled) {
|
| 2091 |
-
handlePodcastVote('b');
|
| 2092 |
-
} else if (podcastPlayerContainer.style.display !== 'none' && !bothPodcastSamplesPlayed) {
|
| 2093 |
-
openToast("Please listen to both audio samples before voting", "info");
|
| 2094 |
-
}
|
| 2095 |
-
} else if (e.key.toLowerCase() === 'n') {
|
| 2096 |
-
if (podcastNextRoundContainer.style.display === 'block') {
|
| 2097 |
-
if (!e.ctrlKey && !e.metaKey) {
|
| 2098 |
-
e.preventDefault();
|
| 2099 |
-
}
|
| 2100 |
-
resetPodcastState();
|
| 2101 |
-
}
|
| 2102 |
-
} else if (e.key === ' ') {
|
| 2103 |
-
// Space to play/pause current audio
|
| 2104 |
-
if (podcastPlayerContainer.style.display !== 'none') {
|
| 2105 |
-
e.preventDefault();
|
| 2106 |
-
// If A is playing, toggle A, else if B is playing, toggle B, else play A
|
| 2107 |
-
if (podcastWavePlayers.a && podcastWavePlayers.a.isPlaying) {
|
| 2108 |
-
podcastWavePlayers.a.togglePlayPause();
|
| 2109 |
-
} else if (podcastWavePlayers.b && podcastWavePlayers.b.isPlaying) {
|
| 2110 |
-
podcastWavePlayers.b.togglePlayPause();
|
| 2111 |
-
} else if (podcastWavePlayers.a) {
|
| 2112 |
-
podcastWavePlayers.a.play();
|
| 2113 |
-
}
|
| 2114 |
-
}
|
| 2115 |
-
}
|
| 2116 |
-
});
|
| 2117 |
-
|
| 2118 |
-
// Event listeners
|
| 2119 |
-
addLineBtn.addEventListener('click', function() {
|
| 2120 |
-
addPodcastLine();
|
| 2121 |
-
});
|
| 2122 |
-
|
| 2123 |
-
randomScriptBtn.addEventListener('click', function() {
|
| 2124 |
-
loadRandomScript();
|
| 2125 |
-
});
|
| 2126 |
-
|
| 2127 |
-
podcastSynthBtn.addEventListener('click', function() {
|
| 2128 |
-
generatePodcast();
|
| 2129 |
-
});
|
| 2130 |
-
|
| 2131 |
-
// Add event listeners to vote buttons
|
| 2132 |
-
podcastVoteButtons.forEach(btn => {
|
| 2133 |
-
btn.addEventListener('click', function() {
|
| 2134 |
-
if (bothPodcastSamplesPlayed) {
|
| 2135 |
-
const model = this.dataset.model;
|
| 2136 |
-
handlePodcastVote(model);
|
| 2137 |
-
} else {
|
| 2138 |
-
openToast("Please listen to both audio samples before voting", "info");
|
| 2139 |
-
}
|
| 2140 |
-
});
|
| 2141 |
-
});
|
| 2142 |
-
|
| 2143 |
-
// Add event listener for next round button
|
| 2144 |
-
podcastNextRoundBtn.addEventListener('click', resetPodcastState);
|
| 2145 |
-
|
| 2146 |
-
// Initialize with 2 empty lines
|
| 2147 |
-
initializePodcastLines();
|
| 2148 |
-
});
|
| 2149 |
-
</script>
|
| 2150 |
-
{% endblock %}
|
|
|
|
| 1 |
{% extends "base.html" %}
|
| 2 |
|
| 3 |
+
{% block title %}한국어 TTS Arena{% endblock %}
|
| 4 |
|
| 5 |
{% block current_page %}Arena{% endblock %}
|
| 6 |
|
|
|
|
| 12 |
<!-- Login prompt overlay -->
|
| 13 |
<div id="login-prompt-overlay" class="login-prompt-overlay" style="display: none;">
|
| 14 |
<div class="login-prompt-content">
|
| 15 |
+
<h3>로그인 필요</h3>
|
| 16 |
+
<p>TTS Arena를 사용하려면 로그인이 필요합니다. 로그인하여 음성을 생성하고 투표하세요!</p>
|
| 17 |
<div class="login-prompt-actions">
|
| 18 |
+
<button class="login-prompt-close">나중에</button>
|
| 19 |
+
<a href="{{ url_for('auth.login', next=request.path) }}" class="login-prompt-btn">Hugging Face로 로그인</a>
|
| 20 |
</div>
|
| 21 |
</div>
|
| 22 |
</div>
|
| 23 |
{% endif %}
|
| 24 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
<div id="tts-tab" class="tab-content active">
|
| 26 |
<form class="input-container">
|
| 27 |
<div class="input-group">
|
| 28 |
+
<button type="button" class="segmented-btn random-btn" title="랜덤 텍스트">
|
| 29 |
<svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-shuffle-icon lucide-shuffle">
|
| 30 |
<path d="m18 14 4 4-4 4" />
|
| 31 |
<path d="m18 2 4 4-4 4" />
|
|
|
|
| 34 |
<path d="M22 18h-6.041a4 4 0 0 1-3.3-1.8l-.359-.45" />
|
| 35 |
</svg>
|
| 36 |
</button>
|
| 37 |
+
<input type="text" class="text-input" placeholder="합성할 텍스트를 입력하세요...">
|
| 38 |
+
<button type="submit" class="segmented-btn synth-btn">합성</button>
|
| 39 |
</div>
|
| 40 |
+
<button type="submit" class="mobile-synth-btn">합성</button>
|
| 41 |
</form>
|
| 42 |
|
| 43 |
<div id="initial-keyboard-hint" class="keyboard-hint">
|
| 44 |
+
<kbd>R</kbd> 랜덤 텍스트, <kbd>N</kbd> 다음 랜덤 라운드, <kbd>Enter</kbd> 생성
|
| 45 |
</div>
|
| 46 |
|
| 47 |
<div class="loading-container" style="display: none;">
|
|
|
|
| 56 |
<span></span>
|
| 57 |
</div>
|
| 58 |
</div>
|
| 59 |
+
<div class="loader-text">오디오 샘플 생성 중...</div>
|
| 60 |
+
<div class="loader-subtext">최대 30초가 소요될 수 있습니다</div>
|
| 61 |
</div>
|
| 62 |
</div>
|
| 63 |
|
| 64 |
<div class="players-container" style="display: none;">
|
| 65 |
<div class="players-row">
|
| 66 |
<div class="player">
|
| 67 |
+
<div class="player-label">모델 A <span class="model-name-display"></span></div>
|
| 68 |
<div class="wave-player-container" data-model="a"></div>
|
| 69 |
<button class="vote-btn" data-model="a" disabled>
|
| 70 |
+
A에 투표
|
| 71 |
<span class="shortcut-key">A</span>
|
| 72 |
<span class="vote-loader" style="display: none;">
|
| 73 |
<div class="vote-spinner"></div>
|
|
|
|
| 76 |
</div>
|
| 77 |
|
| 78 |
<div class="player">
|
| 79 |
+
<div class="player-label">모델 B <span class="model-name-display"></span></div>
|
| 80 |
<div class="wave-player-container" data-model="b"></div>
|
| 81 |
<button class="vote-btn" data-model="b" disabled>
|
| 82 |
+
B에 투표
|
| 83 |
<span class="shortcut-key">B</span>
|
| 84 |
<span class="vote-loader" style="display: none;">
|
| 85 |
<div class="vote-spinner"></div>
|
|
|
|
| 90 |
</div>
|
| 91 |
|
| 92 |
<div class="vote-results" style="display: none;">
|
| 93 |
+
<h3 class="results-heading">투표 완료!</h3>
|
| 94 |
<div class="results-content">
|
| 95 |
<div class="chosen-model">
|
| 96 |
+
<strong>선택:</strong> <span class="chosen-model-name"></span>
|
| 97 |
</div>
|
| 98 |
<div class="rejected-model">
|
| 99 |
+
<strong>비교 대상:</strong> <span class="rejected-model-name"></span>
|
| 100 |
</div>
|
| 101 |
</div>
|
| 102 |
</div>
|
| 103 |
|
| 104 |
<div class="next-round-container" style="display: none;">
|
| 105 |
+
<button class="next-round-btn">다음 라운드</button>
|
| 106 |
</div>
|
| 107 |
|
| 108 |
<div id="playback-keyboard-hint" class="keyboard-hint" style="display: none;">
|
| 109 |
+
<kbd>Space</kbd> 재생/일시정지, <kbd>A</kbd>/<kbd>B</kbd> 투표, <kbd>R</kbd> 랜덤 텍스트, <kbd>N</kbd> 다음 랜덤 라운드
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 110 |
</div>
|
| 111 |
</div>
|
| 112 |
|
|
|
|
| 359 |
}
|
| 360 |
}
|
| 361 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 362 |
.tab-content {
|
| 363 |
display: none;
|
| 364 |
}
|
|
|
|
| 367 |
display: block;
|
| 368 |
}
|
| 369 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 370 |
.model-name-display {
|
| 371 |
font-size: 0.9em;
|
| 372 |
color: #666;
|
|
|
|
| 425 |
}
|
| 426 |
/* Dark mode styles */
|
| 427 |
@media (prefers-color-scheme: dark) {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 428 |
.model-name-display {
|
| 429 |
color: #aaa;
|
| 430 |
}
|
|
|
|
| 494 |
}
|
| 495 |
|
| 496 |
.random-btn:hover {
|
| 497 |
+
background-color: rgba(255, 255, 255, 0.1);
|
| 498 |
+
}
|
| 499 |
+
|
| 500 |
+
.vote-recorded {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 501 |
background-color: var(--light-gray);
|
| 502 |
border-color: var(--border-color);
|
| 503 |
}
|
| 504 |
+
|
| 505 |
+
/* Ensure border-radius is maintained during loading state */
|
| 506 |
+
.vote-btn.loading {
|
| 507 |
+
border-radius: var(--radius);
|
| 508 |
+
}
|
| 509 |
+
|
| 510 |
+
/* Dark mode keyboard hint */
|
| 511 |
+
.keyboard-hint {
|
| 512 |
+
color: #aaa;
|
| 513 |
+
}
|
| 514 |
+
|
| 515 |
+
.keyboard-hint kbd {
|
| 516 |
+
color: #ddd;
|
| 517 |
+
background-color: #333;
|
| 518 |
+
border-color: #555;
|
| 519 |
+
box-shadow: 0 1px 0 rgba(255,255,255,0.1);
|
| 520 |
+
}
|
| 521 |
}
|
| 522 |
|
| 523 |
/* Login prompt overlay styles */
|
|
|
|
| 653 |
const nextRoundBtn = document.querySelector('.next-round-btn');
|
| 654 |
const nextRoundContainer = document.querySelector('.next-round-container');
|
| 655 |
const randomBtn = document.querySelector('.random-btn');
|
|
|
|
|
|
|
| 656 |
const voteResultsContainer = document.querySelector('.vote-results');
|
| 657 |
const chosenModelNameElement = document.querySelector('.chosen-model-name');
|
| 658 |
const rejectedModelNameElement = document.querySelector('.rejected-model-name');
|
|
|
|
| 699 |
});
|
| 700 |
}
|
| 701 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 702 |
function handleSynthesize(e) {
|
| 703 |
if (e) {
|
| 704 |
e.preventDefault();
|
|
|
|
| 712 |
|
| 713 |
const text = textInput.value.trim();
|
| 714 |
if (!text) {
|
| 715 |
+
openToast("텍스트를 입력해주세요", "warning");
|
| 716 |
return;
|
| 717 |
}
|
| 718 |
|
| 719 |
if (text.length > 1000) {
|
| 720 |
+
openToast("텍스트가 너무 깁니다. 1000자 이하로 입력해주세요.", "warning");
|
| 721 |
return;
|
| 722 |
}
|
| 723 |
|
|
|
|
| 757 |
.then(response => {
|
| 758 |
if (!response.ok) {
|
| 759 |
return response.json().then(err => {
|
| 760 |
+
throw new Error(err.error || 'TTS 생성에 실패했습니다');
|
| 761 |
});
|
| 762 |
}
|
| 763 |
return response.json();
|
|
|
|
| 804 |
|
| 805 |
// Handle authentication errors specially
|
| 806 |
if (error.message.includes('logged in to generate') || error.message.includes('logged in to vote')) {
|
| 807 |
+
openToast("로그인이 필요합니다. <a href='{{ url_for('auth.login', next=request.path) }}' style='color: white; text-decoration: underline;'>지금 로그인</a>", "error");
|
| 808 |
} else {
|
| 809 |
openToast(error.message, "error");
|
| 810 |
}
|
|
|
|
| 835 |
.then(response => {
|
| 836 |
if (!response.ok) {
|
| 837 |
return response.json().then(err => {
|
| 838 |
+
throw new Error(err.error || '투표 제출에 실패했습니다');
|
| 839 |
});
|
| 840 |
}
|
| 841 |
return response.json();
|
|
|
|
| 871 |
nextRoundContainer.style.display = 'block';
|
| 872 |
|
| 873 |
// Show success toast
|
| 874 |
+
openToast("투표가 기록되었습니다!", "success");
|
| 875 |
})
|
| 876 |
.catch(error => {
|
| 877 |
// Re-enable vote buttons
|
|
|
|
| 882 |
|
| 883 |
// Handle authentication errors specially
|
| 884 |
if (error.message.includes('logged in to vote')) {
|
| 885 |
+
openToast("로그인이 필요합니다. <a href='{{ url_for('auth.login', next=request.path) }}' style='color: white; text-decoration: underline;'>지금 로그인</a>", "error");
|
| 886 |
} else {
|
| 887 |
openToast(error.message, "error");
|
| 888 |
}
|
|
|
|
| 938 |
// Select a random text from the unconsumed sentences
|
| 939 |
selectedText = cachedSentences[Math.floor(Math.random() * cachedSentences.length)];
|
| 940 |
console.log("Using random sentence from unconsumed sentences.");
|
| 941 |
+
} else if (fallbackRandomTexts && fallbackRandomTexts.length > 0) {
|
| 942 |
+
// Fallback to harvard sentences
|
| 943 |
+
selectedText = fallbackRandomTexts[Math.floor(Math.random() * fallbackRandomTexts.length)];
|
| 944 |
+
console.log("Using fallback random text.");
|
| 945 |
} else {
|
| 946 |
+
console.error("No sentences available.");
|
| 947 |
+
openToast("사용 가능한 문장이 없습니다.", "error");
|
|
|
|
| 948 |
return;
|
| 949 |
}
|
| 950 |
textInput.value = selectedText;
|
|
|
|
| 952 |
}
|
| 953 |
|
| 954 |
function showListenToastMessage() {
|
| 955 |
+
openToast("투표하기 전에 두 오디오 샘플을 모두 들어주세요", "info");
|
| 956 |
}
|
| 957 |
|
| 958 |
// New function for N shortcut: Random + Synthesize
|
|
|
|
| 1060 |
fetchCachedSentences();
|
| 1061 |
});
|
| 1062 |
</script>
|
| 1063 |
+
{% endblock %}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
templates/base.html
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
<!DOCTYPE html>
|
| 2 |
-
<html lang="
|
| 3 |
|
| 4 |
<head>
|
| 5 |
<meta charset="UTF-8">
|
| 6 |
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 7 |
-
<title>{% block title %}TTS Arena{% endblock %}</title>
|
| 8 |
<link rel="preconnect" href="https://fonts.googleapis.com">
|
| 9 |
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
| 10 |
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap" rel="stylesheet">
|
|
@@ -56,11 +56,43 @@
|
|
| 56 |
flex-shrink: 0;
|
| 57 |
}
|
| 58 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
.logo {
|
| 60 |
-
font-size:
|
| 61 |
font-weight: 700;
|
| 62 |
-
margin-bottom: 32px;
|
| 63 |
color: var(--primary-color);
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
}
|
| 65 |
|
| 66 |
.nav-item {
|
|
@@ -1061,7 +1093,15 @@
|
|
| 1061 |
<path d="M18 6L6 18M6 6L18 18" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" />
|
| 1062 |
</svg>
|
| 1063 |
</div>
|
| 1064 |
-
<div class="logo">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1065 |
<nav>
|
| 1066 |
<a href="{{ url_for('arena') }}" class="nav-item {% if request.path == '/' %}active{% endif %}">
|
| 1067 |
<svg xmlns="http://www.w3.org/2000/svg" width="18" height="18" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-dices"><rect width="12" height="12" x="2" y="10" rx="2" ry="2"/><path d="m17.92 14 3.5-3.5a2.24 2.24 0 0 0 0-3l-5-4.92a2.24 2.24 0 0 0-3 0L10 6"/><path d="M6 18h.01"/><path d="M10 14h.01"/><path d="M15 6h.01"/><path d="M18 9h.01"/></svg>
|
|
|
|
| 1 |
<!DOCTYPE html>
|
| 2 |
+
<html lang="ko">
|
| 3 |
|
| 4 |
<head>
|
| 5 |
<meta charset="UTF-8">
|
| 6 |
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 7 |
+
<title>{% block title %}한국어 TTS Arena{% endblock %}</title>
|
| 8 |
<link rel="preconnect" href="https://fonts.googleapis.com">
|
| 9 |
<link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
|
| 10 |
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap" rel="stylesheet">
|
|
|
|
| 56 |
flex-shrink: 0;
|
| 57 |
}
|
| 58 |
|
| 59 |
+
.logo-container {
|
| 60 |
+
margin-bottom: 32px;
|
| 61 |
+
}
|
| 62 |
+
|
| 63 |
.logo {
|
| 64 |
+
font-size: 22px;
|
| 65 |
font-weight: 700;
|
|
|
|
| 66 |
color: var(--primary-color);
|
| 67 |
+
margin-bottom: 8px;
|
| 68 |
+
}
|
| 69 |
+
|
| 70 |
+
.supported-by {
|
| 71 |
+
display: flex;
|
| 72 |
+
align-items: center;
|
| 73 |
+
gap: 6px;
|
| 74 |
+
font-size: 11px;
|
| 75 |
+
color: #888;
|
| 76 |
+
}
|
| 77 |
+
|
| 78 |
+
.supported-by span {
|
| 79 |
+
opacity: 0.8;
|
| 80 |
+
}
|
| 81 |
+
|
| 82 |
+
.channel-link {
|
| 83 |
+
display: flex;
|
| 84 |
+
align-items: center;
|
| 85 |
+
text-decoration: none;
|
| 86 |
+
}
|
| 87 |
+
|
| 88 |
+
.channel-logo-img {
|
| 89 |
+
height: 20px;
|
| 90 |
+
width: auto;
|
| 91 |
+
transition: opacity 0.2s;
|
| 92 |
+
}
|
| 93 |
+
|
| 94 |
+
.channel-link:hover .channel-logo-img {
|
| 95 |
+
opacity: 0.8;
|
| 96 |
}
|
| 97 |
|
| 98 |
.nav-item {
|
|
|
|
| 1093 |
<path d="M18 6L6 18M6 6L18 18" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" />
|
| 1094 |
</svg>
|
| 1095 |
</div>
|
| 1096 |
+
<div class="logo-container">
|
| 1097 |
+
<div class="logo">한국어 TTS 아레나</div>
|
| 1098 |
+
<div class="supported-by">
|
| 1099 |
+
<span>supported by</span>
|
| 1100 |
+
<a href="https://channel.io/ko" target="_blank" rel="noopener noreferrer" class="channel-link">
|
| 1101 |
+
<img src="{{ url_for('static', filename='channeltalk-logo-kr.svg') }}" alt="채널톡" class="channel-logo-img">
|
| 1102 |
+
</a>
|
| 1103 |
+
</div>
|
| 1104 |
+
</div>
|
| 1105 |
<nav>
|
| 1106 |
<a href="{{ url_for('arena') }}" class="nav-item {% if request.path == '/' %}active{% endif %}">
|
| 1107 |
<svg xmlns="http://www.w3.org/2000/svg" width="18" height="18" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-dices"><rect width="12" height="12" x="2" y="10" rx="2" ry="2"/><path d="m17.92 14 3.5-3.5a2.24 2.24 0 0 0 0-3l-5-4.92a2.24 2.24 0 0 0-3 0L10 6"/><path d="M6 18h.01"/><path d="M10 14h.01"/><path d="M15 6h.01"/><path d="M18 9h.01"/></svg>
|
tts.py
CHANGED
|
@@ -1,298 +1,218 @@
|
|
| 1 |
-
#
|
| 2 |
-
# Currently just use current TTS router.
|
| 3 |
import os
|
| 4 |
import json
|
| 5 |
-
from dotenv import load_dotenv
|
| 6 |
-
import fal_client
|
| 7 |
-
import requests
|
| 8 |
-
import time
|
| 9 |
-
import io
|
| 10 |
-
from pyht import Client as PyhtClient
|
| 11 |
-
from pyht.client import TTSOptions
|
| 12 |
import base64
|
| 13 |
import tempfile
|
| 14 |
-
import
|
|
|
|
| 15 |
|
| 16 |
load_dotenv()
|
| 17 |
|
| 18 |
-
|
| 19 |
-
|
|
|
|
|
|
|
|
|
|
| 20 |
|
| 21 |
-
|
| 22 |
-
|
|
|
|
|
|
|
| 23 |
|
|
|
|
|
|
|
| 24 |
|
| 25 |
model_mapping = {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
"eleven-multilingual-v2": {
|
| 27 |
"provider": "elevenlabs",
|
| 28 |
"model": "eleven_multilingual_v2",
|
| 29 |
},
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
"
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
"provider": "elevenlabs",
|
| 36 |
-
"model": "eleven_turbo_v2_5",
|
| 37 |
-
},
|
| 38 |
-
"eleven-flash-v2.5": {
|
| 39 |
-
"provider": "elevenlabs",
|
| 40 |
-
"model": "eleven_flash_v2_5",
|
| 41 |
-
},
|
| 42 |
-
"cartesia-sonic-2": {
|
| 43 |
-
"provider": "cartesia",
|
| 44 |
-
"model": "sonic-2",
|
| 45 |
-
},
|
| 46 |
-
"spark-tts": {
|
| 47 |
-
"provider": "spark",
|
| 48 |
-
"model": "spark-tts",
|
| 49 |
-
},
|
| 50 |
-
"playht-2.0": {
|
| 51 |
-
"provider": "playht",
|
| 52 |
-
"model": "PlayHT2.0",
|
| 53 |
-
},
|
| 54 |
-
"styletts2": {
|
| 55 |
-
"provider": "styletts",
|
| 56 |
-
"model": "styletts2",
|
| 57 |
-
},
|
| 58 |
-
"kokoro-v1": {
|
| 59 |
-
"provider": "kokoro",
|
| 60 |
-
"model": "kokoro_v1",
|
| 61 |
-
},
|
| 62 |
-
"cosyvoice-2.0": {
|
| 63 |
-
"provider": "cosyvoice",
|
| 64 |
-
"model": "cosyvoice_2_0",
|
| 65 |
-
},
|
| 66 |
-
"papla-p1": {
|
| 67 |
-
"provider": "papla",
|
| 68 |
-
"model": "papla_p1",
|
| 69 |
},
|
| 70 |
-
"
|
| 71 |
-
"provider": "
|
| 72 |
-
"model": "
|
|
|
|
| 73 |
},
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
"
|
|
|
|
| 77 |
},
|
| 78 |
-
"
|
| 79 |
-
"provider": "
|
| 80 |
-
"
|
| 81 |
},
|
| 82 |
-
"minimax-02-turbo": {
|
| 83 |
-
"provider": "minimax",
|
| 84 |
-
"model": "speech-02-turbo",
|
| 85 |
-
},
|
| 86 |
-
"lanternfish-1": {
|
| 87 |
-
"provider": "lanternfish",
|
| 88 |
-
"model": "lanternfish-1",
|
| 89 |
-
},
|
| 90 |
-
"nls-pre-v1": {
|
| 91 |
-
"provider": "nls",
|
| 92 |
-
"model": "nls-1",
|
| 93 |
-
},
|
| 94 |
-
"chatterbox": {
|
| 95 |
-
"provider": "chatterbox",
|
| 96 |
-
"model": "chatterbox",
|
| 97 |
-
},
|
| 98 |
-
"inworld": {
|
| 99 |
-
"provider": "inworld",
|
| 100 |
-
"model": "inworld-tts-1",
|
| 101 |
-
},
|
| 102 |
-
"inworld-max": {
|
| 103 |
-
"provider": "inworld",
|
| 104 |
-
"model": "inworld-tts-1-max",
|
| 105 |
-
},
|
| 106 |
-
"wordcab": {
|
| 107 |
-
"provider": "wordcab",
|
| 108 |
-
"model": "wordcab",
|
| 109 |
-
},
|
| 110 |
-
"veena": {
|
| 111 |
-
"provider": "veena",
|
| 112 |
-
"model": "veena",
|
| 113 |
-
},
|
| 114 |
-
"maya1": {
|
| 115 |
-
"provider": "maya1",
|
| 116 |
-
"model": "maya1",
|
| 117 |
-
},
|
| 118 |
-
"magpie": {
|
| 119 |
-
"provider": "magpie",
|
| 120 |
-
"model": "magpie",
|
| 121 |
-
},
|
| 122 |
-
"parmesan": {
|
| 123 |
-
"provider": "parmesan",
|
| 124 |
-
"model": "parmesan",
|
| 125 |
-
},
|
| 126 |
-
"vocu": {
|
| 127 |
-
"provider": "vocu",
|
| 128 |
-
"model": "vocu-balance",
|
| 129 |
-
},
|
| 130 |
-
}
|
| 131 |
-
url = "https://tts-agi-tts-router-v2.hf.space/tts"
|
| 132 |
-
headers = {
|
| 133 |
-
"accept": "application/json",
|
| 134 |
-
"Content-Type": "application/json",
|
| 135 |
-
"Authorization": f'Bearer {os.getenv("HF_TOKEN")}',
|
| 136 |
}
|
| 137 |
-
data = {"text": "string", "provider": "string", "model": "string"}
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
def predict_csm(script):
|
| 141 |
-
result = fal_client.subscribe(
|
| 142 |
-
"fal-ai/csm-1b",
|
| 143 |
-
arguments={
|
| 144 |
-
# "scene": [{
|
| 145 |
-
# "text": "Hey how are you doing.",
|
| 146 |
-
# "speaker_id": 0
|
| 147 |
-
# }, {
|
| 148 |
-
# "text": "Pretty good, pretty good.",
|
| 149 |
-
# "speaker_id": 1
|
| 150 |
-
# }, {
|
| 151 |
-
# "text": "I'm great, so happy to be speaking to you.",
|
| 152 |
-
# "speaker_id": 0
|
| 153 |
-
# }]
|
| 154 |
-
"scene": script
|
| 155 |
-
},
|
| 156 |
-
with_logs=True,
|
| 157 |
-
)
|
| 158 |
-
return requests.get(result["audio"]["url"]).content
|
| 159 |
|
| 160 |
|
| 161 |
-
def
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 166 |
)
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 187 |
)
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
# Convert from list of dictionaries to formatted string
|
| 202 |
-
formatted_text = ""
|
| 203 |
-
for turn in script:
|
| 204 |
-
speaker_id = turn.get("speaker_id", 0)
|
| 205 |
-
speaker_tag = "[S1]" if speaker_id == 0 else "[S2]"
|
| 206 |
-
text = turn.get("text", "").strip().replace("[S1]", "").replace("[S2]", "")
|
| 207 |
-
formatted_text += f"{speaker_tag} {text} "
|
| 208 |
-
text = formatted_text.strip()
|
| 209 |
-
else:
|
| 210 |
-
# If it's already a string, use as is
|
| 211 |
-
text = script
|
| 212 |
-
# Make a POST request to initiate the dialogue generation
|
| 213 |
-
headers = {
|
| 214 |
-
# "Content-Type": "application/json",
|
| 215 |
-
"Authorization": f"Bearer {get_zerogpu_token()}"
|
| 216 |
-
}
|
| 217 |
-
|
| 218 |
response = requests.post(
|
| 219 |
-
"https://
|
| 220 |
-
headers=
|
| 221 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 222 |
)
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
| 245 |
-
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
|
| 251 |
-
if not model in model_mapping:
|
| 252 |
-
raise ValueError(f"Model {model} not found")
|
| 253 |
-
|
| 254 |
-
result = requests.post(
|
| 255 |
-
url,
|
| 256 |
-
headers=headers,
|
| 257 |
-
data=json.dumps(
|
| 258 |
-
{
|
| 259 |
-
"text": text,
|
| 260 |
-
"provider": model_mapping[model]["provider"],
|
| 261 |
-
"model": model_mapping[model]["model"],
|
| 262 |
-
}
|
| 263 |
-
),
|
| 264 |
)
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
|
| 270 |
-
|
| 271 |
-
|
| 272 |
-
|
| 273 |
-
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 278 |
|
| 279 |
|
| 280 |
if __name__ == "__main__":
|
| 281 |
-
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
)
|
| 288 |
-
|
| 289 |
-
|
| 290 |
-
# print(
|
| 291 |
-
# predict_playdialog(
|
| 292 |
-
# [
|
| 293 |
-
# {"text": "Hey how are you doing.", "speaker_id": 0},
|
| 294 |
-
# {"text": "Pretty good, pretty good.", "speaker_id": 1},
|
| 295 |
-
# {"text": "I'm great, so happy to be speaking to you.", "speaker_id": 0},
|
| 296 |
-
# ]
|
| 297 |
-
# )
|
| 298 |
-
# )
|
|
|
|
| 1 |
+
# 한국어 TTS Arena - TTS Router
|
|
|
|
| 2 |
import os
|
| 3 |
import json
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
import base64
|
| 5 |
import tempfile
|
| 6 |
+
import requests
|
| 7 |
+
from dotenv import load_dotenv
|
| 8 |
|
| 9 |
load_dotenv()
|
| 10 |
|
| 11 |
+
# Provider mapping for Korean-capable TTS
# - Channel Talk: in-house API
# - ElevenLabs: direct API
# - OpenAI: API
# - Google: API

# Base endpoint of the Channel Talk TTS service; can be overridden through the
# CHANNEL_TTS_URL environment variable.
CHANNEL_TTS_URL = os.environ.get(
    "CHANNEL_TTS_URL",
    "https://ch-tts-streaming-demo.channel.io/v1/text-to-speech",
)

# ElevenLabs credentials and voice. The key has no fallback (None when unset);
# the voice falls back to the "Rachel" voice id.
ELEVENLABS_API_KEY = os.environ.get("ELEVENLABS_API_KEY")
ELEVENLABS_VOICE_ID = os.environ.get(
    "ELEVENLABS_VOICE_ID",
    "21m00Tcm4TlvDq8ikWAM",  # Rachel (default)
)
|
| 24 |
|
| 25 |
# Registry of arena model ids -> provider configuration.
# Every entry carries a "provider"; "model" and/or "voice" are present only
# for the providers whose entries define them below.
model_mapping = {
    # Channel Talk TTS (Korean-focused)
    "channel-hana": dict(provider="channel", voice="hana"),
    # ElevenLabs (multilingual) - called directly through its API
    "eleven-multilingual-v2": dict(
        provider="elevenlabs",
        model="eleven_multilingual_v2",
    ),
    # OpenAI TTS
    "openai-tts-1": dict(provider="openai", model="tts-1", voice="alloy"),
    "openai-tts-1-hd": dict(provider="openai", model="tts-1-hd", voice="alloy"),
    # Google Cloud TTS
    "google-wavenet": dict(provider="google", voice="ko-KR-Wavenet-A"),
    "google-neural2": dict(provider="google", voice="ko-KR-Neural2-A"),
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 57 |
|
| 58 |
|
| 59 |
+
def predict_channel_tts(text: str, voice: str = "hana") -> str:
    """Synthesize speech through the Channel Talk TTS API.

    Args:
        text: Text to synthesize.
        voice: Voice name; appended to ``CHANNEL_TTS_URL`` as the last path
            segment of the request URL.

    Returns:
        Path of a ``.wav`` temporary file containing the raw response body.
        The file is created with ``delete=False``, so it is not removed
        automatically.

    Raises:
        requests.HTTPError: If the service responds with an error status.
    """
    endpoint = f"{CHANNEL_TTS_URL}/{voice}"
    request_headers = {"Content-Type": "application/json"}
    payload = {"text": text, "output_format": "wav_24000"}

    reply = requests.post(
        endpoint,
        headers=request_headers,
        json=payload,
        timeout=30,
    )
    reply.raise_for_status()

    # Save the audio to a temporary file that outlives this call.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as wav_file:
        wav_file.write(reply.content)
    return wav_file.name
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
def predict_elevenlabs_tts(text: str, model: str = "eleven_multilingual_v2") -> str:
    """Synthesize speech by calling the ElevenLabs text-to-speech API directly.

    The API key and voice id come from the module-level
    ``ELEVENLABS_API_KEY`` and ``ELEVENLABS_VOICE_ID`` settings.

    Args:
        text: Text to synthesize.
        model: ElevenLabs model id, sent as ``model_id``.

    Returns:
        Path of a ``.mp3`` temporary file containing the raw response body.
        The file is created with ``delete=False``, so it is not removed
        automatically.

    Raises:
        ValueError: If ``ELEVENLABS_API_KEY`` is empty or unset.
        requests.HTTPError: If the service responds with an error status.
    """
    key = ELEVENLABS_API_KEY
    if not key:
        raise ValueError("ELEVENLABS_API_KEY 환경 변수가 설정되지 않았습니다.")

    endpoint = f"https://api.elevenlabs.io/v1/text-to-speech/{ELEVENLABS_VOICE_ID}"
    request_headers = {
        "xi-api-key": key,
        "Content-Type": "application/json",
        "Accept": "audio/mpeg",
    }
    payload = {
        "text": text,
        "model_id": model,
        "voice_settings": {
            "stability": 0.5,
            "similarity_boost": 0.75,
        },
    }

    reply = requests.post(
        endpoint,
        headers=request_headers,
        json=payload,
        timeout=60,
    )
    reply.raise_for_status()

    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as mp3_file:
        mp3_file.write(reply.content)
    return mp3_file.name
|
| 107 |
+
|
| 108 |
+
|
| 109 |
+
def predict_openai_tts(text: str, model: str = "tts-1", voice: str = "alloy") -> str:
    """Synthesize speech through the OpenAI audio speech endpoint.

    Args:
        text: Text to synthesize, sent as ``input``.
        model: OpenAI TTS model name.
        voice: OpenAI voice name.

    Returns:
        Path of a ``.wav`` temporary file containing the raw response body
        (the request asks for ``response_format`` ``"wav"``). The file is
        created with ``delete=False``, so it is not removed automatically.

    Raises:
        ValueError: If the ``OPENAI_API_KEY`` environment variable is empty
            or unset.
        requests.HTTPError: If the service responds with an error status.
    """
    token = os.getenv("OPENAI_API_KEY")
    if not token:
        raise ValueError("OPENAI_API_KEY 환경 변수가 설정되지 않았습니다.")

    request_headers = {
        "Authorization": f"Bearer {token}",
        "Content-Type": "application/json",
    }
    payload = {
        "model": model,
        "input": text,
        "voice": voice,
        "response_format": "wav",
    }

    reply = requests.post(
        "https://api.openai.com/v1/audio/speech",
        headers=request_headers,
        json=payload,
        timeout=60,
    )
    reply.raise_for_status()

    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as wav_file:
        wav_file.write(reply.content)
    return wav_file.name
|
| 134 |
+
|
| 135 |
+
|
| 136 |
+
def predict_google_tts(text: str, voice: str = "ko-KR-Wavenet-A") -> str:
    """Synthesize Korean speech through the Google Cloud Text-to-Speech REST API.

    Args:
        text: Text to synthesize.
        voice: Google voice name sent as ``voice.name``; the language code is
            fixed to ``ko-KR``.

    Returns:
        Path of a temporary ``.wav`` file containing the base64-decoded
        ``audioContent`` of the response. The file is created with
        ``delete=False`` and is not removed here.

    Raises:
        ValueError: If the ``GOOGLE_API_KEY`` environment variable is unset or
            empty, or if the response carries no ``audioContent``.
        requests.HTTPError: If ``raise_for_status()`` rejects the response.
    """
    api_key = os.getenv("GOOGLE_API_KEY")
    if not api_key:
        raise ValueError("GOOGLE_API_KEY 환경 변수가 설정되지 않았습니다.")

    response = requests.post(
        "https://texttospeech.googleapis.com/v1/text:synthesize",
        headers={
            "Content-Type": "application/json",
            # Send the key as a header rather than a ``?key=`` query parameter:
            # HTTPError messages embed the request URL, so a key in the URL
            # would leak into logs and printed error messages.
            "X-Goog-Api-Key": api_key,
        },
        json={
            "input": {"text": text},
            "voice": {
                "languageCode": "ko-KR",
                "name": voice,
            },
            "audioConfig": {
                "audioEncoding": "LINEAR16",
                "sampleRateHertz": 24000,
            },
        },
        timeout=30,
    )
    response.raise_for_status()

    audio_content = response.json().get("audioContent")
    if not audio_content:
        raise ValueError("Google TTS API가 오디오를 반환하지 않았습니다.")

    # The API returns the audio as a base64 string inside the JSON body.
    audio_bytes = base64.b64decode(audio_content)
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
        f.write(audio_bytes)
        return f.name
|
| 168 |
+
|
| 169 |
+
|
| 170 |
+
def predict_tts(text: str, model: str) -> str:
    """Generate speech for ``text`` with the backend configured for ``model``.

    Looks ``model`` up in ``model_mapping`` and forwards the call to the
    provider-specific function named by the entry's ``"provider"`` field,
    filling in per-provider defaults for any missing ``voice`` / ``model``
    settings.

    Args:
        text: Text to synthesize.
        model: Model ID; must be a key of ``model_mapping``.

    Returns:
        Path of the generated audio file, as returned by the provider function.

    Raises:
        ValueError: If ``model`` is not in ``model_mapping`` or its provider
            is not one of ``channel``, ``openai``, ``google``, ``elevenlabs``.
    """
    print(f"[TTS] Predicting for model: {model}")

    if model not in model_mapping:
        raise ValueError(f"지원하지 않는 모델입니다: {model}")

    settings = model_mapping[model]
    backend = settings["provider"]

    if backend == "channel":
        return predict_channel_tts(text, settings.get("voice", "hana"))

    if backend == "openai":
        openai_model = settings.get("model", "tts-1")
        openai_voice = settings.get("voice", "alloy")
        return predict_openai_tts(text, openai_model, openai_voice)

    if backend == "google":
        return predict_google_tts(text, settings.get("voice", "ko-KR-Wavenet-A"))

    if backend == "elevenlabs":
        return predict_elevenlabs_tts(
            text,
            settings.get("model", "eleven_multilingual_v2"),
        )

    raise ValueError(f"알 수 없는 provider: {backend}")
|
| 207 |
|
| 208 |
|
| 209 |
if __name__ == "__main__":
    # Manual smoke test: synthesize one Korean sentence with the Channel
    # backend and report either the output path or the failure.
    test_text = "안녕하세요, 채널톡 TTS 테스트입니다."

    print("Testing Channel TTS...")
    try:
        audio_path = predict_channel_tts(test_text)
        print(f" Success: {audio_path}")
    except Exception as err:
        # Broad catch is deliberate here: this only reports the outcome.
        print(f" Error: {err}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|