blackhole1218 committed on
Commit
62f57ec
·
1 Parent(s): 6eee52e

한국어 TTS 아레나 - Docker Space 배포

Browse files

- 채널톡 TTS API 통합
- 한국어 UI/UX
- Conversational 기능 제거, TTS 전용
- Docker 배포 설정 추가
- About 페이지 한국어 TTS 벤치마크 설명 추가

Files changed (12) hide show
  1. .dockerignore +34 -0
  2. Dockerfile +33 -0
  3. README.md +34 -9
  4. app.py +15 -368
  5. ko_prompts.json +55 -0
  6. models.py +36 -201
  7. requirements.txt +1 -4
  8. static/channeltalk-logo-kr.svg +19 -0
  9. templates/about.html +255 -240
  10. templates/arena.html +57 -1144
  11. templates/base.html +45 -5
  12. tts.py +188 -268
.dockerignore ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Git
2
+ .git
3
+ .gitignore
4
+
5
+ # Python
6
+ __pycache__
7
+ *.py[cod]
8
+ *$py.class
9
+ *.so
10
+ .Python
11
+ env/
12
+ venv/
13
+ .env
14
+ *.egg-info/
15
+ dist/
16
+ build/
17
+
18
+ # IDE
19
+ .vscode/
20
+ .idea/
21
+ *.swp
22
+ *.swo
23
+
24
+ # Local files
25
+ instance/
26
+ *.db
27
+ *.sqlite
28
+ tts_cache/
29
+ audio_cache/
30
+
31
+ # Misc
32
+ .DS_Store
33
+ *.log
34
+
Dockerfile ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Hugging Face Spaces Docker
2
+ FROM python:3.11-slim
3
+
4
+ # Create non-root user
5
+ RUN useradd -m -u 1000 user
6
+ USER user
7
+
8
+ ENV PATH="/home/user/.local/bin:$PATH"
9
+ ENV HOME="/home/user"
10
+
11
+ WORKDIR /app
12
+
13
+ # Copy requirements first for better caching
14
+ COPY --chown=user ./requirements.txt requirements.txt
15
+ RUN pip install --no-cache-dir --upgrade -r requirements.txt
16
+
17
+ # Copy application files
18
+ COPY --chown=user . /app
19
+
20
+ # Create necessary directories
21
+ RUN mkdir -p /app/instance /app/tts_cache /app/audio_cache
22
+
23
+ # Set environment variables for HF Spaces
24
+ ENV FLASK_ENV=production
25
+ ENV IS_SPACES=true
26
+ ENV PORT=7860
27
+
28
+ # Expose port
29
+ EXPOSE 7860
30
+
31
+ # Run with waitress (already in requirements.txt)
32
+ CMD ["python", "app.py"]
33
+
README.md CHANGED
@@ -1,16 +1,41 @@
1
  ---
2
- title: TTS Arena V2
3
- emoji: 🏆
4
- colorFrom: blue
5
  colorTo: blue
6
- sdk: gradio
7
- app_file: app.py
8
- short_description: Vote on the latest TTS models!
9
  pinned: true
10
-
11
  hf_oauth: true
 
 
 
 
 
12
  ---
13
 
14
- Please see the [GitHub repo](https://github.com/TTS-AGI/TTS-Arena-V2) for information.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
- Join the [Discord server](https://discord.gg/HB8fMR6GTr) for updates and support.
 
1
  ---
2
+ title: 한국어 TTS 아레나
3
+ emoji: 🎤
4
+ colorFrom: purple
5
  colorTo: blue
6
+ sdk: docker
7
+ app_port: 7860
8
+ short_description: 한국어 TTS 모델을 블라인드 테스트로 비교 평가하세요!
9
  pinned: true
 
10
  hf_oauth: true
11
+ hf_oauth_scopes:
12
+ - read-repos
13
+ - write-repos
14
+ - manage-repos
15
+ - inference-api
16
  ---
17
 
18
+ # 🎤 한국어 TTS 아레나
19
+
20
+ 한국어 TTS 모델을 블라인드 테스트로 비교 평가하는 커뮤니티 기반 플랫폼입니다.
21
+
22
+ ## 왜 한국어 TTS 벤치마크가 필요한가?
23
+
24
+ - **WER (Word Error Rate)**: 한국어의 복잡한 발화 패턴을 제대로 반영하지 못함
25
+ - **MOS (Mean Opinion Score)**: 소규모 참가자 대상의 주관적 평가로 한계 존재
26
+ - **글로벌 TTS 모델의 한국어 한계**: 운율(Prosody) 부자연스러움, 숫자/날짜/전화번호 발화 취약
27
+
28
+ ## 사용 방법
29
+
30
+ 1. 텍스트를 입력하거나 랜덤 문장을 선택
31
+ 2. 두 TTS 모델의 음성을 듣고 비교
32
+ 3. 더 자연스러운 음성에 투표
33
+ 4. 리더보드에서 모델 순위 확인
34
+
35
+ ## Supported by
36
+
37
+ [채널톡](https://channel.io/ko) AI Team
38
+
39
+ ## 참고 자료
40
 
41
+ - [Channel TTS: Towards Real-World Prosody for Conversational Agents](https://tts.ch.dev/)
app.py CHANGED
@@ -5,12 +5,11 @@ from concurrent.futures import ThreadPoolExecutor
5
  from datetime import datetime
6
  import threading # Added for locking
7
  from sqlalchemy import or_ # Added for vote counting query
8
- from datasets import load_dataset
9
 
10
  year = datetime.now().year
11
  month = datetime.now().month
12
 
13
- # Check if running in a Huggin Face Space
14
  IS_SPACES = False
15
  if os.getenv("SPACE_REPO_NAME"):
16
  print("Running in a Hugging Face Space 🤗")
@@ -22,7 +21,7 @@ if os.getenv("SPACE_REPO_NAME"):
22
  try:
23
  print("Database not found, downloading from HF dataset...")
24
  hf_hub_download(
25
- repo_id="TTS-AGI/database-arena-v2",
26
  filename="tts_arena.db",
27
  repo_type="dataset",
28
  local_dir="instance",
@@ -68,29 +67,6 @@ from flask_migrate import Migrate
68
  import requests
69
  import functools
70
  import time # Added for potential retries
71
- from langdetect import detect, DetectorFactory
72
-
73
- # Set random seed for consistent language detection results
74
- DetectorFactory.seed = 0
75
-
76
-
77
- def is_english_text(text):
78
- """
79
- Detect if the given text is in English.
80
- Returns True if English, False otherwise.
81
- """
82
- try:
83
- # Remove leading/trailing whitespace and check if text is not empty
84
- text = text.strip()
85
- if not text:
86
- return False
87
-
88
- # Detect language
89
- detected_language = detect(text)
90
- return detected_language == 'en'
91
- except Exception:
92
- # If detection fails, assume it's not English for safety
93
- return False
94
 
95
 
96
  def get_client_ip():
@@ -177,10 +153,6 @@ os.makedirs(CACHE_AUDIO_DIR, exist_ok=True) # Ensure cache subdir exists
177
  app.tts_sessions = {}
178
  tts_sessions = app.tts_sessions
179
 
180
- # Store active conversational sessions
181
- app.conversational_sessions = {}
182
- conversational_sessions = app.conversational_sessions
183
-
184
  # Register blueprints
185
  app.register_blueprint(auth, url_prefix="/auth")
186
  app.register_blueprint(admin)
@@ -332,12 +304,13 @@ def verify_turnstile():
332
  # Otherwise redirect back to turnstile page
333
  return redirect(url_for("turnstile_page", redirect_url=redirect_url))
334
 
335
- # Load sentences from the TTS-AGI/arena-prompts dataset
336
- print("Loading TTS-AGI/arena-prompts dataset...")
337
- dataset = load_dataset("TTS-AGI/arena-prompts", split="train")
338
- # Extract the text column and clean up
339
- all_harvard_sentences = [item['text'].strip() for item in dataset if item['text'] and item['text'].strip()]
340
- print(f"Loaded {len(all_harvard_sentences)} sentences from dataset")
 
341
 
342
  # Initialize initial_sentences as empty - will be populated with unconsumed sentences only
343
  initial_sentences = []
@@ -351,42 +324,29 @@ def arena():
351
  @app.route("/leaderboard")
352
  def leaderboard():
353
  tts_leaderboard = get_leaderboard_data(ModelType.TTS)
354
- conversational_leaderboard = get_leaderboard_data(ModelType.CONVERSATIONAL)
355
  top_voters = get_top_voters(10) # Get top 10 voters
356
 
357
  # Initialize personal leaderboard data
358
  tts_personal_leaderboard = None
359
- conversational_personal_leaderboard = None
360
  user_leaderboard_visibility = None
361
 
362
  # If user is logged in, get their personal leaderboard and visibility setting
363
  if current_user.is_authenticated:
364
  tts_personal_leaderboard = get_user_leaderboard(current_user.id, ModelType.TTS)
365
- conversational_personal_leaderboard = get_user_leaderboard(
366
- current_user.id, ModelType.CONVERSATIONAL
367
- )
368
  user_leaderboard_visibility = current_user.show_in_leaderboard
369
 
370
  # Get key dates for the timeline
371
  tts_key_dates = get_key_historical_dates(ModelType.TTS)
372
- conversational_key_dates = get_key_historical_dates(ModelType.CONVERSATIONAL)
373
 
374
  # Format dates for display in the dropdown
375
  formatted_tts_dates = [date.strftime("%B %Y") for date in tts_key_dates]
376
- formatted_conversational_dates = [
377
- date.strftime("%B %Y") for date in conversational_key_dates
378
- ]
379
 
380
  return render_template(
381
  "leaderboard.html",
382
  tts_leaderboard=tts_leaderboard,
383
- conversational_leaderboard=conversational_leaderboard,
384
  tts_personal_leaderboard=tts_personal_leaderboard,
385
- conversational_personal_leaderboard=conversational_personal_leaderboard,
386
  tts_key_dates=tts_key_dates,
387
- conversational_key_dates=conversational_key_dates,
388
  formatted_tts_dates=formatted_tts_dates,
389
- formatted_conversational_dates=formatted_conversational_dates,
390
  top_voters=top_voters,
391
  user_leaderboard_visibility=user_leaderboard_visibility
392
  )
@@ -395,7 +355,7 @@ def leaderboard():
395
  @app.route("/api/historical-leaderboard/<model_type>")
396
  def historical_leaderboard(model_type):
397
  """Get historical leaderboard data for a specific date"""
398
- if model_type not in [ModelType.TTS, ModelType.CONVERSATIONAL]:
399
  return jsonify({"error": "Invalid model type"}), 400
400
 
401
  # Get date from query parameter
@@ -939,303 +899,6 @@ def cleanup_session(session_id):
939
  del app.tts_sessions[session_id]
940
 
941
 
942
- @app.route("/api/conversational/generate", methods=["POST"])
943
- @limiter.limit("5 per minute")
944
- def generate_podcast():
945
- # If verification not setup, handle it first
946
- if app.config["TURNSTILE_ENABLED"] and not session.get("turnstile_verified"):
947
- return jsonify({"error": "Turnstile verification required"}), 403
948
-
949
- # Require user to be logged in to generate audio
950
- if not current_user.is_authenticated:
951
- return jsonify({"error": "You must be logged in to generate audio"}), 401
952
-
953
- data = request.json
954
- script = data.get("script")
955
-
956
- if not script or not isinstance(script, list) or len(script) < 2:
957
- return jsonify({"error": "Invalid script format or too short"}), 400
958
-
959
- # Validate script format
960
- for line in script:
961
- if not isinstance(line, dict) or "text" not in line or "speaker_id" not in line:
962
- return (
963
- jsonify(
964
- {
965
- "error": "Invalid script line format. Each line must have text and speaker_id"
966
- }
967
- ),
968
- 400,
969
- )
970
- if (
971
- not line["text"]
972
- or not isinstance(line["speaker_id"], int)
973
- or line["speaker_id"] not in [0, 1]
974
- ):
975
- return (
976
- jsonify({"error": "Invalid script content. Speaker ID must be 0 or 1"}),
977
- 400,
978
- )
979
-
980
- # Get two conversational models (currently only CSM and PlayDialog)
981
- available_models = Model.query.filter_by(
982
- model_type=ModelType.CONVERSATIONAL, is_active=True
983
- ).all()
984
-
985
- if len(available_models) < 2:
986
- return jsonify({"error": "Not enough conversational models available"}), 500
987
-
988
- selected_models = get_weighted_random_models(available_models, 2, ModelType.CONVERSATIONAL)
989
-
990
- try:
991
- # Generate audio for both models concurrently
992
- audio_files = []
993
- model_ids = []
994
-
995
- # Function to process a single model
996
- def process_model(model):
997
- # Call conversational TTS service
998
- audio_content = predict_tts(script, model.id)
999
-
1000
- # Save to temp file with unique name
1001
- file_uuid = str(uuid.uuid4())
1002
- dest_path = os.path.join(TEMP_AUDIO_DIR, f"{file_uuid}.wav")
1003
-
1004
- with open(dest_path, "wb") as f:
1005
- f.write(audio_content)
1006
-
1007
- return {"model_id": model.id, "audio_path": dest_path}
1008
-
1009
- # Use ThreadPoolExecutor to process models concurrently
1010
- with ThreadPoolExecutor(max_workers=2) as executor:
1011
- results = list(executor.map(process_model, selected_models))
1012
-
1013
- # Extract results
1014
- for result in results:
1015
- model_ids.append(result["model_id"])
1016
- audio_files.append(result["audio_path"])
1017
-
1018
- # Create session
1019
- session_id = str(uuid.uuid4())
1020
- script_text = " ".join([line["text"] for line in script])
1021
- app.conversational_sessions[session_id] = {
1022
- "model_a": model_ids[0],
1023
- "model_b": model_ids[1],
1024
- "audio_a": audio_files[0],
1025
- "audio_b": audio_files[1],
1026
- "text": script_text[:1000], # Limit text length
1027
- "created_at": datetime.utcnow(),
1028
- "expires_at": datetime.utcnow() + timedelta(minutes=30),
1029
- "voted": False,
1030
- "script": script,
1031
- "cache_hit": False, # Conversational is always generated on-demand
1032
- }
1033
-
1034
- # Return audio file paths and session
1035
- return jsonify(
1036
- {
1037
- "session_id": session_id,
1038
- "audio_a": f"/api/conversational/audio/{session_id}/a",
1039
- "audio_b": f"/api/conversational/audio/{session_id}/b",
1040
- "expires_in": 1800, # 30 minutes in seconds
1041
- }
1042
- )
1043
-
1044
- except Exception as e:
1045
- app.logger.error(f"Conversational generation error: {str(e)}")
1046
- return jsonify({"error": f"Failed to generate podcast: {str(e)}"}), 500
1047
-
1048
-
1049
- @app.route("/api/conversational/audio/<session_id>/<model_key>")
1050
- def get_podcast_audio(session_id, model_key):
1051
- # If verification not setup, handle it first
1052
- if app.config["TURNSTILE_ENABLED"] and not session.get("turnstile_verified"):
1053
- return jsonify({"error": "Turnstile verification required"}), 403
1054
-
1055
- if session_id not in app.conversational_sessions:
1056
- return jsonify({"error": "Invalid or expired session"}), 404
1057
-
1058
- session_data = app.conversational_sessions[session_id]
1059
-
1060
- # Check if session expired
1061
- if datetime.utcnow() > session_data["expires_at"]:
1062
- cleanup_conversational_session(session_id)
1063
- return jsonify({"error": "Session expired"}), 410
1064
-
1065
- if model_key == "a":
1066
- audio_path = session_data["audio_a"]
1067
- elif model_key == "b":
1068
- audio_path = session_data["audio_b"]
1069
- else:
1070
- return jsonify({"error": "Invalid model key"}), 400
1071
-
1072
- # Check if file exists
1073
- if not os.path.exists(audio_path):
1074
- return jsonify({"error": "Audio file not found"}), 404
1075
-
1076
- return send_file(audio_path, mimetype="audio/wav")
1077
-
1078
-
1079
- @app.route("/api/conversational/vote", methods=["POST"])
1080
- @limiter.limit("30 per minute")
1081
- def submit_podcast_vote():
1082
- # If verification not setup, handle it first
1083
- if app.config["TURNSTILE_ENABLED"] and not session.get("turnstile_verified"):
1084
- return jsonify({"error": "Turnstile verification required"}), 403
1085
-
1086
- # Require user to be logged in to vote
1087
- if not current_user.is_authenticated:
1088
- return jsonify({"error": "You must be logged in to vote"}), 401
1089
-
1090
- # Security checks for vote manipulation prevention
1091
- client_ip = get_client_ip()
1092
- vote_allowed, security_reason, security_score = is_vote_allowed(current_user.id, client_ip)
1093
-
1094
- if not vote_allowed:
1095
- app.logger.warning(f"Conversational vote blocked for user {current_user.username} (ID: {current_user.id}): {security_reason} (Score: {security_score})")
1096
- return jsonify({"error": f"Vote not allowed: {security_reason}"}), 403
1097
-
1098
- data = request.json
1099
- session_id = data.get("session_id")
1100
- chosen_model_key = data.get("chosen_model") # "a" or "b"
1101
-
1102
- if not session_id or session_id not in app.conversational_sessions:
1103
- return jsonify({"error": "Invalid or expired session"}), 404
1104
-
1105
- if not chosen_model_key or chosen_model_key not in ["a", "b"]:
1106
- return jsonify({"error": "Invalid chosen model"}), 400
1107
-
1108
- session_data = app.conversational_sessions[session_id]
1109
-
1110
- # Check if session expired
1111
- if datetime.utcnow() > session_data["expires_at"]:
1112
- cleanup_conversational_session(session_id)
1113
- return jsonify({"error": "Session expired"}), 410
1114
-
1115
- # Check if already voted
1116
- if session_data["voted"]:
1117
- return jsonify({"error": "Vote already submitted for this session"}), 400
1118
-
1119
- # Get model IDs and audio paths
1120
- chosen_id = (
1121
- session_data["model_a"] if chosen_model_key == "a" else session_data["model_b"]
1122
- )
1123
- rejected_id = (
1124
- session_data["model_b"] if chosen_model_key == "a" else session_data["model_a"]
1125
- )
1126
- chosen_audio_path = (
1127
- session_data["audio_a"] if chosen_model_key == "a" else session_data["audio_b"]
1128
- )
1129
- rejected_audio_path = (
1130
- session_data["audio_b"] if chosen_model_key == "a" else session_data["audio_a"]
1131
- )
1132
-
1133
- # Calculate session duration and gather analytics data
1134
- vote_time = datetime.utcnow()
1135
- session_duration = (vote_time - session_data["created_at"]).total_seconds()
1136
- client_ip = get_client_ip()
1137
- user_agent = request.headers.get('User-Agent')
1138
- cache_hit = session_data.get("cache_hit", False)
1139
-
1140
- # Record vote in database with analytics data
1141
- vote, error = record_vote(
1142
- current_user.id,
1143
- session_data["text"],
1144
- chosen_id,
1145
- rejected_id,
1146
- ModelType.CONVERSATIONAL,
1147
- session_duration=session_duration,
1148
- ip_address=client_ip,
1149
- user_agent=user_agent,
1150
- generation_date=session_data["created_at"],
1151
- cache_hit=cache_hit,
1152
- all_dataset_sentences=all_harvard_sentences # Note: conversational uses scripts, not sentences
1153
- )
1154
-
1155
- if error:
1156
- return jsonify({"error": error}), 500
1157
-
1158
- # Sentence consumption is now handled within record_vote function
1159
-
1160
- # --- Save preference data ---\
1161
- try:
1162
- vote_uuid = str(uuid.uuid4())
1163
- vote_dir = os.path.join("./votes", vote_uuid)
1164
- os.makedirs(vote_dir, exist_ok=True)
1165
-
1166
- # Copy audio files
1167
- shutil.copy(chosen_audio_path, os.path.join(vote_dir, "chosen.wav"))
1168
- shutil.copy(rejected_audio_path, os.path.join(vote_dir, "rejected.wav"))
1169
-
1170
- # Create metadata
1171
- chosen_model_obj = Model.query.get(chosen_id)
1172
- rejected_model_obj = Model.query.get(rejected_id)
1173
- metadata = {
1174
- "script": session_data["script"], # Save the full script
1175
- "chosen_model": chosen_model_obj.name if chosen_model_obj else "Unknown",
1176
- "chosen_model_id": chosen_model_obj.id if chosen_model_obj else "Unknown",
1177
- "rejected_model": rejected_model_obj.name if rejected_model_obj else "Unknown",
1178
- "rejected_model_id": rejected_model_obj.id if rejected_model_obj else "Unknown",
1179
- "session_id": session_id,
1180
- "timestamp": datetime.utcnow().isoformat(),
1181
- "username": current_user.username,
1182
- "model_type": "CONVERSATIONAL"
1183
- }
1184
- with open(os.path.join(vote_dir, "metadata.json"), "w") as f:
1185
- json.dump(metadata, f, indent=2)
1186
-
1187
- except Exception as e:
1188
- app.logger.error(f"Error saving preference data for conversational vote {session_id}: {str(e)}")
1189
- # Continue even if saving preference data fails, vote is already recorded
1190
-
1191
- # Mark session as voted
1192
- session_data["voted"] = True
1193
-
1194
- # Check for coordinated voting campaigns (async to not slow down response)
1195
- try:
1196
- from threading import Thread
1197
- campaign_check_thread = Thread(target=check_for_coordinated_campaigns)
1198
- campaign_check_thread.daemon = True
1199
- campaign_check_thread.start()
1200
- except Exception as e:
1201
- app.logger.error(f"Error starting coordinated campaign check thread: {str(e)}")
1202
-
1203
- # Return updated models (use previously fetched objects)
1204
- return jsonify(
1205
- {
1206
- "success": True,
1207
- "chosen_model": {"id": chosen_id, "name": chosen_model_obj.name if chosen_model_obj else "Unknown"},
1208
- "rejected_model": {
1209
- "id": rejected_id,
1210
- "name": rejected_model_obj.name if rejected_model_obj else "Unknown",
1211
- },
1212
- "names": {
1213
- "a": Model.query.get(session_data["model_a"]).name,
1214
- "b": Model.query.get(session_data["model_b"]).name,
1215
- },
1216
- }
1217
- )
1218
-
1219
-
1220
- def cleanup_conversational_session(session_id):
1221
- """Remove conversational session and its audio files"""
1222
- if session_id in app.conversational_sessions:
1223
- session = app.conversational_sessions[session_id]
1224
-
1225
- # Remove audio files
1226
- for audio_file in [session["audio_a"], session["audio_b"]]:
1227
- if os.path.exists(audio_file):
1228
- try:
1229
- os.remove(audio_file)
1230
- except Exception as e:
1231
- app.logger.error(
1232
- f"Error removing conversational audio file: {str(e)}"
1233
- )
1234
-
1235
- # Remove session
1236
- del app.conversational_sessions[session_id]
1237
-
1238
-
1239
  # Schedule periodic cleanup
1240
  def setup_cleanup():
1241
  def cleanup_expired_sessions():
@@ -1249,16 +912,7 @@ def setup_cleanup():
1249
  ]
1250
  for sid in expired_tts_sessions:
1251
  cleanup_session(sid)
1252
-
1253
- # Cleanup conversational sessions
1254
- expired_conv_sessions = [
1255
- sid
1256
- for sid, session_data in app.conversational_sessions.items()
1257
- if current_time > session_data["expires_at"]
1258
- ]
1259
- for sid in expired_conv_sessions:
1260
- cleanup_conversational_session(sid)
1261
- app.logger.info(f"Cleaned up {len(expired_tts_sessions)} TTS and {len(expired_conv_sessions)} conversational sessions.")
1262
 
1263
  # Also cleanup potentially expired cache entries (e.g., > 1 hour old)
1264
  # This prevents stale cache entries if generation is slow or failing
@@ -1593,14 +1247,6 @@ def check_for_coordinated_campaigns():
1593
  detect_coordinated_voting(model.id)
1594
  except Exception as e:
1595
  app.logger.error(f"Error checking coordinated voting for TTS model {model.id}: {str(e)}")
1596
-
1597
- # Check conversational models
1598
- conv_models = Model.query.filter_by(model_type=ModelType.CONVERSATIONAL, is_active=True).all()
1599
- for model in conv_models:
1600
- try:
1601
- detect_coordinated_voting(model.id)
1602
- except Exception as e:
1603
- app.logger.error(f"Error checking coordinated voting for conversational model {model.id}: {str(e)}")
1604
 
1605
  except Exception as e:
1606
  app.logger.error(f"Error in coordinated campaign check: {str(e)}")
@@ -1682,13 +1328,14 @@ if __name__ == "__main__":
1682
  url_scheme='https'
1683
  )
1684
  else:
1685
- print(f"Starting Waitress server with {threads} threads")
 
1686
  serve(
1687
  app,
1688
  host="0.0.0.0",
1689
- port=5000,
1690
  threads=threads,
1691
  connection_limit=100,
1692
  channel_timeout=30,
1693
- url_scheme='https' # Keep https for local dev if using proxy/tunnel
1694
  )
 
5
  from datetime import datetime
6
  import threading # Added for locking
7
  from sqlalchemy import or_ # Added for vote counting query
 
8
 
9
  year = datetime.now().year
10
  month = datetime.now().month
11
 
12
+ # Check if running in a Hugging Face Space
13
  IS_SPACES = False
14
  if os.getenv("SPACE_REPO_NAME"):
15
  print("Running in a Hugging Face Space 🤗")
 
21
  try:
22
  print("Database not found, downloading from HF dataset...")
23
  hf_hub_download(
24
+ repo_id="channelcorp/ko-tts-arena-db",
25
  filename="tts_arena.db",
26
  repo_type="dataset",
27
  local_dir="instance",
 
67
  import requests
68
  import functools
69
  import time # Added for potential retries
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
 
71
 
72
  def get_client_ip():
 
153
  app.tts_sessions = {}
154
  tts_sessions = app.tts_sessions
155
 
 
 
 
 
156
  # Register blueprints
157
  app.register_blueprint(auth, url_prefix="/auth")
158
  app.register_blueprint(admin)
 
304
  # Otherwise redirect back to turnstile page
305
  return redirect(url_for("turnstile_page", redirect_url=redirect_url))
306
 
307
+ # Load Korean prompts from local JSON file
308
+ print("Loading Korean TTS prompts from ko_prompts.json...")
309
+ _prompts_path = os.path.join(os.path.dirname(__file__), "ko_prompts.json")
310
+ with open(_prompts_path, "r", encoding="utf-8") as f:
311
+ _prompts_data = json.load(f)
312
+ all_harvard_sentences = _prompts_data.get("prompts", [])
313
+ print(f"Loaded {len(all_harvard_sentences)} Korean prompts")
314
 
315
  # Initialize initial_sentences as empty - will be populated with unconsumed sentences only
316
  initial_sentences = []
 
324
  @app.route("/leaderboard")
325
  def leaderboard():
326
  tts_leaderboard = get_leaderboard_data(ModelType.TTS)
 
327
  top_voters = get_top_voters(10) # Get top 10 voters
328
 
329
  # Initialize personal leaderboard data
330
  tts_personal_leaderboard = None
 
331
  user_leaderboard_visibility = None
332
 
333
  # If user is logged in, get their personal leaderboard and visibility setting
334
  if current_user.is_authenticated:
335
  tts_personal_leaderboard = get_user_leaderboard(current_user.id, ModelType.TTS)
 
 
 
336
  user_leaderboard_visibility = current_user.show_in_leaderboard
337
 
338
  # Get key dates for the timeline
339
  tts_key_dates = get_key_historical_dates(ModelType.TTS)
 
340
 
341
  # Format dates for display in the dropdown
342
  formatted_tts_dates = [date.strftime("%B %Y") for date in tts_key_dates]
 
 
 
343
 
344
  return render_template(
345
  "leaderboard.html",
346
  tts_leaderboard=tts_leaderboard,
 
347
  tts_personal_leaderboard=tts_personal_leaderboard,
 
348
  tts_key_dates=tts_key_dates,
 
349
  formatted_tts_dates=formatted_tts_dates,
 
350
  top_voters=top_voters,
351
  user_leaderboard_visibility=user_leaderboard_visibility
352
  )
 
355
  @app.route("/api/historical-leaderboard/<model_type>")
356
  def historical_leaderboard(model_type):
357
  """Get historical leaderboard data for a specific date"""
358
+ if model_type != ModelType.TTS:
359
  return jsonify({"error": "Invalid model type"}), 400
360
 
361
  # Get date from query parameter
 
899
  del app.tts_sessions[session_id]
900
 
901
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
902
  # Schedule periodic cleanup
903
  def setup_cleanup():
904
  def cleanup_expired_sessions():
 
912
  ]
913
  for sid in expired_tts_sessions:
914
  cleanup_session(sid)
915
+ app.logger.info(f"Cleaned up {len(expired_tts_sessions)} TTS sessions.")
 
 
 
 
 
 
 
 
 
916
 
917
  # Also cleanup potentially expired cache entries (e.g., > 1 hour old)
918
  # This prevents stale cache entries if generation is slow or failing
 
1247
  detect_coordinated_voting(model.id)
1248
  except Exception as e:
1249
  app.logger.error(f"Error checking coordinated voting for TTS model {model.id}: {str(e)}")
 
 
 
 
 
 
 
 
1250
 
1251
  except Exception as e:
1252
  app.logger.error(f"Error in coordinated campaign check: {str(e)}")
 
1328
  url_scheme='https'
1329
  )
1330
  else:
1331
+ port = int(os.environ.get("PORT", 5001))
1332
+ print(f"Starting Waitress server with {threads} threads on port {port}")
1333
  serve(
1334
  app,
1335
  host="0.0.0.0",
1336
+ port=port,
1337
  threads=threads,
1338
  connection_limit=100,
1339
  channel_timeout=30,
1340
+ url_scheme='http' # Local dev uses http
1341
  )
ko_prompts.json ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "prompts": [
3
+ "안녕하세요, 오늘 날씨가 정말 좋네요.",
4
+ "지금 몇 시예요? 약속 시간에 늦을 것 같아요.",
5
+ "오늘 저녁에 뭐 먹을까요? 치킨이 땡기는데.",
6
+ "주말에 시간 되시면 같이 영화 보러 갈래요?",
7
+ "커피 한 잔 하실래요? 제가 살게요.",
8
+ "회의는 오후 세 시에 시작합니다. 자료 준비해 주세요.",
9
+ "이번 분기 매출이 전년 대비 이십 퍼센트 증가했습니다.",
10
+ "고객님, 문의하신 내용 확인 후 답변드리겠습니다.",
11
+ "프로젝트 마감일이 다음 주 금요일입니다.",
12
+ "미팅 일정을 조율하고 싶은데, 언제가 편하신가요?",
13
+ "채널톡 고객센터입니다. 무엇을 도와드릴까요?",
14
+ "주문하신 상품은 내일 오전 중으로 배송될 예정입니다.",
15
+ "불편을 드려 죄송합니다. 바로 처리해 드리겠습니다.",
16
+ "결제가 정상적으로 완료되었습니다. 감사합니다.",
17
+ "반품 신청이 접수되었습니다. 삼 영업일 내에 처리됩니다.",
18
+ "서울의 현재 기온은 섭씨 이십오 도입니다.",
19
+ "다음 정류장은 강남역입니다. 내리실 분은 준비해 주세요.",
20
+ "오늘의 환율은 달러당 천삼백원입니다.",
21
+ "이 제품의 가격은 삼만구천원입니다.",
22
+ "영업시간은 오전 아홉 시부터 오후 여섯 시까지입니다.",
23
+ "정말 기쁜 소식이에요! 축하드려요!",
24
+ "걱정하지 마세요, 다 잘 될 거예요.",
25
+ "오랜만이에요! 그동안 잘 지내셨어요?",
26
+ "정말 감사합니다. 덕분에 큰 도움이 됐어요.",
27
+ "아쉽지만 다음 기회에 뵙겠습니다.",
28
+ "문을 열려면 버튼을 눌러주세요.",
29
+ "왼쪽으로 돌아서 직진하시면 됩니다.",
30
+ "앱을 설치하고 회원가입을 진행해 주세요.",
31
+ "비밀번호는 여덟 자리 이상으로 설정해 주세요.",
32
+ "첨부파일을 확인하시고 서명해 주세요.",
33
+ "오늘 주요 뉴스를 전해드리겠습니다.",
34
+ "정부가 새로운 정책을 발표했습니다.",
35
+ "국내 반도체 수출이 사상 최대치를 기록했습니다.",
36
+ "내일 전국적으로 비가 내릴 예정입니다.",
37
+ "올해 출생률이 역대 최저를 기록했습니다.",
38
+ "오늘 수업에서는 인공지능의 기초를 배워보겠습니다.",
39
+ "이 문제의 정답은 삼번입니다.",
40
+ "다음 시간까지 과제를 제출해 주세요.",
41
+ "질문이 있으시면 언제든지 물어보세요.",
42
+ "복습은 학습의 가장 중요한 부분입니다.",
43
+ "이번 주 인기 영화 순위를 알려드릴게요.",
44
+ "새 앨범이 음원 차트 일위를 차지했습니다.",
45
+ "오늘 경기에서 한국팀이 이겼습니다!",
46
+ "다음 에피소드가 정말 기대돼요.",
47
+ "이 노래 가사가 정말 마음에 들어요.",
48
+ "인공지능 기술이 빠르게 발전하고 있습니다.",
49
+ "스마트폰 배터리를 절약하는 방법을 알려드릴게요.",
50
+ "이 앱은 무료로 다운로드할 수 있습니다.",
51
+ "시스템 업데이트가 완료되었습니다.",
52
+ "클라우드에 파일이 자동으로 저장됩니다."
53
+ ]
54
+ }
55
+
models.py CHANGED
@@ -566,235 +566,70 @@ def get_key_historical_dates(model_type):
566
 
567
 
568
  def insert_initial_models():
569
- """Insert initial models into the database."""
 
 
 
 
 
 
 
570
  tts_models = [
 
571
  Model(
572
- id="eleven-multilingual-v2",
573
- name="Eleven Multilingual v2",
574
- model_type=ModelType.TTS,
575
- is_open=False,
576
- model_url="https://elevenlabs.io/",
577
- ),
578
- Model(
579
- id="eleven-turbo-v2.5",
580
- name="Eleven Turbo v2.5",
581
- model_type=ModelType.TTS,
582
- is_open=False,
583
- model_url="https://elevenlabs.io/",
584
- ),
585
- Model(
586
- id="eleven-flash-v2.5",
587
- name="Eleven Flash v2.5",
588
- model_type=ModelType.TTS,
589
- is_open=False,
590
- model_url="https://elevenlabs.io/",
591
- ),
592
- Model(
593
- id="cartesia-sonic-2",
594
- name="Cartesia Sonic 2",
595
- model_type=ModelType.TTS,
596
- is_open=False,
597
- is_active=False, # ran out of credits
598
- model_url="https://cartesia.ai/",
599
- ),
600
- Model(
601
- id="spark-tts",
602
- name="Spark TTS",
603
- model_type=ModelType.TTS,
604
- is_open=False,
605
- is_active=False, # API stopped working
606
- model_url="https://github.com/SparkAudio/Spark-TTS",
607
- ),
608
- Model(
609
- id="playht-2.0",
610
- name="PlayHT 2.0",
611
- model_type=ModelType.TTS,
612
- is_open=False,
613
- is_active=False,
614
- model_url="https://play.ht/",
615
- ),
616
- Model(
617
- id="styletts2",
618
- name="StyleTTS 2",
619
- model_type=ModelType.TTS,
620
- is_open=False,
621
- is_active=False,
622
- model_url="https://github.com/yl4579/StyleTTS2",
623
- ),
624
- Model(
625
- id="kokoro-v1",
626
- name="Kokoro v1.0",
627
- model_type=ModelType.TTS,
628
- is_open=True,
629
- model_url="https://huggingface.co/hexgrad/Kokoro-82M",
630
- ),
631
- Model(
632
- id="cosyvoice-2.0",
633
- name="CosyVoice 2.0",
634
- model_type=ModelType.TTS,
635
- is_open=True,
636
- model_url="https://github.com/FunAudioLLM/CosyVoice",
637
- ),
638
- Model(
639
- id="papla-p1",
640
- name="Papla P1",
641
- model_type=ModelType.TTS,
642
- is_open=False,
643
- model_url="https://papla.media/",
644
- ),
645
- Model(
646
- id="hume-octave",
647
- name="Hume Octave",
648
- model_type=ModelType.TTS,
649
- is_open=False,
650
- model_url="https://hume.ai/",
651
- ),
652
- Model(
653
- id="megatts3",
654
- name="MegaTTS 3",
655
- model_type=ModelType.TTS,
656
- is_active=False,
657
- is_open=True,
658
- model_url="https://github.com/bytedance/MegaTTS3",
659
- ),
660
- Model(
661
- id="minimax-02-hd",
662
- name="MiniMax Speech-02-HD",
663
- model_type=ModelType.TTS,
664
- is_open=False,
665
- model_url="http://minimax.io/",
666
- ),
667
- Model(
668
- id="minimax-02-turbo",
669
- name="MiniMax Speech-02-Turbo",
670
- model_type=ModelType.TTS,
671
- is_open=False,
672
- model_url="http://minimax.io/",
673
- ),
674
- Model(
675
- id="lanternfish-1",
676
- name="OpenAudio S1",
677
- model_type=ModelType.TTS,
678
- is_open=False,
679
- is_active=False, # NOTE: Waiting to receive a pool of voices
680
- model_url="https://fish.audio/",
681
- ),
682
- Model(
683
- id="chatterbox",
684
- name="Chatterbox",
685
- model_type=ModelType.TTS,
686
- is_open=False,
687
- is_active=True,
688
- model_url="https://www.resemble.ai/chatterbox/",
689
- ),
690
- Model(
691
- id="inworld",
692
- name="Inworld TTS",
693
- model_type=ModelType.TTS,
694
- is_open=False,
695
- is_active=True,
696
- model_url="https://inworld.ai/tts",
697
- ),
698
- Model(
699
- id="inworld-max",
700
- name="Inworld TTS MAX",
701
- model_type=ModelType.TTS,
702
- is_open=False,
703
- is_active=True,
704
- model_url="https://inworld.ai/tts",
705
- ),
706
- Model(
707
- id="async-1",
708
- name="CastleFlow v1.0",
709
  model_type=ModelType.TTS,
710
  is_open=False,
711
  is_active=True,
712
- model_url="https://async.ai/",
713
  ),
 
714
  Model(
715
- id="nls-pre-v1",
716
- name="NLS Pre V1",
717
  model_type=ModelType.TTS,
718
  is_open=False,
719
- is_active=True,
720
- model_url="https://ttsarena.org/",
721
  ),
 
722
  Model(
723
- id="wordcab",
724
- name="Wordcab TTS",
725
  model_type=ModelType.TTS,
726
  is_open=False,
727
- is_active=True,
728
- model_url="https://wordcab.com/",
729
  ),
730
  Model(
731
- id="veena",
732
- name="Veena",
733
- model_type=ModelType.TTS,
734
- is_open=True,
735
- is_active=True,
736
- model_url="https://mayaresearch.ai/",
737
- ),
738
- Model(
739
- id="maya1",
740
- name="Maya 1",
741
  model_type=ModelType.TTS,
742
  is_open=False,
743
- is_active=True,
744
- model_url="https://mayaresearch.ai/",
745
  ),
 
746
  Model(
747
- id="magpie",
748
- name="Magpie Multilingual",
749
  model_type=ModelType.TTS,
750
  is_open=False,
751
- is_active=True,
752
- model_url="https://build.nvidia.com/nvidia/magpie-tts-multilingual",
753
  ),
754
  Model(
755
- id="parmesan",
756
- name="Parmesan",
757
  model_type=ModelType.TTS,
758
  is_open=False,
759
- is_active=True,
760
- model_url="https://ttsarena.org/",
761
- ),
762
- Model(
763
- id="vocu",
764
- name="Vocu V3.0",
765
- model_type=ModelType.TTS,
766
- is_open=False,
767
- is_active=True,
768
- model_url="https://vocu.ai/",
769
- ),
770
- ]
771
- conversational_models = [
772
- Model(
773
- id="csm-1b",
774
- name="CSM 1B",
775
- model_type=ModelType.CONVERSATIONAL,
776
- is_open=True,
777
- model_url="https://huggingface.co/sesame/csm-1b",
778
- ),
779
- Model(
780
- id="playdialog-1.0",
781
- name="PlayDialog 1.0",
782
- model_type=ModelType.CONVERSATIONAL,
783
- is_open=False,
784
- model_url="https://play.ht/",
785
- ),
786
- Model(
787
- id="dia-1.6b",
788
- name="Dia 1.6B",
789
- model_type=ModelType.CONVERSATIONAL,
790
- is_open=True,
791
- model_url="https://huggingface.co/nari-labs/Dia-1.6B",
792
  ),
793
  ]
794
 
795
- all_models = tts_models + conversational_models
796
-
797
- for model in all_models:
798
  existing = Model.query.filter_by(
799
  id=model.id, model_type=model.model_type
800
  ).first()
 
566
 
567
 
568
  def insert_initial_models():
569
+ """Insert initial models into the database (한국어 TTS 전용)."""
570
+ import os
571
+
572
+ # 환경 변수로 API 키 확인하여 활성화 여부 결정
573
+ has_openai = bool(os.getenv("OPENAI_API_KEY"))
574
+ has_elevenlabs = bool(os.getenv("ELEVENLABS_API_KEY"))
575
+ has_google = bool(os.getenv("GOOGLE_API_KEY"))
576
+
577
  tts_models = [
578
+ # 채널톡 TTS (한국어 특화) - 항상 활성화
579
  Model(
580
+ id="channel-hana",
581
+ name="채널톡 하나",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
582
  model_type=ModelType.TTS,
583
  is_open=False,
584
  is_active=True,
585
+ model_url="https://channel.io/",
586
  ),
587
+ # ElevenLabs (다국어 지원) - API 키 있을 때만 활성화
588
  Model(
589
+ id="eleven-multilingual-v2",
590
+ name="ElevenLabs Multilingual v2",
591
  model_type=ModelType.TTS,
592
  is_open=False,
593
+ is_active=has_elevenlabs,
594
+ model_url="https://elevenlabs.io/",
595
  ),
596
+ # OpenAI TTS - API 키 있을 때만 활성화
597
  Model(
598
+ id="openai-tts-1",
599
+ name="OpenAI TTS-1",
600
  model_type=ModelType.TTS,
601
  is_open=False,
602
+ is_active=has_openai,
603
+ model_url="https://platform.openai.com/docs/guides/text-to-speech",
604
  ),
605
  Model(
606
+ id="openai-tts-1-hd",
607
+ name="OpenAI TTS-1-HD",
 
 
 
 
 
 
 
 
608
  model_type=ModelType.TTS,
609
  is_open=False,
610
+ is_active=has_openai,
611
+ model_url="https://platform.openai.com/docs/guides/text-to-speech",
612
  ),
613
+ # Google Cloud TTS - API 키 있을 때만 활성화
614
  Model(
615
+ id="google-wavenet",
616
+ name="Google Wavenet (ko-KR)",
617
  model_type=ModelType.TTS,
618
  is_open=False,
619
+ is_active=has_google,
620
+ model_url="https://cloud.google.com/text-to-speech",
621
  ),
622
  Model(
623
+ id="google-neural2",
624
+ name="Google Neural2 (ko-KR)",
625
  model_type=ModelType.TTS,
626
  is_open=False,
627
+ is_active=has_google,
628
+ model_url="https://cloud.google.com/text-to-speech",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
629
  ),
630
  ]
631
 
632
+ for model in tts_models:
 
 
633
  existing = Model.query.filter_by(
634
  id=model.id, model_type=model.model_type
635
  ).first()
requirements.txt CHANGED
@@ -10,7 +10,4 @@ apscheduler
10
  flask-migrate
11
  gunicorn
12
  waitress
13
- fal-client
14
- git+https://github.com/playht/pyht
15
- datasets
16
- langdetect
 
10
  flask-migrate
11
  gunicorn
12
  waitress
13
+ huggingface-hub
 
 
 
static/channeltalk-logo-kr.svg ADDED
templates/about.html CHANGED
@@ -1,6 +1,6 @@
1
  {% extends "base.html" %}
2
 
3
- {% block title %}About - TTS Arena{% endblock %}
4
 
5
  {% block current_page %}About{% endblock %}
6
 
@@ -25,9 +25,16 @@
25
  font-size: 24px;
26
  }
27
 
 
 
 
 
 
 
 
28
  .about-section p {
29
  margin-bottom: 16px;
30
- line-height: 1.6;
31
  color: #444;
32
  }
33
 
@@ -35,6 +42,40 @@
35
  margin-bottom: 0;
36
  }
37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  .feature-list {
39
  list-style: none;
40
  padding: 0;
@@ -44,86 +85,111 @@
44
  margin-bottom: 12px;
45
  padding-left: 28px;
46
  position: relative;
 
47
  }
48
 
49
  .feature-list li::before {
50
- content: "";
51
  color: var(--primary-color);
52
- font-size: 24px;
53
  position: absolute;
54
  left: 8px;
55
- top: -4px;
56
  }
57
 
58
- .credits-list {
59
  display: grid;
60
- grid-template-columns: repeat(auto-fill, minmax(300px, 1fr));
61
- gap: 24px;
62
- margin-top: 16px;
63
  }
64
 
65
- .credit-item {
66
- display: flex;
67
- align-items: center;
68
- justify-content: space-between;
69
- padding-bottom: 8px;
70
- border-bottom: 1px solid var(--border-color);
71
  }
72
 
73
- .credit-item a {
74
  color: var(--primary-color);
75
- text-decoration: none;
 
76
  }
77
 
78
- .credit-item a:hover {
79
- text-decoration: underline;
 
 
 
 
80
  }
81
 
82
- .social-links {
83
- display: flex;
84
- gap: 12px;
85
  }
86
-
87
- .social-icon {
88
- width: 20px;
89
- height: 20px;
90
  }
91
 
92
- .citation-box {
93
- background-color: var(--light-gray);
94
- border-radius: var(--radius);
95
- padding: 16px;
96
- margin-top: 16px;
97
- position: relative;
98
- font-family: monospace;
99
- white-space: pre-wrap;
100
- word-break: break-word;
101
  font-size: 14px;
102
- line-height: 1.5;
 
103
  }
104
 
105
- .copy-citation {
106
- position: absolute;
107
- top: 8px;
108
- right: 8px;
109
- background-color: white;
110
- border: 1px solid var(--border-color);
111
- border-radius: var(--radius);
112
- width: 36px;
113
- height: 36px;
114
- display: flex;
115
- align-items: center;
116
- justify-content: center;
117
- cursor: pointer;
118
- transition: background-color 0.2s;
119
  }
120
 
121
- .copy-citation:hover {
122
- background-color: var(--light-gray);
 
 
 
 
123
  }
124
 
125
- .copy-citation svg {
 
126
  color: var(--text-color);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
  }
128
 
129
  .faq-item {
@@ -139,6 +205,7 @@
139
  .faq-answer {
140
  line-height: 1.6;
141
  }
 
142
  /* Dark mode styles */
143
  @media (prefers-color-scheme: dark) {
144
  .about-section {
@@ -150,266 +217,214 @@
150
  color: var(--text-color);
151
  }
152
 
153
- .citation-box {
 
 
 
 
 
 
 
 
154
  background-color: var(--secondary-color);
155
- border-color: var(--border-color);
156
  }
157
 
158
- .copy-citation {
159
- background-color: var(--light-gray);
160
- border-color: var(--border-color);
161
  }
162
 
163
- .copy-citation:hover {
164
- background-color: rgba(255, 255, 255, 0.1);
165
  }
166
 
167
- .copy-citation svg {
168
- color: var(--text-color);
169
  }
170
 
171
- .faq-question {
172
- color: var(--primary-color);
173
  }
174
 
175
- .social-icon.icon-x {
176
- filter: invert(1);
177
  }
178
  }
179
-
180
  </style>
181
  {% endblock %}
182
 
183
  {% block content %}
184
  <div class="about-container">
185
  <div class="about-section">
186
- <h2>Welcome to TTS Arena 2.0</h2>
187
  <p>
188
- TTS Arena evaluates leading speech synthesis models in an interactive, community-driven platform.
189
- Inspired by LMsys's <a href="https://chat.lmsys.org/" target="_blank" rel="noopener">Chatbot Arena</a>, we've created
190
- a space where anyone can compare and rank text-to-speech technologies through direct, side-by-side evaluation.
191
- </p>
192
- <p>
193
- Our second version now supports conversational models for podcast-like content generation, expanding the arena's scope to reflect the diverse applications of modern speech synthesis.
194
  </p>
 
 
 
195
  </div>
196
 
197
  <div class="about-section">
198
- <h2>Motivation</h2>
199
  <p>
200
- The field of speech synthesis has long lacked reliable methods to measure model quality. Traditional
201
- metrics like WER (word error rate) often fail to capture the nuances of natural speech, while subjective
202
- measures such as MOS (mean opinion score) typically involve small-scale experiments with limited participants.
203
  </p>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
204
  <p>
205
- TTS Arena addresses these limitations by inviting the entire community to participate in the evaluation
206
- process, making both the opportunity to rank models and the resulting insights accessible to everyone.
 
207
  </p>
 
 
 
 
 
 
 
208
  </div>
209
 
210
  <div class="about-section">
211
- <h2>How The Arena Works</h2>
212
  <p>
213
- The concept is straightforward: enter text that will be synthesized by two competing models. After
214
- listening to both samples, vote for the one that sounds more natural and engaging. To prevent bias,
215
- model names are revealed only after your vote is submitted.
216
  </p>
217
  <ul class="feature-list">
218
- <li>Enter your own text or select a random sentence</li>
219
- <li>Listen to two different TTS models synthesize the same content</li>
220
- <li>Compare conversational models for podcast-like content</li>
221
- <li>Vote for the model that sounds more natural, clear, and expressive</li>
222
- <li>Track model rankings on our leaderboard</li>
223
  </ul>
 
 
 
 
224
  </div>
225
 
226
  <div class="about-section">
227
- <h2>Frequently Asked Questions</h2>
228
- <div class="faq-item">
229
- <div class="faq-question">What happened to the TTS Arena V1 leaderboard?</div>
230
- <div class="faq-answer">
231
- The TTS Arena V1 leaderboard is now deprecated. While you can no longer vote on it, the results and leaderboard are still available for reference at <a href="https://huggingface.co/spaces/TTS-AGI/TTS-Arena" target="_blank" rel="noopener">TTS Arena V1</a>. The leaderboard is static and will not change.
232
- </div>
233
- </div>
234
- <div class="faq-item">
235
- <div class="faq-question">How are models ranked in TTS Arena?</div>
236
- <div class="faq-answer">
237
- Models are ranked using an Elo rating system, similar to chess rankings. When you vote for a model, its rating increases while the other model's rating decreases. The amount of change depends on the current ratings of both models.
238
- </div>
239
- </div>
240
  <div class="faq-item">
241
- <div class="faq-question">Is the TTS Arena V2 leaderboard affected by votes from V1?</div>
242
  <div class="faq-answer">
243
- No, the TTS Arena V2 leaderboard is a completely fresh start. Votes from V1 do not affect the V2 leaderboard in any way. All models in V2 start with a clean slate.
 
244
  </div>
245
  </div>
246
  <div class="faq-item">
247
- <div class="faq-question">Can I suggest a model to be added to the arena?</div>
248
  <div class="faq-answer">
249
- Yes! We welcome suggestions for new models. Please reach out to us through the Hugging Face community or create an issue in our GitHub repository. If you are developing a new model and wish for it to be added anonymously for pre-release evaluation, please <a href="mailto:me@mrfake.name" target="_blank" rel="noopener">reach out to us to discuss</a>.
 
250
  </div>
251
  </div>
252
  <div class="faq-item">
253
- <div class="faq-question">How can I contribute to the project?</div>
254
  <div class="faq-answer">
255
- You can contribute by voting on models, suggesting improvements, reporting bugs, or even contributing code. Check our GitHub repository for more information on how to get involved.
 
256
  </div>
257
  </div>
258
  <div class="faq-item">
259
- <div class="faq-question">What's new in TTS Arena 2.0?</div>
260
  <div class="faq-answer">
261
- TTS Arena 2.0 introduces support for conversational models (for podcast-like content), improved UI/UX, and a more robust backend infrastructure for handling more models and votes.
262
- </div>
263
- </div>
264
- <div class="faq-item">
265
- <div class="faq-question">Do I need to login to use TTS Arena?</div>
266
- <div class="faq-answer">
267
- Login is optional and not required to vote. If you choose to login (with Hugging Face), texts you enter will be associated with your account, and you'll have access to a personal leaderboard showing the models you favor the most.
268
  </div>
269
  </div>
270
  </div>
271
 
272
  <div class="about-section">
273
- <h2>Citation</h2>
274
  <p>
275
- If you use TTS Arena in your research, please cite it as follows:
276
  </p>
277
- <div class="citation-box" id="citation-text">@misc{tts-arena-v2,
278
- title = {TTS Arena 2.0: Benchmarking Text-to-Speech Models in the Wild},
279
- author = {mrfakename and Srivastav, Vaibhav and Fourrier, Clémentine and Pouget, Lucain and Lacombe, Yoach and main and Gandhi, Sanchit and Passos, Apolinário and Cuenca, Pedro},
280
- year = 2025,
281
- publisher = {Hugging Face},
282
- howpublished = "\url{https://huggingface.co/spaces/TTS-AGI/TTS-Arena-V2}"
283
- }<button class="copy-citation" onclick="copyToClipboard()" title="Copy citation"><svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-copy-icon lucide-copy"><rect width="14" height="14" x="8" y="8" rx="2" ry="2"/><path d="M4 16c-1.1 0-2-.9-2-2V4c0-1.1.9-2 2-2h10c1.1 0 2 .9 2 2"/></svg></button></div>
284
- <script>
285
- function copyToClipboard() {
286
- const text = document.getElementById('citation-text').innerText;
287
- navigator.clipboard.writeText(text).then(() => {
288
- const btn = document.querySelector('.copy-citation');
289
- const originalContent = btn.innerHTML;
290
- btn.innerHTML = '<svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="M20 6 9 17l-5-5"/></svg>';
291
- setTimeout(() => {
292
- btn.innerHTML = originalContent;
293
- }, 2000);
294
- });
295
- }
296
- </script>
297
  </div>
298
 
299
  <div class="about-section">
300
- <h2>Credits</h2>
301
  <p>
302
- Thank you to the following individuals who helped make this project possible:
303
  </p>
304
- <div class="credits-list">
305
- <div class="credit-item">
306
- <span>Vaibhav (VB) Srivastav</span>
307
- <div class="social-links">
308
- <a href="https://twitter.com/reach_vb" target="_blank" rel="noopener" title="Twitter">
309
- <img src="{{ url_for('static', filename='twitter.svg') }}" alt="Twitter" class="social-icon icon-x">
310
- </a>
311
- <a href="https://huggingface.co/reach-vb" target="_blank" rel="noopener" title="Hugging Face">
312
- <img src="{{ url_for('static', filename='huggingface.svg') }}" alt="Hugging Face" class="social-icon">
313
- </a>
314
- </div>
315
- </div>
316
- <div class="credit-item">
317
- <span>Clémentine Fourrier</span>
318
- <div class="social-links">
319
- <a href="https://twitter.com/clefourrier" target="_blank" rel="noopener" title="Twitter">
320
- <img src="{{ url_for('static', filename='twitter.svg') }}" alt="Twitter" class="social-icon icon-x">
321
- </a>
322
- <a href="https://huggingface.co/clefourrier" target="_blank" rel="noopener" title="Hugging Face">
323
- <img src="{{ url_for('static', filename='huggingface.svg') }}" alt="Hugging Face" class="social-icon">
324
- </a>
325
- </div>
326
- </div>
327
- <div class="credit-item">
328
- <span>Lucain Pouget</span>
329
- <div class="social-links">
330
- <a href="https://twitter.com/Wauplin" target="_blank" rel="noopener" title="Twitter">
331
- <img src="{{ url_for('static', filename='twitter.svg') }}" alt="Twitter" class="social-icon icon-x">
332
- </a>
333
- <a href="https://huggingface.co/Wauplin" target="_blank" rel="noopener" title="Hugging Face">
334
- <img src="{{ url_for('static', filename='huggingface.svg') }}" alt="Hugging Face" class="social-icon">
335
- </a>
336
- </div>
337
- </div>
338
- <div class="credit-item">
339
- <span>Yoach Lacombe</span>
340
- <div class="social-links">
341
- <a href="https://twitter.com/yoachlacombe" target="_blank" rel="noopener" title="Twitter">
342
- <img src="{{ url_for('static', filename='twitter.svg') }}" alt="Twitter" class="social-icon icon-x">
343
- </a>
344
- <a href="https://huggingface.co/ylacombe" target="_blank" rel="noopener" title="Hugging Face">
345
- <img src="{{ url_for('static', filename='huggingface.svg') }}" alt="Hugging Face" class="social-icon">
346
- </a>
347
- </div>
348
  </div>
349
- <div class="credit-item">
350
- <span>Main Horse</span>
351
- <div class="social-links">
352
- <a href="https://twitter.com/main_horse" target="_blank" rel="noopener" title="Twitter">
353
- <img src="{{ url_for('static', filename='twitter.svg') }}" alt="Twitter" class="social-icon icon-x">
354
- </a>
355
- <a href="https://huggingface.co/main-horse" target="_blank" rel="noopener" title="Hugging Face">
356
- <img src="{{ url_for('static', filename='huggingface.svg') }}" alt="Hugging Face" class="social-icon">
357
- </a>
358
- </div>
359
- </div>
360
- <div class="credit-item">
361
- <span>Sanchit Gandhi</span>
362
- <div class="social-links">
363
- <a href="https://twitter.com/sanchitgandhi99" target="_blank" rel="noopener" title="Twitter">
364
- <img src="{{ url_for('static', filename='twitter.svg') }}" alt="Twitter" class="social-icon icon-x">
365
- </a>
366
- <a href="https://huggingface.co/sanchit-gandhi" target="_blank" rel="noopener" title="Hugging Face">
367
- <img src="{{ url_for('static', filename='huggingface.svg') }}" alt="Hugging Face" class="social-icon">
368
- </a>
369
- </div>
370
- </div>
371
- <div class="credit-item">
372
- <span>Apolinário Passos</span>
373
- <div class="social-links">
374
- <a href="https://twitter.com/multimodalart" target="_blank" rel="noopener" title="Twitter">
375
- <img src="{{ url_for('static', filename='twitter.svg') }}" alt="Twitter" class="social-icon icon-x">
376
- </a>
377
- <a href="https://huggingface.co/multimodalart" target="_blank" rel="noopener" title="Hugging Face">
378
- <img src="{{ url_for('static', filename='huggingface.svg') }}" alt="Hugging Face" class="social-icon">
379
- </a>
380
- </div>
381
- </div>
382
- <div class="credit-item">
383
- <span>Pedro Cuenca</span>
384
- <div class="social-links">
385
- <a href="https://twitter.com/pcuenq" target="_blank" rel="noopener" title="Twitter">
386
- <img src="{{ url_for('static', filename='twitter.svg') }}" alt="Twitter" class="social-icon icon-x">
387
- </a>
388
- <a href="https://huggingface.co/pcuenq" target="_blank" rel="noopener" title="Hugging Face">
389
- <img src="{{ url_for('static', filename='huggingface.svg') }}" alt="Hugging Face" class="social-icon">
390
- </a>
391
- </div>
392
  </div>
393
  </div>
394
  </div>
395
 
396
  <div class="about-section">
397
- <h2>Privacy Statement</h2>
398
  <p>
399
- We may store text you enter and generated audio. If you are logged in, we may associate your votes with your Hugging Face username.
400
- You agree that we may collect, share, and/or publish any data you input for research and/or
401
- commercial purposes.
402
  </p>
403
- </div>
404
-
405
- <div class="about-section">
406
- <h2>License</h2>
407
  <p>
408
- Generated audio clips cannot be redistributed and may be used for personal, non-commercial use only.
409
- The code for the Arena is licensed under the Zlib license.
410
- Random sentences are sourced from a filtered subset of the
411
- <a href="https://www.cs.columbia.edu/~hgs/audio/harvard.html" target="_blank" rel="noopener">Harvard Sentences</a>.
412
  </p>
413
  </div>
414
  </div>
415
- {% endblock %}
 
1
  {% extends "base.html" %}
2
 
3
+ {% block title %}About - 한국어 TTS 아레나{% endblock %}
4
 
5
  {% block current_page %}About{% endblock %}
6
 
 
25
  font-size: 24px;
26
  }
27
 
28
+ .about-section h3 {
29
+ color: var(--text-color);
30
+ margin-top: 20px;
31
+ margin-bottom: 12px;
32
+ font-size: 18px;
33
+ }
34
+
35
  .about-section p {
36
  margin-bottom: 16px;
37
+ line-height: 1.7;
38
  color: #444;
39
  }
40
 
 
42
  margin-bottom: 0;
43
  }
44
 
45
+ .highlight-box {
46
+ background: linear-gradient(135deg, #f5f3ff 0%, #ede9fe 100%);
47
+ border-left: 4px solid var(--primary-color);
48
+ padding: 16px 20px;
49
+ border-radius: 0 var(--radius) var(--radius) 0;
50
+ margin: 20px 0;
51
+ }
52
+
53
+ .highlight-box p {
54
+ margin: 0;
55
+ color: #4c1d95;
56
+ font-weight: 500;
57
+ }
58
+
59
+ .problem-list {
60
+ list-style: none;
61
+ padding: 0;
62
+ margin: 16px 0;
63
+ }
64
+
65
+ .problem-list li {
66
+ margin-bottom: 16px;
67
+ padding-left: 32px;
68
+ position: relative;
69
+ line-height: 1.6;
70
+ }
71
+
72
+ .problem-list li::before {
73
+ content: "⚠️";
74
+ position: absolute;
75
+ left: 0;
76
+ top: 0;
77
+ }
78
+
79
  .feature-list {
80
  list-style: none;
81
  padding: 0;
 
85
  margin-bottom: 12px;
86
  padding-left: 28px;
87
  position: relative;
88
+ line-height: 1.6;
89
  }
90
 
91
  .feature-list li::before {
92
+ content: "";
93
  color: var(--primary-color);
94
+ font-weight: bold;
95
  position: absolute;
96
  left: 8px;
97
+ top: 0;
98
  }
99
 
100
+ .metric-comparison {
101
  display: grid;
102
+ grid-template-columns: repeat(auto-fit, minmax(250px, 1fr));
103
+ gap: 16px;
104
+ margin: 20px 0;
105
  }
106
 
107
+ .metric-card {
108
+ background: var(--light-gray);
109
+ border-radius: var(--radius);
110
+ padding: 20px;
111
+ border: 1px solid var(--border-color);
 
112
  }
113
 
114
+ .metric-card h4 {
115
  color: var(--primary-color);
116
+ margin-bottom: 8px;
117
+ font-size: 16px;
118
  }
119
 
120
+ .metric-card .status {
121
+ font-size: 12px;
122
+ padding: 4px 8px;
123
+ border-radius: 4px;
124
+ display: inline-block;
125
+ margin-bottom: 8px;
126
  }
127
 
128
+ .metric-card .status.problem {
129
+ background: #fee2e2;
130
+ color: #dc2626;
131
  }
132
+
133
+ .metric-card .status.solution {
134
+ background: #dcfce7;
135
+ color: #16a34a;
136
  }
137
 
138
+ .metric-card p {
 
 
 
 
 
 
 
 
139
  font-size: 14px;
140
+ margin: 0;
141
+ color: #666;
142
  }
143
 
144
+ .team-section {
145
+ margin-top: 20px;
146
+ }
147
+
148
+ .team-grid {
149
+ display: grid;
150
+ grid-template-columns: repeat(auto-fill, minmax(200px, 1fr));
151
+ gap: 16px;
152
+ margin-top: 16px;
 
 
 
 
 
153
  }
154
 
155
+ .team-member {
156
+ background: var(--light-gray);
157
+ border-radius: var(--radius);
158
+ padding: 16px;
159
+ text-align: center;
160
+ border: 1px solid var(--border-color);
161
  }
162
 
163
+ .team-member .name {
164
+ font-weight: 600;
165
  color: var(--text-color);
166
+ margin-bottom: 4px;
167
+ }
168
+
169
+ .team-member .role {
170
+ font-size: 13px;
171
+ color: #666;
172
+ }
173
+
174
+ .reference-link {
175
+ display: inline-flex;
176
+ align-items: center;
177
+ gap: 8px;
178
+ background: var(--light-gray);
179
+ padding: 12px 20px;
180
+ border-radius: var(--radius);
181
+ text-decoration: none;
182
+ color: var(--primary-color);
183
+ font-weight: 500;
184
+ border: 1px solid var(--border-color);
185
+ transition: all 0.2s;
186
+ margin-top: 12px;
187
+ }
188
+
189
+ .reference-link:hover {
190
+ background: var(--primary-color);
191
+ color: white;
192
+ border-color: var(--primary-color);
193
  }
194
 
195
  .faq-item {
 
205
  .faq-answer {
206
  line-height: 1.6;
207
  }
208
+
209
  /* Dark mode styles */
210
  @media (prefers-color-scheme: dark) {
211
  .about-section {
 
217
  color: var(--text-color);
218
  }
219
 
220
+ .highlight-box {
221
+ background: linear-gradient(135deg, rgba(91, 94, 255, 0.1) 0%, rgba(91, 94, 255, 0.05) 100%);
222
+ }
223
+
224
+ .highlight-box p {
225
+ color: #a5b4fc;
226
+ }
227
+
228
+ .metric-card {
229
  background-color: var(--secondary-color);
 
230
  }
231
 
232
+ .metric-card p {
233
+ color: #aaa;
 
234
  }
235
 
236
+ .team-member {
237
+ background-color: var(--secondary-color);
238
  }
239
 
240
+ .team-member .role {
241
+ color: #aaa;
242
  }
243
 
244
+ .reference-link {
245
+ background-color: var(--secondary-color);
246
  }
247
 
248
+ .faq-question {
249
+ color: var(--primary-color);
250
  }
251
  }
 
252
  </style>
253
  {% endblock %}
254
 
255
  {% block content %}
256
  <div class="about-container">
257
  <div class="about-section">
258
+ <h2>🎤 한국어 TTS 아레나에 오신 것을 환영합니다</h2>
259
  <p>
260
+ 한국어 TTS 아레나는 다양한 음성 합성(TTS) 모델을 <strong>블라인드 테스트</strong>로 비교 평가하는
261
+ 커뮤니티 기반 플랫폼입니다. LMsys
262
+ <a href="https://chat.lmsys.org/" target="_blank" rel="noopener">Chatbot Arena</a>에서 영감을 받아,
263
+ 누구나 한국어 TTS 모델의 품질을 직접 비교하고 평가할 수 있는 공간을 만들었습니다.
 
 
264
  </p>
265
+ <div class="highlight-box">
266
+ <p>💡 두 모델의 음성을 듣고 더 자연스러운 쪽에 투표하세요. 모델 이름은 투표 후에 공개됩니다.</p>
267
+ </div>
268
  </div>
269
 
270
  <div class="about-section">
271
+ <h2>🤔 왜 한국어 TTS 벤치마크가 필요한가?</h2>
272
  <p>
273
+ 여러 상용 TTS가 이미 존재하지만, <strong>한국어에 특화된 신뢰할 수 있는 벤치마크</strong>는
274
+ 부재한 상황입니다. 글로벌 TTS 모델들은 한국어 처리에서 여러 한계를 보이고 있습니다.
 
275
  </p>
276
+
277
+ <h3>기존 평가 방식의 한계</h3>
278
+ <div class="metric-comparison">
279
+ <div class="metric-card">
280
+ <h4>WER (Word Error Rate)</h4>
281
+ <span class="status problem">문제 있음</span>
282
+ <p>한국어의 복잡한 발화 패턴(숫자, 날짜, 전화번호, 주문번호 등)을 STT로 평가할 때
283
+ 정확도가 떨어져 실제 발화 품질을 제대로 반영하지 못합니다.</p>
284
+ </div>
285
+ <div class="metric-card">
286
+ <h4>MOS (Mean Opinion Score)</h4>
287
+ <span class="status problem">한계 존재</span>
288
+ <p>소규모 참가자를 대상으로 한 주관적 평가로, 비용이 많이 들고
289
+ 대규모 커뮤니티의 다양한 의견을 반영하기 어렵습니다.</p>
290
+ </div>
291
+ <div class="metric-card">
292
+ <h4>Arena 방식</h4>
293
+ <span class="status solution">해결책</span>
294
+ <p>커뮤니티 전체가 참여하는 블라인드 A/B 테스트로,
295
+ Elo 레이팅 시스템을 통해 객관적인 순위를 도출합니다.</p>
296
+ </div>
297
+ </div>
298
+
299
+ <h3>글로벌 TTS 모델의 한국어 한계</h3>
300
+ <ul class="problem-list">
301
+ <li>
302
+ <strong>운율(Prosody)의 부자연스러움</strong><br>
303
+ 상담사처럼 자연스러운 억양과 톤을 구현하지 못하고, 단조로운(monotone) 발화가 생성됩니다.
304
+ </li>
305
+ <li>
306
+ <strong>한국어 상식 기반 발화 처리 취약</strong><br>
307
+ 한·영 혼용, 날짜·시간, 주문/고유번호, URL·이메일 등 한국어 특유의 발화 패턴을
308
+ 제대로 처리하지 못합니다.
309
+ </li>
310
+ <li>
311
+ <strong>숫자 발화의 어려움</strong><br>
312
+ "19,992원"을 "만 구천 구백 구십 이원"으로 자연스럽게 읽거나,
313
+ 전화번호 형식(011-1234-1234)을 올바르게 발화하는 것이 어렵습니다.
314
+ </li>
315
+ <li>
316
+ <strong>전문 용어 및 약어 처리</strong><br>
317
+ "%p"를 "퍼센트포인트"로 읽는 등의 상식 기반 추론이 필요한 발화에 취약합니다.
318
+ </li>
319
+ </ul>
320
+ </div>
321
+
322
+ <div class="about-section">
323
+ <h2>⚙️ 아레나 작동 방식</h2>
324
  <p>
325
+ 평가 방식은 간단합니다. 텍스트를 입력하면 두 개의 TTS 모델이 각각 음성을 생성합니다.
326
+ 두 샘플을 듣고 더 자연스러운 쪽에 투표하세요. 편향을 방지하기 위해 모델 이름은
327
+ 투표 후에만 공개됩니다.
328
  </p>
329
+ <ul class="feature-list">
330
+ <li>직접 텍스트를 입력하거나 랜덤 문장을 선택할 수 있습니다</li>
331
+ <li>동일한 텍스트로 생성된 두 TTS 모델의 음성을 비교합니다</li>
332
+ <li>더 자연스럽고, 명확하며, 표현력 있는 음성에 투표합니다</li>
333
+ <li>리더보드에서 모델 순위를 확인할 수 있습니다</li>
334
+ <li>Elo 레이팅 시스템으로 객관적인 순위가 산출됩니다</li>
335
+ </ul>
336
  </div>
337
 
338
  <div class="about-section">
339
+ <h2>📊 평가 대상 모델</h2>
340
  <p>
341
+ 현재 아레나에서는 다음과 같은 한국어 지원 TTS 모델들을 평가하고 있습니다:
 
 
342
  </p>
343
  <ul class="feature-list">
344
+ <li><strong>채널톡 TTS</strong> - 상담사향 프로소디에 최적화된 한국어 TTS</li>
345
+ <li><strong>OpenAI TTS</strong> - GPT 기반 다국어 TTS</li>
346
+ <li><strong>ElevenLabs</strong> - Multilingual v2 모델</li>
347
+ <li><strong>Google Cloud TTS</strong> - WaveNet/Neural2 한국어 음성</li>
 
348
  </ul>
349
+ <p>
350
+ 더 많은 모델이 지속적으로 추가될 예정입니다.
351
+ 새로운 모델 추가를 원하시면 문의해 주세요.
352
+ </p>
353
  </div>
354
 
355
  <div class="about-section">
356
+ <h2>❓ 자주 묻는 질문</h2>
 
 
 
 
 
 
 
 
 
 
 
 
357
  <div class="faq-item">
358
+ <div class="faq-question">모델 순위는 어떻게 결정되나요?</div>
359
  <div class="faq-answer">
360
+ 체스 랭킹과 유사한 Elo 레이팅 시스템을 사용합니다. 투표를 받은 모델의 점수가 올라가고,
361
+ 상대 모델의 점수는 내려갑니다. 변동 폭은 두 모델의 현재 레이팅에 따라 달라집니다.
362
  </div>
363
  </div>
364
  <div class="faq-item">
365
+ <div class="faq-question">로그인이 필요한가요?</div>
366
  <div class="faq-answer">
367
+ 투표를 위해서는 Hugging Face 로그인이 필요합니다. 로그인하면 투표 기록을 추적하고
368
+ 개인 리더보드에서 선호하는 모델을 확인할 수 있습니다.
369
  </div>
370
  </div>
371
  <div class="faq-item">
372
+ <div class="faq-question">새로운 모델을 추가하고 싶어요.</div>
373
  <div class="faq-answer">
374
+ 새로운 TTS 모델 추가 요청은 언제든 환영합니다.
375
+ 출시 전 익명 평가를 원하시는 경우에도 문의해 주세요.
376
  </div>
377
  </div>
378
  <div class="faq-item">
379
+ <div class="faq-question">어떤 기준으로 투표해야 하나요?</div>
380
  <div class="faq-answer">
381
+ 자연스러움, 발음 정확도, 억양, 감정 표현 등을 종합적으로 고려해서
382
+ 더 "사람 같은" 음성에 투표해 주세요.
 
 
 
 
 
383
  </div>
384
  </div>
385
  </div>
386
 
387
  <div class="about-section">
388
+ <h2>🔗 참고 자료</h2>
389
  <p>
390
+ 채널톡 TTS 팀의 연구 내용과 기술적 접근 방식에 대해 알아보세요:
391
  </p>
392
+ <a href="https://tts.ch.dev/" target="_blank" rel="noopener" class="reference-link">
393
+ <svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
394
+ <path d="M18 13v6a2 2 0 0 1-2 2H5a2 2 0 0 1-2-2V8a2 2 0 0 1 2-2h6"/>
395
+ <polyline points="15 3 21 3 21 9"/>
396
+ <line x1="10" y1="14" x2="21" y2="3"/>
397
+ </svg>
398
+ Channel TTS: Towards Real-World Prosody for Conversational Agents
399
+ </a>
 
 
 
 
 
 
 
 
 
 
 
 
400
  </div>
401
 
402
  <div class="about-section">
403
+ <h2>👥 만든 사람들</h2>
404
  <p>
405
+ 이 프로젝트는 <a href="https://channel.io/ko" target="_blank" rel="noopener">채널톡</a> AI팀에서 제작했습니다.
406
  </p>
407
+ <div class="team-grid">
408
+ <div class="team-member">
409
+ <div class="name">Robin (신승윤)</div>
410
+ <div class="role">AI Team - Speech</div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
411
  </div>
412
+ <div class="team-member">
413
+ <div class="name">Jake (황정인)</div>
414
+ <div class="role">AI Team Lead</div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
415
  </div>
416
  </div>
417
  </div>
418
 
419
  <div class="about-section">
420
+ <h2>📜 개인정보 및 라이선스</h2>
421
  <p>
422
+ 입력하신 텍스트와 생성된 오디오는 연구 목적으로 저장될 수 있습니다.
423
+ 로그인한 경우 투표 기록이 계정과 연결됩니다.
 
424
  </p>
 
 
 
 
425
  <p>
426
+ 생성된 오디오 클립은 개인적, 비상업적 용도로만 사용할 수 있으며 재배포할 수 없습니다.
 
 
 
427
  </p>
428
  </div>
429
  </div>
430
+ {% endblock %}
templates/arena.html CHANGED
@@ -1,6 +1,6 @@
1
  {% extends "base.html" %}
2
 
3
- {% block title %}Arena - TTS Arena{% endblock %}
4
 
5
  {% block current_page %}Arena{% endblock %}
6
 
@@ -12,25 +12,20 @@
12
  <!-- Login prompt overlay -->
13
  <div id="login-prompt-overlay" class="login-prompt-overlay" style="display: none;">
14
  <div class="login-prompt-content">
15
- <h3>Login Required</h3>
16
- <p>You need to be logged in to use TTS Arena. Login to generate audio and vote on models!</p>
17
  <div class="login-prompt-actions">
18
- <button class="login-prompt-close">Maybe later</button>
19
- <a href="{{ url_for('auth.login', next=request.path) }}" class="login-prompt-btn">Login with Hugging Face</a>
20
  </div>
21
  </div>
22
  </div>
23
  {% endif %}
24
 
25
- <div class="tabs">
26
- <div class="tab active" data-tab="tts">TTS</div>
27
- <div class="tab" data-tab="conversational">Conversational</div>
28
- </div>
29
-
30
  <div id="tts-tab" class="tab-content active">
31
  <form class="input-container">
32
  <div class="input-group">
33
- <button type="button" class="segmented-btn random-btn" title="Roll random text">
34
  <svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-shuffle-icon lucide-shuffle">
35
  <path d="m18 14 4 4-4 4" />
36
  <path d="m18 2 4 4-4 4" />
@@ -39,14 +34,14 @@
39
  <path d="M22 18h-6.041a4 4 0 0 1-3.3-1.8l-.359-.45" />
40
  </svg>
41
  </button>
42
- <input type="text" class="text-input" placeholder="Enter text to synthesize...">
43
- <button type="submit" class="segmented-btn synth-btn">Synthesize</button>
44
  </div>
45
- <button type="submit" class="mobile-synth-btn">Synthesize</button>
46
  </form>
47
 
48
  <div id="initial-keyboard-hint" class="keyboard-hint">
49
- Press <kbd>R</kbd> for random text, <kbd>N</kbd> for next random round, <kbd>Enter</kbd> to generate
50
  </div>
51
 
52
  <div class="loading-container" style="display: none;">
@@ -61,18 +56,18 @@
61
  <span></span>
62
  </div>
63
  </div>
64
- <div class="loader-text">Generating audio samples...</div>
65
- <div class="loader-subtext">This may take up to 30 seconds</div>
66
  </div>
67
  </div>
68
 
69
  <div class="players-container" style="display: none;">
70
  <div class="players-row">
71
  <div class="player">
72
- <div class="player-label">Model A <span class="model-name-display"></span></div>
73
  <div class="wave-player-container" data-model="a"></div>
74
  <button class="vote-btn" data-model="a" disabled>
75
- Vote for A
76
  <span class="shortcut-key">A</span>
77
  <span class="vote-loader" style="display: none;">
78
  <div class="vote-spinner"></div>
@@ -81,10 +76,10 @@
81
  </div>
82
 
83
  <div class="player">
84
- <div class="player-label">Model B <span class="model-name-display"></span></div>
85
  <div class="wave-player-container" data-model="b"></div>
86
  <button class="vote-btn" data-model="b" disabled>
87
- Vote for B
88
  <span class="shortcut-key">B</span>
89
  <span class="vote-loader" style="display: none;">
90
  <div class="vote-spinner"></div>
@@ -95,114 +90,23 @@
95
  </div>
96
 
97
  <div class="vote-results" style="display: none;">
98
- <h3 class="results-heading">Vote Recorded!</h3>
99
  <div class="results-content">
100
  <div class="chosen-model">
101
- <strong>You chose:</strong> <span class="chosen-model-name"></span>
102
  </div>
103
  <div class="rejected-model">
104
- <strong>Over:</strong> <span class="rejected-model-name"></span>
105
  </div>
106
  </div>
107
  </div>
108
 
109
  <div class="next-round-container" style="display: none;">
110
- <button class="next-round-btn">Next Round</button>
111
  </div>
112
 
113
  <div id="playback-keyboard-hint" class="keyboard-hint" style="display: none;">
114
- Press <kbd>Space</kbd> to play/pause, <kbd>A</kbd>/<kbd>B</kbd> to vote, <kbd>R</kbd> for random text, <kbd>N</kbd> for next random round
115
- </div>
116
- </div>
117
-
118
- <div id="conversational-tab" class="tab-content">
119
- <div class="podcast-container">
120
- <div class="podcast-controls">
121
- <button type="button" class="segmented-btn random-script-btn" title="Load random script">
122
- <svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-shuffle-icon lucide-shuffle">
123
- <path d="m18 14 4 4-4 4" />
124
- <path d="m18 2 4 4-4 4" />
125
- <path d="M2 18h1.973a4 4 0 0 0 3.3-1.7l5.454-8.6a4 4 0 0 1 3.3-1.7H22" />
126
- <path d="M2 6h1.972a4 4 0 0 1 3.6 2.2" />
127
- <path d="M22 18h-6.041a4 4 0 0 1-3.3-1.8l-.359-.45" />
128
- </svg>
129
- Random Script
130
- </button>
131
- <button type="button" class="podcast-synth-btn">Generate Podcast</button>
132
- </div>
133
-
134
- <div class="podcast-script-container">
135
- <div class="podcast-lines">
136
- <!-- Script lines will be added here -->
137
- </div>
138
-
139
- <button type="button" class="add-line-btn">+ Add Line</button>
140
-
141
- <div class="keyboard-hint podcast-keyboard-hint">
142
- Press <kbd>Ctrl</kbd>+<kbd>Enter</kbd> or <kbd>Alt</kbd>+<kbd>Enter</kbd> to add a new line
143
- </div>
144
- </div>
145
-
146
- <div class="podcast-loading-container" style="display: none;">
147
- <div class="loader-wrapper">
148
- <div class="loader-animation">
149
- <div class="sound-wave">
150
- <span></span>
151
- <span></span>
152
- <span></span>
153
- <span></span>
154
- <span></span>
155
- <span></span>
156
- </div>
157
- </div>
158
- <div class="loader-text">Generating podcast...</div>
159
- <div class="loader-subtext">This may take up to a minute</div>
160
- </div>
161
- </div>
162
-
163
- <div class="podcast-player-container" style="display: none;">
164
- <div class="players-row">
165
- <div class="player">
166
- <div class="player-label">Model A <span class="model-name-display"></span></div>
167
- <div class="podcast-wave-player-a"></div>
168
- <button class="vote-btn" data-model="a" disabled>
169
- Vote for A
170
- <span class="shortcut-key">A</span>
171
- <span class="vote-loader" style="display: none;">
172
- <div class="vote-spinner"></div>
173
- </span>
174
- </button>
175
- </div>
176
-
177
- <div class="player">
178
- <div class="player-label">Model B <span class="model-name-display"></span></div>
179
- <div class="podcast-wave-player-b"></div>
180
- <button class="vote-btn" data-model="b" disabled>
181
- Vote for B
182
- <span class="shortcut-key">B</span>
183
- <span class="vote-loader" style="display: none;">
184
- <div class="vote-spinner"></div>
185
- </span>
186
- </button>
187
- </div>
188
- </div>
189
-
190
- <div class="podcast-vote-results vote-results" style="display: none;">
191
- <h3 class="results-heading">Vote Recorded!</h3>
192
- <div class="results-content">
193
- <div class="chosen-model">
194
- <strong>You chose:</strong> <span class="chosen-model-name"></span>
195
- </div>
196
- <div class="rejected-model">
197
- <strong>Over:</strong> <span class="rejected-model-name"></span>
198
- </div>
199
- </div>
200
- </div>
201
-
202
- <div class="podcast-next-round-container next-round-container" style="display: none;">
203
- <button class="podcast-next-round-btn next-round-btn">Next Round <span class="shortcut-key">N</span></button>
204
- </div>
205
- </div>
206
  </div>
207
  </div>
208
 
@@ -455,34 +359,6 @@
455
  }
456
  }
457
 
458
- /* Tab styling */
459
- .tabs {
460
- display: flex;
461
- border-bottom: 1px solid var(--border-color);
462
- margin-bottom: 24px;
463
- }
464
-
465
- .tab {
466
- padding: 12px 24px;
467
- cursor: pointer;
468
- position: relative;
469
- font-weight: 500;
470
- }
471
-
472
- .tab.active {
473
- color: var(--primary-color);
474
- }
475
-
476
- .tab.active::after {
477
- content: '';
478
- position: absolute;
479
- bottom: -1px;
480
- left: 0;
481
- width: 100%;
482
- height: 2px;
483
- background-color: var(--primary-color);
484
- }
485
-
486
  .tab-content {
487
  display: none;
488
  }
@@ -491,38 +367,6 @@
491
  display: block;
492
  }
493
 
494
- /* Coming soon styling */
495
- .coming-soon-container {
496
- display: flex;
497
- flex-direction: column;
498
- align-items: center;
499
- justify-content: center;
500
- text-align: center;
501
- padding: 60px 20px;
502
- background-color: var(--light-gray);
503
- border-radius: var(--radius);
504
- margin: 20px 0;
505
- }
506
-
507
- .coming-soon-icon {
508
- color: var(--primary-color);
509
- margin-bottom: 20px;
510
- }
511
-
512
- .coming-soon-title {
513
- font-size: 24px;
514
- font-weight: 600;
515
- margin-bottom: 16px;
516
- color: var(--text-color);
517
- }
518
-
519
- .coming-soon-text {
520
- font-size: 16px;
521
- color: #666;
522
- max-width: 500px;
523
- line-height: 1.5;
524
- }
525
-
526
  .model-name-display {
527
  font-size: 0.9em;
528
  color: #666;
@@ -581,14 +425,6 @@
581
  }
582
  /* Dark mode styles */
583
  @media (prefers-color-scheme: dark) {
584
- .coming-soon-container {
585
- background-color: var(--light-gray);
586
- }
587
-
588
- .coming-soon-text {
589
- color: #aaa;
590
- }
591
-
592
  .model-name-display {
593
  color: #aaa;
594
  }
@@ -658,347 +494,30 @@
658
  }
659
 
660
  .random-btn:hover {
661
- background-color: rgba(255, 255, 255, 0.1);
662
- }
663
-
664
- .vote-recorded {
665
- background-color: var(--light-gray);
666
- border-color: var(--border-color);
667
- }
668
-
669
- /* Ensure border-radius is maintained during loading state */
670
- .vote-btn.loading {
671
- border-radius: var(--radius);
672
- }
673
-
674
- /* Dark mode keyboard hint */
675
- .keyboard-hint {
676
- color: #aaa;
677
- }
678
-
679
- .keyboard-hint kbd {
680
- color: #ddd;
681
- background-color: #333;
682
- border-color: #555;
683
- box-shadow: 0 1px 0 rgba(255,255,255,0.1);
684
- }
685
- }
686
-
687
- /* Podcast UI styles */
688
- .podcast-container {
689
- width: 100%;
690
- }
691
-
692
- .podcast-controls {
693
- display: flex;
694
- gap: 12px;
695
- margin-bottom: 24px;
696
- }
697
-
698
- .random-script-btn {
699
- display: flex;
700
- align-items: center;
701
- gap: 8px;
702
- padding: 0 16px;
703
- height: 40px;
704
- background-color: white;
705
- border: 1px solid var(--border-color);
706
- border-radius: var(--radius);
707
- cursor: pointer;
708
- transition: background-color 0.2s;
709
- }
710
-
711
- .random-script-btn:hover {
712
- background-color: var(--light-gray);
713
- }
714
-
715
- .podcast-synth-btn {
716
- padding: 0 24px;
717
- height: 40px;
718
- background-color: var(--primary-color);
719
- color: white;
720
- border: none;
721
- border-radius: var(--radius);
722
- font-weight: 500;
723
- cursor: pointer;
724
- transition: background-color 0.2s;
725
- }
726
-
727
- .podcast-synth-btn:hover {
728
- background-color: #4038c7;
729
- }
730
-
731
- .podcast-script-container {
732
- border: 1px solid var(--border-color);
733
- border-radius: var(--radius);
734
- overflow: hidden;
735
- margin-bottom: 24px;
736
- }
737
-
738
- .podcast-lines {
739
- max-height: 500px;
740
- overflow-y: auto;
741
- }
742
-
743
- .podcast-line {
744
- display: flex;
745
- border-bottom: 1px solid var(--border-color);
746
- }
747
-
748
- .speaker-label {
749
- width: 120px;
750
- padding: 12px;
751
- display: flex;
752
- align-items: center;
753
- justify-content: center;
754
- font-weight: 500;
755
- border-right: 1px solid var(--border-color);
756
- background-color: var(--light-gray);
757
- white-space: nowrap;
758
- }
759
-
760
- .speaker-1 {
761
- color: #3b82f6;
762
- }
763
-
764
- .speaker-2 {
765
- color: #ef4444;
766
- }
767
-
768
- .line-input {
769
- flex: 1;
770
- padding: 12px;
771
- border: none;
772
- outline: none;
773
- font-size: 1em;
774
- }
775
-
776
- .line-input:focus {
777
- background-color: rgba(80, 70, 229, 0.03);
778
- }
779
-
780
- .remove-line-btn {
781
- width: 40px;
782
- display: flex;
783
- align-items: center;
784
- justify-content: center;
785
- background: none;
786
- border: none;
787
- border-left: 1px solid var(--border-color);
788
- cursor: pointer;
789
- color: #888;
790
- transition: color 0.2s, background-color 0.2s;
791
- }
792
-
793
- .remove-line-btn:hover {
794
- color: #ef4444;
795
- background-color: rgba(239, 68, 68, 0.1);
796
- }
797
-
798
- .add-line-btn {
799
- width: 100%;
800
- padding: 12px;
801
- border: none;
802
- background-color: var(--light-gray);
803
- cursor: pointer;
804
- font-weight: 500;
805
- transition: background-color 0.2s;
806
- margin-bottom: 0;
807
- border-bottom: 1px solid var(--border-color);
808
- }
809
-
810
- .add-line-btn:hover {
811
- background-color: rgba(80, 70, 229, 0.1);
812
- }
813
-
814
- .podcast-keyboard-hint {
815
- padding: 10px;
816
- text-align: center;
817
- background-color: var(--light-gray);
818
- border-top: 1px solid var(--border-color);
819
- margin-top: 0;
820
- font-size: 13px;
821
- }
822
-
823
- .podcast-player {
824
- border: 1px solid var(--border-color);
825
- border-radius: var(--radius);
826
- padding: 20px;
827
- margin-bottom: 24px;
828
- }
829
-
830
- .podcast-wave-player {
831
- margin: 20px 0;
832
- }
833
-
834
- .podcast-transcript-container {
835
- margin-top: 20px;
836
- padding-top: 20px;
837
- border-top: 1px solid var(--border-color);
838
- }
839
-
840
- .podcast-transcript {
841
- margin-top: 12px;
842
- line-height: 1.6;
843
- }
844
-
845
- .transcript-line {
846
- margin-bottom: 12px;
847
- }
848
-
849
- .transcript-speaker {
850
- font-weight: 600;
851
- margin-right: 8px;
852
- }
853
-
854
- .transcript-speaker.speaker-1 {
855
- color: #3b82f6;
856
- }
857
-
858
- .transcript-speaker.speaker-2 {
859
- color: #ef4444;
860
- }
861
-
862
- /* Responsive styles for podcast UI */
863
- @media (max-width: 768px) {
864
- .podcast-controls {
865
- flex-direction: column;
866
- }
867
-
868
- .random-script-btn,
869
- .podcast-synth-btn {
870
- width: 100%;
871
- height: 48px;
872
- }
873
-
874
- /* Stack podcast players vertically on mobile */
875
- .podcast-player-container .players-row {
876
- flex-direction: column;
877
- gap: 16px;
878
- }
879
-
880
- .podcast-line {
881
- flex-direction: column;
882
- padding-bottom: 0;
883
- margin-bottom: 0;
884
- }
885
-
886
- .speaker-label {
887
- width: 100%;
888
- border-right: none;
889
- border-bottom: 1px solid var(--border-color);
890
- padding: 8px 10px;
891
- justify-content: flex-start;
892
- }
893
-
894
- .line-input {
895
- width: 100%;
896
- padding: 8px 10px;
897
- }
898
-
899
- .remove-line-btn {
900
- position: absolute;
901
- top: 6px;
902
- right: 10px;
903
- border-left: none;
904
- background-color: rgba(255, 255, 255, 0.5);
905
- border-radius: 4px;
906
- width: 30px;
907
- height: 30px;
908
- }
909
-
910
- .podcast-line {
911
- position: relative;
912
- }
913
-
914
- /* Dark mode adjustments for mobile */
915
- @media (prefers-color-scheme: dark) {
916
- .remove-line-btn {
917
- background-color: rgba(50, 50, 60, 0.7);
918
- }
919
- }
920
- }
921
-
922
- /* Dark mode styles for podcast UI */
923
- @media (prefers-color-scheme: dark) {
924
- .random-script-btn {
925
- background-color: var(--light-gray);
926
- color: var(--text-color);
927
- border-color: var(--border-color);
928
- }
929
-
930
- .add-line-btn {
931
- background-color: var(--light-gray);
932
- color: var(--text-color);
933
- border-color: var(--border-color);
934
- }
935
-
936
- .line-input {
937
- background-color: var(--light-gray);
938
- color: var(--text-color);
939
- }
940
-
941
- .line-input:focus {
942
- background-color: rgba(108, 99, 255, 0.1);
943
- }
944
- }
945
-
946
- .podcast-loading-container {
947
- display: flex;
948
- justify-content: center;
949
- align-items: center;
950
- position: fixed;
951
- top: 0;
952
- left: 0;
953
- width: 100%;
954
- height: 100vh;
955
- background-color: rgba(255, 255, 255, 0.9);
956
- z-index: 1000;
957
- }
958
-
959
- @media (prefers-color-scheme: dark) {
960
- .podcast-loading-container {
961
- background-color: rgba(18, 18, 24, 0.9);
962
- }
963
- }
964
-
965
- .podcast-vote-results {
966
- background-color: #f0f4ff;
967
- border: 1px solid #d0d7f7;
968
- border-radius: var(--radius);
969
- padding: 16px;
970
- margin: 24px 0;
971
- }
972
-
973
- .podcast-next-round-container {
974
- margin-top: 24px;
975
- text-align: center;
976
- }
977
-
978
- .podcast-next-round-btn {
979
- padding: 12px 24px;
980
- background-color: var(--primary-color);
981
- color: white;
982
- border: none;
983
- border-radius: var(--radius);
984
- font-weight: 500;
985
- cursor: pointer;
986
- position: relative;
987
- width: 100%;
988
- font-size: 1rem;
989
- transition: background-color 0.2s;
990
- }
991
-
992
- .podcast-next-round-btn:hover {
993
- background-color: #4038c7;
994
- }
995
-
996
- /* Dark mode adjustments */
997
- @media (prefers-color-scheme: dark) {
998
- .podcast-vote-results {
999
  background-color: var(--light-gray);
1000
  border-color: var(--border-color);
1001
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1002
  }
1003
 
1004
  /* Login prompt overlay styles */
@@ -1134,8 +653,6 @@
1134
  const nextRoundBtn = document.querySelector('.next-round-btn');
1135
  const nextRoundContainer = document.querySelector('.next-round-container');
1136
  const randomBtn = document.querySelector('.random-btn');
1137
- const tabs = document.querySelectorAll('.tab');
1138
- const tabContents = document.querySelectorAll('.tab-content');
1139
  const voteResultsContainer = document.querySelector('.vote-results');
1140
  const chosenModelNameElement = document.querySelector('.chosen-model-name');
1141
  const rejectedModelNameElement = document.querySelector('.rejected-model-name');
@@ -1182,55 +699,6 @@
1182
  });
1183
  }
1184
 
1185
- // Check URL hash for direct tab access
1186
- function checkHashAndSetTab() {
1187
- const hash = window.location.hash.toLowerCase();
1188
- if (hash === '#conversational') {
1189
- // Switch to conversational tab
1190
- tabs.forEach(t => t.classList.remove('active'));
1191
- tabContents.forEach(c => c.classList.remove('active'));
1192
-
1193
- document.querySelector('.tab[data-tab="conversational"]').classList.add('active');
1194
- document.getElementById('conversational-tab').classList.add('active');
1195
- } else if (hash === '#tts') {
1196
- // Switch to TTS tab (explicit)
1197
- tabs.forEach(t => t.classList.remove('active'));
1198
- tabContents.forEach(c => c.classList.remove('active'));
1199
-
1200
- document.querySelector('.tab[data-tab="tts"]').classList.add('active');
1201
- document.getElementById('tts-tab').classList.add('active');
1202
- }
1203
- }
1204
-
1205
- // Check hash on page load
1206
- checkHashAndSetTab();
1207
-
1208
- // Listen for hash changes
1209
- window.addEventListener('hashchange', checkHashAndSetTab);
1210
-
1211
- // Tab switching functionality
1212
- tabs.forEach(tab => {
1213
- tab.addEventListener('click', function() {
1214
- const tabId = this.dataset.tab;
1215
-
1216
- // Update URL hash without page reload
1217
- history.replaceState(null, null, `#${tabId}`);
1218
-
1219
- // Remove active class from all tabs and contents
1220
- tabs.forEach(t => t.classList.remove('active'));
1221
- tabContents.forEach(c => c.classList.remove('active'));
1222
-
1223
- // Add active class to clicked tab and corresponding content
1224
- this.classList.add('active');
1225
- document.getElementById(`${tabId}-tab`).classList.add('active');
1226
-
1227
- // Reset TTS tab state if switching away from it
1228
- if (tabId !== 'tts') {
1229
- resetToInitialState();
1230
- }
1231
- });
1232
- });
1233
-
1234
  function handleSynthesize(e) {
1235
  if (e) {
1236
  e.preventDefault();
@@ -1244,12 +712,12 @@
1244
 
1245
  const text = textInput.value.trim();
1246
  if (!text) {
1247
- openToast("Please enter some text to synthesize", "warning");
1248
  return;
1249
  }
1250
 
1251
  if (text.length > 1000) {
1252
- openToast("Text is too long. Please keep it under 1000 characters.", "warning");
1253
  return;
1254
  }
1255
 
@@ -1289,7 +757,7 @@
1289
  .then(response => {
1290
  if (!response.ok) {
1291
  return response.json().then(err => {
1292
- throw new Error(err.error || 'Failed to generate TTS');
1293
  });
1294
  }
1295
  return response.json();
@@ -1336,7 +804,7 @@
1336
 
1337
  // Handle authentication errors specially
1338
  if (error.message.includes('logged in to generate') || error.message.includes('logged in to vote')) {
1339
- openToast("Please log in to use TTS Arena. <a href='{{ url_for('auth.login', next=request.path) }}' style='color: white; text-decoration: underline;'>Login now</a>", "error");
1340
  } else {
1341
  openToast(error.message, "error");
1342
  }
@@ -1367,7 +835,7 @@
1367
  .then(response => {
1368
  if (!response.ok) {
1369
  return response.json().then(err => {
1370
- throw new Error(err.error || 'Failed to submit vote');
1371
  });
1372
  }
1373
  return response.json();
@@ -1403,7 +871,7 @@
1403
  nextRoundContainer.style.display = 'block';
1404
 
1405
  // Show success toast
1406
- openToast("Vote recorded successfully!", "success");
1407
  })
1408
  .catch(error => {
1409
  // Re-enable vote buttons
@@ -1414,7 +882,7 @@
1414
 
1415
  // Handle authentication errors specially
1416
  if (error.message.includes('logged in to vote')) {
1417
- openToast("Please log in to vote. <a href='{{ url_for('auth.login', next=request.path) }}' style='color: white; text-decoration: underline;'>Login now</a>", "error");
1418
  } else {
1419
  openToast(error.message, "error");
1420
  }
@@ -1470,10 +938,13 @@
1470
  // Select a random text from the unconsumed sentences
1471
  selectedText = cachedSentences[Math.floor(Math.random() * cachedSentences.length)];
1472
  console.log("Using random sentence from unconsumed sentences.");
 
 
 
 
1473
  } else {
1474
- // No fallback to consumed sentences for security reasons
1475
- console.error("No unconsumed sentences available. All sentences may have been used.");
1476
- openToast("No unused sentences available. All sentences from the dataset may have been consumed.", "error");
1477
  return;
1478
  }
1479
  textInput.value = selectedText;
@@ -1481,7 +952,7 @@
1481
  }
1482
 
1483
  function showListenToastMessage() {
1484
- openToast("Please listen to both audio samples before voting", "info");
1485
  }
1486
 
1487
  // New function for N shortcut: Random + Synthesize
@@ -1589,562 +1060,4 @@
1589
  fetchCachedSentences();
1590
  });
1591
  </script>
1592
-
1593
- <script>
1594
- document.addEventListener('DOMContentLoaded', function() {
1595
- // Variables for podcast UI
1596
- const podcastContainer = document.querySelector('.podcast-container');
1597
- const podcastLinesContainer = document.querySelector('.podcast-lines');
1598
- const addLineBtn = document.querySelector('.add-line-btn');
1599
- const randomScriptBtn = document.querySelector('.random-script-btn');
1600
- const podcastSynthBtn = document.querySelector('.podcast-synth-btn');
1601
- const podcastLoadingContainer = document.querySelector('.podcast-loading-container');
1602
- const podcastPlayerContainer = document.querySelector('.podcast-player-container');
1603
- const podcastWavePlayerA = document.querySelector('.podcast-wave-player-a');
1604
- const podcastWavePlayerB = document.querySelector('.podcast-wave-player-b');
1605
- const podcastVoteButtons = podcastPlayerContainer.querySelectorAll('.vote-btn');
1606
- const podcastVoteResults = podcastPlayerContainer.querySelector('.vote-results');
1607
- const podcastNextRoundContainer = podcastPlayerContainer.querySelector('.next-round-container');
1608
- const podcastNextRoundBtn = podcastPlayerContainer.querySelector('.next-round-btn');
1609
- const chosenModelNameElement = podcastVoteResults.querySelector('.chosen-model-name');
1610
- const rejectedModelNameElement = podcastVoteResults.querySelector('.rejected-model-name');
1611
-
1612
- let podcastWavePlayers = { a: null, b: null };
1613
- let bothPodcastSamplesPlayed = false;
1614
- let currentPodcastSessionId = null;
1615
- let podcastModelNames = { a: 'Model A', b: 'Model B' };
1616
-
1617
- // Sample random scripts for the podcast
1618
- const randomScripts = [
1619
- [
1620
- { speaker: 1, text: "Welcome to our podcast about artificial intelligence. Today we're discussing the latest advances in text-to-speech technology." },
1621
- { speaker: 2, text: "That's right! Text-to-speech has come a long way in recent years. The voices sound increasingly natural." },
1622
- { speaker: 1, text: "What do you think are the most impressive recent developments?" },
1623
- { speaker: 2, text: "I'd say the emotion and inflection that modern TTS systems can convey is truly remarkable." }
1624
- ],
1625
- [
1626
- { speaker: 1, text: "So today we're talking about climate change and its effects on our planet." },
1627
- { speaker: 2, text: "It's such an important topic. We're seeing more extreme weather events every year." },
1628
- { speaker: 1, text: "Absolutely. And the science is clear that human activity is the primary driver." },
1629
- { speaker: 2, text: "What can individuals do to help address this global challenge?" }
1630
- ],
1631
- [
1632
- { speaker: 1, text: "In today's episode, we're exploring the world of modern cinema." },
1633
- { speaker: 2, text: "Film has evolved so much since its early days. What's your favorite era of movies?" },
1634
- { speaker: 1, text: "I'm particularly fond of the 1970s New Hollywood movement. Films like The Godfather and Taxi Driver really pushed boundaries." },
1635
- { speaker: 2, text: "Interesting choice! I'm more drawn to contemporary international cinema, especially from directors like Bong Joon-ho and Park Chan-wook." }
1636
- ],
1637
- [
1638
- { speaker: 1, text: "Today we're discussing the future of remote work. How do you think it's changed the workplace?" },
1639
- { speaker: 2, text: "I believe it's revolutionized how we think about productivity and work-life balance." },
1640
- { speaker: 1, text: "Do you think companies will continue to offer remote options post-pandemic?" },
1641
- { speaker: 2, text: "Absolutely. Companies that don't embrace flexibility will struggle to attract top talent." }
1642
- ],
1643
- [
1644
- { speaker: 1, text: "Let's talk about the latest developments in renewable energy." },
1645
- { speaker: 2, text: "Solar and wind have become increasingly cost-effective in recent years." },
1646
- { speaker: 1, text: "What about emerging technologies like green hydrogen?" },
1647
- { speaker: 2, text: "That's a fascinating area with huge potential, especially for industries that are difficult to electrify." }
1648
- ],
1649
- [
1650
- { speaker: 1, text: "The world of cryptocurrency has seen massive changes lately. What's your take?" },
1651
- { speaker: 2, text: "It's certainly volatile, but I think blockchain technology has applications beyond just digital currency." },
1652
- { speaker: 1, text: "Do you see it becoming mainstream in the financial sector?" },
1653
- { speaker: 2, text: "Parts of it already are. Central banks are exploring digital currencies, and major companies are investing in blockchain." }
1654
- ],
1655
- [
1656
- { speaker: 1, text: "Mental health awareness has grown significantly in recent years." },
1657
- { speaker: 2, text: "Yes, and it's about time. The stigma around seeking help is finally starting to diminish." },
1658
- { speaker: 1, text: "What do you think has driven this change?" },
1659
- { speaker: 2, text: "I think social media has played a role, with more people openly sharing their experiences." }
1660
- ],
1661
- [
1662
- { speaker: 1, text: "Space exploration is entering an exciting new era with private companies leading the charge." },
1663
- { speaker: 2, text: "The commercialization of space has definitely accelerated innovation in the field." },
1664
- { speaker: 1, text: "Do you think we'll see humans on Mars in our lifetime?" },
1665
- { speaker: 2, text: "I'm optimistic. The technology is advancing rapidly, and there's strong motivation from both public and private sectors." }
1666
- ],
1667
- [
1668
- { speaker: 1, text: "Today's topic is sustainable fashion. How can consumers make more ethical choices?" },
1669
- { speaker: 2, text: "It starts with buying less and choosing quality items that last longer." },
1670
- { speaker: 1, text: "What about the responsibility of fashion brands themselves?" },
1671
- { speaker: 2, text: "They need to be transparent about their supply chains and commit to reducing their environmental impact." }
1672
- ],
1673
- [
1674
- { speaker: 1, text: "Let's discuss the evolution of social media and its impact on society." },
1675
- { speaker: 2, text: "It's transformed how we connect, but also created new challenges like misinformation and privacy concerns." },
1676
- { speaker: 1, text: "Do you think regulation is the answer?" },
1677
- { speaker: 2, text: "Partly, but digital literacy education is equally important so people can navigate these platforms responsibly." }
1678
- ],
1679
- [
1680
- { speaker: 1, text: "The field of genomics has seen remarkable progress. What excites you most about it?" },
1681
- { speaker: 2, text: "Personalized medicine is fascinating - the idea that treatments can be tailored to an individual's genetic makeup." },
1682
- { speaker: 1, text: "What about the ethical considerations?" },
1683
- { speaker: 2, text: "Those are crucial. We need robust frameworks to ensure these technologies are used responsibly." }
1684
- ],
1685
- [
1686
- { speaker: 1, text: "Urban planning is facing new challenges in the 21st century. What trends are you seeing?" },
1687
- { speaker: 2, text: "There's a growing focus on creating walkable, mixed-use neighborhoods that reduce car dependency." },
1688
- { speaker: 1, text: "How are cities adapting to climate change?" },
1689
- { speaker: 2, text: "Many are implementing green infrastructure like parks and permeable surfaces to manage flooding and reduce heat islands." }
1690
- ],
1691
- [
1692
- { speaker: 1, text: "The gaming industry has grown enormously in recent years. What's driving this expansion?" },
1693
- { speaker: 2, text: "Gaming has become much more accessible across different platforms, and the pandemic certainly accelerated adoption." },
1694
- { speaker: 1, text: "What do you think about the rise of esports?" },
1695
- { speaker: 2, text: "It's fascinating to see competitive gaming achieve mainstream recognition and create new career opportunities." }
1696
- ],
1697
- [
1698
- { speaker: 1, text: "Let's talk about the future of transportation. How will we get around in 20 years?" },
1699
- { speaker: 2, text: "Electric vehicles will be dominant, and autonomous driving technology will be much more widespread." },
1700
- { speaker: 1, text: "What about public transit and alternative modes?" },
1701
- { speaker: 2, text: "I think we'll see more integrated systems where bikes, scooters, and public transit work seamlessly together." }
1702
- ]
1703
- ];
1704
-
1705
- // Initialize with 2 empty lines
1706
- function initializePodcastLines() {
1707
- podcastLinesContainer.innerHTML = '';
1708
- addPodcastLine(1);
1709
- addPodcastLine(2);
1710
- }
1711
-
1712
- // Add a new podcast line
1713
- function addPodcastLine(speakerNum = null) {
1714
- const lineCount = podcastLinesContainer.querySelectorAll('.podcast-line').length;
1715
-
1716
- // If speaker number isn't specified, alternate between 1 and 2
1717
- if (speakerNum === null) {
1718
- speakerNum = (lineCount % 2) + 1;
1719
- }
1720
-
1721
- const lineElement = document.createElement('div');
1722
- lineElement.className = 'podcast-line';
1723
-
1724
- lineElement.innerHTML = `
1725
- <div class="speaker-label speaker-${speakerNum}">Speaker ${speakerNum}</div>
1726
- <input type="text" class="line-input" placeholder="Enter dialog...">
1727
- <button type="button" class="remove-line-btn" tabindex="-1">
1728
- <svg xmlns="http://www.w3.org/2000/svg" width="16" height="16" viewBox="0 0 24 24" fill="none"
1729
- stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
1730
- <line x1="18" y1="6" x2="6" y2="18"></line>
1731
- <line x1="6" y1="6" x2="18" y2="18"></line>
1732
- </svg>
1733
- </button>
1734
- `;
1735
-
1736
- podcastLinesContainer.appendChild(lineElement);
1737
-
1738
- // Add event listener to remove button
1739
- const removeBtn = lineElement.querySelector('.remove-line-btn');
1740
- removeBtn.addEventListener('click', function() {
1741
- // Don't allow removing if there are only 2 lines
1742
- if (podcastLinesContainer.querySelectorAll('.podcast-line').length > 2) {
1743
- lineElement.remove();
1744
- } else {
1745
- openToast("At least 2 lines are required", "warning");
1746
- }
1747
- });
1748
-
1749
- // Add event listener for keyboard navigation in the input field
1750
- const inputField = lineElement.querySelector('.line-input');
1751
- inputField.addEventListener('keydown', function(e) {
1752
- // Alt+Enter or Ctrl+Enter to add new line
1753
- if (e.key === 'Enter' && (e.altKey || e.ctrlKey)) {
1754
- e.preventDefault();
1755
- addPodcastLine();
1756
-
1757
- // Focus the new line's input field
1758
- setTimeout(() => {
1759
- const inputs = podcastLinesContainer.querySelectorAll('.line-input');
1760
- inputs[inputs.length - 1].focus();
1761
- }, 10);
1762
- }
1763
- });
1764
-
1765
- return lineElement;
1766
- }
1767
-
1768
- // Load a random script
1769
- function loadRandomScript() {
1770
- // Clear existing lines
1771
- podcastLinesContainer.innerHTML = '';
1772
-
1773
- // Select a random script
1774
- const randomScript = randomScripts[Math.floor(Math.random() * randomScripts.length)];
1775
-
1776
- // Add each line from the script
1777
- randomScript.forEach(line => {
1778
- const lineElement = addPodcastLine(line.speaker);
1779
- lineElement.querySelector('.line-input').value = line.text;
1780
- });
1781
- }
1782
-
1783
- // Generate podcast (mock functionality)
1784
- function generatePodcast() {
1785
- // Get all lines
1786
- const lines = [];
1787
- podcastLinesContainer.querySelectorAll('.podcast-line').forEach(line => {
1788
- const speaker_id = line.querySelector('.speaker-label').textContent.includes('1') ? 0 : 1;
1789
- const text = line.querySelector('.line-input').value.trim();
1790
-
1791
- if (text) {
1792
- lines.push({ speaker_id, text });
1793
- }
1794
- });
1795
-
1796
- // Validate that we have at least 2 lines with content
1797
- if (lines.length < 2) {
1798
- openToast("Please enter at least 2 lines of dialog", "warning");
1799
- return;
1800
- }
1801
-
1802
- // Reset vote buttons and hide results
1803
- podcastVoteButtons.forEach(btn => {
1804
- btn.disabled = true;
1805
- btn.classList.remove('selected');
1806
- btn.querySelector('.vote-loader').style.display = 'none';
1807
- });
1808
-
1809
- // Clear model name displays
1810
- const modelNameDisplays = podcastPlayerContainer.querySelectorAll('.model-name-display');
1811
- modelNameDisplays.forEach(display => {
1812
- display.textContent = '';
1813
- });
1814
-
1815
- podcastVoteResults.style.display = 'none';
1816
- podcastNextRoundContainer.style.display = 'none';
1817
-
1818
- // Reset the flag for both samples played
1819
- bothPodcastSamplesPlayed = false;
1820
-
1821
- // Show loading animation
1822
- podcastLoadingContainer.style.display = 'flex';
1823
- podcastPlayerContainer.style.display = 'none';
1824
-
1825
- // Call API to generate podcast
1826
- fetch('/api/conversational/generate', {
1827
- method: 'POST',
1828
- headers: {
1829
- 'Content-Type': 'application/json',
1830
- },
1831
- body: JSON.stringify({ script: lines }),
1832
- })
1833
- .then(response => {
1834
- if (!response.ok) {
1835
- return response.json().then(err => {
1836
- throw new Error(err.error || 'Failed to generate podcast');
1837
- });
1838
- }
1839
- return response.json();
1840
- })
1841
- .then(data => {
1842
- currentPodcastSessionId = data.session_id;
1843
-
1844
- // Hide loading
1845
- podcastLoadingContainer.style.display = 'none';
1846
-
1847
- // Show player
1848
- podcastPlayerContainer.style.display = 'block';
1849
-
1850
- // Initialize WavePlayers if not already done
1851
- if (!podcastWavePlayers.a) {
1852
- podcastWavePlayers.a = new WavePlayer(podcastWavePlayerA, {
1853
- // Add mobile-friendly options but hide native controls
1854
- backend: 'MediaElement',
1855
- mediaControls: false // Hide native audio controls
1856
- });
1857
- podcastWavePlayers.b = new WavePlayer(podcastWavePlayerB, {
1858
- // Add mobile-friendly options but hide native controls
1859
- backend: 'MediaElement',
1860
- mediaControls: false // Hide native audio controls
1861
- });
1862
-
1863
- // Load audio in waveplayers
1864
- podcastWavePlayers.a.loadAudio(data.audio_a);
1865
- podcastWavePlayers.b.loadAudio(data.audio_b);
1866
-
1867
- // Force hide loading indicators after 5 seconds as a fallback
1868
- setTimeout(() => {
1869
- if (podcastWavePlayers.a && podcastWavePlayers.a.hideLoading) {
1870
- podcastWavePlayers.a.hideLoading();
1871
- }
1872
- if (podcastWavePlayers.b && podcastWavePlayers.b.hideLoading) {
1873
- podcastWavePlayers.b.hideLoading();
1874
- }
1875
- console.log('Forced hiding of podcast loading indicators (safety timeout - existing players)');
1876
- }, 5000);
1877
- } else {
1878
- // Reset and reload for existing players
1879
- try {
1880
- podcastWavePlayers.a.wavesurfer.empty();
1881
- podcastWavePlayers.b.wavesurfer.empty();
1882
-
1883
- // Make sure loading indicators are reset
1884
- podcastWavePlayers.a.hideLoading();
1885
- podcastWavePlayers.b.hideLoading();
1886
-
1887
- podcastWavePlayers.a.loadAudio(data.audio_a);
1888
- podcastWavePlayers.b.loadAudio(data.audio_b);
1889
-
1890
- // Force hide loading indicators after 5 seconds as a fallback
1891
- setTimeout(() => {
1892
- if (podcastWavePlayers.a && podcastWavePlayers.a.hideLoading) {
1893
- podcastWavePlayers.a.hideLoading();
1894
- }
1895
- if (podcastWavePlayers.b && podcastWavePlayers.b.hideLoading) {
1896
- podcastWavePlayers.b.hideLoading();
1897
- }
1898
- console.log('Forced hiding of podcast loading indicators (safety timeout - existing players)');
1899
- }, 5000);
1900
- } catch (err) {
1901
- console.error('Error resetting podcast waveplayers:', err);
1902
-
1903
- // Recreate the players if there was an error
1904
- podcastWavePlayers.a = new WavePlayer(podcastWavePlayerA, {
1905
- backend: 'MediaElement',
1906
- mediaControls: false
1907
- });
1908
- podcastWavePlayers.b = new WavePlayer(podcastWavePlayerB, {
1909
- backend: 'MediaElement',
1910
- mediaControls: false
1911
- });
1912
-
1913
- podcastWavePlayers.a.loadAudio(data.audio_a);
1914
- podcastWavePlayers.b.loadAudio(data.audio_b);
1915
-
1916
- // Force hide loading indicators after 5 seconds as a fallback
1917
- setTimeout(() => {
1918
- if (podcastWavePlayers.a && podcastWavePlayers.a.hideLoading) {
1919
- podcastWavePlayers.a.hideLoading();
1920
- }
1921
- if (podcastWavePlayers.b && podcastWavePlayers.b.hideLoading) {
1922
- podcastWavePlayers.b.hideLoading();
1923
- }
1924
- console.log('Forced hiding of podcast loading indicators (fallback case)');
1925
- }, 5000);
1926
- }
1927
- }
1928
-
1929
- // Setup automatic sequential playback
1930
- podcastWavePlayers.a.wavesurfer.once('ready', function() {
1931
- podcastWavePlayers.a.play();
1932
-
1933
- // When audio A ends, play audio B
1934
- podcastWavePlayers.a.wavesurfer.once('finish', function() {
1935
- // Wait a short moment before playing B
1936
- setTimeout(() => {
1937
- podcastWavePlayers.b.play();
1938
-
1939
- // When audio B ends, enable voting
1940
- podcastWavePlayers.b.wavesurfer.once('finish', function() {
1941
- bothPodcastSamplesPlayed = true;
1942
- podcastVoteButtons.forEach(btn => {
1943
- btn.disabled = false;
1944
- });
1945
- });
1946
- }, 500);
1947
- });
1948
- });
1949
- })
1950
- .catch(error => {
1951
- podcastLoadingContainer.style.display = 'none';
1952
-
1953
- // Handle authentication errors specially
1954
- if (error.message.includes('logged in to generate') || error.message.includes('logged in to vote')) {
1955
- openToast("Please log in to use TTS Arena. <a href='{{ url_for('auth.login', next=request.path) }}' style='color: white; text-decoration: underline;'>Login now</a>", "error");
1956
- } else {
1957
- openToast(error.message, "error");
1958
- }
1959
- console.error('Error:', error);
1960
- });
1961
- }
1962
-
1963
- // Handle vote for a podcast model
1964
- function handlePodcastVote(model) {
1965
- // Disable both vote buttons
1966
- podcastVoteButtons.forEach(btn => {
1967
- btn.disabled = true;
1968
- if (btn.dataset.model === model) {
1969
- btn.querySelector('.vote-loader').style.display = 'flex';
1970
- }
1971
- });
1972
-
1973
- // Send vote to server
1974
- fetch('/api/conversational/vote', {
1975
- method: 'POST',
1976
- headers: {
1977
- 'Content-Type': 'application/json',
1978
- },
1979
- body: JSON.stringify({
1980
- session_id: currentPodcastSessionId,
1981
- chosen_model: model
1982
- }),
1983
- })
1984
- .then(response => {
1985
- if (!response.ok) {
1986
- return response.json().then(err => {
1987
- throw new Error(err.error || 'Failed to submit vote');
1988
- });
1989
- }
1990
- return response.json();
1991
- })
1992
- .then(data => {
1993
- // Hide loaders
1994
- podcastVoteButtons.forEach(btn => {
1995
- btn.querySelector('.vote-loader').style.display = 'none';
1996
-
1997
- // Highlight the selected button
1998
- if (btn.dataset.model === model) {
1999
- btn.classList.add('selected');
2000
- }
2001
- });
2002
-
2003
- // Store model names from vote response
2004
- podcastModelNames.a = data.names.a;
2005
- podcastModelNames.b = data.names.b;
2006
-
2007
- // Show model names after voting
2008
- const modelNameDisplays = podcastPlayerContainer.querySelectorAll('.model-name-display');
2009
- modelNameDisplays[0].textContent = data.names.a ? `(${data.names.a})` : '';
2010
- modelNameDisplays[1].textContent = data.names.b ? `(${data.names.b})` : '';
2011
-
2012
- // Show vote results
2013
- chosenModelNameElement.textContent = data.chosen_model.name;
2014
- rejectedModelNameElement.textContent = data.rejected_model.name;
2015
- podcastVoteResults.style.display = 'block';
2016
-
2017
- // Show next round button
2018
- podcastNextRoundContainer.style.display = 'block';
2019
-
2020
- // Show success toast
2021
- openToast("Vote recorded successfully!", "success");
2022
- })
2023
- .catch(error => {
2024
- // Re-enable vote buttons
2025
- podcastVoteButtons.forEach(btn => {
2026
- btn.disabled = false;
2027
- btn.querySelector('.vote-loader').style.display = 'none';
2028
- });
2029
-
2030
- // Handle authentication errors specially
2031
- if (error.message.includes('logged in to vote')) {
2032
- openToast("Please log in to vote. <a href='{{ url_for('auth.login', next=request.path) }}' style='color: white; text-decoration: underline;'>Login now</a>", "error");
2033
- } else {
2034
- openToast(error.message, "error");
2035
- }
2036
- console.error('Error:', error);
2037
- });
2038
- }
2039
-
2040
- // Reset podcast UI to initial state
2041
- function resetPodcastState() {
2042
- // Hide players, results, and next round button
2043
- podcastPlayerContainer.style.display = 'none';
2044
- podcastVoteResults.style.display = 'none';
2045
- podcastNextRoundContainer.style.display = 'none';
2046
-
2047
- // Reset vote buttons
2048
- podcastVoteButtons.forEach(btn => {
2049
- btn.disabled = true;
2050
- btn.classList.remove('selected');
2051
- btn.querySelector('.vote-loader').style.display = 'none';
2052
- });
2053
-
2054
- // Clear model name displays
2055
- const modelNameDisplays = podcastPlayerContainer.querySelectorAll('.model-name-display');
2056
- modelNameDisplays.forEach(display => {
2057
- display.textContent = '';
2058
- });
2059
-
2060
- // Stop any playing audio
2061
- if (podcastWavePlayers.a) podcastWavePlayers.a.stop();
2062
- if (podcastWavePlayers.b) podcastWavePlayers.b.stop();
2063
-
2064
- // Reset session
2065
- currentPodcastSessionId = null;
2066
-
2067
- // Reset the flag for both samples played
2068
- bothPodcastSamplesPlayed = false;
2069
- }
2070
-
2071
- // Add keyboard shortcut listeners for podcast voting
2072
- document.addEventListener('keydown', function(e) {
2073
- // Check if we're in the podcast tab and it's active
2074
- const podcastTab = document.getElementById('conversational-tab');
2075
- if (!podcastTab.classList.contains('active')) return;
2076
-
2077
- // Only process if input fields are not focused
2078
- if (document.activeElement.tagName === 'INPUT' ||
2079
- document.activeElement.tagName === 'TEXTAREA') {
2080
- return;
2081
- }
2082
-
2083
- if (e.key.toLowerCase() === 'a') {
2084
- if (bothPodcastSamplesPlayed && !podcastVoteButtons[0].disabled) {
2085
- handlePodcastVote('a');
2086
- } else if (podcastPlayerContainer.style.display !== 'none' && !bothPodcastSamplesPlayed) {
2087
- openToast("Please listen to both audio samples before voting", "info");
2088
- }
2089
- } else if (e.key.toLowerCase() === 'b') {
2090
- if (bothPodcastSamplesPlayed && !podcastVoteButtons[1].disabled) {
2091
- handlePodcastVote('b');
2092
- } else if (podcastPlayerContainer.style.display !== 'none' && !bothPodcastSamplesPlayed) {
2093
- openToast("Please listen to both audio samples before voting", "info");
2094
- }
2095
- } else if (e.key.toLowerCase() === 'n') {
2096
- if (podcastNextRoundContainer.style.display === 'block') {
2097
- if (!e.ctrlKey && !e.metaKey) {
2098
- e.preventDefault();
2099
- }
2100
- resetPodcastState();
2101
- }
2102
- } else if (e.key === ' ') {
2103
- // Space to play/pause current audio
2104
- if (podcastPlayerContainer.style.display !== 'none') {
2105
- e.preventDefault();
2106
- // If A is playing, toggle A, else if B is playing, toggle B, else play A
2107
- if (podcastWavePlayers.a && podcastWavePlayers.a.isPlaying) {
2108
- podcastWavePlayers.a.togglePlayPause();
2109
- } else if (podcastWavePlayers.b && podcastWavePlayers.b.isPlaying) {
2110
- podcastWavePlayers.b.togglePlayPause();
2111
- } else if (podcastWavePlayers.a) {
2112
- podcastWavePlayers.a.play();
2113
- }
2114
- }
2115
- }
2116
- });
2117
-
2118
- // Event listeners
2119
- addLineBtn.addEventListener('click', function() {
2120
- addPodcastLine();
2121
- });
2122
-
2123
- randomScriptBtn.addEventListener('click', function() {
2124
- loadRandomScript();
2125
- });
2126
-
2127
- podcastSynthBtn.addEventListener('click', function() {
2128
- generatePodcast();
2129
- });
2130
-
2131
- // Add event listeners to vote buttons
2132
- podcastVoteButtons.forEach(btn => {
2133
- btn.addEventListener('click', function() {
2134
- if (bothPodcastSamplesPlayed) {
2135
- const model = this.dataset.model;
2136
- handlePodcastVote(model);
2137
- } else {
2138
- openToast("Please listen to both audio samples before voting", "info");
2139
- }
2140
- });
2141
- });
2142
-
2143
- // Add event listener for next round button
2144
- podcastNextRoundBtn.addEventListener('click', resetPodcastState);
2145
-
2146
- // Initialize with 2 empty lines
2147
- initializePodcastLines();
2148
- });
2149
- </script>
2150
- {% endblock %}
 
1
  {% extends "base.html" %}
2
 
3
+ {% block title %}한국어 TTS Arena{% endblock %}
4
 
5
  {% block current_page %}Arena{% endblock %}
6
 
 
12
  <!-- Login prompt overlay -->
13
  <div id="login-prompt-overlay" class="login-prompt-overlay" style="display: none;">
14
  <div class="login-prompt-content">
15
+ <h3>로그인 필요</h3>
16
+ <p>TTS Arena를 사용하려면 로그인이 필요합니다. 로그인하여 음성을 생성하고 투표하세요!</p>
17
  <div class="login-prompt-actions">
18
+ <button class="login-prompt-close">나중에</button>
19
+ <a href="{{ url_for('auth.login', next=request.path) }}" class="login-prompt-btn">Hugging Face로 로그인</a>
20
  </div>
21
  </div>
22
  </div>
23
  {% endif %}
24
 
 
 
 
 
 
25
  <div id="tts-tab" class="tab-content active">
26
  <form class="input-container">
27
  <div class="input-group">
28
+ <button type="button" class="segmented-btn random-btn" title="랜덤 텍스트">
29
  <svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-shuffle-icon lucide-shuffle">
30
  <path d="m18 14 4 4-4 4" />
31
  <path d="m18 2 4 4-4 4" />
 
34
  <path d="M22 18h-6.041a4 4 0 0 1-3.3-1.8l-.359-.45" />
35
  </svg>
36
  </button>
37
+ <input type="text" class="text-input" placeholder="합성할 텍스트를 입력하세요...">
38
+ <button type="submit" class="segmented-btn synth-btn">합성</button>
39
  </div>
40
+ <button type="submit" class="mobile-synth-btn">합성</button>
41
  </form>
42
 
43
  <div id="initial-keyboard-hint" class="keyboard-hint">
44
+ <kbd>R</kbd> 랜덤 텍스트, <kbd>N</kbd> 다음 랜덤 라운드, <kbd>Enter</kbd> 생성
45
  </div>
46
 
47
  <div class="loading-container" style="display: none;">
 
56
  <span></span>
57
  </div>
58
  </div>
59
+ <div class="loader-text">오디오 샘플 생성 중...</div>
60
+ <div class="loader-subtext">최대 30초가 소요될 수 있습니다</div>
61
  </div>
62
  </div>
63
 
64
  <div class="players-container" style="display: none;">
65
  <div class="players-row">
66
  <div class="player">
67
+ <div class="player-label">모델 A <span class="model-name-display"></span></div>
68
  <div class="wave-player-container" data-model="a"></div>
69
  <button class="vote-btn" data-model="a" disabled>
70
+ A에 투표
71
  <span class="shortcut-key">A</span>
72
  <span class="vote-loader" style="display: none;">
73
  <div class="vote-spinner"></div>
 
76
  </div>
77
 
78
  <div class="player">
79
+ <div class="player-label">모델 B <span class="model-name-display"></span></div>
80
  <div class="wave-player-container" data-model="b"></div>
81
  <button class="vote-btn" data-model="b" disabled>
82
+ B에 투표
83
  <span class="shortcut-key">B</span>
84
  <span class="vote-loader" style="display: none;">
85
  <div class="vote-spinner"></div>
 
90
  </div>
91
 
92
  <div class="vote-results" style="display: none;">
93
+ <h3 class="results-heading">투표 완료!</h3>
94
  <div class="results-content">
95
  <div class="chosen-model">
96
+ <strong>선택:</strong> <span class="chosen-model-name"></span>
97
  </div>
98
  <div class="rejected-model">
99
+ <strong>비교 대상:</strong> <span class="rejected-model-name"></span>
100
  </div>
101
  </div>
102
  </div>
103
 
104
  <div class="next-round-container" style="display: none;">
105
+ <button class="next-round-btn">다음 라운드</button>
106
  </div>
107
 
108
  <div id="playback-keyboard-hint" class="keyboard-hint" style="display: none;">
109
+ <kbd>Space</kbd> 재생/일시정지, <kbd>A</kbd>/<kbd>B</kbd> 투표, <kbd>R</kbd> 랜덤 텍스트, <kbd>N</kbd> 다음 랜덤 라운드
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
  </div>
111
  </div>
112
 
 
359
  }
360
  }
361
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
362
  .tab-content {
363
  display: none;
364
  }
 
367
  display: block;
368
  }
369
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
370
  .model-name-display {
371
  font-size: 0.9em;
372
  color: #666;
 
425
  }
426
  /* Dark mode styles */
427
  @media (prefers-color-scheme: dark) {
 
 
 
 
 
 
 
 
428
  .model-name-display {
429
  color: #aaa;
430
  }
 
494
  }
495
 
496
  .random-btn:hover {
497
+ background-color: rgba(255, 255, 255, 0.1);
498
+ }
499
+
500
+ .vote-recorded {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
501
  background-color: var(--light-gray);
502
  border-color: var(--border-color);
503
  }
504
+
505
+ /* Ensure border-radius is maintained during loading state */
506
+ .vote-btn.loading {
507
+ border-radius: var(--radius);
508
+ }
509
+
510
+ /* Dark mode keyboard hint */
511
+ .keyboard-hint {
512
+ color: #aaa;
513
+ }
514
+
515
+ .keyboard-hint kbd {
516
+ color: #ddd;
517
+ background-color: #333;
518
+ border-color: #555;
519
+ box-shadow: 0 1px 0 rgba(255,255,255,0.1);
520
+ }
521
  }
522
 
523
  /* Login prompt overlay styles */
 
653
  const nextRoundBtn = document.querySelector('.next-round-btn');
654
  const nextRoundContainer = document.querySelector('.next-round-container');
655
  const randomBtn = document.querySelector('.random-btn');
 
 
656
  const voteResultsContainer = document.querySelector('.vote-results');
657
  const chosenModelNameElement = document.querySelector('.chosen-model-name');
658
  const rejectedModelNameElement = document.querySelector('.rejected-model-name');
 
699
  });
700
  }
701
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
702
  function handleSynthesize(e) {
703
  if (e) {
704
  e.preventDefault();
 
712
 
713
  const text = textInput.value.trim();
714
  if (!text) {
715
+ openToast("텍스트를 입력해주세요", "warning");
716
  return;
717
  }
718
 
719
  if (text.length > 1000) {
720
+ openToast("텍스트가 너무 깁니다. 1000자 이하로 입력해주세요.", "warning");
721
  return;
722
  }
723
 
 
757
  .then(response => {
758
  if (!response.ok) {
759
  return response.json().then(err => {
760
+ throw new Error(err.error || 'TTS 생성에 실패했습니다');
761
  });
762
  }
763
  return response.json();
 
804
 
805
  // Handle authentication errors specially
806
  if (error.message.includes('logged in to generate') || error.message.includes('logged in to vote')) {
807
+ openToast("로그인이 필요합니다. <a href='{{ url_for('auth.login', next=request.path) }}' style='color: white; text-decoration: underline;'>지금 로그인</a>", "error");
808
  } else {
809
  openToast(error.message, "error");
810
  }
 
835
  .then(response => {
836
  if (!response.ok) {
837
  return response.json().then(err => {
838
+ throw new Error(err.error || '투표 제출에 실패했습니다');
839
  });
840
  }
841
  return response.json();
 
871
  nextRoundContainer.style.display = 'block';
872
 
873
  // Show success toast
874
+ openToast("투표가 기록되었습니다!", "success");
875
  })
876
  .catch(error => {
877
  // Re-enable vote buttons
 
882
 
883
  // Handle authentication errors specially
884
  if (error.message.includes('logged in to vote')) {
885
+ openToast("로그인이 필요합니다. <a href='{{ url_for('auth.login', next=request.path) }}' style='color: white; text-decoration: underline;'>지금 로그인</a>", "error");
886
  } else {
887
  openToast(error.message, "error");
888
  }
 
938
  // Select a random text from the unconsumed sentences
939
  selectedText = cachedSentences[Math.floor(Math.random() * cachedSentences.length)];
940
  console.log("Using random sentence from unconsumed sentences.");
941
+ } else if (fallbackRandomTexts && fallbackRandomTexts.length > 0) {
942
+ // Fallback to harvard sentences
943
+ selectedText = fallbackRandomTexts[Math.floor(Math.random() * fallbackRandomTexts.length)];
944
+ console.log("Using fallback random text.");
945
  } else {
946
+ console.error("No sentences available.");
947
+ openToast("사용 가능한 문장이 없습니다.", "error");
 
948
  return;
949
  }
950
  textInput.value = selectedText;
 
952
  }
953
 
954
  function showListenToastMessage() {
955
+ openToast("투표하기 전에 오디오 샘플을 모두 들어주세요", "info");
956
  }
957
 
958
  // New function for N shortcut: Random + Synthesize
 
1060
  fetchCachedSentences();
1061
  });
1062
  </script>
1063
+ {% endblock %}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
templates/base.html CHANGED
@@ -1,10 +1,10 @@
1
  <!DOCTYPE html>
2
- <html lang="en">
3
 
4
  <head>
5
  <meta charset="UTF-8">
6
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
7
- <title>{% block title %}TTS Arena{% endblock %}</title>
8
  <link rel="preconnect" href="https://fonts.googleapis.com">
9
  <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
10
  <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap" rel="stylesheet">
@@ -56,11 +56,43 @@
56
  flex-shrink: 0;
57
  }
58
 
 
 
 
 
59
  .logo {
60
- font-size: 24px;
61
  font-weight: 700;
62
- margin-bottom: 32px;
63
  color: var(--primary-color);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  }
65
 
66
  .nav-item {
@@ -1061,7 +1093,15 @@
1061
  <path d="M18 6L6 18M6 6L18 18" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" />
1062
  </svg>
1063
  </div>
1064
- <div class="logo">TTS Arena</div>
 
 
 
 
 
 
 
 
1065
  <nav>
1066
  <a href="{{ url_for('arena') }}" class="nav-item {% if request.path == '/' %}active{% endif %}">
1067
  <svg xmlns="http://www.w3.org/2000/svg" width="18" height="18" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-dices"><rect width="12" height="12" x="2" y="10" rx="2" ry="2"/><path d="m17.92 14 3.5-3.5a2.24 2.24 0 0 0 0-3l-5-4.92a2.24 2.24 0 0 0-3 0L10 6"/><path d="M6 18h.01"/><path d="M10 14h.01"/><path d="M15 6h.01"/><path d="M18 9h.01"/></svg>
 
1
  <!DOCTYPE html>
2
+ <html lang="ko">
3
 
4
  <head>
5
  <meta charset="UTF-8">
6
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
7
+ <title>{% block title %}한국어 TTS Arena{% endblock %}</title>
8
  <link rel="preconnect" href="https://fonts.googleapis.com">
9
  <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
10
  <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&display=swap" rel="stylesheet">
 
56
  flex-shrink: 0;
57
  }
58
 
59
+ .logo-container {
60
+ margin-bottom: 32px;
61
+ }
62
+
63
  .logo {
64
+ font-size: 22px;
65
  font-weight: 700;
 
66
  color: var(--primary-color);
67
+ margin-bottom: 8px;
68
+ }
69
+
70
+ .supported-by {
71
+ display: flex;
72
+ align-items: center;
73
+ gap: 6px;
74
+ font-size: 11px;
75
+ color: #888;
76
+ }
77
+
78
+ .supported-by span {
79
+ opacity: 0.8;
80
+ }
81
+
82
+ .channel-link {
83
+ display: flex;
84
+ align-items: center;
85
+ text-decoration: none;
86
+ }
87
+
88
+ .channel-logo-img {
89
+ height: 20px;
90
+ width: auto;
91
+ transition: opacity 0.2s;
92
+ }
93
+
94
+ .channel-link:hover .channel-logo-img {
95
+ opacity: 0.8;
96
  }
97
 
98
  .nav-item {
 
1093
  <path d="M18 6L6 18M6 6L18 18" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" />
1094
  </svg>
1095
  </div>
1096
+ <div class="logo-container">
1097
+ <div class="logo">한국어 TTS 아레나</div>
1098
+ <div class="supported-by">
1099
+ <span>supported by</span>
1100
+ <a href="https://channel.io/ko" target="_blank" rel="noopener noreferrer" class="channel-link">
1101
+ <img src="{{ url_for('static', filename='channeltalk-logo-kr.svg') }}" alt="채널톡" class="channel-logo-img">
1102
+ </a>
1103
+ </div>
1104
+ </div>
1105
  <nav>
1106
  <a href="{{ url_for('arena') }}" class="nav-item {% if request.path == '/' %}active{% endif %}">
1107
  <svg xmlns="http://www.w3.org/2000/svg" width="18" height="18" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="lucide lucide-dices"><rect width="12" height="12" x="2" y="10" rx="2" ry="2"/><path d="m17.92 14 3.5-3.5a2.24 2.24 0 0 0 0-3l-5-4.92a2.24 2.24 0 0 0-3 0L10 6"/><path d="M6 18h.01"/><path d="M10 14h.01"/><path d="M15 6h.01"/><path d="M18 9h.01"/></svg>
tts.py CHANGED
@@ -1,298 +1,218 @@
1
- # TODO: V2 of TTS Router
2
- # Currently just use current TTS router.
3
  import os
4
  import json
5
- from dotenv import load_dotenv
6
- import fal_client
7
- import requests
8
- import time
9
- import io
10
- from pyht import Client as PyhtClient
11
- from pyht.client import TTSOptions
12
  import base64
13
  import tempfile
14
- import random
 
15
 
16
  load_dotenv()
17
 
18
- ZEROGPU_TOKENS = os.getenv("ZEROGPU_TOKENS", "").split(",")
19
-
 
 
 
20
 
21
- def get_zerogpu_token():
22
- return random.choice(ZEROGPU_TOKENS)
 
 
23
 
 
 
24
 
25
  model_mapping = {
 
 
 
 
 
 
26
  "eleven-multilingual-v2": {
27
  "provider": "elevenlabs",
28
  "model": "eleven_multilingual_v2",
29
  },
30
- "async-1": {
31
- "provider": "async",
32
- "model": "async-1",
33
- },
34
- "eleven-turbo-v2.5": {
35
- "provider": "elevenlabs",
36
- "model": "eleven_turbo_v2_5",
37
- },
38
- "eleven-flash-v2.5": {
39
- "provider": "elevenlabs",
40
- "model": "eleven_flash_v2_5",
41
- },
42
- "cartesia-sonic-2": {
43
- "provider": "cartesia",
44
- "model": "sonic-2",
45
- },
46
- "spark-tts": {
47
- "provider": "spark",
48
- "model": "spark-tts",
49
- },
50
- "playht-2.0": {
51
- "provider": "playht",
52
- "model": "PlayHT2.0",
53
- },
54
- "styletts2": {
55
- "provider": "styletts",
56
- "model": "styletts2",
57
- },
58
- "kokoro-v1": {
59
- "provider": "kokoro",
60
- "model": "kokoro_v1",
61
- },
62
- "cosyvoice-2.0": {
63
- "provider": "cosyvoice",
64
- "model": "cosyvoice_2_0",
65
- },
66
- "papla-p1": {
67
- "provider": "papla",
68
- "model": "papla_p1",
69
  },
70
- "hume-octave": {
71
- "provider": "hume",
72
- "model": "octave",
 
73
  },
74
- "megatts3": {
75
- "provider": "megatts3",
76
- "model": "megatts3",
 
77
  },
78
- "minimax-02-hd": {
79
- "provider": "minimax",
80
- "model": "speech-02-hd",
81
  },
82
- "minimax-02-turbo": {
83
- "provider": "minimax",
84
- "model": "speech-02-turbo",
85
- },
86
- "lanternfish-1": {
87
- "provider": "lanternfish",
88
- "model": "lanternfish-1",
89
- },
90
- "nls-pre-v1": {
91
- "provider": "nls",
92
- "model": "nls-1",
93
- },
94
- "chatterbox": {
95
- "provider": "chatterbox",
96
- "model": "chatterbox",
97
- },
98
- "inworld": {
99
- "provider": "inworld",
100
- "model": "inworld-tts-1",
101
- },
102
- "inworld-max": {
103
- "provider": "inworld",
104
- "model": "inworld-tts-1-max",
105
- },
106
- "wordcab": {
107
- "provider": "wordcab",
108
- "model": "wordcab",
109
- },
110
- "veena": {
111
- "provider": "veena",
112
- "model": "veena",
113
- },
114
- "maya1": {
115
- "provider": "maya1",
116
- "model": "maya1",
117
- },
118
- "magpie": {
119
- "provider": "magpie",
120
- "model": "magpie",
121
- },
122
- "parmesan": {
123
- "provider": "parmesan",
124
- "model": "parmesan",
125
- },
126
- "vocu": {
127
- "provider": "vocu",
128
- "model": "vocu-balance",
129
- },
130
- }
131
- url = "https://tts-agi-tts-router-v2.hf.space/tts"
132
- headers = {
133
- "accept": "application/json",
134
- "Content-Type": "application/json",
135
- "Authorization": f'Bearer {os.getenv("HF_TOKEN")}',
136
  }
137
- data = {"text": "string", "provider": "string", "model": "string"}
138
-
139
-
140
- def predict_csm(script):
141
- result = fal_client.subscribe(
142
- "fal-ai/csm-1b",
143
- arguments={
144
- # "scene": [{
145
- # "text": "Hey how are you doing.",
146
- # "speaker_id": 0
147
- # }, {
148
- # "text": "Pretty good, pretty good.",
149
- # "speaker_id": 1
150
- # }, {
151
- # "text": "I'm great, so happy to be speaking to you.",
152
- # "speaker_id": 0
153
- # }]
154
- "scene": script
155
- },
156
- with_logs=True,
157
- )
158
- return requests.get(result["audio"]["url"]).content
159
 
160
 
161
- def predict_playdialog(script):
162
- # Initialize the PyHT client
163
- pyht_client = PyhtClient(
164
- user_id=os.getenv("PLAY_USERID"),
165
- api_key=os.getenv("PLAY_SECRETKEY"),
 
 
 
 
166
  )
167
-
168
- # Define the voices
169
- voice_1 = "s3://voice-cloning-zero-shot/baf1ef41-36b6-428c-9bdf-50ba54682bd8/original/manifest.json"
170
- voice_2 = "s3://voice-cloning-zero-shot/e040bd1b-f190-4bdb-83f0-75ef85b18f84/original/manifest.json"
171
-
172
- # Convert script format from CSM to PlayDialog format
173
- if isinstance(script, list):
174
- # Process script in CSM format (list of dictionaries)
175
- text = ""
176
- for turn in script:
177
- speaker_id = turn.get("speaker_id", 0)
178
- prefix = "Host 1:" if speaker_id == 0 else "Host 2:"
179
- text += f"{prefix} {turn['text']}\n"
180
- else:
181
- # If it's already a string, use as is
182
- text = script
183
-
184
- # Set up TTSOptions
185
- options = TTSOptions(
186
- voice=voice_1, voice_2=voice_2, turn_prefix="Host 1:", turn_prefix_2="Host 2:"
 
 
 
 
 
 
 
 
 
 
 
 
187
  )
188
-
189
- # Generate audio using PlayDialog
190
- audio_chunks = []
191
- for chunk in pyht_client.tts(text, options, voice_engine="PlayDialog"):
192
- audio_chunks.append(chunk)
193
-
194
- # Combine all chunks into a single audio file
195
- return b"".join(audio_chunks)
196
-
197
-
198
- def predict_dia(script):
199
- # Convert script to the required format for Dia
200
- if isinstance(script, list):
201
- # Convert from list of dictionaries to formatted string
202
- formatted_text = ""
203
- for turn in script:
204
- speaker_id = turn.get("speaker_id", 0)
205
- speaker_tag = "[S1]" if speaker_id == 0 else "[S2]"
206
- text = turn.get("text", "").strip().replace("[S1]", "").replace("[S2]", "")
207
- formatted_text += f"{speaker_tag} {text} "
208
- text = formatted_text.strip()
209
- else:
210
- # If it's already a string, use as is
211
- text = script
212
- # Make a POST request to initiate the dialogue generation
213
- headers = {
214
- # "Content-Type": "application/json",
215
- "Authorization": f"Bearer {get_zerogpu_token()}"
216
- }
217
-
218
  response = requests.post(
219
- "https://mrfakename-dia-1-6b.hf.space/gradio_api/call/generate_dialogue",
220
- headers=headers,
221
- json={"data": [text]},
 
 
 
 
 
 
 
 
 
222
  )
223
-
224
- # Extract the event ID from the response
225
- event_id = response.json()["event_id"]
226
-
227
- # Make a streaming request to get the generated dialogue
228
- stream_url = f"https://mrfakename-dia-1-6b.hf.space/gradio_api/call/generate_dialogue/{event_id}"
229
-
230
- # Use a streaming request to get the audio data
231
- with requests.get(stream_url, headers=headers, stream=True) as stream_response:
232
- # Process the streaming response
233
- for line in stream_response.iter_lines():
234
- if line:
235
- if line.startswith(b"data: ") and not line.startswith(b"data: null"):
236
- audio_data = line[6:]
237
- return requests.get(json.loads(audio_data)[0]["url"]).content
238
-
239
-
240
- def predict_tts(text, model):
241
- global client
242
- print(f"Predicting TTS for {model}")
243
- # Exceptions: special models that shouldn't be passed to the router
244
- if model == "csm-1b":
245
- return predict_csm(text)
246
- elif model == "playdialog-1.0":
247
- return predict_playdialog(text)
248
- elif model == "dia-1.6b":
249
- return predict_dia(text)
250
-
251
- if not model in model_mapping:
252
- raise ValueError(f"Model {model} not found")
253
-
254
- result = requests.post(
255
- url,
256
- headers=headers,
257
- data=json.dumps(
258
- {
259
- "text": text,
260
- "provider": model_mapping[model]["provider"],
261
- "model": model_mapping[model]["model"],
262
- }
263
- ),
264
  )
265
- response_json = result.json()
266
-
267
- audio_data = response_json["audio_data"] # base64 encoded audio data
268
- extension = response_json["extension"]
269
- # Decode the base64 audio data
270
- audio_bytes = base64.b64decode(audio_data)
271
-
272
- # Create a temporary file to store the audio data
273
- with tempfile.NamedTemporaryFile(delete=False, suffix=f".{extension}") as temp_file:
274
- temp_file.write(audio_bytes)
275
- temp_path = temp_file.name
276
-
277
- return temp_path
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
278
 
279
 
280
  if __name__ == "__main__":
281
- print(
282
- predict_dia(
283
- [
284
- {"text": "Hello, how are you?", "speaker_id": 0},
285
- {"text": "I'm great, thank you!", "speaker_id": 1},
286
- ]
287
- )
288
- )
289
- # print("Predicting PlayDialog")
290
- # print(
291
- # predict_playdialog(
292
- # [
293
- # {"text": "Hey how are you doing.", "speaker_id": 0},
294
- # {"text": "Pretty good, pretty good.", "speaker_id": 1},
295
- # {"text": "I'm great, so happy to be speaking to you.", "speaker_id": 0},
296
- # ]
297
- # )
298
- # )
 
1
+ # 한국어 TTS Arena - TTS Router
 
2
  import os
3
  import json
 
 
 
 
 
 
 
4
  import base64
5
  import tempfile
6
+ import requests
7
+ from dotenv import load_dotenv
8
 
9
  load_dotenv()
10
 
11
+ # 한국어 지원 TTS 제공자 매핑
12
+ # - 채널톡: 자체 API
13
+ # - ElevenLabs: 직접 API
14
+ # - OpenAI: API
15
+ # - Google: API
16
 
17
+ CHANNEL_TTS_URL = os.getenv(
18
+ "CHANNEL_TTS_URL",
19
+ "https://ch-tts-streaming-demo.channel.io/v1/text-to-speech"
20
+ )
21
 
22
+ ELEVENLABS_API_KEY = os.getenv("ELEVENLABS_API_KEY")
23
+ ELEVENLABS_VOICE_ID = os.getenv("ELEVENLABS_VOICE_ID", "21m00Tcm4TlvDq8ikWAM") # Rachel (기본)
24
 
25
  model_mapping = {
26
+ # 채널톡 TTS (한국어 특화)
27
+ "channel-hana": {
28
+ "provider": "channel",
29
+ "voice": "hana",
30
+ },
31
+ # ElevenLabs (다국어 지원) - 직접 API 호출
32
  "eleven-multilingual-v2": {
33
  "provider": "elevenlabs",
34
  "model": "eleven_multilingual_v2",
35
  },
36
+ # OpenAI TTS
37
+ "openai-tts-1": {
38
+ "provider": "openai",
39
+ "model": "tts-1",
40
+ "voice": "alloy",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
  },
42
+ "openai-tts-1-hd": {
43
+ "provider": "openai",
44
+ "model": "tts-1-hd",
45
+ "voice": "alloy",
46
  },
47
+ # Google Cloud TTS
48
+ "google-wavenet": {
49
+ "provider": "google",
50
+ "voice": "ko-KR-Wavenet-A",
51
  },
52
+ "google-neural2": {
53
+ "provider": "google",
54
+ "voice": "ko-KR-Neural2-A",
55
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
 
58
 
59
def predict_channel_tts(text: str, voice: str = "hana") -> str:
    """Synthesize speech through the Channel Talk TTS endpoint.

    Args:
        text: Text to synthesize.
        voice: Voice name, appended to ``CHANNEL_TTS_URL`` as the last
            path segment.

    Returns:
        Path of a temporary ``.wav`` file containing the raw response body.
        The file is created with ``delete=False`` and is not removed by
        this function.

    Raises:
        requests.HTTPError: If the endpoint replies with an error status.
    """
    endpoint = f"{CHANNEL_TTS_URL}/{voice}"
    payload = {"text": text, "output_format": "wav_24000"}

    reply = requests.post(
        endpoint,
        json=payload,
        headers={"Content-Type": "application/json"},
        timeout=30,
    )
    reply.raise_for_status()

    # Store the audio in a temp file that outlives this call.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as wav_file:
        wav_file.write(reply.content)
        return wav_file.name
75
+
76
+
77
def predict_elevenlabs_tts(text: str, model: str = "eleven_multilingual_v2") -> str:
    """Synthesize speech by calling the ElevenLabs text-to-speech API directly.

    The API key and voice id come from the module-level
    ``ELEVENLABS_API_KEY`` and ``ELEVENLABS_VOICE_ID`` values.

    Args:
        text: Text to synthesize.
        model: Value sent as ``model_id`` in the request body.

    Returns:
        Path of a temporary ``.mp3`` file containing the response body.
        The file is created with ``delete=False`` and is not removed by
        this function.

    Raises:
        ValueError: If ``ELEVENLABS_API_KEY`` is empty or unset.
        requests.HTTPError: If the API replies with an error status.
    """
    key = ELEVENLABS_API_KEY
    if not key:
        raise ValueError("ELEVENLABS_API_KEY 환경 변수가 설정되지 않았습니다.")

    endpoint = f"https://api.elevenlabs.io/v1/text-to-speech/{ELEVENLABS_VOICE_ID}"
    request_headers = {
        "xi-api-key": key,
        "Content-Type": "application/json",
        "Accept": "audio/mpeg",
    }
    request_body = {
        "text": text,
        "model_id": model,
        "voice_settings": {
            "stability": 0.5,
            "similarity_boost": 0.75,
        },
    }

    reply = requests.post(
        endpoint,
        headers=request_headers,
        json=request_body,
        timeout=60,
    )
    reply.raise_for_status()

    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as mp3_file:
        mp3_file.write(reply.content)
        return mp3_file.name
107
+
108
+
109
def predict_openai_tts(text: str, model: str = "tts-1", voice: str = "alloy") -> str:
    """Synthesize speech with the OpenAI ``/v1/audio/speech`` endpoint.

    Args:
        text: Text to synthesize (sent as ``input``).
        model: OpenAI TTS model name.
        voice: OpenAI voice name.

    Returns:
        Path of a temporary ``.wav`` file containing the response body.
        The file is created with ``delete=False`` and is not removed by
        this function.

    Raises:
        ValueError: If the ``OPENAI_API_KEY`` environment variable is
            empty or unset.
        requests.HTTPError: If the API replies with an error status.
    """
    key = os.getenv("OPENAI_API_KEY")
    if not key:
        raise ValueError("OPENAI_API_KEY 환경 변수가 설정되지 않았습니다.")

    request_headers = {
        "Authorization": f"Bearer {key}",
        "Content-Type": "application/json",
    }
    request_body = {
        "model": model,
        "input": text,
        "voice": voice,
        "response_format": "wav",
    }

    reply = requests.post(
        "https://api.openai.com/v1/audio/speech",
        headers=request_headers,
        json=request_body,
        timeout=60,
    )
    reply.raise_for_status()

    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as wav_file:
        wav_file.write(reply.content)
        return wav_file.name
134
+
135
+
136
def predict_google_tts(text: str, voice: str = "ko-KR-Wavenet-A") -> str:
    """Synthesize Korean speech with the Google Cloud Text-to-Speech REST API.

    Args:
        text: Text to synthesize.
        voice: Google voice name; the request always uses language code
            ``ko-KR``.

    Returns:
        Path of a temporary ``.wav`` file holding the base64-decoded
        ``audioContent`` from the response. The file is created with
        ``delete=False`` and is not removed by this function.

    Raises:
        ValueError: If the ``GOOGLE_API_KEY`` environment variable is empty
            or unset, or if the response carries no ``audioContent``.
        requests.HTTPError: If the API replies with an error status.
    """
    api_key = os.getenv("GOOGLE_API_KEY")
    if not api_key:
        raise ValueError("GOOGLE_API_KEY 환경 변수가 설정되지 않았습니다.")

    response = requests.post(
        "https://texttospeech.googleapis.com/v1/text:synthesize",
        headers={
            "Content-Type": "application/json",
            # Send the key as a header instead of a ``?key=`` query
            # parameter: the HTTPError raised by raise_for_status() embeds
            # the request URL in its message, so a key in the URL would
            # leak into logs and any error text relayed to clients.
            "X-Goog-Api-Key": api_key,
        },
        json={
            "input": {"text": text},
            "voice": {
                "languageCode": "ko-KR",
                "name": voice,
            },
            "audioConfig": {
                "audioEncoding": "LINEAR16",
                "sampleRateHertz": 24000,
            },
        },
        timeout=30,
    )
    response.raise_for_status()

    audio_content = response.json().get("audioContent")
    if not audio_content:
        raise ValueError("Google TTS API가 오디오를 반환하지 않았습니다.")

    # The API returns the audio as base64 text inside the JSON body.
    audio_bytes = base64.b64decode(audio_content)
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as f:
        f.write(audio_bytes)
        return f.name
168
+
169
+
170
def predict_tts(text: str, model: str) -> str:
    """Generate speech for ``text`` with the backend registered for ``model``.

    Looks ``model`` up in ``model_mapping`` and forwards the call to the
    provider-specific ``predict_*_tts`` function, filling in that
    provider's default voice/model when the mapping entry omits them.

    Args:
        text: Text to synthesize.
        model: Model id; must be a key of ``model_mapping``.

    Returns:
        Path of the generated audio file.

    Raises:
        ValueError: If ``model`` is not in ``model_mapping`` or its entry
            names a provider this function does not handle.
    """
    print(f"[TTS] Predicting for model: {model}")

    if model not in model_mapping:
        raise ValueError(f"지원하지 않는 모델입니다: {model}")

    settings = model_mapping[model]
    provider = settings["provider"]

    # One guard per provider; each branch returns immediately.
    if provider == "channel":
        return predict_channel_tts(text, settings.get("voice", "hana"))

    if provider == "openai":
        openai_model = settings.get("model", "tts-1")
        openai_voice = settings.get("voice", "alloy")
        return predict_openai_tts(text, openai_model, openai_voice)

    if provider == "google":
        return predict_google_tts(text, settings.get("voice", "ko-KR-Wavenet-A"))

    if provider == "elevenlabs":
        return predict_elevenlabs_tts(
            text, settings.get("model", "eleven_multilingual_v2")
        )

    raise ValueError(f"알 수 없는 provider: {provider}")
207
 
208
 
209
if __name__ == "__main__":
    # Manual smoke test: synthesize one Korean sentence with the Channel
    # Talk backend and report the resulting file path or the error.
    sample_text = "안녕하세요, 채널톡 TTS 테스트입니다."

    print("Testing Channel TTS...")
    try:
        output_path = predict_channel_tts(sample_text)
        print(f" Success: {output_path}")
    except Exception as exc:
        print(f" Error: {exc}")