Sungjoon Lee
committed on
Commit
·
48b92eb
1
Parent(s):
5578f9d
[DOCS] 문서 수정 및 디버그 코드 제거
Browse files
- client/app.py +26 -26
- client/docs/tech-stack.html +165 -62
- client/docs/user-guide.html +9 -8
- client/frontend/components/floating_chatbot.py +21 -21
- client/frontend/styles/result_screen_style.py +32 -0
- client/services/analysis_service.py +14 -14
- client/services/hint_generator.py +7 -7
- client/utils/audio_validator.py +1 -1
- client/utils/elevenlabs_tts.py +9 -8
- client/utils/stt_handler.py +3 -3
- docs/tech-stack.html +165 -62
- docs/user-guide.html +9 -8
- gemini_adapter.py +41 -41
- gradio_ui.py +1 -3
- inspect_routes.py +1 -1
- test_analyze_voice.py +12 -12
client/app.py
CHANGED
|
@@ -54,16 +54,16 @@ IMAGES_DIR = os.path.join(PROJECT_ROOT, "images")
|
|
| 54 |
REFERENCE_AUDIO_DIR = os.path.join(PROJECT_ROOT, "reference_audio")
|
| 55 |
|
| 56 |
# 디버그: 경로 확인
|
| 57 |
-
print(f"[PATH DEBUG] __file__: {__file__}")
|
| 58 |
-
print(f"[PATH DEBUG] PROJECT_ROOT: {PROJECT_ROOT}")
|
| 59 |
-
print(f"[PATH DEBUG] DOCS_DIR: {DOCS_DIR}")
|
| 60 |
-
print(f"[PATH DEBUG] DOCS_DIR exists: {os.path.exists(DOCS_DIR)}")
|
| 61 |
if os.path.exists(DOCS_DIR):
|
| 62 |
print(f"[PATH DEBUG] DOCS_DIR contents: {os.listdir(DOCS_DIR)}")
|
| 63 |
|
| 64 |
# 환경변수로 allowed_paths 설정 (Spaces 배포용)
|
| 65 |
os.environ["GRADIO_ALLOWED_PATHS"] = f"{UPLOAD_DIR},{DOCS_DIR},{IMAGES_DIR},{REFERENCE_AUDIO_DIR}"
|
| 66 |
-
print(f"[PATH DEBUG] GRADIO_ALLOWED_PATHS: {os.environ['GRADIO_ALLOWED_PATHS']}")
|
| 67 |
|
| 68 |
|
| 69 |
class AudioValidationApp:
|
|
@@ -89,7 +89,7 @@ class AudioValidationApp:
|
|
| 89 |
"""
|
| 90 |
try:
|
| 91 |
data = get_dashboard_stats()
|
| 92 |
-
print(f"[DASHBOARD] 통계 데이터: {data}")
|
| 93 |
return data # 전체 데이터 반환 (answer_word, reference_audio_path 포함)
|
| 94 |
|
| 95 |
except Exception as e:
|
|
@@ -152,13 +152,13 @@ class AudioValidationApp:
|
|
| 152 |
expected_text = self.validator.get_expected_text(current_difficulty)
|
| 153 |
today = datetime.now().strftime("%Y-%m-%d")
|
| 154 |
|
| 155 |
-
print("=" * 60)
|
| 156 |
print("[AUDIO VALIDATE] 음성 검증 요청")
|
| 157 |
-
print(f" - Session ID (UUID): {session_id}")
|
| 158 |
-
print(f" - Audio Path: {audio_path}")
|
| 159 |
-
print(f" - 시도 횟수: {attempt_count + 1}번째")
|
| 160 |
-
print(f" - 기대 텍스트: {expected_text}")
|
| 161 |
-
print("=" * 60)
|
| 162 |
|
| 163 |
# ========== 직접 서비스 호출 (HTTP 없음) ==========
|
| 164 |
try:
|
|
@@ -172,7 +172,7 @@ class AudioValidationApp:
|
|
| 172 |
# 직접 분석 서비스 호출 (HTTP 대신 함수 호출)
|
| 173 |
api_result = await analyze_voice(audio_bytes, today, session_id)
|
| 174 |
|
| 175 |
-
print(f"[ANALYSIS RESULT] {api_result}")
|
| 176 |
|
| 177 |
# API 오류 처리
|
| 178 |
if api_result.get("status") == "error":
|
|
@@ -300,7 +300,7 @@ class AudioValidationApp:
|
|
| 300 |
}
|
| 301 |
)
|
| 302 |
|
| 303 |
-
print(f"[AUDIO VALIDATE] 실패 처리 - Modal 표시, 총 추측: {len(updated_game_state.get('guesses', []))}개")
|
| 304 |
|
| 305 |
return (
|
| 306 |
history_html, # 0: history_html
|
|
@@ -352,9 +352,9 @@ class AudioValidationApp:
|
|
| 352 |
updated_game_state = GameStateManager.set_win_state(updated_game_state, win=True)
|
| 353 |
|
| 354 |
print(f"[AUDIO VALIDATE] 성공 처리!")
|
| 355 |
-
print(f" - Session ID: {session_id}")
|
| 356 |
-
print(f" - 총 시도 횟수: {len(current_history) + 1}")
|
| 357 |
-
print(f" - 승리 연속: {updated_game_state.get('stats', {}).get('winStreak', 0)}")
|
| 358 |
|
| 359 |
# 통계는 JavaScript에서 화면 표시 시 API 호출하여 업데이트
|
| 360 |
print(f"[SUCCESS] 성공 화면으로 전환 - JS가 통계 API 호출 예정")
|
|
@@ -391,7 +391,7 @@ class AudioValidationApp:
|
|
| 391 |
last_audio = guesses[-1].get('audioFile', "")
|
| 392 |
|
| 393 |
# 통계는 JavaScript에서 화면 표시 시 API 호출하여 업데이트
|
| 394 |
-
print(f"[GIVEUP] 포기 화면으로 전환 - JS가 통계 API 호출 예정")
|
| 395 |
|
| 396 |
# outputs 순서: main_screen, giveup_screen, giveup_content, game_state
|
| 397 |
return (
|
|
@@ -421,13 +421,13 @@ class AudioValidationApp:
|
|
| 421 |
updated_game_state = GameStateManager.get_or_create_session(game_state)
|
| 422 |
session_id = GameStateManager.get_session_id(updated_game_state)
|
| 423 |
|
| 424 |
-
print("=" * 60)
|
| 425 |
-
print("[PAGE LOAD] 페이지 로드")
|
| 426 |
-
print(f" - 오늘 날짜: {today}")
|
| 427 |
-
print(f" - 저장된 날짜: {stored_date}")
|
| 428 |
-
print(f" - Session ID (UUID): {session_id}")
|
| 429 |
-
print(f" - 게임 상태: {updated_game_state.get('winState', -1)} (-1:진행중, 0:포기, 1:성공)")
|
| 430 |
-
print("=" * 60)
|
| 431 |
|
| 432 |
if stored_date != today:
|
| 433 |
# 날짜가 다르면 기록 초기화
|
|
@@ -459,7 +459,7 @@ class AudioValidationApp:
|
|
| 459 |
"""
|
| 460 |
# 앱 시작 시점에 통계 데이터 가져오기
|
| 461 |
stats = self._fetch_dashboard_stats_sync()
|
| 462 |
-
print(f"[BUILD UI] 초기 통계 데이터: {stats}")
|
| 463 |
|
| 464 |
# 이벤트 핸들러 설정
|
| 465 |
handlers = {
|
|
|
|
| 54 |
REFERENCE_AUDIO_DIR = os.path.join(PROJECT_ROOT, "reference_audio")
|
| 55 |
|
| 56 |
# 디버그: 경로 확인
|
| 57 |
+
# print(f"[PATH DEBUG] __file__: {__file__}")
|
| 58 |
+
# print(f"[PATH DEBUG] PROJECT_ROOT: {PROJECT_ROOT}")
|
| 59 |
+
# print(f"[PATH DEBUG] DOCS_DIR: {DOCS_DIR}")
|
| 60 |
+
# print(f"[PATH DEBUG] DOCS_DIR exists: {os.path.exists(DOCS_DIR)}")
|
| 61 |
if os.path.exists(DOCS_DIR):
|
| 62 |
print(f"[PATH DEBUG] DOCS_DIR contents: {os.listdir(DOCS_DIR)}")
|
| 63 |
|
| 64 |
# 환경변수로 allowed_paths 설정 (Spaces 배포용)
|
| 65 |
os.environ["GRADIO_ALLOWED_PATHS"] = f"{UPLOAD_DIR},{DOCS_DIR},{IMAGES_DIR},{REFERENCE_AUDIO_DIR}"
|
| 66 |
+
# print(f"[PATH DEBUG] GRADIO_ALLOWED_PATHS: {os.environ['GRADIO_ALLOWED_PATHS']}")
|
| 67 |
|
| 68 |
|
| 69 |
class AudioValidationApp:
|
|
|
|
| 89 |
"""
|
| 90 |
try:
|
| 91 |
data = get_dashboard_stats()
|
| 92 |
+
# print(f"[DASHBOARD] 통계 데이터: {data}")
|
| 93 |
return data # 전체 데이터 반환 (answer_word, reference_audio_path 포함)
|
| 94 |
|
| 95 |
except Exception as e:
|
|
|
|
| 152 |
expected_text = self.validator.get_expected_text(current_difficulty)
|
| 153 |
today = datetime.now().strftime("%Y-%m-%d")
|
| 154 |
|
| 155 |
+
# print("=" * 60)
|
| 156 |
print("[AUDIO VALIDATE] 음성 검증 요청")
|
| 157 |
+
# print(f" - Session ID (UUID): {session_id}")
|
| 158 |
+
# print(f" - Audio Path: {audio_path}")
|
| 159 |
+
# print(f" - 시도 횟수: {attempt_count + 1}번째")
|
| 160 |
+
# print(f" - 기대 텍스트: {expected_text}")
|
| 161 |
+
# print("=" * 60)
|
| 162 |
|
| 163 |
# ========== 직접 서비스 호출 (HTTP 없음) ==========
|
| 164 |
try:
|
|
|
|
| 172 |
# 직접 분석 서비스 호출 (HTTP 대신 함수 호출)
|
| 173 |
api_result = await analyze_voice(audio_bytes, today, session_id)
|
| 174 |
|
| 175 |
+
# print(f"[ANALYSIS RESULT] {api_result}")
|
| 176 |
|
| 177 |
# API 오류 처리
|
| 178 |
if api_result.get("status") == "error":
|
|
|
|
| 300 |
}
|
| 301 |
)
|
| 302 |
|
| 303 |
+
# print(f"[AUDIO VALIDATE] 실패 처리 - Modal 표시, 총 추측: {len(updated_game_state.get('guesses', []))}개")
|
| 304 |
|
| 305 |
return (
|
| 306 |
history_html, # 0: history_html
|
|
|
|
| 352 |
updated_game_state = GameStateManager.set_win_state(updated_game_state, win=True)
|
| 353 |
|
| 354 |
print(f"[AUDIO VALIDATE] 성공 처리!")
|
| 355 |
+
# print(f" - Session ID: {session_id}")
|
| 356 |
+
# print(f" - 총 시도 횟수: {len(current_history) + 1}")
|
| 357 |
+
# print(f" - 승리 연속: {updated_game_state.get('stats', {}).get('winStreak', 0)}")
|
| 358 |
|
| 359 |
# 통계는 JavaScript에서 화면 표시 시 API 호출하여 업데이트
|
| 360 |
print(f"[SUCCESS] 성공 화면으로 전환 - JS가 통계 API 호출 예정")
|
|
|
|
| 391 |
last_audio = guesses[-1].get('audioFile', "")
|
| 392 |
|
| 393 |
# 통계는 JavaScript에서 화면 표시 시 API 호출하여 업데이트
|
| 394 |
+
# print(f"[GIVEUP] 포기 화면으로 전환 - JS가 통계 API 호출 예정")
|
| 395 |
|
| 396 |
# outputs 순서: main_screen, giveup_screen, giveup_content, game_state
|
| 397 |
return (
|
|
|
|
| 421 |
updated_game_state = GameStateManager.get_or_create_session(game_state)
|
| 422 |
session_id = GameStateManager.get_session_id(updated_game_state)
|
| 423 |
|
| 424 |
+
# print("=" * 60)
|
| 425 |
+
# print("[PAGE LOAD] 페이지 로드")
|
| 426 |
+
# print(f" - 오늘 날짜: {today}")
|
| 427 |
+
# print(f" - 저장된 날짜: {stored_date}")
|
| 428 |
+
# print(f" - Session ID (UUID): {session_id}")
|
| 429 |
+
# print(f" - 게임 상태: {updated_game_state.get('winState', -1)} (-1:진행중, 0:포기, 1:성공)")
|
| 430 |
+
# print("=" * 60)
|
| 431 |
|
| 432 |
if stored_date != today:
|
| 433 |
# 날짜가 다르면 기록 초기화
|
|
|
|
| 459 |
"""
|
| 460 |
# 앱 시작 시점에 통계 데이터 가져오기
|
| 461 |
stats = self._fetch_dashboard_stats_sync()
|
| 462 |
+
# print(f"[BUILD UI] 초기 통계 데이터: {stats}")
|
| 463 |
|
| 464 |
# 이벤트 핸들러 설정
|
| 465 |
handlers = {
|
client/docs/tech-stack.html
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
<!DOCTYPE html>
|
| 2 |
-
<html lang="
|
| 3 |
<head>
|
| 4 |
<meta charset="UTF-8">
|
| 5 |
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
-
<title>Tech Stack -
|
| 7 |
<link href="https://fonts.googleapis.com/css2?family=Nunito:wght@400;600;700;800&display=swap" rel="stylesheet">
|
| 8 |
<style>
|
| 9 |
:root {
|
|
@@ -494,8 +494,8 @@
|
|
| 494 |
<!-- Sidebar -->
|
| 495 |
<nav class="sidebar">
|
| 496 |
<a href="#" class="logo">
|
| 497 |
-
<div class="logo-icon">
|
| 498 |
-
|
| 499 |
</a>
|
| 500 |
|
| 501 |
<div class="nav-section">
|
|
@@ -523,7 +523,7 @@
|
|
| 523 |
<div class="nav-title">Backend</div>
|
| 524 |
<ul class="nav-list">
|
| 525 |
<li class="nav-item">
|
| 526 |
-
<a href="#backend" class="nav-link">
|
| 527 |
</li>
|
| 528 |
<li class="nav-item">
|
| 529 |
<a href="#database" class="nav-link">Database</a>
|
|
@@ -535,7 +535,10 @@
|
|
| 535 |
<div class="nav-title">AI / ML</div>
|
| 536 |
<ul class="nav-list">
|
| 537 |
<li class="nav-item">
|
| 538 |
-
<a href="#ai" class="nav-link">
|
|
|
|
|
|
|
|
|
|
| 539 |
</li>
|
| 540 |
<li class="nav-item">
|
| 541 |
<a href="#audio" class="nav-link">Audio Processing</a>
|
|
@@ -564,8 +567,8 @@
|
|
| 564 |
<header class="page-header">
|
| 565 |
<h1 class="page-title">Tech Stack</h1>
|
| 566 |
<p class="page-description">
|
| 567 |
-
|
| 568 |
-
|
| 569 |
</p>
|
| 570 |
</header>
|
| 571 |
|
|
@@ -578,18 +581,18 @@
|
|
| 578 |
<div class="architecture-diagram">
|
| 579 |
<div class="arch-flow">
|
| 580 |
<div class="arch-box">
|
| 581 |
-
<div class="arch-box-title">Client</div>
|
| 582 |
<div class="arch-box-sub">Gradio 6.0</div>
|
| 583 |
</div>
|
| 584 |
<span class="arch-arrow">→</span>
|
| 585 |
<div class="arch-box secondary">
|
| 586 |
-
<div class="arch-box-title">
|
| 587 |
-
<div class="arch-box-sub">
|
| 588 |
</div>
|
| 589 |
<span class="arch-arrow">→</span>
|
| 590 |
<div class="arch-box tertiary">
|
| 591 |
-
<div class="arch-box-title">AI
|
| 592 |
-
<div class="arch-box-sub">
|
| 593 |
</div>
|
| 594 |
</div>
|
| 595 |
</div>
|
|
@@ -609,17 +612,17 @@
|
|
| 609 |
<span class="tech-version">6.0.0</span>
|
| 610 |
</div>
|
| 611 |
<p class="tech-description">
|
| 612 |
-
Python
|
| 613 |
</p>
|
| 614 |
</div>
|
| 615 |
<div class="tech-card">
|
| 616 |
<div class="tech-header">
|
| 617 |
<div class="tech-icon icon-python">Py</div>
|
| 618 |
<span class="tech-name">Python</span>
|
| 619 |
-
<span class="tech-version">3.11
|
| 620 |
</div>
|
| 621 |
<p class="tech-description">
|
| 622 |
-
|
| 623 |
</p>
|
| 624 |
</div>
|
| 625 |
<div class="tech-card">
|
|
@@ -629,7 +632,17 @@
|
|
| 629 |
<span class="tech-version">0.28.1</span>
|
| 630 |
</div>
|
| 631 |
<p class="tech-description">
|
| 632 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 633 |
</p>
|
| 634 |
</div>
|
| 635 |
</div>
|
|
@@ -639,47 +652,47 @@
|
|
| 639 |
<section id="backend" class="section">
|
| 640 |
<h2 class="section-title">
|
| 641 |
<span class="section-icon backend">⚡</span>
|
| 642 |
-
Backend
|
| 643 |
</h2>
|
| 644 |
<div class="tech-grid">
|
| 645 |
<div class="tech-card">
|
| 646 |
<div class="tech-header">
|
| 647 |
-
<div class="tech-icon icon-
|
| 648 |
-
<span class="tech-name">
|
| 649 |
-
<span class="tech-version">
|
| 650 |
</div>
|
| 651 |
<p class="tech-description">
|
| 652 |
-
|
| 653 |
</p>
|
| 654 |
</div>
|
| 655 |
<div class="tech-card">
|
| 656 |
<div class="tech-header">
|
| 657 |
-
<div class="tech-icon" style="background: linear-gradient(135deg, #
|
| 658 |
-
<span class="tech-name">
|
| 659 |
-
<span class="tech-version">
|
| 660 |
</div>
|
| 661 |
<p class="tech-description">
|
| 662 |
-
|
| 663 |
</p>
|
| 664 |
</div>
|
| 665 |
<div class="tech-card">
|
| 666 |
<div class="tech-header">
|
| 667 |
-
<div class="tech-icon
|
| 668 |
-
<span class="tech-name">
|
| 669 |
-
<span class="tech-version">
|
| 670 |
</div>
|
| 671 |
<p class="tech-description">
|
| 672 |
-
|
| 673 |
</p>
|
| 674 |
</div>
|
| 675 |
<div class="tech-card">
|
| 676 |
<div class="tech-header">
|
| 677 |
-
<div class="tech-icon" style="background: linear-gradient(135deg, #
|
| 678 |
-
<span class="tech-name">
|
| 679 |
-
<span class="tech-version">
|
| 680 |
</div>
|
| 681 |
<p class="tech-description">
|
| 682 |
-
|
| 683 |
</p>
|
| 684 |
</div>
|
| 685 |
</div>
|
|
@@ -696,20 +709,30 @@
|
|
| 696 |
<div class="tech-header">
|
| 697 |
<div class="tech-icon icon-postgres">Pg</div>
|
| 698 |
<span class="tech-name">PostgreSQL</span>
|
| 699 |
-
<span class="tech-version">
|
| 700 |
</div>
|
| 701 |
<p class="tech-description">
|
| 702 |
-
|
| 703 |
</p>
|
| 704 |
</div>
|
| 705 |
<div class="tech-card">
|
| 706 |
<div class="tech-header">
|
| 707 |
-
<div class="tech-icon" style="background: linear-gradient(135deg, #
|
| 708 |
-
<span class="tech-name">
|
| 709 |
-
<span class="tech-version">2.
|
| 710 |
</div>
|
| 711 |
<p class="tech-description">
|
| 712 |
-
Python
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 713 |
</p>
|
| 714 |
</div>
|
| 715 |
</div>
|
|
@@ -726,40 +749,80 @@
|
|
| 726 |
<div class="tech-header">
|
| 727 |
<div class="tech-icon icon-gemini">Gm</div>
|
| 728 |
<span class="tech-name">Google Gemini</span>
|
| 729 |
-
<span class="tech-version">
|
| 730 |
</div>
|
| 731 |
<p class="tech-description">
|
| 732 |
-
|
| 733 |
</p>
|
| 734 |
</div>
|
| 735 |
<div class="tech-card">
|
| 736 |
<div class="tech-header">
|
| 737 |
-
<div class="tech-icon" style="background: linear-gradient(135deg, #
|
| 738 |
-
<span class="tech-name">
|
| 739 |
-
<span class="tech-version">
|
| 740 |
</div>
|
| 741 |
<p class="tech-description">
|
| 742 |
-
|
| 743 |
</p>
|
| 744 |
</div>
|
| 745 |
<div class="tech-card">
|
| 746 |
<div class="tech-header">
|
| 747 |
<div class="tech-icon icon-numpy">Np</div>
|
| 748 |
<span class="tech-name">NumPy</span>
|
| 749 |
-
<span class="tech-version">1.26
|
| 750 |
</div>
|
| 751 |
<p class="tech-description">
|
| 752 |
-
|
| 753 |
</p>
|
| 754 |
</div>
|
| 755 |
<div class="tech-card">
|
| 756 |
<div class="tech-header">
|
| 757 |
<div class="tech-icon" style="background: linear-gradient(135deg, #0d9488, #2dd4bf);">Pd</div>
|
| 758 |
<span class="tech-name">Pandas</span>
|
| 759 |
-
<span class="tech-version">2.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 760 |
</div>
|
| 761 |
<p class="tech-description">
|
| 762 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 763 |
</p>
|
| 764 |
</div>
|
| 765 |
</div>
|
|
@@ -779,7 +842,7 @@
|
|
| 779 |
<span class="tech-version">0.25.1</span>
|
| 780 |
</div>
|
| 781 |
<p class="tech-description">
|
| 782 |
-
|
| 783 |
</p>
|
| 784 |
</div>
|
| 785 |
<div class="tech-card">
|
|
@@ -789,7 +852,27 @@
|
|
| 789 |
<span class="tech-version">via ffmpy</span>
|
| 790 |
</div>
|
| 791 |
<p class="tech-description">
|
| 792 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 793 |
</p>
|
| 794 |
</div>
|
| 795 |
</div>
|
|
@@ -809,7 +892,7 @@
|
|
| 809 |
<span class="tech-version">Latest</span>
|
| 810 |
</div>
|
| 811 |
<p class="tech-description">
|
| 812 |
-
|
| 813 |
</p>
|
| 814 |
</div>
|
| 815 |
<div class="tech-card">
|
|
@@ -819,27 +902,47 @@
|
|
| 819 |
<span class="tech-version">Latest</span>
|
| 820 |
</div>
|
| 821 |
<p class="tech-description">
|
| 822 |
-
|
| 823 |
</p>
|
| 824 |
</div>
|
| 825 |
<div class="tech-card">
|
| 826 |
<div class="tech-header">
|
| 827 |
-
<div class="tech-icon" style="background: linear-gradient(135deg, #
|
| 828 |
-
<span class="tech-name">
|
| 829 |
-
<span class="tech-version"
|
| 830 |
</div>
|
| 831 |
<p class="tech-description">
|
| 832 |
-
|
| 833 |
</p>
|
| 834 |
</div>
|
| 835 |
<div class="tech-card">
|
| 836 |
<div class="tech-header">
|
| 837 |
-
<div class="tech-icon" style="background: linear-gradient(135deg, #
|
| 838 |
-
<span class="tech-name">
|
| 839 |
-
<span class="tech-version">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 840 |
</div>
|
| 841 |
<p class="tech-description">
|
| 842 |
-
|
| 843 |
</p>
|
| 844 |
</div>
|
| 845 |
</div>
|
|
|
|
| 1 |
<!DOCTYPE html>
|
| 2 |
+
<html lang="en">
|
| 3 |
<head>
|
| 4 |
<meta charset="UTF-8">
|
| 5 |
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
+
<title>Tech Stack - Voice Sementle</title>
|
| 7 |
<link href="https://fonts.googleapis.com/css2?family=Nunito:wght@400;600;700;800&display=swap" rel="stylesheet">
|
| 8 |
<style>
|
| 9 |
:root {
|
|
|
|
| 494 |
<!-- Sidebar -->
|
| 495 |
<nav class="sidebar">
|
| 496 |
<a href="#" class="logo">
|
| 497 |
+
<div class="logo-icon">V</div>
|
| 498 |
+
Voice Sementle
|
| 499 |
</a>
|
| 500 |
|
| 501 |
<div class="nav-section">
|
|
|
|
| 523 |
<div class="nav-title">Backend</div>
|
| 524 |
<ul class="nav-list">
|
| 525 |
<li class="nav-item">
|
| 526 |
+
<a href="#backend" class="nav-link">Services</a>
|
| 527 |
</li>
|
| 528 |
<li class="nav-item">
|
| 529 |
<a href="#database" class="nav-link">Database</a>
|
|
|
|
| 535 |
<div class="nav-title">AI / ML</div>
|
| 536 |
<ul class="nav-list">
|
| 537 |
<li class="nav-item">
|
| 538 |
+
<a href="#ai" class="nav-link">AI Models</a>
|
| 539 |
+
</li>
|
| 540 |
+
<li class="nav-item">
|
| 541 |
+
<a href="#mcp" class="nav-link">MCP Integration</a>
|
| 542 |
</li>
|
| 543 |
<li class="nav-item">
|
| 544 |
<a href="#audio" class="nav-link">Audio Processing</a>
|
|
|
|
| 567 |
<header class="page-header">
|
| 568 |
<h1 class="page-title">Tech Stack</h1>
|
| 569 |
<p class="page-description">
|
| 570 |
+
Comprehensive technology stack for Voice Sementle - a daily voice puzzle game where pronunciation matters.
|
| 571 |
+
Built with modern Python frameworks, AI models, and cloud services.
|
| 572 |
</p>
|
| 573 |
</header>
|
| 574 |
|
|
|
|
| 581 |
<div class="architecture-diagram">
|
| 582 |
<div class="arch-flow">
|
| 583 |
<div class="arch-box">
|
| 584 |
+
<div class="arch-box-title">Client UI</div>
|
| 585 |
<div class="arch-box-sub">Gradio 6.0</div>
|
| 586 |
</div>
|
| 587 |
<span class="arch-arrow">→</span>
|
| 588 |
<div class="arch-box secondary">
|
| 589 |
+
<div class="arch-box-title">Services</div>
|
| 590 |
+
<div class="arch-box-sub">Python Backend</div>
|
| 591 |
</div>
|
| 592 |
<span class="arch-arrow">→</span>
|
| 593 |
<div class="arch-box tertiary">
|
| 594 |
+
<div class="arch-box-title">AI Models</div>
|
| 595 |
+
<div class="arch-box-sub">Gemini + VoiceKit MCP</div>
|
| 596 |
</div>
|
| 597 |
</div>
|
| 598 |
</div>
|
|
|
|
| 612 |
<span class="tech-version">6.0.0</span>
|
| 613 |
</div>
|
| 614 |
<p class="tech-description">
|
| 615 |
+
Python-based UI framework. Powers interactive web interface with voice input, real-time feedback, modals, and dynamic game state transitions.
|
| 616 |
</p>
|
| 617 |
</div>
|
| 618 |
<div class="tech-card">
|
| 619 |
<div class="tech-header">
|
| 620 |
<div class="tech-icon icon-python">Py</div>
|
| 621 |
<span class="tech-name">Python</span>
|
| 622 |
+
<span class="tech-version">3.11+</span>
|
| 623 |
</div>
|
| 624 |
<p class="tech-description">
|
| 625 |
+
Client application language. Leverages type hints, async/await patterns, and modern Python features for clean, maintainable code.
|
| 626 |
</p>
|
| 627 |
</div>
|
| 628 |
<div class="tech-card">
|
|
|
|
| 632 |
<span class="tech-version">0.28.1</span>
|
| 633 |
</div>
|
| 634 |
<p class="tech-description">
|
| 635 |
+
Async HTTP client library for communicating with backend services and external APIs with full HTTP/2 support.
|
| 636 |
+
</p>
|
| 637 |
+
</div>
|
| 638 |
+
<div class="tech-card">
|
| 639 |
+
<div class="tech-header">
|
| 640 |
+
<div class="tech-icon" style="background: linear-gradient(135deg, #8b5cf6, #a78bfa);">Js</div>
|
| 641 |
+
<span class="tech-name">Custom CSS/JS</span>
|
| 642 |
+
<span class="tech-version">-</span>
|
| 643 |
+
</div>
|
| 644 |
+
<p class="tech-description">
|
| 645 |
+
Custom styling with animated backgrounds, modal components, radar charts, and responsive design for optimal user experience.
|
| 646 |
</p>
|
| 647 |
</div>
|
| 648 |
</div>
|
|
|
|
| 652 |
<section id="backend" class="section">
|
| 653 |
<h2 class="section-title">
|
| 654 |
<span class="section-icon backend">⚡</span>
|
| 655 |
+
Backend Services
|
| 656 |
</h2>
|
| 657 |
<div class="tech-grid">
|
| 658 |
<div class="tech-card">
|
| 659 |
<div class="tech-header">
|
| 660 |
+
<div class="tech-icon icon-python">Py</div>
|
| 661 |
+
<span class="tech-name">Python Services</span>
|
| 662 |
+
<span class="tech-version">3.11+</span>
|
| 663 |
</div>
|
| 664 |
<p class="tech-description">
|
| 665 |
+
Modular service architecture including voice analyzer, hint generator, database layer, and game state management with async/await support.
|
| 666 |
</p>
|
| 667 |
</div>
|
| 668 |
<div class="tech-card">
|
| 669 |
<div class="tech-header">
|
| 670 |
+
<div class="tech-icon" style="background: linear-gradient(135deg, #ec4899, #f472b6);">Pd</div>
|
| 671 |
+
<span class="tech-name">Pydantic</span>
|
| 672 |
+
<span class="tech-version">2.0+</span>
|
| 673 |
</div>
|
| 674 |
<p class="tech-description">
|
| 675 |
+
Data validation and settings management with type safety, automatic serialization, and schema validation.
|
| 676 |
</p>
|
| 677 |
</div>
|
| 678 |
<div class="tech-card">
|
| 679 |
<div class="tech-header">
|
| 680 |
+
<div class="tech-icon" style="background: linear-gradient(135deg, #10b981, #34d399);">Ay</div>
|
| 681 |
+
<span class="tech-name">Asyncio</span>
|
| 682 |
+
<span class="tech-version">Built-in</span>
|
| 683 |
</div>
|
| 684 |
<p class="tech-description">
|
| 685 |
+
Native Python async library for concurrent operations, enabling efficient handling of multiple voice analysis requests.
|
| 686 |
</p>
|
| 687 |
</div>
|
| 688 |
<div class="tech-card">
|
| 689 |
<div class="tech-header">
|
| 690 |
+
<div class="tech-icon" style="background: linear-gradient(135deg, #f59e0b, #fbbf24);">Dv</div>
|
| 691 |
+
<span class="tech-name">Python-dotenv</span>
|
| 692 |
+
<span class="tech-version">1.0.0</span>
|
| 693 |
</div>
|
| 694 |
<p class="tech-description">
|
| 695 |
+
Environment variable management from .env files for secure API keys and configuration settings.
|
| 696 |
</p>
|
| 697 |
</div>
|
| 698 |
</div>
|
|
|
|
| 709 |
<div class="tech-header">
|
| 710 |
<div class="tech-icon icon-postgres">Pg</div>
|
| 711 |
<span class="tech-name">PostgreSQL</span>
|
| 712 |
+
<span class="tech-version">16-alpine</span>
|
| 713 |
</div>
|
| 714 |
<p class="tech-description">
|
| 715 |
+
Production-grade relational database. Stores user sessions, game history, daily puzzles, attempt statistics, and score analytics.
|
| 716 |
</p>
|
| 717 |
</div>
|
| 718 |
<div class="tech-card">
|
| 719 |
<div class="tech-header">
|
| 720 |
+
<div class="tech-icon" style="background: linear-gradient(135deg, #0891b2, #22d3ee);">Ps</div>
|
| 721 |
+
<span class="tech-name">psycopg2</span>
|
| 722 |
+
<span class="tech-version">2.9+</span>
|
| 723 |
</div>
|
| 724 |
<p class="tech-description">
|
| 725 |
+
PostgreSQL adapter for Python. Provides efficient database connections and query execution with connection pooling.
|
| 726 |
+
</p>
|
| 727 |
+
</div>
|
| 728 |
+
<div class="tech-card">
|
| 729 |
+
<div class="tech-header">
|
| 730 |
+
<div class="tech-icon icon-docker">Dk</div>
|
| 731 |
+
<span class="tech-name">Docker Volume</span>
|
| 732 |
+
<span class="tech-version">-</span>
|
| 733 |
+
</div>
|
| 734 |
+
<p class="tech-description">
|
| 735 |
+
Persistent data storage with Docker volumes ensuring database persistence across container restarts.
|
| 736 |
</p>
|
| 737 |
</div>
|
| 738 |
</div>
|
|
|
|
| 749 |
<div class="tech-header">
|
| 750 |
<div class="tech-icon icon-gemini">Gm</div>
|
| 751 |
<span class="tech-name">Google Gemini</span>
|
| 752 |
+
<span class="tech-version">2.0 Flash</span>
|
| 753 |
</div>
|
| 754 |
<p class="tech-description">
|
| 755 |
+
Multimodal AI model powering intelligent hint generation, contextual feedback, and adaptive difficulty based on player performance.
|
| 756 |
</p>
|
| 757 |
</div>
|
| 758 |
<div class="tech-card">
|
| 759 |
<div class="tech-header">
|
| 760 |
+
<div class="tech-icon" style="background: linear-gradient(135deg, #6366f1, #818cf8);">11</div>
|
| 761 |
+
<span class="tech-name">ElevenLabs</span>
|
| 762 |
+
<span class="tech-version">2.24.0</span>
|
| 763 |
</div>
|
| 764 |
<p class="tech-description">
|
| 765 |
+
Advanced text-to-speech API with voice cloning. Generates partial audio hints with reference voice matching for natural gameplay experience.
|
| 766 |
</p>
|
| 767 |
</div>
|
| 768 |
<div class="tech-card">
|
| 769 |
<div class="tech-header">
|
| 770 |
<div class="tech-icon icon-numpy">Np</div>
|
| 771 |
<span class="tech-name">NumPy</span>
|
| 772 |
+
<span class="tech-version">1.26.4</span>
|
| 773 |
</div>
|
| 774 |
<p class="tech-description">
|
| 775 |
+
Numerical computing library for audio signal processing, feature extraction, and statistical analysis of voice patterns.
|
| 776 |
</p>
|
| 777 |
</div>
|
| 778 |
<div class="tech-card">
|
| 779 |
<div class="tech-header">
|
| 780 |
<div class="tech-icon" style="background: linear-gradient(135deg, #0d9488, #2dd4bf);">Pd</div>
|
| 781 |
<span class="tech-name">Pandas</span>
|
| 782 |
+
<span class="tech-version">2.2.3</span>
|
| 783 |
+
</div>
|
| 784 |
+
<p class="tech-description">
|
| 785 |
+
Data analysis library for processing game statistics, tracking user progress, and generating performance insights.
|
| 786 |
+
</p>
|
| 787 |
+
</div>
|
| 788 |
+
</div>
|
| 789 |
+
</section>
|
| 790 |
+
|
| 791 |
+
<!-- MCP Integration -->
|
| 792 |
+
<section id="mcp" class="section">
|
| 793 |
+
<h2 class="section-title">
|
| 794 |
+
<span class="section-icon" style="background: linear-gradient(135deg, #8b5cf6, #a78bfa);">🔌</span>
|
| 795 |
+
Model Context Protocol (MCP)
|
| 796 |
+
</h2>
|
| 797 |
+
<div class="tech-grid">
|
| 798 |
+
<div class="tech-card">
|
| 799 |
+
<div class="tech-header">
|
| 800 |
+
<div class="tech-icon" style="background: linear-gradient(135deg, #0891b2, #22d3ee);">MC</div>
|
| 801 |
+
<span class="tech-name">MCP Client</span>
|
| 802 |
+
<span class="tech-version">1.22.0</span>
|
| 803 |
</div>
|
| 804 |
<p class="tech-description">
|
| 805 |
+
Model Context Protocol client for seamless AI model integration. Manages sessions, tool calling, and context sharing across AI services.
|
| 806 |
+
</p>
|
| 807 |
+
</div>
|
| 808 |
+
<div class="tech-card">
|
| 809 |
+
<div class="tech-header">
|
| 810 |
+
<div class="tech-icon" style="background: linear-gradient(135deg, #f97316, #fb923c);">Vk</div>
|
| 811 |
+
<span class="tech-name">VoiceKit MCP</span>
|
| 812 |
+
<span class="tech-version">HuggingFace</span>
|
| 813 |
+
</div>
|
| 814 |
+
<p class="tech-description">
|
| 815 |
+
Voice analysis MCP server providing voicekit_analyze_voice_similarity tool. Compares user recordings with reference audio for pronunciation scoring.
|
| 816 |
+
</p>
|
| 817 |
+
</div>
|
| 818 |
+
<div class="tech-card">
|
| 819 |
+
<div class="tech-header">
|
| 820 |
+
<div class="tech-icon" style="background: linear-gradient(135deg, #ec4899, #f472b6);">SS</div>
|
| 821 |
+
<span class="tech-name">SSE Client</span>
|
| 822 |
+
<span class="tech-version">MCP</span>
|
| 823 |
+
</div>
|
| 824 |
+
<p class="tech-description">
|
| 825 |
+
Server-Sent Events client for real-time MCP communication. Enables streaming responses and bidirectional tool invocation.
|
| 826 |
</p>
|
| 827 |
</div>
|
| 828 |
</div>
|
|
|
|
| 842 |
<span class="tech-version">0.25.1</span>
|
| 843 |
</div>
|
| 844 |
<p class="tech-description">
|
| 845 |
+
Audio manipulation library for format conversion, normalization, compression, and audio file editing with effects processing.
|
| 846 |
</p>
|
| 847 |
</div>
|
| 848 |
<div class="tech-card">
|
|
|
|
| 852 |
<span class="tech-version">via ffmpy</span>
|
| 853 |
</div>
|
| 854 |
<p class="tech-description">
|
| 855 |
+
Multimedia framework for audio encoding/decoding. Handles format conversion and compression for optimal file sizes.
|
| 856 |
+
</p>
|
| 857 |
+
</div>
|
| 858 |
+
<div class="tech-card">
|
| 859 |
+
<div class="tech-header">
|
| 860 |
+
<div class="tech-icon" style="background: linear-gradient(135deg, #7c3aed, #a78bfa);">B6</div>
|
| 861 |
+
<span class="tech-name">Base64</span>
|
| 862 |
+
<span class="tech-version">Built-in</span>
|
| 863 |
+
</div>
|
| 864 |
+
<p class="tech-description">
|
| 865 |
+
Audio data encoding for API transmission. Converts binary audio to base64 strings for MCP tool parameters.
|
| 866 |
+
</p>
|
| 867 |
+
</div>
|
| 868 |
+
<div class="tech-card">
|
| 869 |
+
<div class="tech-header">
|
| 870 |
+
<div class="tech-icon" style="background: linear-gradient(135deg, #dc2626, #f87171);">Af</div>
|
| 871 |
+
<span class="tech-name">Aiofiles</span>
|
| 872 |
+
<span class="tech-version">24.1.0</span>
|
| 873 |
+
</div>
|
| 874 |
+
<p class="tech-description">
|
| 875 |
+
Async file operations for non-blocking audio file I/O. Enables concurrent file processing without blocking the event loop.
|
| 876 |
</p>
|
| 877 |
</div>
|
| 878 |
</div>
|
|
|
|
| 892 |
<span class="tech-version">Latest</span>
|
| 893 |
</div>
|
| 894 |
<p class="tech-description">
|
| 895 |
+
Containerization platform providing consistent development and deployment environments across all systems.
|
| 896 |
</p>
|
| 897 |
</div>
|
| 898 |
<div class="tech-card">
|
|
|
|
| 902 |
<span class="tech-version">Latest</span>
|
| 903 |
</div>
|
| 904 |
<p class="tech-description">
|
| 905 |
+
Multi-container orchestration for managing PostgreSQL database service with volume persistence and networking.
|
| 906 |
</p>
|
| 907 |
</div>
|
| 908 |
<div class="tech-card">
|
| 909 |
<div class="tech-header">
|
| 910 |
+
<div class="tech-icon" style="background: linear-gradient(135deg, #64748b, #94a3b8);">Ev</div>
|
| 911 |
+
<span class="tech-name">Environment Config</span>
|
| 912 |
+
<span class="tech-version">-</span>
|
| 913 |
</div>
|
| 914 |
<p class="tech-description">
|
| 915 |
+
Secure configuration management with .env files for API keys (Gemini, ElevenLabs, VoiceKit) and sensitive settings.
|
| 916 |
</p>
|
| 917 |
</div>
|
| 918 |
<div class="tech-card">
|
| 919 |
<div class="tech-header">
|
| 920 |
+
<div class="tech-icon" style="background: linear-gradient(135deg, #16a34a, #4ade80);">Sh</div>
|
| 921 |
+
<span class="tech-name">Shell Scripts</span>
|
| 922 |
+
<span class="tech-version">Bash</span>
|
| 923 |
+
</div>
|
| 924 |
+
<p class="tech-description">
|
| 925 |
+
Automated deployment scripts (start.sh, stop.sh) for streamlined application lifecycle management.
|
| 926 |
+
</p>
|
| 927 |
+
</div>
|
| 928 |
+
<div class="tech-card">
|
| 929 |
+
<div class="tech-header">
|
| 930 |
+
<div class="tech-icon" style="background: linear-gradient(135deg, #ea580c, #fb923c);">Hf</div>
|
| 931 |
+
<span class="tech-name">HuggingFace</span>
|
| 932 |
+
<span class="tech-version">Hub</span>
|
| 933 |
+
</div>
|
| 934 |
+
<p class="tech-description">
|
| 935 |
+
Deployment platform hosting VoiceKit MCP server and application demos with integrated spaces for testing.
|
| 936 |
+
</p>
|
| 937 |
+
</div>
|
| 938 |
+
<div class="tech-card">
|
| 939 |
+
<div class="tech-header">
|
| 940 |
+
<div class="tech-icon" style="background: linear-gradient(135deg, #9333ea, #a855f7);">Lg</div>
|
| 941 |
+
<span class="tech-name">Logging</span>
|
| 942 |
+
<span class="tech-version">Built-in</span>
|
| 943 |
</div>
|
| 944 |
<p class="tech-description">
|
| 945 |
+
Comprehensive logging system for debugging, monitoring API calls, tracking errors, and performance analysis.
|
| 946 |
</p>
|
| 947 |
</div>
|
| 948 |
</div>
|
client/docs/user-guide.html
CHANGED
|
@@ -3,7 +3,7 @@
|
|
| 3 |
<head>
|
| 4 |
<meta charset="UTF-8">
|
| 5 |
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
-
<title>User Guide -
|
| 7 |
<link href="https://fonts.googleapis.com/css2?family=Nunito:wght@400;600;700;800&display=swap" rel="stylesheet">
|
| 8 |
<style>
|
| 9 |
:root {
|
|
@@ -792,8 +792,8 @@
|
|
| 792 |
<div class="metric-label">Pronunciation</div>
|
| 793 |
</div>
|
| 794 |
<div class="metric-item">
|
| 795 |
-
<div class="metric-value">
|
| 796 |
-
<div class="metric-label">
|
| 797 |
</div>
|
| 798 |
<div class="metric-item">
|
| 799 |
<div class="metric-value">Pitch</div>
|
|
@@ -813,8 +813,8 @@
|
|
| 813 |
<div class="content-card">
|
| 814 |
<h3>🎯 Score Interpretation</h3>
|
| 815 |
<ul>
|
| 816 |
-
<li><strong>
|
| 817 |
-
<li><strong>60-
|
| 818 |
<li><strong>40-59 points:</strong> Right direction, but try a different word.</li>
|
| 819 |
<li><strong>Below 40:</strong> Far from the answer. Try a new guess.</li>
|
| 820 |
</ul>
|
|
@@ -840,7 +840,7 @@
|
|
| 840 |
<div class="content-card">
|
| 841 |
<h3>💬 AI Hint Chatbot</h3>
|
| 842 |
<p>
|
| 843 |
-
Click the floating button at the
|
| 844 |
Request hints from the chatbot to receive indirect clues about the answer.
|
| 845 |
</p>
|
| 846 |
</div>
|
|
@@ -852,16 +852,17 @@
|
|
| 852 |
<li><strong>"What's the category?":</strong> Learn the classification of the answer word.</li>
|
| 853 |
<li><strong>"Tell me the first letter":</strong> Get a hint about the first letter.</li>
|
| 854 |
<li><strong>"What words sound similar?":</strong> Get pronunciation-related suggestions.</li>
|
|
|
|
| 855 |
</ul>
|
| 856 |
</div>
|
| 857 |
|
| 858 |
-
<div class="warning-box">
|
| 859 |
<span class="warning-icon">💡</span>
|
| 860 |
<div class="warning-text">
|
| 861 |
<strong>Limited Hint Usage!</strong> Chatbot hints can only be used a limited number of times.
|
| 862 |
Request hints carefully!
|
| 863 |
</div>
|
| 864 |
-
</div>
|
| 865 |
</section>
|
| 866 |
|
| 867 |
<!-- Tips & Tricks -->
|
|
|
|
| 3 |
<head>
|
| 4 |
<meta charset="UTF-8">
|
| 5 |
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
+
<title>User Guide - Voice Semantle</title>
|
| 7 |
<link href="https://fonts.googleapis.com/css2?family=Nunito:wght@400;600;700;800&display=swap" rel="stylesheet">
|
| 8 |
<style>
|
| 9 |
:root {
|
|
|
|
| 792 |
<div class="metric-label">Pronunciation</div>
|
| 793 |
</div>
|
| 794 |
<div class="metric-item">
|
| 795 |
+
<div class="metric-value">Line Acc.</div>
|
| 796 |
+
<div class="metric-label">Line Accuracy</div>
|
| 797 |
</div>
|
| 798 |
<div class="metric-item">
|
| 799 |
<div class="metric-value">Pitch</div>
|
|
|
|
| 813 |
<div class="content-card">
|
| 814 |
<h3>🎯 Score Interpretation</h3>
|
| 815 |
<ul>
|
| 816 |
+
<li><strong>85+ points:</strong> Very similar to the answer! You're almost there!</li>
|
| 817 |
+
<li><strong>60-84 points:</strong> Good attempt. Check the hints for guidance.</li>
|
| 818 |
<li><strong>40-59 points:</strong> Right direction, but try a different word.</li>
|
| 819 |
<li><strong>Below 40:</strong> Far from the answer. Try a new guess.</li>
|
| 820 |
</ul>
|
|
|
|
| 840 |
<div class="content-card">
|
| 841 |
<h3>💬 AI Hint Chatbot</h3>
|
| 842 |
<p>
|
| 843 |
+
Click the floating button at the top right of the screen to open the AI hint chatbot.
|
| 844 |
Request hints from the chatbot to receive indirect clues about the answer.
|
| 845 |
</p>
|
| 846 |
</div>
|
|
|
|
| 852 |
<li><strong>"What's the category?":</strong> Learn the classification of the answer word.</li>
|
| 853 |
<li><strong>"Tell me the first letter":</strong> Get a hint about the first letter.</li>
|
| 854 |
<li><strong>"What words sound similar?":</strong> Get pronunciation-related suggestions.</li>
|
| 855 |
+
<li><strong>"Give me an audio hint":</strong> AI provides an audio hint in a tone similar to your voice.</li>
|
| 856 |
</ul>
|
| 857 |
</div>
|
| 858 |
|
| 859 |
+
<!-- <div class="warning-box">
|
| 860 |
<span class="warning-icon">💡</span>
|
| 861 |
<div class="warning-text">
|
| 862 |
<strong>Limited Hint Usage!</strong> Chatbot hints can only be used a limited number of times.
|
| 863 |
Request hints carefully!
|
| 864 |
</div>
|
| 865 |
+
</div> -->
|
| 866 |
</section>
|
| 867 |
|
| 868 |
<!-- Tips & Tricks -->
|
client/frontend/components/floating_chatbot.py
CHANGED
|
@@ -374,14 +374,14 @@ def call_ai_backend(message: str, user_id: str, history: List[Dict], game_state:
|
|
| 374 |
system_prompt = build_system_prompt_from_game_state(game_state, include_audio_tool=tools_will_be_enabled)
|
| 375 |
|
| 376 |
print(f"[CHATBOT] Calling Gemini with context:")
|
| 377 |
-
print(f" - User ID: {user_id}")
|
| 378 |
-
print(f" - Message: {message}")
|
| 379 |
-
print(f" - History length: {len(history)}")
|
| 380 |
-
print(f" - Game state attempts: {attempt_count}")
|
| 381 |
-
print(f" - Answer word: {answer_word}")
|
| 382 |
-
print(f" - ElevenLabs AVAILABLE: {ELEVENLABS_AVAILABLE}")
|
| 383 |
-
print(f" - ElevenLabs API key set: {bool(api_key)}")
|
| 384 |
-
print(f" - ElevenLabs configured: {elevenlabs_ready}")
|
| 385 |
|
| 386 |
# Define audio hint tool if ElevenLabs is configured
|
| 387 |
tools = []
|
|
@@ -426,7 +426,7 @@ def call_ai_backend(message: str, user_id: str, history: List[Dict], game_state:
|
|
| 426 |
print(f"[CHATBOT ERROR] {error}")
|
| 427 |
return f"Sorry, I encountered an error: {error}\n\nPlease try again in a moment."
|
| 428 |
|
| 429 |
-
print(f"[CHATBOT] Got response: text={len(response_text)} chars, tool_calls={len(tool_calls) if tool_calls else 0}")
|
| 430 |
|
| 431 |
# Handle tool calls
|
| 432 |
if tool_calls:
|
|
@@ -434,18 +434,18 @@ def call_ai_backend(message: str, user_id: str, history: List[Dict], game_state:
|
|
| 434 |
if tool_call['name'] == 'generate_audio_hint':
|
| 435 |
hint_type = tool_call['input'].get('hint_type', 'syllable')
|
| 436 |
word_index = tool_call['input'].get('word_index', 0) # Default to first word
|
| 437 |
-
print(f"[CHATBOT] Gemini requested audio hint: type={hint_type}, word_index={word_index}")
|
| 438 |
|
| 439 |
# Extract the appropriate portion based on hint_type, word_index, and answer_word
|
| 440 |
from utils.elevenlabs_tts import extract_hint_portion_for_word, generate_audio_hint
|
| 441 |
|
| 442 |
text_to_speak, actual_hint_type = extract_hint_portion_for_word(answer_word, attempt_count, word_index)
|
| 443 |
# Use voice cloning from reference audio when available
|
| 444 |
-
print(f"[CHATBOT] Generating audio for: '{text_to_speak}' with reference: {reference_audio_path}")
|
| 445 |
audio_path = generate_audio_hint(text_to_speak, actual_hint_type, reference_audio_path=reference_audio_path)
|
| 446 |
|
| 447 |
if audio_path:
|
| 448 |
-
print(f"[CHATBOT] Audio hint generated: {audio_path}")
|
| 449 |
# Always include text with audio hint
|
| 450 |
if not response_text or response_text.strip() == "":
|
| 451 |
response_text = f"🎵 Here's an audio hint! Listen to how it sounds:"
|
|
@@ -584,13 +584,13 @@ Don't overthink your first try – it's meant to be a shot in the dark! Just say
|
|
| 584 |
hist.append({"role": "user", "content": message})
|
| 585 |
|
| 586 |
# 콘솔 로그
|
| 587 |
-
print("=" * 60)
|
| 588 |
print("[CHATBOT] 메시지 전송")
|
| 589 |
-
print(f" - Session ID (UUID): {uid}")
|
| 590 |
-
print(f" - Message: {message}")
|
| 591 |
-
print(f" - History Length: {len(hist)}")
|
| 592 |
-
print(f" - Game State Attempts: {len(gs.get('guesses', [])) if gs else 0}")
|
| 593 |
-
print("=" * 60)
|
| 594 |
|
| 595 |
# AI 호출 (Gemini with game_state context)
|
| 596 |
response = call_ai_backend(message, uid, hist, game_state=gs)
|
|
@@ -606,8 +606,8 @@ Don't overthink your first try – it's meant to be a shot in the dark! Just say
|
|
| 606 |
# Path is relative to project root (3 levels up from this file)
|
| 607 |
project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
|
| 608 |
audio_path = os.path.join(project_root, audio_path)
|
| 609 |
-
print(f"[CHATBOT] Audio hint included: {audio_path}")
|
| 610 |
-
print(f"[CHATBOT] Audio file exists: {os.path.exists(audio_path)}")
|
| 611 |
|
| 612 |
# Gradio 6: Use gr.Audio() component for audio content
|
| 613 |
# Reference: chatbot_core_components_simple demo
|
|
@@ -622,7 +622,7 @@ Don't overthink your first try – it's meant to be a shot in the dark! Just say
|
|
| 622 |
|
| 623 |
def close_chat_handler():
|
| 624 |
"""닫기 버튼 핸들러 - 체크박스만 False로"""
|
| 625 |
-
print("[CHATBOT] 닫기 버튼 클릭됨")
|
| 626 |
return gr.update(value=False)
|
| 627 |
|
| 628 |
# ============================================================
|
|
|
|
| 374 |
system_prompt = build_system_prompt_from_game_state(game_state, include_audio_tool=tools_will_be_enabled)
|
| 375 |
|
| 376 |
print(f"[CHATBOT] Calling Gemini with context:")
|
| 377 |
+
# print(f" - User ID: {user_id}")
|
| 378 |
+
# print(f" - Message: {message}")
|
| 379 |
+
# print(f" - History length: {len(history)}")
|
| 380 |
+
# print(f" - Game state attempts: {attempt_count}")
|
| 381 |
+
# print(f" - Answer word: {answer_word}")
|
| 382 |
+
# print(f" - ElevenLabs AVAILABLE: {ELEVENLABS_AVAILABLE}")
|
| 383 |
+
# print(f" - ElevenLabs API key set: {bool(api_key)}")
|
| 384 |
+
# print(f" - ElevenLabs configured: {elevenlabs_ready}")
|
| 385 |
|
| 386 |
# Define audio hint tool if ElevenLabs is configured
|
| 387 |
tools = []
|
|
|
|
| 426 |
print(f"[CHATBOT ERROR] {error}")
|
| 427 |
return f"Sorry, I encountered an error: {error}\n\nPlease try again in a moment."
|
| 428 |
|
| 429 |
+
# print(f"[CHATBOT] Got response: text={len(response_text)} chars, tool_calls={len(tool_calls) if tool_calls else 0}")
|
| 430 |
|
| 431 |
# Handle tool calls
|
| 432 |
if tool_calls:
|
|
|
|
| 434 |
if tool_call['name'] == 'generate_audio_hint':
|
| 435 |
hint_type = tool_call['input'].get('hint_type', 'syllable')
|
| 436 |
word_index = tool_call['input'].get('word_index', 0) # Default to first word
|
| 437 |
+
# print(f"[CHATBOT] Gemini requested audio hint: type={hint_type}, word_index={word_index}")
|
| 438 |
|
| 439 |
# Extract the appropriate portion based on hint_type, word_index, and answer_word
|
| 440 |
from utils.elevenlabs_tts import extract_hint_portion_for_word, generate_audio_hint
|
| 441 |
|
| 442 |
text_to_speak, actual_hint_type = extract_hint_portion_for_word(answer_word, attempt_count, word_index)
|
| 443 |
# Use voice cloning from reference audio when available
|
| 444 |
+
# print(f"[CHATBOT] Generating audio for: '{text_to_speak}' with reference: {reference_audio_path}")
|
| 445 |
audio_path = generate_audio_hint(text_to_speak, actual_hint_type, reference_audio_path=reference_audio_path)
|
| 446 |
|
| 447 |
if audio_path:
|
| 448 |
+
# print(f"[CHATBOT] Audio hint generated: {audio_path}")
|
| 449 |
# Always include text with audio hint
|
| 450 |
if not response_text or response_text.strip() == "":
|
| 451 |
response_text = f"🎵 Here's an audio hint! Listen to how it sounds:"
|
|
|
|
| 584 |
hist.append({"role": "user", "content": message})
|
| 585 |
|
| 586 |
# 콘솔 로그
|
| 587 |
+
# print("=" * 60)
|
| 588 |
print("[CHATBOT] 메시지 전송")
|
| 589 |
+
# print(f" - Session ID (UUID): {uid}")
|
| 590 |
+
# print(f" - Message: {message}")
|
| 591 |
+
# print(f" - History Length: {len(hist)}")
|
| 592 |
+
# print(f" - Game State Attempts: {len(gs.get('guesses', [])) if gs else 0}")
|
| 593 |
+
# print("=" * 60)
|
| 594 |
|
| 595 |
# AI 호출 (Gemini with game_state context)
|
| 596 |
response = call_ai_backend(message, uid, hist, game_state=gs)
|
|
|
|
| 606 |
# Path is relative to project root (3 levels up from this file)
|
| 607 |
project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
|
| 608 |
audio_path = os.path.join(project_root, audio_path)
|
| 609 |
+
# print(f"[CHATBOT] Audio hint included: {audio_path}")
|
| 610 |
+
# print(f"[CHATBOT] Audio file exists: {os.path.exists(audio_path)}")
|
| 611 |
|
| 612 |
# Gradio 6: Use gr.Audio() component for audio content
|
| 613 |
# Reference: chatbot_core_components_simple demo
|
|
|
|
| 622 |
|
| 623 |
def close_chat_handler():
|
| 624 |
"""닫기 버튼 핸들러 - 체크박스만 False로"""
|
| 625 |
+
# print("[CHATBOT] 닫기 버튼 클릭됨")
|
| 626 |
return gr.update(value=False)
|
| 627 |
|
| 628 |
# ============================================================
|
client/frontend/styles/result_screen_style.py
CHANGED
|
@@ -279,6 +279,18 @@ RESULT_SCREEN_CSS = """
|
|
| 279 |
text-shadow: 1px 1px 2px rgba(0, 0, 0, 0.1);
|
| 280 |
}
|
| 281 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 282 |
.audio-compare-wrapper {
|
| 283 |
display: flex;
|
| 284 |
gap: 12px;
|
|
@@ -336,6 +348,26 @@ RESULT_SCREEN_CSS = """
|
|
| 336 |
font-weight: 700;
|
| 337 |
}
|
| 338 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 339 |
/* 통계 카드 색상 - 라이트모드 (하늘색 애니메이션 테마) */
|
| 340 |
.stat-blue {
|
| 341 |
background: linear-gradient(135deg, #e3f5ff, #c7e9ff);
|
|
|
|
| 279 |
text-shadow: 1px 1px 2px rgba(0, 0, 0, 0.1);
|
| 280 |
}
|
| 281 |
|
| 282 |
+
@media (max-width: 800px) {
|
| 283 |
+
.answer-word {
|
| 284 |
+
font-size: 36px;
|
| 285 |
+
}
|
| 286 |
+
}
|
| 287 |
+
|
| 288 |
+
@media (max-width: 600px) {
|
| 289 |
+
.answer-word {
|
| 290 |
+
font-size: 28px;
|
| 291 |
+
}
|
| 292 |
+
}
|
| 293 |
+
|
| 294 |
.audio-compare-wrapper {
|
| 295 |
display: flex;
|
| 296 |
gap: 12px;
|
|
|
|
| 348 |
font-weight: 700;
|
| 349 |
}
|
| 350 |
|
| 351 |
+
@media (max-width: 800px) {
|
| 352 |
+
.stat-label {
|
| 353 |
+
font-size: 0.7rem;
|
| 354 |
+
}
|
| 355 |
+
|
| 356 |
+
.stat-value {
|
| 357 |
+
font-size: 1.5rem;
|
| 358 |
+
}
|
| 359 |
+
}
|
| 360 |
+
|
| 361 |
+
@media (max-width: 600px) {
|
| 362 |
+
.stat-label {
|
| 363 |
+
font-size: 0.6rem;
|
| 364 |
+
}
|
| 365 |
+
|
| 366 |
+
.stat-value {
|
| 367 |
+
font-size: 1.2rem;
|
| 368 |
+
}
|
| 369 |
+
}
|
| 370 |
+
|
| 371 |
/* 통계 카드 색상 - 라이트모드 (하늘색 애니메이션 테마) */
|
| 372 |
.stat-blue {
|
| 373 |
background: linear-gradient(135deg, #e3f5ff, #c7e9ff);
|
client/services/analysis_service.py
CHANGED
|
@@ -149,19 +149,19 @@ async def analyze_voice(audio_bytes: bytes, date: str, session_id: str) -> Dict:
|
|
| 149 |
"user_text": user_text,
|
| 150 |
}
|
| 151 |
|
| 152 |
-
print(f"\n{'='*50}")
|
| 153 |
-
print(f"[SCORING RESULT] analyze_voice result:")
|
| 154 |
-
print(f" - status: {result['status']}")
|
| 155 |
-
print(f" - category: {result['category']}")
|
| 156 |
-
print(f" - pitch: {result['pitch']}")
|
| 157 |
-
print(f" - rhythm: {result['rhythm']}")
|
| 158 |
-
print(f" - energy: {result['energy']}")
|
| 159 |
-
print(f" - pronunciation: {result['pronunciation']}")
|
| 160 |
-
print(f" - transcript: {result['transcript']}")
|
| 161 |
-
print(f" - overall: {result['overall']}")
|
| 162 |
-
print(f" - is_correct: {result['is_correct']}")
|
| 163 |
-
print(f" - user_text: {result['user_text']}")
|
| 164 |
-
print(f" - advice: {result['advice'][:100]}..." if len(result['advice']) > 100 else f" - advice: {result['advice']}")
|
| 165 |
-
print(f"{'='*50}\n")
|
| 166 |
|
| 167 |
return result
|
|
|
|
| 149 |
"user_text": user_text,
|
| 150 |
}
|
| 151 |
|
| 152 |
+
# print(f"\n{'='*50}")
|
| 153 |
+
# print(f"[SCORING RESULT] analyze_voice result:")
|
| 154 |
+
# print(f" - status: {result['status']}")
|
| 155 |
+
# print(f" - category: {result['category']}")
|
| 156 |
+
# print(f" - pitch: {result['pitch']}")
|
| 157 |
+
# print(f" - rhythm: {result['rhythm']}")
|
| 158 |
+
# print(f" - energy: {result['energy']}")
|
| 159 |
+
# print(f" - pronunciation: {result['pronunciation']}")
|
| 160 |
+
# print(f" - transcript: {result['transcript']}")
|
| 161 |
+
# print(f" - overall: {result['overall']}")
|
| 162 |
+
# print(f" - is_correct: {result['is_correct']}")
|
| 163 |
+
# print(f" - user_text: {result['user_text']}")
|
| 164 |
+
# print(f" - advice: {result['advice'][:100]}..." if len(result['advice']) > 100 else f" - advice: {result['advice']}")
|
| 165 |
+
# print(f"{'='*50}\n")
|
| 166 |
|
| 167 |
return result
|
client/services/hint_generator.py
CHANGED
|
@@ -124,8 +124,8 @@ Return ONLY this JSON:
|
|
| 124 |
"""
|
| 125 |
|
| 126 |
print(f"[GEMINI HINT] Calling Gemini for hint generation...")
|
| 127 |
-
print(f"[GEMINI HINT] User said: '{user_text}', Overall: {overall_score}, Attempt: {attempt}")
|
| 128 |
-
print(f"[GEMINI HINT] Hint history: {hint_history}")
|
| 129 |
|
| 130 |
# Call Gemini
|
| 131 |
response = call_gemini_with_tools(
|
|
@@ -138,8 +138,8 @@ Return ONLY this JSON:
|
|
| 138 |
|
| 139 |
# Extract JSON from response
|
| 140 |
response_text, error = get_text_from_gemini_response(response)
|
| 141 |
-
print(f"[GEMINI HINT] Response text: {response_text[:200] if response_text else 'None'}...")
|
| 142 |
-
print(f"[GEMINI HINT] Error: {error}")
|
| 143 |
|
| 144 |
if error:
|
| 145 |
logger.error(f"Gemini response error: {error}")
|
|
@@ -154,12 +154,12 @@ Return ONLY this JSON:
|
|
| 154 |
if response_text.startswith("```"):
|
| 155 |
lines = response_text.split("\n")
|
| 156 |
response_text = "\n".join(lines[1:-1]) if len(lines) > 2 else response_text
|
| 157 |
-
print(f"[GEMINI HINT] Cleaned markdown, result: {response_text[:200]}...")
|
| 158 |
|
| 159 |
# Parse JSON
|
| 160 |
-
print(f"[GEMINI HINT] Parsing JSON: {response_text[:300]}...")
|
| 161 |
hints_json = json.loads(response_text)
|
| 162 |
-
print(f"[GEMINI HINT] ✓ Parsed successfully: {hints_json}")
|
| 163 |
logger.info(f"✓ Generated fresh hint for attempt {attempt}, category {category}")
|
| 164 |
|
| 165 |
return hints_json
|
|
|
|
| 124 |
"""
|
| 125 |
|
| 126 |
print(f"[GEMINI HINT] Calling Gemini for hint generation...")
|
| 127 |
+
# print(f"[GEMINI HINT] User said: '{user_text}', Overall: {overall_score}, Attempt: {attempt}")
|
| 128 |
+
# print(f"[GEMINI HINT] Hint history: {hint_history}")
|
| 129 |
|
| 130 |
# Call Gemini
|
| 131 |
response = call_gemini_with_tools(
|
|
|
|
| 138 |
|
| 139 |
# Extract JSON from response
|
| 140 |
response_text, error = get_text_from_gemini_response(response)
|
| 141 |
+
# print(f"[GEMINI HINT] Response text: {response_text[:200] if response_text else 'None'}...")
|
| 142 |
+
# print(f"[GEMINI HINT] Error: {error}")
|
| 143 |
|
| 144 |
if error:
|
| 145 |
logger.error(f"Gemini response error: {error}")
|
|
|
|
| 154 |
if response_text.startswith("```"):
|
| 155 |
lines = response_text.split("\n")
|
| 156 |
response_text = "\n".join(lines[1:-1]) if len(lines) > 2 else response_text
|
| 157 |
+
# print(f"[GEMINI HINT] Cleaned markdown, result: {response_text[:200]}...")
|
| 158 |
|
| 159 |
# Parse JSON
|
| 160 |
+
# print(f"[GEMINI HINT] Parsing JSON: {response_text[:300]}...")
|
| 161 |
hints_json = json.loads(response_text)
|
| 162 |
+
# print(f"[GEMINI HINT] ✓ Parsed successfully: {hints_json}")
|
| 163 |
logger.info(f"✓ Generated fresh hint for attempt {attempt}, category {category}")
|
| 164 |
|
| 165 |
return hints_json
|
client/utils/audio_validator.py
CHANGED
|
@@ -98,7 +98,7 @@ class AudioValidator:
|
|
| 98 |
similarity = self.calculate_similarity(recognized_text, expected_text)
|
| 99 |
threshold = self.difficulty_thresholds.get(difficulty, 0.80)
|
| 100 |
|
| 101 |
-
print(f"[DEBUG] Similarity: {similarity:.2%}, Threshold: {threshold:.2%}")
|
| 102 |
|
| 103 |
return similarity >= threshold
|
| 104 |
|
|
|
|
| 98 |
similarity = self.calculate_similarity(recognized_text, expected_text)
|
| 99 |
threshold = self.difficulty_thresholds.get(difficulty, 0.80)
|
| 100 |
|
| 101 |
+
# print(f"[DEBUG] Similarity: {similarity:.2%}, Threshold: {threshold:.2%}")
|
| 102 |
|
| 103 |
return similarity >= threshold
|
| 104 |
|
client/utils/elevenlabs_tts.py
CHANGED
|
@@ -57,7 +57,7 @@ def clone_voice_from_reference(reference_audio_path: str) -> Optional[str]:
|
|
| 57 |
|
| 58 |
# Check cache first
|
| 59 |
if reference_audio_path in _cloned_voice_cache:
|
| 60 |
-
print(f"✓ Using cached cloned voice for: {reference_audio_path}")
|
| 61 |
return _cloned_voice_cache[reference_audio_path]
|
| 62 |
|
| 63 |
try:
|
|
@@ -69,7 +69,7 @@ def clone_voice_from_reference(reference_audio_path: str) -> Optional[str]:
|
|
| 69 |
|
| 70 |
# Always use .wav for ElevenLabs (required format for voice cloning)
|
| 71 |
full_path = full_path.with_suffix('.wav')
|
| 72 |
-
print(f"🎵 Using WAV format for ElevenLabs: {full_path}")
|
| 73 |
|
| 74 |
if not full_path.exists():
|
| 75 |
print(f"⚠ Reference audio not found: {full_path}")
|
|
@@ -80,7 +80,7 @@ def clone_voice_from_reference(reference_audio_path: str) -> Optional[str]:
|
|
| 80 |
|
| 81 |
# Create instant voice clone using IVC API
|
| 82 |
voice_name = f"puzzle_voice_{hashlib.md5(reference_audio_path.encode()).hexdigest()[:8]}"
|
| 83 |
-
print(f"🎤 Cloning voice from: {full_path}")
|
| 84 |
|
| 85 |
# Read file as BytesIO (required by ElevenLabs SDK)
|
| 86 |
# remove_background_noise=False allows shorter samples (<4.6s)
|
|
@@ -93,7 +93,7 @@ def clone_voice_from_reference(reference_audio_path: str) -> Optional[str]:
|
|
| 93 |
|
| 94 |
voice_id = voice.voice_id
|
| 95 |
_cloned_voice_cache[reference_audio_path] = voice_id
|
| 96 |
-
print(f"✓ Voice cloned successfully: {voice_id}")
|
| 97 |
|
| 98 |
return voice_id
|
| 99 |
|
|
@@ -139,7 +139,8 @@ def generate_audio_hint(text: str, hint_type: str = "syllable", voice: str = "Ra
|
|
| 139 |
if reference_audio_path:
|
| 140 |
voice_id = clone_voice_from_reference(reference_audio_path)
|
| 141 |
if voice_id:
|
| 142 |
-
print(f"🎤 Using cloned voice: {voice_id}")
|
|
|
|
| 143 |
|
| 144 |
# Fallback to preset voice if cloning failed
|
| 145 |
if not voice_id:
|
|
@@ -150,10 +151,10 @@ def generate_audio_hint(text: str, hint_type: str = "syllable", voice: str = "Ra
|
|
| 150 |
"Charlie": "IKne3meq5aSn9XLyUdCD", # Charlie - casual male
|
| 151 |
}
|
| 152 |
voice_id = voice_ids.get(voice, "EXAVITQu4vr4xnSDxMaL") # Default to Sarah
|
| 153 |
-
print(f"🎵 Using preset voice: {voice}")
|
| 154 |
|
| 155 |
# Generate audio using text_to_speech.convert (v2.24.0 API)
|
| 156 |
-
print(f"🎵 Generating audio hint: '{text}' ({hint_type})")
|
| 157 |
audio = client.text_to_speech.convert(
|
| 158 |
voice_id=voice_id,
|
| 159 |
text=text,
|
|
@@ -173,7 +174,7 @@ def generate_audio_hint(text: str, hint_type: str = "syllable", voice: str = "Ra
|
|
| 173 |
# Return absolute path
|
| 174 |
absolute_path = str(filepath)
|
| 175 |
|
| 176 |
-
print(f"✓ Audio hint generated (fresh): {absolute_path}")
|
| 177 |
return absolute_path
|
| 178 |
|
| 179 |
except Exception as e:
|
|
|
|
| 57 |
|
| 58 |
# Check cache first
|
| 59 |
if reference_audio_path in _cloned_voice_cache:
|
| 60 |
+
# print(f"✓ Using cached cloned voice for: {reference_audio_path}")
|
| 61 |
return _cloned_voice_cache[reference_audio_path]
|
| 62 |
|
| 63 |
try:
|
|
|
|
| 69 |
|
| 70 |
# Always use .wav for ElevenLabs (required format for voice cloning)
|
| 71 |
full_path = full_path.with_suffix('.wav')
|
| 72 |
+
# print(f"🎵 Using WAV format for ElevenLabs: {full_path}")
|
| 73 |
|
| 74 |
if not full_path.exists():
|
| 75 |
print(f"⚠ Reference audio not found: {full_path}")
|
|
|
|
| 80 |
|
| 81 |
# Create instant voice clone using IVC API
|
| 82 |
voice_name = f"puzzle_voice_{hashlib.md5(reference_audio_path.encode()).hexdigest()[:8]}"
|
| 83 |
+
# print(f"🎤 Cloning voice from: {full_path}")
|
| 84 |
|
| 85 |
# Read file as BytesIO (required by ElevenLabs SDK)
|
| 86 |
# remove_background_noise=False allows shorter samples (<4.6s)
|
|
|
|
| 93 |
|
| 94 |
voice_id = voice.voice_id
|
| 95 |
_cloned_voice_cache[reference_audio_path] = voice_id
|
| 96 |
+
# print(f"✓ Voice cloned successfully: {voice_id}")
|
| 97 |
|
| 98 |
return voice_id
|
| 99 |
|
|
|
|
| 139 |
if reference_audio_path:
|
| 140 |
voice_id = clone_voice_from_reference(reference_audio_path)
|
| 141 |
if voice_id:
|
| 142 |
+
# print(f"🎤 Using cloned voice: {voice_id}")
|
| 143 |
+
pass
|
| 144 |
|
| 145 |
# Fallback to preset voice if cloning failed
|
| 146 |
if not voice_id:
|
|
|
|
| 151 |
"Charlie": "IKne3meq5aSn9XLyUdCD", # Charlie - casual male
|
| 152 |
}
|
| 153 |
voice_id = voice_ids.get(voice, "EXAVITQu4vr4xnSDxMaL") # Default to Sarah
|
| 154 |
+
# print(f"🎵 Using preset voice: {voice}")
|
| 155 |
|
| 156 |
# Generate audio using text_to_speech.convert (v2.24.0 API)
|
| 157 |
+
# print(f"🎵 Generating audio hint: '{text}' ({hint_type})")
|
| 158 |
audio = client.text_to_speech.convert(
|
| 159 |
voice_id=voice_id,
|
| 160 |
text=text,
|
|
|
|
| 174 |
# Return absolute path
|
| 175 |
absolute_path = str(filepath)
|
| 176 |
|
| 177 |
+
# print(f"✓ Audio hint generated (fresh): {absolute_path}")
|
| 178 |
return absolute_path
|
| 179 |
|
| 180 |
except Exception as e:
|
client/utils/stt_handler.py
CHANGED
|
@@ -82,7 +82,7 @@ class STTHandler:
|
|
| 82 |
def _setup_mock(self):
|
| 83 |
"""Mock STT (테스트용)"""
|
| 84 |
self.client = None
|
| 85 |
-
print("[INFO] Mock STT 모드 (테스트용)")
|
| 86 |
|
| 87 |
def transcribe(self, audio_path: str, difficulty: int = 1) -> str:
|
| 88 |
"""
|
|
@@ -98,7 +98,7 @@ class STTHandler:
|
|
| 98 |
if not audio_path or not os.path.exists(audio_path):
|
| 99 |
raise ValueError(f"유효하지 않은 오디오 파일: {audio_path}")
|
| 100 |
|
| 101 |
-
print(f"[DEBUG] Transcribing: {audio_path} (provider={self.provider})")
|
| 102 |
|
| 103 |
if self.provider == 'openai':
|
| 104 |
return self._transcribe_openai(audio_path)
|
|
@@ -227,7 +227,7 @@ class STTHandler:
|
|
| 227 |
}
|
| 228 |
|
| 229 |
result = mock_texts.get(difficulty, "테스트 텍스트")
|
| 230 |
-
print(f"[MOCK] STT 결과 (난이도 {difficulty}): {result}")
|
| 231 |
|
| 232 |
return result
|
| 233 |
|
|
|
|
| 82 |
def _setup_mock(self):
|
| 83 |
"""Mock STT (테스트용)"""
|
| 84 |
self.client = None
|
| 85 |
+
# print("[INFO] Mock STT 모드 (테스트용)")
|
| 86 |
|
| 87 |
def transcribe(self, audio_path: str, difficulty: int = 1) -> str:
|
| 88 |
"""
|
|
|
|
| 98 |
if not audio_path or not os.path.exists(audio_path):
|
| 99 |
raise ValueError(f"유효하지 않은 오디오 파일: {audio_path}")
|
| 100 |
|
| 101 |
+
# print(f"[DEBUG] Transcribing: {audio_path} (provider={self.provider})")
|
| 102 |
|
| 103 |
if self.provider == 'openai':
|
| 104 |
return self._transcribe_openai(audio_path)
|
|
|
|
| 227 |
}
|
| 228 |
|
| 229 |
result = mock_texts.get(difficulty, "테스트 텍스트")
|
| 230 |
+
# print(f"[MOCK] STT 결과 (난이도 {difficulty}): {result}")
|
| 231 |
|
| 232 |
return result
|
| 233 |
|
docs/tech-stack.html
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
<!DOCTYPE html>
|
| 2 |
-
<html lang="
|
| 3 |
<head>
|
| 4 |
<meta charset="UTF-8">
|
| 5 |
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
-
<title>Tech Stack -
|
| 7 |
<link href="https://fonts.googleapis.com/css2?family=Nunito:wght@400;600;700;800&display=swap" rel="stylesheet">
|
| 8 |
<style>
|
| 9 |
:root {
|
|
@@ -494,8 +494,8 @@
|
|
| 494 |
<!-- Sidebar -->
|
| 495 |
<nav class="sidebar">
|
| 496 |
<a href="#" class="logo">
|
| 497 |
-
<div class="logo-icon">
|
| 498 |
-
|
| 499 |
</a>
|
| 500 |
|
| 501 |
<div class="nav-section">
|
|
@@ -523,7 +523,7 @@
|
|
| 523 |
<div class="nav-title">Backend</div>
|
| 524 |
<ul class="nav-list">
|
| 525 |
<li class="nav-item">
|
| 526 |
-
<a href="#backend" class="nav-link">
|
| 527 |
</li>
|
| 528 |
<li class="nav-item">
|
| 529 |
<a href="#database" class="nav-link">Database</a>
|
|
@@ -535,7 +535,10 @@
|
|
| 535 |
<div class="nav-title">AI / ML</div>
|
| 536 |
<ul class="nav-list">
|
| 537 |
<li class="nav-item">
|
| 538 |
-
<a href="#ai" class="nav-link">
|
|
|
|
|
|
|
|
|
|
| 539 |
</li>
|
| 540 |
<li class="nav-item">
|
| 541 |
<a href="#audio" class="nav-link">Audio Processing</a>
|
|
@@ -564,8 +567,8 @@
|
|
| 564 |
<header class="page-header">
|
| 565 |
<h1 class="page-title">Tech Stack</h1>
|
| 566 |
<p class="page-description">
|
| 567 |
-
|
| 568 |
-
|
| 569 |
</p>
|
| 570 |
</header>
|
| 571 |
|
|
@@ -578,18 +581,18 @@
|
|
| 578 |
<div class="architecture-diagram">
|
| 579 |
<div class="arch-flow">
|
| 580 |
<div class="arch-box">
|
| 581 |
-
<div class="arch-box-title">Client</div>
|
| 582 |
<div class="arch-box-sub">Gradio 6.0</div>
|
| 583 |
</div>
|
| 584 |
<span class="arch-arrow">→</span>
|
| 585 |
<div class="arch-box secondary">
|
| 586 |
-
<div class="arch-box-title">
|
| 587 |
-
<div class="arch-box-sub">
|
| 588 |
</div>
|
| 589 |
<span class="arch-arrow">→</span>
|
| 590 |
<div class="arch-box tertiary">
|
| 591 |
-
<div class="arch-box-title">AI
|
| 592 |
-
<div class="arch-box-sub">
|
| 593 |
</div>
|
| 594 |
</div>
|
| 595 |
</div>
|
|
@@ -609,17 +612,17 @@
|
|
| 609 |
<span class="tech-version">6.0.0</span>
|
| 610 |
</div>
|
| 611 |
<p class="tech-description">
|
| 612 |
-
Python
|
| 613 |
</p>
|
| 614 |
</div>
|
| 615 |
<div class="tech-card">
|
| 616 |
<div class="tech-header">
|
| 617 |
<div class="tech-icon icon-python">Py</div>
|
| 618 |
<span class="tech-name">Python</span>
|
| 619 |
-
<span class="tech-version">3.11
|
| 620 |
</div>
|
| 621 |
<p class="tech-description">
|
| 622 |
-
|
| 623 |
</p>
|
| 624 |
</div>
|
| 625 |
<div class="tech-card">
|
|
@@ -629,7 +632,17 @@
|
|
| 629 |
<span class="tech-version">0.28.1</span>
|
| 630 |
</div>
|
| 631 |
<p class="tech-description">
|
| 632 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 633 |
</p>
|
| 634 |
</div>
|
| 635 |
</div>
|
|
@@ -639,47 +652,47 @@
|
|
| 639 |
<section id="backend" class="section">
|
| 640 |
<h2 class="section-title">
|
| 641 |
<span class="section-icon backend">⚡</span>
|
| 642 |
-
Backend
|
| 643 |
</h2>
|
| 644 |
<div class="tech-grid">
|
| 645 |
<div class="tech-card">
|
| 646 |
<div class="tech-header">
|
| 647 |
-
<div class="tech-icon icon-
|
| 648 |
-
<span class="tech-name">
|
| 649 |
-
<span class="tech-version">
|
| 650 |
</div>
|
| 651 |
<p class="tech-description">
|
| 652 |
-
|
| 653 |
</p>
|
| 654 |
</div>
|
| 655 |
<div class="tech-card">
|
| 656 |
<div class="tech-header">
|
| 657 |
-
<div class="tech-icon" style="background: linear-gradient(135deg, #
|
| 658 |
-
<span class="tech-name">
|
| 659 |
-
<span class="tech-version">
|
| 660 |
</div>
|
| 661 |
<p class="tech-description">
|
| 662 |
-
|
| 663 |
</p>
|
| 664 |
</div>
|
| 665 |
<div class="tech-card">
|
| 666 |
<div class="tech-header">
|
| 667 |
-
<div class="tech-icon
|
| 668 |
-
<span class="tech-name">
|
| 669 |
-
<span class="tech-version">
|
| 670 |
</div>
|
| 671 |
<p class="tech-description">
|
| 672 |
-
|
| 673 |
</p>
|
| 674 |
</div>
|
| 675 |
<div class="tech-card">
|
| 676 |
<div class="tech-header">
|
| 677 |
-
<div class="tech-icon" style="background: linear-gradient(135deg, #
|
| 678 |
-
<span class="tech-name">
|
| 679 |
-
<span class="tech-version">
|
| 680 |
</div>
|
| 681 |
<p class="tech-description">
|
| 682 |
-
|
| 683 |
</p>
|
| 684 |
</div>
|
| 685 |
</div>
|
|
@@ -696,20 +709,30 @@
|
|
| 696 |
<div class="tech-header">
|
| 697 |
<div class="tech-icon icon-postgres">Pg</div>
|
| 698 |
<span class="tech-name">PostgreSQL</span>
|
| 699 |
-
<span class="tech-version">
|
| 700 |
</div>
|
| 701 |
<p class="tech-description">
|
| 702 |
-
|
| 703 |
</p>
|
| 704 |
</div>
|
| 705 |
<div class="tech-card">
|
| 706 |
<div class="tech-header">
|
| 707 |
-
<div class="tech-icon" style="background: linear-gradient(135deg, #
|
| 708 |
-
<span class="tech-name">
|
| 709 |
-
<span class="tech-version">2.
|
| 710 |
</div>
|
| 711 |
<p class="tech-description">
|
| 712 |
-
Python
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 713 |
</p>
|
| 714 |
</div>
|
| 715 |
</div>
|
|
@@ -726,40 +749,80 @@
|
|
| 726 |
<div class="tech-header">
|
| 727 |
<div class="tech-icon icon-gemini">Gm</div>
|
| 728 |
<span class="tech-name">Google Gemini</span>
|
| 729 |
-
<span class="tech-version">
|
| 730 |
</div>
|
| 731 |
<p class="tech-description">
|
| 732 |
-
|
| 733 |
</p>
|
| 734 |
</div>
|
| 735 |
<div class="tech-card">
|
| 736 |
<div class="tech-header">
|
| 737 |
-
<div class="tech-icon" style="background: linear-gradient(135deg, #
|
| 738 |
-
<span class="tech-name">
|
| 739 |
-
<span class="tech-version">
|
| 740 |
</div>
|
| 741 |
<p class="tech-description">
|
| 742 |
-
|
| 743 |
</p>
|
| 744 |
</div>
|
| 745 |
<div class="tech-card">
|
| 746 |
<div class="tech-header">
|
| 747 |
<div class="tech-icon icon-numpy">Np</div>
|
| 748 |
<span class="tech-name">NumPy</span>
|
| 749 |
-
<span class="tech-version">1.26
|
| 750 |
</div>
|
| 751 |
<p class="tech-description">
|
| 752 |
-
|
| 753 |
</p>
|
| 754 |
</div>
|
| 755 |
<div class="tech-card">
|
| 756 |
<div class="tech-header">
|
| 757 |
<div class="tech-icon" style="background: linear-gradient(135deg, #0d9488, #2dd4bf);">Pd</div>
|
| 758 |
<span class="tech-name">Pandas</span>
|
| 759 |
-
<span class="tech-version">2.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 760 |
</div>
|
| 761 |
<p class="tech-description">
|
| 762 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 763 |
</p>
|
| 764 |
</div>
|
| 765 |
</div>
|
|
@@ -779,7 +842,7 @@
|
|
| 779 |
<span class="tech-version">0.25.1</span>
|
| 780 |
</div>
|
| 781 |
<p class="tech-description">
|
| 782 |
-
|
| 783 |
</p>
|
| 784 |
</div>
|
| 785 |
<div class="tech-card">
|
|
@@ -789,7 +852,27 @@
|
|
| 789 |
<span class="tech-version">via ffmpy</span>
|
| 790 |
</div>
|
| 791 |
<p class="tech-description">
|
| 792 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 793 |
</p>
|
| 794 |
</div>
|
| 795 |
</div>
|
|
@@ -809,7 +892,7 @@
|
|
| 809 |
<span class="tech-version">Latest</span>
|
| 810 |
</div>
|
| 811 |
<p class="tech-description">
|
| 812 |
-
|
| 813 |
</p>
|
| 814 |
</div>
|
| 815 |
<div class="tech-card">
|
|
@@ -819,27 +902,47 @@
|
|
| 819 |
<span class="tech-version">Latest</span>
|
| 820 |
</div>
|
| 821 |
<p class="tech-description">
|
| 822 |
-
|
| 823 |
</p>
|
| 824 |
</div>
|
| 825 |
<div class="tech-card">
|
| 826 |
<div class="tech-header">
|
| 827 |
-
<div class="tech-icon" style="background: linear-gradient(135deg, #
|
| 828 |
-
<span class="tech-name">
|
| 829 |
-
<span class="tech-version"
|
| 830 |
</div>
|
| 831 |
<p class="tech-description">
|
| 832 |
-
|
| 833 |
</p>
|
| 834 |
</div>
|
| 835 |
<div class="tech-card">
|
| 836 |
<div class="tech-header">
|
| 837 |
-
<div class="tech-icon" style="background: linear-gradient(135deg, #
|
| 838 |
-
<span class="tech-name">
|
| 839 |
-
<span class="tech-version">
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 840 |
</div>
|
| 841 |
<p class="tech-description">
|
| 842 |
-
|
| 843 |
</p>
|
| 844 |
</div>
|
| 845 |
</div>
|
|
|
|
| 1 |
<!DOCTYPE html>
|
| 2 |
+
<html lang="en">
|
| 3 |
<head>
|
| 4 |
<meta charset="UTF-8">
|
| 5 |
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
+
<title>Tech Stack - Voice Sementle</title>
|
| 7 |
<link href="https://fonts.googleapis.com/css2?family=Nunito:wght@400;600;700;800&display=swap" rel="stylesheet">
|
| 8 |
<style>
|
| 9 |
:root {
|
|
|
|
| 494 |
<!-- Sidebar -->
|
| 495 |
<nav class="sidebar">
|
| 496 |
<a href="#" class="logo">
|
| 497 |
+
<div class="logo-icon">V</div>
|
| 498 |
+
Voice Sementle
|
| 499 |
</a>
|
| 500 |
|
| 501 |
<div class="nav-section">
|
|
|
|
| 523 |
<div class="nav-title">Backend</div>
|
| 524 |
<ul class="nav-list">
|
| 525 |
<li class="nav-item">
|
| 526 |
+
<a href="#backend" class="nav-link">Services</a>
|
| 527 |
</li>
|
| 528 |
<li class="nav-item">
|
| 529 |
<a href="#database" class="nav-link">Database</a>
|
|
|
|
| 535 |
<div class="nav-title">AI / ML</div>
|
| 536 |
<ul class="nav-list">
|
| 537 |
<li class="nav-item">
|
| 538 |
+
<a href="#ai" class="nav-link">AI Models</a>
|
| 539 |
+
</li>
|
| 540 |
+
<li class="nav-item">
|
| 541 |
+
<a href="#mcp" class="nav-link">MCP Integration</a>
|
| 542 |
</li>
|
| 543 |
<li class="nav-item">
|
| 544 |
<a href="#audio" class="nav-link">Audio Processing</a>
|
|
|
|
| 567 |
<header class="page-header">
|
| 568 |
<h1 class="page-title">Tech Stack</h1>
|
| 569 |
<p class="page-description">
|
| 570 |
+
Comprehensive technology stack for Voice Sementle - a daily voice puzzle game where pronunciation matters.
|
| 571 |
+
Built with modern Python frameworks, AI models, and cloud services.
|
| 572 |
</p>
|
| 573 |
</header>
|
| 574 |
|
|
|
|
| 581 |
<div class="architecture-diagram">
|
| 582 |
<div class="arch-flow">
|
| 583 |
<div class="arch-box">
|
| 584 |
+
<div class="arch-box-title">Client UI</div>
|
| 585 |
<div class="arch-box-sub">Gradio 6.0</div>
|
| 586 |
</div>
|
| 587 |
<span class="arch-arrow">→</span>
|
| 588 |
<div class="arch-box secondary">
|
| 589 |
+
<div class="arch-box-title">Services</div>
|
| 590 |
+
<div class="arch-box-sub">Python Backend</div>
|
| 591 |
</div>
|
| 592 |
<span class="arch-arrow">→</span>
|
| 593 |
<div class="arch-box tertiary">
|
| 594 |
+
<div class="arch-box-title">AI Models</div>
|
| 595 |
+
<div class="arch-box-sub">Gemini + VoiceKit MCP</div>
|
| 596 |
</div>
|
| 597 |
</div>
|
| 598 |
</div>
|
|
|
|
| 612 |
<span class="tech-version">6.0.0</span>
|
| 613 |
</div>
|
| 614 |
<p class="tech-description">
|
| 615 |
+
Python-based UI framework. Powers interactive web interface with voice input, real-time feedback, modals, and dynamic game state transitions.
|
| 616 |
</p>
|
| 617 |
</div>
|
| 618 |
<div class="tech-card">
|
| 619 |
<div class="tech-header">
|
| 620 |
<div class="tech-icon icon-python">Py</div>
|
| 621 |
<span class="tech-name">Python</span>
|
| 622 |
+
<span class="tech-version">3.11+</span>
|
| 623 |
</div>
|
| 624 |
<p class="tech-description">
|
| 625 |
+
Client application language. Leverages type hints, async/await patterns, and modern Python features for clean, maintainable code.
|
| 626 |
</p>
|
| 627 |
</div>
|
| 628 |
<div class="tech-card">
|
|
|
|
| 632 |
<span class="tech-version">0.28.1</span>
|
| 633 |
</div>
|
| 634 |
<p class="tech-description">
|
| 635 |
+
Async HTTP client library for communicating with backend services and external APIs, with optional HTTP/2 support.
|
| 636 |
+
</p>
|
| 637 |
+
</div>
|
| 638 |
+
<div class="tech-card">
|
| 639 |
+
<div class="tech-header">
|
| 640 |
+
<div class="tech-icon" style="background: linear-gradient(135deg, #8b5cf6, #a78bfa);">Js</div>
|
| 641 |
+
<span class="tech-name">Custom CSS/JS</span>
|
| 642 |
+
<span class="tech-version">-</span>
|
| 643 |
+
</div>
|
| 644 |
+
<p class="tech-description">
|
| 645 |
+
Custom styling with animated backgrounds, modal components, radar charts, and responsive design for optimal user experience.
|
| 646 |
</p>
|
| 647 |
</div>
|
| 648 |
</div>
|
|
|
|
| 652 |
<section id="backend" class="section">
|
| 653 |
<h2 class="section-title">
|
| 654 |
<span class="section-icon backend">⚡</span>
|
| 655 |
+
Backend Services
|
| 656 |
</h2>
|
| 657 |
<div class="tech-grid">
|
| 658 |
<div class="tech-card">
|
| 659 |
<div class="tech-header">
|
| 660 |
+
<div class="tech-icon icon-python">Py</div>
|
| 661 |
+
<span class="tech-name">Python Services</span>
|
| 662 |
+
<span class="tech-version">3.11+</span>
|
| 663 |
</div>
|
| 664 |
<p class="tech-description">
|
| 665 |
+
Modular service architecture including voice analyzer, hint generator, database layer, and game state management with async/await support.
|
| 666 |
</p>
|
| 667 |
</div>
|
| 668 |
<div class="tech-card">
|
| 669 |
<div class="tech-header">
|
| 670 |
+
<div class="tech-icon" style="background: linear-gradient(135deg, #ec4899, #f472b6);">Pd</div>
|
| 671 |
+
<span class="tech-name">Pydantic</span>
|
| 672 |
+
<span class="tech-version">2.0+</span>
|
| 673 |
</div>
|
| 674 |
<p class="tech-description">
|
| 675 |
+
Data validation and settings management with type safety, automatic serialization, and schema validation.
|
| 676 |
</p>
|
| 677 |
</div>
|
| 678 |
<div class="tech-card">
|
| 679 |
<div class="tech-header">
|
| 680 |
+
<div class="tech-icon" style="background: linear-gradient(135deg, #10b981, #34d399);">Ay</div>
|
| 681 |
+
<span class="tech-name">Asyncio</span>
|
| 682 |
+
<span class="tech-version">Built-in</span>
|
| 683 |
</div>
|
| 684 |
<p class="tech-description">
|
| 685 |
+
Native Python async library for concurrent operations, enabling efficient handling of multiple voice analysis requests.
|
| 686 |
</p>
|
| 687 |
</div>
|
| 688 |
<div class="tech-card">
|
| 689 |
<div class="tech-header">
|
| 690 |
+
<div class="tech-icon" style="background: linear-gradient(135deg, #f59e0b, #fbbf24);">Dv</div>
|
| 691 |
+
<span class="tech-name">Python-dotenv</span>
|
| 692 |
+
<span class="tech-version">1.0.0</span>
|
| 693 |
</div>
|
| 694 |
<p class="tech-description">
|
| 695 |
+
Environment variable management from .env files for secure API keys and configuration settings.
|
| 696 |
</p>
|
| 697 |
</div>
|
| 698 |
</div>
|
|
|
|
| 709 |
<div class="tech-header">
|
| 710 |
<div class="tech-icon icon-postgres">Pg</div>
|
| 711 |
<span class="tech-name">PostgreSQL</span>
|
| 712 |
+
<span class="tech-version">16-alpine</span>
|
| 713 |
</div>
|
| 714 |
<p class="tech-description">
|
| 715 |
+
Production-grade relational database. Stores user sessions, game history, daily puzzles, attempt statistics, and score analytics.
|
| 716 |
</p>
|
| 717 |
</div>
|
| 718 |
<div class="tech-card">
|
| 719 |
<div class="tech-header">
|
| 720 |
+
<div class="tech-icon" style="background: linear-gradient(135deg, #0891b2, #22d3ee);">Ps</div>
|
| 721 |
+
<span class="tech-name">psycopg2</span>
|
| 722 |
+
<span class="tech-version">2.9+</span>
|
| 723 |
</div>
|
| 724 |
<p class="tech-description">
|
| 725 |
+
PostgreSQL adapter for Python. Provides efficient database connections and query execution with connection pooling.
|
| 726 |
+
</p>
|
| 727 |
+
</div>
|
| 728 |
+
<div class="tech-card">
|
| 729 |
+
<div class="tech-header">
|
| 730 |
+
<div class="tech-icon icon-docker">Dk</div>
|
| 731 |
+
<span class="tech-name">Docker Volume</span>
|
| 732 |
+
<span class="tech-version">-</span>
|
| 733 |
+
</div>
|
| 734 |
+
<p class="tech-description">
|
| 735 |
+
Persistent data storage with Docker volumes ensuring database persistence across container restarts.
|
| 736 |
</p>
|
| 737 |
</div>
|
| 738 |
</div>
|
|
|
|
| 749 |
<div class="tech-header">
|
| 750 |
<div class="tech-icon icon-gemini">Gm</div>
|
| 751 |
<span class="tech-name">Google Gemini</span>
|
| 752 |
+
<span class="tech-version">2.0 Flash</span>
|
| 753 |
</div>
|
| 754 |
<p class="tech-description">
|
| 755 |
+
Multimodal AI model powering intelligent hint generation, contextual feedback, and adaptive difficulty based on player performance.
|
| 756 |
</p>
|
| 757 |
</div>
|
| 758 |
<div class="tech-card">
|
| 759 |
<div class="tech-header">
|
| 760 |
+
<div class="tech-icon" style="background: linear-gradient(135deg, #6366f1, #818cf8);">11</div>
|
| 761 |
+
<span class="tech-name">ElevenLabs</span>
|
| 762 |
+
<span class="tech-version">2.24.0</span>
|
| 763 |
</div>
|
| 764 |
<p class="tech-description">
|
| 765 |
+
Advanced text-to-speech API with voice cloning. Generates partial audio hints with reference voice matching for a natural gameplay experience.
|
| 766 |
</p>
|
| 767 |
</div>
|
| 768 |
<div class="tech-card">
|
| 769 |
<div class="tech-header">
|
| 770 |
<div class="tech-icon icon-numpy">Np</div>
|
| 771 |
<span class="tech-name">NumPy</span>
|
| 772 |
+
<span class="tech-version">1.26.4</span>
|
| 773 |
</div>
|
| 774 |
<p class="tech-description">
|
| 775 |
+
Numerical computing library for audio signal processing, feature extraction, and statistical analysis of voice patterns.
|
| 776 |
</p>
|
| 777 |
</div>
|
| 778 |
<div class="tech-card">
|
| 779 |
<div class="tech-header">
|
| 780 |
<div class="tech-icon" style="background: linear-gradient(135deg, #0d9488, #2dd4bf);">Pd</div>
|
| 781 |
<span class="tech-name">Pandas</span>
|
| 782 |
+
<span class="tech-version">2.2.3</span>
|
| 783 |
+
</div>
|
| 784 |
+
<p class="tech-description">
|
| 785 |
+
Data analysis library for processing game statistics, tracking user progress, and generating performance insights.
|
| 786 |
+
</p>
|
| 787 |
+
</div>
|
| 788 |
+
</div>
|
| 789 |
+
</section>
|
| 790 |
+
|
| 791 |
+
<!-- MCP Integration -->
|
| 792 |
+
<section id="mcp" class="section">
|
| 793 |
+
<h2 class="section-title">
|
| 794 |
+
<span class="section-icon" style="background: linear-gradient(135deg, #8b5cf6, #a78bfa);">🔌</span>
|
| 795 |
+
Model Context Protocol (MCP)
|
| 796 |
+
</h2>
|
| 797 |
+
<div class="tech-grid">
|
| 798 |
+
<div class="tech-card">
|
| 799 |
+
<div class="tech-header">
|
| 800 |
+
<div class="tech-icon" style="background: linear-gradient(135deg, #0891b2, #22d3ee);">MC</div>
|
| 801 |
+
<span class="tech-name">MCP Client</span>
|
| 802 |
+
<span class="tech-version">1.22.0</span>
|
| 803 |
</div>
|
| 804 |
<p class="tech-description">
|
| 805 |
+
Model Context Protocol client for seamless AI model integration. Manages sessions, tool calling, and context sharing across AI services.
|
| 806 |
+
</p>
|
| 807 |
+
</div>
|
| 808 |
+
<div class="tech-card">
|
| 809 |
+
<div class="tech-header">
|
| 810 |
+
<div class="tech-icon" style="background: linear-gradient(135deg, #f97316, #fb923c);">Vk</div>
|
| 811 |
+
<span class="tech-name">VoiceKit MCP</span>
|
| 812 |
+
<span class="tech-version">HuggingFace</span>
|
| 813 |
+
</div>
|
| 814 |
+
<p class="tech-description">
|
| 815 |
+
Voice analysis MCP server providing the voicekit_analyze_voice_similarity tool. Compares user recordings with reference audio for pronunciation scoring.
|
| 816 |
+
</p>
|
| 817 |
+
</div>
|
| 818 |
+
<div class="tech-card">
|
| 819 |
+
<div class="tech-header">
|
| 820 |
+
<div class="tech-icon" style="background: linear-gradient(135deg, #ec4899, #f472b6);">SS</div>
|
| 821 |
+
<span class="tech-name">SSE Client</span>
|
| 822 |
+
<span class="tech-version">MCP</span>
|
| 823 |
+
</div>
|
| 824 |
+
<p class="tech-description">
|
| 825 |
+
Server-Sent Events client for real-time MCP communication. Enables streaming responses and bidirectional tool invocation.
|
| 826 |
</p>
|
| 827 |
</div>
|
| 828 |
</div>
|
|
|
|
| 842 |
<span class="tech-version">0.25.1</span>
|
| 843 |
</div>
|
| 844 |
<p class="tech-description">
|
| 845 |
+
Audio manipulation library for format conversion, normalization, compression, and audio file editing with effects processing.
|
| 846 |
</p>
|
| 847 |
</div>
|
| 848 |
<div class="tech-card">
|
|
|
|
| 852 |
<span class="tech-version">via ffmpy</span>
|
| 853 |
</div>
|
| 854 |
<p class="tech-description">
|
| 855 |
+
Multimedia framework for audio encoding/decoding. Handles format conversion and compression for optimal file sizes.
|
| 856 |
+
</p>
|
| 857 |
+
</div>
|
| 858 |
+
<div class="tech-card">
|
| 859 |
+
<div class="tech-header">
|
| 860 |
+
<div class="tech-icon" style="background: linear-gradient(135deg, #7c3aed, #a78bfa);">B6</div>
|
| 861 |
+
<span class="tech-name">Base64</span>
|
| 862 |
+
<span class="tech-version">Built-in</span>
|
| 863 |
+
</div>
|
| 864 |
+
<p class="tech-description">
|
| 865 |
+
Audio data encoding for API transmission. Converts binary audio to base64 strings for MCP tool parameters.
|
| 866 |
+
</p>
|
| 867 |
+
</div>
|
| 868 |
+
<div class="tech-card">
|
| 869 |
+
<div class="tech-header">
|
| 870 |
+
<div class="tech-icon" style="background: linear-gradient(135deg, #dc2626, #f87171);">Af</div>
|
| 871 |
+
<span class="tech-name">Aiofiles</span>
|
| 872 |
+
<span class="tech-version">24.1.0</span>
|
| 873 |
+
</div>
|
| 874 |
+
<p class="tech-description">
|
| 875 |
+
Async file operations for non-blocking audio file I/O. Enables concurrent file processing without blocking the event loop.
|
| 876 |
</p>
|
| 877 |
</div>
|
| 878 |
</div>
|
|
|
|
| 892 |
<span class="tech-version">Latest</span>
|
| 893 |
</div>
|
| 894 |
<p class="tech-description">
|
| 895 |
+
Containerization platform providing consistent development and deployment environments across all systems.
|
| 896 |
</p>
|
| 897 |
</div>
|
| 898 |
<div class="tech-card">
|
|
|
|
| 902 |
<span class="tech-version">Latest</span>
|
| 903 |
</div>
|
| 904 |
<p class="tech-description">
|
| 905 |
+
Multi-container orchestration for managing PostgreSQL database service with volume persistence and networking.
|
| 906 |
</p>
|
| 907 |
</div>
|
| 908 |
<div class="tech-card">
|
| 909 |
<div class="tech-header">
|
| 910 |
+
<div class="tech-icon" style="background: linear-gradient(135deg, #64748b, #94a3b8);">Ev</div>
|
| 911 |
+
<span class="tech-name">Environment Config</span>
|
| 912 |
+
<span class="tech-version">-</span>
|
| 913 |
</div>
|
| 914 |
<p class="tech-description">
|
| 915 |
+
Secure configuration management with .env files for API keys (Gemini, ElevenLabs, VoiceKit) and sensitive settings.
|
| 916 |
</p>
|
| 917 |
</div>
|
| 918 |
<div class="tech-card">
|
| 919 |
<div class="tech-header">
|
| 920 |
+
<div class="tech-icon" style="background: linear-gradient(135deg, #16a34a, #4ade80);">Sh</div>
|
| 921 |
+
<span class="tech-name">Shell Scripts</span>
|
| 922 |
+
<span class="tech-version">Bash</span>
|
| 923 |
+
</div>
|
| 924 |
+
<p class="tech-description">
|
| 925 |
+
Automated deployment scripts (start.sh, stop.sh) for streamlined application lifecycle management.
|
| 926 |
+
</p>
|
| 927 |
+
</div>
|
| 928 |
+
<div class="tech-card">
|
| 929 |
+
<div class="tech-header">
|
| 930 |
+
<div class="tech-icon" style="background: linear-gradient(135deg, #ea580c, #fb923c);">Hf</div>
|
| 931 |
+
<span class="tech-name">HuggingFace</span>
|
| 932 |
+
<span class="tech-version">Hub</span>
|
| 933 |
+
</div>
|
| 934 |
+
<p class="tech-description">
|
| 935 |
+
Deployment platform hosting VoiceKit MCP server and application demos with integrated spaces for testing.
|
| 936 |
+
</p>
|
| 937 |
+
</div>
|
| 938 |
+
<div class="tech-card">
|
| 939 |
+
<div class="tech-header">
|
| 940 |
+
<div class="tech-icon" style="background: linear-gradient(135deg, #9333ea, #a855f7);">Lg</div>
|
| 941 |
+
<span class="tech-name">Logging</span>
|
| 942 |
+
<span class="tech-version">Built-in</span>
|
| 943 |
</div>
|
| 944 |
<p class="tech-description">
|
| 945 |
+
Comprehensive logging system for debugging, monitoring API calls, tracking errors, and performance analysis.
|
| 946 |
</p>
|
| 947 |
</div>
|
| 948 |
</div>
|
docs/user-guide.html
CHANGED
|
@@ -3,7 +3,7 @@
|
|
| 3 |
<head>
|
| 4 |
<meta charset="UTF-8">
|
| 5 |
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
-
<title>User Guide -
|
| 7 |
<link href="https://fonts.googleapis.com/css2?family=Nunito:wght@400;600;700;800&display=swap" rel="stylesheet">
|
| 8 |
<style>
|
| 9 |
:root {
|
|
@@ -792,8 +792,8 @@
|
|
| 792 |
<div class="metric-label">Pronunciation</div>
|
| 793 |
</div>
|
| 794 |
<div class="metric-item">
|
| 795 |
-
<div class="metric-value">
|
| 796 |
-
<div class="metric-label">
|
| 797 |
</div>
|
| 798 |
<div class="metric-item">
|
| 799 |
<div class="metric-value">Pitch</div>
|
|
@@ -813,8 +813,8 @@
|
|
| 813 |
<div class="content-card">
|
| 814 |
<h3>🎯 Score Interpretation</h3>
|
| 815 |
<ul>
|
| 816 |
-
<li><strong>
|
| 817 |
-
<li><strong>60-
|
| 818 |
<li><strong>40-59 points:</strong> Right direction, but try a different word.</li>
|
| 819 |
<li><strong>Below 40:</strong> Far from the answer. Try a new guess.</li>
|
| 820 |
</ul>
|
|
@@ -840,7 +840,7 @@
|
|
| 840 |
<div class="content-card">
|
| 841 |
<h3>💬 AI Hint Chatbot</h3>
|
| 842 |
<p>
|
| 843 |
-
Click the floating button at the
|
| 844 |
Request hints from the chatbot to receive indirect clues about the answer.
|
| 845 |
</p>
|
| 846 |
</div>
|
|
@@ -852,16 +852,17 @@
|
|
| 852 |
<li><strong>"What's the category?":</strong> Learn the classification of the answer word.</li>
|
| 853 |
<li><strong>"Tell me the first letter":</strong> Get a hint about the first letter.</li>
|
| 854 |
<li><strong>"What words sound similar?":</strong> Get pronunciation-related suggestions.</li>
|
|
|
|
| 855 |
</ul>
|
| 856 |
</div>
|
| 857 |
|
| 858 |
-
<div class="warning-box">
|
| 859 |
<span class="warning-icon">💡</span>
|
| 860 |
<div class="warning-text">
|
| 861 |
<strong>Limited Hint Usage!</strong> Chatbot hints can only be used a limited number of times.
|
| 862 |
Request hints carefully!
|
| 863 |
</div>
|
| 864 |
-
</div>
|
| 865 |
</section>
|
| 866 |
|
| 867 |
<!-- Tips & Tricks -->
|
|
|
|
| 3 |
<head>
|
| 4 |
<meta charset="UTF-8">
|
| 5 |
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
| 6 |
+
<title>User Guide - Voice Sementle</title>
|
| 7 |
<link href="https://fonts.googleapis.com/css2?family=Nunito:wght@400;600;700;800&display=swap" rel="stylesheet">
|
| 8 |
<style>
|
| 9 |
:root {
|
|
|
|
| 792 |
<div class="metric-label">Pronunciation</div>
|
| 793 |
</div>
|
| 794 |
<div class="metric-item">
|
| 795 |
+
<div class="metric-value">Line Acc.</div>
|
| 796 |
+
<div class="metric-label">Line Accuracy</div>
|
| 797 |
</div>
|
| 798 |
<div class="metric-item">
|
| 799 |
<div class="metric-value">Pitch</div>
|
|
|
|
| 813 |
<div class="content-card">
|
| 814 |
<h3>🎯 Score Interpretation</h3>
|
| 815 |
<ul>
|
| 816 |
+
<li><strong>85+ points:</strong> Very similar to the answer! You're almost there!</li>
|
| 817 |
+
<li><strong>60-84 points:</strong> Good attempt. Check the hints for guidance.</li>
|
| 818 |
<li><strong>40-59 points:</strong> Right direction, but try a different word.</li>
|
| 819 |
<li><strong>Below 40:</strong> Far from the answer. Try a new guess.</li>
|
| 820 |
</ul>
|
|
|
|
| 840 |
<div class="content-card">
|
| 841 |
<h3>💬 AI Hint Chatbot</h3>
|
| 842 |
<p>
|
| 843 |
+
Click the floating button at the top right of the screen to open the AI hint chatbot.
|
| 844 |
Request hints from the chatbot to receive indirect clues about the answer.
|
| 845 |
</p>
|
| 846 |
</div>
|
|
|
|
| 852 |
<li><strong>"What's the category?":</strong> Learn the classification of the answer word.</li>
|
| 853 |
<li><strong>"Tell me the first letter":</strong> Get a hint about the first letter.</li>
|
| 854 |
<li><strong>"What words sound similar?":</strong> Get pronunciation-related suggestions.</li>
|
| 855 |
+
<li><strong>"Give me an audio hint":</strong> AI provides an audio hint in a tone similar to your voice.</li>
|
| 856 |
</ul>
|
| 857 |
</div>
|
| 858 |
|
| 859 |
+
<!-- <div class="warning-box">
|
| 860 |
<span class="warning-icon">💡</span>
|
| 861 |
<div class="warning-text">
|
| 862 |
<strong>Limited Hint Usage!</strong> Chatbot hints can only be used a limited number of times.
|
| 863 |
Request hints carefully!
|
| 864 |
</div>
|
| 865 |
+
</div> -->
|
| 866 |
</section>
|
| 867 |
|
| 868 |
<!-- Tips & Tricks -->
|
gemini_adapter.py
CHANGED
|
@@ -88,14 +88,14 @@ def convert_messages_to_gemini_format(anthropic_messages):
|
|
| 88 |
# Could be Gradio file format {"path": ..., "mime_type": ...}
|
| 89 |
# Skip audio/video files - they can't be sent to Gemini text API
|
| 90 |
if content.get("path") and content.get("mime_type"):
|
| 91 |
-
print(f"DEBUG convert_messages: Skipping file content: {content.get('mime_type')}")
|
| 92 |
continue
|
| 93 |
# Could be text content {"type": "text", "text": "..."}
|
| 94 |
elif content.get("type") == "text":
|
| 95 |
parts.append(types.Part(text=content.get("text", "")))
|
| 96 |
elif hasattr(content, '__class__') and 'Audio' in content.__class__.__name__:
|
| 97 |
# Skip Gradio Audio component objects
|
| 98 |
-
print(f"DEBUG convert_messages: Skipping Gradio component: {content.__class__.__name__}")
|
| 99 |
continue
|
| 100 |
elif isinstance(content, list):
|
| 101 |
# Complex content with tool calls/results
|
|
@@ -152,11 +152,11 @@ def call_gemini_with_tools(model_name, system_prompt, messages, tools, max_token
|
|
| 152 |
# Convert messages to Gemini format
|
| 153 |
contents = convert_messages_to_gemini_format(messages)
|
| 154 |
|
| 155 |
-
print(f"DEBUG gemini_adapter: Converted {len(messages)} messages to {len(contents)} Contents")
|
| 156 |
-
print(f"DEBUG gemini_adapter: Tools: {len(gemini_tools[0].function_declarations) if gemini_tools else 0}")
|
| 157 |
|
| 158 |
# Call API
|
| 159 |
-
print(f"DEBUG gemini_adapter: Calling Gemini API...")
|
| 160 |
client = get_client()
|
| 161 |
|
| 162 |
# Generate content (no timeout parameter - not supported)
|
|
@@ -175,18 +175,18 @@ def call_gemini_with_tools(model_name, system_prompt, messages, tools, max_token
|
|
| 175 |
|
| 176 |
print(f"DEBUG gemini_adapter: Got response")
|
| 177 |
|
| 178 |
-
print(f"DEBUG gemini_adapter: Response type: {type(response)}")
|
| 179 |
-
print(f"DEBUG gemini_adapter: Response has candidates: {hasattr(response, 'candidates')}")
|
| 180 |
if hasattr(response, 'candidates') and response.candidates:
|
| 181 |
candidate = response.candidates[0]
|
| 182 |
-
print(f"DEBUG gemini_adapter: First candidate type: {type(candidate)}")
|
| 183 |
-
print(f"DEBUG gemini_adapter: finish_reason: {getattr(candidate, 'finish_reason', 'UNKNOWN')}")
|
| 184 |
|
| 185 |
# Log function calls if present
|
| 186 |
if hasattr(candidate, 'content') and candidate.content and hasattr(candidate.content, 'parts') and candidate.content.parts:
|
| 187 |
for i, part in enumerate(candidate.content.parts):
|
| 188 |
if hasattr(part, 'function_call') and part.function_call:
|
| 189 |
-
print(f"DEBUG gemini_adapter: Part {i} has function_call: {part.function_call.name}")
|
| 190 |
print(f"DEBUG gemini_adapter: Function args: {dict(part.function_call.args) if part.function_call.args else {}}")
|
| 191 |
return response
|
| 192 |
|
|
@@ -203,28 +203,28 @@ def extract_tool_calls_from_gemini_response(response):
|
|
| 203 |
"""
|
| 204 |
tool_calls = []
|
| 205 |
|
| 206 |
-
print(f"DEBUG extract_tool_calls: Response type: {type(response)}")
|
| 207 |
-
print(f"DEBUG extract_tool_calls: Has candidates: {hasattr(response, 'candidates')}")
|
| 208 |
|
| 209 |
if not hasattr(response, 'candidates') or not response.candidates:
|
| 210 |
-
print(f"DEBUG extract_tool_calls: No candidates found")
|
| 211 |
return tool_calls
|
| 212 |
|
| 213 |
candidate = response.candidates[0]
|
| 214 |
-
print(f"DEBUG extract_tool_calls: Candidate has content: {hasattr(candidate, 'content')}")
|
| 215 |
|
| 216 |
if not hasattr(candidate, 'content') or not candidate.content:
|
| 217 |
-
print(f"DEBUG extract_tool_calls: No content in candidate")
|
| 218 |
return tool_calls
|
| 219 |
|
| 220 |
-
print(f"DEBUG extract_tool_calls: Content has parts: {hasattr(candidate.content, 'parts')}")
|
| 221 |
|
| 222 |
if not hasattr(candidate.content, 'parts') or not candidate.content.parts:
|
| 223 |
-
print(f"DEBUG extract_tool_calls: No parts in content")
|
| 224 |
return tool_calls
|
| 225 |
|
| 226 |
for i, part in enumerate(candidate.content.parts):
|
| 227 |
-
print(f"DEBUG extract_tool_calls: Part {i} has function_call: {hasattr(part, 'function_call')}")
|
| 228 |
if hasattr(part, 'function_call') and part.function_call:
|
| 229 |
fc = part.function_call
|
| 230 |
tool_calls.append({
|
|
@@ -233,7 +233,7 @@ def extract_tool_calls_from_gemini_response(response):
|
|
| 233 |
"id": f"call_{i}"
|
| 234 |
})
|
| 235 |
|
| 236 |
-
print(f"DEBUG extract_tool_calls: Found {len(tool_calls)} tool calls")
|
| 237 |
return tool_calls
|
| 238 |
|
| 239 |
def get_text_from_gemini_response(response):
|
|
@@ -242,21 +242,21 @@ def get_text_from_gemini_response(response):
|
|
| 242 |
Returns:
|
| 243 |
tuple: (text, error_message) where error_message is None if successful
|
| 244 |
"""
|
| 245 |
-
print(f"DEBUG get_text: Response type: {type(response)}")
|
| 246 |
-
print(f"DEBUG get_text: Has candidates: {hasattr(response, 'candidates')}")
|
| 247 |
|
| 248 |
if not hasattr(response, 'candidates') or not response.candidates:
|
| 249 |
-
print(f"ERROR get_text: No candidates in response")
|
| 250 |
return "", "No response candidates received from AI. Please try again."
|
| 251 |
|
| 252 |
candidate = response.candidates[0]
|
| 253 |
finish_reason = getattr(candidate, 'finish_reason', 'UNKNOWN')
|
| 254 |
-
print(f"DEBUG get_text: Candidate has content: {hasattr(candidate, 'content')}")
|
| 255 |
-
print(f"DEBUG get_text: finish_reason: {finish_reason}")
|
| 256 |
|
| 257 |
if not hasattr(candidate, 'content') or not candidate.content:
|
| 258 |
-
print(f"WARNING get_text: No content in candidate")
|
| 259 |
-
print(f"WARNING get_text: safety_ratings: {getattr(candidate, 'safety_ratings', 'N/A')}")
|
| 260 |
|
| 261 |
# Provide specific error messages based on finish_reason
|
| 262 |
if finish_reason == 'SAFETY':
|
|
@@ -270,27 +270,27 @@ def get_text_from_gemini_response(response):
|
|
| 270 |
else:
|
| 271 |
return "", "AI returned empty response. Please try again."
|
| 272 |
|
| 273 |
-
print(f"DEBUG get_text: Content has parts: {hasattr(candidate.content, 'parts') if candidate.content else False}")
|
| 274 |
|
| 275 |
if not candidate.content or not hasattr(candidate.content, 'parts') or not candidate.content.parts:
|
| 276 |
-
print(f"WARNING get_text: No parts in content")
|
| 277 |
return "", "AI response had no content. Please try again."
|
| 278 |
|
| 279 |
text_parts = []
|
| 280 |
has_function_call = False
|
| 281 |
for i, part in enumerate(candidate.content.parts):
|
| 282 |
-
print(f"DEBUG get_text: Part {i} has text: {hasattr(part, 'text')}")
|
| 283 |
if hasattr(part, 'text') and part.text:
|
| 284 |
text_parts.append(part.text)
|
| 285 |
if hasattr(part, 'function_call') and part.function_call:
|
| 286 |
has_function_call = True
|
| 287 |
|
| 288 |
result = " ".join(text_parts)
|
| 289 |
-
print(f"DEBUG get_text: Extracted text length: {len(result)}, has_function_call: {has_function_call}")
|
| 290 |
|
| 291 |
# Empty text is OK if there's a function call (tool-only response)
|
| 292 |
if (not result or result.strip() == "") and not has_function_call:
|
| 293 |
-
print(f"WARNING get_text: Empty text extracted and no function call")
|
| 294 |
return "", "AI returned empty text. Please try again."
|
| 295 |
|
| 296 |
return result, None
|
|
@@ -321,7 +321,7 @@ def chat_with_gemini_and_tools(system_prompt, messages, tools=None, max_tokens=1
|
|
| 321 |
# Convert tools to Gemini format if provided
|
| 322 |
gemini_tools = convert_tools_to_gemini_format(tools) if tools else None
|
| 323 |
|
| 324 |
-
print(f"DEBUG chat_with_gemini_and_tools: {len(messages)} messages, {len(tools) if tools else 0} tools")
|
| 325 |
|
| 326 |
# Get client
|
| 327 |
client = get_client()
|
|
@@ -353,7 +353,7 @@ def chat_with_gemini_and_tools(system_prompt, messages, tools=None, max_tokens=1
|
|
| 353 |
config=config
|
| 354 |
)
|
| 355 |
|
| 356 |
-
print(f"DEBUG chat_with_gemini_and_tools: Got response")
|
| 357 |
|
| 358 |
# Check for tool calls first
|
| 359 |
tool_calls = extract_tool_calls_from_gemini_response(response)
|
|
@@ -363,13 +363,13 @@ def chat_with_gemini_and_tools(system_prompt, messages, tools=None, max_tokens=1
|
|
| 363 |
|
| 364 |
# If there's an error but we have tool calls, that's OK (tool-only response)
|
| 365 |
if error and not tool_calls:
|
| 366 |
-
print(f"ERROR chat_with_gemini_and_tools: {error}")
|
| 367 |
return "", None, error
|
| 368 |
elif error and tool_calls:
|
| 369 |
-
print(f"DEBUG chat_with_gemini_and_tools: Error '{error}' but have {len(tool_calls)} tool calls, proceeding")
|
| 370 |
text = "" # Clear any error text
|
| 371 |
|
| 372 |
-
print(f"DEBUG chat_with_gemini_and_tools: text={len(text)} chars, tool_calls={len(tool_calls) if tool_calls else 0}")
|
| 373 |
return text, tool_calls if tool_calls else None, None
|
| 374 |
|
| 375 |
except Exception as e:
|
|
@@ -398,8 +398,8 @@ def chat_with_gemini(system_prompt, messages, max_tokens=1024, temperature=1.0,
|
|
| 398 |
# Convert messages to Gemini format
|
| 399 |
contents = convert_messages_to_gemini_format(messages)
|
| 400 |
|
| 401 |
-
print(f"DEBUG chat_with_gemini: Converted {len(messages)} messages to {len(contents)} Contents")
|
| 402 |
-
print(f"DEBUG chat_with_gemini: Calling Gemini API with model {model_name}")
|
| 403 |
|
| 404 |
# Get client
|
| 405 |
client = get_client()
|
|
@@ -418,16 +418,16 @@ def chat_with_gemini(system_prompt, messages, max_tokens=1024, temperature=1.0,
|
|
| 418 |
config=config
|
| 419 |
)
|
| 420 |
|
| 421 |
-
print(f"DEBUG chat_with_gemini: Got response")
|
| 422 |
|
| 423 |
# Extract text
|
| 424 |
text, error = get_text_from_gemini_response(response)
|
| 425 |
|
| 426 |
if error:
|
| 427 |
-
print(f"ERROR chat_with_gemini: {error}")
|
| 428 |
return "", error
|
| 429 |
|
| 430 |
-
print(f"DEBUG chat_with_gemini: Successfully extracted text ({len(text)} chars)")
|
| 431 |
return text, None
|
| 432 |
|
| 433 |
except Exception as e:
|
|
|
|
| 88 |
# Could be Gradio file format {"path": ..., "mime_type": ...}
|
| 89 |
# Skip audio/video files - they can't be sent to Gemini text API
|
| 90 |
if content.get("path") and content.get("mime_type"):
|
| 91 |
+
# print(f"DEBUG convert_messages: Skipping file content: {content.get('mime_type')}")
|
| 92 |
continue
|
| 93 |
# Could be text content {"type": "text", "text": "..."}
|
| 94 |
elif content.get("type") == "text":
|
| 95 |
parts.append(types.Part(text=content.get("text", "")))
|
| 96 |
elif hasattr(content, '__class__') and 'Audio' in content.__class__.__name__:
|
| 97 |
# Skip Gradio Audio component objects
|
| 98 |
+
# print(f"DEBUG convert_messages: Skipping Gradio component: {content.__class__.__name__}")
|
| 99 |
continue
|
| 100 |
elif isinstance(content, list):
|
| 101 |
# Complex content with tool calls/results
|
|
|
|
| 152 |
# Convert messages to Gemini format
|
| 153 |
contents = convert_messages_to_gemini_format(messages)
|
| 154 |
|
| 155 |
+
# print(f"DEBUG gemini_adapter: Converted {len(messages)} messages to {len(contents)} Contents")
|
| 156 |
+
# print(f"DEBUG gemini_adapter: Tools: {len(gemini_tools[0].function_declarations) if gemini_tools else 0}")
|
| 157 |
|
| 158 |
# Call API
|
| 159 |
+
# print(f"DEBUG gemini_adapter: Calling Gemini API...")
|
| 160 |
client = get_client()
|
| 161 |
|
| 162 |
# Generate content (no timeout parameter - not supported)
|
|
|
|
| 175 |
|
| 176 |
print(f"DEBUG gemini_adapter: Got response")
|
| 177 |
|
| 178 |
+
# print(f"DEBUG gemini_adapter: Response type: {type(response)}")
|
| 179 |
+
# print(f"DEBUG gemini_adapter: Response has candidates: {hasattr(response, 'candidates')}")
|
| 180 |
if hasattr(response, 'candidates') and response.candidates:
|
| 181 |
candidate = response.candidates[0]
|
| 182 |
+
# print(f"DEBUG gemini_adapter: First candidate type: {type(candidate)}")
|
| 183 |
+
# print(f"DEBUG gemini_adapter: finish_reason: {getattr(candidate, 'finish_reason', 'UNKNOWN')}")
|
| 184 |
|
| 185 |
# Log function calls if present
|
| 186 |
if hasattr(candidate, 'content') and candidate.content and hasattr(candidate.content, 'parts') and candidate.content.parts:
|
| 187 |
for i, part in enumerate(candidate.content.parts):
|
| 188 |
if hasattr(part, 'function_call') and part.function_call:
|
| 189 |
+
# print(f"DEBUG gemini_adapter: Part {i} has function_call: {part.function_call.name}")
|
| 190 |
print(f"DEBUG gemini_adapter: Function args: {dict(part.function_call.args) if part.function_call.args else {}}")
|
| 191 |
return response
|
| 192 |
|
|
|
|
| 203 |
"""
|
| 204 |
tool_calls = []
|
| 205 |
|
| 206 |
+
# print(f"DEBUG extract_tool_calls: Response type: {type(response)}")
|
| 207 |
+
# print(f"DEBUG extract_tool_calls: Has candidates: {hasattr(response, 'candidates')}")
|
| 208 |
|
| 209 |
if not hasattr(response, 'candidates') or not response.candidates:
|
| 210 |
+
# print(f"DEBUG extract_tool_calls: No candidates found")
|
| 211 |
return tool_calls
|
| 212 |
|
| 213 |
candidate = response.candidates[0]
|
| 214 |
+
# print(f"DEBUG extract_tool_calls: Candidate has content: {hasattr(candidate, 'content')}")
|
| 215 |
|
| 216 |
if not hasattr(candidate, 'content') or not candidate.content:
|
| 217 |
+
# print(f"DEBUG extract_tool_calls: No content in candidate")
|
| 218 |
return tool_calls
|
| 219 |
|
| 220 |
+
# print(f"DEBUG extract_tool_calls: Content has parts: {hasattr(candidate.content, 'parts')}")
|
| 221 |
|
| 222 |
if not hasattr(candidate.content, 'parts') or not candidate.content.parts:
|
| 223 |
+
# print(f"DEBUG extract_tool_calls: No parts in content")
|
| 224 |
return tool_calls
|
| 225 |
|
| 226 |
for i, part in enumerate(candidate.content.parts):
|
| 227 |
+
# print(f"DEBUG extract_tool_calls: Part {i} has function_call: {hasattr(part, 'function_call')}")
|
| 228 |
if hasattr(part, 'function_call') and part.function_call:
|
| 229 |
fc = part.function_call
|
| 230 |
tool_calls.append({
|
|
|
|
| 233 |
"id": f"call_{i}"
|
| 234 |
})
|
| 235 |
|
| 236 |
+
# print(f"DEBUG extract_tool_calls: Found {len(tool_calls)} tool calls")
|
| 237 |
return tool_calls
|
| 238 |
|
| 239 |
def get_text_from_gemini_response(response):
|
|
|
|
| 242 |
Returns:
|
| 243 |
tuple: (text, error_message) where error_message is None if successful
|
| 244 |
"""
|
| 245 |
+
# print(f"DEBUG get_text: Response type: {type(response)}")
|
| 246 |
+
# print(f"DEBUG get_text: Has candidates: {hasattr(response, 'candidates')}")
|
| 247 |
|
| 248 |
if not hasattr(response, 'candidates') or not response.candidates:
|
| 249 |
+
# print(f"ERROR get_text: No candidates in response")
|
| 250 |
return "", "No response candidates received from AI. Please try again."
|
| 251 |
|
| 252 |
candidate = response.candidates[0]
|
| 253 |
finish_reason = getattr(candidate, 'finish_reason', 'UNKNOWN')
|
| 254 |
+
# print(f"DEBUG get_text: Candidate has content: {hasattr(candidate, 'content')}")
|
| 255 |
+
# print(f"DEBUG get_text: finish_reason: {finish_reason}")
|
| 256 |
|
| 257 |
if not hasattr(candidate, 'content') or not candidate.content:
|
| 258 |
+
# print(f"WARNING get_text: No content in candidate")
|
| 259 |
+
# print(f"WARNING get_text: safety_ratings: {getattr(candidate, 'safety_ratings', 'N/A')}")
|
| 260 |
|
| 261 |
# Provide specific error messages based on finish_reason
|
| 262 |
if finish_reason == 'SAFETY':
|
|
|
|
| 270 |
else:
|
| 271 |
return "", "AI returned empty response. Please try again."
|
| 272 |
|
| 273 |
+
# print(f"DEBUG get_text: Content has parts: {hasattr(candidate.content, 'parts') if candidate.content else False}")
|
| 274 |
|
| 275 |
if not candidate.content or not hasattr(candidate.content, 'parts') or not candidate.content.parts:
|
| 276 |
+
# print(f"WARNING get_text: No parts in content")
|
| 277 |
return "", "AI response had no content. Please try again."
|
| 278 |
|
| 279 |
text_parts = []
|
| 280 |
has_function_call = False
|
| 281 |
for i, part in enumerate(candidate.content.parts):
|
| 282 |
+
# print(f"DEBUG get_text: Part {i} has text: {hasattr(part, 'text')}")
|
| 283 |
if hasattr(part, 'text') and part.text:
|
| 284 |
text_parts.append(part.text)
|
| 285 |
if hasattr(part, 'function_call') and part.function_call:
|
| 286 |
has_function_call = True
|
| 287 |
|
| 288 |
result = " ".join(text_parts)
|
| 289 |
+
# print(f"DEBUG get_text: Extracted text length: {len(result)}, has_function_call: {has_function_call}")
|
| 290 |
|
| 291 |
# Empty text is OK if there's a function call (tool-only response)
|
| 292 |
if (not result or result.strip() == "") and not has_function_call:
|
| 293 |
+
# print(f"WARNING get_text: Empty text extracted and no function call")
|
| 294 |
return "", "AI returned empty text. Please try again."
|
| 295 |
|
| 296 |
return result, None
|
|
|
|
| 321 |
# Convert tools to Gemini format if provided
|
| 322 |
gemini_tools = convert_tools_to_gemini_format(tools) if tools else None
|
| 323 |
|
| 324 |
+
# print(f"DEBUG chat_with_gemini_and_tools: {len(messages)} messages, {len(tools) if tools else 0} tools")
|
| 325 |
|
| 326 |
# Get client
|
| 327 |
client = get_client()
|
|
|
|
| 353 |
config=config
|
| 354 |
)
|
| 355 |
|
| 356 |
+
# print(f"DEBUG chat_with_gemini_and_tools: Got response")
|
| 357 |
|
| 358 |
# Check for tool calls first
|
| 359 |
tool_calls = extract_tool_calls_from_gemini_response(response)
|
|
|
|
| 363 |
|
| 364 |
# If there's an error but we have tool calls, that's OK (tool-only response)
|
| 365 |
if error and not tool_calls:
|
| 366 |
+
# print(f"ERROR chat_with_gemini_and_tools: {error}")
|
| 367 |
return "", None, error
|
| 368 |
elif error and tool_calls:
|
| 369 |
+
# print(f"DEBUG chat_with_gemini_and_tools: Error '{error}' but have {len(tool_calls)} tool calls, proceeding")
|
| 370 |
text = "" # Clear any error text
|
| 371 |
|
| 372 |
+
# print(f"DEBUG chat_with_gemini_and_tools: text={len(text)} chars, tool_calls={len(tool_calls) if tool_calls else 0}")
|
| 373 |
return text, tool_calls if tool_calls else None, None
|
| 374 |
|
| 375 |
except Exception as e:
|
|
|
|
| 398 |
# Convert messages to Gemini format
|
| 399 |
contents = convert_messages_to_gemini_format(messages)
|
| 400 |
|
| 401 |
+
# print(f"DEBUG chat_with_gemini: Converted {len(messages)} messages to {len(contents)} Contents")
|
| 402 |
+
# print(f"DEBUG chat_with_gemini: Calling Gemini API with model {model_name}")
|
| 403 |
|
| 404 |
# Get client
|
| 405 |
client = get_client()
|
|
|
|
| 418 |
config=config
|
| 419 |
)
|
| 420 |
|
| 421 |
+
# print(f"DEBUG chat_with_gemini: Got response")
|
| 422 |
|
| 423 |
# Extract text
|
| 424 |
text, error = get_text_from_gemini_response(response)
|
| 425 |
|
| 426 |
if error:
|
| 427 |
+
# print(f"ERROR chat_with_gemini: {error}")
|
| 428 |
return "", error
|
| 429 |
|
| 430 |
+
# print(f"DEBUG chat_with_gemini: Successfully extracted text ({len(text)} chars)")
|
| 431 |
return text, None
|
| 432 |
|
| 433 |
except Exception as e:
|
gradio_ui.py
CHANGED
|
@@ -149,7 +149,7 @@ def get_today_puzzle():
|
|
| 149 |
|
| 150 |
# Use backend function to get puzzle
|
| 151 |
puzzle = get_puzzle_by_date(today)
|
| 152 |
-
print(puzzle)
|
| 153 |
if puzzle:
|
| 154 |
return f"""
|
| 155 |
### 📅 오늘의 퍼즐
|
|
@@ -240,8 +240,6 @@ with gr.Blocks(title="Chloe's Voice Komentle") as demo:
|
|
| 240 |
|
| 241 |
# Launch configuration
|
| 242 |
if __name__ == "__main__":
|
| 243 |
-
print("🚀 Starting Chloe's Voice Komentle...")
|
| 244 |
-
|
| 245 |
# Initialize backend (VoiceKit MCP session)
|
| 246 |
print("⏳ Initializing VoiceKit MCP...")
|
| 247 |
|
|
|
|
| 149 |
|
| 150 |
# Use backend function to get puzzle
|
| 151 |
puzzle = get_puzzle_by_date(today)
|
| 152 |
+
# print(puzzle)
|
| 153 |
if puzzle:
|
| 154 |
return f"""
|
| 155 |
### 📅 오늘의 퍼즐
|
|
|
|
| 240 |
|
| 241 |
# Launch configuration
|
| 242 |
if __name__ == "__main__":
|
|
|
|
|
|
|
| 243 |
# Initialize backend (VoiceKit MCP session)
|
| 244 |
print("⏳ Initializing VoiceKit MCP...")
|
| 245 |
|
inspect_routes.py
CHANGED
|
@@ -13,7 +13,7 @@ os.environ["BACKEND_API_URL"] = ""
|
|
| 13 |
try:
|
| 14 |
from client.app import app
|
| 15 |
print("Successfully imported app")
|
| 16 |
-
print("Routes:")
|
| 17 |
for route in app.routes:
|
| 18 |
print(f" {route.path} ({type(route).__name__})")
|
| 19 |
except Exception as e:
|
|
|
|
| 13 |
try:
|
| 14 |
from client.app import app
|
| 15 |
print("Successfully imported app")
|
| 16 |
+
# print("Routes:")
|
| 17 |
for route in app.routes:
|
| 18 |
print(f" {route.path} ({type(route).__name__})")
|
| 19 |
except Exception as e:
|
test_analyze_voice.py
CHANGED
|
@@ -128,9 +128,9 @@ async def test_analyze_voice(audio_file, date: str, session_id: str, generate_ne
|
|
| 128 |
with open(audio_path, "rb") as f:
|
| 129 |
audio_bytes = f.read()
|
| 130 |
|
| 131 |
-
print(f"📁 Audio file size: {len(audio_bytes)} bytes")
|
| 132 |
-
print(f"📅 Date: {date}")
|
| 133 |
-
print(f"🆔 Session ID: {session_id}")
|
| 134 |
|
| 135 |
# Call the function
|
| 136 |
result = await analyze_voice_logic(audio_bytes, date, session_id)
|
|
@@ -247,15 +247,15 @@ with gr.Blocks(title="Analyze Voice Logic Test") as demo:
|
|
| 247 |
|
| 248 |
|
| 249 |
if __name__ == "__main__":
|
| 250 |
-
print("=" * 60)
|
| 251 |
-
print("🎤 Analyze Voice Logic Test Page")
|
| 252 |
-
print("=" * 60)
|
| 253 |
-
print("\n✅ 사전 준비:")
|
| 254 |
-
print(" 1. PostgreSQL 실행: docker-compose up -d postgres")
|
| 255 |
-
print(" 2. 환경변수 설정: GOOGLE_API_KEY, DATABASE_URL")
|
| 256 |
-
print(" 3. VoiceKit MCP 서버 (외부): 자동 연결")
|
| 257 |
-
print("\n💡 이 페이지는 독립적으로 실행됩니다 (Backend 서버 불필요)")
|
| 258 |
-
print("\n" + "=" * 60 + "\n")
|
| 259 |
|
| 260 |
demo.launch(
|
| 261 |
server_name="127.0.0.1",
|
|
|
|
| 128 |
with open(audio_path, "rb") as f:
|
| 129 |
audio_bytes = f.read()
|
| 130 |
|
| 131 |
+
# print(f"📁 Audio file size: {len(audio_bytes)} bytes")
|
| 132 |
+
# print(f"📅 Date: {date}")
|
| 133 |
+
# print(f"🆔 Session ID: {session_id}")
|
| 134 |
|
| 135 |
# Call the function
|
| 136 |
result = await analyze_voice_logic(audio_bytes, date, session_id)
|
|
|
|
| 247 |
|
| 248 |
|
| 249 |
if __name__ == "__main__":
|
| 250 |
+
# print("=" * 60)
|
| 251 |
+
# print("🎤 Analyze Voice Logic Test Page")
|
| 252 |
+
# print("=" * 60)
|
| 253 |
+
# print("\n✅ 사전 준비:")
|
| 254 |
+
# print(" 1. PostgreSQL 실행: docker-compose up -d postgres")
|
| 255 |
+
# print(" 2. 환경변수 설정: GOOGLE_API_KEY, DATABASE_URL")
|
| 256 |
+
# print(" 3. VoiceKit MCP 서버 (외부): 자동 연결")
|
| 257 |
+
# print("\n💡 이 페이지는 독립적으로 실행됩니다 (Backend 서버 불필요)")
|
| 258 |
+
# print("\n" + "=" * 60 + "\n")
|
| 259 |
|
| 260 |
demo.launch(
|
| 261 |
server_name="127.0.0.1",
|