Sungjoon Lee committed on
Commit
48b92eb
·
1 Parent(s): 5578f9d

[DOCS] 문서 수정 및 디버그 코드 제거

Browse files
client/app.py CHANGED
@@ -54,16 +54,16 @@ IMAGES_DIR = os.path.join(PROJECT_ROOT, "images")
54
  REFERENCE_AUDIO_DIR = os.path.join(PROJECT_ROOT, "reference_audio")
55
 
56
  # 디버그: 경로 확인
57
- print(f"[PATH DEBUG] __file__: {__file__}")
58
- print(f"[PATH DEBUG] PROJECT_ROOT: {PROJECT_ROOT}")
59
- print(f"[PATH DEBUG] DOCS_DIR: {DOCS_DIR}")
60
- print(f"[PATH DEBUG] DOCS_DIR exists: {os.path.exists(DOCS_DIR)}")
61
  if os.path.exists(DOCS_DIR):
62
  print(f"[PATH DEBUG] DOCS_DIR contents: {os.listdir(DOCS_DIR)}")
63
 
64
  # 환경변수로 allowed_paths 설정 (Spaces 배포용)
65
  os.environ["GRADIO_ALLOWED_PATHS"] = f"{UPLOAD_DIR},{DOCS_DIR},{IMAGES_DIR},{REFERENCE_AUDIO_DIR}"
66
- print(f"[PATH DEBUG] GRADIO_ALLOWED_PATHS: {os.environ['GRADIO_ALLOWED_PATHS']}")
67
 
68
 
69
  class AudioValidationApp:
@@ -89,7 +89,7 @@ class AudioValidationApp:
89
  """
90
  try:
91
  data = get_dashboard_stats()
92
- print(f"[DASHBOARD] 통계 데이터: {data}")
93
  return data # 전체 데이터 반환 (answer_word, reference_audio_path 포함)
94
 
95
  except Exception as e:
@@ -152,13 +152,13 @@ class AudioValidationApp:
152
  expected_text = self.validator.get_expected_text(current_difficulty)
153
  today = datetime.now().strftime("%Y-%m-%d")
154
 
155
- print("=" * 60)
156
  print("[AUDIO VALIDATE] 음성 검증 요청")
157
- print(f" - Session ID (UUID): {session_id}")
158
- print(f" - Audio Path: {audio_path}")
159
- print(f" - 시도 횟수: {attempt_count + 1}번째")
160
- print(f" - 기대 텍스트: {expected_text}")
161
- print("=" * 60)
162
 
163
  # ========== 직접 서비스 호출 (HTTP 없음) ==========
164
  try:
@@ -172,7 +172,7 @@ class AudioValidationApp:
172
  # 직접 분석 서비스 호출 (HTTP 대신 함수 호출)
173
  api_result = await analyze_voice(audio_bytes, today, session_id)
174
 
175
- print(f"[ANALYSIS RESULT] {api_result}")
176
 
177
  # API 오류 처리
178
  if api_result.get("status") == "error":
@@ -300,7 +300,7 @@ class AudioValidationApp:
300
  }
301
  )
302
 
303
- print(f"[AUDIO VALIDATE] 실패 처리 - Modal 표시, 총 추측: {len(updated_game_state.get('guesses', []))}개")
304
 
305
  return (
306
  history_html, # 0: history_html
@@ -352,9 +352,9 @@ class AudioValidationApp:
352
  updated_game_state = GameStateManager.set_win_state(updated_game_state, win=True)
353
 
354
  print(f"[AUDIO VALIDATE] 성공 처리!")
355
- print(f" - Session ID: {session_id}")
356
- print(f" - 총 시도 횟수: {len(current_history) + 1}")
357
- print(f" - 승리 연속: {updated_game_state.get('stats', {}).get('winStreak', 0)}")
358
 
359
  # 통계는 JavaScript에서 화면 표시 시 API 호출하여 업데이트
360
  print(f"[SUCCESS] 성공 화면으로 전환 - JS가 통계 API 호출 예정")
@@ -391,7 +391,7 @@ class AudioValidationApp:
391
  last_audio = guesses[-1].get('audioFile', "")
392
 
393
  # 통계는 JavaScript에서 화면 표시 시 API 호출하여 업데이트
394
- print(f"[GIVEUP] 포기 화면으로 전환 - JS가 통계 API 호출 예정")
395
 
396
  # outputs 순서: main_screen, giveup_screen, giveup_content, game_state
397
  return (
@@ -421,13 +421,13 @@ class AudioValidationApp:
421
  updated_game_state = GameStateManager.get_or_create_session(game_state)
422
  session_id = GameStateManager.get_session_id(updated_game_state)
423
 
424
- print("=" * 60)
425
- print("[PAGE LOAD] 페이지 로드")
426
- print(f" - 오늘 날짜: {today}")
427
- print(f" - 저장된 날짜: {stored_date}")
428
- print(f" - Session ID (UUID): {session_id}")
429
- print(f" - 게임 상태: {updated_game_state.get('winState', -1)} (-1:진행중, 0:포기, 1:성공)")
430
- print("=" * 60)
431
 
432
  if stored_date != today:
433
  # 날짜가 다르면 기록 초기화
@@ -459,7 +459,7 @@ class AudioValidationApp:
459
  """
460
  # 앱 시작 시점에 통계 데이터 가져오기
461
  stats = self._fetch_dashboard_stats_sync()
462
- print(f"[BUILD UI] 초기 통계 데이터: {stats}")
463
 
464
  # 이벤트 핸들러 설정
465
  handlers = {
 
54
  REFERENCE_AUDIO_DIR = os.path.join(PROJECT_ROOT, "reference_audio")
55
 
56
  # 디버그: 경로 확인
57
+ # print(f"[PATH DEBUG] __file__: {__file__}")
58
+ # print(f"[PATH DEBUG] PROJECT_ROOT: {PROJECT_ROOT}")
59
+ # print(f"[PATH DEBUG] DOCS_DIR: {DOCS_DIR}")
60
+ # print(f"[PATH DEBUG] DOCS_DIR exists: {os.path.exists(DOCS_DIR)}")
61
  if os.path.exists(DOCS_DIR):
62
  print(f"[PATH DEBUG] DOCS_DIR contents: {os.listdir(DOCS_DIR)}")
63
 
64
  # 환경변수로 allowed_paths 설정 (Spaces 배포용)
65
  os.environ["GRADIO_ALLOWED_PATHS"] = f"{UPLOAD_DIR},{DOCS_DIR},{IMAGES_DIR},{REFERENCE_AUDIO_DIR}"
66
+ # print(f"[PATH DEBUG] GRADIO_ALLOWED_PATHS: {os.environ['GRADIO_ALLOWED_PATHS']}")
67
 
68
 
69
  class AudioValidationApp:
 
89
  """
90
  try:
91
  data = get_dashboard_stats()
92
+ # print(f"[DASHBOARD] 통계 데이터: {data}")
93
  return data # 전체 데이터 반환 (answer_word, reference_audio_path 포함)
94
 
95
  except Exception as e:
 
152
  expected_text = self.validator.get_expected_text(current_difficulty)
153
  today = datetime.now().strftime("%Y-%m-%d")
154
 
155
+ # print("=" * 60)
156
  print("[AUDIO VALIDATE] 음성 검증 요청")
157
+ # print(f" - Session ID (UUID): {session_id}")
158
+ # print(f" - Audio Path: {audio_path}")
159
+ # print(f" - 시도 횟수: {attempt_count + 1}번째")
160
+ # print(f" - 기대 텍스트: {expected_text}")
161
+ # print("=" * 60)
162
 
163
  # ========== 직접 서비스 호출 (HTTP 없음) ==========
164
  try:
 
172
  # 직접 분석 서비스 호출 (HTTP 대신 함수 호출)
173
  api_result = await analyze_voice(audio_bytes, today, session_id)
174
 
175
+ # print(f"[ANALYSIS RESULT] {api_result}")
176
 
177
  # API 오류 처리
178
  if api_result.get("status") == "error":
 
300
  }
301
  )
302
 
303
+ # print(f"[AUDIO VALIDATE] 실패 처리 - Modal 표시, 총 추측: {len(updated_game_state.get('guesses', []))}개")
304
 
305
  return (
306
  history_html, # 0: history_html
 
352
  updated_game_state = GameStateManager.set_win_state(updated_game_state, win=True)
353
 
354
  print(f"[AUDIO VALIDATE] 성공 처리!")
355
+ # print(f" - Session ID: {session_id}")
356
+ # print(f" - 총 시도 횟수: {len(current_history) + 1}")
357
+ # print(f" - 승리 연속: {updated_game_state.get('stats', {}).get('winStreak', 0)}")
358
 
359
  # 통계는 JavaScript에서 화면 표시 시 API 호출하여 업데이트
360
  print(f"[SUCCESS] 성공 화면으로 전환 - JS가 통계 API 호출 예정")
 
391
  last_audio = guesses[-1].get('audioFile', "")
392
 
393
  # 통계는 JavaScript에서 화면 표시 시 API 호출하여 업데이트
394
+ # print(f"[GIVEUP] 포기 화면으로 전환 - JS가 통계 API 호출 예정")
395
 
396
  # outputs 순서: main_screen, giveup_screen, giveup_content, game_state
397
  return (
 
421
  updated_game_state = GameStateManager.get_or_create_session(game_state)
422
  session_id = GameStateManager.get_session_id(updated_game_state)
423
 
424
+ # print("=" * 60)
425
+ # print("[PAGE LOAD] 페이지 로드")
426
+ # print(f" - 오늘 날짜: {today}")
427
+ # print(f" - 저장된 날짜: {stored_date}")
428
+ # print(f" - Session ID (UUID): {session_id}")
429
+ # print(f" - 게임 상태: {updated_game_state.get('winState', -1)} (-1:진행중, 0:포기, 1:성공)")
430
+ # print("=" * 60)
431
 
432
  if stored_date != today:
433
  # 날짜가 다르면 기록 초기화
 
459
  """
460
  # 앱 시작 시점에 통계 데이터 가져오기
461
  stats = self._fetch_dashboard_stats_sync()
462
+ # print(f"[BUILD UI] 초기 통계 데이터: {stats}")
463
 
464
  # 이벤트 핸들러 설정
465
  handlers = {
client/docs/tech-stack.html CHANGED
@@ -1,9 +1,9 @@
1
  <!DOCTYPE html>
2
- <html lang="ko">
3
  <head>
4
  <meta charset="UTF-8">
5
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
- <title>Tech Stack - Komentle Voice Challenge</title>
7
  <link href="https://fonts.googleapis.com/css2?family=Nunito:wght@400;600;700;800&display=swap" rel="stylesheet">
8
  <style>
9
  :root {
@@ -494,8 +494,8 @@
494
  <!-- Sidebar -->
495
  <nav class="sidebar">
496
  <a href="#" class="logo">
497
- <div class="logo-icon">K</div>
498
- Komentle
499
  </a>
500
 
501
  <div class="nav-section">
@@ -523,7 +523,7 @@
523
  <div class="nav-title">Backend</div>
524
  <ul class="nav-list">
525
  <li class="nav-item">
526
- <a href="#backend" class="nav-link">FastAPI Server</a>
527
  </li>
528
  <li class="nav-item">
529
  <a href="#database" class="nav-link">Database</a>
@@ -535,7 +535,10 @@
535
  <div class="nav-title">AI / ML</div>
536
  <ul class="nav-list">
537
  <li class="nav-item">
538
- <a href="#ai" class="nav-link">Voice Analysis</a>
 
 
 
539
  </li>
540
  <li class="nav-item">
541
  <a href="#audio" class="nav-link">Audio Processing</a>
@@ -564,8 +567,8 @@
564
  <header class="page-header">
565
  <h1 class="page-title">Tech Stack</h1>
566
  <p class="page-description">
567
- Komentle Voice Challenge에 사용된 기술 스택입니다.
568
- 음성 기반 발음 검증 퀴즈 게임을 위한 풀스택 구성이에요.
569
  </p>
570
  </header>
571
 
@@ -578,18 +581,18 @@
578
  <div class="architecture-diagram">
579
  <div class="arch-flow">
580
  <div class="arch-box">
581
- <div class="arch-box-title">Client</div>
582
  <div class="arch-box-sub">Gradio 6.0</div>
583
  </div>
584
  <span class="arch-arrow">→</span>
585
  <div class="arch-box secondary">
586
- <div class="arch-box-title">Backend</div>
587
- <div class="arch-box-sub">FastAPI</div>
588
  </div>
589
  <span class="arch-arrow">→</span>
590
  <div class="arch-box tertiary">
591
- <div class="arch-box-title">AI Engine</div>
592
- <div class="arch-box-sub">Google Gemini</div>
593
  </div>
594
  </div>
595
  </div>
@@ -609,17 +612,17 @@
609
  <span class="tech-version">6.0.0</span>
610
  </div>
611
  <p class="tech-description">
612
- Python 기반 UI 프레임워크. 음성 입력, 실시간 피드백, 게임 화면 전환 인터랙티브한 인터페이스 구현
613
  </p>
614
  </div>
615
  <div class="tech-card">
616
  <div class="tech-header">
617
  <div class="tech-icon icon-python">Py</div>
618
  <span class="tech-name">Python</span>
619
- <span class="tech-version">3.11</span>
620
  </div>
621
  <p class="tech-description">
622
- 클라이언트 애플리케이션 개발 언어. 타입 힌팅과 async/await 패턴 활용
623
  </p>
624
  </div>
625
  <div class="tech-card">
@@ -629,7 +632,17 @@
629
  <span class="tech-version">0.28.1</span>
630
  </div>
631
  <p class="tech-description">
632
- 비동기 HTTP 클라이언트. 백엔드 API와의 통신 담당
 
 
 
 
 
 
 
 
 
 
633
  </p>
634
  </div>
635
  </div>
@@ -639,47 +652,47 @@
639
  <section id="backend" class="section">
640
  <h2 class="section-title">
641
  <span class="section-icon backend">⚡</span>
642
- Backend
643
  </h2>
644
  <div class="tech-grid">
645
  <div class="tech-card">
646
  <div class="tech-header">
647
- <div class="tech-icon icon-fastapi">Fa</div>
648
- <span class="tech-name">FastAPI</span>
649
- <span class="tech-version">0.121.3</span>
650
  </div>
651
  <p class="tech-description">
652
- 고성능 Python 프레임워크. REST API 엔드포인트 제공, 자동 OpenAPI 문서 생성
653
  </p>
654
  </div>
655
  <div class="tech-card">
656
  <div class="tech-header">
657
- <div class="tech-icon" style="background: linear-gradient(135deg, #7c3aed, #a78bfa);">Uv</div>
658
- <span class="tech-name">Uvicorn</span>
659
- <span class="tech-version">0.38.0</span>
660
  </div>
661
  <p class="tech-description">
662
- Lightning-fast ASGI 서버. 비동기 요청 처리 WebSocket 지원
663
  </p>
664
  </div>
665
  <div class="tech-card">
666
  <div class="tech-header">
667
- <div class="tech-icon icon-python">Py</div>
668
- <span class="tech-name">Python</span>
669
- <span class="tech-version">3.12</span>
670
  </div>
671
  <p class="tech-description">
672
- 백엔드 서버 개발 언어. 최신 버전의 성능 최적화 기능 활용
673
  </p>
674
  </div>
675
  <div class="tech-card">
676
  <div class="tech-header">
677
- <div class="tech-icon" style="background: linear-gradient(135deg, #ec4899, #f472b6);">Pd</div>
678
- <span class="tech-name">Pydantic</span>
679
- <span class="tech-version">2.12.4</span>
680
  </div>
681
  <p class="tech-description">
682
- 데이터 검증 설정 관리. 타입 안전성과 자동 직렬화 제공
683
  </p>
684
  </div>
685
  </div>
@@ -696,20 +709,30 @@
696
  <div class="tech-header">
697
  <div class="tech-icon icon-postgres">Pg</div>
698
  <span class="tech-name">PostgreSQL</span>
699
- <span class="tech-version">Latest</span>
700
  </div>
701
  <p class="tech-description">
702
- 관계형 데이터베이스. 사용자 세션, 게임 기록, 통계 데이터 저장
703
  </p>
704
  </div>
705
  <div class="tech-card">
706
  <div class="tech-header">
707
- <div class="tech-icon" style="background: linear-gradient(135deg, #dc2626, #f87171);">SA</div>
708
- <span class="tech-name">SQLAlchemy</span>
709
- <span class="tech-version">2.0.44</span>
710
  </div>
711
  <p class="tech-description">
712
- Python ORM. 데이터베이스 추상화 쿼리 빌더 제공
 
 
 
 
 
 
 
 
 
 
713
  </p>
714
  </div>
715
  </div>
@@ -726,40 +749,80 @@
726
  <div class="tech-header">
727
  <div class="tech-icon icon-gemini">Gm</div>
728
  <span class="tech-name">Google Gemini</span>
729
- <span class="tech-version">API</span>
730
  </div>
731
  <p class="tech-description">
732
- 멀티모달 AI 모델. 음성 분석, 발음 평가, 자연어 피드백 생성
733
  </p>
734
  </div>
735
  <div class="tech-card">
736
  <div class="tech-header">
737
- <div class="tech-icon" style="background: linear-gradient(135deg, #0891b2, #22d3ee);">MC</div>
738
- <span class="tech-name">MCP</span>
739
- <span class="tech-version">1.0.0+</span>
740
  </div>
741
  <p class="tech-description">
742
- Model Context Protocol. AI 모델 통합 및 컨텍스트 관리
743
  </p>
744
  </div>
745
  <div class="tech-card">
746
  <div class="tech-header">
747
  <div class="tech-icon icon-numpy">Np</div>
748
  <span class="tech-name">NumPy</span>
749
- <span class="tech-version">1.26+</span>
750
  </div>
751
  <p class="tech-description">
752
- 수치 계산 라이브러리. 오디오 신호 처리 데이터 분석
753
  </p>
754
  </div>
755
  <div class="tech-card">
756
  <div class="tech-header">
757
  <div class="tech-icon" style="background: linear-gradient(135deg, #0d9488, #2dd4bf);">Pd</div>
758
  <span class="tech-name">Pandas</span>
759
- <span class="tech-version">2.0+</span>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
760
  </div>
761
  <p class="tech-description">
762
- 데이터 분석 라이브러리. 통계 처리 데이터 조작
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
763
  </p>
764
  </div>
765
  </div>
@@ -779,7 +842,7 @@
779
  <span class="tech-version">0.25.1</span>
780
  </div>
781
  <p class="tech-description">
782
- 오디오 처리 라이브러리. 포맷 변환, 정규화, 편집 기능 제공
783
  </p>
784
  </div>
785
  <div class="tech-card">
@@ -789,7 +852,27 @@
789
  <span class="tech-version">via ffmpy</span>
790
  </div>
791
  <p class="tech-description">
792
- 멀티미디어 프레임워크. 오디오 인코딩/디코딩 처리
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
793
  </p>
794
  </div>
795
  </div>
@@ -809,7 +892,7 @@
809
  <span class="tech-version">Latest</span>
810
  </div>
811
  <p class="tech-description">
812
- 컨테이너화 플랫폼. 일관된 개발/배포 환경 제공
813
  </p>
814
  </div>
815
  <div class="tech-card">
@@ -819,27 +902,47 @@
819
  <span class="tech-version">Latest</span>
820
  </div>
821
  <p class="tech-description">
822
- 멀티 컨테이너 오케스트레이션. 서비스 네트워크 볼륨 관리
823
  </p>
824
  </div>
825
  <div class="tech-card">
826
  <div class="tech-header">
827
- <div class="tech-icon" style="background: linear-gradient(135deg, #fbbf24, #fcd34d);">Pt</div>
828
- <span class="tech-name">pytest</span>
829
- <span class="tech-version">Latest</span>
830
  </div>
831
  <p class="tech-description">
832
- 테스트 프레임워크. 단위 테스트 통합 테스트 실행
833
  </p>
834
  </div>
835
  <div class="tech-card">
836
  <div class="tech-header">
837
- <div class="tech-icon" style="background: linear-gradient(135deg, #64748b, #94a3b8);">Ev</div>
838
- <span class="tech-name">python-dotenv</span>
839
- <span class="tech-version">1.2.1</span>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
840
  </div>
841
  <p class="tech-description">
842
- 환경 변수 관리. .env 파일 기반 설정 로드
843
  </p>
844
  </div>
845
  </div>
 
1
  <!DOCTYPE html>
2
+ <html lang="en">
3
  <head>
4
  <meta charset="UTF-8">
5
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Tech Stack - Voice Sementle</title>
7
  <link href="https://fonts.googleapis.com/css2?family=Nunito:wght@400;600;700;800&display=swap" rel="stylesheet">
8
  <style>
9
  :root {
 
494
  <!-- Sidebar -->
495
  <nav class="sidebar">
496
  <a href="#" class="logo">
497
+ <div class="logo-icon">V</div>
498
+ Voice Sementle
499
  </a>
500
 
501
  <div class="nav-section">
 
523
  <div class="nav-title">Backend</div>
524
  <ul class="nav-list">
525
  <li class="nav-item">
526
+ <a href="#backend" class="nav-link">Services</a>
527
  </li>
528
  <li class="nav-item">
529
  <a href="#database" class="nav-link">Database</a>
 
535
  <div class="nav-title">AI / ML</div>
536
  <ul class="nav-list">
537
  <li class="nav-item">
538
+ <a href="#ai" class="nav-link">AI Models</a>
539
+ </li>
540
+ <li class="nav-item">
541
+ <a href="#mcp" class="nav-link">MCP Integration</a>
542
  </li>
543
  <li class="nav-item">
544
  <a href="#audio" class="nav-link">Audio Processing</a>
 
567
  <header class="page-header">
568
  <h1 class="page-title">Tech Stack</h1>
569
  <p class="page-description">
570
+ Comprehensive technology stack for Voice Sementle - a daily voice puzzle game where pronunciation matters.
571
+ Built with modern Python frameworks, AI models, and cloud services.
572
  </p>
573
  </header>
574
 
 
581
  <div class="architecture-diagram">
582
  <div class="arch-flow">
583
  <div class="arch-box">
584
+ <div class="arch-box-title">Client UI</div>
585
  <div class="arch-box-sub">Gradio 6.0</div>
586
  </div>
587
  <span class="arch-arrow">→</span>
588
  <div class="arch-box secondary">
589
+ <div class="arch-box-title">Services</div>
590
+ <div class="arch-box-sub">Python Backend</div>
591
  </div>
592
  <span class="arch-arrow">→</span>
593
  <div class="arch-box tertiary">
594
+ <div class="arch-box-title">AI Models</div>
595
+ <div class="arch-box-sub">Gemini + VoiceKit MCP</div>
596
  </div>
597
  </div>
598
  </div>
 
612
  <span class="tech-version">6.0.0</span>
613
  </div>
614
  <p class="tech-description">
615
+ Python-based UI framework. Powers interactive web interface with voice input, real-time feedback, modals, and dynamic game state transitions.
616
  </p>
617
  </div>
618
  <div class="tech-card">
619
  <div class="tech-header">
620
  <div class="tech-icon icon-python">Py</div>
621
  <span class="tech-name">Python</span>
622
+ <span class="tech-version">3.11+</span>
623
  </div>
624
  <p class="tech-description">
625
+ Client application language. Leverages type hints, async/await patterns, and modern Python features for clean, maintainable code.
626
  </p>
627
  </div>
628
  <div class="tech-card">
 
632
  <span class="tech-version">0.28.1</span>
633
  </div>
634
  <p class="tech-description">
635
+ Async HTTP client library for communicating with backend services and external APIs with full HTTP/2 support.
636
+ </p>
637
+ </div>
638
+ <div class="tech-card">
639
+ <div class="tech-header">
640
+ <div class="tech-icon" style="background: linear-gradient(135deg, #8b5cf6, #a78bfa);">Js</div>
641
+ <span class="tech-name">Custom CSS/JS</span>
642
+ <span class="tech-version">-</span>
643
+ </div>
644
+ <p class="tech-description">
645
+ Custom styling with animated backgrounds, modal components, radar charts, and responsive design for optimal user experience.
646
  </p>
647
  </div>
648
  </div>
 
652
  <section id="backend" class="section">
653
  <h2 class="section-title">
654
  <span class="section-icon backend">⚡</span>
655
+ Backend Services
656
  </h2>
657
  <div class="tech-grid">
658
  <div class="tech-card">
659
  <div class="tech-header">
660
+ <div class="tech-icon icon-python">Py</div>
661
+ <span class="tech-name">Python Services</span>
662
+ <span class="tech-version">3.11+</span>
663
  </div>
664
  <p class="tech-description">
665
+ Modular service architecture including voice analyzer, hint generator, database layer, and game state management with async/await support.
666
  </p>
667
  </div>
668
  <div class="tech-card">
669
  <div class="tech-header">
670
+ <div class="tech-icon" style="background: linear-gradient(135deg, #ec4899, #f472b6);">Pd</div>
671
+ <span class="tech-name">Pydantic</span>
672
+ <span class="tech-version">2.0+</span>
673
  </div>
674
  <p class="tech-description">
675
+ Data validation and settings management with type safety, automatic serialization, and schema validation.
676
  </p>
677
  </div>
678
  <div class="tech-card">
679
  <div class="tech-header">
680
+ <div class="tech-icon" style="background: linear-gradient(135deg, #10b981, #34d399);">Ay</div>
681
+ <span class="tech-name">Asyncio</span>
682
+ <span class="tech-version">Built-in</span>
683
  </div>
684
  <p class="tech-description">
685
+ Native Python async library for concurrent operations, enabling efficient handling of multiple voice analysis requests.
686
  </p>
687
  </div>
688
  <div class="tech-card">
689
  <div class="tech-header">
690
+ <div class="tech-icon" style="background: linear-gradient(135deg, #f59e0b, #fbbf24);">Dv</div>
691
+ <span class="tech-name">Python-dotenv</span>
692
+ <span class="tech-version">1.0.0</span>
693
  </div>
694
  <p class="tech-description">
695
+ Environment variable management from .env files for secure API keys and configuration settings.
696
  </p>
697
  </div>
698
  </div>
 
709
  <div class="tech-header">
710
  <div class="tech-icon icon-postgres">Pg</div>
711
  <span class="tech-name">PostgreSQL</span>
712
+ <span class="tech-version">16-alpine</span>
713
  </div>
714
  <p class="tech-description">
715
+ Production-grade relational database. Stores user sessions, game history, daily puzzles, attempt statistics, and score analytics.
716
  </p>
717
  </div>
718
  <div class="tech-card">
719
  <div class="tech-header">
720
+ <div class="tech-icon" style="background: linear-gradient(135deg, #0891b2, #22d3ee);">Ps</div>
721
+ <span class="tech-name">psycopg2</span>
722
+ <span class="tech-version">2.9+</span>
723
  </div>
724
  <p class="tech-description">
725
+ PostgreSQL adapter for Python. Provides efficient database connections and query execution with connection pooling.
726
+ </p>
727
+ </div>
728
+ <div class="tech-card">
729
+ <div class="tech-header">
730
+ <div class="tech-icon icon-docker">Dk</div>
731
+ <span class="tech-name">Docker Volume</span>
732
+ <span class="tech-version">-</span>
733
+ </div>
734
+ <p class="tech-description">
735
+ Persistent data storage with Docker volumes ensuring database persistence across container restarts.
736
  </p>
737
  </div>
738
  </div>
 
749
  <div class="tech-header">
750
  <div class="tech-icon icon-gemini">Gm</div>
751
  <span class="tech-name">Google Gemini</span>
752
+ <span class="tech-version">2.0 Flash</span>
753
  </div>
754
  <p class="tech-description">
755
+ Multimodal AI model powering intelligent hint generation, contextual feedback, and adaptive difficulty based on player performance.
756
  </p>
757
  </div>
758
  <div class="tech-card">
759
  <div class="tech-header">
760
+ <div class="tech-icon" style="background: linear-gradient(135deg, #6366f1, #818cf8);">11</div>
761
+ <span class="tech-name">ElevenLabs</span>
762
+ <span class="tech-version">2.24.0</span>
763
  </div>
764
  <p class="tech-description">
765
+ Advanced text-to-speech API with voice cloning. Generates partial audio hints with reference voice matching for natural gameplay experience.
766
  </p>
767
  </div>
768
  <div class="tech-card">
769
  <div class="tech-header">
770
  <div class="tech-icon icon-numpy">Np</div>
771
  <span class="tech-name">NumPy</span>
772
+ <span class="tech-version">1.26.4</span>
773
  </div>
774
  <p class="tech-description">
775
+ Numerical computing library for audio signal processing, feature extraction, and statistical analysis of voice patterns.
776
  </p>
777
  </div>
778
  <div class="tech-card">
779
  <div class="tech-header">
780
  <div class="tech-icon" style="background: linear-gradient(135deg, #0d9488, #2dd4bf);">Pd</div>
781
  <span class="tech-name">Pandas</span>
782
+ <span class="tech-version">2.2.3</span>
783
+ </div>
784
+ <p class="tech-description">
785
+ Data analysis library for processing game statistics, tracking user progress, and generating performance insights.
786
+ </p>
787
+ </div>
788
+ </div>
789
+ </section>
790
+
791
+ <!-- MCP Integration -->
792
+ <section id="mcp" class="section">
793
+ <h2 class="section-title">
794
+ <span class="section-icon" style="background: linear-gradient(135deg, #8b5cf6, #a78bfa);">🔌</span>
795
+ Model Context Protocol (MCP)
796
+ </h2>
797
+ <div class="tech-grid">
798
+ <div class="tech-card">
799
+ <div class="tech-header">
800
+ <div class="tech-icon" style="background: linear-gradient(135deg, #0891b2, #22d3ee);">MC</div>
801
+ <span class="tech-name">MCP Client</span>
802
+ <span class="tech-version">1.22.0</span>
803
  </div>
804
  <p class="tech-description">
805
+ Model Context Protocol client for seamless AI model integration. Manages sessions, tool calling, and context sharing across AI services.
806
+ </p>
807
+ </div>
808
+ <div class="tech-card">
809
+ <div class="tech-header">
810
+ <div class="tech-icon" style="background: linear-gradient(135deg, #f97316, #fb923c);">Vk</div>
811
+ <span class="tech-name">VoiceKit MCP</span>
812
+ <span class="tech-version">HuggingFace</span>
813
+ </div>
814
+ <p class="tech-description">
815
+ Voice analysis MCP server providing voicekit_analyze_voice_similarity tool. Compares user recordings with reference audio for pronunciation scoring.
816
+ </p>
817
+ </div>
818
+ <div class="tech-card">
819
+ <div class="tech-header">
820
+ <div class="tech-icon" style="background: linear-gradient(135deg, #ec4899, #f472b6);">SS</div>
821
+ <span class="tech-name">SSE Client</span>
822
+ <span class="tech-version">MCP</span>
823
+ </div>
824
+ <p class="tech-description">
825
+ Server-Sent Events client for real-time MCP communication. Enables streaming responses and bidirectional tool invocation.
826
  </p>
827
  </div>
828
  </div>
 
842
  <span class="tech-version">0.25.1</span>
843
  </div>
844
  <p class="tech-description">
845
+ Audio manipulation library for format conversion, normalization, compression, and audio file editing with effects processing.
846
  </p>
847
  </div>
848
  <div class="tech-card">
 
852
  <span class="tech-version">via ffmpy</span>
853
  </div>
854
  <p class="tech-description">
855
+ Multimedia framework for audio encoding/decoding. Handles format conversion and compression for optimal file sizes.
856
+ </p>
857
+ </div>
858
+ <div class="tech-card">
859
+ <div class="tech-header">
860
+ <div class="tech-icon" style="background: linear-gradient(135deg, #7c3aed, #a78bfa);">B6</div>
861
+ <span class="tech-name">Base64</span>
862
+ <span class="tech-version">Built-in</span>
863
+ </div>
864
+ <p class="tech-description">
865
+ Audio data encoding for API transmission. Converts binary audio to base64 strings for MCP tool parameters.
866
+ </p>
867
+ </div>
868
+ <div class="tech-card">
869
+ <div class="tech-header">
870
+ <div class="tech-icon" style="background: linear-gradient(135deg, #dc2626, #f87171);">Af</div>
871
+ <span class="tech-name">Aiofiles</span>
872
+ <span class="tech-version">24.1.0</span>
873
+ </div>
874
+ <p class="tech-description">
875
+ Async file operations for non-blocking audio file I/O. Enables concurrent file processing without blocking the event loop.
876
  </p>
877
  </div>
878
  </div>
 
892
  <span class="tech-version">Latest</span>
893
  </div>
894
  <p class="tech-description">
895
+ Containerization platform providing consistent development and deployment environments across all systems.
896
  </p>
897
  </div>
898
  <div class="tech-card">
 
902
  <span class="tech-version">Latest</span>
903
  </div>
904
  <p class="tech-description">
905
+ Multi-container orchestration for managing PostgreSQL database service with volume persistence and networking.
906
  </p>
907
  </div>
908
  <div class="tech-card">
909
  <div class="tech-header">
910
+ <div class="tech-icon" style="background: linear-gradient(135deg, #64748b, #94a3b8);">Ev</div>
911
+ <span class="tech-name">Environment Config</span>
912
+ <span class="tech-version">-</span>
913
  </div>
914
  <p class="tech-description">
915
+ Secure configuration management with .env files for API keys (Gemini, ElevenLabs, VoiceKit) and sensitive settings.
916
  </p>
917
  </div>
918
  <div class="tech-card">
919
  <div class="tech-header">
920
+ <div class="tech-icon" style="background: linear-gradient(135deg, #16a34a, #4ade80);">Sh</div>
921
+ <span class="tech-name">Shell Scripts</span>
922
+ <span class="tech-version">Bash</span>
923
+ </div>
924
+ <p class="tech-description">
925
+ Automated deployment scripts (start.sh, stop.sh) for streamlined application lifecycle management.
926
+ </p>
927
+ </div>
928
+ <div class="tech-card">
929
+ <div class="tech-header">
930
+ <div class="tech-icon" style="background: linear-gradient(135deg, #ea580c, #fb923c);">Hf</div>
931
+ <span class="tech-name">HuggingFace</span>
932
+ <span class="tech-version">Hub</span>
933
+ </div>
934
+ <p class="tech-description">
935
+ Deployment platform hosting VoiceKit MCP server and application demos with integrated spaces for testing.
936
+ </p>
937
+ </div>
938
+ <div class="tech-card">
939
+ <div class="tech-header">
940
+ <div class="tech-icon" style="background: linear-gradient(135deg, #9333ea, #a855f7);">Lg</div>
941
+ <span class="tech-name">Logging</span>
942
+ <span class="tech-version">Built-in</span>
943
  </div>
944
  <p class="tech-description">
945
+ Comprehensive logging system for debugging, monitoring API calls, tracking errors, and performance analysis.
946
  </p>
947
  </div>
948
  </div>
client/docs/user-guide.html CHANGED
@@ -3,7 +3,7 @@
3
  <head>
4
  <meta charset="UTF-8">
5
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
- <title>User Guide - Komentle Voice Challenge</title>
7
  <link href="https://fonts.googleapis.com/css2?family=Nunito:wght@400;600;700;800&display=swap" rel="stylesheet">
8
  <style>
9
  :root {
@@ -792,8 +792,8 @@
792
  <div class="metric-label">Pronunciation</div>
793
  </div>
794
  <div class="metric-item">
795
- <div class="metric-value">Tone</div>
796
- <div class="metric-label">Voice Tone</div>
797
  </div>
798
  <div class="metric-item">
799
  <div class="metric-value">Pitch</div>
@@ -813,8 +813,8 @@
813
  <div class="content-card">
814
  <h3>🎯 Score Interpretation</h3>
815
  <ul>
816
- <li><strong>80+ points:</strong> Very similar to the answer! You're almost there!</li>
817
- <li><strong>60-79 points:</strong> Good attempt. Check the hints for guidance.</li>
818
  <li><strong>40-59 points:</strong> Right direction, but try a different word.</li>
819
  <li><strong>Below 40:</strong> Far from the answer. Try a new guess.</li>
820
  </ul>
@@ -840,7 +840,7 @@
840
  <div class="content-card">
841
  <h3>💬 AI Hint Chatbot</h3>
842
  <p>
843
- Click the floating button at the bottom right of the screen to open the AI hint chatbot.
844
  Request hints from the chatbot to receive indirect clues about the answer.
845
  </p>
846
  </div>
@@ -852,16 +852,17 @@
852
  <li><strong>"What's the category?":</strong> Learn the classification of the answer word.</li>
853
  <li><strong>"Tell me the first letter":</strong> Get a hint about the first letter.</li>
854
  <li><strong>"What words sound similar?":</strong> Get pronunciation-related suggestions.</li>
 
855
  </ul>
856
  </div>
857
 
858
- <div class="warning-box">
859
  <span class="warning-icon">💡</span>
860
  <div class="warning-text">
861
  <strong>Limited Hint Usage!</strong> Chatbot hints can only be used a limited number of times.
862
  Request hints carefully!
863
  </div>
864
- </div>
865
  </section>
866
 
867
  <!-- Tips & Tricks -->
 
3
  <head>
4
  <meta charset="UTF-8">
5
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>User Guide - Voice Sementle</title>
7
  <link href="https://fonts.googleapis.com/css2?family=Nunito:wght@400;600;700;800&display=swap" rel="stylesheet">
8
  <style>
9
  :root {
 
792
  <div class="metric-label">Pronunciation</div>
793
  </div>
794
  <div class="metric-item">
795
+ <div class="metric-value">Line Acc.</div>
796
+ <div class="metric-label">Line Accuracy</div>
797
  </div>
798
  <div class="metric-item">
799
  <div class="metric-value">Pitch</div>
 
813
  <div class="content-card">
814
  <h3>🎯 Score Interpretation</h3>
815
  <ul>
816
+ <li><strong>85+ points:</strong> Very similar to the answer! You're almost there!</li>
817
+ <li><strong>60-84 points:</strong> Good attempt. Check the hints for guidance.</li>
818
  <li><strong>40-59 points:</strong> Right direction, but try a different word.</li>
819
  <li><strong>Below 40:</strong> Far from the answer. Try a new guess.</li>
820
  </ul>
 
840
  <div class="content-card">
841
  <h3>💬 AI Hint Chatbot</h3>
842
  <p>
843
+ Click the floating button at the top right of the screen to open the AI hint chatbot.
844
  Request hints from the chatbot to receive indirect clues about the answer.
845
  </p>
846
  </div>
 
852
  <li><strong>"What's the category?":</strong> Learn the classification of the answer word.</li>
853
  <li><strong>"Tell me the first letter":</strong> Get a hint about the first letter.</li>
854
  <li><strong>"What words sound similar?":</strong> Get pronunciation-related suggestions.</li>
855
+ <li><strong>"Give me an audio hint":</strong> AI provides an audio hint in a tone similar to your voice.</li>
856
  </ul>
857
  </div>
858
 
859
+ <!-- <div class="warning-box">
860
  <span class="warning-icon">💡</span>
861
  <div class="warning-text">
862
  <strong>Limited Hint Usage!</strong> Chatbot hints can only be used a limited number of times.
863
  Request hints carefully!
864
  </div>
865
+ </div> -->
866
  </section>
867
 
868
  <!-- Tips & Tricks -->
client/frontend/components/floating_chatbot.py CHANGED
@@ -374,14 +374,14 @@ def call_ai_backend(message: str, user_id: str, history: List[Dict], game_state:
374
  system_prompt = build_system_prompt_from_game_state(game_state, include_audio_tool=tools_will_be_enabled)
375
 
376
  print(f"[CHATBOT] Calling Gemini with context:")
377
- print(f" - User ID: {user_id}")
378
- print(f" - Message: {message}")
379
- print(f" - History length: {len(history)}")
380
- print(f" - Game state attempts: {attempt_count}")
381
- print(f" - Answer word: {answer_word}")
382
- print(f" - ElevenLabs AVAILABLE: {ELEVENLABS_AVAILABLE}")
383
- print(f" - ElevenLabs API key set: {bool(api_key)}")
384
- print(f" - ElevenLabs configured: {elevenlabs_ready}")
385
 
386
  # Define audio hint tool if ElevenLabs is configured
387
  tools = []
@@ -426,7 +426,7 @@ def call_ai_backend(message: str, user_id: str, history: List[Dict], game_state:
426
  print(f"[CHATBOT ERROR] {error}")
427
  return f"Sorry, I encountered an error: {error}\n\nPlease try again in a moment."
428
 
429
- print(f"[CHATBOT] Got response: text={len(response_text)} chars, tool_calls={len(tool_calls) if tool_calls else 0}")
430
 
431
  # Handle tool calls
432
  if tool_calls:
@@ -434,18 +434,18 @@ def call_ai_backend(message: str, user_id: str, history: List[Dict], game_state:
434
  if tool_call['name'] == 'generate_audio_hint':
435
  hint_type = tool_call['input'].get('hint_type', 'syllable')
436
  word_index = tool_call['input'].get('word_index', 0) # Default to first word
437
- print(f"[CHATBOT] Gemini requested audio hint: type={hint_type}, word_index={word_index}")
438
 
439
  # Extract the appropriate portion based on hint_type, word_index, and answer_word
440
  from utils.elevenlabs_tts import extract_hint_portion_for_word, generate_audio_hint
441
 
442
  text_to_speak, actual_hint_type = extract_hint_portion_for_word(answer_word, attempt_count, word_index)
443
  # Use voice cloning from reference audio when available
444
- print(f"[CHATBOT] Generating audio for: '{text_to_speak}' with reference: {reference_audio_path}")
445
  audio_path = generate_audio_hint(text_to_speak, actual_hint_type, reference_audio_path=reference_audio_path)
446
 
447
  if audio_path:
448
- print(f"[CHATBOT] Audio hint generated: {audio_path}")
449
  # Always include text with audio hint
450
  if not response_text or response_text.strip() == "":
451
  response_text = f"🎵 Here's an audio hint! Listen to how it sounds:"
@@ -584,13 +584,13 @@ Don't overthink your first try – it's meant to be a shot in the dark! Just say
584
  hist.append({"role": "user", "content": message})
585
 
586
  # 콘솔 로그
587
- print("=" * 60)
588
  print("[CHATBOT] 메시지 전송")
589
- print(f" - Session ID (UUID): {uid}")
590
- print(f" - Message: {message}")
591
- print(f" - History Length: {len(hist)}")
592
- print(f" - Game State Attempts: {len(gs.get('guesses', [])) if gs else 0}")
593
- print("=" * 60)
594
 
595
  # AI 호출 (Gemini with game_state context)
596
  response = call_ai_backend(message, uid, hist, game_state=gs)
@@ -606,8 +606,8 @@ Don't overthink your first try – it's meant to be a shot in the dark! Just say
606
  # Path is relative to project root (3 levels up from this file)
607
  project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
608
  audio_path = os.path.join(project_root, audio_path)
609
- print(f"[CHATBOT] Audio hint included: {audio_path}")
610
- print(f"[CHATBOT] Audio file exists: {os.path.exists(audio_path)}")
611
 
612
  # Gradio 6: Use gr.Audio() component for audio content
613
  # Reference: chatbot_core_components_simple demo
@@ -622,7 +622,7 @@ Don't overthink your first try – it's meant to be a shot in the dark! Just say
622
 
623
  def close_chat_handler():
624
  """닫기 버튼 핸들러 - 체크박스만 False로"""
625
- print("[CHATBOT] 닫기 버튼 클릭됨")
626
  return gr.update(value=False)
627
 
628
  # ============================================================
 
374
  system_prompt = build_system_prompt_from_game_state(game_state, include_audio_tool=tools_will_be_enabled)
375
 
376
  print(f"[CHATBOT] Calling Gemini with context:")
377
+ # print(f" - User ID: {user_id}")
378
+ # print(f" - Message: {message}")
379
+ # print(f" - History length: {len(history)}")
380
+ # print(f" - Game state attempts: {attempt_count}")
381
+ # print(f" - Answer word: {answer_word}")
382
+ # print(f" - ElevenLabs AVAILABLE: {ELEVENLABS_AVAILABLE}")
383
+ # print(f" - ElevenLabs API key set: {bool(api_key)}")
384
+ # print(f" - ElevenLabs configured: {elevenlabs_ready}")
385
 
386
  # Define audio hint tool if ElevenLabs is configured
387
  tools = []
 
426
  print(f"[CHATBOT ERROR] {error}")
427
  return f"Sorry, I encountered an error: {error}\n\nPlease try again in a moment."
428
 
429
+ # print(f"[CHATBOT] Got response: text={len(response_text)} chars, tool_calls={len(tool_calls) if tool_calls else 0}")
430
 
431
  # Handle tool calls
432
  if tool_calls:
 
434
  if tool_call['name'] == 'generate_audio_hint':
435
  hint_type = tool_call['input'].get('hint_type', 'syllable')
436
  word_index = tool_call['input'].get('word_index', 0) # Default to first word
437
+ # print(f"[CHATBOT] Gemini requested audio hint: type={hint_type}, word_index={word_index}")
438
 
439
  # Extract the appropriate portion based on hint_type, word_index, and answer_word
440
  from utils.elevenlabs_tts import extract_hint_portion_for_word, generate_audio_hint
441
 
442
  text_to_speak, actual_hint_type = extract_hint_portion_for_word(answer_word, attempt_count, word_index)
443
  # Use voice cloning from reference audio when available
444
+ # print(f"[CHATBOT] Generating audio for: '{text_to_speak}' with reference: {reference_audio_path}")
445
  audio_path = generate_audio_hint(text_to_speak, actual_hint_type, reference_audio_path=reference_audio_path)
446
 
447
  if audio_path:
448
+ # print(f"[CHATBOT] Audio hint generated: {audio_path}")
449
  # Always include text with audio hint
450
  if not response_text or response_text.strip() == "":
451
  response_text = f"🎵 Here's an audio hint! Listen to how it sounds:"
 
584
  hist.append({"role": "user", "content": message})
585
 
586
  # 콘솔 로그
587
+ # print("=" * 60)
588
  print("[CHATBOT] 메시지 전송")
589
+ # print(f" - Session ID (UUID): {uid}")
590
+ # print(f" - Message: {message}")
591
+ # print(f" - History Length: {len(hist)}")
592
+ # print(f" - Game State Attempts: {len(gs.get('guesses', [])) if gs else 0}")
593
+ # print("=" * 60)
594
 
595
  # AI 호출 (Gemini with game_state context)
596
  response = call_ai_backend(message, uid, hist, game_state=gs)
 
606
  # Path is relative to project root (3 levels up from this file)
607
  project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
608
  audio_path = os.path.join(project_root, audio_path)
609
+ # print(f"[CHATBOT] Audio hint included: {audio_path}")
610
+ # print(f"[CHATBOT] Audio file exists: {os.path.exists(audio_path)}")
611
 
612
  # Gradio 6: Use gr.Audio() component for audio content
613
  # Reference: chatbot_core_components_simple demo
 
622
 
623
  def close_chat_handler():
624
  """닫기 버튼 핸들러 - 체크박스만 False로"""
625
+ # print("[CHATBOT] 닫기 버튼 클릭됨")
626
  return gr.update(value=False)
627
 
628
  # ============================================================
client/frontend/styles/result_screen_style.py CHANGED
@@ -279,6 +279,18 @@ RESULT_SCREEN_CSS = """
279
  text-shadow: 1px 1px 2px rgba(0, 0, 0, 0.1);
280
  }
281
 
 
 
 
 
 
 
 
 
 
 
 
 
282
  .audio-compare-wrapper {
283
  display: flex;
284
  gap: 12px;
@@ -336,6 +348,26 @@ RESULT_SCREEN_CSS = """
336
  font-weight: 700;
337
  }
338
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
339
  /* 통계 카드 색상 - 라이트모드 (하늘색 애니메이션 테마) */
340
  .stat-blue {
341
  background: linear-gradient(135deg, #e3f5ff, #c7e9ff);
 
279
  text-shadow: 1px 1px 2px rgba(0, 0, 0, 0.1);
280
  }
281
 
282
+ @media (max-width: 800px) {
283
+ .answer-word {
284
+ font-size: 36px;
285
+ }
286
+ }
287
+
288
+ @media (max-width: 600px) {
289
+ .answer-word {
290
+ font-size: 28px;
291
+ }
292
+ }
293
+
294
  .audio-compare-wrapper {
295
  display: flex;
296
  gap: 12px;
 
348
  font-weight: 700;
349
  }
350
 
351
+ @media (max-width: 800px) {
352
+ .stat-label {
353
+ font-size: 0.7rem;
354
+ }
355
+
356
+ .stat-value {
357
+ font-size: 1.5rem;
358
+ }
359
+ }
360
+
361
+ @media (max-width: 600px) {
362
+ .stat-label {
363
+ font-size: 0.6rem;
364
+ }
365
+
366
+ .stat-value {
367
+ font-size: 1.2rem;
368
+ }
369
+ }
370
+
371
  /* 통계 카드 색상 - 라이트모드 (하늘색 애니메이션 테마) */
372
  .stat-blue {
373
  background: linear-gradient(135deg, #e3f5ff, #c7e9ff);
client/services/analysis_service.py CHANGED
@@ -149,19 +149,19 @@ async def analyze_voice(audio_bytes: bytes, date: str, session_id: str) -> Dict:
149
  "user_text": user_text,
150
  }
151
 
152
- print(f"\n{'='*50}")
153
- print(f"[SCORING RESULT] analyze_voice result:")
154
- print(f" - status: {result['status']}")
155
- print(f" - category: {result['category']}")
156
- print(f" - pitch: {result['pitch']}")
157
- print(f" - rhythm: {result['rhythm']}")
158
- print(f" - energy: {result['energy']}")
159
- print(f" - pronunciation: {result['pronunciation']}")
160
- print(f" - transcript: {result['transcript']}")
161
- print(f" - overall: {result['overall']}")
162
- print(f" - is_correct: {result['is_correct']}")
163
- print(f" - user_text: {result['user_text']}")
164
- print(f" - advice: {result['advice'][:100]}..." if len(result['advice']) > 100 else f" - advice: {result['advice']}")
165
- print(f"{'='*50}\n")
166
 
167
  return result
 
149
  "user_text": user_text,
150
  }
151
 
152
+ # print(f"\n{'='*50}")
153
+ # print(f"[SCORING RESULT] analyze_voice result:")
154
+ # print(f" - status: {result['status']}")
155
+ # print(f" - category: {result['category']}")
156
+ # print(f" - pitch: {result['pitch']}")
157
+ # print(f" - rhythm: {result['rhythm']}")
158
+ # print(f" - energy: {result['energy']}")
159
+ # print(f" - pronunciation: {result['pronunciation']}")
160
+ # print(f" - transcript: {result['transcript']}")
161
+ # print(f" - overall: {result['overall']}")
162
+ # print(f" - is_correct: {result['is_correct']}")
163
+ # print(f" - user_text: {result['user_text']}")
164
+ # print(f" - advice: {result['advice'][:100]}..." if len(result['advice']) > 100 else f" - advice: {result['advice']}")
165
+ # print(f"{'='*50}\n")
166
 
167
  return result
client/services/hint_generator.py CHANGED
@@ -124,8 +124,8 @@ Return ONLY this JSON:
124
  """
125
 
126
  print(f"[GEMINI HINT] Calling Gemini for hint generation...")
127
- print(f"[GEMINI HINT] User said: '{user_text}', Overall: {overall_score}, Attempt: {attempt}")
128
- print(f"[GEMINI HINT] Hint history: {hint_history}")
129
 
130
  # Call Gemini
131
  response = call_gemini_with_tools(
@@ -138,8 +138,8 @@ Return ONLY this JSON:
138
 
139
  # Extract JSON from response
140
  response_text, error = get_text_from_gemini_response(response)
141
- print(f"[GEMINI HINT] Response text: {response_text[:200] if response_text else 'None'}...")
142
- print(f"[GEMINI HINT] Error: {error}")
143
 
144
  if error:
145
  logger.error(f"Gemini response error: {error}")
@@ -154,12 +154,12 @@ Return ONLY this JSON:
154
  if response_text.startswith("```"):
155
  lines = response_text.split("\n")
156
  response_text = "\n".join(lines[1:-1]) if len(lines) > 2 else response_text
157
- print(f"[GEMINI HINT] Cleaned markdown, result: {response_text[:200]}...")
158
 
159
  # Parse JSON
160
- print(f"[GEMINI HINT] Parsing JSON: {response_text[:300]}...")
161
  hints_json = json.loads(response_text)
162
- print(f"[GEMINI HINT] ✓ Parsed successfully: {hints_json}")
163
  logger.info(f"✓ Generated fresh hint for attempt {attempt}, category {category}")
164
 
165
  return hints_json
 
124
  """
125
 
126
  print(f"[GEMINI HINT] Calling Gemini for hint generation...")
127
+ # print(f"[GEMINI HINT] User said: '{user_text}', Overall: {overall_score}, Attempt: {attempt}")
128
+ # print(f"[GEMINI HINT] Hint history: {hint_history}")
129
 
130
  # Call Gemini
131
  response = call_gemini_with_tools(
 
138
 
139
  # Extract JSON from response
140
  response_text, error = get_text_from_gemini_response(response)
141
+ # print(f"[GEMINI HINT] Response text: {response_text[:200] if response_text else 'None'}...")
142
+ # print(f"[GEMINI HINT] Error: {error}")
143
 
144
  if error:
145
  logger.error(f"Gemini response error: {error}")
 
154
  if response_text.startswith("```"):
155
  lines = response_text.split("\n")
156
  response_text = "\n".join(lines[1:-1]) if len(lines) > 2 else response_text
157
+ # print(f"[GEMINI HINT] Cleaned markdown, result: {response_text[:200]}...")
158
 
159
  # Parse JSON
160
+ # print(f"[GEMINI HINT] Parsing JSON: {response_text[:300]}...")
161
  hints_json = json.loads(response_text)
162
+ # print(f"[GEMINI HINT] ✓ Parsed successfully: {hints_json}")
163
  logger.info(f"✓ Generated fresh hint for attempt {attempt}, category {category}")
164
 
165
  return hints_json
client/utils/audio_validator.py CHANGED
@@ -98,7 +98,7 @@ class AudioValidator:
98
  similarity = self.calculate_similarity(recognized_text, expected_text)
99
  threshold = self.difficulty_thresholds.get(difficulty, 0.80)
100
 
101
- print(f"[DEBUG] Similarity: {similarity:.2%}, Threshold: {threshold:.2%}")
102
 
103
  return similarity >= threshold
104
 
 
98
  similarity = self.calculate_similarity(recognized_text, expected_text)
99
  threshold = self.difficulty_thresholds.get(difficulty, 0.80)
100
 
101
+ # print(f"[DEBUG] Similarity: {similarity:.2%}, Threshold: {threshold:.2%}")
102
 
103
  return similarity >= threshold
104
 
client/utils/elevenlabs_tts.py CHANGED
@@ -57,7 +57,7 @@ def clone_voice_from_reference(reference_audio_path: str) -> Optional[str]:
57
 
58
  # Check cache first
59
  if reference_audio_path in _cloned_voice_cache:
60
- print(f"✓ Using cached cloned voice for: {reference_audio_path}")
61
  return _cloned_voice_cache[reference_audio_path]
62
 
63
  try:
@@ -69,7 +69,7 @@ def clone_voice_from_reference(reference_audio_path: str) -> Optional[str]:
69
 
70
  # Always use .wav for ElevenLabs (required format for voice cloning)
71
  full_path = full_path.with_suffix('.wav')
72
- print(f"🎵 Using WAV format for ElevenLabs: {full_path}")
73
 
74
  if not full_path.exists():
75
  print(f"⚠ Reference audio not found: {full_path}")
@@ -80,7 +80,7 @@ def clone_voice_from_reference(reference_audio_path: str) -> Optional[str]:
80
 
81
  # Create instant voice clone using IVC API
82
  voice_name = f"puzzle_voice_{hashlib.md5(reference_audio_path.encode()).hexdigest()[:8]}"
83
- print(f"🎤 Cloning voice from: {full_path}")
84
 
85
  # Read file as BytesIO (required by ElevenLabs SDK)
86
  # remove_background_noise=False allows shorter samples (<4.6s)
@@ -93,7 +93,7 @@ def clone_voice_from_reference(reference_audio_path: str) -> Optional[str]:
93
 
94
  voice_id = voice.voice_id
95
  _cloned_voice_cache[reference_audio_path] = voice_id
96
- print(f"✓ Voice cloned successfully: {voice_id}")
97
 
98
  return voice_id
99
 
@@ -139,7 +139,8 @@ def generate_audio_hint(text: str, hint_type: str = "syllable", voice: str = "Ra
139
  if reference_audio_path:
140
  voice_id = clone_voice_from_reference(reference_audio_path)
141
  if voice_id:
142
- print(f"🎤 Using cloned voice: {voice_id}")
 
143
 
144
  # Fallback to preset voice if cloning failed
145
  if not voice_id:
@@ -150,10 +151,10 @@ def generate_audio_hint(text: str, hint_type: str = "syllable", voice: str = "Ra
150
  "Charlie": "IKne3meq5aSn9XLyUdCD", # Charlie - casual male
151
  }
152
  voice_id = voice_ids.get(voice, "EXAVITQu4vr4xnSDxMaL") # Default to Sarah
153
- print(f"🎵 Using preset voice: {voice}")
154
 
155
  # Generate audio using text_to_speech.convert (v2.24.0 API)
156
- print(f"🎵 Generating audio hint: '{text}' ({hint_type})")
157
  audio = client.text_to_speech.convert(
158
  voice_id=voice_id,
159
  text=text,
@@ -173,7 +174,7 @@ def generate_audio_hint(text: str, hint_type: str = "syllable", voice: str = "Ra
173
  # Return absolute path
174
  absolute_path = str(filepath)
175
 
176
- print(f"✓ Audio hint generated (fresh): {absolute_path}")
177
  return absolute_path
178
 
179
  except Exception as e:
 
57
 
58
  # Check cache first
59
  if reference_audio_path in _cloned_voice_cache:
60
+ # print(f"✓ Using cached cloned voice for: {reference_audio_path}")
61
  return _cloned_voice_cache[reference_audio_path]
62
 
63
  try:
 
69
 
70
  # Always use .wav for ElevenLabs (required format for voice cloning)
71
  full_path = full_path.with_suffix('.wav')
72
+ # print(f"🎵 Using WAV format for ElevenLabs: {full_path}")
73
 
74
  if not full_path.exists():
75
  print(f"⚠ Reference audio not found: {full_path}")
 
80
 
81
  # Create instant voice clone using IVC API
82
  voice_name = f"puzzle_voice_{hashlib.md5(reference_audio_path.encode()).hexdigest()[:8]}"
83
+ # print(f"🎤 Cloning voice from: {full_path}")
84
 
85
  # Read file as BytesIO (required by ElevenLabs SDK)
86
  # remove_background_noise=False allows shorter samples (<4.6s)
 
93
 
94
  voice_id = voice.voice_id
95
  _cloned_voice_cache[reference_audio_path] = voice_id
96
+ # print(f"✓ Voice cloned successfully: {voice_id}")
97
 
98
  return voice_id
99
 
 
139
  if reference_audio_path:
140
  voice_id = clone_voice_from_reference(reference_audio_path)
141
  if voice_id:
142
+ # print(f"🎤 Using cloned voice: {voice_id}")
143
+ pass
144
 
145
  # Fallback to preset voice if cloning failed
146
  if not voice_id:
 
151
  "Charlie": "IKne3meq5aSn9XLyUdCD", # Charlie - casual male
152
  }
153
  voice_id = voice_ids.get(voice, "EXAVITQu4vr4xnSDxMaL") # Default to Sarah
154
+ # print(f"🎵 Using preset voice: {voice}")
155
 
156
  # Generate audio using text_to_speech.convert (v2.24.0 API)
157
+ # print(f"🎵 Generating audio hint: '{text}' ({hint_type})")
158
  audio = client.text_to_speech.convert(
159
  voice_id=voice_id,
160
  text=text,
 
174
  # Return absolute path
175
  absolute_path = str(filepath)
176
 
177
+ # print(f"✓ Audio hint generated (fresh): {absolute_path}")
178
  return absolute_path
179
 
180
  except Exception as e:
client/utils/stt_handler.py CHANGED
@@ -82,7 +82,7 @@ class STTHandler:
82
  def _setup_mock(self):
83
  """Mock STT (테스트용)"""
84
  self.client = None
85
- print("[INFO] Mock STT 모드 (테스트용)")
86
 
87
  def transcribe(self, audio_path: str, difficulty: int = 1) -> str:
88
  """
@@ -98,7 +98,7 @@ class STTHandler:
98
  if not audio_path or not os.path.exists(audio_path):
99
  raise ValueError(f"유효하지 않은 오디오 파일: {audio_path}")
100
 
101
- print(f"[DEBUG] Transcribing: {audio_path} (provider={self.provider})")
102
 
103
  if self.provider == 'openai':
104
  return self._transcribe_openai(audio_path)
@@ -227,7 +227,7 @@ class STTHandler:
227
  }
228
 
229
  result = mock_texts.get(difficulty, "테스트 텍스트")
230
- print(f"[MOCK] STT 결과 (난이도 {difficulty}): {result}")
231
 
232
  return result
233
 
 
82
  def _setup_mock(self):
83
  """Mock STT (테스트용)"""
84
  self.client = None
85
+ # print("[INFO] Mock STT 모드 (테스트용)")
86
 
87
  def transcribe(self, audio_path: str, difficulty: int = 1) -> str:
88
  """
 
98
  if not audio_path or not os.path.exists(audio_path):
99
  raise ValueError(f"유효하지 않은 오디오 파일: {audio_path}")
100
 
101
+ # print(f"[DEBUG] Transcribing: {audio_path} (provider={self.provider})")
102
 
103
  if self.provider == 'openai':
104
  return self._transcribe_openai(audio_path)
 
227
  }
228
 
229
  result = mock_texts.get(difficulty, "테스트 텍스트")
230
+ # print(f"[MOCK] STT 결과 (난이도 {difficulty}): {result}")
231
 
232
  return result
233
 
docs/tech-stack.html CHANGED
@@ -1,9 +1,9 @@
1
  <!DOCTYPE html>
2
- <html lang="ko">
3
  <head>
4
  <meta charset="UTF-8">
5
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
- <title>Tech Stack - Komentle Voice Challenge</title>
7
  <link href="https://fonts.googleapis.com/css2?family=Nunito:wght@400;600;700;800&display=swap" rel="stylesheet">
8
  <style>
9
  :root {
@@ -494,8 +494,8 @@
494
  <!-- Sidebar -->
495
  <nav class="sidebar">
496
  <a href="#" class="logo">
497
- <div class="logo-icon">K</div>
498
- Komentle
499
  </a>
500
 
501
  <div class="nav-section">
@@ -523,7 +523,7 @@
523
  <div class="nav-title">Backend</div>
524
  <ul class="nav-list">
525
  <li class="nav-item">
526
- <a href="#backend" class="nav-link">FastAPI Server</a>
527
  </li>
528
  <li class="nav-item">
529
  <a href="#database" class="nav-link">Database</a>
@@ -535,7 +535,10 @@
535
  <div class="nav-title">AI / ML</div>
536
  <ul class="nav-list">
537
  <li class="nav-item">
538
- <a href="#ai" class="nav-link">Voice Analysis</a>
 
 
 
539
  </li>
540
  <li class="nav-item">
541
  <a href="#audio" class="nav-link">Audio Processing</a>
@@ -564,8 +567,8 @@
564
  <header class="page-header">
565
  <h1 class="page-title">Tech Stack</h1>
566
  <p class="page-description">
567
- Komentle Voice Challenge에 사용된 기술 스택입니다.
568
- 음성 기반 발음 검증 퀴즈 게임을 위한 풀스택 구성이에요.
569
  </p>
570
  </header>
571
 
@@ -578,18 +581,18 @@
578
  <div class="architecture-diagram">
579
  <div class="arch-flow">
580
  <div class="arch-box">
581
- <div class="arch-box-title">Client</div>
582
  <div class="arch-box-sub">Gradio 6.0</div>
583
  </div>
584
  <span class="arch-arrow">→</span>
585
  <div class="arch-box secondary">
586
- <div class="arch-box-title">Backend</div>
587
- <div class="arch-box-sub">FastAPI</div>
588
  </div>
589
  <span class="arch-arrow">→</span>
590
  <div class="arch-box tertiary">
591
- <div class="arch-box-title">AI Engine</div>
592
- <div class="arch-box-sub">Google Gemini</div>
593
  </div>
594
  </div>
595
  </div>
@@ -609,17 +612,17 @@
609
  <span class="tech-version">6.0.0</span>
610
  </div>
611
  <p class="tech-description">
612
- Python 기반 UI 프레임워크. 음성 입력, 실시간 피드백, 게임 화면 전환 인터랙티브한 인터페이스 구현
613
  </p>
614
  </div>
615
  <div class="tech-card">
616
  <div class="tech-header">
617
  <div class="tech-icon icon-python">Py</div>
618
  <span class="tech-name">Python</span>
619
- <span class="tech-version">3.11</span>
620
  </div>
621
  <p class="tech-description">
622
- 클라이언트 애플리케이션 개발 언어. 타입 힌팅과 async/await 패턴 활용
623
  </p>
624
  </div>
625
  <div class="tech-card">
@@ -629,7 +632,17 @@
629
  <span class="tech-version">0.28.1</span>
630
  </div>
631
  <p class="tech-description">
632
- 비동기 HTTP 클라이언트. 백엔드 API와의 통신 담당
 
 
 
 
 
 
 
 
 
 
633
  </p>
634
  </div>
635
  </div>
@@ -639,47 +652,47 @@
639
  <section id="backend" class="section">
640
  <h2 class="section-title">
641
  <span class="section-icon backend">⚡</span>
642
- Backend
643
  </h2>
644
  <div class="tech-grid">
645
  <div class="tech-card">
646
  <div class="tech-header">
647
- <div class="tech-icon icon-fastapi">Fa</div>
648
- <span class="tech-name">FastAPI</span>
649
- <span class="tech-version">0.121.3</span>
650
  </div>
651
  <p class="tech-description">
652
- 고성능 Python 프레임워크. REST API 엔드포인트 제공, 자동 OpenAPI 문서 생성
653
  </p>
654
  </div>
655
  <div class="tech-card">
656
  <div class="tech-header">
657
- <div class="tech-icon" style="background: linear-gradient(135deg, #7c3aed, #a78bfa);">Uv</div>
658
- <span class="tech-name">Uvicorn</span>
659
- <span class="tech-version">0.38.0</span>
660
  </div>
661
  <p class="tech-description">
662
- Lightning-fast ASGI 서버. 비동기 요청 처리 WebSocket 지원
663
  </p>
664
  </div>
665
  <div class="tech-card">
666
  <div class="tech-header">
667
- <div class="tech-icon icon-python">Py</div>
668
- <span class="tech-name">Python</span>
669
- <span class="tech-version">3.12</span>
670
  </div>
671
  <p class="tech-description">
672
- 백엔드 서버 개발 언어. 최신 버전의 성능 최적화 기능 활용
673
  </p>
674
  </div>
675
  <div class="tech-card">
676
  <div class="tech-header">
677
- <div class="tech-icon" style="background: linear-gradient(135deg, #ec4899, #f472b6);">Pd</div>
678
- <span class="tech-name">Pydantic</span>
679
- <span class="tech-version">2.12.4</span>
680
  </div>
681
  <p class="tech-description">
682
- 데이터 검증 설정 관리. 타입 안전성과 자동 직렬화 제공
683
  </p>
684
  </div>
685
  </div>
@@ -696,20 +709,30 @@
696
  <div class="tech-header">
697
  <div class="tech-icon icon-postgres">Pg</div>
698
  <span class="tech-name">PostgreSQL</span>
699
- <span class="tech-version">Latest</span>
700
  </div>
701
  <p class="tech-description">
702
- 관계형 데이터베이스. 사용자 세션, 게임 기록, 통계 데이터 저장
703
  </p>
704
  </div>
705
  <div class="tech-card">
706
  <div class="tech-header">
707
- <div class="tech-icon" style="background: linear-gradient(135deg, #dc2626, #f87171);">SA</div>
708
- <span class="tech-name">SQLAlchemy</span>
709
- <span class="tech-version">2.0.44</span>
710
  </div>
711
  <p class="tech-description">
712
- Python ORM. 데이터베이스 추상화 쿼리 빌더 제공
 
 
 
 
 
 
 
 
 
 
713
  </p>
714
  </div>
715
  </div>
@@ -726,40 +749,80 @@
726
  <div class="tech-header">
727
  <div class="tech-icon icon-gemini">Gm</div>
728
  <span class="tech-name">Google Gemini</span>
729
- <span class="tech-version">API</span>
730
  </div>
731
  <p class="tech-description">
732
- 멀티모달 AI 모델. 음성 분석, 발음 평가, 자연어 피드백 생성
733
  </p>
734
  </div>
735
  <div class="tech-card">
736
  <div class="tech-header">
737
- <div class="tech-icon" style="background: linear-gradient(135deg, #0891b2, #22d3ee);">MC</div>
738
- <span class="tech-name">MCP</span>
739
- <span class="tech-version">1.0.0+</span>
740
  </div>
741
  <p class="tech-description">
742
- Model Context Protocol. AI 모델 통합 및 컨텍스트 관리
743
  </p>
744
  </div>
745
  <div class="tech-card">
746
  <div class="tech-header">
747
  <div class="tech-icon icon-numpy">Np</div>
748
  <span class="tech-name">NumPy</span>
749
- <span class="tech-version">1.26+</span>
750
  </div>
751
  <p class="tech-description">
752
- 수치 계산 라이브러리. 오디오 신호 처리 데이터 분석
753
  </p>
754
  </div>
755
  <div class="tech-card">
756
  <div class="tech-header">
757
  <div class="tech-icon" style="background: linear-gradient(135deg, #0d9488, #2dd4bf);">Pd</div>
758
  <span class="tech-name">Pandas</span>
759
- <span class="tech-version">2.0+</span>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
760
  </div>
761
  <p class="tech-description">
762
- 데이터 분석 라이브러리. 통계 처리 데이터 조작
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
763
  </p>
764
  </div>
765
  </div>
@@ -779,7 +842,7 @@
779
  <span class="tech-version">0.25.1</span>
780
  </div>
781
  <p class="tech-description">
782
- 오디오 처리 라이브러리. 포맷 변환, 정규화, 편집 기능 제공
783
  </p>
784
  </div>
785
  <div class="tech-card">
@@ -789,7 +852,27 @@
789
  <span class="tech-version">via ffmpy</span>
790
  </div>
791
  <p class="tech-description">
792
- 멀티미디어 프레임워크. 오디오 인코딩/디코딩 처리
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
793
  </p>
794
  </div>
795
  </div>
@@ -809,7 +892,7 @@
809
  <span class="tech-version">Latest</span>
810
  </div>
811
  <p class="tech-description">
812
- 컨테이너화 플랫폼. 일관된 개발/배포 환경 제공
813
  </p>
814
  </div>
815
  <div class="tech-card">
@@ -819,27 +902,47 @@
819
  <span class="tech-version">Latest</span>
820
  </div>
821
  <p class="tech-description">
822
- 멀티 컨테이너 오케스트레이션. 서비스 네트워크 및 볼륨 관리
823
  </p>
824
  </div>
825
  <div class="tech-card">
826
  <div class="tech-header">
827
- <div class="tech-icon" style="background: linear-gradient(135deg, #fbbf24, #fcd34d);">Pt</div>
828
- <span class="tech-name">pytest</span>
829
- <span class="tech-version">Latest</span>
830
  </div>
831
  <p class="tech-description">
832
- 테스트 프레임워크. 단위 테스트 및 통합 테스트 실행
833
  </p>
834
  </div>
835
  <div class="tech-card">
836
  <div class="tech-header">
837
- <div class="tech-icon" style="background: linear-gradient(135deg, #64748b, #94a3b8);">Ev</div>
838
- <span class="tech-name">python-dotenv</span>
839
- <span class="tech-version">1.2.1</span>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
840
  </div>
841
  <p class="tech-description">
842
- 환경 변수 관리. .env 파일 기반 설정 로드
843
  </p>
844
  </div>
845
  </div>
 
1
  <!DOCTYPE html>
2
+ <html lang="en">
3
  <head>
4
  <meta charset="UTF-8">
5
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Tech Stack - Voice Sementle</title>
7
  <link href="https://fonts.googleapis.com/css2?family=Nunito:wght@400;600;700;800&display=swap" rel="stylesheet">
8
  <style>
9
  :root {
 
494
  <!-- Sidebar -->
495
  <nav class="sidebar">
496
  <a href="#" class="logo">
497
+ <div class="logo-icon">V</div>
498
+ Voice Sementle
499
  </a>
500
 
501
  <div class="nav-section">
 
523
  <div class="nav-title">Backend</div>
524
  <ul class="nav-list">
525
  <li class="nav-item">
526
+ <a href="#backend" class="nav-link">Services</a>
527
  </li>
528
  <li class="nav-item">
529
  <a href="#database" class="nav-link">Database</a>
 
535
  <div class="nav-title">AI / ML</div>
536
  <ul class="nav-list">
537
  <li class="nav-item">
538
+ <a href="#ai" class="nav-link">AI Models</a>
539
+ </li>
540
+ <li class="nav-item">
541
+ <a href="#mcp" class="nav-link">MCP Integration</a>
542
  </li>
543
  <li class="nav-item">
544
  <a href="#audio" class="nav-link">Audio Processing</a>
 
567
  <header class="page-header">
568
  <h1 class="page-title">Tech Stack</h1>
569
  <p class="page-description">
570
+ Comprehensive technology stack for Voice Sementle - a daily voice puzzle game where pronunciation matters.
571
+ Built with modern Python frameworks, AI models, and cloud services.
572
  </p>
573
  </header>
574
 
 
581
  <div class="architecture-diagram">
582
  <div class="arch-flow">
583
  <div class="arch-box">
584
+ <div class="arch-box-title">Client UI</div>
585
  <div class="arch-box-sub">Gradio 6.0</div>
586
  </div>
587
  <span class="arch-arrow">→</span>
588
  <div class="arch-box secondary">
589
+ <div class="arch-box-title">Services</div>
590
+ <div class="arch-box-sub">Python Backend</div>
591
  </div>
592
  <span class="arch-arrow">→</span>
593
  <div class="arch-box tertiary">
594
+ <div class="arch-box-title">AI Models</div>
595
+ <div class="arch-box-sub">Gemini + VoiceKit MCP</div>
596
  </div>
597
  </div>
598
  </div>
 
612
  <span class="tech-version">6.0.0</span>
613
  </div>
614
  <p class="tech-description">
615
+ Python-based UI framework. Powers interactive web interface with voice input, real-time feedback, modals, and dynamic game state transitions.
616
  </p>
617
  </div>
618
  <div class="tech-card">
619
  <div class="tech-header">
620
  <div class="tech-icon icon-python">Py</div>
621
  <span class="tech-name">Python</span>
622
+ <span class="tech-version">3.11+</span>
623
  </div>
624
  <p class="tech-description">
625
+ Client application language. Leverages type hints, async/await patterns, and modern Python features for clean, maintainable code.
626
  </p>
627
  </div>
628
  <div class="tech-card">
 
632
  <span class="tech-version">0.28.1</span>
633
  </div>
634
  <p class="tech-description">
635
+ Async HTTP client library for communicating with backend services and external APIs with full HTTP/2 support.
636
+ </p>
637
+ </div>
638
+ <div class="tech-card">
639
+ <div class="tech-header">
640
+ <div class="tech-icon" style="background: linear-gradient(135deg, #8b5cf6, #a78bfa);">Js</div>
641
+ <span class="tech-name">Custom CSS/JS</span>
642
+ <span class="tech-version">-</span>
643
+ </div>
644
+ <p class="tech-description">
645
+ Custom styling with animated backgrounds, modal components, radar charts, and responsive design for optimal user experience.
646
  </p>
647
  </div>
648
  </div>
 
652
  <section id="backend" class="section">
653
  <h2 class="section-title">
654
  <span class="section-icon backend">⚡</span>
655
+ Backend Services
656
  </h2>
657
  <div class="tech-grid">
658
  <div class="tech-card">
659
  <div class="tech-header">
660
+ <div class="tech-icon icon-python">Py</div>
661
+ <span class="tech-name">Python Services</span>
662
+ <span class="tech-version">3.11+</span>
663
  </div>
664
  <p class="tech-description">
665
+ Modular service architecture including voice analyzer, hint generator, database layer, and game state management with async/await support.
666
  </p>
667
  </div>
668
  <div class="tech-card">
669
  <div class="tech-header">
670
+ <div class="tech-icon" style="background: linear-gradient(135deg, #ec4899, #f472b6);">Pd</div>
671
+ <span class="tech-name">Pydantic</span>
672
+ <span class="tech-version">2.0+</span>
673
  </div>
674
  <p class="tech-description">
675
+ Data validation and settings management with type safety, automatic serialization, and schema validation.
676
  </p>
677
  </div>
678
  <div class="tech-card">
679
  <div class="tech-header">
680
+ <div class="tech-icon" style="background: linear-gradient(135deg, #10b981, #34d399);">Ay</div>
681
+ <span class="tech-name">Asyncio</span>
682
+ <span class="tech-version">Built-in</span>
683
  </div>
684
  <p class="tech-description">
685
+ Native Python async library for concurrent operations, enabling efficient handling of multiple voice analysis requests.
686
  </p>
687
  </div>
688
  <div class="tech-card">
689
  <div class="tech-header">
690
+ <div class="tech-icon" style="background: linear-gradient(135deg, #f59e0b, #fbbf24);">Dv</div>
691
+ <span class="tech-name">Python-dotenv</span>
692
+ <span class="tech-version">1.0.0</span>
693
  </div>
694
  <p class="tech-description">
695
+ Environment variable management from .env files for secure API keys and configuration settings.
696
  </p>
697
  </div>
698
  </div>
 
709
  <div class="tech-header">
710
  <div class="tech-icon icon-postgres">Pg</div>
711
  <span class="tech-name">PostgreSQL</span>
712
+ <span class="tech-version">16-alpine</span>
713
  </div>
714
  <p class="tech-description">
715
+ Production-grade relational database. Stores user sessions, game history, daily puzzles, attempt statistics, and score analytics.
716
  </p>
717
  </div>
718
  <div class="tech-card">
719
  <div class="tech-header">
720
+ <div class="tech-icon" style="background: linear-gradient(135deg, #0891b2, #22d3ee);">Ps</div>
721
+ <span class="tech-name">psycopg2</span>
722
+ <span class="tech-version">2.9+</span>
723
  </div>
724
  <p class="tech-description">
725
+ PostgreSQL adapter for Python. Provides efficient database connections and query execution with connection pooling.
726
+ </p>
727
+ </div>
728
+ <div class="tech-card">
729
+ <div class="tech-header">
730
+ <div class="tech-icon icon-docker">Dk</div>
731
+ <span class="tech-name">Docker Volume</span>
732
+ <span class="tech-version">-</span>
733
+ </div>
734
+ <p class="tech-description">
735
+ Persistent data storage with Docker volumes ensuring database persistence across container restarts.
736
  </p>
737
  </div>
738
  </div>
 
749
  <div class="tech-header">
750
  <div class="tech-icon icon-gemini">Gm</div>
751
  <span class="tech-name">Google Gemini</span>
752
+ <span class="tech-version">2.0 Flash</span>
753
  </div>
754
  <p class="tech-description">
755
+ Multimodal AI model powering intelligent hint generation, contextual feedback, and adaptive difficulty based on player performance.
756
  </p>
757
  </div>
758
  <div class="tech-card">
759
  <div class="tech-header">
760
+ <div class="tech-icon" style="background: linear-gradient(135deg, #6366f1, #818cf8);">11</div>
761
+ <span class="tech-name">ElevenLabs</span>
762
+ <span class="tech-version">2.24.0</span>
763
  </div>
764
  <p class="tech-description">
765
+ Advanced text-to-speech API with voice cloning. Generates partial audio hints with reference voice matching for natural gameplay experience.
766
  </p>
767
  </div>
768
  <div class="tech-card">
769
  <div class="tech-header">
770
  <div class="tech-icon icon-numpy">Np</div>
771
  <span class="tech-name">NumPy</span>
772
+ <span class="tech-version">1.26.4</span>
773
  </div>
774
  <p class="tech-description">
775
+ Numerical computing library for audio signal processing, feature extraction, and statistical analysis of voice patterns.
776
  </p>
777
  </div>
778
  <div class="tech-card">
779
  <div class="tech-header">
780
  <div class="tech-icon" style="background: linear-gradient(135deg, #0d9488, #2dd4bf);">Pd</div>
781
  <span class="tech-name">Pandas</span>
782
+ <span class="tech-version">2.2.3</span>
783
+ </div>
784
+ <p class="tech-description">
785
+ Data analysis library for processing game statistics, tracking user progress, and generating performance insights.
786
+ </p>
787
+ </div>
788
+ </div>
789
+ </section>
790
+
791
+ <!-- MCP Integration -->
792
+ <section id="mcp" class="section">
793
+ <h2 class="section-title">
794
+ <span class="section-icon" style="background: linear-gradient(135deg, #8b5cf6, #a78bfa);">🔌</span>
795
+ Model Context Protocol (MCP)
796
+ </h2>
797
+ <div class="tech-grid">
798
+ <div class="tech-card">
799
+ <div class="tech-header">
800
+ <div class="tech-icon" style="background: linear-gradient(135deg, #0891b2, #22d3ee);">MC</div>
801
+ <span class="tech-name">MCP Client</span>
802
+ <span class="tech-version">1.22.0</span>
803
  </div>
804
  <p class="tech-description">
805
+ Model Context Protocol client for seamless AI model integration. Manages sessions, tool calling, and context sharing across AI services.
806
+ </p>
807
+ </div>
808
+ <div class="tech-card">
809
+ <div class="tech-header">
810
+ <div class="tech-icon" style="background: linear-gradient(135deg, #f97316, #fb923c);">Vk</div>
811
+ <span class="tech-name">VoiceKit MCP</span>
812
+ <span class="tech-version">HuggingFace</span>
813
+ </div>
814
+ <p class="tech-description">
815
+ Voice analysis MCP server providing voicekit_analyze_voice_similarity tool. Compares user recordings with reference audio for pronunciation scoring.
816
+ </p>
817
+ </div>
818
+ <div class="tech-card">
819
+ <div class="tech-header">
820
+ <div class="tech-icon" style="background: linear-gradient(135deg, #ec4899, #f472b6);">SS</div>
821
+ <span class="tech-name">SSE Client</span>
822
+ <span class="tech-version">MCP</span>
823
+ </div>
824
+ <p class="tech-description">
825
+ Server-Sent Events client for real-time MCP communication. Enables streaming responses and bidirectional tool invocation.
826
  </p>
827
  </div>
828
  </div>
 
842
  <span class="tech-version">0.25.1</span>
843
  </div>
844
  <p class="tech-description">
845
+ Audio manipulation library for format conversion, normalization, compression, and audio file editing with effects processing.
846
  </p>
847
  </div>
848
  <div class="tech-card">
 
852
  <span class="tech-version">via ffmpy</span>
853
  </div>
854
  <p class="tech-description">
855
+ Multimedia framework for audio encoding/decoding. Handles format conversion and compression for optimal file sizes.
856
+ </p>
857
+ </div>
858
+ <div class="tech-card">
859
+ <div class="tech-header">
860
+ <div class="tech-icon" style="background: linear-gradient(135deg, #7c3aed, #a78bfa);">B6</div>
861
+ <span class="tech-name">Base64</span>
862
+ <span class="tech-version">Built-in</span>
863
+ </div>
864
+ <p class="tech-description">
865
+ Audio data encoding for API transmission. Converts binary audio to base64 strings for MCP tool parameters.
866
+ </p>
867
+ </div>
868
+ <div class="tech-card">
869
+ <div class="tech-header">
870
+ <div class="tech-icon" style="background: linear-gradient(135deg, #dc2626, #f87171);">Af</div>
871
+ <span class="tech-name">Aiofiles</span>
872
+ <span class="tech-version">24.1.0</span>
873
+ </div>
874
+ <p class="tech-description">
875
+ Async file operations for non-blocking audio file I/O. Enables concurrent file processing without blocking the event loop.
876
  </p>
877
  </div>
878
  </div>
 
892
  <span class="tech-version">Latest</span>
893
  </div>
894
  <p class="tech-description">
895
+ Containerization platform providing consistent development and deployment environments across all systems.
896
  </p>
897
  </div>
898
  <div class="tech-card">
 
902
  <span class="tech-version">Latest</span>
903
  </div>
904
  <p class="tech-description">
905
+ Multi-container orchestration for managing PostgreSQL database service with volume persistence and networking.
906
  </p>
907
  </div>
908
  <div class="tech-card">
909
  <div class="tech-header">
910
+ <div class="tech-icon" style="background: linear-gradient(135deg, #64748b, #94a3b8);">Ev</div>
911
+ <span class="tech-name">Environment Config</span>
912
+ <span class="tech-version">-</span>
913
  </div>
914
  <p class="tech-description">
915
+ Secure configuration management with .env files for API keys (Gemini, ElevenLabs, VoiceKit) and sensitive settings.
916
  </p>
917
  </div>
918
  <div class="tech-card">
919
  <div class="tech-header">
920
+ <div class="tech-icon" style="background: linear-gradient(135deg, #16a34a, #4ade80);">Sh</div>
921
+ <span class="tech-name">Shell Scripts</span>
922
+ <span class="tech-version">Bash</span>
923
+ </div>
924
+ <p class="tech-description">
925
+ Automated deployment scripts (start.sh, stop.sh) for streamlined application lifecycle management.
926
+ </p>
927
+ </div>
928
+ <div class="tech-card">
929
+ <div class="tech-header">
930
+ <div class="tech-icon" style="background: linear-gradient(135deg, #ea580c, #fb923c);">Hf</div>
931
+ <span class="tech-name">HuggingFace</span>
932
+ <span class="tech-version">Hub</span>
933
+ </div>
934
+ <p class="tech-description">
935
+ Deployment platform hosting VoiceKit MCP server and application demos with integrated spaces for testing.
936
+ </p>
937
+ </div>
938
+ <div class="tech-card">
939
+ <div class="tech-header">
940
+ <div class="tech-icon" style="background: linear-gradient(135deg, #9333ea, #a855f7);">Lg</div>
941
+ <span class="tech-name">Logging</span>
942
+ <span class="tech-version">Built-in</span>
943
  </div>
944
  <p class="tech-description">
945
+ Comprehensive logging system for debugging, monitoring API calls, tracking errors, and performance analysis.
946
  </p>
947
  </div>
948
  </div>
docs/user-guide.html CHANGED
@@ -3,7 +3,7 @@
3
  <head>
4
  <meta charset="UTF-8">
5
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
- <title>User Guide - Komentle Voice Challenge</title>
7
  <link href="https://fonts.googleapis.com/css2?family=Nunito:wght@400;600;700;800&display=swap" rel="stylesheet">
8
  <style>
9
  :root {
@@ -792,8 +792,8 @@
792
  <div class="metric-label">Pronunciation</div>
793
  </div>
794
  <div class="metric-item">
795
- <div class="metric-value">Tone</div>
796
- <div class="metric-label">Voice Tone</div>
797
  </div>
798
  <div class="metric-item">
799
  <div class="metric-value">Pitch</div>
@@ -813,8 +813,8 @@
813
  <div class="content-card">
814
  <h3>🎯 Score Interpretation</h3>
815
  <ul>
816
- <li><strong>80+ points:</strong> Very similar to the answer! You're almost there!</li>
817
- <li><strong>60-79 points:</strong> Good attempt. Check the hints for guidance.</li>
818
  <li><strong>40-59 points:</strong> Right direction, but try a different word.</li>
819
  <li><strong>Below 40:</strong> Far from the answer. Try a new guess.</li>
820
  </ul>
@@ -840,7 +840,7 @@
840
  <div class="content-card">
841
  <h3>💬 AI Hint Chatbot</h3>
842
  <p>
843
- Click the floating button at the bottom right of the screen to open the AI hint chatbot.
844
  Request hints from the chatbot to receive indirect clues about the answer.
845
  </p>
846
  </div>
@@ -852,16 +852,17 @@
852
  <li><strong>"What's the category?":</strong> Learn the classification of the answer word.</li>
853
  <li><strong>"Tell me the first letter":</strong> Get a hint about the first letter.</li>
854
  <li><strong>"What words sound similar?":</strong> Get pronunciation-related suggestions.</li>
 
855
  </ul>
856
  </div>
857
 
858
- <div class="warning-box">
859
  <span class="warning-icon">💡</span>
860
  <div class="warning-text">
861
  <strong>Limited Hint Usage!</strong> Chatbot hints can only be used a limited number of times.
862
  Request hints carefully!
863
  </div>
864
- </div>
865
  </section>
866
 
867
  <!-- Tips & Tricks -->
 
3
  <head>
4
  <meta charset="UTF-8">
5
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>User Guide - Voice Sementle</title>
7
  <link href="https://fonts.googleapis.com/css2?family=Nunito:wght@400;600;700;800&display=swap" rel="stylesheet">
8
  <style>
9
  :root {
 
792
  <div class="metric-label">Pronunciation</div>
793
  </div>
794
  <div class="metric-item">
795
+ <div class="metric-value">Line Acc.</div>
796
+ <div class="metric-label">Line Accuracy</div>
797
  </div>
798
  <div class="metric-item">
799
  <div class="metric-value">Pitch</div>
 
813
  <div class="content-card">
814
  <h3>🎯 Score Interpretation</h3>
815
  <ul>
816
+ <li><strong>85+ points:</strong> Very similar to the answer! You're almost there!</li>
817
+ <li><strong>60-84 points:</strong> Good attempt. Check the hints for guidance.</li>
818
  <li><strong>40-59 points:</strong> Right direction, but try a different word.</li>
819
  <li><strong>Below 40:</strong> Far from the answer. Try a new guess.</li>
820
  </ul>
 
840
  <div class="content-card">
841
  <h3>💬 AI Hint Chatbot</h3>
842
  <p>
843
+ Click the floating button at the top right of the screen to open the AI hint chatbot.
844
  Request hints from the chatbot to receive indirect clues about the answer.
845
  </p>
846
  </div>
 
852
  <li><strong>"What's the category?":</strong> Learn the classification of the answer word.</li>
853
  <li><strong>"Tell me the first letter":</strong> Get a hint about the first letter.</li>
854
  <li><strong>"What words sound similar?":</strong> Get pronunciation-related suggestions.</li>
855
+ <li><strong>"Give me an audio hint":</strong> AI provides an audio hint in a tone similar to your voice.</li>
856
  </ul>
857
  </div>
858
 
859
+ <!-- <div class="warning-box">
860
  <span class="warning-icon">💡</span>
861
  <div class="warning-text">
862
  <strong>Limited Hint Usage!</strong> Chatbot hints can only be used a limited number of times.
863
  Request hints carefully!
864
  </div>
865
+ </div> -->
866
  </section>
867
 
868
  <!-- Tips & Tricks -->
gemini_adapter.py CHANGED
@@ -88,14 +88,14 @@ def convert_messages_to_gemini_format(anthropic_messages):
88
  # Could be Gradio file format {"path": ..., "mime_type": ...}
89
  # Skip audio/video files - they can't be sent to Gemini text API
90
  if content.get("path") and content.get("mime_type"):
91
- print(f"DEBUG convert_messages: Skipping file content: {content.get('mime_type')}")
92
  continue
93
  # Could be text content {"type": "text", "text": "..."}
94
  elif content.get("type") == "text":
95
  parts.append(types.Part(text=content.get("text", "")))
96
  elif hasattr(content, '__class__') and 'Audio' in content.__class__.__name__:
97
  # Skip Gradio Audio component objects
98
- print(f"DEBUG convert_messages: Skipping Gradio component: {content.__class__.__name__}")
99
  continue
100
  elif isinstance(content, list):
101
  # Complex content with tool calls/results
@@ -152,11 +152,11 @@ def call_gemini_with_tools(model_name, system_prompt, messages, tools, max_token
152
  # Convert messages to Gemini format
153
  contents = convert_messages_to_gemini_format(messages)
154
 
155
- print(f"DEBUG gemini_adapter: Converted {len(messages)} messages to {len(contents)} Contents")
156
- print(f"DEBUG gemini_adapter: Tools: {len(gemini_tools[0].function_declarations) if gemini_tools else 0}")
157
 
158
  # Call API
159
- print(f"DEBUG gemini_adapter: Calling Gemini API...")
160
  client = get_client()
161
 
162
  # Generate content (no timeout parameter - not supported)
@@ -175,18 +175,18 @@ def call_gemini_with_tools(model_name, system_prompt, messages, tools, max_token
175
 
176
  print(f"DEBUG gemini_adapter: Got response")
177
 
178
- print(f"DEBUG gemini_adapter: Response type: {type(response)}")
179
- print(f"DEBUG gemini_adapter: Response has candidates: {hasattr(response, 'candidates')}")
180
  if hasattr(response, 'candidates') and response.candidates:
181
  candidate = response.candidates[0]
182
- print(f"DEBUG gemini_adapter: First candidate type: {type(candidate)}")
183
- print(f"DEBUG gemini_adapter: finish_reason: {getattr(candidate, 'finish_reason', 'UNKNOWN')}")
184
 
185
  # Log function calls if present
186
  if hasattr(candidate, 'content') and candidate.content and hasattr(candidate.content, 'parts') and candidate.content.parts:
187
  for i, part in enumerate(candidate.content.parts):
188
  if hasattr(part, 'function_call') and part.function_call:
189
- print(f"DEBUG gemini_adapter: Part {i} has function_call: {part.function_call.name}")
190
  print(f"DEBUG gemini_adapter: Function args: {dict(part.function_call.args) if part.function_call.args else {}}")
191
  return response
192
 
@@ -203,28 +203,28 @@ def extract_tool_calls_from_gemini_response(response):
203
  """
204
  tool_calls = []
205
 
206
- print(f"DEBUG extract_tool_calls: Response type: {type(response)}")
207
- print(f"DEBUG extract_tool_calls: Has candidates: {hasattr(response, 'candidates')}")
208
 
209
  if not hasattr(response, 'candidates') or not response.candidates:
210
- print(f"DEBUG extract_tool_calls: No candidates found")
211
  return tool_calls
212
 
213
  candidate = response.candidates[0]
214
- print(f"DEBUG extract_tool_calls: Candidate has content: {hasattr(candidate, 'content')}")
215
 
216
  if not hasattr(candidate, 'content') or not candidate.content:
217
- print(f"DEBUG extract_tool_calls: No content in candidate")
218
  return tool_calls
219
 
220
- print(f"DEBUG extract_tool_calls: Content has parts: {hasattr(candidate.content, 'parts')}")
221
 
222
  if not hasattr(candidate.content, 'parts') or not candidate.content.parts:
223
- print(f"DEBUG extract_tool_calls: No parts in content")
224
  return tool_calls
225
 
226
  for i, part in enumerate(candidate.content.parts):
227
- print(f"DEBUG extract_tool_calls: Part {i} has function_call: {hasattr(part, 'function_call')}")
228
  if hasattr(part, 'function_call') and part.function_call:
229
  fc = part.function_call
230
  tool_calls.append({
@@ -233,7 +233,7 @@ def extract_tool_calls_from_gemini_response(response):
233
  "id": f"call_{i}"
234
  })
235
 
236
- print(f"DEBUG extract_tool_calls: Found {len(tool_calls)} tool calls")
237
  return tool_calls
238
 
239
  def get_text_from_gemini_response(response):
@@ -242,21 +242,21 @@ def get_text_from_gemini_response(response):
242
  Returns:
243
  tuple: (text, error_message) where error_message is None if successful
244
  """
245
- print(f"DEBUG get_text: Response type: {type(response)}")
246
- print(f"DEBUG get_text: Has candidates: {hasattr(response, 'candidates')}")
247
 
248
  if not hasattr(response, 'candidates') or not response.candidates:
249
- print(f"ERROR get_text: No candidates in response")
250
  return "", "No response candidates received from AI. Please try again."
251
 
252
  candidate = response.candidates[0]
253
  finish_reason = getattr(candidate, 'finish_reason', 'UNKNOWN')
254
- print(f"DEBUG get_text: Candidate has content: {hasattr(candidate, 'content')}")
255
- print(f"DEBUG get_text: finish_reason: {finish_reason}")
256
 
257
  if not hasattr(candidate, 'content') or not candidate.content:
258
- print(f"WARNING get_text: No content in candidate")
259
- print(f"WARNING get_text: safety_ratings: {getattr(candidate, 'safety_ratings', 'N/A')}")
260
 
261
  # Provide specific error messages based on finish_reason
262
  if finish_reason == 'SAFETY':
@@ -270,27 +270,27 @@ def get_text_from_gemini_response(response):
270
  else:
271
  return "", "AI returned empty response. Please try again."
272
 
273
- print(f"DEBUG get_text: Content has parts: {hasattr(candidate.content, 'parts') if candidate.content else False}")
274
 
275
  if not candidate.content or not hasattr(candidate.content, 'parts') or not candidate.content.parts:
276
- print(f"WARNING get_text: No parts in content")
277
  return "", "AI response had no content. Please try again."
278
 
279
  text_parts = []
280
  has_function_call = False
281
  for i, part in enumerate(candidate.content.parts):
282
- print(f"DEBUG get_text: Part {i} has text: {hasattr(part, 'text')}")
283
  if hasattr(part, 'text') and part.text:
284
  text_parts.append(part.text)
285
  if hasattr(part, 'function_call') and part.function_call:
286
  has_function_call = True
287
 
288
  result = " ".join(text_parts)
289
- print(f"DEBUG get_text: Extracted text length: {len(result)}, has_function_call: {has_function_call}")
290
 
291
  # Empty text is OK if there's a function call (tool-only response)
292
  if (not result or result.strip() == "") and not has_function_call:
293
- print(f"WARNING get_text: Empty text extracted and no function call")
294
  return "", "AI returned empty text. Please try again."
295
 
296
  return result, None
@@ -321,7 +321,7 @@ def chat_with_gemini_and_tools(system_prompt, messages, tools=None, max_tokens=1
321
  # Convert tools to Gemini format if provided
322
  gemini_tools = convert_tools_to_gemini_format(tools) if tools else None
323
 
324
- print(f"DEBUG chat_with_gemini_and_tools: {len(messages)} messages, {len(tools) if tools else 0} tools")
325
 
326
  # Get client
327
  client = get_client()
@@ -353,7 +353,7 @@ def chat_with_gemini_and_tools(system_prompt, messages, tools=None, max_tokens=1
353
  config=config
354
  )
355
 
356
- print(f"DEBUG chat_with_gemini_and_tools: Got response")
357
 
358
  # Check for tool calls first
359
  tool_calls = extract_tool_calls_from_gemini_response(response)
@@ -363,13 +363,13 @@ def chat_with_gemini_and_tools(system_prompt, messages, tools=None, max_tokens=1
363
 
364
  # If there's an error but we have tool calls, that's OK (tool-only response)
365
  if error and not tool_calls:
366
- print(f"ERROR chat_with_gemini_and_tools: {error}")
367
  return "", None, error
368
  elif error and tool_calls:
369
- print(f"DEBUG chat_with_gemini_and_tools: Error '{error}' but have {len(tool_calls)} tool calls, proceeding")
370
  text = "" # Clear any error text
371
 
372
- print(f"DEBUG chat_with_gemini_and_tools: text={len(text)} chars, tool_calls={len(tool_calls) if tool_calls else 0}")
373
  return text, tool_calls if tool_calls else None, None
374
 
375
  except Exception as e:
@@ -398,8 +398,8 @@ def chat_with_gemini(system_prompt, messages, max_tokens=1024, temperature=1.0,
398
  # Convert messages to Gemini format
399
  contents = convert_messages_to_gemini_format(messages)
400
 
401
- print(f"DEBUG chat_with_gemini: Converted {len(messages)} messages to {len(contents)} Contents")
402
- print(f"DEBUG chat_with_gemini: Calling Gemini API with model {model_name}")
403
 
404
  # Get client
405
  client = get_client()
@@ -418,16 +418,16 @@ def chat_with_gemini(system_prompt, messages, max_tokens=1024, temperature=1.0,
418
  config=config
419
  )
420
 
421
- print(f"DEBUG chat_with_gemini: Got response")
422
 
423
  # Extract text
424
  text, error = get_text_from_gemini_response(response)
425
 
426
  if error:
427
- print(f"ERROR chat_with_gemini: {error}")
428
  return "", error
429
 
430
- print(f"DEBUG chat_with_gemini: Successfully extracted text ({len(text)} chars)")
431
  return text, None
432
 
433
  except Exception as e:
 
88
  # Could be Gradio file format {"path": ..., "mime_type": ...}
89
  # Skip audio/video files - they can't be sent to Gemini text API
90
  if content.get("path") and content.get("mime_type"):
91
+ # print(f"DEBUG convert_messages: Skipping file content: {content.get('mime_type')}")
92
  continue
93
  # Could be text content {"type": "text", "text": "..."}
94
  elif content.get("type") == "text":
95
  parts.append(types.Part(text=content.get("text", "")))
96
  elif hasattr(content, '__class__') and 'Audio' in content.__class__.__name__:
97
  # Skip Gradio Audio component objects
98
+ # print(f"DEBUG convert_messages: Skipping Gradio component: {content.__class__.__name__}")
99
  continue
100
  elif isinstance(content, list):
101
  # Complex content with tool calls/results
 
152
  # Convert messages to Gemini format
153
  contents = convert_messages_to_gemini_format(messages)
154
 
155
+ # print(f"DEBUG gemini_adapter: Converted {len(messages)} messages to {len(contents)} Contents")
156
+ # print(f"DEBUG gemini_adapter: Tools: {len(gemini_tools[0].function_declarations) if gemini_tools else 0}")
157
 
158
  # Call API
159
+ # print(f"DEBUG gemini_adapter: Calling Gemini API...")
160
  client = get_client()
161
 
162
  # Generate content (no timeout parameter - not supported)
 
175
 
176
  print(f"DEBUG gemini_adapter: Got response")
177
 
178
+ # print(f"DEBUG gemini_adapter: Response type: {type(response)}")
179
+ # print(f"DEBUG gemini_adapter: Response has candidates: {hasattr(response, 'candidates')}")
180
  if hasattr(response, 'candidates') and response.candidates:
181
  candidate = response.candidates[0]
182
+ # print(f"DEBUG gemini_adapter: First candidate type: {type(candidate)}")
183
+ # print(f"DEBUG gemini_adapter: finish_reason: {getattr(candidate, 'finish_reason', 'UNKNOWN')}")
184
 
185
  # Log function calls if present
186
  if hasattr(candidate, 'content') and candidate.content and hasattr(candidate.content, 'parts') and candidate.content.parts:
187
  for i, part in enumerate(candidate.content.parts):
188
  if hasattr(part, 'function_call') and part.function_call:
189
+ # print(f"DEBUG gemini_adapter: Part {i} has function_call: {part.function_call.name}")
190
  print(f"DEBUG gemini_adapter: Function args: {dict(part.function_call.args) if part.function_call.args else {}}")
191
  return response
192
 
 
203
  """
204
  tool_calls = []
205
 
206
+ # print(f"DEBUG extract_tool_calls: Response type: {type(response)}")
207
+ # print(f"DEBUG extract_tool_calls: Has candidates: {hasattr(response, 'candidates')}")
208
 
209
  if not hasattr(response, 'candidates') or not response.candidates:
210
+ # print(f"DEBUG extract_tool_calls: No candidates found")
211
  return tool_calls
212
 
213
  candidate = response.candidates[0]
214
+ # print(f"DEBUG extract_tool_calls: Candidate has content: {hasattr(candidate, 'content')}")
215
 
216
  if not hasattr(candidate, 'content') or not candidate.content:
217
+ # print(f"DEBUG extract_tool_calls: No content in candidate")
218
  return tool_calls
219
 
220
+ # print(f"DEBUG extract_tool_calls: Content has parts: {hasattr(candidate.content, 'parts')}")
221
 
222
  if not hasattr(candidate.content, 'parts') or not candidate.content.parts:
223
+ # print(f"DEBUG extract_tool_calls: No parts in content")
224
  return tool_calls
225
 
226
  for i, part in enumerate(candidate.content.parts):
227
+ # print(f"DEBUG extract_tool_calls: Part {i} has function_call: {hasattr(part, 'function_call')}")
228
  if hasattr(part, 'function_call') and part.function_call:
229
  fc = part.function_call
230
  tool_calls.append({
 
233
  "id": f"call_{i}"
234
  })
235
 
236
+ # print(f"DEBUG extract_tool_calls: Found {len(tool_calls)} tool calls")
237
  return tool_calls
238
 
239
  def get_text_from_gemini_response(response):
 
242
  Returns:
243
  tuple: (text, error_message) where error_message is None if successful
244
  """
245
+ # print(f"DEBUG get_text: Response type: {type(response)}")
246
+ # print(f"DEBUG get_text: Has candidates: {hasattr(response, 'candidates')}")
247
 
248
  if not hasattr(response, 'candidates') or not response.candidates:
249
+ # print(f"ERROR get_text: No candidates in response")
250
  return "", "No response candidates received from AI. Please try again."
251
 
252
  candidate = response.candidates[0]
253
  finish_reason = getattr(candidate, 'finish_reason', 'UNKNOWN')
254
+ # print(f"DEBUG get_text: Candidate has content: {hasattr(candidate, 'content')}")
255
+ # print(f"DEBUG get_text: finish_reason: {finish_reason}")
256
 
257
  if not hasattr(candidate, 'content') or not candidate.content:
258
+ # print(f"WARNING get_text: No content in candidate")
259
+ # print(f"WARNING get_text: safety_ratings: {getattr(candidate, 'safety_ratings', 'N/A')}")
260
 
261
  # Provide specific error messages based on finish_reason
262
  if finish_reason == 'SAFETY':
 
270
  else:
271
  return "", "AI returned empty response. Please try again."
272
 
273
+ # print(f"DEBUG get_text: Content has parts: {hasattr(candidate.content, 'parts') if candidate.content else False}")
274
 
275
  if not candidate.content or not hasattr(candidate.content, 'parts') or not candidate.content.parts:
276
+ # print(f"WARNING get_text: No parts in content")
277
  return "", "AI response had no content. Please try again."
278
 
279
  text_parts = []
280
  has_function_call = False
281
  for i, part in enumerate(candidate.content.parts):
282
+ # print(f"DEBUG get_text: Part {i} has text: {hasattr(part, 'text')}")
283
  if hasattr(part, 'text') and part.text:
284
  text_parts.append(part.text)
285
  if hasattr(part, 'function_call') and part.function_call:
286
  has_function_call = True
287
 
288
  result = " ".join(text_parts)
289
+ # print(f"DEBUG get_text: Extracted text length: {len(result)}, has_function_call: {has_function_call}")
290
 
291
  # Empty text is OK if there's a function call (tool-only response)
292
  if (not result or result.strip() == "") and not has_function_call:
293
+ # print(f"WARNING get_text: Empty text extracted and no function call")
294
  return "", "AI returned empty text. Please try again."
295
 
296
  return result, None
 
321
  # Convert tools to Gemini format if provided
322
  gemini_tools = convert_tools_to_gemini_format(tools) if tools else None
323
 
324
+ # print(f"DEBUG chat_with_gemini_and_tools: {len(messages)} messages, {len(tools) if tools else 0} tools")
325
 
326
  # Get client
327
  client = get_client()
 
353
  config=config
354
  )
355
 
356
+ # print(f"DEBUG chat_with_gemini_and_tools: Got response")
357
 
358
  # Check for tool calls first
359
  tool_calls = extract_tool_calls_from_gemini_response(response)
 
363
 
364
  # If there's an error but we have tool calls, that's OK (tool-only response)
365
  if error and not tool_calls:
366
+ # print(f"ERROR chat_with_gemini_and_tools: {error}")
367
  return "", None, error
368
  elif error and tool_calls:
369
+ # print(f"DEBUG chat_with_gemini_and_tools: Error '{error}' but have {len(tool_calls)} tool calls, proceeding")
370
  text = "" # Clear any error text
371
 
372
+ # print(f"DEBUG chat_with_gemini_and_tools: text={len(text)} chars, tool_calls={len(tool_calls) if tool_calls else 0}")
373
  return text, tool_calls if tool_calls else None, None
374
 
375
  except Exception as e:
 
398
  # Convert messages to Gemini format
399
  contents = convert_messages_to_gemini_format(messages)
400
 
401
+ # print(f"DEBUG chat_with_gemini: Converted {len(messages)} messages to {len(contents)} Contents")
402
+ # print(f"DEBUG chat_with_gemini: Calling Gemini API with model {model_name}")
403
 
404
  # Get client
405
  client = get_client()
 
418
  config=config
419
  )
420
 
421
+ # print(f"DEBUG chat_with_gemini: Got response")
422
 
423
  # Extract text
424
  text, error = get_text_from_gemini_response(response)
425
 
426
  if error:
427
+ # print(f"ERROR chat_with_gemini: {error}")
428
  return "", error
429
 
430
+ # print(f"DEBUG chat_with_gemini: Successfully extracted text ({len(text)} chars)")
431
  return text, None
432
 
433
  except Exception as e:
gradio_ui.py CHANGED
@@ -149,7 +149,7 @@ def get_today_puzzle():
149
 
150
  # Use backend function to get puzzle
151
  puzzle = get_puzzle_by_date(today)
152
- print(puzzle)
153
  if puzzle:
154
  return f"""
155
  ### 📅 오늘의 퍼즐
@@ -240,8 +240,6 @@ with gr.Blocks(title="Chloe's Voice Komentle") as demo:
240
 
241
  # Launch configuration
242
  if __name__ == "__main__":
243
- print("🚀 Starting Chloe's Voice Komentle...")
244
-
245
  # Initialize backend (VoiceKit MCP session)
246
  print("⏳ Initializing VoiceKit MCP...")
247
 
 
149
 
150
  # Use backend function to get puzzle
151
  puzzle = get_puzzle_by_date(today)
152
+ # print(puzzle)
153
  if puzzle:
154
  return f"""
155
  ### 📅 오늘의 퍼즐
 
240
 
241
  # Launch configuration
242
  if __name__ == "__main__":
 
 
243
  # Initialize backend (VoiceKit MCP session)
244
  print("⏳ Initializing VoiceKit MCP...")
245
 
inspect_routes.py CHANGED
@@ -13,7 +13,7 @@ os.environ["BACKEND_API_URL"] = ""
13
  try:
14
  from client.app import app
15
  print("Successfully imported app")
16
- print("Routes:")
17
  for route in app.routes:
18
  print(f" {route.path} ({type(route).__name__})")
19
  except Exception as e:
 
13
  try:
14
  from client.app import app
15
  print("Successfully imported app")
16
+ # print("Routes:")
17
  for route in app.routes:
18
  print(f" {route.path} ({type(route).__name__})")
19
  except Exception as e:
test_analyze_voice.py CHANGED
@@ -128,9 +128,9 @@ async def test_analyze_voice(audio_file, date: str, session_id: str, generate_ne
128
  with open(audio_path, "rb") as f:
129
  audio_bytes = f.read()
130
 
131
- print(f"📁 Audio file size: {len(audio_bytes)} bytes")
132
- print(f"📅 Date: {date}")
133
- print(f"🆔 Session ID: {session_id}")
134
 
135
  # Call the function
136
  result = await analyze_voice_logic(audio_bytes, date, session_id)
@@ -247,15 +247,15 @@ with gr.Blocks(title="Analyze Voice Logic Test") as demo:
247
 
248
 
249
  if __name__ == "__main__":
250
- print("=" * 60)
251
- print("🎤 Analyze Voice Logic Test Page")
252
- print("=" * 60)
253
- print("\n✅ 사전 준비:")
254
- print(" 1. PostgreSQL 실행: docker-compose up -d postgres")
255
- print(" 2. 환경변수 설정: GOOGLE_API_KEY, DATABASE_URL")
256
- print(" 3. VoiceKit MCP 서버 (외부): 자동 연결")
257
- print("\n💡 이 페이지는 독립적으로 실행됩니다 (Backend 서버 불필요)")
258
- print("\n" + "=" * 60 + "\n")
259
 
260
  demo.launch(
261
  server_name="127.0.0.1",
 
128
  with open(audio_path, "rb") as f:
129
  audio_bytes = f.read()
130
 
131
+ # print(f"📁 Audio file size: {len(audio_bytes)} bytes")
132
+ # print(f"📅 Date: {date}")
133
+ # print(f"🆔 Session ID: {session_id}")
134
 
135
  # Call the function
136
  result = await analyze_voice_logic(audio_bytes, date, session_id)
 
247
 
248
 
249
  if __name__ == "__main__":
250
+ # print("=" * 60)
251
+ # print("🎤 Analyze Voice Logic Test Page")
252
+ # print("=" * 60)
253
+ # print("\n✅ 사전 준비:")
254
+ # print(" 1. PostgreSQL 실행: docker-compose up -d postgres")
255
+ # print(" 2. 환경변수 설정: GOOGLE_API_KEY, DATABASE_URL")
256
+ # print(" 3. VoiceKit MCP 서버 (외부): 자동 연결")
257
+ # print("\n💡 이 페이지는 독립적으로 실행됩니다 (Backend 서버 불필요)")
258
+ # print("\n" + "=" * 60 + "\n")
259
 
260
  demo.launch(
261
  server_name="127.0.0.1",