SJLee-0525 committed
Commit · 8374119
Parent(s): 6d34043

[TEST] test29
- .gitignore +2 -1
- client/app.py +16 -4
- client/frontend/app_ui.py +1 -1
- client/frontend/components/audio_input.py +3 -3
- client/frontend/components/floating_chatbot.py +44 -25
- client/frontend/components/header.py +10 -10
- client/frontend/components/history_display.py +2 -2
- client/frontend/styles/__init__.py +1 -1
- client/frontend/styles/buttons_style.py +3 -3
- client/frontend/styles/chatbot_style.py +2 -2
- client/frontend/styles/custom_css.py +1 -1
- client/frontend/styles/failure_modal_style.py +3 -3
- client/frontend/styles/falling_elements_style.py +1 -1
- client/frontend/styles/history_style.py +3 -3
- client/frontend/styles/result_screen_style.py +3 -3
- client/frontend/styles/theme_style.py +3 -3
- client/services/analysis_service.py +17 -4
- client/services/hint_generator.py +60 -53
- client/services/voice_analyzer.py +24 -0
- client/utils/elevenlabs_tts.py +124 -47
- gemini_adapter.py +13 -0
- gradio_ui.py +279 -0
- postgresql.py +88 -0
- reference_audio/movie/wingardiumleviosa.mp3 +0 -0
- requirements.txt +43 -7
- voice_app.py +148 -0
.gitignore CHANGED

@@ -4,4 +4,5 @@ gradio_uploads/
venv/
.venv/
__pycache__/
-
+ gradio-env/
+ *.wav
client/app.py CHANGED

@@ -1,12 +1,17 @@
"""
+ Single-app version with integrated backend logic (uses only one port)
+
Voice verification app - main application (integrated version)
Author: Kevin's Team
Description: Speech-recognition-based pronunciation verification system
-
- Single-app version with integrated backend logic (uses only one port)
"""

import os
+
+ # Load .env (must run before other modules are imported)
+ from dotenv import load_dotenv
+ load_dotenv(os.path.join(os.path.dirname(__file__), "..", ".env"))
+
import sys
import asyncio


@@ -279,6 +284,7 @@ class AudioValidationApp:
"category": category,  # Puzzle category for chatbot
"answerWord": answer_word,  # Answer word for chatbot context
"referenceAudioPath": reference_audio_path,  # For TTS voice cloning
+ "userText": user_text,  # STT result for chatbot context
**metrics
}
)

@@ -318,9 +324,12 @@

# Add the successful audio record to game_state (for the User Audio display)
updated_game_state = GameStateManager.add_guess(
-     game_state,
+     game_state,
+     recognized_text,
+     audio_path,
    {
        "score": score,
+         "userText": user_text,
        "answerWord": answer_word,
        "referenceAudioPath": reference_audio_path,
        "category": category,

@@ -517,6 +526,9 @@ if __name__ == "__main__":
    server_port=frontend_port,
    show_error=True,
    allowed_paths=[UPLOAD_DIR, DOCS_DIR, IMAGES_DIR, REFERENCE_AUDIO_DIR],
-     footer_links=[
+     footer_links=[
+         {"text": "User Guide", "url": f"/file={DOCS_DIR}/user-guide.html"},
+         {"text": "Tech Stack", "url": f"/file={DOCS_DIR}/tech-stack.html"},
+     ]
)

client/frontend/app_ui.py CHANGED

@@ -64,7 +64,7 @@ class AppUI:
import json
stats_json = json.dumps(stats)

- with gr.Blocks(title="VOICE
+ with gr.Blocks(title="VOICE SEMANTLE") as demo:

# ============== Dashboard Stats (accessible from JS) ==============
gr.HTML(
client/frontend/components/audio_input.py CHANGED

@@ -1,5 +1,5 @@
"""
- Audio input component - Voice
+ Audio input component - Voice Semantle style
Animated sky-blue themed voice input interface
Controls the Gradio Audio component with custom buttons


@@ -10,12 +10,12 @@ import gradio as gr


class AudioInputComponent:
-     """Voice
+     """Voice Semantle style audio input component"""

    # Mic button HTML template
    MIC_BUTTON_HTML_TEMPLATE = """
    <div class="mic-section">
-         <div class="mic-status" id="mic-status">Click the
+         <div class="mic-status" id="mic-status">Click the play button to start game</div>
        <button class="mic-btn" id="mic-btn" title="Start recording">
            <svg xmlns="http://www.w3.org/2000/svg" height="44" viewBox="0 0 64 64" width="44">
                <path fill="#fff" d="M24 18 Q20 18 20 22 L20 42 Q20 46 24 46 L46 34 Q50 32 46 30 Z" stroke="#fff" stroke-width="2" stroke-linejoin="round" stroke-linecap="round"/>
client/frontend/components/floating_chatbot.py CHANGED

@@ -230,11 +230,12 @@ Greet them warmly and help them understand the game:

# Add audio hint capability info (Phase 2 + Tool Calling)
if is_elevenlabs_configured():
-     context_parts.append(f"\n### AUDIO HINT TOOL (
-     context_parts.append(f"- You have a tool called `generate_audio_hint` that generates
-     context_parts.append(f"-
-     context_parts.append(f"  - Keywords: 'play audio', 'TTS', 'hear it', 'how does it sound', 'generate audio'")
-     context_parts.append(f"  -
+     context_parts.append(f"\n### AUDIO HINT TOOL (Use ONLY when explicitly requested)")
+     context_parts.append(f"- You have a tool called `generate_audio_hint` that generates TTS audio")
+     context_parts.append(f"- ONLY call this tool when the user EXPLICITLY asks for audio hints:")
+     context_parts.append(f"  - Keywords that REQUIRE audio: 'audio hint', 'play audio', 'TTS', 'hear it', 'how does it sound', 'generate audio', 'listen'")
+     context_parts.append(f"  - Keywords that do NOT require audio: 'hint', 'help', 'clue', 'what is it', general questions")
+     context_parts.append(f"- DO NOT call this tool for general hints or questions - only for explicit audio requests")
    context_parts.append(f"- Tool parameters:")
    context_parts.append(f"  - hint_type: 'syllable' | 'partial' | 'rhythm' | 'almost_full'")
    context_parts.append(f"- Choose hint_type based on attempt count:")

@@ -242,8 +243,6 @@ Greet them warmly and help them understand the game:
    context_parts.append(f"  - Attempt 5-6: use 'partial' (first half)")
    context_parts.append(f"  - Attempt 7-9: use 'rhythm' (with pauses)")
    context_parts.append(f"  - Attempt 10+: use 'almost_full' (almost complete)")
-     context_parts.append(f"- After calling the tool, explain what the user will hear")
-     context_parts.append(f"- Example: User says 'give me TTS' → Call generate_audio_hint(hint_type='syllable')")
    context_parts.append("")

    context_parts.append(f"They are trying to figure out what word/phrase to pronounce.\n")

@@ -259,15 +258,19 @@
    pronunciation = ai_analysis.get("pronunciation", "N/A")
    overall = ai_analysis.get("overall_score", "N/A")  # Match standardized field name

-     # Get recognized text from guess
+     # Get recognized text from guess
    guessed_word = guess.get("guessedWord", "")
+     # Get actual STT result (what user pronounced)
+     user_spoken_text = ai_analysis.get("userText", "")

    # Previous advice/hints the AI gave (if any)
    advice = ai_analysis.get("advice", "")

    context_parts.append(f"### Attempt {i}")
-     #
-     if
+     # Show what they actually said via STT (more accurate for pronunciation feedback)
+     if user_spoken_text:
+         context_parts.append(f"- What they pronounced (STT): \"{user_spoken_text}\"")
+     elif guessed_word and not guessed_word.startswith("Score:"):
    context_parts.append(f"- What they said: \"{guessed_word}\"")
    context_parts.append(f"- Scores: Pitch={pitch}, Rhythm={rhythm}, Energy={energy}, Pronunciation={pronunciation}, Overall={overall}")


@@ -374,10 +377,16 @@ def call_ai_backend(message: str, user_id: str, history: List[Dict], game_state:
elevenlabs_ready = is_elevenlabs_configured()
print(f"[CHATBOT] ElevenLabs ready: {elevenlabs_ready}, Answer word: '{answer_word}'")

- if
+ # Only enable audio tool if user EXPLICITLY asks for audio in THIS message
+ audio_keywords = ['audio', 'play', 'sound', 'hear', 'listen', 'tts', 'pronounce', '들려', '발음']
+ user_wants_audio = any(kw in message.lower() for kw in audio_keywords)
+
+ if elevenlabs_ready and answer_word and user_wants_audio:
+     # Get word count for tool description
+     word_count = len(answer_word.split())
    tools = [{
        "name": "generate_audio_hint",
-         "description": "Generate an audio pronunciation hint using
+         "description": "Generate an audio pronunciation hint using TTS.",
        "input_schema": {
            "type": "object",
            "properties": {

@@ -385,12 +394,18 @@ def call_ai_backend(message: str, user_id: str, history: List[Dict], game_state:
                "type": "string",
                "enum": ["syllable", "partial", "rhythm", "almost_full"],
                "description": "Type of audio hint: 'syllable' (first syllable), 'partial' (first half), 'rhythm' (with pauses), 'almost_full' (missing last part)"
+             },
+             "word_index": {
+                 "type": "integer",
+                 "description": f"Which word to hint (0-indexed). The phrase has {word_count} word(s). Use 0 for first word, 1 for second word, etc. If user knows first word but not second, use 1."
            }
        },
        "required": ["hint_type"]
        }
    }]
-     print(f"[CHATBOT] Audio tool enabled!
+     print(f"[CHATBOT] Audio tool enabled! User requested audio.")
+ elif elevenlabs_ready and answer_word:
+     print(f"[CHATBOT] Audio tool NOT enabled - user didn't request audio. Message: '{message[:50]}...'")

# Call Gemini with tools
response_text, tool_calls, error = chat_with_gemini_and_tools(

@@ -412,15 +427,16 @@ def call_ai_backend(message: str, user_id: str, history: List[Dict], game_state:
for tool_call in tool_calls:
    if tool_call['name'] == 'generate_audio_hint':
        hint_type = tool_call['input'].get('hint_type', 'syllable')
-
+         word_index = tool_call['input'].get('word_index', 0)  # Default to first word
+         print(f"[CHATBOT] Gemini requested audio hint: type={hint_type}, word_index={word_index}")

-         # Extract the appropriate portion based on hint_type and answer_word
-         from utils.elevenlabs_tts import
+         # Extract the appropriate portion based on hint_type, word_index, and answer_word
+         from utils.elevenlabs_tts import extract_hint_portion_for_word, generate_audio_hint

-         text_to_speak,
+         text_to_speak, actual_hint_type = extract_hint_portion_for_word(answer_word, attempt_count, word_index)
        # Use voice cloning from reference audio when available
-         print(f"[CHATBOT] Generating audio with reference: {reference_audio_path}")
-         audio_path = generate_audio_hint(text_to_speak,
+         print(f"[CHATBOT] Generating audio for: '{text_to_speak}' with reference: {reference_audio_path}")
+         audio_path = generate_audio_hint(text_to_speak, actual_hint_type, reference_audio_path=reference_audio_path)

        if audio_path:
            print(f"[CHATBOT] Audio hint generated: {audio_path}")

@@ -560,20 +576,23 @@ class FloatingChatbotComponent:

print(f"[CHATBOT] AI response: {str(response)[:100]}...")

- # Convert tuple response (text, audio_path) to Gradio format
+ # Convert tuple response (text, audio_path) to Gradio 6 format
if isinstance(response, tuple):
    text, audio_path = response
    # Convert relative path to absolute path for Gradio
    import os
-     if not os.path.isabs(audio_path):
+     if audio_path and not os.path.isabs(audio_path):
        # Path is relative to project root (3 levels up from this file)
        project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
        audio_path = os.path.join(project_root, audio_path)
-     content = [
-         {"type": "text", "text": text},
-         {"type": "file", "file": {"path": audio_path, "mime_type": "audio/mpeg"}}
-     ]
    print(f"[CHATBOT] Audio hint included: {audio_path}")
+     print(f"[CHATBOT] Audio file exists: {os.path.exists(audio_path)}")
+
+     # Gradio 6: Use gr.Audio() component for audio content
+     # Reference: chatbot_core_components_simple demo
+     hist.append({"role": "assistant", "content": text})
+     hist.append({"role": "assistant", "content": gr.Audio(audio_path)})
+     return "", hist, hist
else:
    content = response

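For reference, the change above only exposes the `generate_audio_hint` tool to the model when the current user message contains an explicit audio keyword. The following is a standalone illustrative sketch of that gating idea, not part of this commit; it reuses the keyword list and tool schema from the diff (Korean keywords omitted) so the gate can be exercised without Gradio, ElevenLabs, or Gemini.

```python
# Standalone sketch of the keyword gate used in floating_chatbot.py:
# the TTS tool schema is only offered when the message explicitly asks for audio.
AUDIO_KEYWORDS = ['audio', 'play', 'sound', 'hear', 'listen', 'tts', 'pronounce']

def user_wants_audio(message: str) -> bool:
    """True when the user's message explicitly requests an audio hint."""
    lowered = message.lower()
    return any(keyword in lowered for keyword in AUDIO_KEYWORDS)

def build_tools(message: str, answer_word: str, elevenlabs_ready: bool) -> list:
    """Return the audio-hint tool schema only when every precondition holds."""
    if not (elevenlabs_ready and answer_word and user_wants_audio(message)):
        return []  # no tools: the model can only reply with text hints
    word_count = len(answer_word.split())
    return [{
        "name": "generate_audio_hint",
        "description": "Generate an audio pronunciation hint using TTS.",
        "input_schema": {
            "type": "object",
            "properties": {
                "hint_type": {
                    "type": "string",
                    "enum": ["syllable", "partial", "rhythm", "almost_full"],
                },
                "word_index": {
                    "type": "integer",
                    "description": f"Which word to hint (0-indexed, {word_count} word(s) total).",
                },
            },
            "required": ["hint_type"],
        },
    }]

if __name__ == "__main__":
    print(bool(build_tools("can I hear it?", "wingardium leviosa", True)))   # True: tool offered
    print(bool(build_tools("give me a clue", "wingardium leviosa", True)))   # False: text-only reply
```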
client/frontend/components/header.py CHANGED

@@ -1,5 +1,5 @@
"""
- Header component - Voice
+ Header component - Voice Semantle style
Animated, cute logo design

👨‍💻 Owner: Developer A

@@ -9,9 +9,9 @@ import gradio as gr


class HeaderComponent:
-     """Voice
+     """Voice Semantle style header component"""

-     # Voice
+     # Voice Semantle style header HTML template
    HEADER_HTML_TEMPLATE = """
    <link href="https://fonts.googleapis.com/css2?family=Lilita+One&family=Bangers&display=swap" rel="stylesheet">
    <div class="vs-header">

@@ -21,7 +21,7 @@
    <span class="title-voice">VOICE</span>
    </span>
    <span class="title-line">
-     <span class="title-
+     <span class="title-semantle">SEMANTLE</span>
    </span>
    </h1>
    </div>

@@ -69,7 +69,7 @@
    0 0 20px rgba(90, 200, 250, 0.5);
    paint-order: stroke fill;
}
- .title-
+ .title-semantle {
    font-size: 90px;
    font-weight: 500;
    color: #e8a054;

@@ -96,7 +96,7 @@
    8px 8px 0 #082a35,
    0 0 25px rgba(90, 200, 250, 0.6);
}
- .dark .title-
+ .dark .title-semantle {
    color: #e8a054;
    -webkit-text-stroke: 4px #5c3d1e;
    text-shadow:

@@ -118,7 +118,7 @@
    7px 7px 0 #0d4a5f,
    0 0 20px rgba(90, 200, 250, 0.5);
}
- .title-
+ .title-semantle {
    font-size: 72px;
    letter-spacing: 4px;
    -webkit-text-stroke: 4px #8b5a2b;

@@ -136,7 +136,7 @@
    7px 7px 0 #0d4a5f,
    0 0 20px rgba(90, 200, 250, 0.5);
}
- .dark .title-
+ .dark .title-semantle {
    -webkit-text-stroke: 4px #8b5a2b;
    text-shadow:
    5px 5px 0 #8b5a2b,

@@ -157,7 +157,7 @@
    5px 5px 0 #0d4a5f,
    0 0 15px rgba(90, 200, 250, 0.5);
}
- .title-
+ .title-semantle {
    font-size: 52px;
    letter-spacing: 3px;
    -webkit-text-stroke: 3px #8b5a2b;

@@ -173,7 +173,7 @@
    5px 5px 0 #082a35,
    0 0 20px rgba(90, 200, 250, 0.6);
}
- .dark .title-
+ .dark .title-semantle {
    -webkit-text-stroke: 3px #5c3d1e;
    text-shadow:
    4px 4px 0 #5c3d1e,
client/frontend/components/history_display.py CHANGED

@@ -337,7 +337,7 @@ class HistoryDisplayComponent:
<div class="history-list" style='
    flex: 1;
    min-width: 0;
-     max-height:
+     max-height: 560px;
    overflow-y: auto;
'>
"""

@@ -401,7 +401,7 @@

<!-- Right: middle graph area -->
<div class="graph-area" style='
-     min-height:
+     min-height: 560px;
    background: #f0f7fc;
    position: relative;
'>
client/frontend/styles/__init__.py CHANGED

@@ -1,5 +1,5 @@
"""
- Voice
+ Voice Semantle style module
Application-wide CSS based on the animated sky-blue theme
"""

client/frontend/styles/buttons_style.py CHANGED

@@ -1,11 +1,11 @@
"""
- Button and audio input CSS - Voice
+ Button and audio input CSS - Voice Semantle theme
Button styles and audio component styling
"""

- # Voice
+ # Voice Semantle style button CSS - animated sky blue
BUTTON_CSS = """
- /* Voice
+ /* Voice Semantle style buttons - sky-blue theme */
#verify-btn,
#restart-btn {
    font-family: 'Lilita One' !important;
client/frontend/styles/chatbot_style.py CHANGED

@@ -1,9 +1,9 @@
"""
- Floating AI chatbot CSS - Voice
+ Floating AI chatbot CSS - Voice Semantle theme
Styles for the floating toggle button, chatbot container, and input area
"""

- # Floating AI chatbot CSS - Voice
+ # Floating AI chatbot CSS - Voice Semantle sky-blue theme
FLOATING_CHATBOT_CSS = """
/* Floating toggle button container */
#floating-toggle {
client/frontend/styles/custom_css.py CHANGED

@@ -1,5 +1,5 @@
"""
- Custom CSS styles - Voice
+ Custom CSS styles - Voice Semantle theme
Application-wide CSS based on the animated sky-blue theme

This file is kept for backward compatibility.
client/frontend/styles/failure_modal_style.py CHANGED

@@ -1,11 +1,11 @@
"""
- Failure modal CSS - Voice
+ Failure modal CSS - Voice Semantle theme
Failure modal and result display styles
"""

- # Failure modal CSS - Voice
+ # Failure modal CSS - Voice Semantle sky-blue theme
FAILURE_MODAL_CSS = """
- /* Failure modal - Voice
+ /* Failure modal - Voice Semantle sky-blue theme */
.modal-content {
    text-align: center;
    padding: 20px;
client/frontend/styles/falling_elements_style.py CHANGED

@@ -1,5 +1,5 @@
"""
- Falling background flower/element animation - Voice
+ Falling background flower/element animation - Voice Semantle theme
Background animation CSS and JavaScript
"""

client/frontend/styles/history_style.py CHANGED

@@ -1,11 +1,11 @@
"""
- History display CSS - Voice
+ History display CSS - Voice Semantle theme
Attempt record and history display styles
"""

- # History display CSS - Voice
+ # History display CSS - Voice Semantle sky-blue theme
HISTORY_CSS = """
- /* Attempt history - Voice
+ /* Attempt history - Voice Semantle sky-blue theme */
.attempt-history {
    margin-top: 20px;
    padding: 16px;
client/frontend/styles/result_screen_style.py CHANGED

@@ -1,11 +1,11 @@
"""
- Success screen CSS and celebration JavaScript - Voice
+ Success screen CSS and celebration JavaScript - Voice Semantle theme
Success screen styles, stats cards, confetti effect
"""

- # Success screen CSS - Voice
+ # Success screen CSS - Voice Semantle animated theme + confetti effect
RESULT_SCREEN_CSS = """
- /* Success screen - Voice
+ /* Success screen - Voice Semantle sky-blue theme */
#success-screen,
#giveup-screen {
    position: fixed !important;
client/frontend/styles/theme_style.py CHANGED

@@ -1,11 +1,11 @@
"""
- Base theme CSS - Voice
+ Base theme CSS - Voice Semantle theme
Global variables and base styles for the animated sky-blue theme
"""

- # Voice
+ # Voice Semantle theme CSS - animated sky-blue theme
BASE_THEME_CSS = """
- /* Voice
+ /* Voice Semantle style base theme - animated sky blue */
:root {
    /* Main theme colors - sky-blue family */
    --theme-primary: #4db8ff;
client/services/analysis_service.py CHANGED

@@ -7,7 +7,11 @@ import time
import logging
from typing import Dict

- from .voice_analyzer import
+ from .voice_analyzer import (
+     analyze_voice_with_mcp,
+     get_hint_history,
+     add_hint_to_history
+ )
from .hint_generator import generate_hints_with_gemini, extract_advice_text
from .database import get_puzzle_by_date, save_guess_record


@@ -75,7 +79,10 @@ async def analyze_voice(audio_bytes: bytes, date: str, session_id: str) -> Dict:
logger.info(f"VoiceKit scores: pitch={pitch}, rhythm={rhythm}, energy={energy}, "
            f"pronunciation={pronunciation}, transcript={transcript}, overall={overall}")

- # 4.
+ # 4. Get hint history for this session (to avoid repetition)
+ hint_history = get_hint_history(session_id)
+
+ # 5. Generate hints with Gemini (including user's spoken text for context-aware advice)
gemini_start = time.time()
hints_json = await generate_hints_with_gemini(
    scores={

@@ -87,15 +94,21 @@ async def analyze_voice(audio_bytes: bytes, date: str, session_id: str) -> Dict:
    },
    attempt=attempt,
    answer_word=puzzle["answer_word"],
-     category=puzzle["category"]
+     category=puzzle["category"],
+     user_text=user_text,
+     hint_history=hint_history
)
gemini_time = (time.time() - gemini_start) * 1000
logger.info(f"⏱️ Gemini hint generation: {gemini_time:.1f}ms")
+ logger.info(f"Generated hints: {hints_json}")

advice = extract_advice_text(hints_json)
is_correct = overall > 85

- #
+ # 6. Store this hint in history (for next time)
+ add_hint_to_history(session_id, advice)
+
+ # 7. Save guess record to database
save_guess_record(
    session_id=session_id,
    puzzle_number=puzzle["puzzle_number"],
client/services/hint_generator.py CHANGED

@@ -34,9 +34,10 @@ def list_hint_files(category: str) -> list:


async def generate_hints_with_gemini(
-     scores: dict, attempt: int, answer_word: str, category: str
+     scores: dict, attempt: int, answer_word: str, category: str,
+     user_text: str = "", hint_history: list = None
) -> dict:
-     """Generate JSON hints using Gemini LLM"""
+     """Generate JSON hints using Gemini LLM (with hint history to avoid repetition)"""
    try:
        # Find weakest metrics
        metrics = {

@@ -79,68 +80,73 @@ async def generate_hints_with_gemini(
guidance = f"Attempt {attempt}! Focus on pronunciation coaching for {', '.join(weakest_names)}. Give very strong hints about what to say."
category_hint = f"After {attempt} attempts, be very helpful while still not directly revealing the answer."

+ # Format hint history for prompt (avoid repetition)
+ history_text = ""
+ if hint_history and len(hint_history) > 0:
+     recent_hints = hint_history[-5:]  # Last 5 hints only
+     history_text = "\n".join([f"  - {h}" for h in recent_hints])
+
+ # Overall score for context-aware advice
+ overall_score = scores.get('overall', 0)
+
# Build prompt for Gemini
- prompt = f"""You
-
-
- -
- -
- -
- -
- -
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- 2. Hints should get progressively more helpful with each attempt
- 3. For "hint" type: Follow the guidance above based on attempt number
- 4. For "advice" type: Focus on pronunciation + give strong contextual clues
- 5. Keep text concise (1-2 sentences max)
- 6. NEVER reveal the answer directly, but after 10+ attempts be very helpful
- 7. Return ONLY valid JSON, no markdown, no extra text
+ prompt = f"""You generate hints for a pronunciation game. User tries to guess and say a secret phrase.
+
+ CONTEXT:
+ - User said: "{user_text}"
+ - Secret answer: "{answer_word}" (NEVER reveal!)
+ - Category: {category}
+ - Overall score: {overall_score}/100
+ - Attempt: {attempt}
+
+ PREVIOUS HINTS GIVEN (DO NOT REPEAT THESE - give NEW information!):
+ {history_text if history_text else "  (none yet)"}
+
+ MANDATORY FORMAT: Always start with "You said '[what user said]' - " then your feedback.
+
+ RULES:
+ 1. If overall >= 70: User is saying the RIGHT phrase. Give pronunciation tips.
+    → "You said 'Wingardium Leviosa' - Correct! Work on your pitch - try more dramatic."
+
+ 2. If overall < 70: User is saying the WRONG phrase. Analyze what they said and GUIDE them:
+    - Same franchise? → "You said 'Shut up Malfoy' - Right franchise! Now think of a famous SPELL..."
+    - Similar category? → "You said 'I'll be back' - Good movie instinct! But try a magical fantasy..."
+    - Unrelated? → "You said 'Hello' - That's not it. This is a famous {category}..."
+
+ 3. NEVER say "focus on pronunciation" when overall < 70!
+
+ 4. Be helpful based on attempt ({attempt}): 1-3 vague, 4-6 specific, 7+ very helpful.
+
+ 5. NEVER repeat hints from the history above! Always give NEW, FRESH information.
+
+ Return ONLY this JSON:
+ {{"type": "{hint_type}", "answer": [{{"text": "You said '...' - your feedback", "path": ""}}]}}
"""

+ print(f"[GEMINI HINT] Calling Gemini for hint generation...")
+ print(f"[GEMINI HINT] User said: '{user_text}', Overall: {overall_score}, Attempt: {attempt}")
+ print(f"[GEMINI HINT] Hint history: {hint_history}")
+
# Call Gemini
response = call_gemini_with_tools(
    model_name="gemini-2.5-flash",
    system_prompt="You are a JSON generator. Return ONLY valid JSON with no markdown formatting or extra text.",
    messages=[{"role": "user", "content": prompt}],
    tools=[],
-     max_tokens=
+     max_tokens=2048,  # Generous limit for hint generation with history
)

# Extract JSON from response
response_text, error = get_text_from_gemini_response(response)
+ print(f"[GEMINI HINT] Response text: {response_text[:200] if response_text else 'None'}...")
+ print(f"[GEMINI HINT] Error: {error}")
+
if error:
    logger.error(f"Gemini response error: {error}")
-
+     print(f"[GEMINI HINT] ⚠️ FALLBACK triggered due to error: {error}")
    return {
        "type": "advice",
-         "answer": [
-             {
-                 "text": f"Focus on improving {weakest_names[0]} (score: {weakest[0][1]:.0f}/100)",
-                 "path": "",
-             }
-         ],
+         "answer": [{"text": f"Keep trying! This is a famous {category}.", "path": ""}]
    }

# Clean response text (remove markdown code blocks if present)

@@ -148,24 +154,25 @@ async def generate_hints_with_gemini(
if response_text.startswith("```"):
    lines = response_text.split("\n")
    response_text = "\n".join(lines[1:-1]) if len(lines) > 2 else response_text
+     print(f"[GEMINI HINT] Cleaned markdown, result: {response_text[:200]}...")

# Parse JSON
+ print(f"[GEMINI HINT] Parsing JSON: {response_text[:300]}...")
hints_json = json.loads(response_text)
+ print(f"[GEMINI HINT] ✅ Parsed successfully: {hints_json}")
logger.info(f"✅ Generated fresh hint for attempt {attempt}, category {category}")

return hints_json

except Exception as e:
    logger.error(f"Hint generation error: {e}")
+     print(f"[GEMINI HINT] ⚠️ EXCEPTION: {type(e).__name__}: {e}")
+     import traceback
+     traceback.print_exc()
    # Fallback hint
    return {
        "type": "advice",
-         "answer": [
-             {
-                 "text": "Keep practicing! Focus on your pronunciation.",
-                 "path": "",
-             }
-         ],
+         "answer": [{"text": f"Keep trying! This is a famous {category}.", "path": ""}]
    }

client/services/voice_analyzer.py CHANGED

@@ -39,6 +39,9 @@ _mcp_lock = None
# Session tracking for attempt counts
_session_attempts = {}

+ # Session tracking for hint/advice history (to avoid repetition)
+ _session_hint_history = {}  # {session_id: [list of previous hints/advice]}
+

async def initialize_voicekit_mcp():
    """Initialize VoiceKit MCP connection on app startup"""

@@ -243,6 +246,27 @@ def get_attempt_count(session_id: str) -> int:
    return _session_attempts[session_id]


+ def get_hint_history(session_id: str) -> list:
+     """Get hint history for session (to avoid repetition)"""
+     global _session_hint_history
+     if session_id not in _session_hint_history:
+         _session_hint_history[session_id] = []
+     return _session_hint_history[session_id]
+
+
+ def add_hint_to_history(session_id: str, hint_text: str) -> None:
+     """Add a hint to session history (keeps last 10 hints)"""
+     global _session_hint_history
+     if session_id not in _session_hint_history:
+         _session_hint_history[session_id] = []
+
+     if hint_text and hint_text != "Keep practicing!":
+         _session_hint_history[session_id].append(hint_text)
+         # Keep only last 10 hints to avoid memory bloat
+         if len(_session_hint_history[session_id]) > 10:
+             _session_hint_history[session_id] = _session_hint_history[session_id][-10:]
+
+
async def analyze_voice_with_mcp(
    audio_bytes: bytes,
    session_id: str,
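The two helpers added above implement a simple per-session hint history: a module-level dict keyed by session id, trimmed to the last 10 entries so repeated hints can be avoided without unbounded memory growth. Below is a minimal standalone sketch of the same pattern for illustration only; the committed versions live in client/services/voice_analyzer.py.

```python
# Standalone sketch of the per-session hint-history pattern added above:
# a module-level dict keyed by session_id, trimmed to the 10 most recent hints.
_session_hint_history: dict[str, list[str]] = {}

def get_hint_history(session_id: str) -> list[str]:
    """Return (and lazily create) the hint list for a session."""
    return _session_hint_history.setdefault(session_id, [])

def add_hint_to_history(session_id: str, hint_text: str) -> None:
    """Append a hint and keep only the 10 most recent entries."""
    history = _session_hint_history.setdefault(session_id, [])
    if hint_text and hint_text != "Keep practicing!":
        history.append(hint_text)
        del history[:-10]  # trim in place so existing references stay valid

if __name__ == "__main__":
    for n in range(12):
        add_hint_to_history("demo", f"hint {n}")
    print(get_hint_history("demo"))  # -> ['hint 2', ..., 'hint 11']
```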
client/utils/elevenlabs_tts.py CHANGED

@@ -7,8 +7,10 @@ audio hints in the Audio Semantle game.

import os
import hashlib
+ import time
from pathlib import Path
from typing import Optional
+ from io import BytesIO

# Try to import ElevenLabs SDK
try:

@@ -18,8 +20,8 @@ except ImportError:
    ELEVENLABS_AVAILABLE = False
    print("Warning: elevenlabs package not installed. Audio hints will not be available.")

- # Configuration - use
- AUDIO_HINTS_DIR = Path("/
+ # Configuration - use project uploads directory for Gradio compatibility
+ AUDIO_HINTS_DIR = Path(__file__).parent.parent / "uploads" / "audio_hints"
AUDIO_HINTS_DIR.mkdir(parents=True, exist_ok=True)

# In-memory cache for generated audio hints

@@ -65,6 +67,10 @@ def clone_voice_from_reference(reference_audio_path: str) -> Optional[str]:
# Strip leading slash if present and resolve from project root
full_path = project_root / reference_audio_path.lstrip("/")

+ # Always use .wav for ElevenLabs (required format for voice cloning)
+ full_path = full_path.with_suffix('.wav')
+ print(f"🎵 Using WAV format for ElevenLabs: {full_path}")
+
if not full_path.exists():
    print(f"❌ Reference audio not found: {full_path}")
    return None

@@ -76,11 +82,13 @@ def clone_voice_from_reference(reference_audio_path: str) -> Optional[str]:
voice_name = f"puzzle_voice_{hashlib.md5(reference_audio_path.encode()).hexdigest()[:8]}"
print(f"🎤 Cloning voice from: {full_path}")

+ # Read file as BytesIO (required by ElevenLabs SDK)
+ # remove_background_noise=False allows shorter samples (<4.6s)
voice = client.voices.ivc.create(
    name=voice_name,
-     files=[
+     files=[BytesIO(open(full_path, "rb").read())],
    description="Cloned voice for Audio Semantle hints",
-     remove_background_noise=
+     remove_background_noise=False
)

voice_id = voice.voice_id

@@ -118,15 +126,9 @@ def generate_audio_hint(text: str, hint_type: str = "syllable", voice: str = "Ra
    return None

try:
-     # Generate
-     ref_hash = hashlib.md5((reference_audio_path or "").encode()).hexdigest()[:8]
+     # Generate unique filename with timestamp (no caching)
    text_hash = hashlib.md5(text.encode()).hexdigest()[:12]
-
-
-     # Check cache first
-     if cache_key in _audio_hint_cache:
-         print(f"✅ Using cached audio hint: {cache_key}")
-         return _audio_hint_cache[cache_key]
+     timestamp = int(time.time() * 1000)

    # Initialize ElevenLabs client (v2.24.0 API)
    api_key = get_api_key()

@@ -159,8 +161,8 @@ def generate_audio_hint(text: str, hint_type: str = "syllable", voice: str = "Ra
        output_format="mp3_44100_128"
    )

-     # Save to file
-     filename = f"{text_hash}_{hint_type}.mp3"
+     # Save to file with unique timestamp
+     filename = f"{text_hash}_{hint_type}_{timestamp}.mp3"
    filepath = AUDIO_HINTS_DIR / filename

    # Write audio bytes to file

@@ -168,11 +170,10 @@ def generate_audio_hint(text: str, hint_type: str = "syllable", voice: str = "Ra
        for chunk in audio:
            f.write(chunk)

-     # Return absolute path
+     # Return absolute path
    absolute_path = str(filepath)
-     _audio_hint_cache[cache_key] = absolute_path

-     print(f"✅ Audio hint
+     print(f"✅ Audio hint generated (fresh): {absolute_path}")
    return absolute_path

except Exception as e:

@@ -184,7 +185,8 @@ def generate_audio_hint(text: str, hint_type: str = "syllable", voice: str = "Ra

def extract_hint_portion(answer_word: str, attempts: int) -> tuple[str, str]:
    """
-     Extract what portion of the answer to pronounce based on attempt count
+     Extract what portion of the answer to pronounce based on attempt count.
+     Uses natural syllable breaks for more gradual progression.

    Args:
        answer_word: The correct answer

@@ -193,44 +195,119 @@ def extract_hint_portion(answer_word: str, attempts: int) -> tuple[str, str]:
    Returns:
        Tuple of (text_to_speak, hint_type)

-     Strategy:
-     - Attempt
-     - Attempt
-     - Attempt
-     - Attempt
+     Strategy (syllable-based, gradual):
+     - Attempt 1-2: First syllable (~25%) with "-"
+     - Attempt 3-4: ~40% of first word with "-"
+     - Attempt 5-6: ~60% of first word with "-"
+     - Attempt 7-8: Full first word
+     - Attempt 9-10: First half of phrase
+     - Attempt 11+: Almost full (missing last part)
    """
    words = answer_word.split()
-
-
-
-
-
-
-
-
-
-
-
+     first_word = words[0]
+
+     def get_syllable_break(word: str, fraction: float) -> int:
+         """Find a natural syllable break point at approximately the given fraction of the word."""
+         target = int(len(word) * fraction)
+         vowels = set('aeiouAEIOU')
+
+         # Search around target for a consonant after vowel (syllable break)
+         for i in range(max(2, target - 2), min(len(word), target + 3)):
+             if i > 0 and word[i-1] in vowels and word[i] not in vowels:
+                 return i
+         return max(2, target)
+
+     if attempts <= 2:
+         # First syllable only: ~25% with natural break
+         cut = get_syllable_break(first_word, 0.25)
+         return first_word[:cut] + "-", "minimal"
+
+     elif attempts <= 4:
+         # ~40% of first word
+         cut = get_syllable_break(first_word, 0.4)
+         return first_word[:cut] + "-", "syllable"
+
+     elif attempts <= 6:
+         # ~60% of first word
+         cut = get_syllable_break(first_word, 0.6)
+         return first_word[:cut] + "-", "partial"
+
+     elif attempts <= 8:
+         # First word only
+         return first_word, "word"
+
+     elif attempts <= 10:
        # First half of phrase
-         mid = len(words) // 2
-
-         return " ".join(words[:mid]), "partial"
-         return words[0], "partial"
-
-     elif attempts == 7:
-         # Rhythm pattern with pauses
-         return " ... ".join(words), "rhythm"
+         mid = max(1, len(words) // 2)
+         return " ".join(words[:mid]), "half"

-
-     # Almost full (missing last word)
+     else:  # 11+
+         # Almost full (missing last word or 20%)
        if len(words) > 1:
            return " ".join(words[:-1]), "almost_full"
-         # For single word, return first 80%
        cutoff = int(len(answer_word) * 0.8)
        return answer_word[:cutoff], "almost_full"

-
-
+
+ def extract_hint_portion_for_word(answer_word: str, attempts: int, word_index: int = 0) -> tuple[str, str]:
+     """
+     Extract what portion of a SPECIFIC WORD to pronounce based on attempt count.
+
+     This allows the chatbot to hint specific words when user already knows others.
+     For example, if user knows "Wingardium" but not "Leviosa", set word_index=1.
+
+     Args:
+         answer_word: The full correct answer (may have multiple words)
+         attempts: Number of attempts user has made
+         word_index: Which word to hint (0=first, 1=second, etc.)
+
+     Returns:
+         Tuple of (text_to_speak, hint_type)
+     """
+     words = answer_word.split()
+
+     # Clamp word_index to valid range
+     if word_index < 0:
+         word_index = 0
+     if word_index >= len(words):
+         word_index = len(words) - 1
+
+     target_word = words[word_index]
+
+     def get_syllable_break(word: str, fraction: float) -> int:
+         """Find a natural syllable break point at approximately the given fraction of the word."""
+         target = int(len(word) * fraction)
+         vowels = set('aeiouAEIOU')
+
+         # Search around target for a consonant after vowel (syllable break)
+         for i in range(max(2, target - 2), min(len(word), target + 3)):
+             if i > 0 and word[i-1] in vowels and word[i] not in vowels:
+                 return i
+         return max(2, target)
+
+     # Progressive hints for the target word
+     if attempts <= 2:
+         # First syllable only: ~25% with natural break
+         cut = get_syllable_break(target_word, 0.25)
+         return target_word[:cut] + "-", "minimal"
+
+     elif attempts <= 4:
+         # ~40% of target word
+         cut = get_syllable_break(target_word, 0.4)
+         return target_word[:cut] + "-", "syllable"
+
+     elif attempts <= 6:
+         # ~60% of target word
+         cut = get_syllable_break(target_word, 0.6)
+         return target_word[:cut] + "-", "partial"
+
+     elif attempts <= 8:
+         # Full target word
+         return target_word, "word"
+
+     else:  # 9+
+         # Full target word (no more to reveal for single word)
+         return target_word, "almost_full"


def should_offer_audio_hint(message: str, attempts: int, answer_word: str) -> bool:
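To illustrate the progressive, syllable-based reveal added above, the standalone sketch below mirrors the `extract_hint_portion` strategy and prints what would be spoken for a two-word answer at increasing attempt counts. It is illustrative only, not the committed implementation; "Wingardium Leviosa" is used simply because this commit also adds a matching reference audio file.

```python
# Standalone sketch: progressive audio-hint portions, mirroring the
# syllable-break strategy introduced in client/utils/elevenlabs_tts.py.
def get_syllable_break(word: str, fraction: float) -> int:
    """Find a natural syllable break near the given fraction of the word."""
    target = int(len(word) * fraction)
    vowels = set('aeiouAEIOU')
    for i in range(max(2, target - 2), min(len(word), target + 3)):
        if word[i - 1] in vowels and word[i] not in vowels:
            return i
    return max(2, target)

def hint_portion(answer: str, attempts: int) -> str:
    """Return the text a TTS hint would speak at this attempt count."""
    words = answer.split()
    first = words[0]
    if attempts <= 2:
        return first[:get_syllable_break(first, 0.25)] + "-"
    if attempts <= 4:
        return first[:get_syllable_break(first, 0.4)] + "-"
    if attempts <= 6:
        return first[:get_syllable_break(first, 0.6)] + "-"
    if attempts <= 8:
        return first
    if attempts <= 10:
        return " ".join(words[:max(1, len(words) // 2)])
    # 11+: almost full (drop the last word, or speak ~80% of a single word)
    return " ".join(words[:-1]) if len(words) > 1 else answer[:int(len(answer) * 0.8)]

if __name__ == "__main__":
    for n in (1, 3, 5, 7, 9, 11):
        print(n, "->", hint_portion("Wingardium Leviosa", n))
```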
gemini_adapter.py
CHANGED

@@ -84,6 +84,19 @@ def convert_messages_to_gemini_format(anthropic_messages):
         if isinstance(content, str):
             # Simple text message
             parts.append(types.Part(text=content))
+        elif isinstance(content, dict):
+            # Could be Gradio file format {"path": ..., "mime_type": ...}
+            # Skip audio/video files - they can't be sent to Gemini text API
+            if content.get("path") and content.get("mime_type"):
+                print(f"DEBUG convert_messages: Skipping file content: {content.get('mime_type')}")
+                continue
+            # Could be text content {"type": "text", "text": "..."}
+            elif content.get("type") == "text":
+                parts.append(types.Part(text=content.get("text", "")))
+            elif hasattr(content, '__class__') and 'Audio' in content.__class__.__name__:
+                # Skip Gradio Audio component objects
+                print(f"DEBUG convert_messages: Skipping Gradio component: {content.__class__.__name__}")
+                continue
         elif isinstance(content, list):
             # Complex content with tool calls/results
             for item in content:
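A quick sketch of the content shapes this branch now tolerates, with illustrative values that are not part of the commit:

# Gradio-style file payload: skipped, because audio/video cannot go to the Gemini text API.
file_content = {"path": "/tmp/recording.wav", "mime_type": "audio/wav"}

# Text payload: converted into a text Part.
text_content = {"type": "text", "text": "How do I pronounce the answer?"}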
gradio_ui.py
ADDED

@@ -0,0 +1,279 @@
+"""
+Gradio UI for Chloe's Voice Komentle Game
+Connects to FastAPI backend for voice analysis
+"""
+
+import os
+# Set Gradio temp directory BEFORE importing gradio
+_upload_dir = os.path.join(os.path.dirname(__file__), "gradio_uploads")
+os.makedirs(_upload_dir, exist_ok=True)
+os.environ["GRADIO_TEMP_DIR"] = _upload_dir
+
+import gradio as gr
+from datetime import datetime
+import uuid
+import asyncio
+from sqlalchemy import create_engine, text
+from dotenv import load_dotenv
+
+# Load environment variables
+load_dotenv()
+
+# Import backend functions
+from backend import (
+    analyze_voice_logic,
+    get_puzzle_by_date,
+    lifespan,
+    app as backend_app,
+)
+
+# Database connection
+DATABASE_URL = os.getenv("DATABASE_URL")
+engine = create_engine(
+    DATABASE_URL,
+    pool_size=10,        # default connection pool size
+    max_overflow=20,     # maximum number of extra connections
+    pool_pre_ping=True,  # validate connections before use
+    pool_recycle=3600,   # recycle connections every hour
+    connect_args={
+        "connect_timeout": 10,                    # connection timeout: 10 seconds
+        "options": "-c statement_timeout=30000"   # query timeout: 30 seconds
+    }
+)
+
+# Session ID (persistent across attempts)
+session_id = str(uuid.uuid4())
+
+# Backend initialization flag
+backend_initialized = False
+
+
+async def analyze_voice_async(audio_file, date_str):
+    """
+    Analyze voice using backend logic directly
+
+    Args:
+        audio_file: Path to recorded audio file
+        date_str: Date string for puzzle lookup
+
+    Returns:
+        tuple: (result_text, scores_text, hint_text, image_path)
+    """
+    if audio_file is None:
+        return "Please record audio first!", "", "", None
+
+    try:
+        # Read audio file
+        with open(audio_file, "rb") as f:
+            audio_bytes = f.read()
+
+        # Call backend logic directly
+        result = await analyze_voice_logic(audio_bytes, date_str, session_id)
+
+        # Handle errors
+        if result.get("status") == "error":
+            return f"Error: {result.get('message', 'Unknown error')}", "", "", None
+
+        # Parse response (already in 0-100 range from backend)
+        category = result.get("category", "unknown")
+        pitch = result.get("pitch", 0.0)
+        rhythm = result.get("rhythm", 0.0)
+        energy = result.get("energy", 0.0)
+        pronunciation = result.get("pronunciation", 0.0)
+        transcript = result.get("transcript", 0.0)
+        overall = result.get("overall", 0.0)
+        advice = result.get("advice", "")
+        is_correct = result.get("is_correct", False)
+        hints = {}  # hints are embedded in advice now
+
+        # Format result message
+        if is_correct:
+            result_msg = f"Correct! Overall score: {overall:.1f}/100"
+        else:
+            result_msg = f"Overall score: {overall:.1f}/100 - try again!"
+
+        # Format scores
+        scores_text = f"""
+### Score details
+
+**Category:** {category.upper()}
+
+- **Pronunciation:** {pronunciation:.1f}/100
+- **Pitch:** {pitch:.1f}/100
+- **Rhythm:** {rhythm:.1f}/100
+- **Energy:** {energy:.1f}/100
+- **Transcript:** {transcript:.1f}/100
+- **Overall:** {overall:.1f}/100
+"""
+
+        # Format hints
+        hint_text = ""
+        hint_image = None
+
+        if hints and "answer" in hints:
+            hint_type = hints.get("type", "hint")
+            hint_items = hints.get("answer", [])
+
+            if hint_type == "hint":
+                hint_text = "**Hint:**\n\n"
+            else:
+                hint_text = "**Pronunciation advice:**\n\n"
+
+            for item in hint_items:
+                hint_text += f"{item.get('text', '')}\n\n"
+
+                # Get image path if exists
+                img_path = item.get("path", "")
+                if img_path and os.path.exists(img_path):
+                    hint_image = img_path
+
+        # Add advice if no hints
+        if not hint_text and advice:
+            hint_text = f"**Advice:**\n\n{advice}"
+
+        return result_msg, scores_text, hint_text, hint_image
+
+    except Exception as e:
+        return f"Error: {str(e)}", "", "", None
+
+
+def analyze_voice(audio_file, date_str):
+    """Synchronous wrapper for async analyze_voice_async"""
+    return asyncio.run(analyze_voice_async(audio_file, date_str))
+
+
+def get_today_puzzle():
+    """Get today's puzzle information from database"""
+    try:
+        today = datetime.now().strftime("%Y-%m-%d")
+
+        # Use backend function to get puzzle
+        puzzle = get_puzzle_by_date(today)
+        print(puzzle)
+        if puzzle:
+            return f"""
+### Today's puzzle
+
+**Date:** {puzzle.get('puzzle_date', 'N/A')}
+**Puzzle number:** #{puzzle.get('puzzle_number', 'N/A')}
+**Category:** {puzzle.get('category', 'N/A').upper()}
+**Difficulty:** {puzzle.get('difficulty', 'N/A')}
+
+Say the answer word out loud! (up to 6 attempts)
+"""
+        else:
+            return "Could not find today's puzzle."
+
+    except Exception as e:
+        return f"Could not fetch puzzle info: {str(e)}"
+
+
+def reset_session():
+    """Reset session for new game"""
+    global session_id
+    session_id = str(uuid.uuid4())
+    return "New game started! Please record audio.", "", "", None
+
+
+# Create Gradio Interface
+with gr.Blocks(title="Chloe's Voice Komentle") as demo:
+    gr.Markdown("# Chloe's Voice Komentle")
+
+    # Puzzle info section
+    with gr.Row():
+        puzzle_info = gr.Markdown(value=get_today_puzzle())
+        refresh_btn = gr.Button("Refresh puzzle info", size="sm")
+
+    with gr.Row():
+        with gr.Column(scale=1):
+            # Audio recording
+            gr.Markdown("### Voice recording")
+            audio_input = gr.Audio(
+                sources=["microphone"],
+                type="filepath",
+                label="Record with microphone",
+                format="wav",
+            )
+
+            # Date input (auto-filled with today)
+            date_input = gr.Textbox(
+                label="Date (YYYY-MM-DD)",
+                value=datetime.now().strftime("%Y-%m-%d"),
+                interactive=True,
+            )
+
+            # Submit button
+            submit_btn = gr.Button("Analyze", variant="primary", size="lg")
+            reset_btn = gr.Button("New game", variant="secondary")
+
+        with gr.Column(scale=1):
+            # Results
+            gr.Markdown("### Results")
+            result_output = gr.Markdown(label="Result")
+            scores_output = gr.Markdown(label="Score details")
+
+    # Hints section
+    with gr.Row():
+        with gr.Column():
+            hint_output = gr.Markdown(label="Hints and advice")
+
+        with gr.Column():
+            hint_image = gr.Image(label="Hint image", show_label=True)
+
+    # Event handlers
+    submit_btn.click(
+        fn=analyze_voice,
+        inputs=[audio_input, date_input],
+        outputs=[result_output, scores_output, hint_output, hint_image],
+    )
+
+    reset_btn.click(
+        fn=reset_session,
+        inputs=[],
+        outputs=[result_output, scores_output, hint_output, hint_image],
+    )
+
+    refresh_btn.click(fn=get_today_puzzle, inputs=[], outputs=[puzzle_info])
+
+    # Footer
+    gr.Markdown("---\n**Powered by:** VoiceKit MCP + Gemini AI")
+
+# Launch configuration
+if __name__ == "__main__":
+    print("Starting Chloe's Voice Komentle...")
+
+    # Initialize backend (VoiceKit MCP session)
+    print("Initializing VoiceKit MCP...")
+
+    async def init_backend():
+        """Initialize backend resources"""
+        async with lifespan(backend_app):
+            print("VoiceKit MCP initialized")
+            # Keep the lifespan context active
+            await asyncio.Event().wait()  # Wait forever
+
+    # Run backend initialization in background
+    import threading
+
+    def run_backend_init():
+        asyncio.run(init_backend())
+
+    backend_thread = threading.Thread(target=run_backend_init, daemon=True)
+    backend_thread.start()
+
+    # Wait a bit for initialization
+    import time
+
+    time.sleep(5)
+    print("Backend initialized")
+
+    # Launch Gradio
+    server_host = os.getenv("SERVER_HOST")
+    frontend_port = int(os.getenv("FRONTEND_PORT"))
+    demo.launch(
+        server_name=server_host,  # Listen on all interfaces
+        server_port=frontend_port,  # Default Gradio port
+        share=False,  # Set to True for public link
+        show_error=True,
+        allowed_paths=[os.path.join(os.path.dirname(__file__), "hints", "audio")],  # Allow serving TTS audio hints
+    )
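Note that the launch block above reads SERVER_HOST and FRONTEND_PORT (and, at import time, DATABASE_URL) from the environment with no fallback, so a missing variable makes int(os.getenv("FRONTEND_PORT")) fail. A minimal sketch of the expected variables, using placeholder values that are not part of the commit:

import os

# Placeholder values for illustration only; real values belong in the project's .env file.
os.environ.setdefault("DATABASE_URL", "postgresql+psycopg2://user:password@localhost:5432/komentle")
os.environ.setdefault("SERVER_HOST", "0.0.0.0")
os.environ.setdefault("FRONTEND_PORT", "7860")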
postgresql.py
ADDED

@@ -0,0 +1,88 @@
+import os
+from dotenv import load_dotenv
+from sqlalchemy import create_engine, text
+import pandas as pd
+import gradio as gr
+
+load_dotenv()
+
+DATABASE_URL = os.getenv('DATABASE_URL')
+engine = create_engine(DATABASE_URL)
+
+def test_connection():
+    """Test database connection and show basic info"""
+    try:
+        with engine.connect() as connection:
+            result = connection.execute(text("SELECT version()"))
+            version = result.scalar()
+            return f"Connection successful!\nDatabase version: {version}"
+    except Exception as e:
+        return f"Connection failed: {e}"
+
+def get_all_puzzles():
+    """Fetch all puzzles from database"""
+    try:
+        query = text("SELECT * FROM puzzles ORDER BY puzzle_number")
+        df = pd.read_sql_query(query, engine)
+        return df
+    except Exception as e:
+        return pd.DataFrame({"error": [str(e)]})
+
+def get_puzzle_by_difficulty(difficulty):
+    """Fetch puzzles filtered by difficulty"""
+    try:
+        if difficulty == "All":
+            query = text("SELECT * FROM puzzles ORDER BY puzzle_number")
+            df = pd.read_sql_query(query, engine)
+        else:
+            query = text("SELECT * FROM puzzles WHERE difficulty = :difficulty ORDER BY puzzle_number")
+            df = pd.read_sql_query(query, engine, params={"difficulty": difficulty})
+        return df
+    except Exception as e:
+        return pd.DataFrame({"error": [str(e)]})
+
+def get_table_count():
+    """Get total count of puzzles"""
+    try:
+        query = text("SELECT COUNT(*) FROM puzzles")
+        with engine.connect() as connection:
+            result = connection.execute(query)
+            count = result.scalar()
+            return f"Total puzzles: {count}"
+    except Exception as e:
+        return f"Error: {e}"
+
+with gr.Blocks(title="Puzzles DB Test") as demo:
+    gr.Markdown("# Puzzles database connection test")
+
+    with gr.Row():
+        with gr.Column():
+            test_btn = gr.Button("Connection test", variant="primary")
+            connection_status = gr.Textbox(label="Connection status", lines=3)
+
+        with gr.Column():
+            count_btn = gr.Button("Check row count")
+            count_output = gr.Textbox(label="Count")
+
+    gr.Markdown("## Fetch all data")
+    load_all_btn = gr.Button("Load all puzzles")
+    all_data = gr.Dataframe(label="All puzzle data")
+
+    gr.Markdown("## Filter by difficulty")
+    difficulty_filter = gr.Dropdown(
+        ["All", "easy", "medium", "hard"],
+        value="All",
+        label="Select difficulty"
+    )
+    filter_btn = gr.Button("Apply filter")
+    filtered_data = gr.Dataframe(label="Filtered data")
+
+    # Event handlers
+    test_btn.click(test_connection, outputs=connection_status)
+    count_btn.click(get_table_count, outputs=count_output)
+    load_all_btn.click(get_all_puzzles, outputs=all_data)
+    filter_btn.click(get_puzzle_by_difficulty, inputs=difficulty_filter, outputs=filtered_data)
+    difficulty_filter.change(get_puzzle_by_difficulty, inputs=difficulty_filter, outputs=filtered_data)
+
+if __name__ == "__main__":
+    demo.launch()
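The helpers can also be exercised without the UI; a minimal sketch, assuming DATABASE_URL is set in .env and the puzzles table exists:

from postgresql import test_connection, get_puzzle_by_difficulty

print(test_connection())                 # PostgreSQL version string on success
print(get_puzzle_by_difficulty("easy"))  # DataFrame of easy puzzles, or an "error" DataFrame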
reference_audio/movie/wingardiumleviosa.mp3
ADDED

Binary file (35.8 kB).
requirements.txt
CHANGED

@@ -2,17 +2,26 @@ aiofiles==24.1.0
 annotated-doc==0.0.4
 annotated-types==0.7.0
 anyio==4.11.0
+asttokens==3.0.1
+attrs==25.4.0
 brotli==1.2.0
+cachetools==6.2.2
 certifi==2025.11.12
+cffi==2.0.0
+charset-normalizer==3.4.4
 click==8.3.1
 colorama==0.4.6
+cryptography==46.0.3
+decorator==5.2.1
 dotenv==0.9.9
-elevenlabs==
+elevenlabs==2.24.0
+executing==2.2.1
 fastapi==0.122.0
 ffmpy==1.0.0
 filelock==3.20.0
 fsspec==2025.10.0
-google-
+google-auth==2.43.0
+google-genai==1.52.0
 gradio==6.0.0
 gradio_client==2.0.0.dev3
 greenlet==3.2.4
@@ -21,42 +30,69 @@ h11==0.16.0
 hf-xet==1.2.0
 httpcore==1.0.9
 httpx==0.28.1
+httpx-sse==0.4.3
 huggingface_hub==1.1.5
 idna==3.11
+ipython==9.7.0
+ipython_pygments_lexers==1.1.1
+jedi==0.19.2
 Jinja2==3.1.6
+jsonschema==4.25.1
+jsonschema-specifications==2025.9.1
 markdown-it-py==4.0.0
 MarkupSafe==3.0.3
+matplotlib-inline==0.2.1
+mcp==1.22.0
 mdurl==0.1.2
-numpy
+numpy==1.26.4
 orjson==3.11.4
 packaging==25.0
-pandas
+pandas==2.2.3
+parso==0.8.5
+pexpect==4.9.0
+pillow==11.3.0
+prompt_toolkit==3.0.52
 psycopg2-binary==2.9.11
+ptyprocess==0.7.0
+pure_eval==0.2.3
+pyasn1==0.6.1
+pyasn1_modules==0.4.2
+pycparser==2.23
 pydantic==2.12.4
+pydantic-settings==2.12.0
 pydantic_core==2.41.5
 pydub==0.25.1
 Pygments==2.19.2
+PyJWT==2.10.1
 python-dateutil==2.9.0.post0
 python-dotenv==1.2.1
 python-multipart==0.0.20
 pytz==2025.2
 PyYAML==6.0.3
+referencing==0.37.0
 requests==2.31.0
 rich==14.2.0
+rpds-py==0.29.0
+rsa==4.9.1
 safehttpx==0.1.7
 semantic-version==2.10.0
 shellingham==1.5.4
 six==1.17.0
 sniffio==1.3.1
 SQLAlchemy==2.0.44
+sse-starlette==3.0.3
+stack-data==0.6.3
 starlette==0.50.0
+tenacity==9.1.2
 tomlkit==0.13.3
 tqdm==4.67.1
+traitlets==5.14.3
 typer==0.20.0
 typer-slim==0.20.0
 typing-inspection==0.4.2
 typing_extensions==4.15.0
 tzdata==2025.2
+urllib3==2.5.0
+uvicorn==0.38.0
+wcwidth==0.2.14
+websockets==15.0.1
voice_app.py
ADDED

@@ -0,0 +1,148 @@
+"""
+Gradio Voice Recording App for Komentle
+Records user voice and sends to FastAPI backend
+"""
+import gradio as gr
+import requests
+import uuid
+from datetime import datetime
+import os
+
+# Backend API URL (configurable via environment variable)
+BACKEND_URL = os.getenv("BACKEND_URL", "http://localhost:8000")
+
+def process_voice(audio):
+    """
+    Process recorded voice and send to backend
+
+    Args:
+        audio: tuple (sample_rate, audio_data) or file path
+
+    Returns:
+        dict: Response from backend with scores
+    """
+    if audio is None:
+        return {
+            "status": "error",
+            "message": "No audio was recorded."
+        }
+
+    try:
+        # Generate or retrieve session ID (real session management is needed in practice)
+        session_id = str(uuid.uuid4())
+        today = datetime.now().strftime("%Y-%m-%d")
+
+        # Prepare request data
+        files = {
+            'audio': ('audio.wav', open(audio, 'rb'), 'audio/wav')
+        }
+        data = {
+            'date': today,
+            'session_id': session_id
+        }
+
+        # Send to backend
+        response = requests.post(
+            f"{BACKEND_URL}/api/analyze-voice",
+            files=files,
+            data=data,
+            timeout=30
+        )
+
+        if response.status_code == 200:
+            result = response.json()
+            return format_result(result)
+        else:
+            return {
+                "status": "error",
+                "message": f"Backend error: {response.status_code}"
+            }
+
+    except Exception as e:
+        return {
+            "status": "error",
+            "message": f"Error: {str(e)}"
+        }
+
+def format_result(result):
+    """Format backend response for display"""
+    if result.get("status") == "error":
+        return f"Error: {result.get('message')}"
+
+    category = result.get("category", "unknown")
+    pitch = result.get("pitch", 0.0)
+    rhythm = result.get("rhythm", 0.0)
+    energy = result.get("energy", 0.0)
+    pronunciation = result.get("pronunciation", 0.0)
+    transcript = result.get("transcript", 0.0)  # transcript accuracy score
+    overall = result.get("overall", 0.0)
+    advice = result.get("advice", "")
+    is_correct = result.get("is_correct", False)  # whether the answer was correct
+
+    output = f"Category: {category}\n\n"
+
+    # Correct or not
+    if is_correct:
+        output += "Correct! Congratulations!\n\n"
+    else:
+        output += "Not correct yet. Try again!\n\n"
+
+    output += "Analysis results:\n"
+    output += f"  - Pitch: {pitch:.1f}/100\n"
+    output += f"  - Rhythm: {rhythm:.1f}/100\n"
+    output += f"  - Energy: {energy:.1f}/100\n"
+    output += f"  - Pronunciation: {pronunciation:.1f}/100\n"
+    output += f"  - Transcript accuracy: {transcript:.1f}/100\n"
+    output += f"\nOverall: {overall:.1f}/100\n"
+
+    # AI advice
+    if advice:
+        output += f"\nAI advice:\n{advice}\n"
+
+    return output
+
+# Gradio Interface
+with gr.Blocks(title="Komentle Voice Challenge") as demo:
+    gr.Markdown("# Komentle Voice Challenge")
+    gr.Markdown("Take on today's puzzle with your voice!")
+
+    gr.Markdown("### How to use")
+    gr.Markdown("""
+    1. Click the microphone button to start recording
+    2. Say today's answer out loud
+    3. When the recording is done, click the 'Start analysis' button
+    4. Check the scores the AI produced
+    """)
+
+    with gr.Row():
+        with gr.Column():
+            audio_input = gr.Audio(
+                sources=["microphone"],
+                type="filepath",
+                label="Voice recording",
+                format="wav"
+            )
+            submit_btn = gr.Button("Start analysis", variant="primary", size="lg")
+
+        with gr.Column():
+            result_output = gr.Textbox(
+                label="Analysis results",
+                lines=10,
+                interactive=False
+            )
+
+
+
+    # Event handlers
+    submit_btn.click(
+        fn=process_voice,
+        inputs=audio_input,
+        outputs=result_output
+    )
+
+if __name__ == "__main__":
+    demo.launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        share=False
+    )
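A small sketch of how format_result renders a backend payload; the field values below are made up for illustration, only the keys come from the code above:

from voice_app import format_result

sample = {
    "status": "ok",
    "category": "movie",
    "pitch": 72.0, "rhythm": 64.5, "energy": 80.1,
    "pronunciation": 58.3, "transcript": 90.0, "overall": 71.2,
    "advice": "Stress the second syllable a little more.",
    "is_correct": False,
}
print(format_result(sample))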