SJLee-0525 committed
Commit · 8374119
Parent(s): 6d34043

[TEST] test29
- .gitignore +2 -1
- client/app.py +16 -4
- client/frontend/app_ui.py +1 -1
- client/frontend/components/audio_input.py +3 -3
- client/frontend/components/floating_chatbot.py +44 -25
- client/frontend/components/header.py +10 -10
- client/frontend/components/history_display.py +2 -2
- client/frontend/styles/__init__.py +1 -1
- client/frontend/styles/buttons_style.py +3 -3
- client/frontend/styles/chatbot_style.py +2 -2
- client/frontend/styles/custom_css.py +1 -1
- client/frontend/styles/failure_modal_style.py +3 -3
- client/frontend/styles/falling_elements_style.py +1 -1
- client/frontend/styles/history_style.py +3 -3
- client/frontend/styles/result_screen_style.py +3 -3
- client/frontend/styles/theme_style.py +3 -3
- client/services/analysis_service.py +17 -4
- client/services/hint_generator.py +60 -53
- client/services/voice_analyzer.py +24 -0
- client/utils/elevenlabs_tts.py +124 -47
- gemini_adapter.py +13 -0
- gradio_ui.py +279 -0
- postgresql.py +88 -0
- reference_audio/movie/wingardiumleviosa.mp3 +0 -0
- requirements.txt +43 -7
- voice_app.py +148 -0
.gitignore CHANGED

@@ -4,4 +4,5 @@ gradio_uploads/
venv/
.venv/
__pycache__/
-
+ gradio-env/
+ *.wav
client/app.py CHANGED

@@ -1,12 +1,17 @@
"""
+ Single-app version with integrated backend logic (uses only one port)
+
Voice verification app - main application (integrated version)
Author: Kevin's Team
Description: Speech-recognition-based pronunciation verification system
-
- Single-app version with integrated backend logic (uses only one port)
"""

import os
+
+ # Load .env (must run before other modules are imported)
+ from dotenv import load_dotenv
+ load_dotenv(os.path.join(os.path.dirname(__file__), "..", ".env"))
+
import sys
import asyncio


@@ -279,6 +284,7 @@ class AudioValidationApp:
"category": category,  # Puzzle category for chatbot
"answerWord": answer_word,  # Answer word for chatbot context
"referenceAudioPath": reference_audio_path,  # For TTS voice cloning
+ "userText": user_text,  # STT result for chatbot context
**metrics
}
)

@@ -318,9 +324,12 @@

# Add the successful audio record to game_state (for the User Audio display)
updated_game_state = GameStateManager.add_guess(
-     game_state,
+     game_state,
+     recognized_text,
+     audio_path,
    {
        "score": score,
+         "userText": user_text,
        "answerWord": answer_word,
        "referenceAudioPath": reference_audio_path,
        "category": category,

@@ -517,6 +526,9 @@ if __name__ == "__main__":
    server_port=frontend_port,
    show_error=True,
    allowed_paths=[UPLOAD_DIR, DOCS_DIR, IMAGES_DIR, REFERENCE_AUDIO_DIR],
-     footer_links=[
+     footer_links=[
+         {"text": "User Guide", "url": f"/file={DOCS_DIR}/user-guide.html"},
+         {"text": "Tech Stack", "url": f"/file={DOCS_DIR}/tech-stack.html"},
+     ]
)

client/frontend/app_ui.py CHANGED

@@ -64,7 +64,7 @@ class AppUI:
import json
stats_json = json.dumps(stats)

- with gr.Blocks(title="VOICE
+ with gr.Blocks(title="VOICE SEMANTLE") as demo:

# ============== Dashboard Stats (accessible from JS) ==============
gr.HTML(
client/frontend/components/audio_input.py CHANGED

@@ -1,5 +1,5 @@
"""
- Audio input component - Voice
+ Audio input component - Voice Semantle style
Animated sky-blue themed voice input interface
Controls the Gradio Audio component with custom buttons


@@ -10,12 +10,12 @@ import gradio as gr


class AudioInputComponent:
-     """Voice
+     """Voice Semantle style audio input component"""

    # Mic button HTML template
    MIC_BUTTON_HTML_TEMPLATE = """
    <div class="mic-section">
-         <div class="mic-status" id="mic-status">Click the
+         <div class="mic-status" id="mic-status">Click the play button to start game</div>
        <button class="mic-btn" id="mic-btn" title="Start recording">
            <svg xmlns="http://www.w3.org/2000/svg" height="44" viewBox="0 0 64 64" width="44">
                <path fill="#fff" d="M24 18 Q20 18 20 22 L20 42 Q20 46 24 46 L46 34 Q50 32 46 30 Z" stroke="#fff" stroke-width="2" stroke-linejoin="round" stroke-linecap="round"/>
client/frontend/components/floating_chatbot.py CHANGED

@@ -230,11 +230,12 @@ Greet them warmly and help them understand the game:

# Add audio hint capability info (Phase 2 + Tool Calling)
if is_elevenlabs_configured():
-     context_parts.append(f"\n### AUDIO HINT TOOL (
-     context_parts.append(f"- You have a tool called `generate_audio_hint` that generates
-     context_parts.append(f"-
-     context_parts.append(f"  - Keywords: 'play audio', 'TTS', 'hear it', 'how does it sound', 'generate audio'")
-     context_parts.append(f"  -
+     context_parts.append(f"\n### AUDIO HINT TOOL (Use ONLY when explicitly requested)")
+     context_parts.append(f"- You have a tool called `generate_audio_hint` that generates TTS audio")
+     context_parts.append(f"- ONLY call this tool when the user EXPLICITLY asks for audio hints:")
+     context_parts.append(f"  - Keywords that REQUIRE audio: 'audio hint', 'play audio', 'TTS', 'hear it', 'how does it sound', 'generate audio', 'listen'")
+     context_parts.append(f"  - Keywords that do NOT require audio: 'hint', 'help', 'clue', 'what is it', general questions")
+     context_parts.append(f"- DO NOT call this tool for general hints or questions - only for explicit audio requests")
    context_parts.append(f"- Tool parameters:")
    context_parts.append(f"  - hint_type: 'syllable' | 'partial' | 'rhythm' | 'almost_full'")
    context_parts.append(f"- Choose hint_type based on attempt count:")

@@ -242,8 +243,6 @@ Greet them warmly and help them understand the game:
    context_parts.append(f"  - Attempt 5-6: use 'partial' (first half)")
    context_parts.append(f"  - Attempt 7-9: use 'rhythm' (with pauses)")
    context_parts.append(f"  - Attempt 10+: use 'almost_full' (almost complete)")
-     context_parts.append(f"- After calling the tool, explain what the user will hear")
-     context_parts.append(f"- Example: User says 'give me TTS' → Call generate_audio_hint(hint_type='syllable')")
    context_parts.append("")

    context_parts.append(f"They are trying to figure out what word/phrase to pronounce.\n")

@@ -259,15 +258,19 @@
    pronunciation = ai_analysis.get("pronunciation", "N/A")
    overall = ai_analysis.get("overall_score", "N/A")  # Match standardized field name

-     # Get recognized text from guess
+     # Get recognized text from guess
    guessed_word = guess.get("guessedWord", "")
+     # Get actual STT result (what user pronounced)
+     user_spoken_text = ai_analysis.get("userText", "")

    # Previous advice/hints the AI gave (if any)
    advice = ai_analysis.get("advice", "")

    context_parts.append(f"### Attempt {i}")
-     #
-     if
+     # Show what they actually said via STT (more accurate for pronunciation feedback)
+     if user_spoken_text:
+         context_parts.append(f"- What they pronounced (STT): \"{user_spoken_text}\"")
+     elif guessed_word and not guessed_word.startswith("Score:"):
    context_parts.append(f"- What they said: \"{guessed_word}\"")
    context_parts.append(f"- Scores: Pitch={pitch}, Rhythm={rhythm}, Energy={energy}, Pronunciation={pronunciation}, Overall={overall}")


@@ -374,10 +377,16 @@ def call_ai_backend(message: str, user_id: str, history: List[Dict], game_state:
elevenlabs_ready = is_elevenlabs_configured()
print(f"[CHATBOT] ElevenLabs ready: {elevenlabs_ready}, Answer word: '{answer_word}'")

- if
+ # Only enable audio tool if user EXPLICITLY asks for audio in THIS message
+ audio_keywords = ['audio', 'play', 'sound', 'hear', 'listen', 'tts', 'pronounce', '들려', '발음']
+ user_wants_audio = any(kw in message.lower() for kw in audio_keywords)
+
+ if elevenlabs_ready and answer_word and user_wants_audio:
+     # Get word count for tool description
+     word_count = len(answer_word.split())
    tools = [{
        "name": "generate_audio_hint",
-         "description": "Generate an audio pronunciation hint using
+         "description": "Generate an audio pronunciation hint using TTS.",
        "input_schema": {
            "type": "object",
            "properties": {

@@ -385,12 +394,18 @@ def call_ai_backend(message: str, user_id: str, history: List[Dict], game_state:
                "type": "string",
                "enum": ["syllable", "partial", "rhythm", "almost_full"],
                "description": "Type of audio hint: 'syllable' (first syllable), 'partial' (first half), 'rhythm' (with pauses), 'almost_full' (missing last part)"
+             },
+             "word_index": {
+                 "type": "integer",
+                 "description": f"Which word to hint (0-indexed). The phrase has {word_count} word(s). Use 0 for first word, 1 for second word, etc. If user knows first word but not second, use 1."
            }
        },
        "required": ["hint_type"]
        }
    }]
-     print(f"[CHATBOT] Audio tool enabled!
+     print(f"[CHATBOT] Audio tool enabled! User requested audio.")
+ elif elevenlabs_ready and answer_word:
+     print(f"[CHATBOT] Audio tool NOT enabled - user didn't request audio. Message: '{message[:50]}...'")

# Call Gemini with tools
response_text, tool_calls, error = chat_with_gemini_and_tools(

@@ -412,15 +427,16 @@ def call_ai_backend(message: str, user_id: str, history: List[Dict], game_state:
for tool_call in tool_calls:
    if tool_call['name'] == 'generate_audio_hint':
        hint_type = tool_call['input'].get('hint_type', 'syllable')
-
+         word_index = tool_call['input'].get('word_index', 0)  # Default to first word
+         print(f"[CHATBOT] Gemini requested audio hint: type={hint_type}, word_index={word_index}")

-         # Extract the appropriate portion based on hint_type and answer_word
-         from utils.elevenlabs_tts import
+         # Extract the appropriate portion based on hint_type, word_index, and answer_word
+         from utils.elevenlabs_tts import extract_hint_portion_for_word, generate_audio_hint

-         text_to_speak,
+         text_to_speak, actual_hint_type = extract_hint_portion_for_word(answer_word, attempt_count, word_index)
        # Use voice cloning from reference audio when available
-         print(f"[CHATBOT] Generating audio with reference: {reference_audio_path}")
-         audio_path = generate_audio_hint(text_to_speak,
+         print(f"[CHATBOT] Generating audio for: '{text_to_speak}' with reference: {reference_audio_path}")
+         audio_path = generate_audio_hint(text_to_speak, actual_hint_type, reference_audio_path=reference_audio_path)

        if audio_path:
            print(f"[CHATBOT] Audio hint generated: {audio_path}")

@@ -560,20 +576,23 @@ class FloatingChatbotComponent:

print(f"[CHATBOT] AI response: {str(response)[:100]}...")

- # Convert tuple response (text, audio_path) to Gradio format
+ # Convert tuple response (text, audio_path) to Gradio 6 format
if isinstance(response, tuple):
    text, audio_path = response
    # Convert relative path to absolute path for Gradio
    import os
-     if not os.path.isabs(audio_path):
+     if audio_path and not os.path.isabs(audio_path):
        # Path is relative to project root (3 levels up from this file)
        project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
        audio_path = os.path.join(project_root, audio_path)
-     content = [
-         {"type": "text", "text": text},
-         {"type": "file", "file": {"path": audio_path, "mime_type": "audio/mpeg"}}
-     ]
    print(f"[CHATBOT] Audio hint included: {audio_path}")
+     print(f"[CHATBOT] Audio file exists: {os.path.exists(audio_path)}")
+
+     # Gradio 6: Use gr.Audio() component for audio content
+     # Reference: chatbot_core_components_simple demo
+     hist.append({"role": "assistant", "content": text})
+     hist.append({"role": "assistant", "content": gr.Audio(audio_path)})
+     return "", hist, hist
else:
    content = response

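For reference, the change above only exposes the `generate_audio_hint` tool to the model when the current user message contains an explicit audio keyword. The following is a standalone illustrative sketch of that gating idea, not part of this commit; it reuses the keyword list and tool schema from the diff (Korean keywords omitted) so the gate can be exercised without Gradio, ElevenLabs, or Gemini.

```python
# Standalone sketch of the keyword gate used in floating_chatbot.py:
# the TTS tool schema is only offered when the message explicitly asks for audio.
AUDIO_KEYWORDS = ['audio', 'play', 'sound', 'hear', 'listen', 'tts', 'pronounce']

def user_wants_audio(message: str) -> bool:
    """True when the user's message explicitly requests an audio hint."""
    lowered = message.lower()
    return any(keyword in lowered for keyword in AUDIO_KEYWORDS)

def build_tools(message: str, answer_word: str, elevenlabs_ready: bool) -> list:
    """Return the audio-hint tool schema only when every precondition holds."""
    if not (elevenlabs_ready and answer_word and user_wants_audio(message)):
        return []  # no tools: the model can only reply with text hints
    word_count = len(answer_word.split())
    return [{
        "name": "generate_audio_hint",
        "description": "Generate an audio pronunciation hint using TTS.",
        "input_schema": {
            "type": "object",
            "properties": {
                "hint_type": {
                    "type": "string",
                    "enum": ["syllable", "partial", "rhythm", "almost_full"],
                },
                "word_index": {
                    "type": "integer",
                    "description": f"Which word to hint (0-indexed, {word_count} word(s) total).",
                },
            },
            "required": ["hint_type"],
        },
    }]

if __name__ == "__main__":
    print(bool(build_tools("can I hear it?", "wingardium leviosa", True)))   # True: tool offered
    print(bool(build_tools("give me a clue", "wingardium leviosa", True)))   # False: text-only reply
```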
client/frontend/components/header.py CHANGED

@@ -1,5 +1,5 @@
"""
- Header component - Voice
+ Header component - Voice Semantle style
Animated, cute logo design

👨‍💻 Owner: Developer A

@@ -9,9 +9,9 @@ import gradio as gr


class HeaderComponent:
-     """Voice
+     """Voice Semantle style header component"""

-     # Voice
+     # Voice Semantle style header HTML template
    HEADER_HTML_TEMPLATE = """
    <link href="https://fonts.googleapis.com/css2?family=Lilita+One&family=Bangers&display=swap" rel="stylesheet">
    <div class="vs-header">

@@ -21,7 +21,7 @@
    <span class="title-voice">VOICE</span>
    </span>
    <span class="title-line">
-     <span class="title-
+     <span class="title-semantle">SEMANTLE</span>
    </span>
    </h1>
    </div>

@@ -69,7 +69,7 @@
    0 0 20px rgba(90, 200, 250, 0.5);
    paint-order: stroke fill;
}
- .title-
+ .title-semantle {
    font-size: 90px;
    font-weight: 500;
    color: #e8a054;

@@ -96,7 +96,7 @@
    8px 8px 0 #082a35,
    0 0 25px rgba(90, 200, 250, 0.6);
}
- .dark .title-
+ .dark .title-semantle {
    color: #e8a054;
    -webkit-text-stroke: 4px #5c3d1e;
    text-shadow:

@@ -118,7 +118,7 @@
    7px 7px 0 #0d4a5f,
    0 0 20px rgba(90, 200, 250, 0.5);
}
- .title-
+ .title-semantle {
    font-size: 72px;
    letter-spacing: 4px;
    -webkit-text-stroke: 4px #8b5a2b;

@@ -136,7 +136,7 @@
    7px 7px 0 #0d4a5f,
    0 0 20px rgba(90, 200, 250, 0.5);
}
- .dark .title-
+ .dark .title-semantle {
    -webkit-text-stroke: 4px #8b5a2b;
    text-shadow:
    5px 5px 0 #8b5a2b,

@@ -157,7 +157,7 @@
    5px 5px 0 #0d4a5f,
    0 0 15px rgba(90, 200, 250, 0.5);
}
- .title-
+ .title-semantle {
    font-size: 52px;
    letter-spacing: 3px;
    -webkit-text-stroke: 3px #8b5a2b;

@@ -173,7 +173,7 @@
    5px 5px 0 #082a35,
    0 0 20px rgba(90, 200, 250, 0.6);
}
- .dark .title-
+ .dark .title-semantle {
    -webkit-text-stroke: 3px #5c3d1e;
    text-shadow:
    4px 4px 0 #5c3d1e,
client/frontend/components/history_display.py CHANGED

@@ -337,7 +337,7 @@ class HistoryDisplayComponent:
<div class="history-list" style='
    flex: 1;
    min-width: 0;
-     max-height:
+     max-height: 560px;
    overflow-y: auto;
'>
"""

@@ -401,7 +401,7 @@

<!-- Right: middle graph area -->
<div class="graph-area" style='
-     min-height:
+     min-height: 560px;
    background: #f0f7fc;
    position: relative;
'>
client/frontend/styles/__init__.py CHANGED

@@ -1,5 +1,5 @@
"""
- Voice
+ Voice Semantle style module
Application-wide CSS based on the animated sky-blue theme
"""

client/frontend/styles/buttons_style.py CHANGED

@@ -1,11 +1,11 @@
"""
- Button and audio input CSS - Voice
+ Button and audio input CSS - Voice Semantle theme
Button styles and audio component styling
"""

- # Voice
+ # Voice Semantle style button CSS - animated sky blue
BUTTON_CSS = """
- /* Voice
+ /* Voice Semantle style buttons - sky-blue theme */
#verify-btn,
#restart-btn {
    font-family: 'Lilita One' !important;
client/frontend/styles/chatbot_style.py CHANGED

@@ -1,9 +1,9 @@
"""
- Floating AI chatbot CSS - Voice
+ Floating AI chatbot CSS - Voice Semantle theme
Styles for the floating toggle button, chatbot container, and input area
"""

- # Floating AI chatbot CSS - Voice
+ # Floating AI chatbot CSS - Voice Semantle sky-blue theme
FLOATING_CHATBOT_CSS = """
/* Floating toggle button container */
#floating-toggle {
client/frontend/styles/custom_css.py CHANGED

@@ -1,5 +1,5 @@
"""
- Custom CSS styles - Voice
+ Custom CSS styles - Voice Semantle theme
Application-wide CSS based on the animated sky-blue theme

This file is kept for backward compatibility.
client/frontend/styles/failure_modal_style.py CHANGED

@@ -1,11 +1,11 @@
"""
- Failure modal CSS - Voice
+ Failure modal CSS - Voice Semantle theme
Failure modal and result display styles
"""

- # Failure modal CSS - Voice
+ # Failure modal CSS - Voice Semantle sky-blue theme
FAILURE_MODAL_CSS = """
- /* Failure modal - Voice
+ /* Failure modal - Voice Semantle sky-blue theme */
.modal-content {
    text-align: center;
    padding: 20px;
client/frontend/styles/falling_elements_style.py CHANGED

@@ -1,5 +1,5 @@
"""
- Falling background flower/element animation - Voice
+ Falling background flower/element animation - Voice Semantle theme
Background animation CSS and JavaScript
"""

client/frontend/styles/history_style.py CHANGED

@@ -1,11 +1,11 @@
"""
- History display CSS - Voice
+ History display CSS - Voice Semantle theme
Attempt record and history display styles
"""

- # History display CSS - Voice
+ # History display CSS - Voice Semantle sky-blue theme
HISTORY_CSS = """
- /* Attempt history - Voice
+ /* Attempt history - Voice Semantle sky-blue theme */
.attempt-history {
    margin-top: 20px;
    padding: 16px;
client/frontend/styles/result_screen_style.py CHANGED

@@ -1,11 +1,11 @@
"""
- Success screen CSS and celebration JavaScript - Voice
+ Success screen CSS and celebration JavaScript - Voice Semantle theme
Success screen styles, stats cards, confetti effect
"""

- # Success screen CSS - Voice
+ # Success screen CSS - Voice Semantle animated theme + confetti effect
RESULT_SCREEN_CSS = """
- /* Success screen - Voice
+ /* Success screen - Voice Semantle sky-blue theme */
#success-screen,
#giveup-screen {
    position: fixed !important;
client/frontend/styles/theme_style.py CHANGED

@@ -1,11 +1,11 @@
"""
- Base theme CSS - Voice
+ Base theme CSS - Voice Semantle theme
Global variables and base styles for the animated sky-blue theme
"""

- # Voice
+ # Voice Semantle theme CSS - animated sky-blue theme
BASE_THEME_CSS = """
- /* Voice
+ /* Voice Semantle style base theme - animated sky blue */
:root {
    /* Main theme colors - sky-blue family */
    --theme-primary: #4db8ff;
client/services/analysis_service.py CHANGED

@@ -7,7 +7,11 @@ import time
import logging
from typing import Dict

- from .voice_analyzer import
+ from .voice_analyzer import (
+     analyze_voice_with_mcp,
+     get_hint_history,
+     add_hint_to_history
+ )
from .hint_generator import generate_hints_with_gemini, extract_advice_text
from .database import get_puzzle_by_date, save_guess_record


@@ -75,7 +79,10 @@ async def analyze_voice(audio_bytes: bytes, date: str, session_id: str) -> Dict:
logger.info(f"VoiceKit scores: pitch={pitch}, rhythm={rhythm}, energy={energy}, "
            f"pronunciation={pronunciation}, transcript={transcript}, overall={overall}")

- # 4.
+ # 4. Get hint history for this session (to avoid repetition)
+ hint_history = get_hint_history(session_id)
+
+ # 5. Generate hints with Gemini (including user's spoken text for context-aware advice)
gemini_start = time.time()
hints_json = await generate_hints_with_gemini(
    scores={

@@ -87,15 +94,21 @@ async def analyze_voice(audio_bytes: bytes, date: str, session_id: str) -> Dict:
    },
    attempt=attempt,
    answer_word=puzzle["answer_word"],
-     category=puzzle["category"]
+     category=puzzle["category"],
+     user_text=user_text,
+     hint_history=hint_history
)
gemini_time = (time.time() - gemini_start) * 1000
logger.info(f"⏱️ Gemini hint generation: {gemini_time:.1f}ms")
+ logger.info(f"Generated hints: {hints_json}")

advice = extract_advice_text(hints_json)
is_correct = overall > 85

- #
+ # 6. Store this hint in history (for next time)
+ add_hint_to_history(session_id, advice)
+
+ # 7. Save guess record to database
save_guess_record(
    session_id=session_id,
    puzzle_number=puzzle["puzzle_number"],
client/services/hint_generator.py CHANGED

@@ -34,9 +34,10 @@ def list_hint_files(category: str) -> list:


async def generate_hints_with_gemini(
-     scores: dict, attempt: int, answer_word: str, category: str
+     scores: dict, attempt: int, answer_word: str, category: str,
+     user_text: str = "", hint_history: list = None
) -> dict:
-     """Generate JSON hints using Gemini LLM"""
+     """Generate JSON hints using Gemini LLM (with hint history to avoid repetition)"""
    try:
        # Find weakest metrics
        metrics = {

@@ -79,68 +80,73 @@ async def generate_hints_with_gemini(
guidance = f"Attempt {attempt}! Focus on pronunciation coaching for {', '.join(weakest_names)}. Give very strong hints about what to say."
category_hint = f"After {attempt} attempts, be very helpful while still not directly revealing the answer."

+ # Format hint history for prompt (avoid repetition)
+ history_text = ""
+ if hint_history and len(hint_history) > 0:
+     recent_hints = hint_history[-5:]  # Last 5 hints only
+     history_text = "\n".join([f"  - {h}" for h in recent_hints])
+
+ # Overall score for context-aware advice
+ overall_score = scores.get('overall', 0)
+
# Build prompt for Gemini
- prompt = f"""You
-
-
- -
- -
- -
- -
- -
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- 2. Hints should get progressively more helpful with each attempt
- 3. For "hint" type: Follow the guidance above based on attempt number
- 4. For "advice" type: Focus on pronunciation + give strong contextual clues
- 5. Keep text concise (1-2 sentences max)
- 6. NEVER reveal the answer directly, but after 10+ attempts be very helpful
- 7. Return ONLY valid JSON, no markdown, no extra text
+ prompt = f"""You generate hints for a pronunciation game. User tries to guess and say a secret phrase.
+
+ CONTEXT:
+ - User said: "{user_text}"
+ - Secret answer: "{answer_word}" (NEVER reveal!)
+ - Category: {category}
+ - Overall score: {overall_score}/100
+ - Attempt: {attempt}
+
+ PREVIOUS HINTS GIVEN (DO NOT REPEAT THESE - give NEW information!):
+ {history_text if history_text else "  (none yet)"}
+
+ MANDATORY FORMAT: Always start with "You said '[what user said]' - " then your feedback.
+
+ RULES:
+ 1. If overall >= 70: User is saying the RIGHT phrase. Give pronunciation tips.
+    → "You said 'Wingardium Leviosa' - Correct! Work on your pitch - try more dramatic."
+
+ 2. If overall < 70: User is saying the WRONG phrase. Analyze what they said and GUIDE them:
+    - Same franchise? → "You said 'Shut up Malfoy' - Right franchise! Now think of a famous SPELL..."
+    - Similar category? → "You said 'I'll be back' - Good movie instinct! But try a magical fantasy..."
+    - Unrelated? → "You said 'Hello' - That's not it. This is a famous {category}..."
+
+ 3. NEVER say "focus on pronunciation" when overall < 70!
+
+ 4. Be helpful based on attempt ({attempt}): 1-3 vague, 4-6 specific, 7+ very helpful.
+
+ 5. NEVER repeat hints from the history above! Always give NEW, FRESH information.
+
+ Return ONLY this JSON:
+ {{"type": "{hint_type}", "answer": [{{"text": "You said '...' - your feedback", "path": ""}}]}}
"""

+ print(f"[GEMINI HINT] Calling Gemini for hint generation...")
+ print(f"[GEMINI HINT] User said: '{user_text}', Overall: {overall_score}, Attempt: {attempt}")
+ print(f"[GEMINI HINT] Hint history: {hint_history}")
+
# Call Gemini
response = call_gemini_with_tools(
    model_name="gemini-2.5-flash",
    system_prompt="You are a JSON generator. Return ONLY valid JSON with no markdown formatting or extra text.",
    messages=[{"role": "user", "content": prompt}],
    tools=[],
-     max_tokens=
+     max_tokens=2048,  # Generous limit for hint generation with history
)

# Extract JSON from response
response_text, error = get_text_from_gemini_response(response)
+ print(f"[GEMINI HINT] Response text: {response_text[:200] if response_text else 'None'}...")
+ print(f"[GEMINI HINT] Error: {error}")
+
if error:
    logger.error(f"Gemini response error: {error}")
-
+     print(f"[GEMINI HINT] ⚠️ FALLBACK triggered due to error: {error}")
    return {
        "type": "advice",
-         "answer": [
-             {
-                 "text": f"Focus on improving {weakest_names[0]} (score: {weakest[0][1]:.0f}/100)",
-                 "path": "",
-             }
-         ],
+         "answer": [{"text": f"Keep trying! This is a famous {category}.", "path": ""}]
    }

# Clean response text (remove markdown code blocks if present)

@@ -148,24 +154,25 @@ async def generate_hints_with_gemini(
if response_text.startswith("```"):
    lines = response_text.split("\n")
    response_text = "\n".join(lines[1:-1]) if len(lines) > 2 else response_text
+     print(f"[GEMINI HINT] Cleaned markdown, result: {response_text[:200]}...")

# Parse JSON
+ print(f"[GEMINI HINT] Parsing JSON: {response_text[:300]}...")
hints_json = json.loads(response_text)
+ print(f"[GEMINI HINT] ✅ Parsed successfully: {hints_json}")
logger.info(f"✅ Generated fresh hint for attempt {attempt}, category {category}")

return hints_json

except Exception as e:
    logger.error(f"Hint generation error: {e}")
+     print(f"[GEMINI HINT] ⚠️ EXCEPTION: {type(e).__name__}: {e}")
+     import traceback
+     traceback.print_exc()
    # Fallback hint
    return {
        "type": "advice",
-         "answer": [
-             {
-                 "text": "Keep practicing! Focus on your pronunciation.",
-                 "path": "",
-             }
-         ],
+         "answer": [{"text": f"Keep trying! This is a famous {category}.", "path": ""}]
    }

client/services/voice_analyzer.py CHANGED

@@ -39,6 +39,9 @@ _mcp_lock = None
# Session tracking for attempt counts
_session_attempts = {}

+ # Session tracking for hint/advice history (to avoid repetition)
+ _session_hint_history = {}  # {session_id: [list of previous hints/advice]}
+

async def initialize_voicekit_mcp():
    """Initialize VoiceKit MCP connection on app startup"""

@@ -243,6 +246,27 @@ def get_attempt_count(session_id: str) -> int:
    return _session_attempts[session_id]


+ def get_hint_history(session_id: str) -> list:
+     """Get hint history for session (to avoid repetition)"""
+     global _session_hint_history
+     if session_id not in _session_hint_history:
+         _session_hint_history[session_id] = []
+     return _session_hint_history[session_id]
+
+
+ def add_hint_to_history(session_id: str, hint_text: str) -> None:
+     """Add a hint to session history (keeps last 10 hints)"""
+     global _session_hint_history
+     if session_id not in _session_hint_history:
+         _session_hint_history[session_id] = []
+
+     if hint_text and hint_text != "Keep practicing!":
+         _session_hint_history[session_id].append(hint_text)
+         # Keep only last 10 hints to avoid memory bloat
+         if len(_session_hint_history[session_id]) > 10:
+             _session_hint_history[session_id] = _session_hint_history[session_id][-10:]
+
+
async def analyze_voice_with_mcp(
    audio_bytes: bytes,
    session_id: str,
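The two helpers added above implement a simple per-session hint history: a module-level dict keyed by session id, trimmed to the last 10 entries so repeated hints can be avoided without unbounded memory growth. Below is a minimal standalone sketch of the same pattern for illustration only; the committed versions live in client/services/voice_analyzer.py.

```python
# Standalone sketch of the per-session hint-history pattern added above:
# a module-level dict keyed by session_id, trimmed to the 10 most recent hints.
_session_hint_history: dict[str, list[str]] = {}

def get_hint_history(session_id: str) -> list[str]:
    """Return (and lazily create) the hint list for a session."""
    return _session_hint_history.setdefault(session_id, [])

def add_hint_to_history(session_id: str, hint_text: str) -> None:
    """Append a hint and keep only the 10 most recent entries."""
    history = _session_hint_history.setdefault(session_id, [])
    if hint_text and hint_text != "Keep practicing!":
        history.append(hint_text)
        del history[:-10]  # trim in place so existing references stay valid

if __name__ == "__main__":
    for n in range(12):
        add_hint_to_history("demo", f"hint {n}")
    print(get_hint_history("demo"))  # -> ['hint 2', ..., 'hint 11']
```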
client/utils/elevenlabs_tts.py CHANGED

@@ -7,8 +7,10 @@ audio hints in the Audio Semantle game.

import os
import hashlib
+ import time
from pathlib import Path
from typing import Optional
+ from io import BytesIO

# Try to import ElevenLabs SDK
try:

@@ -18,8 +20,8 @@ except ImportError:
    ELEVENLABS_AVAILABLE = False
    print("Warning: elevenlabs package not installed. Audio hints will not be available.")

- # Configuration - use
- AUDIO_HINTS_DIR = Path("/
+ # Configuration - use project uploads directory for Gradio compatibility
+ AUDIO_HINTS_DIR = Path(__file__).parent.parent / "uploads" / "audio_hints"
AUDIO_HINTS_DIR.mkdir(parents=True, exist_ok=True)

# In-memory cache for generated audio hints

@@ -65,6 +67,10 @@ def clone_voice_from_reference(reference_audio_path: str) -> Optional[str]:
# Strip leading slash if present and resolve from project root
full_path = project_root / reference_audio_path.lstrip("/")

+ # Always use .wav for ElevenLabs (required format for voice cloning)
+ full_path = full_path.with_suffix('.wav')
+ print(f"🎵 Using WAV format for ElevenLabs: {full_path}")
+
if not full_path.exists():
    print(f"❌ Reference audio not found: {full_path}")
    return None

@@ -76,11 +82,13 @@ def clone_voice_from_reference(reference_audio_path: str) -> Optional[str]:
voice_name = f"puzzle_voice_{hashlib.md5(reference_audio_path.encode()).hexdigest()[:8]}"
print(f"🎤 Cloning voice from: {full_path}")

+ # Read file as BytesIO (required by ElevenLabs SDK)
+ # remove_background_noise=False allows shorter samples (<4.6s)
voice = client.voices.ivc.create(
    name=voice_name,
-     files=[
+     files=[BytesIO(open(full_path, "rb").read())],
    description="Cloned voice for Audio Semantle hints",
-     remove_background_noise=
+     remove_background_noise=False
)

voice_id = voice.voice_id

@@ -118,15 +126,9 @@ def generate_audio_hint(text: str, hint_type: str = "syllable", voice: str = "Ra
    return None

try:
-     # Generate
-     ref_hash = hashlib.md5((reference_audio_path or "").encode()).hexdigest()[:8]
+     # Generate unique filename with timestamp (no caching)
    text_hash = hashlib.md5(text.encode()).hexdigest()[:12]
-
-
-     # Check cache first
-     if cache_key in _audio_hint_cache:
-         print(f"✅ Using cached audio hint: {cache_key}")
-         return _audio_hint_cache[cache_key]
+     timestamp = int(time.time() * 1000)

    # Initialize ElevenLabs client (v2.24.0 API)
    api_key = get_api_key()

@@ -159,8 +161,8 @@ def generate_audio_hint(text: str, hint_type: str = "syllable", voice: str = "Ra
        output_format="mp3_44100_128"
    )

-     # Save to file
-     filename = f"{text_hash}_{hint_type}.mp3"
+     # Save to file with unique timestamp
+     filename = f"{text_hash}_{hint_type}_{timestamp}.mp3"
    filepath = AUDIO_HINTS_DIR / filename

    # Write audio bytes to file

@@ -168,11 +170,10 @@ def generate_audio_hint(text: str, hint_type: str = "syllable", voice: str = "Ra
        for chunk in audio:
            f.write(chunk)

-     # Return absolute path
+     # Return absolute path
    absolute_path = str(filepath)
-     _audio_hint_cache[cache_key] = absolute_path

-     print(f"✅ Audio hint
+     print(f"✅ Audio hint generated (fresh): {absolute_path}")
    return absolute_path

except Exception as e:

@@ -184,7 +185,8 @@ def generate_audio_hint(text: str, hint_type: str = "syllable", voice: str = "Ra

def extract_hint_portion(answer_word: str, attempts: int) -> tuple[str, str]:
    """
-     Extract what portion of the answer to pronounce based on attempt count
+     Extract what portion of the answer to pronounce based on attempt count.
+     Uses natural syllable breaks for more gradual progression.

    Args:
        answer_word: The correct answer

@@ -193,44 +195,119 @@ def extract_hint_portion(answer_word: str, attempts: int) -> tuple[str, str]:
    Returns:
        Tuple of (text_to_speak, hint_type)

-     Strategy:
-     - Attempt
-     - Attempt
-     - Attempt
-     - Attempt
+     Strategy (syllable-based, gradual):
+     - Attempt 1-2: First syllable (~25%) with "-"
+     - Attempt 3-4: ~40% of first word with "-"
+     - Attempt 5-6: ~60% of first word with "-"
+     - Attempt 7-8: Full first word
+     - Attempt 9-10: First half of phrase
+     - Attempt 11+: Almost full (missing last part)
    """
    words = answer_word.split()
-
-
-
-
-
-
-
-
-
-
-
+     first_word = words[0]
+
+     def get_syllable_break(word: str, fraction: float) -> int:
+         """Find a natural syllable break point at approximately the given fraction of the word."""
+         target = int(len(word) * fraction)
+         vowels = set('aeiouAEIOU')
+
+         # Search around target for a consonant after vowel (syllable break)
+         for i in range(max(2, target - 2), min(len(word), target + 3)):
+             if i > 0 and word[i-1] in vowels and word[i] not in vowels:
+                 return i
+         return max(2, target)
+
+     if attempts <= 2:
+         # First syllable only: ~25% with natural break
+         cut = get_syllable_break(first_word, 0.25)
+         return first_word[:cut] + "-", "minimal"
+
+     elif attempts <= 4:
+         # ~40% of first word
+         cut = get_syllable_break(first_word, 0.4)
+         return first_word[:cut] + "-", "syllable"
+
+     elif attempts <= 6:
+         # ~60% of first word
+         cut = get_syllable_break(first_word, 0.6)
+         return first_word[:cut] + "-", "partial"
+
+     elif attempts <= 8:
+         # First word only
+         return first_word, "word"
+
+     elif attempts <= 10:
        # First half of phrase
-         mid = len(words) // 2
-
-         return " ".join(words[:mid]), "partial"
-         return words[0], "partial"
-
-     elif attempts == 7:
-         # Rhythm pattern with pauses
-         return " ... ".join(words), "rhythm"
+         mid = max(1, len(words) // 2)
+         return " ".join(words[:mid]), "half"

-
-     # Almost full (missing last word)
+     else:  # 11+
+         # Almost full (missing last word or 20%)
        if len(words) > 1:
            return " ".join(words[:-1]), "almost_full"
-         # For single word, return first 80%
        cutoff = int(len(answer_word) * 0.8)
        return answer_word[:cutoff], "almost_full"

-
-
+
+ def extract_hint_portion_for_word(answer_word: str, attempts: int, word_index: int = 0) -> tuple[str, str]:
+     """
+     Extract what portion of a SPECIFIC WORD to pronounce based on attempt count.
+
+     This allows the chatbot to hint specific words when user already knows others.
+     For example, if user knows "Wingardium" but not "Leviosa", set word_index=1.
+
+     Args:
+         answer_word: The full correct answer (may have multiple words)
+         attempts: Number of attempts user has made
+         word_index: Which word to hint (0=first, 1=second, etc.)
+
+     Returns:
+         Tuple of (text_to_speak, hint_type)
+     """
+     words = answer_word.split()
+
+     # Clamp word_index to valid range
+     if word_index < 0:
+         word_index = 0
+     if word_index >= len(words):
+         word_index = len(words) - 1
+
+     target_word = words[word_index]
+
+     def get_syllable_break(word: str, fraction: float) -> int:
+         """Find a natural syllable break point at approximately the given fraction of the word."""
+         target = int(len(word) * fraction)
+         vowels = set('aeiouAEIOU')
+
+         # Search around target for a consonant after vowel (syllable break)
+         for i in range(max(2, target - 2), min(len(word), target + 3)):
+             if i > 0 and word[i-1] in vowels and word[i] not in vowels:
+                 return i
+         return max(2, target)
+
+     # Progressive hints for the target word
+     if attempts <= 2:
+         # First syllable only: ~25% with natural break
+         cut = get_syllable_break(target_word, 0.25)
+         return target_word[:cut] + "-", "minimal"
+
+     elif attempts <= 4:
+         # ~40% of target word
+         cut = get_syllable_break(target_word, 0.4)
+         return target_word[:cut] + "-", "syllable"
+
+     elif attempts <= 6:
+         # ~60% of target word
+         cut = get_syllable_break(target_word, 0.6)
+         return target_word[:cut] + "-", "partial"
+
+     elif attempts <= 8:
+         # Full target word
+         return target_word, "word"
+
+     else:  # 9+
+         # Full target word (no more to reveal for single word)
+         return target_word, "almost_full"


def should_offer_audio_hint(message: str, attempts: int, answer_word: str) -> bool:
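To illustrate the progressive, syllable-based reveal added above, the standalone sketch below mirrors the `extract_hint_portion` strategy and prints what would be spoken for a two-word answer at increasing attempt counts. It is illustrative only, not the committed implementation; "Wingardium Leviosa" is used simply because this commit also adds a matching reference audio file.

```python
# Standalone sketch: progressive audio-hint portions, mirroring the
# syllable-break strategy introduced in client/utils/elevenlabs_tts.py.
def get_syllable_break(word: str, fraction: float) -> int:
    """Find a natural syllable break near the given fraction of the word."""
    target = int(len(word) * fraction)
    vowels = set('aeiouAEIOU')
    for i in range(max(2, target - 2), min(len(word), target + 3)):
        if word[i - 1] in vowels and word[i] not in vowels:
            return i
    return max(2, target)

def hint_portion(answer: str, attempts: int) -> str:
    """Return the text a TTS hint would speak at this attempt count."""
    words = answer.split()
    first = words[0]
    if attempts <= 2:
        return first[:get_syllable_break(first, 0.25)] + "-"
    if attempts <= 4:
        return first[:get_syllable_break(first, 0.4)] + "-"
    if attempts <= 6:
        return first[:get_syllable_break(first, 0.6)] + "-"
    if attempts <= 8:
        return first
    if attempts <= 10:
        return " ".join(words[:max(1, len(words) // 2)])
    # 11+: almost full (drop the last word, or speak ~80% of a single word)
    return " ".join(words[:-1]) if len(words) > 1 else answer[:int(len(answer) * 0.8)]

if __name__ == "__main__":
    for n in (1, 3, 5, 7, 9, 11):
        print(n, "->", hint_portion("Wingardium Leviosa", n))
```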
gemini_adapter.py
CHANGED

@@ -84,6 +84,19 @@ def convert_messages_to_gemini_format(anthropic_messages):
         if isinstance(content, str):
             # Simple text message
             parts.append(types.Part(text=content))
+        elif isinstance(content, dict):
+            # Could be Gradio file format {"path": ..., "mime_type": ...}
+            # Skip audio/video files - they can't be sent to Gemini text API
+            if content.get("path") and content.get("mime_type"):
+                print(f"DEBUG convert_messages: Skipping file content: {content.get('mime_type')}")
+                continue
+            # Could be text content {"type": "text", "text": "..."}
+            elif content.get("type") == "text":
+                parts.append(types.Part(text=content.get("text", "")))
+            elif hasattr(content, '__class__') and 'Audio' in content.__class__.__name__:
+                # Skip Gradio Audio component objects
+                print(f"DEBUG convert_messages: Skipping Gradio component: {content.__class__.__name__}")
+                continue
         elif isinstance(content, list):
             # Complex content with tool calls/results
             for item in content:
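A quick sketch of the content shapes this branch now tolerates, with illustrative values that are not part of the commit:

# Gradio-style file payload: skipped, because audio/video cannot go to the Gemini text API.
file_content = {"path": "/tmp/recording.wav", "mime_type": "audio/wav"}

# Text payload: converted into a text Part.
text_content = {"type": "text", "text": "How do I pronounce the answer?"}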
gradio_ui.py
ADDED

@@ -0,0 +1,279 @@
+"""
+Gradio UI for Chloe's Voice Komentle Game
+Connects to FastAPI backend for voice analysis
+"""
+
+import os
+# Set Gradio temp directory BEFORE importing gradio
+_upload_dir = os.path.join(os.path.dirname(__file__), "gradio_uploads")
+os.makedirs(_upload_dir, exist_ok=True)
+os.environ["GRADIO_TEMP_DIR"] = _upload_dir
+
+import gradio as gr
+from datetime import datetime
+import uuid
+import asyncio
+from sqlalchemy import create_engine, text
+from dotenv import load_dotenv
+
+# Load environment variables
+load_dotenv()
+
+# Import backend functions
+from backend import (
+    analyze_voice_logic,
+    get_puzzle_by_date,
+    lifespan,
+    app as backend_app,
+)
+
+# Database connection
+DATABASE_URL = os.getenv("DATABASE_URL")
+engine = create_engine(
+    DATABASE_URL,
+    pool_size=10,        # default connection pool size
+    max_overflow=20,     # maximum number of extra connections
+    pool_pre_ping=True,  # validate connections before use
+    pool_recycle=3600,   # recycle connections every hour
+    connect_args={
+        "connect_timeout": 10,                    # connection timeout: 10 seconds
+        "options": "-c statement_timeout=30000"   # query timeout: 30 seconds
+    }
+)
+
+# Session ID (persistent across attempts)
+session_id = str(uuid.uuid4())
+
+# Backend initialization flag
+backend_initialized = False
+
+
+async def analyze_voice_async(audio_file, date_str):
+    """
+    Analyze voice using backend logic directly
+
+    Args:
+        audio_file: Path to recorded audio file
+        date_str: Date string for puzzle lookup
+
+    Returns:
+        tuple: (result_text, scores_text, hint_text, image_path)
+    """
+    if audio_file is None:
+        return "Please record audio first!", "", "", None
+
+    try:
+        # Read audio file
+        with open(audio_file, "rb") as f:
+            audio_bytes = f.read()
+
+        # Call backend logic directly
+        result = await analyze_voice_logic(audio_bytes, date_str, session_id)
+
+        # Handle errors
+        if result.get("status") == "error":
+            return f"Error: {result.get('message', 'Unknown error')}", "", "", None
+
+        # Parse response (already in 0-100 range from backend)
+        category = result.get("category", "unknown")
+        pitch = result.get("pitch", 0.0)
+        rhythm = result.get("rhythm", 0.0)
+        energy = result.get("energy", 0.0)
+        pronunciation = result.get("pronunciation", 0.0)
+        transcript = result.get("transcript", 0.0)
+        overall = result.get("overall", 0.0)
+        advice = result.get("advice", "")
+        is_correct = result.get("is_correct", False)
+        hints = {}  # hints are embedded in advice now
+
+        # Format result message
+        if is_correct:
+            result_msg = f"Correct! Overall score: {overall:.1f}/100"
+        else:
+            result_msg = f"Overall score: {overall:.1f}/100 - try again!"
+
+        # Format scores
+        scores_text = f"""
+### Score details
+
+**Category:** {category.upper()}
+
+- **Pronunciation:** {pronunciation:.1f}/100
+- **Pitch:** {pitch:.1f}/100
+- **Rhythm:** {rhythm:.1f}/100
+- **Energy:** {energy:.1f}/100
+- **Transcript:** {transcript:.1f}/100
+- **Overall:** {overall:.1f}/100
+"""
+
+        # Format hints
+        hint_text = ""
+        hint_image = None
+
+        if hints and "answer" in hints:
+            hint_type = hints.get("type", "hint")
+            hint_items = hints.get("answer", [])
+
+            if hint_type == "hint":
+                hint_text = "**Hint:**\n\n"
+            else:
+                hint_text = "**Pronunciation advice:**\n\n"
+
+            for item in hint_items:
+                hint_text += f"{item.get('text', '')}\n\n"
+
+                # Get image path if exists
+                img_path = item.get("path", "")
+                if img_path and os.path.exists(img_path):
+                    hint_image = img_path
+
+        # Add advice if no hints
+        if not hint_text and advice:
+            hint_text = f"**Advice:**\n\n{advice}"
+
+        return result_msg, scores_text, hint_text, hint_image
+
+    except Exception as e:
+        return f"Error: {str(e)}", "", "", None
+
+
+def analyze_voice(audio_file, date_str):
+    """Synchronous wrapper for async analyze_voice_async"""
+    return asyncio.run(analyze_voice_async(audio_file, date_str))
+
+
+def get_today_puzzle():
+    """Get today's puzzle information from database"""
+    try:
+        today = datetime.now().strftime("%Y-%m-%d")
+
+        # Use backend function to get puzzle
+        puzzle = get_puzzle_by_date(today)
+        print(puzzle)
+        if puzzle:
+            return f"""
+### Today's puzzle
+
+**Date:** {puzzle.get('puzzle_date', 'N/A')}
+**Puzzle number:** #{puzzle.get('puzzle_number', 'N/A')}
+**Category:** {puzzle.get('category', 'N/A').upper()}
+**Difficulty:** {puzzle.get('difficulty', 'N/A')}
+
+Say the answer word out loud! (up to 6 attempts)
+"""
+        else:
+            return "Could not find today's puzzle."
+
+    except Exception as e:
+        return f"Could not fetch puzzle info: {str(e)}"
+
+
+def reset_session():
+    """Reset session for new game"""
+    global session_id
+    session_id = str(uuid.uuid4())
+    return "New game started! Please record audio.", "", "", None
+
+
+# Create Gradio Interface
+with gr.Blocks(title="Chloe's Voice Komentle") as demo:
+    gr.Markdown("# Chloe's Voice Komentle")
+
+    # Puzzle info section
+    with gr.Row():
+        puzzle_info = gr.Markdown(value=get_today_puzzle())
+        refresh_btn = gr.Button("Refresh puzzle info", size="sm")
+
+    with gr.Row():
+        with gr.Column(scale=1):
+            # Audio recording
+            gr.Markdown("### Voice recording")
+            audio_input = gr.Audio(
+                sources=["microphone"],
+                type="filepath",
+                label="Record with microphone",
+                format="wav",
+            )
+
+            # Date input (auto-filled with today)
+            date_input = gr.Textbox(
+                label="Date (YYYY-MM-DD)",
+                value=datetime.now().strftime("%Y-%m-%d"),
+                interactive=True,
+            )
+
+            # Submit button
+            submit_btn = gr.Button("Analyze", variant="primary", size="lg")
+            reset_btn = gr.Button("New game", variant="secondary")
+
+        with gr.Column(scale=1):
+            # Results
+            gr.Markdown("### Results")
+            result_output = gr.Markdown(label="Result")
+            scores_output = gr.Markdown(label="Score details")
+
+    # Hints section
+    with gr.Row():
+        with gr.Column():
+            hint_output = gr.Markdown(label="Hints and advice")
+
+        with gr.Column():
+            hint_image = gr.Image(label="Hint image", show_label=True)
+
+    # Event handlers
+    submit_btn.click(
+        fn=analyze_voice,
+        inputs=[audio_input, date_input],
+        outputs=[result_output, scores_output, hint_output, hint_image],
+    )
+
+    reset_btn.click(
+        fn=reset_session,
+        inputs=[],
+        outputs=[result_output, scores_output, hint_output, hint_image],
+    )
+
+    refresh_btn.click(fn=get_today_puzzle, inputs=[], outputs=[puzzle_info])
+
+    # Footer
+    gr.Markdown("---\n**Powered by:** VoiceKit MCP + Gemini AI")
+
+# Launch configuration
+if __name__ == "__main__":
+    print("Starting Chloe's Voice Komentle...")
+
+    # Initialize backend (VoiceKit MCP session)
+    print("Initializing VoiceKit MCP...")
+
+    async def init_backend():
+        """Initialize backend resources"""
+        async with lifespan(backend_app):
+            print("VoiceKit MCP initialized")
+            # Keep the lifespan context active
+            await asyncio.Event().wait()  # Wait forever
+
+    # Run backend initialization in background
+    import threading
+
+    def run_backend_init():
+        asyncio.run(init_backend())
+
+    backend_thread = threading.Thread(target=run_backend_init, daemon=True)
+    backend_thread.start()
+
+    # Wait a bit for initialization
+    import time
+
+    time.sleep(5)
+    print("Backend initialized")
+
+    # Launch Gradio
+    server_host = os.getenv("SERVER_HOST")
+    frontend_port = int(os.getenv("FRONTEND_PORT"))
+    demo.launch(
+        server_name=server_host,  # Listen on all interfaces
+        server_port=frontend_port,  # Default Gradio port
+        share=False,  # Set to True for public link
+        show_error=True,
+        allowed_paths=[os.path.join(os.path.dirname(__file__), "hints", "audio")],  # Allow serving TTS audio hints
+    )
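Note that the launch block above reads SERVER_HOST and FRONTEND_PORT (and, at import time, DATABASE_URL) from the environment with no fallback, so a missing variable makes int(os.getenv("FRONTEND_PORT")) fail. A minimal sketch of the expected variables, using placeholder values that are not part of the commit:

import os

# Placeholder values for illustration only; real values belong in the project's .env file.
os.environ.setdefault("DATABASE_URL", "postgresql+psycopg2://user:password@localhost:5432/komentle")
os.environ.setdefault("SERVER_HOST", "0.0.0.0")
os.environ.setdefault("FRONTEND_PORT", "7860")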
postgresql.py
ADDED

@@ -0,0 +1,88 @@
+import os
+from dotenv import load_dotenv
+from sqlalchemy import create_engine, text
+import pandas as pd
+import gradio as gr
+
+load_dotenv()
+
+DATABASE_URL = os.getenv('DATABASE_URL')
+engine = create_engine(DATABASE_URL)
+
+def test_connection():
+    """Test database connection and show basic info"""
+    try:
+        with engine.connect() as connection:
+            result = connection.execute(text("SELECT version()"))
+            version = result.scalar()
+            return f"Connection successful!\nDatabase version: {version}"
+    except Exception as e:
+        return f"Connection failed: {e}"
+
+def get_all_puzzles():
+    """Fetch all puzzles from database"""
+    try:
+        query = text("SELECT * FROM puzzles ORDER BY puzzle_number")
+        df = pd.read_sql_query(query, engine)
+        return df
+    except Exception as e:
+        return pd.DataFrame({"error": [str(e)]})
+
+def get_puzzle_by_difficulty(difficulty):
+    """Fetch puzzles filtered by difficulty"""
+    try:
+        if difficulty == "All":
+            query = text("SELECT * FROM puzzles ORDER BY puzzle_number")
+            df = pd.read_sql_query(query, engine)
+        else:
+            query = text("SELECT * FROM puzzles WHERE difficulty = :difficulty ORDER BY puzzle_number")
+            df = pd.read_sql_query(query, engine, params={"difficulty": difficulty})
+        return df
+    except Exception as e:
+        return pd.DataFrame({"error": [str(e)]})
+
+def get_table_count():
+    """Get total count of puzzles"""
+    try:
+        query = text("SELECT COUNT(*) FROM puzzles")
+        with engine.connect() as connection:
+            result = connection.execute(query)
+            count = result.scalar()
+            return f"Total puzzles: {count}"
+    except Exception as e:
+        return f"Error: {e}"
+
+with gr.Blocks(title="Puzzles DB Test") as demo:
+    gr.Markdown("# Puzzles database connection test")
+
+    with gr.Row():
+        with gr.Column():
+            test_btn = gr.Button("Connection test", variant="primary")
+            connection_status = gr.Textbox(label="Connection status", lines=3)
+
+        with gr.Column():
+            count_btn = gr.Button("Check row count")
+            count_output = gr.Textbox(label="Count")
+
+    gr.Markdown("## Fetch all data")
+    load_all_btn = gr.Button("Load all puzzles")
+    all_data = gr.Dataframe(label="All puzzle data")
+
+    gr.Markdown("## Filter by difficulty")
+    difficulty_filter = gr.Dropdown(
+        ["All", "easy", "medium", "hard"],
+        value="All",
+        label="Select difficulty"
+    )
+    filter_btn = gr.Button("Apply filter")
+    filtered_data = gr.Dataframe(label="Filtered data")
+
+    # Event handlers
+    test_btn.click(test_connection, outputs=connection_status)
+    count_btn.click(get_table_count, outputs=count_output)
+    load_all_btn.click(get_all_puzzles, outputs=all_data)
+    filter_btn.click(get_puzzle_by_difficulty, inputs=difficulty_filter, outputs=filtered_data)
+    difficulty_filter.change(get_puzzle_by_difficulty, inputs=difficulty_filter, outputs=filtered_data)
+
+if __name__ == "__main__":
+    demo.launch()
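The helpers can also be exercised without the UI; a minimal sketch, assuming DATABASE_URL is set in .env and the puzzles table exists:

from postgresql import test_connection, get_puzzle_by_difficulty

print(test_connection())                 # PostgreSQL version string on success
print(get_puzzle_by_difficulty("easy"))  # DataFrame of easy puzzles, or an "error" DataFrame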
reference_audio/movie/wingardiumleviosa.mp3
ADDED

Binary file (35.8 kB).
requirements.txt
CHANGED

@@ -2,17 +2,26 @@ aiofiles==24.1.0
 annotated-doc==0.0.4
 annotated-types==0.7.0
 anyio==4.11.0
+asttokens==3.0.1
+attrs==25.4.0
 brotli==1.2.0
+cachetools==6.2.2
 certifi==2025.11.12
+cffi==2.0.0
+charset-normalizer==3.4.4
 click==8.3.1
 colorama==0.4.6
+cryptography==46.0.3
+decorator==5.2.1
 dotenv==0.9.9
-elevenlabs==
+elevenlabs==2.24.0
+executing==2.2.1
 fastapi==0.122.0
 ffmpy==1.0.0
 filelock==3.20.0
 fsspec==2025.10.0
-google-
+google-auth==2.43.0
+google-genai==1.52.0
 gradio==6.0.0
 gradio_client==2.0.0.dev3
 greenlet==3.2.4
@@ -21,42 +30,69 @@ h11==0.16.0
 hf-xet==1.2.0
 httpcore==1.0.9
 httpx==0.28.1
+httpx-sse==0.4.3
 huggingface_hub==1.1.5
 idna==3.11
+ipython==9.7.0
+ipython_pygments_lexers==1.1.1
+jedi==0.19.2
 Jinja2==3.1.6
+jsonschema==4.25.1
+jsonschema-specifications==2025.9.1
 markdown-it-py==4.0.0
 MarkupSafe==3.0.3
+matplotlib-inline==0.2.1
+mcp==1.22.0
 mdurl==0.1.2
-numpy
+numpy==1.26.4
 orjson==3.11.4
 packaging==25.0
-pandas
+pandas==2.2.3
+parso==0.8.5
+pexpect==4.9.0
+pillow==11.3.0
+prompt_toolkit==3.0.52
 psycopg2-binary==2.9.11
+ptyprocess==0.7.0
+pure_eval==0.2.3
+pyasn1==0.6.1
+pyasn1_modules==0.4.2
+pycparser==2.23
 pydantic==2.12.4
+pydantic-settings==2.12.0
 pydantic_core==2.41.5
 pydub==0.25.1
 Pygments==2.19.2
+PyJWT==2.10.1
 python-dateutil==2.9.0.post0
 python-dotenv==1.2.1
 python-multipart==0.0.20
 pytz==2025.2
 PyYAML==6.0.3
+referencing==0.37.0
 requests==2.31.0
 rich==14.2.0
+rpds-py==0.29.0
+rsa==4.9.1
 safehttpx==0.1.7
 semantic-version==2.10.0
 shellingham==1.5.4
 six==1.17.0
 sniffio==1.3.1
 SQLAlchemy==2.0.44
+sse-starlette==3.0.3
+stack-data==0.6.3
 starlette==0.50.0
+tenacity==9.1.2
 tomlkit==0.13.3
 tqdm==4.67.1
+traitlets==5.14.3
 typer==0.20.0
 typer-slim==0.20.0
 typing-inspection==0.4.2
 typing_extensions==4.15.0
 tzdata==2025.2
+urllib3==2.5.0
+uvicorn==0.38.0
+wcwidth==0.2.14
+websockets==15.0.1
voice_app.py
ADDED

@@ -0,0 +1,148 @@
+"""
+Gradio Voice Recording App for Komentle
+Records user voice and sends to FastAPI backend
+"""
+import gradio as gr
+import requests
+import uuid
+from datetime import datetime
+import os
+
+# Backend API URL (configurable via environment variable)
+BACKEND_URL = os.getenv("BACKEND_URL", "http://localhost:8000")
+
+def process_voice(audio):
+    """
+    Process recorded voice and send to backend
+
+    Args:
+        audio: tuple (sample_rate, audio_data) or file path
+
+    Returns:
+        dict: Response from backend with scores
+    """
+    if audio is None:
+        return {
+            "status": "error",
+            "message": "No audio was recorded."
+        }
+
+    try:
+        # Generate or retrieve session ID (real session management is needed in practice)
+        session_id = str(uuid.uuid4())
+        today = datetime.now().strftime("%Y-%m-%d")
+
+        # Prepare request data
+        files = {
+            'audio': ('audio.wav', open(audio, 'rb'), 'audio/wav')
+        }
+        data = {
+            'date': today,
+            'session_id': session_id
+        }
+
+        # Send to backend
+        response = requests.post(
+            f"{BACKEND_URL}/api/analyze-voice",
+            files=files,
+            data=data,
+            timeout=30
+        )
+
+        if response.status_code == 200:
+            result = response.json()
+            return format_result(result)
+        else:
+            return {
+                "status": "error",
+                "message": f"Backend error: {response.status_code}"
+            }
+
+    except Exception as e:
+        return {
+            "status": "error",
+            "message": f"Error: {str(e)}"
+        }
+
+def format_result(result):
+    """Format backend response for display"""
+    if result.get("status") == "error":
+        return f"Error: {result.get('message')}"
+
+    category = result.get("category", "unknown")
+    pitch = result.get("pitch", 0.0)
+    rhythm = result.get("rhythm", 0.0)
+    energy = result.get("energy", 0.0)
+    pronunciation = result.get("pronunciation", 0.0)
+    transcript = result.get("transcript", 0.0)  # transcript accuracy score
+    overall = result.get("overall", 0.0)
+    advice = result.get("advice", "")
+    is_correct = result.get("is_correct", False)  # whether the answer was correct
+
+    output = f"Category: {category}\n\n"
+
+    # Correct or not
+    if is_correct:
+        output += "Correct! Congratulations!\n\n"
+    else:
+        output += "Not correct yet. Try again!\n\n"
+
+    output += "Analysis results:\n"
+    output += f"  - Pitch: {pitch:.1f}/100\n"
+    output += f"  - Rhythm: {rhythm:.1f}/100\n"
+    output += f"  - Energy: {energy:.1f}/100\n"
+    output += f"  - Pronunciation: {pronunciation:.1f}/100\n"
+    output += f"  - Transcript accuracy: {transcript:.1f}/100\n"
+    output += f"\nOverall: {overall:.1f}/100\n"
+
+    # AI advice
+    if advice:
+        output += f"\nAI advice:\n{advice}\n"
+
+    return output
+
+# Gradio Interface
+with gr.Blocks(title="Komentle Voice Challenge") as demo:
+    gr.Markdown("# Komentle Voice Challenge")
+    gr.Markdown("Take on today's puzzle with your voice!")
+
+    gr.Markdown("### How to use")
+    gr.Markdown("""
+    1. Click the microphone button to start recording
+    2. Say today's answer out loud
+    3. When the recording is done, click the 'Start analysis' button
+    4. Check the scores the AI produced
+    """)
+
+    with gr.Row():
+        with gr.Column():
+            audio_input = gr.Audio(
+                sources=["microphone"],
+                type="filepath",
+                label="Voice recording",
+                format="wav"
+            )
+            submit_btn = gr.Button("Start analysis", variant="primary", size="lg")
+
+        with gr.Column():
+            result_output = gr.Textbox(
+                label="Analysis results",
+                lines=10,
+                interactive=False
+            )
+
+
+
+    # Event handlers
+    submit_btn.click(
+        fn=process_voice,
+        inputs=audio_input,
+        outputs=result_output
+    )
+
+if __name__ == "__main__":
+    demo.launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        share=False
+    )
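A small sketch of how format_result renders a backend payload; the field values below are made up for illustration, only the keys come from the code above:

from voice_app import format_result

sample = {
    "status": "ok",
    "category": "movie",
    "pitch": 72.0, "rhythm": 64.5, "energy": 80.1,
    "pronunciation": 58.3, "transcript": 90.0, "overall": 71.2,
    "advice": "Stress the second syllable a little more.",
    "is_correct": False,
}
print(format_result(sample))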