SJLee-0525 commited on
Commit
8374119
Β·
1 Parent(s): 6d34043

[TEST] test29

Browse files
.gitignore CHANGED
@@ -4,4 +4,5 @@ gradio_uploads/
4
  venv/
5
  .venv/
6
  __pycache__/
7
-
 
 
4
  venv/
5
  .venv/
6
  __pycache__/
7
+ gradio-env/
8
+ *.wav
client/app.py CHANGED
@@ -1,12 +1,17 @@
1
  """
 
 
2
  μŒμ„± 검증 μ•± - 메인 μ• ν”Œλ¦¬μΌ€μ΄μ…˜ (톡합 버전)
3
  Author: Kevin's Team
4
  Description: μŒμ„± 인식 기반 발음 검증 μ‹œμŠ€ν…œ
5
-
6
- Backend 둜직이 ν†΅ν•©λœ 단일 μ•± 버전 (포트 1개만 μ‚¬μš©)
7
  """
8
 
9
  import os
 
 
 
 
 
10
  import sys
11
  import asyncio
12
 
@@ -279,6 +284,7 @@ class AudioValidationApp:
279
  "category": category, # Puzzle category for chatbot
280
  "answerWord": answer_word, # Answer word for chatbot context
281
  "referenceAudioPath": reference_audio_path, # For TTS voice cloning
 
282
  **metrics
283
  }
284
  )
@@ -318,9 +324,12 @@ class AudioValidationApp:
318
 
319
  # game_state에 μ„±κ³΅ν•œ μ˜€λ””μ˜€ 기둝 μΆ”κ°€ (User Audio ν‘œμ‹œμš©)
320
  updated_game_state = GameStateManager.add_guess(
321
- game_state, recognized_text, audio_path,
 
 
322
  {
323
  "score": score,
 
324
  "answerWord": answer_word,
325
  "referenceAudioPath": reference_audio_path,
326
  "category": category,
@@ -517,6 +526,9 @@ if __name__ == "__main__":
517
  server_port=frontend_port,
518
  show_error=True,
519
  allowed_paths=[UPLOAD_DIR, DOCS_DIR, IMAGES_DIR, REFERENCE_AUDIO_DIR],
520
- footer_links=[]
 
 
 
521
  )
522
 
 
1
  """
2
+ Backend 둜직이 ν†΅ν•©λœ 단일 μ•± 버전 (포트 1개만 μ‚¬μš©)
3
+
4
  μŒμ„± 검증 μ•± - 메인 μ• ν”Œλ¦¬μΌ€μ΄μ…˜ (톡합 버전)
5
  Author: Kevin's Team
6
  Description: μŒμ„± 인식 기반 발음 검증 μ‹œμŠ€ν…œ
 
 
7
  """
8
 
9
  import os
10
+
11
+ # .env λ‘œλ“œ (λ‹€λ₯Έ λͺ¨λ“ˆ import 전에 λ¨Όμ € μ‹€ν–‰)
12
+ from dotenv import load_dotenv
13
+ load_dotenv(os.path.join(os.path.dirname(__file__), "..", ".env"))
14
+
15
  import sys
16
  import asyncio
17
 
 
284
  "category": category, # Puzzle category for chatbot
285
  "answerWord": answer_word, # Answer word for chatbot context
286
  "referenceAudioPath": reference_audio_path, # For TTS voice cloning
287
+ "userText": user_text, # STT result for chatbot context
288
  **metrics
289
  }
290
  )
 
324
 
325
  # game_state에 μ„±κ³΅ν•œ μ˜€λ””μ˜€ 기둝 μΆ”κ°€ (User Audio ν‘œμ‹œμš©)
326
  updated_game_state = GameStateManager.add_guess(
327
+ game_state,
328
+ recognized_text,
329
+ audio_path,
330
  {
331
  "score": score,
332
+ "userText": user_text,
333
  "answerWord": answer_word,
334
  "referenceAudioPath": reference_audio_path,
335
  "category": category,
 
526
  server_port=frontend_port,
527
  show_error=True,
528
  allowed_paths=[UPLOAD_DIR, DOCS_DIR, IMAGES_DIR, REFERENCE_AUDIO_DIR],
529
+ footer_links=[
530
+ {"text": "User Guide", "url": f"/file={DOCS_DIR}/user-guide.html"},
531
+ {"text": "Tech Stack", "url": f"/file={DOCS_DIR}/tech-stack.html"},
532
+ ]
533
  )
534
 
client/frontend/app_ui.py CHANGED
@@ -64,7 +64,7 @@ class AppUI:
64
  import json
65
  stats_json = json.dumps(stats)
66
 
67
- with gr.Blocks(title="VOICE SEMENTLE") as demo:
68
 
69
  # ============== Dashboard Stats (JSμ—μ„œ μ ‘κ·Ό κ°€λŠ₯) ==============
70
  gr.HTML(
 
64
  import json
65
  stats_json = json.dumps(stats)
66
 
67
+ with gr.Blocks(title="VOICE SEMANTLE") as demo:
68
 
69
  # ============== Dashboard Stats (JSμ—μ„œ μ ‘κ·Ό κ°€λŠ₯) ==============
70
  gr.HTML(
client/frontend/components/audio_input.py CHANGED
@@ -1,5 +1,5 @@
1
  """
2
- μ˜€λ””μ˜€ μž…λ ₯ μ»΄ν¬λ„ŒνŠΈ - Voice Sementle μŠ€νƒ€μΌ
3
  μ• λ‹ˆλ©”μ΄μ…˜ν’ ν•˜λŠ˜μƒ‰ ν…Œλ§ˆμ˜ μŒμ„± μž…λ ₯ μΈν„°νŽ˜μ΄μŠ€
4
  μ»€μŠ€ν…€ λ²„νŠΌμœΌλ‘œ Gradio Audio μ»΄ν¬λ„ŒνŠΈ μ œμ–΄
5
 
@@ -10,12 +10,12 @@ import gradio as gr
10
 
11
 
12
  class AudioInputComponent:
13
- """Voice Sementle μŠ€νƒ€μΌ μ˜€λ””μ˜€ μž…λ ₯ μ»΄ν¬λ„ŒνŠΈ"""
14
 
15
  # 마이크 λ²„νŠΌ HTML ν…œν”Œλ¦Ώ
16
  MIC_BUTTON_HTML_TEMPLATE = """
17
  <div class="mic-section">
18
- <div class="mic-status" id="mic-status">Click the microphone button to start recording</div>
19
  <button class="mic-btn" id="mic-btn" title="λ…ΉμŒ μ‹œμž‘">
20
  <svg xmlns="http://www.w3.org/2000/svg" height="44" viewBox="0 0 64 64" width="44">
21
  <path fill="#fff" d="M24 18 Q20 18 20 22 L20 42 Q20 46 24 46 L46 34 Q50 32 46 30 Z" stroke="#fff" stroke-width="2" stroke-linejoin="round" stroke-linecap="round"/>
 
1
  """
2
+ μ˜€λ””μ˜€ μž…λ ₯ μ»΄ν¬λ„ŒνŠΈ - Voice Semantle μŠ€νƒ€μΌ
3
  μ• λ‹ˆλ©”μ΄μ…˜ν’ ν•˜λŠ˜μƒ‰ ν…Œλ§ˆμ˜ μŒμ„± μž…λ ₯ μΈν„°νŽ˜μ΄μŠ€
4
  μ»€μŠ€ν…€ λ²„νŠΌμœΌλ‘œ Gradio Audio μ»΄ν¬λ„ŒνŠΈ μ œμ–΄
5
 
 
10
 
11
 
12
  class AudioInputComponent:
13
+ """Voice Semantle μŠ€νƒ€μΌ μ˜€λ””μ˜€ μž…λ ₯ μ»΄ν¬λ„ŒνŠΈ"""
14
 
15
  # 마이크 λ²„νŠΌ HTML ν…œν”Œλ¦Ώ
16
  MIC_BUTTON_HTML_TEMPLATE = """
17
  <div class="mic-section">
18
+ <div class="mic-status" id="mic-status">Click the play button to start game</div>
19
  <button class="mic-btn" id="mic-btn" title="λ…ΉμŒ μ‹œμž‘">
20
  <svg xmlns="http://www.w3.org/2000/svg" height="44" viewBox="0 0 64 64" width="44">
21
  <path fill="#fff" d="M24 18 Q20 18 20 22 L20 42 Q20 46 24 46 L46 34 Q50 32 46 30 Z" stroke="#fff" stroke-width="2" stroke-linejoin="round" stroke-linecap="round"/>
client/frontend/components/floating_chatbot.py CHANGED
@@ -230,11 +230,12 @@ Greet them warmly and help them understand the game:
230
 
231
  # Add audio hint capability info (Phase 2 + Tool Calling)
232
  if is_elevenlabs_configured():
233
- context_parts.append(f"\n### AUDIO HINT TOOL (IMPORTANT - You MUST use this)")
234
- context_parts.append(f"- You have a tool called `generate_audio_hint` that generates real TTS audio")
235
- context_parts.append(f"- WHEN USER ASKS FOR AUDIO: You MUST call the generate_audio_hint tool")
236
- context_parts.append(f" - Keywords: 'play audio', 'TTS', 'hear it', 'how does it sound', 'generate audio'")
237
- context_parts.append(f" - Do NOT just describe audio - actually CALL the tool!")
 
238
  context_parts.append(f"- Tool parameters:")
239
  context_parts.append(f" - hint_type: 'syllable' | 'partial' | 'rhythm' | 'almost_full'")
240
  context_parts.append(f"- Choose hint_type based on attempt count:")
@@ -242,8 +243,6 @@ Greet them warmly and help them understand the game:
242
  context_parts.append(f" - Attempt 5-6: use 'partial' (first half)")
243
  context_parts.append(f" - Attempt 7-9: use 'rhythm' (with pauses)")
244
  context_parts.append(f" - Attempt 10+: use 'almost_full' (almost complete)")
245
- context_parts.append(f"- After calling the tool, explain what the user will hear")
246
- context_parts.append(f"- Example: User says 'give me TTS' β†’ Call generate_audio_hint(hint_type='syllable')")
247
  context_parts.append("")
248
 
249
  context_parts.append(f"They are trying to figure out what word/phrase to pronounce.\n")
@@ -259,15 +258,19 @@ Greet them warmly and help them understand the game:
259
  pronunciation = ai_analysis.get("pronunciation", "N/A")
260
  overall = ai_analysis.get("overall_score", "N/A") # Match standardized field name
261
 
262
- # Get recognized text from guess (backend doesn't provide transcription yet)
263
  guessed_word = guess.get("guessedWord", "")
 
 
264
 
265
  # AIκ°€ μ€€ 이전 μ‘°μ–Έ/힌트 (μžˆλ‹€λ©΄)
266
  advice = ai_analysis.get("advice", "")
267
 
268
  context_parts.append(f"### Attempt {i}")
269
- # Only show what they said if we have actual text (not just "Score: X.X")
270
- if guessed_word and not guessed_word.startswith("Score:"):
 
 
271
  context_parts.append(f"- What they said: \"{guessed_word}\"")
272
  context_parts.append(f"- Scores: Pitch={pitch}, Rhythm={rhythm}, Energy={energy}, Pronunciation={pronunciation}, Overall={overall}")
273
 
@@ -374,10 +377,16 @@ def call_ai_backend(message: str, user_id: str, history: List[Dict], game_state:
374
  elevenlabs_ready = is_elevenlabs_configured()
375
  print(f"[CHATBOT] ElevenLabs ready: {elevenlabs_ready}, Answer word: '{answer_word}'")
376
 
377
- if elevenlabs_ready and answer_word:
 
 
 
 
 
 
378
  tools = [{
379
  "name": "generate_audio_hint",
380
- "description": "Generate an audio pronunciation hint using text-to-speech. You MUST call this tool when the user asks for audio, TTS, or wants to hear the pronunciation. Do not just describe audio - actually call this function.",
381
  "input_schema": {
382
  "type": "object",
383
  "properties": {
@@ -385,12 +394,18 @@ def call_ai_backend(message: str, user_id: str, history: List[Dict], game_state:
385
  "type": "string",
386
  "enum": ["syllable", "partial", "rhythm", "almost_full"],
387
  "description": "Type of audio hint: 'syllable' (first syllable), 'partial' (first half), 'rhythm' (with pauses), 'almost_full' (missing last part)"
 
 
 
 
388
  }
389
  },
390
  "required": ["hint_type"]
391
  }
392
  }]
393
- print(f"[CHATBOT] Audio tool enabled! Tool count: {len(tools)}")
 
 
394
 
395
  # Call Gemini with tools
396
  response_text, tool_calls, error = chat_with_gemini_and_tools(
@@ -412,15 +427,16 @@ def call_ai_backend(message: str, user_id: str, history: List[Dict], game_state:
412
  for tool_call in tool_calls:
413
  if tool_call['name'] == 'generate_audio_hint':
414
  hint_type = tool_call['input'].get('hint_type', 'syllable')
415
- print(f"[CHATBOT] Gemini requested audio hint: type={hint_type}")
 
416
 
417
- # Extract the appropriate portion based on hint_type and answer_word
418
- from utils.elevenlabs_tts import extract_hint_portion, generate_audio_hint
419
 
420
- text_to_speak, _ = extract_hint_portion(answer_word, attempt_count)
421
  # Use voice cloning from reference audio when available
422
- print(f"[CHATBOT] Generating audio with reference: {reference_audio_path}")
423
- audio_path = generate_audio_hint(text_to_speak, hint_type, reference_audio_path=reference_audio_path)
424
 
425
  if audio_path:
426
  print(f"[CHATBOT] Audio hint generated: {audio_path}")
@@ -560,20 +576,23 @@ class FloatingChatbotComponent:
560
 
561
  print(f"[CHATBOT] AI 응닡: {str(response)[:100]}...")
562
 
563
- # Convert tuple response (text, audio_path) to Gradio format
564
  if isinstance(response, tuple):
565
  text, audio_path = response
566
  # Convert relative path to absolute path for Gradio
567
  import os
568
- if not os.path.isabs(audio_path):
569
  # Path is relative to project root (3 levels up from this file)
570
  project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
571
  audio_path = os.path.join(project_root, audio_path)
572
- content = [
573
- {"type": "text", "text": text},
574
- {"type": "file", "file": {"path": audio_path, "mime_type": "audio/mpeg"}}
575
- ]
576
  print(f"[CHATBOT] Audio hint included: {audio_path}")
 
 
 
 
 
 
 
577
  else:
578
  content = response
579
 
 
230
 
231
  # Add audio hint capability info (Phase 2 + Tool Calling)
232
  if is_elevenlabs_configured():
233
+ context_parts.append(f"\n### AUDIO HINT TOOL (Use ONLY when explicitly requested)")
234
+ context_parts.append(f"- You have a tool called `generate_audio_hint` that generates TTS audio")
235
+ context_parts.append(f"- ONLY call this tool when the user EXPLICITLY asks for audio hints:")
236
+ context_parts.append(f" - Keywords that REQUIRE audio: 'audio hint', 'play audio', 'TTS', 'hear it', 'how does it sound', 'generate audio', 'listen'")
237
+ context_parts.append(f" - Keywords that do NOT require audio: 'hint', 'help', 'clue', 'what is it', general questions")
238
+ context_parts.append(f"- DO NOT call this tool for general hints or questions - only for explicit audio requests")
239
  context_parts.append(f"- Tool parameters:")
240
  context_parts.append(f" - hint_type: 'syllable' | 'partial' | 'rhythm' | 'almost_full'")
241
  context_parts.append(f"- Choose hint_type based on attempt count:")
 
243
  context_parts.append(f" - Attempt 5-6: use 'partial' (first half)")
244
  context_parts.append(f" - Attempt 7-9: use 'rhythm' (with pauses)")
245
  context_parts.append(f" - Attempt 10+: use 'almost_full' (almost complete)")
 
 
246
  context_parts.append("")
247
 
248
  context_parts.append(f"They are trying to figure out what word/phrase to pronounce.\n")
 
258
  pronunciation = ai_analysis.get("pronunciation", "N/A")
259
  overall = ai_analysis.get("overall_score", "N/A") # Match standardized field name
260
 
261
+ # Get recognized text from guess
262
  guessed_word = guess.get("guessedWord", "")
263
+ # Get actual STT result (what user pronounced)
264
+ user_spoken_text = ai_analysis.get("userText", "")
265
 
266
  # AIκ°€ μ€€ 이전 μ‘°μ–Έ/힌트 (μžˆλ‹€λ©΄)
267
  advice = ai_analysis.get("advice", "")
268
 
269
  context_parts.append(f"### Attempt {i}")
270
+ # Show what they actually said via STT (more accurate for pronunciation feedback)
271
+ if user_spoken_text:
272
+ context_parts.append(f"- What they pronounced (STT): \"{user_spoken_text}\"")
273
+ elif guessed_word and not guessed_word.startswith("Score:"):
274
  context_parts.append(f"- What they said: \"{guessed_word}\"")
275
  context_parts.append(f"- Scores: Pitch={pitch}, Rhythm={rhythm}, Energy={energy}, Pronunciation={pronunciation}, Overall={overall}")
276
 
 
377
  elevenlabs_ready = is_elevenlabs_configured()
378
  print(f"[CHATBOT] ElevenLabs ready: {elevenlabs_ready}, Answer word: '{answer_word}'")
379
 
380
+ # Only enable audio tool if user EXPLICITLY asks for audio in THIS message
381
+ audio_keywords = ['audio', 'play', 'sound', 'hear', 'listen', 'tts', 'pronounce', 'λ“€λ €', '발음']
382
+ user_wants_audio = any(kw in message.lower() for kw in audio_keywords)
383
+
384
+ if elevenlabs_ready and answer_word and user_wants_audio:
385
+ # Get word count for tool description
386
+ word_count = len(answer_word.split())
387
  tools = [{
388
  "name": "generate_audio_hint",
389
+ "description": "Generate an audio pronunciation hint using TTS.",
390
  "input_schema": {
391
  "type": "object",
392
  "properties": {
 
394
  "type": "string",
395
  "enum": ["syllable", "partial", "rhythm", "almost_full"],
396
  "description": "Type of audio hint: 'syllable' (first syllable), 'partial' (first half), 'rhythm' (with pauses), 'almost_full' (missing last part)"
397
+ },
398
+ "word_index": {
399
+ "type": "integer",
400
+ "description": f"Which word to hint (0-indexed). The phrase has {word_count} word(s). Use 0 for first word, 1 for second word, etc. If user knows first word but not second, use 1."
401
  }
402
  },
403
  "required": ["hint_type"]
404
  }
405
  }]
406
+ print(f"[CHATBOT] Audio tool enabled! User requested audio.")
407
+ elif elevenlabs_ready and answer_word:
408
+ print(f"[CHATBOT] Audio tool NOT enabled - user didn't request audio. Message: '{message[:50]}...'")
409
 
410
  # Call Gemini with tools
411
  response_text, tool_calls, error = chat_with_gemini_and_tools(
 
427
  for tool_call in tool_calls:
428
  if tool_call['name'] == 'generate_audio_hint':
429
  hint_type = tool_call['input'].get('hint_type', 'syllable')
430
+ word_index = tool_call['input'].get('word_index', 0) # Default to first word
431
+ print(f"[CHATBOT] Gemini requested audio hint: type={hint_type}, word_index={word_index}")
432
 
433
+ # Extract the appropriate portion based on hint_type, word_index, and answer_word
434
+ from utils.elevenlabs_tts import extract_hint_portion_for_word, generate_audio_hint
435
 
436
+ text_to_speak, actual_hint_type = extract_hint_portion_for_word(answer_word, attempt_count, word_index)
437
  # Use voice cloning from reference audio when available
438
+ print(f"[CHATBOT] Generating audio for: '{text_to_speak}' with reference: {reference_audio_path}")
439
+ audio_path = generate_audio_hint(text_to_speak, actual_hint_type, reference_audio_path=reference_audio_path)
440
 
441
  if audio_path:
442
  print(f"[CHATBOT] Audio hint generated: {audio_path}")
 
576
 
577
  print(f"[CHATBOT] AI 응닡: {str(response)[:100]}...")
578
 
579
+ # Convert tuple response (text, audio_path) to Gradio 6 format
580
  if isinstance(response, tuple):
581
  text, audio_path = response
582
  # Convert relative path to absolute path for Gradio
583
  import os
584
+ if audio_path and not os.path.isabs(audio_path):
585
  # Path is relative to project root (3 levels up from this file)
586
  project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
587
  audio_path = os.path.join(project_root, audio_path)
 
 
 
 
588
  print(f"[CHATBOT] Audio hint included: {audio_path}")
589
+ print(f"[CHATBOT] Audio file exists: {os.path.exists(audio_path)}")
590
+
591
+ # Gradio 6: Use gr.Audio() component for audio content
592
+ # Reference: chatbot_core_components_simple demo
593
+ hist.append({"role": "assistant", "content": text})
594
+ hist.append({"role": "assistant", "content": gr.Audio(audio_path)})
595
+ return "", hist, hist
596
  else:
597
  content = response
598
 
client/frontend/components/header.py CHANGED
@@ -1,5 +1,5 @@
1
  """
2
- 헀더 μ»΄ν¬λ„ŒνŠΈ - Voice Sementle μŠ€νƒ€μΌ
3
  μ• λ‹ˆλ©”μ΄μ…˜ν’ κ·€μ—¬μš΄ 둜고 λ””μžμΈ
4
 
5
  πŸ‘¨β€πŸ’» λ‹΄λ‹Ή: 개발자 A
@@ -9,9 +9,9 @@ import gradio as gr
9
 
10
 
11
  class HeaderComponent:
12
- """Voice Sementle μŠ€νƒ€μΌ 헀더 μ»΄ν¬λ„ŒνŠΈ"""
13
 
14
- # Voice Sementle μŠ€νƒ€μΌ 헀더 HTML ν…œν”Œλ¦Ώ
15
  HEADER_HTML_TEMPLATE = """
16
  <link href="https://fonts.googleapis.com/css2?family=Lilita+One&family=Bangers&display=swap" rel="stylesheet">
17
  <div class="vs-header">
@@ -21,7 +21,7 @@ class HeaderComponent:
21
  <span class="title-voice">VOICE</span>
22
  </span>
23
  <span class="title-line">
24
- <span class="title-sementle">SEMENTLE</span>
25
  </span>
26
  </h1>
27
  </div>
@@ -69,7 +69,7 @@ class HeaderComponent:
69
  0 0 20px rgba(90, 200, 250, 0.5);
70
  paint-order: stroke fill;
71
  }
72
- .title-sementle {
73
  font-size: 90px;
74
  font-weight: 500;
75
  color: #e8a054;
@@ -96,7 +96,7 @@ class HeaderComponent:
96
  8px 8px 0 #082a35,
97
  0 0 25px rgba(90, 200, 250, 0.6);
98
  }
99
- .dark .title-sementle {
100
  color: #e8a054;
101
  -webkit-text-stroke: 4px #5c3d1e;
102
  text-shadow:
@@ -118,7 +118,7 @@ class HeaderComponent:
118
  7px 7px 0 #0d4a5f,
119
  0 0 20px rgba(90, 200, 250, 0.5);
120
  }
121
- .title-sementle {
122
  font-size: 72px;
123
  letter-spacing: 4px;
124
  -webkit-text-stroke: 4px #8b5a2b;
@@ -136,7 +136,7 @@ class HeaderComponent:
136
  7px 7px 0 #0d4a5f,
137
  0 0 20px rgba(90, 200, 250, 0.5);
138
  }
139
- .dark .title-sementle {
140
  -webkit-text-stroke: 4px #8b5a2b;
141
  text-shadow:
142
  5px 5px 0 #8b5a2b,
@@ -157,7 +157,7 @@ class HeaderComponent:
157
  5px 5px 0 #0d4a5f,
158
  0 0 15px rgba(90, 200, 250, 0.5);
159
  }
160
- .title-sementle {
161
  font-size: 52px;
162
  letter-spacing: 3px;
163
  -webkit-text-stroke: 3px #8b5a2b;
@@ -173,7 +173,7 @@ class HeaderComponent:
173
  5px 5px 0 #082a35,
174
  0 0 20px rgba(90, 200, 250, 0.6);
175
  }
176
- .dark .title-sementle {
177
  -webkit-text-stroke: 3px #5c3d1e;
178
  text-shadow:
179
  4px 4px 0 #5c3d1e,
 
1
  """
2
+ 헀더 μ»΄ν¬λ„ŒνŠΈ - Voice Semantle μŠ€νƒ€μΌ
3
  μ• λ‹ˆλ©”μ΄μ…˜ν’ κ·€μ—¬μš΄ 둜고 λ””μžμΈ
4
 
5
  πŸ‘¨β€πŸ’» λ‹΄λ‹Ή: 개발자 A
 
9
 
10
 
11
  class HeaderComponent:
12
+ """Voice Semantle μŠ€νƒ€μΌ 헀더 μ»΄ν¬λ„ŒνŠΈ"""
13
 
14
+ # Voice Semantle μŠ€νƒ€μΌ 헀더 HTML ν…œν”Œλ¦Ώ
15
  HEADER_HTML_TEMPLATE = """
16
  <link href="https://fonts.googleapis.com/css2?family=Lilita+One&family=Bangers&display=swap" rel="stylesheet">
17
  <div class="vs-header">
 
21
  <span class="title-voice">VOICE</span>
22
  </span>
23
  <span class="title-line">
24
+ <span class="title-semantle">SEMANTLE</span>
25
  </span>
26
  </h1>
27
  </div>
 
69
  0 0 20px rgba(90, 200, 250, 0.5);
70
  paint-order: stroke fill;
71
  }
72
+ .title-semantle {
73
  font-size: 90px;
74
  font-weight: 500;
75
  color: #e8a054;
 
96
  8px 8px 0 #082a35,
97
  0 0 25px rgba(90, 200, 250, 0.6);
98
  }
99
+ .dark .title-semantle {
100
  color: #e8a054;
101
  -webkit-text-stroke: 4px #5c3d1e;
102
  text-shadow:
 
118
  7px 7px 0 #0d4a5f,
119
  0 0 20px rgba(90, 200, 250, 0.5);
120
  }
121
+ .title-semantle {
122
  font-size: 72px;
123
  letter-spacing: 4px;
124
  -webkit-text-stroke: 4px #8b5a2b;
 
136
  7px 7px 0 #0d4a5f,
137
  0 0 20px rgba(90, 200, 250, 0.5);
138
  }
139
+ .dark .title-semantle {
140
  -webkit-text-stroke: 4px #8b5a2b;
141
  text-shadow:
142
  5px 5px 0 #8b5a2b,
 
157
  5px 5px 0 #0d4a5f,
158
  0 0 15px rgba(90, 200, 250, 0.5);
159
  }
160
+ .title-semantle {
161
  font-size: 52px;
162
  letter-spacing: 3px;
163
  -webkit-text-stroke: 3px #8b5a2b;
 
173
  5px 5px 0 #082a35,
174
  0 0 20px rgba(90, 200, 250, 0.6);
175
  }
176
+ .dark .title-semantle {
177
  -webkit-text-stroke: 3px #5c3d1e;
178
  text-shadow:
179
  4px 4px 0 #5c3d1e,
client/frontend/components/history_display.py CHANGED
@@ -337,7 +337,7 @@ class HistoryDisplayComponent:
337
  <div class="history-list" style='
338
  flex: 1;
339
  min-width: 0;
340
- max-height: 480px;
341
  overflow-y: auto;
342
  '>
343
  """
@@ -401,7 +401,7 @@ class HistoryDisplayComponent:
401
 
402
  <!-- 우츑: 였각 κ·Έλž˜ν”„ μ˜μ—­ (μƒν•˜ 배치 μ‹œ μœ„λ‘œ) -->
403
  <div class="graph-area" style='
404
- min-height: 480px;
405
  background: #f0f7fc;
406
  position: relative;
407
  '>
 
337
  <div class="history-list" style='
338
  flex: 1;
339
  min-width: 0;
340
+ max-height: 560px;
341
  overflow-y: auto;
342
  '>
343
  """
 
401
 
402
  <!-- 우츑: 였각 κ·Έλž˜ν”„ μ˜μ—­ (μƒν•˜ 배치 μ‹œ μœ„λ‘œ) -->
403
  <div class="graph-area" style='
404
+ min-height: 560px;
405
  background: #f0f7fc;
406
  position: relative;
407
  '>
client/frontend/styles/__init__.py CHANGED
@@ -1,5 +1,5 @@
1
  """
2
- Voice Sementle μŠ€νƒ€μΌ λͺ¨λ“ˆ
3
  μ• λ‹ˆλ©”μ΄μ…˜ν’ ν•˜λŠ˜μƒ‰ ν…Œλ§ˆ 기반의 전체 μ• ν”Œλ¦¬μΌ€μ΄μ…˜ CSS
4
  """
5
 
 
1
  """
2
+ Voice Semantle μŠ€νƒ€μΌ λͺ¨λ“ˆ
3
  μ• λ‹ˆλ©”μ΄μ…˜ν’ ν•˜λŠ˜μƒ‰ ν…Œλ§ˆ 기반의 전체 μ• ν”Œλ¦¬μΌ€μ΄μ…˜ CSS
4
  """
5
 
client/frontend/styles/buttons_style.py CHANGED
@@ -1,11 +1,11 @@
1
  """
2
- λ²„νŠΌ 및 μ˜€λ””μ˜€ μž…λ ₯ CSS - Voice Sementle ν…Œλ§ˆ
3
  λ²„νŠΌ μŠ€νƒ€μΌ 및 μ˜€λ””μ˜€ μ»΄ν¬λ„ŒνŠΈ μŠ€νƒ€μΌλ§
4
  """
5
 
6
- # Voice Sementle μŠ€νƒ€μΌ λ²„νŠΌ CSS - μ• λ‹ˆλ©”μ΄μ…˜ν’ ν•˜λŠ˜μƒ‰
7
  BUTTON_CSS = """
8
- /* Voice Sementle μŠ€νƒ€μΌ λ²„νŠΌ - ν•˜λŠ˜μƒ‰ ν…Œλ§ˆ */
9
  #verify-btn,
10
  #restart-btn {
11
  font-family: 'Lilita One' !important;
 
1
  """
2
+ λ²„νŠΌ 및 μ˜€λ””μ˜€ μž…λ ₯ CSS - Voice Semantle ν…Œλ§ˆ
3
  λ²„νŠΌ μŠ€νƒ€μΌ 및 μ˜€λ””μ˜€ μ»΄ν¬λ„ŒνŠΈ μŠ€νƒ€μΌλ§
4
  """
5
 
6
+ # Voice Semantle μŠ€νƒ€μΌ λ²„νŠΌ CSS - μ• λ‹ˆλ©”μ΄μ…˜ν’ ν•˜λŠ˜μƒ‰
7
  BUTTON_CSS = """
8
+ /* Voice Semantle μŠ€νƒ€μΌ λ²„νŠΌ - ν•˜λŠ˜μƒ‰ ν…Œλ§ˆ */
9
  #verify-btn,
10
  #restart-btn {
11
  font-family: 'Lilita One' !important;
client/frontend/styles/chatbot_style.py CHANGED
@@ -1,9 +1,9 @@
1
  """
2
- ν”Œλ‘œνŒ… AI 챗봇 CSS - Voice Sementle ν…Œλ§ˆ
3
  ν”Œλ‘œνŒ… ν† κΈ€ λ²„νŠΌ, 챗봇 μ»¨ν…Œμ΄λ„ˆ, μž…λ ₯ μ˜μ—­ μŠ€νƒ€μΌ
4
  """
5
 
6
- # ν”Œλ‘œνŒ… AI 챗봇 CSS - Voice Sementle ν•˜λŠ˜μƒ‰ ν…Œλ§ˆ
7
  FLOATING_CHATBOT_CSS = """
8
  /* ν”Œλ‘œνŒ… ν† κΈ€ λ²„νŠΌ μ»¨ν…Œμ΄λ„ˆ */
9
  #floating-toggle {
 
1
  """
2
+ ν”Œλ‘œνŒ… AI 챗봇 CSS - Voice Semantle ν…Œλ§ˆ
3
  ν”Œλ‘œνŒ… ν† κΈ€ λ²„νŠΌ, 챗봇 μ»¨ν…Œμ΄λ„ˆ, μž…λ ₯ μ˜μ—­ μŠ€νƒ€μΌ
4
  """
5
 
6
+ # ν”Œλ‘œνŒ… AI 챗봇 CSS - Voice Semantle ν•˜λŠ˜μƒ‰ ν…Œλ§ˆ
7
  FLOATING_CHATBOT_CSS = """
8
  /* ν”Œλ‘œνŒ… ν† κΈ€ λ²„νŠΌ μ»¨ν…Œμ΄λ„ˆ */
9
  #floating-toggle {
client/frontend/styles/custom_css.py CHANGED
@@ -1,5 +1,5 @@
1
  """
2
- μ»€μŠ€ν…€ CSS μŠ€νƒ€μΌ - Voice Sementle ν…Œλ§ˆ
3
  μ• λ‹ˆλ©”μ΄μ…˜ν’ ν•˜λŠ˜μƒ‰ ν…Œλ§ˆ 기반의 전체 μ• ν”Œλ¦¬μΌ€μ΄μ…˜ CSS
4
 
5
  이 νŒŒμΌμ€ ν•˜μœ„ ν˜Έν™˜μ„±μ„ μœ„ν•΄ μœ μ§€λ©λ‹ˆλ‹€.
 
1
  """
2
+ μ»€μŠ€ν…€ CSS μŠ€νƒ€μΌ - Voice Semantle ν…Œλ§ˆ
3
  μ• λ‹ˆλ©”μ΄μ…˜ν’ ν•˜λŠ˜μƒ‰ ν…Œλ§ˆ 기반의 전체 μ• ν”Œλ¦¬μΌ€μ΄μ…˜ CSS
4
 
5
  이 νŒŒμΌμ€ ν•˜μœ„ ν˜Έν™˜μ„±μ„ μœ„ν•΄ μœ μ§€λ©λ‹ˆλ‹€.
client/frontend/styles/failure_modal_style.py CHANGED
@@ -1,11 +1,11 @@
1
  """
2
- μ‹€νŒ¨ λͺ¨λ‹¬ CSS - Voice Sementle ν…Œλ§ˆ
3
  μ‹€νŒ¨ λͺ¨λ‹¬ 및 κ²°κ³Ό 타일 μŠ€νƒ€μΌ
4
  """
5
 
6
- # μ‹€νŒ¨ λͺ¨λ‹¬ CSS - Voice Sementle ν•˜λŠ˜μƒ‰ ν…Œλ§ˆ
7
  FAILURE_MODAL_CSS = """
8
- /* μ‹€νŒ¨ λͺ¨λ‹¬ - Voice Sementle ν•˜λŠ˜μƒ‰ ν…Œλ§ˆ */
9
  .modal-content {
10
  text-align: center;
11
  padding: 20px;
 
1
  """
2
+ μ‹€νŒ¨ λͺ¨λ‹¬ CSS - Voice Semantle ν…Œλ§ˆ
3
  μ‹€νŒ¨ λͺ¨λ‹¬ 및 κ²°κ³Ό 타일 μŠ€νƒ€μΌ
4
  """
5
 
6
+ # μ‹€νŒ¨ λͺ¨λ‹¬ CSS - Voice Semantle ν•˜λŠ˜μƒ‰ ν…Œλ§ˆ
7
  FAILURE_MODAL_CSS = """
8
+ /* μ‹€νŒ¨ λͺ¨λ‹¬ - Voice Semantle ν•˜λŠ˜μƒ‰ ν…Œλ§ˆ */
9
  .modal-content {
10
  text-align: center;
11
  padding: 20px;
client/frontend/styles/falling_elements_style.py CHANGED
@@ -1,5 +1,5 @@
1
  """
2
- λ°°κ²½ 꽃/μŒν‘œ λ–¨μ–΄μ§€λŠ” μ• λ‹ˆλ©”μ΄μ…˜ - Voice Sementle ν…Œλ§ˆ
3
  λ°°κ²½ μ• λ‹ˆλ©”μ΄μ…˜ CSS 및 JavaScript
4
  """
5
 
 
1
  """
2
+ λ°°κ²½ 꽃/μŒν‘œ λ–¨μ–΄μ§€λŠ” μ• λ‹ˆλ©”μ΄μ…˜ - Voice Semantle ν…Œλ§ˆ
3
  λ°°κ²½ μ• λ‹ˆλ©”μ΄μ…˜ CSS 및 JavaScript
4
  """
5
 
client/frontend/styles/history_style.py CHANGED
@@ -1,11 +1,11 @@
1
  """
2
- νžˆμŠ€ν† λ¦¬ λ””μŠ€ν”Œλ ˆμ΄ CSS - Voice Sementle ν…Œλ§ˆ
3
  μ‹œλ„ 기둝 및 νžˆμŠ€ν† λ¦¬ ν‘œμ‹œ μŠ€νƒ€μΌ
4
  """
5
 
6
- # νžˆμŠ€ν† λ¦¬ λ””μŠ€ν”Œλ ˆμ΄ CSS - Voice Sementle ν•˜λŠ˜μƒ‰ ν…Œλ§ˆ
7
  HISTORY_CSS = """
8
- /* μ‹œλ„ 기둝 - Voice Sementle ν•˜λŠ˜μƒ‰ ν…Œλ§ˆ */
9
  .attempt-history {
10
  margin-top: 20px;
11
  padding: 16px;
 
1
  """
2
+ νžˆμŠ€ν† λ¦¬ λ””μŠ€ν”Œλ ˆμ΄ CSS - Voice Semantle ν…Œλ§ˆ
3
  μ‹œλ„ 기둝 및 νžˆμŠ€ν† λ¦¬ ν‘œμ‹œ μŠ€νƒ€μΌ
4
  """
5
 
6
+ # νžˆμŠ€ν† λ¦¬ λ””μŠ€ν”Œλ ˆμ΄ CSS - Voice Semantle ν•˜λŠ˜μƒ‰ ν…Œλ§ˆ
7
  HISTORY_CSS = """
8
+ /* μ‹œλ„ 기둝 - Voice Semantle ν•˜λŠ˜μƒ‰ ν…Œλ§ˆ */
9
  .attempt-history {
10
  margin-top: 20px;
11
  padding: 16px;
client/frontend/styles/result_screen_style.py CHANGED
@@ -1,11 +1,11 @@
1
  """
2
- 성곡 ν™”λ©΄ CSS 및 Celebration JavaScript - Voice Sementle ν…Œλ§ˆ
3
  성곡 ν™”λ©΄ μŠ€νƒ€μΌ, 톡계 μΉ΄λ“œ, Confetti 효과
4
  """
5
 
6
- # 성곡 ν™”λ©΄ CSS - Voice Sementle μ• λ‹ˆλ©”μ΄μ…˜ ν…Œλ§ˆ + Confetti 효과
7
  RESULT_SCREEN_CSS = """
8
- /* 성곡 ν™”λ©΄ - Voice Sementle ν•˜λŠ˜μƒ‰ ν…Œλ§ˆ */
9
  #success-screen,
10
  #giveup-screen {
11
  position: fixed !important;
 
1
  """
2
+ 성곡 ν™”λ©΄ CSS 및 Celebration JavaScript - Voice Semantle ν…Œλ§ˆ
3
  성곡 ν™”λ©΄ μŠ€νƒ€μΌ, 톡계 μΉ΄λ“œ, Confetti 효과
4
  """
5
 
6
+ # 성곡 ν™”λ©΄ CSS - Voice Semantle μ• λ‹ˆλ©”μ΄μ…˜ ν…Œλ§ˆ + Confetti 효과
7
  RESULT_SCREEN_CSS = """
8
+ /* 성곡 ν™”λ©΄ - Voice Semantle ν•˜λŠ˜μƒ‰ ν…Œλ§ˆ */
9
  #success-screen,
10
  #giveup-screen {
11
  position: fixed !important;
client/frontend/styles/theme_style.py CHANGED
@@ -1,11 +1,11 @@
1
  """
2
- κΈ°λ³Έ ν…Œλ§ˆ CSS - Voice Sementle ν…Œλ§ˆ
3
  μ• λ‹ˆλ©”μ΄μ…˜ν’ ν•˜λŠ˜μƒ‰ ν…Œλ§ˆ 기반의 μ „μ—­ λ³€μˆ˜ 및 κΈ°λ³Έ μŠ€νƒ€μΌ
4
  """
5
 
6
- # Voice Sementle ν…Œλ§ˆ CSS - μ• λ‹ˆλ©”μ΄μ…˜ν’ ν•˜λŠ˜μƒ‰ ν…Œλ§ˆ
7
  BASE_THEME_CSS = """
8
- /* Voice Sementle μŠ€νƒ€μΌ κΈ°λ³Έ ν…Œλ§ˆ - μ• λ‹ˆλ©”μ΄μ…˜ν’ ν•˜λŠ˜μƒ‰ */
9
  :root {
10
  /* 메인 ν…Œλ§ˆ 색상 - ν•˜λŠ˜μƒ‰ 계열 */
11
  --theme-primary: #4db8ff;
 
1
  """
2
+ κΈ°λ³Έ ν…Œλ§ˆ CSS - Voice Semantle ν…Œλ§ˆ
3
  μ• λ‹ˆλ©”μ΄μ…˜ν’ ν•˜λŠ˜μƒ‰ ν…Œλ§ˆ 기반의 μ „μ—­ λ³€μˆ˜ 및 κΈ°λ³Έ μŠ€νƒ€μΌ
4
  """
5
 
6
+ # Voice Semantle ν…Œλ§ˆ CSS - μ• λ‹ˆλ©”μ΄μ…˜ν’ ν•˜λŠ˜μƒ‰ ν…Œλ§ˆ
7
  BASE_THEME_CSS = """
8
+ /* Voice Semantle μŠ€νƒ€μΌ κΈ°λ³Έ ν…Œλ§ˆ - μ• λ‹ˆλ©”μ΄μ…˜ν’ ν•˜λŠ˜μƒ‰ */
9
  :root {
10
  /* 메인 ν…Œλ§ˆ 색상 - ν•˜λŠ˜μƒ‰ 계열 */
11
  --theme-primary: #4db8ff;
client/services/analysis_service.py CHANGED
@@ -7,7 +7,11 @@ import time
7
  import logging
8
  from typing import Dict
9
 
10
- from .voice_analyzer import analyze_voice_with_mcp
 
 
 
 
11
  from .hint_generator import generate_hints_with_gemini, extract_advice_text
12
  from .database import get_puzzle_by_date, save_guess_record
13
 
@@ -75,7 +79,10 @@ async def analyze_voice(audio_bytes: bytes, date: str, session_id: str) -> Dict:
75
  logger.info(f"VoiceKit scores: pitch={pitch}, rhythm={rhythm}, energy={energy}, "
76
  f"pronunciation={pronunciation}, transcript={transcript}, overall={overall}")
77
 
78
- # 4. Generate hints with Gemini
 
 
 
79
  gemini_start = time.time()
80
  hints_json = await generate_hints_with_gemini(
81
  scores={
@@ -87,15 +94,21 @@ async def analyze_voice(audio_bytes: bytes, date: str, session_id: str) -> Dict:
87
  },
88
  attempt=attempt,
89
  answer_word=puzzle["answer_word"],
90
- category=puzzle["category"]
 
 
91
  )
92
  gemini_time = (time.time() - gemini_start) * 1000
93
  logger.info(f"⏱️ Gemini hint generation: {gemini_time:.1f}ms")
 
94
 
95
  advice = extract_advice_text(hints_json)
96
  is_correct = overall > 85
97
 
98
- # 5. Save guess record to database
 
 
 
99
  save_guess_record(
100
  session_id=session_id,
101
  puzzle_number=puzzle["puzzle_number"],
 
7
  import logging
8
  from typing import Dict
9
 
10
+ from .voice_analyzer import (
11
+ analyze_voice_with_mcp,
12
+ get_hint_history,
13
+ add_hint_to_history
14
+ )
15
  from .hint_generator import generate_hints_with_gemini, extract_advice_text
16
  from .database import get_puzzle_by_date, save_guess_record
17
 
 
79
  logger.info(f"VoiceKit scores: pitch={pitch}, rhythm={rhythm}, energy={energy}, "
80
  f"pronunciation={pronunciation}, transcript={transcript}, overall={overall}")
81
 
82
+ # 4. Get hint history for this session (to avoid repetition)
83
+ hint_history = get_hint_history(session_id)
84
+
85
+ # 5. Generate hints with Gemini (including user's spoken text for context-aware advice)
86
  gemini_start = time.time()
87
  hints_json = await generate_hints_with_gemini(
88
  scores={
 
94
  },
95
  attempt=attempt,
96
  answer_word=puzzle["answer_word"],
97
+ category=puzzle["category"],
98
+ user_text=user_text,
99
+ hint_history=hint_history
100
  )
101
  gemini_time = (time.time() - gemini_start) * 1000
102
  logger.info(f"⏱️ Gemini hint generation: {gemini_time:.1f}ms")
103
+ logger.info(f"Generated hints: {hints_json}")
104
 
105
  advice = extract_advice_text(hints_json)
106
  is_correct = overall > 85
107
 
108
+ # 6. Store this hint in history (for next time)
109
+ add_hint_to_history(session_id, advice)
110
+
111
+ # 7. Save guess record to database
112
  save_guess_record(
113
  session_id=session_id,
114
  puzzle_number=puzzle["puzzle_number"],
client/services/hint_generator.py CHANGED
@@ -34,9 +34,10 @@ def list_hint_files(category: str) -> list:
34
 
35
 
36
  async def generate_hints_with_gemini(
37
- scores: dict, attempt: int, answer_word: str, category: str
 
38
  ) -> dict:
39
- """Generate JSON hints using Gemini LLM"""
40
  try:
41
  # Find weakest metrics
42
  metrics = {
@@ -79,68 +80,73 @@ async def generate_hints_with_gemini(
79
  guidance = f"Attempt {attempt}! Focus on pronunciation coaching for {', '.join(weakest_names)}. Give very strong hints about what to say."
80
  category_hint = f"After {attempt} attempts, be very helpful while still not directly revealing the answer."
81
 
 
 
 
 
 
 
 
 
 
82
  # Build prompt for Gemini
83
- prompt = f"""You are a hint generator for "Audio Semantle" - a pronunciation puzzle game where players start blind and must figure out what word to say.
84
-
85
- **Current State:**
86
- - Answer word: "{answer_word}" (DO NOT reveal this directly!)
87
- - Category: {category} (this is a {category})
88
- - Attempt number: {attempt} (players have UNLIMITED attempts)
89
- - Scores (0-100): Pitch={scores.get('pitch', 0)}, Rhythm={scores.get('rhythm', 0)}, Energy={scores.get('energy', 0)}, Pronunciation={scores.get('pronunciation', 0)}, Overall={scores.get('overall', 0)}
90
- - Weakest areas: {', '.join(weakest_names)}
91
- - Available hint images: {hint_files_str}
92
-
93
- **Task:** {guidance}
94
- **Category Guidance:** {category_hint}
95
-
96
- **Hint Examples by Category:**
97
- - If category = "meme": "This viral phrase often appears in funny internet videos..."
98
- - If category = "movie": "This famous movie quote/title was released in..."
99
- - If category = "song": "This classic song by [artist hint] topped the charts..."
100
-
101
- **Return ONLY this JSON format, no other text:**
102
- {{
103
- "type": "{hint_type}",
104
- "answer": [
105
- {{
106
- "text": "Your hint or advice text here (can mention category)",
107
- "path": "images/hints/{category}/filename.jpg" OR ""
108
- }}
109
- ]
110
- }}
111
-
112
- **Rules for Progressive Hints:**
113
- 1. Remember: Players start COMPLETELY BLIND - they don't know what to say initially
114
- 2. Hints should get progressively more helpful with each attempt
115
- 3. For "hint" type: Follow the guidance above based on attempt number
116
- 4. For "advice" type: Focus on pronunciation + give strong contextual clues
117
- 5. Keep text concise (1-2 sentences max)
118
- 6. NEVER reveal the answer directly, but after 10+ attempts be very helpful
119
- 7. Return ONLY valid JSON, no markdown, no extra text
120
  """
121
 
 
 
 
 
122
  # Call Gemini
123
  response = call_gemini_with_tools(
124
  model_name="gemini-2.5-flash",
125
  system_prompt="You are a JSON generator. Return ONLY valid JSON with no markdown formatting or extra text.",
126
  messages=[{"role": "user", "content": prompt}],
127
  tools=[],
128
- max_tokens=512,
129
  )
130
 
131
  # Extract JSON from response
132
  response_text, error = get_text_from_gemini_response(response)
 
 
 
133
  if error:
134
  logger.error(f"Gemini response error: {error}")
135
- # Fallback hint
136
  return {
137
  "type": "advice",
138
- "answer": [
139
- {
140
- "text": f"Focus on improving {weakest_names[0]} (score: {weakest[0][1]:.0f}/100)",
141
- "path": "",
142
- }
143
- ],
144
  }
145
 
146
  # Clean response text (remove markdown code blocks if present)
@@ -148,24 +154,25 @@ async def generate_hints_with_gemini(
148
  if response_text.startswith("```"):
149
  lines = response_text.split("\n")
150
  response_text = "\n".join(lines[1:-1]) if len(lines) > 2 else response_text
 
151
 
152
  # Parse JSON
 
153
  hints_json = json.loads(response_text)
 
154
  logger.info(f"βœ“ Generated fresh hint for attempt {attempt}, category {category}")
155
 
156
  return hints_json
157
 
158
  except Exception as e:
159
  logger.error(f"Hint generation error: {e}")
 
 
 
160
  # Fallback hint
161
  return {
162
  "type": "advice",
163
- "answer": [
164
- {
165
- "text": "Keep practicing! Focus on your pronunciation.",
166
- "path": "",
167
- }
168
- ],
169
  }
170
 
171
 
 
34
 
35
 
36
  async def generate_hints_with_gemini(
37
+ scores: dict, attempt: int, answer_word: str, category: str,
38
+ user_text: str = "", hint_history: list = None
39
  ) -> dict:
40
+ """Generate JSON hints using Gemini LLM (with hint history to avoid repetition)"""
41
  try:
42
  # Find weakest metrics
43
  metrics = {
 
80
  guidance = f"Attempt {attempt}! Focus on pronunciation coaching for {', '.join(weakest_names)}. Give very strong hints about what to say."
81
  category_hint = f"After {attempt} attempts, be very helpful while still not directly revealing the answer."
82
 
83
+ # Format hint history for prompt (avoid repetition)
84
+ history_text = ""
85
+ if hint_history and len(hint_history) > 0:
86
+ recent_hints = hint_history[-5:] # Last 5 hints only
87
+ history_text = "\n".join([f" - {h}" for h in recent_hints])
88
+
89
+ # Overall score for context-aware advice
90
+ overall_score = scores.get('overall', 0)
91
+
92
  # Build prompt for Gemini
93
+ prompt = f"""You generate hints for a pronunciation game. User tries to guess and say a secret phrase.
94
+
95
+ CONTEXT:
96
+ - User said: "{user_text}"
97
+ - Secret answer: "{answer_word}" (NEVER reveal!)
98
+ - Category: {category}
99
+ - Overall score: {overall_score}/100
100
+ - Attempt: {attempt}
101
+
102
+ PREVIOUS HINTS GIVEN (DO NOT REPEAT THESE - give NEW information!):
103
+ {history_text if history_text else " (none yet)"}
104
+
105
+ MANDATORY FORMAT: Always start with "You said '[what user said]' - " then your feedback.
106
+
107
+ RULES:
108
+ 1. If overall >= 70: User is saying the RIGHT phrase. Give pronunciation tips.
109
+ β†’ "You said 'Wingardium Leviosa' - Correct! Work on your pitch - try more dramatic."
110
+
111
+ 2. If overall < 70: User is saying the WRONG phrase. Analyze what they said and GUIDE them:
112
+ - Same franchise? β†’ "You said 'Shut up Malfoy' - Right franchise! Now think of a famous SPELL..."
113
+ - Similar category? β†’ "You said 'I'll be back' - Good movie instinct! But try a magical fantasy..."
114
+ - Unrelated? β†’ "You said 'Hello' - That's not it. This is a famous {category}..."
115
+
116
+ 3. NEVER say "focus on pronunciation" when overall < 70!
117
+
118
+ 4. Be helpful based on attempt ({attempt}): 1-3 vague, 4-6 specific, 7+ very helpful.
119
+
120
+ 5. NEVER repeat hints from the history above! Always give NEW, FRESH information.
121
+
122
+ Return ONLY this JSON:
123
+ {{"type": "{hint_type}", "answer": [{{"text": "You said '...' - your feedback", "path": ""}}]}}
 
 
 
 
 
 
124
  """
125
 
126
+ print(f"[GEMINI HINT] Calling Gemini for hint generation...")
127
+ print(f"[GEMINI HINT] User said: '{user_text}', Overall: {overall_score}, Attempt: {attempt}")
128
+ print(f"[GEMINI HINT] Hint history: {hint_history}")
129
+
130
  # Call Gemini
131
  response = call_gemini_with_tools(
132
  model_name="gemini-2.5-flash",
133
  system_prompt="You are a JSON generator. Return ONLY valid JSON with no markdown formatting or extra text.",
134
  messages=[{"role": "user", "content": prompt}],
135
  tools=[],
136
+ max_tokens=2048, # Generous limit for hint generation with history
137
  )
138
 
139
  # Extract JSON from response
140
  response_text, error = get_text_from_gemini_response(response)
141
+ print(f"[GEMINI HINT] Response text: {response_text[:200] if response_text else 'None'}...")
142
+ print(f"[GEMINI HINT] Error: {error}")
143
+
144
  if error:
145
  logger.error(f"Gemini response error: {error}")
146
+ print(f"[GEMINI HINT] ❌ FALLBACK triggered due to error: {error}")
147
  return {
148
  "type": "advice",
149
+ "answer": [{"text": f"Keep trying! This is a famous {category}.", "path": ""}]
 
 
 
 
 
150
  }
151
 
152
  # Clean response text (remove markdown code blocks if present)
 
154
  if response_text.startswith("```"):
155
  lines = response_text.split("\n")
156
  response_text = "\n".join(lines[1:-1]) if len(lines) > 2 else response_text
157
+ print(f"[GEMINI HINT] Cleaned markdown, result: {response_text[:200]}...")
158
 
159
  # Parse JSON
160
+ print(f"[GEMINI HINT] Parsing JSON: {response_text[:300]}...")
161
  hints_json = json.loads(response_text)
162
+ print(f"[GEMINI HINT] βœ“ Parsed successfully: {hints_json}")
163
  logger.info(f"βœ“ Generated fresh hint for attempt {attempt}, category {category}")
164
 
165
  return hints_json
166
 
167
  except Exception as e:
168
  logger.error(f"Hint generation error: {e}")
169
+ print(f"[GEMINI HINT] ❌ EXCEPTION: {type(e).__name__}: {e}")
170
+ import traceback
171
+ traceback.print_exc()
172
  # Fallback hint
173
  return {
174
  "type": "advice",
175
+ "answer": [{"text": f"Keep trying! This is a famous {category}.", "path": ""}]
 
 
 
 
 
176
  }
177
 
178
 
client/services/voice_analyzer.py CHANGED
@@ -39,6 +39,9 @@ _mcp_lock = None
39
  # Session tracking for attempt counts
40
  _session_attempts = {}
41
 
 
 
 
42
 
43
  async def initialize_voicekit_mcp():
44
  """Initialize VoiceKit MCP connection on app startup"""
@@ -243,6 +246,27 @@ def get_attempt_count(session_id: str) -> int:
243
  return _session_attempts[session_id]
244
 
245
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
246
  async def analyze_voice_with_mcp(
247
  audio_bytes: bytes,
248
  session_id: str,
 
39
  # Session tracking for attempt counts
40
  _session_attempts = {}
41
 
42
+ # Session tracking for hint/advice history (to avoid repetition)
43
+ _session_hint_history = {} # {session_id: [list of previous hints/advice]}
44
+
45
 
46
  async def initialize_voicekit_mcp():
47
  """Initialize VoiceKit MCP connection on app startup"""
 
246
  return _session_attempts[session_id]
247
 
248
 
249
+ def get_hint_history(session_id: str) -> list:
250
+ """Get hint history for session (to avoid repetition)"""
251
+ global _session_hint_history
252
+ if session_id not in _session_hint_history:
253
+ _session_hint_history[session_id] = []
254
+ return _session_hint_history[session_id]
255
+
256
+
257
+ def add_hint_to_history(session_id: str, hint_text: str) -> None:
258
+ """Add a hint to session history (keeps last 10 hints)"""
259
+ global _session_hint_history
260
+ if session_id not in _session_hint_history:
261
+ _session_hint_history[session_id] = []
262
+
263
+ if hint_text and hint_text != "Keep practicing!":
264
+ _session_hint_history[session_id].append(hint_text)
265
+ # Keep only last 10 hints to avoid memory bloat
266
+ if len(_session_hint_history[session_id]) > 10:
267
+ _session_hint_history[session_id] = _session_hint_history[session_id][-10:]
268
+
269
+
270
  async def analyze_voice_with_mcp(
271
  audio_bytes: bytes,
272
  session_id: str,
client/utils/elevenlabs_tts.py CHANGED
@@ -7,8 +7,10 @@ audio hints in the Audio Semantle game.
7
 
8
  import os
9
  import hashlib
 
10
  from pathlib import Path
11
  from typing import Optional
 
12
 
13
  # Try to import ElevenLabs SDK
14
  try:
@@ -18,8 +20,8 @@ except ImportError:
18
  ELEVENLABS_AVAILABLE = False
19
  print("Warning: elevenlabs package not installed. Audio hints will not be available.")
20
 
21
- # Configuration - use /tmp for Gradio compatibility (always in allowed_paths)
22
- AUDIO_HINTS_DIR = Path("/tmp/audio_hints")
23
  AUDIO_HINTS_DIR.mkdir(parents=True, exist_ok=True)
24
 
25
  # In-memory cache for generated audio hints
@@ -65,6 +67,10 @@ def clone_voice_from_reference(reference_audio_path: str) -> Optional[str]:
65
  # Strip leading slash if present and resolve from project root
66
  full_path = project_root / reference_audio_path.lstrip("/")
67
 
 
 
 
 
68
  if not full_path.exists():
69
  print(f"⚠ Reference audio not found: {full_path}")
70
  return None
@@ -76,11 +82,13 @@ def clone_voice_from_reference(reference_audio_path: str) -> Optional[str]:
76
  voice_name = f"puzzle_voice_{hashlib.md5(reference_audio_path.encode()).hexdigest()[:8]}"
77
  print(f"🎀 Cloning voice from: {full_path}")
78
 
 
 
79
  voice = client.voices.ivc.create(
80
  name=voice_name,
81
- files=[str(full_path)], # List of audio file paths
82
  description="Cloned voice for Audio Semantle hints",
83
- remove_background_noise=True
84
  )
85
 
86
  voice_id = voice.voice_id
@@ -118,15 +126,9 @@ def generate_audio_hint(text: str, hint_type: str = "syllable", voice: str = "Ra
118
  return None
119
 
120
  try:
121
- # Generate cache key from text + reference path for unique caching
122
- ref_hash = hashlib.md5((reference_audio_path or "").encode()).hexdigest()[:8]
123
  text_hash = hashlib.md5(text.encode()).hexdigest()[:12]
124
- cache_key = f"{text_hash}_{hint_type}_{ref_hash}"
125
-
126
- # Check cache first
127
- if cache_key in _audio_hint_cache:
128
- print(f"βœ“ Using cached audio hint: {cache_key}")
129
- return _audio_hint_cache[cache_key]
130
 
131
  # Initialize ElevenLabs client (v2.24.0 API)
132
  api_key = get_api_key()
@@ -159,8 +161,8 @@ def generate_audio_hint(text: str, hint_type: str = "syllable", voice: str = "Ra
159
  output_format="mp3_44100_128"
160
  )
161
 
162
- # Save to file
163
- filename = f"{text_hash}_{hint_type}.mp3"
164
  filepath = AUDIO_HINTS_DIR / filename
165
 
166
  # Write audio bytes to file
@@ -168,11 +170,10 @@ def generate_audio_hint(text: str, hint_type: str = "syllable", voice: str = "Ra
168
  for chunk in audio:
169
  f.write(chunk)
170
 
171
- # Return absolute path (Gradio allows /tmp by default)
172
  absolute_path = str(filepath)
173
- _audio_hint_cache[cache_key] = absolute_path
174
 
175
- print(f"βœ“ Audio hint saved: {absolute_path}")
176
  return absolute_path
177
 
178
  except Exception as e:
@@ -184,7 +185,8 @@ def generate_audio_hint(text: str, hint_type: str = "syllable", voice: str = "Ra
184
 
185
  def extract_hint_portion(answer_word: str, attempts: int) -> tuple[str, str]:
186
  """
187
- Extract what portion of the answer to pronounce based on attempt count
 
188
 
189
  Args:
190
  answer_word: The correct answer
@@ -193,44 +195,119 @@ def extract_hint_portion(answer_word: str, attempts: int) -> tuple[str, str]:
193
  Returns:
194
  Tuple of (text_to_speak, hint_type)
195
 
196
- Strategy:
197
- - Attempt 3: First syllable/word
198
- - Attempt 5: First half
199
- - Attempt 7: Rhythm pattern (with pauses)
200
- - Attempt 10+: Almost full (missing last part)
 
 
201
  """
202
  words = answer_word.split()
203
-
204
- if attempts == 3:
205
- # First syllable or first word
206
- first_word = words[0]
207
- # Simple syllable extraction: take first half of first word
208
- mid = len(first_word) // 2
209
- if mid > 0:
210
- return first_word[:mid], "syllable"
211
- return first_word, "syllable"
212
-
213
- elif attempts == 5:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
214
  # First half of phrase
215
- mid = len(words) // 2
216
- if mid > 0:
217
- return " ".join(words[:mid]), "partial"
218
- return words[0], "partial"
219
-
220
- elif attempts == 7:
221
- # Rhythm pattern with pauses
222
- return " ... ".join(words), "rhythm"
223
 
224
- elif attempts >= 10:
225
- # Almost full (missing last word)
226
  if len(words) > 1:
227
  return " ".join(words[:-1]), "almost_full"
228
- # For single word, return first 80%
229
  cutoff = int(len(answer_word) * 0.8)
230
  return answer_word[:cutoff], "almost_full"
231
 
232
- # Default: first word
233
- return words[0], "syllable"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
234
 
235
 
236
  def should_offer_audio_hint(message: str, attempts: int, answer_word: str) -> bool:
 
7
 
8
  import os
9
  import hashlib
10
+ import time
11
  from pathlib import Path
12
  from typing import Optional
13
+ from io import BytesIO
14
 
15
  # Try to import ElevenLabs SDK
16
  try:
 
20
  ELEVENLABS_AVAILABLE = False
21
  print("Warning: elevenlabs package not installed. Audio hints will not be available.")
22
 
23
+ # Configuration - use project uploads directory for Gradio compatibility
24
+ AUDIO_HINTS_DIR = Path(__file__).parent.parent / "uploads" / "audio_hints"
25
  AUDIO_HINTS_DIR.mkdir(parents=True, exist_ok=True)
26
 
27
  # In-memory cache for generated audio hints
 
67
  # Strip leading slash if present and resolve from project root
68
  full_path = project_root / reference_audio_path.lstrip("/")
69
 
70
+ # Always use .wav for ElevenLabs (required format for voice cloning)
71
+ full_path = full_path.with_suffix('.wav')
72
+ print(f"🎡 Using WAV format for ElevenLabs: {full_path}")
73
+
74
  if not full_path.exists():
75
  print(f"⚠ Reference audio not found: {full_path}")
76
  return None
 
82
  voice_name = f"puzzle_voice_{hashlib.md5(reference_audio_path.encode()).hexdigest()[:8]}"
83
  print(f"🎀 Cloning voice from: {full_path}")
84
 
85
+ # Read file as BytesIO (required by ElevenLabs SDK)
86
+ # remove_background_noise=False allows shorter samples (<4.6s)
87
  voice = client.voices.ivc.create(
88
  name=voice_name,
89
+ files=[BytesIO(open(full_path, "rb").read())],
90
  description="Cloned voice for Audio Semantle hints",
91
+ remove_background_noise=False
92
  )
93
 
94
  voice_id = voice.voice_id
 
126
  return None
127
 
128
  try:
129
+ # Generate unique filename with timestamp (no caching)
 
130
  text_hash = hashlib.md5(text.encode()).hexdigest()[:12]
131
+ timestamp = int(time.time() * 1000)
 
 
 
 
 
132
 
133
  # Initialize ElevenLabs client (v2.24.0 API)
134
  api_key = get_api_key()
 
161
  output_format="mp3_44100_128"
162
  )
163
 
164
+ # Save to file with unique timestamp
165
+ filename = f"{text_hash}_{hint_type}_{timestamp}.mp3"
166
  filepath = AUDIO_HINTS_DIR / filename
167
 
168
  # Write audio bytes to file
 
170
  for chunk in audio:
171
  f.write(chunk)
172
 
173
+ # Return absolute path
174
  absolute_path = str(filepath)
 
175
 
176
+ print(f"βœ“ Audio hint generated (fresh): {absolute_path}")
177
  return absolute_path
178
 
179
  except Exception as e:
 
185
 
186
  def extract_hint_portion(answer_word: str, attempts: int) -> tuple[str, str]:
187
  """
188
+ Extract what portion of the answer to pronounce based on attempt count.
189
+ Uses natural syllable breaks for more gradual progression.
190
 
191
  Args:
192
  answer_word: The correct answer
 
195
  Returns:
196
  Tuple of (text_to_speak, hint_type)
197
 
198
+ Strategy (syllable-based, gradual):
199
+ - Attempt 1-2: First syllable (~25%) with "-"
200
+ - Attempt 3-4: ~40% of first word with "-"
201
+ - Attempt 5-6: ~60% of first word with "-"
202
+ - Attempt 7-8: Full first word
203
+ - Attempt 9-10: First half of phrase
204
+ - Attempt 11+: Almost full (missing last part)
205
  """
206
  words = answer_word.split()
207
+ first_word = words[0]
208
+
209
+ def get_syllable_break(word: str, fraction: float) -> int:
210
+ """Find a natural syllable break point at approximately the given fraction of the word."""
211
+ target = int(len(word) * fraction)
212
+ vowels = set('aeiouAEIOU')
213
+
214
+ # Search around target for a consonant after vowel (syllable break)
215
+ for i in range(max(2, target - 2), min(len(word), target + 3)):
216
+ if i > 0 and word[i-1] in vowels and word[i] not in vowels:
217
+ return i
218
+ return max(2, target)
219
+
220
+ if attempts <= 2:
221
+ # First syllable only: ~25% with natural break
222
+ cut = get_syllable_break(first_word, 0.25)
223
+ return first_word[:cut] + "-", "minimal"
224
+
225
+ elif attempts <= 4:
226
+ # ~40% of first word
227
+ cut = get_syllable_break(first_word, 0.4)
228
+ return first_word[:cut] + "-", "syllable"
229
+
230
+ elif attempts <= 6:
231
+ # ~60% of first word
232
+ cut = get_syllable_break(first_word, 0.6)
233
+ return first_word[:cut] + "-", "partial"
234
+
235
+ elif attempts <= 8:
236
+ # First word only
237
+ return first_word, "word"
238
+
239
+ elif attempts <= 10:
240
  # First half of phrase
241
+ mid = max(1, len(words) // 2)
242
+ return " ".join(words[:mid]), "half"
 
 
 
 
 
 
243
 
244
+ else: # 11+
245
+ # Almost full (missing last word or 20%)
246
  if len(words) > 1:
247
  return " ".join(words[:-1]), "almost_full"
 
248
  cutoff = int(len(answer_word) * 0.8)
249
  return answer_word[:cutoff], "almost_full"
250
 
251
+
252
+ def extract_hint_portion_for_word(answer_word: str, attempts: int, word_index: int = 0) -> tuple[str, str]:
253
+ """
254
+ Extract what portion of a SPECIFIC WORD to pronounce based on attempt count.
255
+
256
+ This allows the chatbot to hint specific words when user already knows others.
257
+ For example, if user knows "Wingardium" but not "Leviosa", set word_index=1.
258
+
259
+ Args:
260
+ answer_word: The full correct answer (may have multiple words)
261
+ attempts: Number of attempts user has made
262
+ word_index: Which word to hint (0=first, 1=second, etc.)
263
+
264
+ Returns:
265
+ Tuple of (text_to_speak, hint_type)
266
+ """
267
+ words = answer_word.split()
268
+
269
+ # Clamp word_index to valid range
270
+ if word_index < 0:
271
+ word_index = 0
272
+ if word_index >= len(words):
273
+ word_index = len(words) - 1
274
+
275
+ target_word = words[word_index]
276
+
277
+ def get_syllable_break(word: str, fraction: float) -> int:
278
+ """Find a natural syllable break point at approximately the given fraction of the word."""
279
+ target = int(len(word) * fraction)
280
+ vowels = set('aeiouAEIOU')
281
+
282
+ # Search around target for a consonant after vowel (syllable break)
283
+ for i in range(max(2, target - 2), min(len(word), target + 3)):
284
+ if i > 0 and word[i-1] in vowels and word[i] not in vowels:
285
+ return i
286
+ return max(2, target)
287
+
288
+ # Progressive hints for the target word
289
+ if attempts <= 2:
290
+ # First syllable only: ~25% with natural break
291
+ cut = get_syllable_break(target_word, 0.25)
292
+ return target_word[:cut] + "-", "minimal"
293
+
294
+ elif attempts <= 4:
295
+ # ~40% of target word
296
+ cut = get_syllable_break(target_word, 0.4)
297
+ return target_word[:cut] + "-", "syllable"
298
+
299
+ elif attempts <= 6:
300
+ # ~60% of target word
301
+ cut = get_syllable_break(target_word, 0.6)
302
+ return target_word[:cut] + "-", "partial"
303
+
304
+ elif attempts <= 8:
305
+ # Full target word
306
+ return target_word, "word"
307
+
308
+ else: # 9+
309
+ # Full target word (no more to reveal for single word)
310
+ return target_word, "almost_full"
311
 
312
 
313
  def should_offer_audio_hint(message: str, attempts: int, answer_word: str) -> bool:
gemini_adapter.py CHANGED
@@ -84,6 +84,19 @@ def convert_messages_to_gemini_format(anthropic_messages):
84
  if isinstance(content, str):
85
  # Simple text message
86
  parts.append(types.Part(text=content))
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  elif isinstance(content, list):
88
  # Complex content with tool calls/results
89
  for item in content:
 
84
  if isinstance(content, str):
85
  # Simple text message
86
  parts.append(types.Part(text=content))
87
+ elif isinstance(content, dict):
88
+ # Could be Gradio file format {"path": ..., "mime_type": ...}
89
+ # Skip audio/video files - they can't be sent to Gemini text API
90
+ if content.get("path") and content.get("mime_type"):
91
+ print(f"DEBUG convert_messages: Skipping file content: {content.get('mime_type')}")
92
+ continue
93
+ # Could be text content {"type": "text", "text": "..."}
94
+ elif content.get("type") == "text":
95
+ parts.append(types.Part(text=content.get("text", "")))
96
+ elif hasattr(content, '__class__') and 'Audio' in content.__class__.__name__:
97
+ # Skip Gradio Audio component objects
98
+ print(f"DEBUG convert_messages: Skipping Gradio component: {content.__class__.__name__}")
99
+ continue
100
  elif isinstance(content, list):
101
  # Complex content with tool calls/results
102
  for item in content:
gradio_ui.py ADDED
@@ -0,0 +1,279 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Gradio UI for Chloe's Voice Komentle Game
3
+ Connects to FastAPI backend for voice analysis
4
+ """
5
+
6
+ import os
7
+ # Set Gradio temp directory BEFORE importing gradio
8
+ _upload_dir = os.path.join(os.path.dirname(__file__), "gradio_uploads")
9
+ os.makedirs(_upload_dir, exist_ok=True)
10
+ os.environ["GRADIO_TEMP_DIR"] = _upload_dir
11
+
12
+ import gradio as gr
13
+ from datetime import datetime
14
+ import uuid
15
+ import asyncio
16
+ from sqlalchemy import create_engine, text
17
+ from dotenv import load_dotenv
18
+
19
+ # Load environment variables
20
+ load_dotenv()
21
+
22
+ # Import backend functions
23
+ from backend import (
24
+ analyze_voice_logic,
25
+ get_puzzle_by_date,
26
+ lifespan,
27
+ app as backend_app,
28
+ )
29
+
30
+ # Database connection
31
+ DATABASE_URL = os.getenv("DATABASE_URL")
32
+ engine = create_engine(
33
+ DATABASE_URL,
34
+ pool_size=10, # κΈ°λ³Έ μ—°κ²° ν’€ 크기
35
+ max_overflow=20, # μ΅œλŒ€ μΆ”κ°€ μ—°κ²° 수
36
+ pool_pre_ping=True, # μ—°κ²° μ‚¬μš© μ „ μœ νš¨μ„± 검사
37
+ pool_recycle=3600, # 1μ‹œκ°„λ§ˆλ‹€ μ—°κ²° μž¬μƒμ„±
38
+ connect_args={
39
+ "connect_timeout": 10, # μ—°κ²° νƒ€μž„μ•„μ›ƒ 10초
40
+ "options": "-c statement_timeout=30000" # 쿼리 νƒ€μž„μ•„μ›ƒ 30초
41
+ }
42
+ )
43
+
44
+ # Session ID (persistent across attempts)
45
+ session_id = str(uuid.uuid4())
46
+
47
+ # Backend initialization flag
48
+ backend_initialized = False
49
+
50
+
51
+ async def analyze_voice_async(audio_file, date_str):
52
+ """
53
+ Analyze voice using backend logic directly
54
+
55
+ Args:
56
+ audio_file: Path to recorded audio file
57
+ date_str: Date string for puzzle lookup
58
+
59
+ Returns:
60
+ tuple: (result_text, scores_text, hint_text, image_path)
61
+ """
62
+ if audio_file is None:
63
+ return "❌ μ˜€λ””μ˜€λ₯Ό λ¨Όμ € λ…ΉμŒν•΄μ£Όμ„Έμš”!", "", "", None
64
+
65
+ try:
66
+ # Read audio file
67
+ with open(audio_file, "rb") as f:
68
+ audio_bytes = f.read()
69
+
70
+ # Call backend logic directly
71
+ result = await analyze_voice_logic(audio_bytes, date_str, session_id)
72
+
73
+ # Handle errors
74
+ if result.get("status") == "error":
75
+ return f"❌ {result.get('message', 'Unknown error')}", "", "", None
76
+
77
+ # Parse response (already in 0-100 range from backend)
78
+ category = result.get("category", "unknown")
79
+ pitch = result.get("pitch", 0.0)
80
+ rhythm = result.get("rhythm", 0.0)
81
+ energy = result.get("energy", 0.0)
82
+ pronunciation = result.get("pronunciation", 0.0)
83
+ transcript = result.get("transcript", 0.0)
84
+ overall = result.get("overall", 0.0)
85
+ advice = result.get("advice", "")
86
+ is_correct = result.get("is_correct", False)
87
+ hints = {} # hints are embedded in advice now
88
+
89
+ # Format result message
90
+ if is_correct:
91
+ result_msg = f"πŸŽ‰ μ •λ‹΅μž…λ‹ˆλ‹€! 전체 점수: {overall:.1f}/100"
92
+ else:
93
+ result_msg = f"πŸ“Š 전체 점수: {overall:.1f}/100 - λ‹€μ‹œ μ‹œλ„ν•΄λ³΄μ„Έμš”!"
94
+
95
+ # Format scores
96
+ scores_text = f"""
97
+ ### πŸ“Š 점수 상세
98
+
99
+ **μΉ΄ν…Œκ³ λ¦¬:** {category.upper()}
100
+
101
+ - **발음 (Pronunciation):** {pronunciation:.1f}/100
102
+ - **μŒλ†’μ΄ (Pitch):** {pitch:.1f}/100
103
+ - **리듬 (Rhythm):** {rhythm:.1f}/100
104
+ - **μ—λ„ˆμ§€ (Energy):** {energy:.1f}/100
105
+ - **전사 (Transcript):** {transcript:.1f}/100
106
+ - **전체 (Overall):** {overall:.1f}/100
107
+ """
108
+
109
+ # Format hints
110
+ hint_text = ""
111
+ hint_image = None
112
+
113
+ if hints and "answer" in hints:
114
+ hint_type = hints.get("type", "hint")
115
+ hint_items = hints.get("answer", [])
116
+
117
+ if hint_type == "hint":
118
+ hint_text = "πŸ’‘ **힌트:**\n\n"
119
+ else:
120
+ hint_text = "🎯 **발음 μ‘°μ–Έ:**\n\n"
121
+
122
+ for item in hint_items:
123
+ hint_text += f"{item.get('text', '')}\n\n"
124
+
125
+ # Get image path if exists
126
+ img_path = item.get("path", "")
127
+ if img_path and os.path.exists(img_path):
128
+ hint_image = img_path
129
+
130
+ # Add advice if no hints
131
+ if not hint_text and advice:
132
+ hint_text = f"πŸ’¬ **μ‘°μ–Έ:**\n\n{advice}"
133
+
134
+ return result_msg, scores_text, hint_text, hint_image
135
+
136
+ except Exception as e:
137
+ return f"❌ 였λ₯˜ λ°œμƒ: {str(e)}", "", "", None
138
+
139
+
140
+ def analyze_voice(audio_file, date_str):
141
+ """Synchronous wrapper for async analyze_voice_async"""
142
+ return asyncio.run(analyze_voice_async(audio_file, date_str))
143
+
144
+
145
+ def get_today_puzzle():
146
+ """Get today's puzzle information from database"""
147
+ try:
148
+ today = datetime.now().strftime("%Y-%m-%d")
149
+
150
+ # Use backend function to get puzzle
151
+ puzzle = get_puzzle_by_date(today)
152
+ print(puzzle)
153
+ if puzzle:
154
+ return f"""
155
+ ### πŸ“… 였늘의 퍼즐
156
+
157
+ **λ‚ μ§œ:** {puzzle.get('puzzle_date', 'N/A')}
158
+ **퍼즐 번호:** #{puzzle.get('puzzle_number', 'N/A')}
159
+ **μΉ΄ν…Œκ³ λ¦¬:** {puzzle.get('category', 'N/A').upper()}
160
+ **λ‚œμ΄λ„:** {puzzle.get('difficulty', 'N/A')}
161
+
162
+ μ •λ‹΅ 단어λ₯Ό λ°œμŒν•΄λ³΄μ„Έμš”! (μ΅œλŒ€ 6회 μ‹œλ„)
163
+ """
164
+ else:
165
+ return "❌ 였늘의 퍼즐을 찾을 수 μ—†μŠ΅λ‹ˆλ‹€."
166
+
167
+ except Exception as e:
168
+ return f"❌ 퍼즐 정보λ₯Ό κ°€μ Έμ˜¬ 수 μ—†μŠ΅λ‹ˆλ‹€: {str(e)}"
169
+
170
+
171
+ def reset_session():
172
+ """Reset session for new game"""
173
+ global session_id
174
+ session_id = str(uuid.uuid4())
175
+ return "βœ… μƒˆ κ²Œμž„ μ‹œμž‘! μ˜€λ””μ˜€λ₯Ό λ…ΉμŒν•΄μ£Όμ„Έμš”.", "", "", None
176
+
177
+
178
+ # Create Gradio Interface
179
+ with gr.Blocks(title="Chloe's Voice Komentle") as demo:
180
+ gr.Markdown("# 🎀 Chloe's Voice Komentle")
181
+
182
+ # Puzzle info section
183
+ with gr.Row():
184
+ puzzle_info = gr.Markdown(value=get_today_puzzle())
185
+ refresh_btn = gr.Button("πŸ”„ 퍼즐 정보 μƒˆλ‘œκ³ μΉ¨", size="sm")
186
+
187
+ with gr.Row():
188
+ with gr.Column(scale=1):
189
+ # Audio recording
190
+ gr.Markdown("### πŸŽ™οΈ μŒμ„± λ…ΉμŒ")
191
+ audio_input = gr.Audio(
192
+ sources=["microphone"],
193
+ type="filepath",
194
+ label="마이크둜 λ…ΉμŒ",
195
+ format="wav",
196
+ )
197
+
198
+ # Date input (auto-filled with today)
199
+ date_input = gr.Textbox(
200
+ label="λ‚ μ§œ (YYYY-MM-DD)",
201
+ value=datetime.now().strftime("%Y-%m-%d"),
202
+ interactive=True,
203
+ )
204
+
205
+ # Submit button
206
+ submit_btn = gr.Button("🎯 λΆ„μ„ν•˜κΈ°", variant="primary", size="lg")
207
+ reset_btn = gr.Button("πŸ”„ μƒˆ κ²Œμž„ μ‹œμž‘", variant="secondary")
208
+
209
+ with gr.Column(scale=1):
210
+ # Results
211
+ gr.Markdown("### πŸ“Š κ²°κ³Ό")
212
+ result_output = gr.Markdown(label="κ²°κ³Ό")
213
+ scores_output = gr.Markdown(label="점수 상세")
214
+
215
+ # Hints section
216
+ with gr.Row():
217
+ with gr.Column():
218
+ hint_output = gr.Markdown(label="힌트 및 μ‘°μ–Έ")
219
+
220
+ with gr.Column():
221
+ hint_image = gr.Image(label="힌트 이미지", show_label=True)
222
+
223
+ # Event handlers
224
+ submit_btn.click(
225
+ fn=analyze_voice,
226
+ inputs=[audio_input, date_input],
227
+ outputs=[result_output, scores_output, hint_output, hint_image],
228
+ )
229
+
230
+ reset_btn.click(
231
+ fn=reset_session,
232
+ inputs=[],
233
+ outputs=[result_output, scores_output, hint_output, hint_image],
234
+ )
235
+
236
+ refresh_btn.click(fn=get_today_puzzle, inputs=[], outputs=[puzzle_info])
237
+
238
+ # Footer
239
+ gr.Markdown("---\n**Powered by:** VoiceKit MCP + Gemini AI")
240
+
241
+ # Launch configuration
242
+ if __name__ == "__main__":
243
+ print("πŸš€ Starting Chloe's Voice Komentle...")
244
+
245
+ # Initialize backend (VoiceKit MCP session)
246
+ print("⏳ Initializing VoiceKit MCP...")
247
+
248
+ async def init_backend():
249
+ """Initialize backend resources"""
250
+ async with lifespan(backend_app):
251
+ print("βœ“ VoiceKit MCP initialized")
252
+ # Keep the lifespan context active
253
+ await asyncio.Event().wait() # Wait forever
254
+
255
+ # Run backend initialization in background
256
+ import threading
257
+
258
+ def run_backend_init():
259
+ asyncio.run(init_backend())
260
+
261
+ backend_thread = threading.Thread(target=run_backend_init, daemon=True)
262
+ backend_thread.start()
263
+
264
+ # Wait a bit for initialization
265
+ import time
266
+
267
+ time.sleep(5)
268
+ print("βœ“ Backend initialized")
269
+
270
+ # Launch Gradio
271
+ server_host = os.getenv("SERVER_HOST")
272
+ frontend_port = int(os.getenv("FRONTEND_PORT"))
273
+ demo.launch(
274
+ server_name=server_host, # Listen on all interfaces
275
+ server_port=frontend_port, # Default Gradio port
276
+ share=False, # Set to True for public link
277
+ show_error=True,
278
+ allowed_paths=[os.path.join(os.path.dirname(__file__), "hints", "audio")], # Allow serving TTS audio hints
279
+ )
postgresql.py ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from dotenv import load_dotenv
3
+ from sqlalchemy import create_engine, text
4
+ import pandas as pd
5
+ import gradio as gr
6
+
7
+ load_dotenv()
8
+
9
+ DATABASE_URL = os.getenv('DATABASE_URL')
10
+ engine = create_engine(DATABASE_URL)
11
+
12
+ def test_connection():
13
+ """Test database connection and show basic info"""
14
+ try:
15
+ with engine.connect() as connection:
16
+ result = connection.execute(text("SELECT version()"))
17
+ version = result.scalar()
18
+ return f"βœ… μ—°κ²° 성곡!\nλ°μ΄ν„°λ² μ΄μŠ€ 버전: {version}"
19
+ except Exception as e:
20
+ return f"❌ μ—°κ²° μ‹€νŒ¨: {e}"
21
+
22
+ def get_all_puzzles():
23
+ """Fetch all puzzles from database"""
24
+ try:
25
+ query = text("SELECT * FROM puzzles ORDER BY puzzle_number")
26
+ df = pd.read_sql_query(query, engine)
27
+ return df
28
+ except Exception as e:
29
+ return pd.DataFrame({"error": [str(e)]})
30
+
31
+ def get_puzzle_by_difficulty(difficulty):
32
+ """Fetch puzzles filtered by difficulty"""
33
+ try:
34
+ if difficulty == "전체":
35
+ query = text("SELECT * FROM puzzles ORDER BY puzzle_number")
36
+ df = pd.read_sql_query(query, engine)
37
+ else:
38
+ query = text("SELECT * FROM puzzles WHERE difficulty = :difficulty ORDER BY puzzle_number")
39
+ df = pd.read_sql_query(query, engine, params={"difficulty": difficulty})
40
+ return df
41
+ except Exception as e:
42
+ return pd.DataFrame({"error": [str(e)]})
43
+
44
+ def get_table_count():
45
+ """Get total count of puzzles"""
46
+ try:
47
+ query = text("SELECT COUNT(*) FROM puzzles")
48
+ with engine.connect() as connection:
49
+ result = connection.execute(query)
50
+ count = result.scalar()
51
+ return f"전체 퍼즐 개수: {count}개"
52
+ except Exception as e:
53
+ return f"였λ₯˜: {e}"
54
+
55
+ with gr.Blocks(title="Puzzles DB ν…ŒμŠ€νŠΈ") as demo:
56
+ gr.Markdown("# 🧩 Puzzles λ°μ΄ν„°λ² μ΄μŠ€ μ—°κ²° ν…ŒμŠ€νŠΈ")
57
+
58
+ with gr.Row():
59
+ with gr.Column():
60
+ test_btn = gr.Button("μ—°κ²° ν…ŒμŠ€νŠΈ", variant="primary")
61
+ connection_status = gr.Textbox(label="μ—°κ²° μƒνƒœ", lines=3)
62
+
63
+ with gr.Column():
64
+ count_btn = gr.Button("데이터 개수 확인")
65
+ count_output = gr.Textbox(label="개수")
66
+
67
+ gr.Markdown("## 전체 데이터 쑰회")
68
+ load_all_btn = gr.Button("λͺ¨λ“  퍼즐 뢈러였기")
69
+ all_data = gr.Dataframe(label="전체 퍼즐 데이터")
70
+
71
+ gr.Markdown("## λ‚œμ΄λ„λ³„ 필터링")
72
+ difficulty_filter = gr.Dropdown(
73
+ ["전체", "easy", "medium", "hard"],
74
+ value="전체",
75
+ label="λ‚œμ΄λ„ 선택"
76
+ )
77
+ filter_btn = gr.Button("ν•„ν„° 적용")
78
+ filtered_data = gr.Dataframe(label="ν•„ν„°λ§λœ 데이터")
79
+
80
+ # Event handlers
81
+ test_btn.click(test_connection, outputs=connection_status)
82
+ count_btn.click(get_table_count, outputs=count_output)
83
+ load_all_btn.click(get_all_puzzles, outputs=all_data)
84
+ filter_btn.click(get_puzzle_by_difficulty, inputs=difficulty_filter, outputs=filtered_data)
85
+ difficulty_filter.change(get_puzzle_by_difficulty, inputs=difficulty_filter, outputs=filtered_data)
86
+
87
+ if __name__ == "__main__":
88
+ demo.launch()
reference_audio/movie/wingardiumleviosa.mp3 ADDED
Binary file (35.8 kB). View file
 
requirements.txt CHANGED
@@ -2,17 +2,26 @@ aiofiles==24.1.0
2
  annotated-doc==0.0.4
3
  annotated-types==0.7.0
4
  anyio==4.11.0
 
 
5
  brotli==1.2.0
 
6
  certifi==2025.11.12
 
 
7
  click==8.3.1
8
  colorama==0.4.6
 
 
9
  dotenv==0.9.9
10
- elevenlabs==0.2.26
 
11
  fastapi==0.122.0
12
  ffmpy==1.0.0
13
  filelock==3.20.0
14
  fsspec==2025.10.0
15
- google-genai>=0.1.0
 
16
  gradio==6.0.0
17
  gradio_client==2.0.0.dev3
18
  greenlet==3.2.4
@@ -21,42 +30,69 @@ h11==0.16.0
21
  hf-xet==1.2.0
22
  httpcore==1.0.9
23
  httpx==0.28.1
 
24
  huggingface_hub==1.1.5
25
  idna==3.11
 
 
 
26
  Jinja2==3.1.6
 
 
27
  markdown-it-py==4.0.0
28
  MarkupSafe==3.0.3
29
- mcp>=1.0.0
 
30
  mdurl==0.1.2
31
- numpy>=1.24.0,<2.0.0
32
  orjson==3.11.4
33
  packaging==25.0
34
- pandas>=2.0.0,<2.3.0
35
- pillow==11.3.0
 
 
 
36
  psycopg2-binary==2.9.11
 
 
 
 
 
37
  pydantic==2.12.4
 
38
  pydantic_core==2.41.5
39
  pydub==0.25.1
40
  Pygments==2.19.2
 
41
  python-dateutil==2.9.0.post0
42
  python-dotenv==1.2.1
43
  python-multipart==0.0.20
44
  pytz==2025.2
45
  PyYAML==6.0.3
 
46
  requests==2.31.0
47
  rich==14.2.0
 
 
48
  safehttpx==0.1.7
49
  semantic-version==2.10.0
50
  shellingham==1.5.4
51
  six==1.17.0
52
  sniffio==1.3.1
53
  SQLAlchemy==2.0.44
 
 
54
  starlette==0.50.0
 
55
  tomlkit==0.13.3
56
  tqdm==4.67.1
 
57
  typer==0.20.0
58
  typer-slim==0.20.0
59
  typing-inspection==0.4.2
60
  typing_extensions==4.15.0
61
  tzdata==2025.2
62
- uvicorn==0.38.0
 
 
 
 
2
  annotated-doc==0.0.4
3
  annotated-types==0.7.0
4
  anyio==4.11.0
5
+ asttokens==3.0.1
6
+ attrs==25.4.0
7
  brotli==1.2.0
8
+ cachetools==6.2.2
9
  certifi==2025.11.12
10
+ cffi==2.0.0
11
+ charset-normalizer==3.4.4
12
  click==8.3.1
13
  colorama==0.4.6
14
+ cryptography==46.0.3
15
+ decorator==5.2.1
16
  dotenv==0.9.9
17
+ elevenlabs==2.24.0
18
+ executing==2.2.1
19
  fastapi==0.122.0
20
  ffmpy==1.0.0
21
  filelock==3.20.0
22
  fsspec==2025.10.0
23
+ google-auth==2.43.0
24
+ google-genai==1.52.0
25
  gradio==6.0.0
26
  gradio_client==2.0.0.dev3
27
  greenlet==3.2.4
 
30
  hf-xet==1.2.0
31
  httpcore==1.0.9
32
  httpx==0.28.1
33
+ httpx-sse==0.4.3
34
  huggingface_hub==1.1.5
35
  idna==3.11
36
+ ipython==9.7.0
37
+ ipython_pygments_lexers==1.1.1
38
+ jedi==0.19.2
39
  Jinja2==3.1.6
40
+ jsonschema==4.25.1
41
+ jsonschema-specifications==2025.9.1
42
  markdown-it-py==4.0.0
43
  MarkupSafe==3.0.3
44
+ matplotlib-inline==0.2.1
45
+ mcp==1.22.0
46
  mdurl==0.1.2
47
+ numpy==1.26.4
48
  orjson==3.11.4
49
  packaging==25.0
50
+ pandas==2.2.3
51
+ parso==0.8.5
52
+ pexpect==4.9.0
53
+ pillow==11.3.0
54
+ prompt_toolkit==3.0.52
55
  psycopg2-binary==2.9.11
56
+ ptyprocess==0.7.0
57
+ pure_eval==0.2.3
58
+ pyasn1==0.6.1
59
+ pyasn1_modules==0.4.2
60
+ pycparser==2.23
61
  pydantic==2.12.4
62
+ pydantic-settings==2.12.0
63
  pydantic_core==2.41.5
64
  pydub==0.25.1
65
  Pygments==2.19.2
66
+ PyJWT==2.10.1
67
  python-dateutil==2.9.0.post0
68
  python-dotenv==1.2.1
69
  python-multipart==0.0.20
70
  pytz==2025.2
71
  PyYAML==6.0.3
72
+ referencing==0.37.0
73
  requests==2.31.0
74
  rich==14.2.0
75
+ rpds-py==0.29.0
76
+ rsa==4.9.1
77
  safehttpx==0.1.7
78
  semantic-version==2.10.0
79
  shellingham==1.5.4
80
  six==1.17.0
81
  sniffio==1.3.1
82
  SQLAlchemy==2.0.44
83
+ sse-starlette==3.0.3
84
+ stack-data==0.6.3
85
  starlette==0.50.0
86
+ tenacity==9.1.2
87
  tomlkit==0.13.3
88
  tqdm==4.67.1
89
+ traitlets==5.14.3
90
  typer==0.20.0
91
  typer-slim==0.20.0
92
  typing-inspection==0.4.2
93
  typing_extensions==4.15.0
94
  tzdata==2025.2
95
+ urllib3==2.5.0
96
+ uvicorn==0.38.0
97
+ wcwidth==0.2.14
98
+ websockets==15.0.1
voice_app.py ADDED
@@ -0,0 +1,148 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Gradio Voice Recording App for Komentle
3
+ Records user voice and sends to FastAPI backend
4
+ """
5
+ import gradio as gr
6
+ import requests
7
+ import uuid
8
+ from datetime import datetime
9
+ import os
10
+
11
+ # Backend API URL (ν™˜κ²½λ³€μˆ˜λ‘œ 관리 κ°€λŠ₯)
12
+ BACKEND_URL = os.getenv("BACKEND_URL", "http://localhost:8000")
13
+
14
def process_voice(audio):
    """
    Process a recorded voice clip and send it to the FastAPI backend.

    Args:
        audio: Path to the recorded audio file (Gradio `type="filepath"`),
            or None when nothing was recorded.

    Returns:
        str | dict: Formatted result text on success; otherwise a dict of
        the form {"status": "error", "message": ...} (same shape as the
        original implementation).
    """
    if audio is None:
        return {
            "status": "error",
            "message": "μŒμ„±μ΄ λ…ΉμŒλ˜μ§€ μ•Šμ•˜μŠ΅λ‹ˆλ‹€."
        }

    try:
        # Generate or retrieve a session ID (a real deployment needs proper
        # session management instead of a fresh UUID per request).
        session_id = str(uuid.uuid4())
        today = datetime.now().strftime("%Y-%m-%d")

        # FIX: open the audio file with a context manager so the handle is
        # always closed. The original passed `open(audio, 'rb')` directly
        # into the files dict and leaked one file descriptor per request.
        with open(audio, 'rb') as audio_file:
            files = {
                'audio': ('audio.wav', audio_file, 'audio/wav')
            }
            data = {
                'date': today,
                'session_id': session_id
            }

            # Send to backend
            response = requests.post(
                f"{BACKEND_URL}/api/analyze-voice",
                files=files,
                data=data,
                timeout=30
            )

        if response.status_code == 200:
            result = response.json()
            return format_result(result)
        else:
            return {
                "status": "error",
                "message": f"λ°±μ—”λ“œ 였λ₯˜: {response.status_code}"
            }

    except Exception as e:
        return {
            "status": "error",
            "message": f"였λ₯˜ λ°œμƒ: {str(e)}"
        }
66
+
67
def format_result(result):
    """Render the backend analysis response as a user-facing display string.

    An error response short-circuits to a single error line; otherwise the
    category, correctness verdict, per-metric scores, overall score, and
    optional AI advice are assembled in order.
    """
    if result.get("status") == "error":
        return f"❌ 였λ₯˜: {result.get('message')}"

    # Missing numeric fields fall back to 0.0, matching the backend contract.
    scores = {key: result.get(key, 0.0)
              for key in ("pitch", "rhythm", "energy",
                          "pronunciation", "transcript", "overall")}
    category = result.get("category", "unknown")
    advice = result.get("advice", "")
    is_correct = result.get("is_correct", False)  # whether the guess was right

    parts = [f"🎯 μΉ΄ν…Œκ³ λ¦¬: {category}\n\n"]

    # Correctness verdict
    parts.append(
        "πŸŽ‰ μ •λ‹΅μž…λ‹ˆλ‹€! μΆ•ν•˜ν•©λ‹ˆλ‹€!\n\n"
        if is_correct
        else "❌ 아직 정닡이 μ•„λ‹™λ‹ˆλ‹€. λ‹€μ‹œ λ„μ „ν•΄λ³΄μ„Έμš”!\n\n"
    )

    parts.append("πŸ“Š 뢄석 κ²°κ³Ό:\n")
    parts.append(f" - μŒλ†’μ΄ (Pitch): {scores['pitch']:.1f}/100\n")
    parts.append(f" - 리듬감 (Rhythm): {scores['rhythm']:.1f}/100\n")
    parts.append(f" - μ—λ„ˆμ§€ (Energy): {scores['energy']:.1f}/100\n")
    parts.append(f" - 발음 (Pronunciation): {scores['pronunciation']:.1f}/100\n")
    parts.append(f" - λŒ€μ‚¬ 정확도 (Transcript): {scores['transcript']:.1f}/100\n")
    parts.append(f"\n⭐ 총점 (Overall): {scores['overall']:.1f}/100\n")

    # AI advice (only when the backend supplied one)
    if advice:
        parts.append(f"\nπŸ’‘ AI μ‘°μ–Έ:\n{advice}\n")

    return "".join(parts)
103
+
104
+ # Gradio Interface
105
+ with gr.Blocks(title="🎀 Komentle Voice Challenge") as demo:
106
+ gr.Markdown("# 🎀 Komentle Voice Challenge")
107
+ gr.Markdown("였늘의 문제λ₯Ό μŒμ„±μœΌλ‘œ λ„μ „ν•˜μ„Έμš”!")
108
+
109
+ gr.Markdown("### μ‚¬μš© 방법")
110
+ gr.Markdown("""
111
+ 1. πŸŽ™οΈ 마이크 λ²„νŠΌμ„ ν΄λ¦­ν•˜μ—¬ λ…ΉμŒ μ‹œμž‘
112
+ 2. 였늘의 문제λ₯Ό μŒμ„±μœΌλ‘œ λ§ν•˜κΈ°
113
+ 3. λ…ΉμŒ μ™„λ£Œ ν›„ '뢄석 μ‹œμž‘' λ²„νŠΌ 클릭
114
+ 4. AIκ°€ λΆ„μ„ν•œ 점수 확인
115
+ """)
116
+
117
+ with gr.Row():
118
+ with gr.Column():
119
+ audio_input = gr.Audio(
120
+ sources=["microphone"],
121
+ type="filepath",
122
+ label="πŸŽ™οΈ μŒμ„± λ…ΉμŒ",
123
+ format="wav"
124
+ )
125
+ submit_btn = gr.Button("뢄석 μ‹œμž‘", variant="primary", size="lg")
126
+
127
+ with gr.Column():
128
+ result_output = gr.Textbox(
129
+ label="πŸ“Š 뢄석 κ²°κ³Ό",
130
+ lines=10,
131
+ interactive=False
132
+ )
133
+
134
+
135
+
136
+ # Event handlers
137
+ submit_btn.click(
138
+ fn=process_voice,
139
+ inputs=audio_input,
140
+ outputs=result_output
141
+ )
142
+
143
+ if __name__ == "__main__":
144
+ demo.launch(
145
+ server_name="0.0.0.0",
146
+ server_port=7860,
147
+ share=False
148
+ )