File size: 32,607 Bytes
3a9eacd
 
 
 
 
 
 
9f30ef0
 
 
 
 
 
 
3a9eacd
 
 
 
9f30ef0
 
 
 
 
 
 
 
 
 
 
 
3a9eacd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9f30ef0
3a9eacd
 
060d8ef
9f30ef0
 
 
 
 
3a9eacd
9f30ef0
 
 
 
ab1ab06
9f30ef0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ab1ab06
9f30ef0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ab1ab06
9f30ef0
 
 
 
 
 
 
 
ab1ab06
9f30ef0
 
 
 
ab1ab06
9f30ef0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
060d8ef
 
8374119
 
 
 
 
 
9f30ef0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8374119
9f30ef0
8374119
 
9f30ef0
 
 
 
 
8374119
 
 
 
9f30ef0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ab1ab06
9f30ef0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3a9eacd
9f30ef0
3a9eacd
 
 
 
 
9f30ef0
3a9eacd
 
9f30ef0
3a9eacd
9f30ef0
 
 
 
 
 
 
 
 
 
 
060d8ef
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9f30ef0
 
48b92eb
 
 
 
 
 
 
 
9f30ef0
 
 
060d8ef
8374119
060d8ef
8374119
 
9f30ef0
 
8374119
9f30ef0
 
 
 
 
 
 
8374119
 
 
 
9f30ef0
 
 
 
 
8374119
 
 
9f30ef0
 
 
 
 
 
 
 
 
3a9eacd
9f30ef0
 
 
3a9eacd
48b92eb
3a9eacd
9f30ef0
 
 
 
 
8374119
48b92eb
3a9eacd
8374119
 
3a9eacd
8374119
9f30ef0
48b92eb
8374119
3a9eacd
9f30ef0
48b92eb
9f30ef0
 
 
 
 
 
 
 
3a9eacd
9f30ef0
3a9eacd
9f30ef0
 
 
 
 
 
3a9eacd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d26ae30
ab1ab06
d26ae30
 
 
 
 
 
 
 
 
 
 
 
 
3a9eacd
d26ae30
3a9eacd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d26ae30
3a9eacd
d26ae30
3a9eacd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
48b92eb
3a9eacd
48b92eb
 
 
 
 
3a9eacd
9f30ef0
 
3a9eacd
 
 
8374119
9f30ef0
 
 
 
8374119
9f30ef0
 
 
48b92eb
 
8374119
 
 
 
 
 
9f30ef0
 
 
 
3a9eacd
 
 
 
48b92eb
3a9eacd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
"""
ํ”Œ๋กœํŒ… AI ์ฑ—๋ด‡ ์ปดํฌ๋„ŒํŠธ (Gradio 6 ํ˜ธํ™˜)
@gr.render ๋ฐฉ์‹ + Backend AI ์—ฐ๋™
"""

import gradio as gr
from typing import List, Dict, Optional
import sys
import os

# Load environment variables from parent directory
from dotenv import load_dotenv
_env_path = os.path.join(os.path.dirname(__file__), "../../../.env")
load_dotenv(_env_path)

# ๊ฒŒ์ž„ ์ƒํƒœ์—์„œ UUID ๊ฐ€์ ธ์˜ค๊ธฐ
from utils.game_state import GameStateManager

# ElevenLabs TTS integration (Phase 2)
from utils.elevenlabs_tts import (
    should_offer_audio_hint,
    get_audio_hint_for_attempt,
    is_elevenlabs_configured,
    get_status as get_tts_status
)

# Gemini adapter import (backend์— ์žˆ์Œ)
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../../../"))
from gemini_adapter import chat_with_gemini, chat_with_gemini_and_tools


# ============================================================
# [๋ฐฉ์‹ 1] HTTP ์š”์ฒญ - ์™ธ๋ถ€ AI ์„œ๋ฒ„ ํ˜ธ์ถœ
# ์™ธ๋ถ€ AI API ์„œ๋ฒ„๋ฅผ ์‚ฌ์šฉํ•˜๋Š” ๊ฒฝ์šฐ ์ด ๋ฐฉ์‹ ์‚ฌ์šฉ
# ============================================================
# import requests
#
# def call_ai_http(message: str, user_id: str, history: List[Dict]) -> str:
#     """
#     HTTP ์š”์ฒญ์œผ๋กœ ์™ธ๋ถ€ AI ์„œ๋ฒ„ ํ˜ธ์ถœ
#     """
#     try:
#         response = requests.post(
#             "https://your-ai-server.com/api/chat",  # TODO: ์‹ค์ œ AI ์„œ๋ฒ„ URL
#             json={
#                 "message": message,
#                 "user_id": user_id,
#                 "history": history,
#             },
#             timeout=30,
#         )
#         response.raise_for_status()
#         return response.json().get("answer", "์‘๋‹ต์„ ๋ฐ›์ง€ ๋ชปํ–ˆ์Šต๋‹ˆ๋‹ค.")
#     except requests.exceptions.RequestException as e:
#         return f"Error: AI ์„œ๋ฒ„ ์š”์ฒญ ์‹คํŒจ - {str(e)}"


# ============================================================
# [๋ฐฉ์‹ 2] Gemini ๊ธฐ๋ฐ˜ AI ์ฑ—๋ด‡ (game_state ์ปจํ…์ŠคํŠธ ํฌํ•จ)
# ============================================================

def build_system_prompt_from_game_state(game_state: Optional[Dict], include_audio_tool: bool = False) -> str:
    """
    Build the help-assistant system prompt from the game state's hint history.

    Args:
        game_state: Game-state dictionary (contains a "guesses" array).
        include_audio_tool: When True, document the `generate_audio_hint`
            tool inside the prompt. Keep False when the tool will not
            actually be passed to the model, otherwise the model may attempt
            an unavailable tool call (UNEXPECTED_TOOL_CALL).

    Returns:
        str: System prompt with the full game context embedded.
    """
    # Base prompt (Phase 3: Enhanced)
    base_prompt = """You are the help assistant for "Audio Sementle" - an innovative pronunciation puzzle game that combines voice recognition with progressive hint discovery.

## Game Mechanics
This is a BLIND pronunciation game where:
- Players start with NO IDEA what word/phrase to say (completely blind!)
- They can speak ANY word or phrase as their first attempt
- The game analyzes their voice using AI and provides scores (0-100) for:
  - **Pitch**: Tone and melody matching
  - **Rhythm**: Pacing and syllable timing
  - **Energy**: Volume and intensity
  - **Pronunciation**: Phonetic accuracy
  - **Overall**: Combined similarity score
- Based on scores, players must deduce what the target word/phrase is
- Players have UNLIMITED attempts (this is not Wordle's 6-attempt limit!)
- Each attempt provides AI-generated hints that progressively reveal more information
- Goal: Figure out and correctly pronounce the mystery word/phrase

## Your Role as Help Assistant
You are a friendly, encouraging coach who helps users discover the answer through strategic hints:

1. **Explain the game** - When users are confused, explain Audio Sementle mechanics clearly
2. **Give progressive hints** - Start vague, gradually get more specific with each request
3. **Interpret scores intelligently** - Explain what their scores mean and what to try next
4. **Build on previous hints** - NEVER repeat hints, always add new information
5. **Track hint progression** - Remember what was already revealed, provide fresh angles
6. **Suggest strategic next steps** - Based on their score patterns, guide them
7. **Provide encouragement** - Keep users motivated, especially after many attempts
8. **Offer audio hints strategically** - Suggest listening when pronunciation is challenging

## Enhanced Hint Strategy (IMPORTANT)
When users ask for hints, follow this intelligent progression:

**Attempt 1-2** (Just starting):
- Be VERY vague: "Think about things people say" or "This is a common phrase"
- Don't reveal category yet - let them explore

**Attempt 3-4** (Finding direction):
- Reveal category: "This is a famous [category]"
- Add context: "Think about [era/genre/situation]"
- Offer audio hint: "Would you like to hear how the first syllable sounds?"

**Attempt 5-6** (Getting warmer):
- Be more specific: "It's from [specific context]"
- Give structural hints: "It has [X] syllables" or "The stress is on the [position]"
- Proactively offer audio: "Let me play part of it for you"

**Attempt 7-10** (Struggling):
- Very specific hints: "It starts with '[first letter]'" or "It rhymes with [word]"
- Reference their scores: "Your pitch is close but try emphasizing the [part]"
- Automatically provide audio hints

**Attempt 11+** (Really stuck):
- Give strong hints without revealing directly
- Provide almost-complete audio hints
- Encourage them: "You're so close! Just [specific adjustment]"

**Only reveal the full answer if explicitly asked: "what is the answer?"**

## Score Interpretation Guide (Be Specific!)

### Overall Score:
- **< 20**: "You're saying something completely different - try a new approach entirely"
- **20-40**: "There's some phonetic overlap, but you're on the wrong track. Think [category]"
- **40-60**: "You're getting warmer! The sounds are somewhat similar. Focus on [hint]"
- **60-75**: "Very close! You're in the right area, but pronunciation needs adjustment"
- **75-85**: "So close! Minor pronunciation issues - pay attention to [specific metric]"
- **85-95**: "Almost perfect! Just tiny adjustments to [specific area] needed"
- **95+**: "Excellent! Nearly there - just refine your [lowest score area]"

### Individual Metrics:
- **Low Pitch (<40)**: "Try adjusting your tone - go [higher/lower]"
- **Low Rhythm (<40)**: "Focus on the pacing and syllable timing"
- **Low Energy (<40)**: "Try speaking with [more/less] intensity"
- **Low Pronunciation (<40)**: "Work on phonetic accuracy - [audio hint might help]"

### Strategic Advice Based on Patterns:
- **All scores low**: "Try a completely different word/phrase"
- **Some metrics high**: "You're on the right track with [metric], now work on [weak area]"
- **Rhythm good, pronunciation poor**: "You have the rhythm! Now focus on exact sounds"
- **Getting stuck at same score**: "Let's try a different approach - [strategic hint]"

## Audio Hint Usage (Phase 2 Integration)
When audio hints are available, use them strategically:
- Offer audio proactively after attempt 3+
- Say things like: "Would you like to hear how it sounds?"
- When audio is provided, explain: "Listen carefully to the [syllable/rhythm/pronunciation]"
- Audio hints are progressive: first syllable โ†’ partial โ†’ rhythm โ†’ almost full
- Use audio to break through pronunciation barriers

## Communication Style
- **Be friendly and encouraging** - This game is challenging!
- **Use natural language** - Avoid robotic responses
- **Be concise** - 1-3 sentences per response usually
- **Ask engaging questions** - "What do you think your scores are telling you?"
- **Celebrate progress** - "Great improvement! Your overall went from 40 to 65!"
- **Never be condescending** - Even after many attempts

Remember: This is a discovery game. Your job is to guide them to the answer through progressive hints, not to give it away. Make it fun and rewarding!"""

    # No game_state or no guesses yet: return base prompt + first-time guidance.
    if not game_state or not game_state.get("guesses"):
        return base_prompt + """

## Current Status
The user has not made any attempts yet. They are starting completely blind.

## Welcome Message Strategy
Greet them warmly and help them understand the game:

**Tone**: Friendly, exciting, encouraging
**Goal**: Get them to make their first attempt without fear

**Key Points to Convey**:
1. This is Audio Sementle - a voice-based puzzle game (not just pronunciation practice!)
2. They start COMPLETELY BLIND - no clues about what to say
3. First step: Just say ANY word or phrase that comes to mind
4. The game will analyze their voice and give similarity scores
5. Based on scores + hints, they figure out the mystery word/phrase
6. Unlimited attempts - it's about discovery, not pressure!
7. First attempts are always wild guesses - that's the fun part!

**Example Welcome**:
"Welcome! ๐Ÿ‘‹ Ready to play Audio Sementle? Here's the fun part: you start completely blind - you have NO idea what word or phrase to say! Just speak anything that comes to mind, and the game will tell you how close you are. Based on the scores and hints, you'll figure it out. Don't overthink your first attempt - it's supposed to be a shot in the dark! Need help? Just ask!"

**When they ask questions**:
- "How do I play?" โ†’ Explain the blind start and score-based discovery
- "Give me a hint" โ†’ Encourage them to try first: "Take a guess first! Any word works. Then I can help based on your scores."
- "What should I say?" โ†’ "Anything! That's the beauty of Audio Sementle - you discover what to say through playing."
- "I'm confused" โ†’ Walk them through: speak โ†’ get scores โ†’ get hints โ†’ figure it out"""

    # At least one guess exists from here on.
    guesses = game_state.get("guesses", [])
    attempt_count = len(guesses)

    # Extract answer_word and category from the most recent guess (Phase 1
    # improvement) — the scoring backend injects them into aiAnalysis.
    latest_analysis = guesses[-1].get("aiAnalysis", {})
    answer_word = latest_analysis.get("answerWord", "")
    category = latest_analysis.get("category", "")

    # Build the hint-history context.
    context_parts = [
        base_prompt,
        "\n\n## Current Game Status",
        f"The user has made {attempt_count} attempt(s) so far (unlimited attempts allowed).",
    ]

    # Add answer word and category context if available (Phase 1 improvement).
    if answer_word and category:
        context_parts.extend([
            "\n### SECRET INFORMATION (Do not reveal directly!)",
            f"- The correct answer is: \"{answer_word}\"",
            f"- Category: {category} (meme/movie/song)",
            "- Use this to provide strategic hints without giving away the answer",
            "- You can reference the category and give contextual clues",
        ])

        # Audio hint capability (Phase 2 + Tool Calling). Only mention the
        # tool when it will actually be available, to avoid
        # UNEXPECTED_TOOL_CALL errors.
        if include_audio_tool:
            context_parts.extend([
                "\n### AUDIO HINT TOOL (Use ONLY when explicitly requested)",
                "- You have a tool called `generate_audio_hint` that generates TTS audio",
                "- ONLY call this tool when the user EXPLICITLY asks for audio hints:",
                "  - Keywords that REQUIRE audio: 'audio hint', 'play audio', 'TTS', 'hear it', 'how does it sound', 'generate audio', 'listen'",
                "  - Keywords that do NOT require audio: 'hint', 'help', 'clue', 'what is it', general questions",
                "- DO NOT call this tool for general hints or questions - only for explicit audio requests",
                "- Tool parameters:",
                "  - hint_type: 'syllable' | 'partial' | 'rhythm' | 'almost_full'",
                "- Choose hint_type based on attempt count:",
                "  - Attempt 1-4: use 'syllable' (first syllable)",
                "  - Attempt 5-6: use 'partial' (first half)",
                "  - Attempt 7-9: use 'rhythm' (with pauses)",
                "  - Attempt 10+: use 'almost_full' (almost complete)",
            ])
        context_parts.append("")

    context_parts.append(f"They are trying to figure out what word/phrase to pronounce.\n")

    # One section per attempt: scores, what was said, and any hint already
    # given — so the model never repeats a hint.
    for i, guess in enumerate(guesses, 1):
        ai_analysis = guess.get("aiAnalysis", {})

        # Scores (0-100, or "N/A" when missing).
        pitch = ai_analysis.get("pitch", "N/A")
        rhythm = ai_analysis.get("rhythm", "N/A")
        energy = ai_analysis.get("energy", "N/A")
        pronunciation = ai_analysis.get("pronunciation", "N/A")
        overall = ai_analysis.get("overall_score", "N/A")  # Match standardized field name

        guessed_word = guess.get("guessedWord", "")
        user_spoken_text = ai_analysis.get("userText", "")  # raw STT result
        advice = ai_analysis.get("advice", "")  # hint previously shown to the user

        context_parts.append(f"### Attempt {i}")
        # Prefer the STT output — it reflects what was actually pronounced.
        if user_spoken_text:
            context_parts.append(f"- What they pronounced (STT): \"{user_spoken_text}\"")
        elif guessed_word and not guessed_word.startswith("Score:"):
            context_parts.append(f"- What they said: \"{guessed_word}\"")
        context_parts.append(f"- Scores: Pitch={pitch}, Rhythm={rhythm}, Energy={energy}, Pronunciation={pronunciation}, Overall={overall}")

        if advice:
            context_parts.append(f"- Previous hint given: \"{advice}\"")

        context_parts.append("")  # blank separator line

    # Final context (Phase 3: Enhanced strategic guidance).
    context_parts.append("\n## Strategic Guidance for Current Situation")

    # Analyze score trends when at least two attempts exist.
    if attempt_count > 1:
        latest_overall = guesses[-1].get("aiAnalysis", {}).get("overall_score", 0)
        prev_overall = guesses[-2].get("aiAnalysis", {}).get("overall_score", 0)

        if latest_overall > prev_overall + 10:
            context_parts.append(f"๐Ÿ“ˆ **Progress Detected**: User improved from {prev_overall} to {latest_overall}! Encourage them and guide them further.")
        elif latest_overall < prev_overall - 10:
            context_parts.append(f"๐Ÿ“‰ **Regression Detected**: User dropped from {prev_overall} to {latest_overall}. They might be confused. Help them get back on track.")
        elif abs(latest_overall - prev_overall) < 5 and attempt_count >= 3:
            context_parts.append(f"๐Ÿ”„ **Plateau Detected**: User stuck around {latest_overall} for multiple attempts. Suggest a different approach or offer audio hint.")

        # Score-specific guidance.
        if latest_overall < 30:
            context_parts.append(f"โš ๏ธ **Very Low Score ({latest_overall})**: User is way off. Give category hint if not already given.")
        elif latest_overall >= 75:
            context_parts.append(f"๐ŸŽฏ **Very Close ({latest_overall})**: User is almost there! Focus on pronunciation refinement.")

    context_parts.extend([
        "\n## How to Help the User (Context-Aware Responses)",
        "Based on their attempts and scores, provide intelligent responses:",
        "",
        "**Common Questions**:",
        "- \"How do I play?\" โ†’ Explain Audio Sementle mechanics with enthusiasm",
        "- \"Give me a hint\" โ†’ Follow the attempt-based progression strategy (see above)",
        "- \"What should I try next?\" โ†’ Analyze their score pattern and suggest strategic next steps",
        "- \"Why are my scores low?\" โ†’ Interpret scores specifically and suggest what to change",
        "- \"What does the previous hint mean?\" โ†’ Explain and expand with NEW information",
        "- \"Can you play it for me?\" / \"How does it sound?\" โ†’ Trigger audio hint generation",
        "- \"What is the answer?\" โ†’ Only reveal if explicitly asked this exact question",
        "",
        "**Score-Based Responses**:",
        "- If scores improving: Celebrate progress! 'Great improvement! You're getting warmer!'",
        "- If scores stuck: Suggest pivot. 'Try a different approach - think about [new angle]'",
        "- If scores decreasing: Redirect. 'Your previous attempt was closer. Go back to that direction.'",
        "- If very close (>80): Focus on specific metric. 'Work on your [weakest area]'",
        "",
        "**Audio Hint Triggers**:",
        "- Automatically offer after attempt 3+ if pronunciation score is low",
        "- Proactively suggest when user seems stuck (same score 3+ times)",
        "- Always offer when user asks about pronunciation or sounds",
        "",
        "Remember: They have UNLIMITED attempts. Keep it fun, encouraging, and strategic!",
    ])

    return "\n".join(context_parts)


def call_ai_backend(message: str, user_id: str, history: List[Dict], game_state: Optional[Dict] = None):
    """
    Call Gemini AI with tool-calling support for audio hints.

    Args:
        message: The user's latest message. NOTE(review): only `history` is
            sent to Gemini here — it is assumed the caller has already
            appended `message` to `history`; confirm against the caller.
        user_id: User ID (from local storage); not used by the Gemini call.
        history: Conversation history passed to Gemini.
        game_state: Game state dictionary (includes the guess/hint history).

    Returns:
        str or tuple: AI response text, or (text, audio_file_path) when an
        audio hint was generated via a tool call.
    """
    try:
        # Pull context (answer word, reference audio) from the latest guess.
        attempt_count = len(game_state.get('guesses', [])) if game_state else 0
        answer_word = ""
        reference_audio_path = ""
        if game_state and game_state.get('guesses'):
            ai_analysis = game_state['guesses'][-1].get('aiAnalysis', {})
            answer_word = ai_analysis.get('answerWord', '')
            reference_audio_path = ai_analysis.get('referenceAudioPath', '')

        elevenlabs_ready = is_elevenlabs_configured()

        # LLM-driven approach: enable the audio tool whenever it is
        # technically available and let Gemini decide when to USE it based
        # on user intent (the system prompt guides this). This is better
        # than keyword-based gating because it:
        # - handles any phrasing naturally ("speak it", "let me hear", etc.)
        # - understands context (won't trigger on "I lost my voice")
        # - works across languages without maintaining keyword lists
        tools_enabled = elevenlabs_ready and bool(answer_word)

        # Only advertise the tool inside the prompt when it will actually be
        # passed to the model, to avoid UNEXPECTED_TOOL_CALL errors.
        system_prompt = build_system_prompt_from_game_state(game_state, include_audio_tool=tools_enabled)

        print(f"[CHATBOT] Calling Gemini with context:")
        print(f"[CHATBOT] ElevenLabs ready: {elevenlabs_ready}, Answer word: '{answer_word}', Tools enabled: {tools_enabled}")

        # Declare the audio-hint tool only when it can really be serviced.
        tools = []
        if tools_enabled:
            # Word count lets the model target a specific word in a phrase.
            word_count = len(answer_word.split())
            tools = [{
                "name": "generate_audio_hint",
                "description": "Generate an audio pronunciation hint using TTS.",
                "input_schema": {
                    "type": "object",
                    "properties": {
                        "hint_type": {
                            "type": "string",
                            "enum": ["syllable", "partial", "rhythm", "almost_full"],
                            "description": "Type of audio hint: 'syllable' (first syllable), 'partial' (first half), 'rhythm' (with pauses), 'almost_full' (missing last part)"
                        },
                        "word_index": {
                            "type": "integer",
                            "description": f"Which word to hint (0-indexed). The phrase has {word_count} word(s). Use 0 for first word, 1 for second word, etc. If user knows first word but not second, use 1."
                        }
                    },
                    "required": ["hint_type"]
                }
            }]
            print(f"[CHATBOT] Audio hint tool registered for this call.")

        # Call Gemini with the (possibly empty) tool list.
        response_text, tool_calls, error = chat_with_gemini_and_tools(
            system_prompt=system_prompt,
            messages=history,
            tools=tools if tools else None,
            max_tokens=1024,
            temperature=0.8
        )

        if error:
            print(f"[CHATBOT ERROR] {error}")
            return f"Sorry, I encountered an error: {error}\n\nPlease try again in a moment."

        # Execute any audio-hint tool call requested by the model.
        if tool_calls:
            for tool_call in tool_calls:
                if tool_call['name'] == 'generate_audio_hint':
                    hint_type = tool_call['input'].get('hint_type', 'syllable')
                    word_index = tool_call['input'].get('word_index', 0)  # default to first word

                    # Local import: the TTS helpers are only needed on this path.
                    from utils.elevenlabs_tts import extract_hint_portion_for_word, generate_audio_hint

                    # NOTE(review): `hint_type` from the model is currently
                    # ignored — extract_hint_portion_for_word derives the hint
                    # level from attempt_count instead. Confirm this is intended.
                    text_to_speak, actual_hint_type = extract_hint_portion_for_word(answer_word, attempt_count, word_index)
                    # Voice cloning from the reference audio is used when available.
                    audio_path = generate_audio_hint(text_to_speak, actual_hint_type, reference_audio_path=reference_audio_path)

                    if audio_path:
                        # Always pair the audio with some text for the chat UI.
                        if not response_text or response_text.strip() == "":
                            response_text = f"๐ŸŽต Here's an audio hint! Listen to how it sounds:"
                        # Return text + audio.
                        return (response_text, audio_path)
                    else:
                        print(f"[CHATBOT] Audio hint generation failed")
                        response_text += "\n\n(Audio hint generation failed - please check ElevenLabs configuration)"

        return response_text

    except Exception as e:
        # Top-level boundary: surface the error to the user instead of crashing the UI.
        error_msg = f"Unexpected error: {type(e).__name__}: {str(e)}"
        print(f"[CHATBOT ERROR] {error_msg}")
        import traceback
        traceback.print_exc()
        return f"Sorry, something went wrong. Please try again.\n\nError: {error_msg}"


# ============================================================
# UUID ๊ฐ€์ ธ์˜ค๊ธฐ (game_state์—์„œ)
# ============================================================
def get_user_id_from_state(game_state: Optional[Dict]) -> str:
    """
    Look up the user's session ID (UUID) from game_state.

    Args:
        game_state: Game-state dictionary, possibly None or empty.

    Returns:
        str: The session UUID, or "unknown_user" when no state is available.
    """
    # Guard clause: no state (None / empty dict) means no session to resolve.
    if not game_state:
        return "unknown_user"
    return GameStateManager.get_session_id(game_state)


class FloatingChatbotComponent:
    """Floating AI chatbot component.

    Visibility is controlled with a CSS class toggled from JavaScript,
    instead of using ``@gr.render``: in Gradio 6, event listeners registered
    inside a ``@gr.render`` block can raise ``KeyError`` on re-render
    because the handler function IDs no longer match.
    """

    def __init__(self, title: str = "Help"):
        self.title = title                # caption shown in the chat header
        self.chat_history = None          # gr.State message history (set in render())
        self.game_state_ref = None        # externally passed game_state reference
        self.toggle = None                # open/close Checkbox (set in render())
        self.chat_container = None        # Column wrapping the chat UI (set in render())

    def render(self, game_state=None):
        """
        Render the chatbot component (must be called inside a gr.Blocks context).

        Args:
            game_state: Externally supplied gr.BrowserState (contains the UUID).

        Returns:
            The toggle Checkbox component, so callers can wire further events.
        """
        # Initial welcome message shown before the user sends anything.
        welcome_message = """Welcome to Audio Sementle! ๐Ÿ‘‹ Get ready for a unique and super fun pronunciation puzzle game where your voice is the key!

Here's how it works:

1. **You start completely blind!** You have absolutely NO idea what the mystery word or phrase is. That's part of the fun!
2. **Your first move:** Just speak any word or phrase that comes to mind. Seriously, anything!
3. **Get instant feedback:** Our AI will listen and give you scores (0-100) for your Pitch, Rhythm, Energy, Pronunciation, and an Overall similarity score to the hidden target.
4. **Deduce and discover:** Based on these scores, and with my help offering progressive hints, you'll start narrowing down what the target phrase might be.
5. **Unlimited attempts:** This isn't like Wordle! You have as many tries as you need to figure it out. It's all about discovery and getting closer with each attempt.

Don't overthink your first try โ€“ it's meant to be a shot in the dark! Just say anything you like, and let's see where that takes us. Ready to give it a go? What's the first thing that comes to your mind?"""

        self.initial_history = [{"role": "assistant", "content": welcome_message}]

        # State management
        self.chat_history = gr.State(self.initial_history.copy())
        self.game_state_ref = game_state  # keep a reference to the external game_state

        # Toggle button (Checkbox) - visibility is driven by the JS callback below
        self.toggle = gr.Checkbox(
            label="?",
            value=False,
            elem_id="floating-toggle",
            container=False
        )

        # ============================================================
        # Static UI layout
        # ============================================================
        with gr.Column(elem_id="chat-container") as self.chat_container:
            # Header row with title and close button
            with gr.Row(elem_id="chat-header"):
                gr.HTML(f"<span>{self.title}</span>")
                close_btn = gr.Button(
                    "โœ•",
                    elem_id="chat-close-btn",
                    scale=0,
                    min_width=32
                )

            # Chat area (seeded with the welcome message)
            chatbot = gr.Chatbot(
                value=self.initial_history,
                height=280,
                elem_id="floating-chatbot",
                show_label=False
            )

            # Input area
            with gr.Row(elem_id="chat-input-area"):
                msg_input = gr.Textbox(
                    placeholder="Type your message...",
                    scale=4,
                    container=False
                )
                send_btn = gr.Button(
                    "โžค",
                    scale=0,
                    variant="primary",
                    min_width=50
                )

        # ============================================================
        # Event handler functions
        # ============================================================

        def send_message(message, hist, gs=None):
            """Append the user message and the AI reply to the history.

            Args:
                message: Raw text from the input box.
                hist: Current chat history (list of role/content dicts).
                gs: Optional game_state used for the UUID and hint context.

            Returns:
                Tuple of (cleared input value, updated history state,
                updated chatbot value).
            """
            # Ignore empty / whitespace-only submissions.
            if not message.strip():
                return "", hist, hist

            # Resolve the user's UUID from game_state (when provided).
            uid = get_user_id_from_state(gs) if gs else "unknown"

            hist = hist or []
            hist.append({"role": "user", "content": message})

            # Console logging (verbose lines kept disabled).
            # print("=" * 60)
            print("[CHATBOT] ๋ฉ”์‹œ์ง€ ์ „์†ก")
            # print(f"  - Session ID (UUID): {uid}")
            # print(f"  - Message: {message}")
            # print(f"  - History Length: {len(hist)}")
            # print(f"  - Game State Attempts: {len(gs.get('guesses', [])) if gs else 0}")
            # print("=" * 60)

            # Call the AI backend (Gemini with game_state context).
            response = call_ai_backend(message, uid, hist, game_state=gs)

            print(f"[CHATBOT] AI ์‘๋‹ต: {str(response)[:100]}...")

            # Convert tuple response (text, audio_path) to Gradio 6 format
            if isinstance(response, tuple):
                text, audio_path = response
                # Convert relative path to absolute path for Gradio
                import os
                if audio_path and not os.path.isabs(audio_path):
                    # Path is relative to project root (3 levels up from this file)
                    # NOTE(review): four dirname() calls resolve to three levels
                    # above this file's directory — confirm this matches the
                    # repository layout.
                    project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
                    audio_path = os.path.join(project_root, audio_path)
                # print(f"[CHATBOT] Audio hint included: {audio_path}")
                # print(f"[CHATBOT] Audio file exists: {os.path.exists(audio_path)}")

                # Gradio 6: Use gr.Audio() component for audio content
                # Reference: chatbot_core_components_simple demo
                hist.append({"role": "assistant", "content": text})
                hist.append({"role": "assistant", "content": gr.Audio(audio_path)})
                return "", hist, hist
            else:
                content = response

            hist.append({"role": "assistant", "content": content})
            return "", hist, hist

        def close_chat_handler():
            """Close-button handler — only unchecks the toggle (JS hides the UI)."""
            # print("[CHATBOT] close button clicked")
            return gr.update(value=False)

        # ============================================================
        # Event wiring
        # ============================================================

        # Toggle event - JavaScript controls the chatbot's visibility
        self.toggle.change(
            fn=None,
            inputs=[self.toggle],
            outputs=None,
            js="""
            (isChecked) => {
                const container = document.getElementById('chat-container');
                if (container) {
                    if (isChecked) {
                        container.classList.add('chat-open');
                    } else {
                        container.classList.remove('chat-open');
                    }
                }
                return isChecked;
            }
            """
        )

        # Close-button event - set the checkbox to False (JS handles visibility)
        close_btn.click(
            close_chat_handler,
            outputs=[self.toggle]
        )

        # Message-send events (include game_state when available)
        event_inputs = [msg_input, self.chat_history]
        if self.game_state_ref:
            event_inputs.append(self.game_state_ref)

        msg_input.submit(
            send_message,
            inputs=event_inputs,
            outputs=[msg_input, self.chat_history, chatbot]
        )
        send_btn.click(
            send_message,
            inputs=event_inputs,
            outputs=[msg_input, self.chat_history, chatbot]
        )

        return self.toggle