Spaces:

MCP-1st-Birthday
/

VoiceSementle

Running

File size: 32,607 Bytes

"""
플로팅 AI 챗봇 컴포넌트 (Gradio 6 호환)
@gr.render 방식 + Backend AI 연동
"""

import gradio as gr
from typing import List, Dict, Optional
import sys
import os

# Load environment variables from the .env file located three directories
# above this file's directory. This runs at import time, before the project
# imports below.
from dotenv import load_dotenv
_env_path = os.path.join(os.path.dirname(__file__), "../../../.env")
load_dotenv(_env_path)

# GameStateManager is used to read the session UUID out of the game state.
from utils.game_state import GameStateManager

# ElevenLabs TTS integration (Phase 2)
from utils.elevenlabs_tts import (
    should_offer_audio_hint,
    get_audio_hint_for_attempt,
    is_elevenlabs_configured,
    get_status as get_tts_status
)

# The Gemini adapter lives in the backend: the directory three levels up is
# prepended to sys.path so that `gemini_adapter` becomes importable.
# NOTE(review): this mutates sys.path globally at import time - confirm that
# no other module relies on the previous search order.
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../../../"))
from gemini_adapter import chat_with_gemini, chat_with_gemini_and_tools


# ============================================================
# [방식 1] HTTP 요청 - 외부 AI 서버 호출
# 외부 AI API 서버를 사용하는 경우 이 방식 사용
# ============================================================
# import requests
#
# def call_ai_http(message: str, user_id: str, history: List[Dict]) -> str:
#     """
#     HTTP 요청으로 외부 AI 서버 호출
#     """
#     try:
#         response = requests.post(
#             "https://your-ai-server.com/api/chat",  # TODO: 실제 AI 서버 URL
#             json={
#                 "message": message,
#                 "user_id": user_id,
#                 "history": history,
#             },
#             timeout=30,
#         )
#         response.raise_for_status()
#         return response.json().get("answer", "응답을 받지 못했습니다.")
#     except requests.exceptions.RequestException as e:
#         return f"Error: AI 서버 요청 실패 - {str(e)}"


# ============================================================
# [방식 2] Gemini 기반 AI 챗봇 (game_state 컨텍스트 포함)
# ============================================================

def _coerce_score(value):
    """Return ``value`` as a number, or ``None`` when it is not numeric."""
    if isinstance(value, (int, float)):
        return value
    if isinstance(value, str):
        try:
            return float(value)
        except ValueError:
            return None
    return None


def build_system_prompt_from_game_state(game_state: Optional[Dict], include_audio_tool: bool = False) -> str:
    """
    Build the chatbot system prompt, enriched with the hint/score history
    found in ``game_state``.

    Args:
        game_state: Game state dictionary. Its ``guesses`` list is read; each
            guess may carry an ``aiAnalysis`` dict (scores, ``userText``,
            ``advice``, ``answerWord``, ``category``) and a ``guessedWord``.
        include_audio_tool: When True (and the answer word and category are
            known), instructions for the ``generate_audio_hint`` tool are
            added. Callers should pass True only if the tool is really
            offered to the model.

    Returns:
        str: The system prompt. Without any guesses this is the base prompt
        plus a "welcome" section; otherwise it also contains the per-attempt
        history and strategic guidance.
    """
    # Base prompt (Phase 3: Enhanced)
    base_prompt = """You are the help assistant for "Audio Sementle" - an innovative pronunciation puzzle game that combines voice recognition with progressive hint discovery.

## Game Mechanics
This is a BLIND pronunciation game where:
- Players start with NO IDEA what word/phrase to say (completely blind!)
- They can speak ANY word or phrase as their first attempt
- The game analyzes their voice using AI and provides scores (0-100) for:
  - **Pitch**: Tone and melody matching
  - **Rhythm**: Pacing and syllable timing
  - **Energy**: Volume and intensity
  - **Pronunciation**: Phonetic accuracy
  - **Overall**: Combined similarity score
- Based on scores, players must deduce what the target word/phrase is
- Players have UNLIMITED attempts (this is not Wordle's 6-attempt limit!)
- Each attempt provides AI-generated hints that progressively reveal more information
- Goal: Figure out and correctly pronounce the mystery word/phrase

## Your Role as Help Assistant
You are a friendly, encouraging coach who helps users discover the answer through strategic hints:

1. **Explain the game** - When users are confused, explain Audio Sementle mechanics clearly
2. **Give progressive hints** - Start vague, gradually get more specific with each request
3. **Interpret scores intelligently** - Explain what their scores mean and what to try next
4. **Build on previous hints** - NEVER repeat hints, always add new information
5. **Track hint progression** - Remember what was already revealed, provide fresh angles
6. **Suggest strategic next steps** - Based on their score patterns, guide them
7. **Provide encouragement** - Keep users motivated, especially after many attempts
8. **Offer audio hints strategically** - Suggest listening when pronunciation is challenging

## Enhanced Hint Strategy (IMPORTANT)
When users ask for hints, follow this intelligent progression:

**Attempt 1-2** (Just starting):
- Be VERY vague: "Think about things people say" or "This is a common phrase"
- Don't reveal category yet - let them explore

**Attempt 3-4** (Finding direction):
- Reveal category: "This is a famous [category]"
- Add context: "Think about [era/genre/situation]"
- Offer audio hint: "Would you like to hear how the first syllable sounds?"

**Attempt 5-6** (Getting warmer):
- Be more specific: "It's from [specific context]"
- Give structural hints: "It has [X] syllables" or "The stress is on the [position]"
- Proactively offer audio: "Let me play part of it for you"

**Attempt 7-10** (Struggling):
- Very specific hints: "It starts with '[first letter]'" or "It rhymes with [word]"
- Reference their scores: "Your pitch is close but try emphasizing the [part]"
- Automatically provide audio hints

**Attempt 11+** (Really stuck):
- Give strong hints without revealing directly
- Provide almost-complete audio hints
- Encourage them: "You're so close! Just [specific adjustment]"

**Only reveal the full answer if explicitly asked: "what is the answer?"**

## Score Interpretation Guide (Be Specific!)

### Overall Score:
- **< 20**: "You're saying something completely different - try a new approach entirely"
- **20-40**: "There's some phonetic overlap, but you're on the wrong track. Think [category]"
- **40-60**: "You're getting warmer! The sounds are somewhat similar. Focus on [hint]"
- **60-75**: "Very close! You're in the right area, but pronunciation needs adjustment"
- **75-85**: "So close! Minor pronunciation issues - pay attention to [specific metric]"
- **85-95**: "Almost perfect! Just tiny adjustments to [specific area] needed"
- **95+**: "Excellent! Nearly there - just refine your [lowest score area]"

### Individual Metrics:
- **Low Pitch (<40)**: "Try adjusting your tone - go [higher/lower]"
- **Low Rhythm (<40)**: "Focus on the pacing and syllable timing"
- **Low Energy (<40)**: "Try speaking with [more/less] intensity"
- **Low Pronunciation (<40)**: "Work on phonetic accuracy - [audio hint might help]"

### Strategic Advice Based on Patterns:
- **All scores low**: "Try a completely different word/phrase"
- **Some metrics high**: "You're on the right track with [metric], now work on [weak area]"
- **Rhythm good, pronunciation poor**: "You have the rhythm! Now focus on exact sounds"
- **Getting stuck at same score**: "Let's try a different approach - [strategic hint]"

## Audio Hint Usage (Phase 2 Integration)
When audio hints are available, use them strategically:
- Offer audio proactively after attempt 3+
- Say things like: "Would you like to hear how it sounds?"
- When audio is provided, explain: "Listen carefully to the [syllable/rhythm/pronunciation]"
- Audio hints are progressive: first syllable → partial → rhythm → almost full
- Use audio to break through pronunciation barriers

## Communication Style
- **Be friendly and encouraging** - This game is challenging!
- **Use natural language** - Avoid robotic responses
- **Be concise** - 1-3 sentences per response usually
- **Ask engaging questions** - "What do you think your scores are telling you?"
- **Celebrate progress** - "Great improvement! Your overall went from 40 to 65!"
- **Never be condescending** - Even after many attempts

Remember: This is a discovery game. Your job is to guide them to the answer through progressive hints, not to give it away. Make it fun and rewarding!"""

    # No game state or no guesses yet: base prompt plus the welcome strategy.
    if not game_state or not game_state.get("guesses"):
        return base_prompt + """

## Current Status
The user has not made any attempts yet. They are starting completely blind.

## Welcome Message Strategy
Greet them warmly and help them understand the game:

**Tone**: Friendly, exciting, encouraging
**Goal**: Get them to make their first attempt without fear

**Key Points to Convey**:
1. This is Audio Sementle - a voice-based puzzle game (not just pronunciation practice!)
2. They start COMPLETELY BLIND - no clues about what to say
3. First step: Just say ANY word or phrase that comes to mind
4. The game will analyze their voice and give similarity scores
5. Based on scores + hints, they figure out the mystery word/phrase
6. Unlimited attempts - it's about discovery, not pressure!
7. First attempts are always wild guesses - that's the fun part!

**Example Welcome**:
"Welcome! 👋 Ready to play Audio Sementle? Here's the fun part: you start completely blind - you have NO idea what word or phrase to say! Just speak anything that comes to mind, and the game will tell you how close you are. Based on the scores and hints, you'll figure it out. Don't overthink your first attempt - it's supposed to be a shot in the dark! Need help? Just ask!"

**When they ask questions**:
- "How do I play?" → Explain the blind start and score-based discovery
- "Give me a hint" → Encourage them to try first: "Take a guess first! Any word works. Then I can help based on your scores."
- "What should I say?" → "Anything! That's the beauty of Audio Sementle - you discover what to say through playing."
- "I'm confused" → Walk them through: speak → get scores → get hints → figure it out"""

    # The guard above guarantees a non-empty guesses collection from here on.
    guesses = game_state["guesses"]
    attempt_count = len(guesses)

    # The answer word and category are read from the most recent guess
    # (Phase 1 improvement). `or {}` also covers an explicit null aiAnalysis.
    latest_analysis = guesses[-1].get("aiAnalysis") or {}
    answer_word = latest_analysis.get("answerWord", "")
    category = latest_analysis.get("category", "")

    # Prompt sections are collected here and joined with newlines at the end.
    context_parts = [
        base_prompt,
        "\n\n## Current Game Status",
        f"The user has made {attempt_count} attempt(s) so far (unlimited attempts allowed).",
    ]

    # Add answer word and category context if available (Phase 1 improvement)
    if answer_word and category:
        context_parts.extend([
            "\n### SECRET INFORMATION (Do not reveal directly!)",
            f"- The correct answer is: \"{answer_word}\"",
            f"- Category: {category} (meme/movie/song)",
            "- Use this to provide strategic hints without giving away the answer",
            "- You can reference the category and give contextual clues",
        ])

        # Add audio hint capability info (Phase 2 + Tool Calling).
        # Only mention the tool when it will actually be available, to avoid
        # UNEXPECTED_TOOL_CALL errors.
        if include_audio_tool:
            context_parts.extend([
                "\n### AUDIO HINT TOOL (Use ONLY when explicitly requested)",
                "- You have a tool called `generate_audio_hint` that generates TTS audio",
                "- ONLY call this tool when the user EXPLICITLY asks for audio hints:",
                "  - Keywords that REQUIRE audio: 'audio hint', 'play audio', 'TTS', 'hear it', 'how does it sound', 'generate audio', 'listen'",
                "  - Keywords that do NOT require audio: 'hint', 'help', 'clue', 'what is it', general questions",
                "- DO NOT call this tool for general hints or questions - only for explicit audio requests",
                "- Tool parameters:",
                "  - hint_type: 'syllable' | 'partial' | 'rhythm' | 'almost_full'",
                "- Choose hint_type based on attempt count:",
                "  - Attempt 1-4: use 'syllable' (first syllable)",
                "  - Attempt 5-6: use 'partial' (first half)",
                "  - Attempt 7-9: use 'rhythm' (with pauses)",
                "  - Attempt 10+: use 'almost_full' (almost complete)",
            ])
        context_parts.append("")

    context_parts.append("They are trying to figure out what word/phrase to pronounce.\n")

    # Per-attempt scores and feedback.
    for i, guess in enumerate(guesses, 1):
        ai_analysis = guess.get("aiAnalysis") or {}

        # Scores (0-100); "N/A" is shown when a score is missing.
        pitch = ai_analysis.get("pitch", "N/A")
        rhythm = ai_analysis.get("rhythm", "N/A")
        energy = ai_analysis.get("energy", "N/A")
        pronunciation = ai_analysis.get("pronunciation", "N/A")
        overall = ai_analysis.get("overall_score", "N/A")  # Match standardized field name

        # Recognized text stored on the guess itself.
        guessed_word = guess.get("guessedWord", "")
        # Actual STT result (what the user pronounced).
        user_spoken_text = ai_analysis.get("userText", "")

        # Advice/hint the AI gave for this attempt, if any.
        advice = ai_analysis.get("advice", "")

        context_parts.append(f"### Attempt {i}")
        # Prefer the STT text (more accurate for pronunciation feedback);
        # fall back to guessedWord unless it is a "Score:..." placeholder.
        if user_spoken_text:
            context_parts.append(f"- What they pronounced (STT): \"{user_spoken_text}\"")
        elif guessed_word and not guessed_word.startswith("Score:"):
            context_parts.append(f"- What they said: \"{guessed_word}\"")
        context_parts.append(f"- Scores: Pitch={pitch}, Rhythm={rhythm}, Energy={energy}, Pronunciation={pronunciation}, Overall={overall}")

        if advice:
            context_parts.append(f"- Previous hint given: \"{advice}\"")

        context_parts.append("")  # blank separator line

    # Final context (Phase 3: Enhanced strategic guidance)
    context_parts.append("\n## Strategic Guidance for Current Situation")

    # Analyze score trends if multiple attempts exist.
    # NOTE(review): the score-specific guidance below is also skipped for a
    # single attempt, as in the original - confirm whether that is intended.
    if attempt_count > 1:
        previous_analysis = guesses[-2].get("aiAnalysis") or {}
        # A missing key keeps the historical default of 0; a present but
        # non-numeric value (None, "N/A", ...) disables the comparison
        # instead of raising TypeError.
        latest_overall = _coerce_score(latest_analysis.get("overall_score", 0))
        prev_overall = _coerce_score(previous_analysis.get("overall_score", 0))

        if latest_overall is not None and prev_overall is not None:
            if latest_overall > prev_overall + 10:
                context_parts.append(f"📈 **Progress Detected**: User improved from {prev_overall} to {latest_overall}! Encourage them and guide them further.")
            elif latest_overall < prev_overall - 10:
                context_parts.append(f"📉 **Regression Detected**: User dropped from {prev_overall} to {latest_overall}. They might be confused. Help them get back on track.")
            elif abs(latest_overall - prev_overall) < 5 and attempt_count >= 3:
                context_parts.append(f"🔄 **Plateau Detected**: User stuck around {latest_overall} for multiple attempts. Suggest a different approach or offer audio hint.")

        # Score-specific guidance
        if latest_overall is not None:
            if latest_overall < 30:
                context_parts.append(f"⚠️ **Very Low Score ({latest_overall})**: User is way off. Give category hint if not already given.")
            elif latest_overall >= 75:
                context_parts.append(f"🎯 **Very Close ({latest_overall})**: User is almost there! Focus on pronunciation refinement.")

    context_parts.extend([
        "\n## How to Help the User (Context-Aware Responses)",
        "Based on their attempts and scores, provide intelligent responses:",
        "",
        "**Common Questions**:",
        "- \"How do I play?\" → Explain Audio Sementle mechanics with enthusiasm",
        "- \"Give me a hint\" → Follow the attempt-based progression strategy (see above)",
        "- \"What should I try next?\" → Analyze their score pattern and suggest strategic next steps",
        "- \"Why are my scores low?\" → Interpret scores specifically and suggest what to change",
        "- \"What does the previous hint mean?\" → Explain and expand with NEW information",
        "- \"Can you play it for me?\" / \"How does it sound?\" → Trigger audio hint generation",
        "- \"What is the answer?\" → Only reveal if explicitly asked this exact question",
        "",
        "**Score-Based Responses**:",
        "- If scores improving: Celebrate progress! 'Great improvement! You're getting warmer!'",
        "- If scores stuck: Suggest pivot. 'Try a different approach - think about [new angle]'",
        "- If scores decreasing: Redirect. 'Your previous attempt was closer. Go back to that direction.'",
        "- If very close (>80): Focus on specific metric. 'Work on your [weakest area]'",
        "",
        "**Audio Hint Triggers**:",
        "- Automatically offer after attempt 3+ if pronunciation score is low",
        "- Proactively suggest when user seems stuck (same score 3+ times)",
        "- Always offer when user asks about pronunciation or sounds",
        "",
        "Remember: They have UNLIMITED attempts. Keep it fun, encouraging, and strategic!",
    ])

    return "\n".join(context_parts)


def call_ai_backend(message: str, user_id: str, history: List[Dict], game_state: Optional[Dict] = None):
    """
    Call Gemini with the game-state system prompt and, when possible, the
    audio-hint tool.

    The audio tool is offered to the model whenever ElevenLabs is configured
    and the answer word is known; the model decides whether to call it based
    on the system prompt (no keyword gating on ``message``).

    Args:
        message: The user's message. Not sent separately: the conversation
            passed to Gemini is ``history``, so the caller must already have
            appended the message to it.
        user_id: User/session ID. Currently unused by this function.
        history: Conversation history, forwarded as ``messages`` to Gemini.
        game_state: Game state (guesses with scores, answer word, reference
            audio path).

    Returns:
        str or tuple: The AI response text, or ``(text, audio_path)`` when an
        audio hint was generated. Errors are reported as a user-facing string
        rather than raised.
    """
    try:
        # Extract context from game_state; `or []` / `or {}` also cover
        # keys that are present but null.
        guesses = (game_state.get('guesses') if game_state else None) or []
        attempt_count = len(guesses)
        answer_word = ""
        reference_audio_path = ""
        if guesses:
            ai_analysis = guesses[-1].get('aiAnalysis') or {}
            answer_word = ai_analysis.get('answerWord') or ""
            reference_audio_path = ai_analysis.get('referenceAudioPath') or ""

        elevenlabs_ready = is_elevenlabs_configured()

        # LLM-driven approach: enable the audio tool whenever it is technically
        # available and let Gemini decide when to USE it based on user intent
        # (the system prompt guides this). Better than keyword gating because it
        # - handles any phrasing naturally ("speak it", "let me hear", etc.)
        # - understands context (won't trigger on "I lost my voice")
        # - works across languages without maintaining keyword lists
        tools_will_be_enabled = bool(elevenlabs_ready and answer_word)

        # Build the system prompt; the audio tool is only described when it
        # will really be passed to the model.
        system_prompt = build_system_prompt_from_game_state(game_state, include_audio_tool=tools_will_be_enabled)

        print("[CHATBOT] Calling Gemini with context:")
        print(f"[CHATBOT] ElevenLabs ready: {elevenlabs_ready}, Answer word: '{answer_word}', Tools enabled: {tools_will_be_enabled}")

        # Define the audio hint tool if it can be served.
        tools = []
        if tools_will_be_enabled:
            # Word count is embedded in the tool description so the model can
            # pick a valid word_index.
            word_count = len(answer_word.split())
            tools = [{
                "name": "generate_audio_hint",
                "description": "Generate an audio pronunciation hint using TTS.",
                "input_schema": {
                    "type": "object",
                    "properties": {
                        "hint_type": {
                            "type": "string",
                            "enum": ["syllable", "partial", "rhythm", "almost_full"],
                            "description": "Type of audio hint: 'syllable' (first syllable), 'partial' (first half), 'rhythm' (with pauses), 'almost_full' (missing last part)"
                        },
                        "word_index": {
                            "type": "integer",
                            "description": f"Which word to hint (0-indexed). The phrase has {word_count} word(s). Use 0 for first word, 1 for second word, etc. If user knows first word but not second, use 1."
                        }
                    },
                    "required": ["hint_type"]
                }
            }]
            print("[CHATBOT] Audio tool enabled - Gemini decides whether to call it.")

        # Call Gemini with tools
        response_text, tool_calls, error = chat_with_gemini_and_tools(
            system_prompt=system_prompt,
            messages=history,
            tools=tools if tools else None,
            max_tokens=1024,
            temperature=0.8
        )

        if error:
            print(f"[CHATBOT ERROR] {error}")
            return f"Sorry, I encountered an error: {error}\n\nPlease try again in a moment."

        # Handle tool calls
        for tool_call in tool_calls or []:
            if tool_call.get('name') != 'generate_audio_hint':
                continue

            tool_input = tool_call.get('input') or {}
            word_index = tool_input.get('word_index', 0)  # Default to first word
            # NOTE(review): the model's `hint_type` argument is not forwarded;
            # the portion/type is derived from the attempt count by
            # extract_hint_portion_for_word - confirm this is intended.

            # Extract the portion to speak based on answer_word, attempt
            # count and word_index.
            from utils.elevenlabs_tts import extract_hint_portion_for_word, generate_audio_hint

            text_to_speak, actual_hint_type = extract_hint_portion_for_word(answer_word, attempt_count, word_index)
            # The reference audio path is passed along (voice cloning when available).
            audio_path = generate_audio_hint(text_to_speak, actual_hint_type, reference_audio_path=reference_audio_path)

            if audio_path:
                # Always include text with the audio hint.
                if not response_text or response_text.strip() == "":
                    response_text = "🎵 Here's an audio hint! Listen to how it sounds:"
                # Return text + audio
                return (response_text, audio_path)

            print("[CHATBOT] Audio hint generation failed")
            # response_text may be empty/None when the model answered with a
            # tool call only, so normalise before concatenating.
            response_text = (response_text or "") + "\n\n(Audio hint generation failed - please check ElevenLabs configuration)"

        return response_text

    except Exception as e:
        # Top-level boundary for the chat handler: log the traceback and
        # return a readable message instead of propagating to the UI.
        error_msg = f"Unexpected error: {type(e).__name__}: {str(e)}"
        print(f"[CHATBOT ERROR] {error_msg}")
        import traceback
        traceback.print_exc()
        return f"Sorry, something went wrong. Please try again.\n\nError: {error_msg}"


# ============================================================
# UUID 가져오기 (game_state에서)
# ============================================================
def get_user_id_from_state(game_state: Optional[Dict]) -> str:
    """
    Look up the user's session ID (UUID) in ``game_state``.

    Args:
        game_state: Game state dictionary, or None/empty when unavailable.

    Returns:
        str: The value returned by ``GameStateManager.get_session_id`` for a
        non-empty state, otherwise the placeholder ``"unknown_user"``.
    """
    # Guard clause: nothing to look up without a (non-empty) game state.
    if not game_state:
        return "unknown_user"
    return GameStateManager.get_session_id(game_state)


class FloatingChatbotComponent:
    """Floating AI chatbot component.

    The UI is built statically and its open/closed state is toggled from
    JavaScript (a CSS class on the container) instead of using @gr.render:
    in Gradio 6, event listeners declared inside @gr.render can raise
    KeyError because function IDs no longer match after a re-render.
    """

    def __init__(self, title: str = "Help") -> None:
        """Store the header title; components are created later in render()."""
        self.title = title
        self.initial_history: List[Dict] = []  # welcome message(s), set in render()
        self.chat_history = None  # gr.State holding the conversation
        self.game_state_ref = None  # game_state component passed in from outside
        self.toggle = None  # gr.Checkbox that opens/closes the chat
        self.chat_container = None  # gr.Column wrapping the chat UI

    def render(self, game_state=None):
        """
        Render the chatbot components (call inside a gr.Blocks context).

        Args:
            game_state: gr.BrowserState passed in from outside (carries the
                UUID). When None, handlers run without game context.

        Returns:
            The toggle gr.Checkbox that controls the chat's visibility.
        """
        # Initial welcome message
        welcome_message = """Welcome to Audio Sementle! 👋 Get ready for a unique and super fun pronunciation puzzle game where your voice is the key!

Here's how it works:

1. **You start completely blind!** You have absolutely NO idea what the mystery word or phrase is. That's part of the fun!
2. **Your first move:** Just speak any word or phrase that comes to mind. Seriously, anything!
3. **Get instant feedback:** Our AI will listen and give you scores (0-100) for your Pitch, Rhythm, Energy, Pronunciation, and an Overall similarity score to the hidden target.
4. **Deduce and discover:** Based on these scores, and with my help offering progressive hints, you'll start narrowing down what the target phrase might be.
5. **Unlimited attempts:** This isn't like Wordle! You have as many tries as you need to figure it out. It's all about discovery and getting closer with each attempt.

Don't overthink your first try – it's meant to be a shot in the dark! Just say anything you like, and let's see where that takes us. Ready to give it a go? What's the first thing that comes to your mind?"""

        self.initial_history = [{"role": "assistant", "content": welcome_message}]

        # State management
        self.chat_history = gr.State(self.initial_history.copy())
        self.game_state_ref = game_state  # keep a reference to the external game_state

        # Toggle button (Checkbox) - visibility is controlled from JavaScript
        self.toggle = gr.Checkbox(
            label="?",
            value=False,
            elem_id="floating-toggle",
            container=False
        )

        # ============================================================
        # Static UI layout
        # ============================================================
        with gr.Column(elem_id="chat-container") as self.chat_container:
            # Header
            with gr.Row(elem_id="chat-header"):
                gr.HTML(f"<span>{self.title}</span>")
                close_btn = gr.Button(
                    "✕",
                    elem_id="chat-close-btn",
                    scale=0,
                    min_width=32
                )

            # Chatbot (pre-filled with the welcome message)
            chatbot = gr.Chatbot(
                value=self.initial_history,
                height=280,
                elem_id="floating-chatbot",
                show_label=False
            )

            # Input area
            with gr.Row(elem_id="chat-input-area"):
                msg_input = gr.Textbox(
                    placeholder="Type your message...",
                    scale=4,
                    container=False
                )
                send_btn = gr.Button(
                    "➤",
                    scale=0,
                    variant="primary",
                    min_width=50
                )

        # ============================================================
        # Event handler functions
        # ============================================================

        def send_message(message, hist, gs=None):
            """Send a message to the AI (with game_state) and update the chat.

            Returns (cleared textbox value, history for state, history for
            the chatbot display).
            """
            hist = hist or []
            # Ignore empty / whitespace-only / missing input.
            if not message or not message.strip():
                return "", hist, hist

            # Session UUID; get_user_id_from_state already handles a missing state.
            uid = get_user_id_from_state(gs)

            hist.append({"role": "user", "content": message})

            print("[CHATBOT] 메시지 전송")

            # Only plain-text turns are sent to the LLM: earlier audio hints
            # are stored in the history as gr.Audio components, which are
            # display objects rather than message text.
            llm_history = [
                m for m in hist
                if isinstance(m, dict) and isinstance(m.get("content"), str)
            ]

            # AI call (Gemini with game_state context)
            response = call_ai_backend(message, uid, llm_history, game_state=gs)

            print(f"[CHATBOT] AI 응답: {str(response)[:100]}...")

            # A tuple response is (text, audio_path): convert to Gradio 6 format.
            if isinstance(response, tuple):
                text, audio_path = response
                hist.append({"role": "assistant", "content": text})
                if audio_path:
                    # Resolve relative paths against the project root, i.e.
                    # three directories above this file's directory.
                    if not os.path.isabs(audio_path):
                        project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
                        audio_path = os.path.join(project_root, audio_path)
                    # Gradio 6: use a gr.Audio() component for audio content
                    # (reference: chatbot_core_components_simple demo).
                    hist.append({"role": "assistant", "content": gr.Audio(audio_path)})
                return "", hist, hist

            content = response
            # Never show an empty assistant bubble.
            if not isinstance(content, str) or not content.strip():
                content = "Sorry, I couldn't generate a response. Please try again."

            hist.append({"role": "assistant", "content": content})
            return "", hist, hist

        def close_chat_handler():
            """Close-button handler: only sets the checkbox to False."""
            return gr.update(value=False)

        # ============================================================
        # Event wiring
        # ============================================================

        # Toggle event - chat visibility is controlled purely in JavaScript
        self.toggle.change(
            fn=None,
            inputs=[self.toggle],
            outputs=None,
            js="""
            (isChecked) => {
                const container = document.getElementById('chat-container');
                if (container) {
                    if (isChecked) {
                        container.classList.add('chat-open');
                    } else {
                        container.classList.remove('chat-open');
                    }
                }
                return isChecked;
            }
            """
        )

        # Close button - unchecks the checkbox (the JS above handles visibility)
        close_btn.click(
            close_chat_handler,
            outputs=[self.toggle]
        )

        # Message sending events (game_state is added as an input when provided).
        # Compare against None explicitly rather than relying on the
        # truthiness of a Gradio component.
        event_inputs = [msg_input, self.chat_history]
        if self.game_state_ref is not None:
            event_inputs.append(self.game_state_ref)

        msg_input.submit(
            send_message,
            inputs=event_inputs,
            outputs=[msg_input, self.chat_history, chatbot]
        )
        send_btn.click(
            send_message,
            inputs=event_inputs,
            outputs=[msg_input, self.chat_history, chatbot]
        )

        return self.toggle