|
|
""" |
|
|
ํ๋กํ
AI ์ฑ๋ด ์ปดํฌ๋ํธ (Gradio 6 ํธํ) |
|
|
@gr.render ๋ฐฉ์ + Backend AI ์ฐ๋ |
|
|
""" |
|
|
|
|
|
import gradio as gr |
|
|
from typing import List, Dict, Optional |
|
|
import sys |
|
|
import os |
|
|
|
|
|
|
|
|
from dotenv import load_dotenv |
|
|
_env_path = os.path.join(os.path.dirname(__file__), "../../../.env") |
|
|
load_dotenv(_env_path) |
|
|
|
|
|
|
|
|
from utils.game_state import GameStateManager |
|
|
|
|
|
|
|
|
from utils.elevenlabs_tts import ( |
|
|
should_offer_audio_hint, |
|
|
get_audio_hint_for_attempt, |
|
|
is_elevenlabs_configured, |
|
|
get_status as get_tts_status |
|
|
) |
|
|
|
|
|
|
|
|
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../../../")) |
|
|
from gemini_adapter import chat_with_gemini, chat_with_gemini_and_tools |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Static part of the system prompt: game rules, coaching role, hint strategy,
# score interpretation guide, audio-hint usage and communication style.
_BASE_SYSTEM_PROMPT = """You are the help assistant for "Audio Sementle" - an innovative pronunciation puzzle game that combines voice recognition with progressive hint discovery.

## Game Mechanics
This is a BLIND pronunciation game where:
- Players start with NO IDEA what word/phrase to say (completely blind!)
- They can speak ANY word or phrase as their first attempt
- The game analyzes their voice using AI and provides scores (0-100) for:
- **Pitch**: Tone and melody matching
- **Rhythm**: Pacing and syllable timing
- **Energy**: Volume and intensity
- **Pronunciation**: Phonetic accuracy
- **Overall**: Combined similarity score
- Based on scores, players must deduce what the target word/phrase is
- Players have UNLIMITED attempts (this is not Wordle's 6-attempt limit!)
- Each attempt provides AI-generated hints that progressively reveal more information
- Goal: Figure out and correctly pronounce the mystery word/phrase

## Your Role as Help Assistant
You are a friendly, encouraging coach who helps users discover the answer through strategic hints:

1. **Explain the game** - When users are confused, explain Audio Sementle mechanics clearly
2. **Give progressive hints** - Start vague, gradually get more specific with each request
3. **Interpret scores intelligently** - Explain what their scores mean and what to try next
4. **Build on previous hints** - NEVER repeat hints, always add new information
5. **Track hint progression** - Remember what was already revealed, provide fresh angles
6. **Suggest strategic next steps** - Based on their score patterns, guide them
7. **Provide encouragement** - Keep users motivated, especially after many attempts
8. **Offer audio hints strategically** - Suggest listening when pronunciation is challenging

## Enhanced Hint Strategy (IMPORTANT)
When users ask for hints, follow this intelligent progression:

**Attempt 1-2** (Just starting):
- Be VERY vague: "Think about things people say" or "This is a common phrase"
- Don't reveal category yet - let them explore

**Attempt 3-4** (Finding direction):
- Reveal category: "This is a famous [category]"
- Add context: "Think about [era/genre/situation]"
- Offer audio hint: "Would you like to hear how the first syllable sounds?"

**Attempt 5-6** (Getting warmer):
- Be more specific: "It's from [specific context]"
- Give structural hints: "It has [X] syllables" or "The stress is on the [position]"
- Proactively offer audio: "Let me play part of it for you"

**Attempt 7-10** (Struggling):
- Very specific hints: "It starts with '[first letter]'" or "It rhymes with [word]"
- Reference their scores: "Your pitch is close but try emphasizing the [part]"
- Automatically provide audio hints

**Attempt 11+** (Really stuck):
- Give strong hints without revealing directly
- Provide almost-complete audio hints
- Encourage them: "You're so close! Just [specific adjustment]"

**Only reveal the full answer if explicitly asked: "what is the answer?"**

## Score Interpretation Guide (Be Specific!)

### Overall Score:
- **< 20**: "You're saying something completely different - try a new approach entirely"
- **20-40**: "There's some phonetic overlap, but you're on the wrong track. Think [category]"
- **40-60**: "You're getting warmer! The sounds are somewhat similar. Focus on [hint]"
- **60-75**: "Very close! You're in the right area, but pronunciation needs adjustment"
- **75-85**: "So close! Minor pronunciation issues - pay attention to [specific metric]"
- **85-95**: "Almost perfect! Just tiny adjustments to [specific area] needed"
- **95+**: "Excellent! Nearly there - just refine your [lowest score area]"

### Individual Metrics:
- **Low Pitch (<40)**: "Try adjusting your tone - go [higher/lower]"
- **Low Rhythm (<40)**: "Focus on the pacing and syllable timing"
- **Low Energy (<40)**: "Try speaking with [more/less] intensity"
- **Low Pronunciation (<40)**: "Work on phonetic accuracy - [audio hint might help]"

### Strategic Advice Based on Patterns:
- **All scores low**: "Try a completely different word/phrase"
- **Some metrics high**: "You're on the right track with [metric], now work on [weak area]"
- **Rhythm good, pronunciation poor**: "You have the rhythm! Now focus on exact sounds"
- **Getting stuck at same score**: "Let's try a different approach - [strategic hint]"

## Audio Hint Usage (Phase 2 Integration)
When audio hints are available, use them strategically:
- Offer audio proactively after attempt 3+
- Say things like: "Would you like to hear how it sounds?"
- When audio is provided, explain: "Listen carefully to the [syllable/rhythm/pronunciation]"
- Audio hints are progressive: first syllable โ partial โ rhythm โ almost full
- Use audio to break through pronunciation barriers

## Communication Style
- **Be friendly and encouraging** - This game is challenging!
- **Use natural language** - Avoid robotic responses
- **Be concise** - 1-3 sentences per response usually
- **Ask engaging questions** - "What do you think your scores are telling you?"
- **Celebrate progress** - "Great improvement! Your overall went from 40 to 65!"
- **Never be condescending** - Even after many attempts

Remember: This is a discovery game. Your job is to guide them to the answer through progressive hints, not to give it away. Make it fun and rewarding!"""

# Appended to the base prompt when no attempt has been recorded yet.
_NO_ATTEMPTS_SECTION = """

## Current Status
The user has not made any attempts yet. They are starting completely blind.

## Welcome Message Strategy
Greet them warmly and help them understand the game:

**Tone**: Friendly, exciting, encouraging
**Goal**: Get them to make their first attempt without fear

**Key Points to Convey**:
1. This is Audio Sementle - a voice-based puzzle game (not just pronunciation practice!)
2. They start COMPLETELY BLIND - no clues about what to say
3. First step: Just say ANY word or phrase that comes to mind
4. The game will analyze their voice and give similarity scores
5. Based on scores + hints, they figure out the mystery word/phrase
6. Unlimited attempts - it's about discovery, not pressure!
7. First attempts are always wild guesses - that's the fun part!

**Example Welcome**:
"Welcome! ๐ Ready to play Audio Sementle? Here's the fun part: you start completely blind - you have NO idea what word or phrase to say! Just speak anything that comes to mind, and the game will tell you how close you are. Based on the scores and hints, you'll figure it out. Don't overthink your first attempt - it's supposed to be a shot in the dark! Need help? Just ask!"

**When they ask questions**:
- "How do I play?" โ Explain the blind start and score-based discovery
- "Give me a hint" โ Encourage them to try first: "Take a guess first! Any word works. Then I can help based on your scores."
- "What should I say?" โ "Anything! That's the beauty of Audio Sementle - you discover what to say through playing."
- "I'm confused" โ Walk them through: speak โ get scores โ get hints โ figure it out"""

# Instructions added only when the `generate_audio_hint` tool is offered to the model.
_AUDIO_TOOL_LINES = (
    "\n### AUDIO HINT TOOL (Use ONLY when explicitly requested)",
    "- You have a tool called `generate_audio_hint` that generates TTS audio",
    "- ONLY call this tool when the user EXPLICITLY asks for audio hints:",
    " - Keywords that REQUIRE audio: 'audio hint', 'play audio', 'TTS', 'hear it', 'how does it sound', 'generate audio', 'listen'",
    " - Keywords that do NOT require audio: 'hint', 'help', 'clue', 'what is it', general questions",
    "- DO NOT call this tool for general hints or questions - only for explicit audio requests",
    "- Tool parameters:",
    " - hint_type: 'syllable' | 'partial' | 'rhythm' | 'almost_full'",
    "- Choose hint_type based on attempt count:",
    " - Attempt 1-4: use 'syllable' (first syllable)",
    " - Attempt 5-6: use 'partial' (first half)",
    " - Attempt 7-9: use 'rhythm' (with pauses)",
    " - Attempt 10+: use 'almost_full' (almost complete)",
    "",
)

# Closing, attempt-independent guidance appended to every in-progress prompt.
_HELP_GUIDANCE_LINES = (
    "\n## How to Help the User (Context-Aware Responses)",
    "Based on their attempts and scores, provide intelligent responses:",
    "",
    "**Common Questions**:",
    "- \"How do I play?\" โ Explain Audio Sementle mechanics with enthusiasm",
    "- \"Give me a hint\" โ Follow the attempt-based progression strategy (see above)",
    "- \"What should I try next?\" โ Analyze their score pattern and suggest strategic next steps",
    "- \"Why are my scores low?\" โ Interpret scores specifically and suggest what to change",
    "- \"What does the previous hint mean?\" โ Explain and expand with NEW information",
    "- \"Can you play it for me?\" / \"How does it sound?\" โ Trigger audio hint generation",
    "- \"What is the answer?\" โ Only reveal if explicitly asked this exact question",
    "",
    "**Score-Based Responses**:",
    "- If scores improving: Celebrate progress! 'Great improvement! You're getting warmer!'",
    "- If scores stuck: Suggest pivot. 'Try a different approach - think about [new angle]'",
    "- If scores decreasing: Redirect. 'Your previous attempt was closer. Go back to that direction.'",
    "- If very close (>80): Focus on specific metric. 'Work on your [weakest area]'",
    "",
    "**Audio Hint Triggers**:",
    "- Automatically offer after attempt 3+ if pronunciation score is low",
    "- Proactively suggest when user seems stuck (same score 3+ times)",
    "- Always offer when user asks about pronunciation or sounds",
    "",
    "Remember: They have UNLIMITED attempts. Keep it fun, encouraging, and strategic!",
)


def _coerce_score(value):
    """Return *value* as a number, or ``None`` when it cannot be read as one."""
    if isinstance(value, (int, float)):
        return value
    try:
        return float(value)
    except (TypeError, ValueError):
        return None


def _analysis_of(guess):
    """Return the ``aiAnalysis`` dict of *guess*, or ``{}`` when it is missing or null."""
    analysis = guess.get("aiAnalysis") if isinstance(guess, dict) else None
    return analysis if isinstance(analysis, dict) else {}


def build_system_prompt_from_game_state(game_state: Optional[Dict], include_audio_tool: bool = False) -> str:
    """Build the chatbot system prompt, enriched with the player's attempt history.

    Args:
        game_state: Game state dictionary. Its ``"guesses"`` list is read; each
            guess may carry ``"guessedWord"`` and an ``"aiAnalysis"`` dict with
            ``pitch``, ``rhythm``, ``energy``, ``pronunciation``,
            ``overall_score``, ``userText``, ``advice``, ``answerWord`` and
            ``category``. ``None`` or a state without guesses is accepted.
        include_audio_tool: When true, instructions for the
            ``generate_audio_hint`` tool are added to the prompt.

    Returns:
        str: The base prompt plus either a "no attempts yet" welcome section or
        the current game status (secret answer, per-attempt scores, trend
        guidance and response guidance), joined with newlines.
    """
    if not game_state or not game_state.get("guesses"):
        return _BASE_SYSTEM_PROMPT + _NO_ATTEMPTS_SECTION

    guesses = game_state["guesses"]
    attempt_count = len(guesses)

    # The answer and its category are read from the most recent guess's analysis.
    latest_analysis = _analysis_of(guesses[-1])
    answer_word = latest_analysis.get("answerWord", "")
    category = latest_analysis.get("category", "")

    context_parts = [
        _BASE_SYSTEM_PROMPT,
        "\n\n## Current Game Status",
        f"The user has made {attempt_count} attempt(s) so far (unlimited attempts allowed).",
    ]

    if answer_word and category:
        context_parts.extend([
            "\n### SECRET INFORMATION (Do not reveal directly!)",
            f"- The correct answer is: \"{answer_word}\"",
            f"- Category: {category} (meme/movie/song)",
            "- Use this to provide strategic hints without giving away the answer",
            "- You can reference the category and give contextual clues",
        ])

    if include_audio_tool:
        context_parts.extend(_AUDIO_TOOL_LINES)

    context_parts.append("They are trying to figure out what word/phrase to pronounce.\n")

    # One section per attempt: what was said, the scores, and the hint already given.
    for attempt_number, guess in enumerate(guesses, 1):
        analysis = _analysis_of(guess)

        pitch = analysis.get("pitch", "N/A")
        rhythm = analysis.get("rhythm", "N/A")
        energy = analysis.get("energy", "N/A")
        pronunciation = analysis.get("pronunciation", "N/A")
        overall = analysis.get("overall_score", "N/A")

        guessed_word = guess.get("guessedWord", "") if isinstance(guess, dict) else ""
        user_spoken_text = analysis.get("userText", "")
        advice = analysis.get("advice", "")

        context_parts.append(f"### Attempt {attempt_number}")

        # Prefer the speech-to-text transcript; fall back to the stored guess
        # unless that field only holds a "Score: ..." placeholder.
        if user_spoken_text:
            context_parts.append(f"- What they pronounced (STT): \"{user_spoken_text}\"")
        elif guessed_word and not guessed_word.startswith("Score:"):
            context_parts.append(f"- What they said: \"{guessed_word}\"")
        context_parts.append(
            f"- Scores: Pitch={pitch}, Rhythm={rhythm}, Energy={energy}, "
            f"Pronunciation={pronunciation}, Overall={overall}"
        )

        if advice:
            context_parts.append(f"- Previous hint given: \"{advice}\"")

        context_parts.append("")

    context_parts.append("\n## Strategic Guidance for Current Situation")

    # Trend guidance needs two attempts. Scores that are null or non-numeric
    # are skipped instead of raising on comparison/arithmetic.
    if attempt_count > 1:
        latest_overall = _coerce_score(latest_analysis.get("overall_score", 0))
        prev_overall = _coerce_score(_analysis_of(guesses[-2]).get("overall_score", 0))

        if latest_overall is not None and prev_overall is not None:
            if latest_overall > prev_overall + 10:
                context_parts.append(f"๐ **Progress Detected**: User improved from {prev_overall} to {latest_overall}! Encourage them and guide them further.")
            elif latest_overall < prev_overall - 10:
                context_parts.append(f"๐ **Regression Detected**: User dropped from {prev_overall} to {latest_overall}. They might be confused. Help them get back on track.")
            elif abs(latest_overall - prev_overall) < 5 and attempt_count >= 3:
                context_parts.append(f"๐ **Plateau Detected**: User stuck around {latest_overall} for multiple attempts. Suggest a different approach or offer audio hint.")

        if latest_overall is not None:
            if latest_overall < 30:
                context_parts.append(f"โ ๏ธ **Very Low Score ({latest_overall})**: User is way off. Give category hint if not already given.")
            elif latest_overall >= 75:
                context_parts.append(f"๐ฏ **Very Close ({latest_overall})**: User is almost there! Focus on pronunciation refinement.")

    context_parts.extend(_HELP_GUIDANCE_LINES)

    return "\n".join(context_parts)
|
|
|
|
|
|
|
|
def call_ai_backend(message: str, user_id: str, history: List[Dict], game_state: Optional[Dict] = None):
    """Ask Gemini for a chatbot reply, optionally producing a TTS audio hint.

    The system prompt is built from ``game_state``. When ElevenLabs is
    configured and the latest guess's ``aiAnalysis`` carries an ``answerWord``,
    a ``generate_audio_hint`` tool is offered to the model; a matching tool
    call is turned into an audio file via ``utils.elevenlabs_tts``.

    Args:
        message: The user's latest message. Only ``history`` is sent to the
            model, so the caller is expected to have appended it there already.
        user_id: Identifier of the user (not used by this function).
        history: Conversation history passed to the model as ``messages``.
        game_state: Game state dictionary (``guesses`` list), or ``None``.

    Returns:
        str or tuple: The reply text, or ``(text, audio_path)`` when an audio
        hint was generated. Adapter errors and unexpected exceptions are
        reported as an apology string instead of being raised.
    """
    try:
        guesses = (game_state or {}).get('guesses') or []
        attempt_count = len(guesses)

        # Answer and reference audio come from the most recent guess's analysis.
        latest_analysis = (guesses[-1].get('aiAnalysis') or {}) if guesses else {}
        answer_word = latest_analysis.get('answerWord', '')
        reference_audio_path = latest_analysis.get('referenceAudioPath', '')

        elevenlabs_ready = is_elevenlabs_configured()

        # The audio tool is only useful when TTS works and the answer is known.
        tools_will_be_enabled = elevenlabs_ready and bool(answer_word)

        system_prompt = build_system_prompt_from_game_state(game_state, include_audio_tool=tools_will_be_enabled)

        print("[CHATBOT] Calling Gemini with context:")

        tools = []
        print(f"[CHATBOT] ElevenLabs ready: {elevenlabs_ready}, Answer word: '{answer_word}', Tools enabled: {tools_will_be_enabled}")

        if tools_will_be_enabled:
            word_count = len(answer_word.split())
            tools = [{
                "name": "generate_audio_hint",
                "description": "Generate an audio pronunciation hint using TTS.",
                "input_schema": {
                    "type": "object",
                    "properties": {
                        "hint_type": {
                            "type": "string",
                            "enum": ["syllable", "partial", "rhythm", "almost_full"],
                            "description": "Type of audio hint: 'syllable' (first syllable), 'partial' (first half), 'rhythm' (with pauses), 'almost_full' (missing last part)"
                        },
                        "word_index": {
                            "type": "integer",
                            "description": f"Which word to hint (0-indexed). The phrase has {word_count} word(s). Use 0 for first word, 1 for second word, etc. If user knows first word but not second, use 1."
                        }
                    },
                    "required": ["hint_type"]
                }
            }]
            print("[CHATBOT] Audio tool enabled! User requested audio.")

        response_text, tool_calls, error = chat_with_gemini_and_tools(
            system_prompt=system_prompt,
            messages=history,
            tools=tools if tools else None,
            max_tokens=1024,
            temperature=0.8
        )

        if error:
            print(f"[CHATBOT ERROR] {error}")
            return f"Sorry, I encountered an error: {error}\n\nPlease try again in a moment."

        if tool_calls:
            from utils.elevenlabs_tts import extract_hint_portion_for_word, generate_audio_hint

            for tool_call in tool_calls:
                if tool_call['name'] != 'generate_audio_hint':
                    continue

                # Only `word_index` is forwarded; the hint type actually used is
                # the one returned by the helper, which receives the attempt count.
                word_index = (tool_call.get('input') or {}).get('word_index', 0)

                text_to_speak, actual_hint_type = extract_hint_portion_for_word(answer_word, attempt_count, word_index)
                audio_path = generate_audio_hint(text_to_speak, actual_hint_type, reference_audio_path=reference_audio_path)

                if audio_path:
                    # The model may answer with a tool call and no text at all.
                    if not response_text or response_text.strip() == "":
                        response_text = "๐ต Here's an audio hint! Listen to how it sounds:"
                    return (response_text, audio_path)

                print("[CHATBOT] Audio hint generation failed")
                failure_note = "(Audio hint generation failed - please check ElevenLabs configuration)"
                # response_text can be None here; never concatenate onto it directly.
                response_text = f"{response_text}\n\n{failure_note}" if response_text else failure_note

        return response_text

    except Exception as e:
        # Top-level boundary for the chat UI: log the traceback and reply with
        # a readable message rather than letting the event handler fail.
        error_msg = f"Unexpected error: {type(e).__name__}: {str(e)}"
        print(f"[CHATBOT ERROR] {error_msg}")
        import traceback
        traceback.print_exc()
        return f"Sorry, something went wrong. Please try again.\n\nError: {error_msg}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_user_id_from_state(game_state: Optional[Dict]) -> str:
    """Look up the user's session ID in the game state.

    Args:
        game_state: Game state dictionary, or ``None``.

    Returns:
        str: The value returned by ``GameStateManager.get_session_id`` for a
        non-empty state, otherwise the placeholder ``"unknown_user"``.
    """
    # Guard clause: nothing to look up in a missing or empty state.
    if not game_state:
        return "unknown_user"
    return GameStateManager.get_session_id(game_state)
|
|
|
|
|
|
|
|
class FloatingChatbotComponent:
    """Floating AI help-chatbot widget for a Gradio Blocks app.

    ``render`` creates a checkbox toggle, a chat column (header with close
    button, chatbot, text input and send button) and wires the events. The
    panel is opened and closed client-side: the toggle's ``change`` event runs
    a JS snippet that adds or removes the ``chat-open`` class on the
    ``#chat-container`` element, and the close button simply unchecks the
    toggle.
    """

    def __init__(self, title: str = "Help"):
        """Store the header title; UI components are created later by ``render``."""
        self.title = title
        self.initial_history = None  # welcome conversation, set in render()
        self.chat_history = None     # gr.State holding the message list
        self.game_state_ref = None   # external game-state component, if any
        self.toggle = None           # checkbox that opens/closes the panel
        self.chat_container = None   # gr.Column wrapping the chat UI

    @staticmethod
    def _text_only_history(hist):
        """Return the messages of *hist* whose ``content`` is plain text.

        The displayed history can contain non-text entries (the audio hint is
        stored as a ``gr.Audio`` component); those are left out of what is
        sent to the language model.
        """
        return [
            entry for entry in hist
            if isinstance(entry, dict) and isinstance(entry.get("content"), str)
        ]

    @staticmethod
    def _resolve_audio_path(audio_path):
        """Make a relative *audio_path* absolute against the project root.

        The project root is taken to be four directory levels above this
        file. Absolute or empty paths are returned unchanged.
        """
        if audio_path and not os.path.isabs(audio_path):
            project_root = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
            return os.path.join(project_root, audio_path)
        return audio_path

    def render(self, game_state=None):
        """Create the chatbot UI and its event handlers.

        Must be called inside a ``gr.Blocks`` context.

        Args:
            game_state: Game-state component supplied by the caller (documented
                in the original as a ``gr.BrowserState``). When given, its
                value is passed to the message handler as a third input.

        Returns:
            The ``gr.Checkbox`` used as the open/close toggle.
        """
        welcome_message = """Welcome to Audio Sementle! ๐ Get ready for a unique and super fun pronunciation puzzle game where your voice is the key!

Here's how it works:

1. **You start completely blind!** You have absolutely NO idea what the mystery word or phrase is. That's part of the fun!
2. **Your first move:** Just speak any word or phrase that comes to mind. Seriously, anything!
3. **Get instant feedback:** Our AI will listen and give you scores (0-100) for your Pitch, Rhythm, Energy, Pronunciation, and an Overall similarity score to the hidden target.
4. **Deduce and discover:** Based on these scores, and with my help offering progressive hints, you'll start narrowing down what the target phrase might be.
5. **Unlimited attempts:** This isn't like Wordle! You have as many tries as you need to figure it out. It's all about discovery and getting closer with each attempt.

Don't overthink your first try โ it's meant to be a shot in the dark! Just say anything you like, and let's see where that takes us. Ready to give it a go? What's the first thing that comes to your mind?"""

        self.initial_history = [{"role": "assistant", "content": welcome_message}]

        self.chat_history = gr.State(self.initial_history.copy())
        self.game_state_ref = game_state

        self.toggle = gr.Checkbox(
            label="?",
            value=False,
            elem_id="floating-toggle",
            container=False
        )

        with gr.Column(elem_id="chat-container") as self.chat_container:
            with gr.Row(elem_id="chat-header"):
                gr.HTML(f"<span>{self.title}</span>")
                close_btn = gr.Button(
                    "โ",
                    elem_id="chat-close-btn",
                    scale=0,
                    min_width=32
                )

            chatbot = gr.Chatbot(
                value=self.initial_history,
                height=280,
                elem_id="floating-chatbot",
                show_label=False
            )

            with gr.Row(elem_id="chat-input-area"):
                msg_input = gr.Textbox(
                    placeholder="Type your message...",
                    scale=4,
                    container=False
                )
                send_btn = gr.Button(
                    "โค",
                    scale=0,
                    variant="primary",
                    min_width=50
                )

        def send_message(message, hist, gs=None):
            """Send the user's message to the AI backend and extend the history.

            Returns ``(cleared textbox value, history state, chatbot value)``.
            """
            # Ignore empty / whitespace-only submissions (and a missing value).
            if not message or not message.strip():
                return "", hist, hist

            # get_user_id_from_state supplies the fallback id for a missing state.
            uid = get_user_id_from_state(gs)

            hist = list(hist) if hist else []
            hist.append({"role": "user", "content": message})

            print("[CHATBOT] ๋ฉ์์ง ์ ์ก")

            # Earlier audio hints live in the history as gr.Audio components;
            # only text messages are forwarded to the model.
            # NOTE(review): assumes the Gemini adapter expects string contents - confirm.
            response = call_ai_backend(
                message, uid, FloatingChatbotComponent._text_only_history(hist), game_state=gs
            )

            print(f"[CHATBOT] AI ์๋ต: {str(response)[:100]}...")

            if isinstance(response, tuple):
                # (text, audio_path): show the text, then the audio as its own message.
                text, audio_path = response
                audio_path = FloatingChatbotComponent._resolve_audio_path(audio_path)

                hist.append({"role": "assistant", "content": text})
                hist.append({"role": "assistant", "content": gr.Audio(audio_path)})
                return "", hist, hist

            hist.append({"role": "assistant", "content": response})
            return "", hist, hist

        def close_chat_handler():
            """Close-button handler: just uncheck the toggle checkbox."""
            return gr.update(value=False)

        # Client-side only: toggle the 'chat-open' CSS class on the container.
        self.toggle.change(
            fn=None,
            inputs=[self.toggle],
            outputs=None,
            js="""
            (isChecked) => {
                const container = document.getElementById('chat-container');
                if (container) {
                    if (isChecked) {
                        container.classList.add('chat-open');
                    } else {
                        container.classList.remove('chat-open');
                    }
                }
                return isChecked;
            }
            """
        )

        close_btn.click(
            close_chat_handler,
            outputs=[self.toggle]
        )

        # The game state is an optional third input of send_message.
        event_inputs = [msg_input, self.chat_history]
        if self.game_state_ref is not None:
            event_inputs.append(self.game_state_ref)

        msg_input.submit(
            send_message,
            inputs=event_inputs,
            outputs=[msg_input, self.chat_history, chatbot]
        )
        send_btn.click(
            send_message,
            inputs=event_inputs,
            outputs=[msg_input, self.chat_history, chatbot]
        )

        return self.toggle
|
|
|