Spaces:

MCP-1st-Birthday
/

VoiceSementle

Running

File size: 8,581 Bytes

"""
Gradio UI for Chloe's Voice Komentle Game
Imports the FastAPI backend module and calls its voice-analysis logic in-process
"""

import os
# Set Gradio temp directory BEFORE importing gradio
# The directory is a "gradio_uploads" folder next to this file; it is created
# here if it does not exist yet and then exported through GRADIO_TEMP_DIR.
# NOTE(review): the ordering presumably matters because gradio picks the
# variable up when it is imported/started -- keep these lines above the import.
_upload_dir = os.path.join(os.path.dirname(__file__), "gradio_uploads")
os.makedirs(_upload_dir, exist_ok=True)
os.environ["GRADIO_TEMP_DIR"] = _upload_dir

import gradio as gr
from datetime import datetime
import uuid
import asyncio
from sqlalchemy import create_engine, text
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

# Import backend functions
from backend import (
    analyze_voice_logic,
    get_puzzle_by_date,
    lifespan,
    app as backend_app,
)

# Database connection
# The URL comes from the environment (after load_dotenv() above) and is passed
# to create_engine unchecked.
# NOTE(review): os.getenv returns None when DATABASE_URL is unset -- confirm
# that failing at import time in create_engine is the intended behaviour.
DATABASE_URL = os.getenv("DATABASE_URL")
engine = create_engine(
    DATABASE_URL,
    pool_size=10,  # base connection pool size
    max_overflow=20,  # maximum number of additional connections
    pool_pre_ping=True,  # validate a connection before using it
    pool_recycle=3600,  # recreate connections every hour
    connect_args={
        "connect_timeout": 10,  # connection timeout: 10 seconds
        "options": "-c statement_timeout=30000"  # query timeout: 30 seconds
    }
)

# Session ID (persistent across attempts)
# Module-level value: generated once at import, passed to the backend on every
# analysis call, and replaced by reset_session().
# NOTE(review): being a module global, it appears to be shared by every
# visitor served by this process -- confirm that is intended.
session_id = str(uuid.uuid4())

# Backend initialization flag
# NOTE(review): nothing in the visible part of this file reads or updates
# this flag after it is set to False here.
backend_initialized = False


async def analyze_voice_async(audio_file, date_str):
    """
    Analyze a recorded attempt with the backend logic and format it for the UI

    Args:
        audio_file: Path to recorded audio file, or None if nothing was recorded
        date_str: Date string for puzzle lookup

    Returns:
        tuple: (result_text, scores_text, hint_text, image_path)

        When no audio was recorded, the backend reports ``status == "error"``,
        or any exception is raised, result_text carries the error message,
        the other two texts are empty and image_path is None. image_path is
        always None: hints are delivered inside the backend's ``advice``
        text, so there is no separate hint image to show.
    """
    if audio_file is None:
        return "❌ 오디오를 먼저 녹음해주세요!", "", "", None

    try:
        # Read audio file
        with open(audio_file, "rb") as f:
            audio_bytes = f.read()

        # Call backend logic directly
        result = await analyze_voice_logic(audio_bytes, date_str, session_id)

        # Handle errors
        if result.get("status") == "error":
            return f"❌ {result.get('message', 'Unknown error')}", "", "", None

        def score(key):
            # Treat an explicit None like a missing key so that one absent
            # metric does not turn the whole response into a format error.
            value = result.get(key)
            return 0.0 if value is None else value

        # Parse response (already in 0-100 range from backend)
        category = result.get("category")
        if category is None:
            category = "unknown"
        pitch = score("pitch")
        rhythm = score("rhythm")
        energy = score("energy")
        pronunciation = score("pronunciation")
        transcript = score("transcript")
        overall = score("overall")
        advice = result.get("advice", "")
        is_correct = result.get("is_correct", False)

        # Format result message
        if is_correct:
            result_msg = f"🎉 정답입니다! 전체 점수: {overall:.1f}/100"
        else:
            result_msg = f"📊 전체 점수: {overall:.1f}/100 - 다시 시도해보세요!"

        # Format scores
        scores_text = f"""
### 📊 점수 상세

**카테고리:** {category.upper()}

- **발음 (Pronunciation):** {pronunciation:.1f}/100
- **음높이 (Pitch):** {pitch:.1f}/100
- **리듬 (Rhythm):** {rhythm:.1f}/100
- **에너지 (Energy):** {energy:.1f}/100
- **전사 (Transcript):** {transcript:.1f}/100
- **전체 (Overall):** {overall:.1f}/100
"""

        # Hints are embedded in the advice text, so the advice (if any) is
        # the only content of the hint panel.
        hint_text = f"💬 **조언:**\n\n{advice}" if advice else ""

        return result_msg, scores_text, hint_text, None

    except Exception as e:
        # UI boundary: surface any failure as a message instead of raising
        return f"❌ 오류 발생: {str(e)}", "", "", None


def analyze_voice(audio_file, date_str):
    """Blocking entry point: run analyze_voice_async to completion and return its result"""
    pending = analyze_voice_async(audio_file, date_str)
    # asyncio.run creates a fresh event loop for this single call
    return asyncio.run(pending)


def get_today_puzzle():
    """
    Build the Markdown summary for the puzzle of the current local date

    The puzzle is looked up through the backend's get_puzzle_by_date. This
    function does not raise: a missing puzzle or any exception is returned
    as a "❌" message string instead.
    """
    try:
        date_key = datetime.now().strftime("%Y-%m-%d")
        info = get_puzzle_by_date(date_key)

        # Guard clause: nothing was returned for this date
        if not info:
            return "❌ 오늘의 퍼즐을 찾을 수 없습니다."

        puzzle_date = info.get('puzzle_date', 'N/A')
        number = info.get('puzzle_number', 'N/A')
        category = info.get('category', 'N/A').upper()
        difficulty = info.get('difficulty', 'N/A')

        return f"""
### 📅 오늘의 퍼즐

**날짜:** {puzzle_date}
**퍼즐 번호:** #{number}
**카테고리:** {category}
**난이도:** {difficulty}

정답 단어를 발음해보세요! (최대 6회 시도)
"""

    except Exception as err:
        return f"❌ 퍼즐 정보를 가져올 수 없습니다: {str(err)}"


def reset_session():
    """Start a new game: swap in a fresh session id and return cleared output values"""
    global session_id
    fresh_id = uuid.uuid4()
    session_id = str(fresh_id)

    # Order matches the four output components: result, scores, hint text, hint image
    cleared_outputs = ("✅ 새 게임 시작! 오디오를 녹음해주세요.", "", "", None)
    return cleared_outputs


# Create Gradio Interface
# Layout: puzzle info row, then a recording/controls column next to a results
# column, then a hint row, then the event wiring and footer.
with gr.Blocks(title="Chloe's Voice Komentle") as demo:
    gr.Markdown("# 🎤 Chloe's Voice Komentle")

    # Puzzle info section
    with gr.Row():
        # Pass the function itself (not its result) so the panel is
        # re-evaluated on every page load instead of being frozen at the
        # value computed when this module was imported.
        puzzle_info = gr.Markdown(value=get_today_puzzle)
        refresh_btn = gr.Button("🔄 퍼즐 정보 새로고침", size="sm")

    with gr.Row():
        with gr.Column(scale=1):
            # Audio recording
            gr.Markdown("### 🎙️ 음성 녹음")
            audio_input = gr.Audio(
                sources=["microphone"],
                type="filepath",
                label="마이크로 녹음",
                format="wav",
            )

            # Date input (auto-filled with today)
            # A callable default keeps the date current for a long-running
            # server; a plain string would still show the start-up date
            # after midnight.
            date_input = gr.Textbox(
                label="날짜 (YYYY-MM-DD)",
                value=lambda: datetime.now().strftime("%Y-%m-%d"),
                interactive=True,
            )

            # Submit button
            submit_btn = gr.Button("🎯 분석하기", variant="primary", size="lg")
            reset_btn = gr.Button("🔄 새 게임 시작", variant="secondary")

        with gr.Column(scale=1):
            # Results
            gr.Markdown("### 📊 결과")
            result_output = gr.Markdown(label="결과")
            scores_output = gr.Markdown(label="점수 상세")

    # Hints section
    with gr.Row():
        with gr.Column():
            hint_output = gr.Markdown(label="힌트 및 조언")

        with gr.Column():
            hint_image = gr.Image(label="힌트 이미지", show_label=True)

    # Event handlers
    # Both handlers return a 4-tuple matching these four outputs in order.
    submit_btn.click(
        fn=analyze_voice,
        inputs=[audio_input, date_input],
        outputs=[result_output, scores_output, hint_output, hint_image],
    )

    reset_btn.click(
        fn=reset_session,
        inputs=[],
        outputs=[result_output, scores_output, hint_output, hint_image],
    )

    refresh_btn.click(fn=get_today_puzzle, inputs=[], outputs=[puzzle_info])

    # Footer
    gr.Markdown("---\n**Powered by:** VoiceKit MCP + Gemini AI")

# Launch configuration
if __name__ == "__main__":
    import threading
    import time

    # Initialize backend (VoiceKit MCP session)
    print("⏳ Initializing VoiceKit MCP...")

    # Set by the backend thread once the lifespan context has been entered,
    # so the main thread waits for real readiness instead of a fixed sleep.
    backend_ready = threading.Event()

    async def init_backend():
        """Enter the backend lifespan and keep it open until the process exits"""
        async with lifespan(backend_app):
            print("✓ VoiceKit MCP initialized")
            backend_ready.set()
            # Keep the lifespan context active
            await asyncio.Event().wait()  # Wait forever

    def run_backend_init():
        """Thread target: run init_backend on an event loop owned by this thread"""
        # NOTE(review): analyze_voice() uses asyncio.run(), i.e. a different
        # event loop than the one started here -- confirm the backend
        # resources are usable across loops.
        asyncio.run(init_backend())

    # Daemon thread: it must not keep the process alive after Gradio exits
    backend_thread = threading.Thread(target=run_backend_init, daemon=True)
    backend_thread.start()

    # Wait until the backend signals readiness, its thread dies (startup
    # failed), or the deadline passes -- whichever comes first.
    startup_deadline = time.monotonic() + 30
    while not backend_ready.wait(timeout=0.2):
        if not backend_thread.is_alive() or time.monotonic() >= startup_deadline:
            break

    if backend_ready.is_set():
        print("✓ Backend initialized")
    else:
        # Same as before: the UI is launched regardless, but the log no
        # longer claims success when initialization did not complete.
        print("⚠ Backend did not finish initializing; launching UI anyway")

    # Launch Gradio
    server_host = os.getenv("SERVER_HOST")
    port_env = os.getenv("FRONTEND_PORT")
    # int(None) would raise TypeError; with no port configured, pass None so
    # Gradio falls back to its own default port selection.
    frontend_port = int(port_env) if port_env else None
    demo.launch(
        server_name=server_host,  # From SERVER_HOST; None lets Gradio choose its default
        server_port=frontend_port,  # From FRONTEND_PORT; None lets Gradio choose its default
        share=False,  # Set to True for public link
        show_error=True,
        allowed_paths=[os.path.join(os.path.dirname(__file__), "hints", "audio")],  # Allow serving TTS audio hints
    )