"""
Gradio interface module for the Phi-3.5-MoE application.
"""
import logging
import re
from typing import Optional, Dict, Any

import gradio as gr

from .model_loader import ModelLoader
logger = logging.getLogger(__name__)
class ExpertClassifier:
    """Classifies queries to determine expert specialization.

    Routing is keyword-based: the expert whose keyword list has the most
    whole-word hits in the query wins; ties go to the first expert listed
    in EXPERT_KEYWORDS (insertion order).
    """

    # Keyword lists matched case-insensitively as whole words.
    EXPERT_KEYWORDS = {
        "Code": [
            "programming", "software", "development", "coding", "algorithm",
            "python", "javascript", "java", "function", "code", "debug",
            "api", "framework", "library", "class", "method", "variable"
        ],
        "Math": [
            "mathematics", "calculation", "equation", "formula", "statistics",
            "derivative", "integral", "algebra", "calculus", "math", "solve",
            "calculate", "probability", "geometry", "trigonometry"
        ],
        "Reasoning": [
            "logic", "analysis", "reasoning", "problem-solving", "critical",
            "explain", "why", "how", "because", "analyze", "evaluate",
            "compare", "contrast", "deduce", "infer"
        ],
        "Multilingual": [
            "translation", "language", "multilingual", "localization",
            "translate", "spanish", "french", "german", "chinese", "japanese",
            "korean", "arabic", "russian", "portuguese"
        ],
        "General": [
            "general", "conversation", "assistance", "help", "hello", "hi",
            "what", "who", "when", "where", "tell", "describe", "explain"
        ]
    }

    @classmethod
    def classify_query(cls, query: str) -> str:
        """Classify *query* and return the matching expert name.

        Args:
            query: Raw user text; may be empty.

        Returns:
            One of the EXPERT_KEYWORDS keys, defaulting to "General" when
            the query is empty or no keyword matches.
        """
        if not query:
            return "General"
        query_lower = query.lower()
        # Whole-word matching: a plain substring test misfires badly —
        # e.g. "api" inside "capital" would credit the Code expert and
        # "hi" inside "this" would credit General.
        scores = {
            expert: sum(
                1 for keyword in keywords
                if re.search(rf"\b{re.escape(keyword)}\b", query_lower)
            )
            for expert, keywords in cls.EXPERT_KEYWORDS.items()
        }
        # max() returns the first key with the top score, preserving the
        # original tie-breaking behavior (dict insertion order).
        best_expert, best_score = max(scores.items(), key=lambda item: item[1])
        # All-zero scores mean no keyword fired; fall back to General.
        return best_expert if best_score > 0 else "General"
class ResponseGenerator:
    """Handles response generation with fallback support.

    Routes the query to an expert persona via ExpertClassifier, then either
    runs the model pipeline or, when the model is not loaded, returns a
    canned fallback message.
    """

    # Per-expert system prompts prepended to every model call.
    SYSTEM_MESSAGES = {
        "Code": "You are an expert software engineer and programming assistant. Provide clear, well-commented code examples and explain programming concepts thoroughly.",
        "Math": "You are a mathematics expert. Solve problems step-by-step, show your work, and explain mathematical concepts clearly.",
        "Reasoning": "You are a logical reasoning expert. Break down complex problems, analyze them systematically, and provide clear explanations.",
        "Multilingual": "You are a multilingual expert. Help with translations, language learning, and cross-cultural communication.",
        "General": "You are a helpful AI assistant. Provide accurate, helpful, and informative responses to user questions."
    }

    def __init__(self, model_loader: "ModelLoader"):
        """Store the loader whose pipeline/tokenizer back generation.

        The annotation is a forward reference so this class does not require
        ModelLoader to be resolvable at class-definition time.
        """
        self.model_loader = model_loader

    def generate_fallback_response(self, query: str, expert_type: str) -> str:
        """Generate fallback response when model is unavailable.

        Args:
            query: The user's question (echoed into the message).
            expert_type: Expert name chosen by the classifier.

        Returns:
            A static apology message tailored to the expert type; unknown
            expert types fall back to the "General" message.
        """
        fallback_messages = {
            "Code": f"I'm a Code Expert, but the Phi-3.5-MoE model is currently unavailable. For your question about '{query}', I would typically provide detailed code examples and programming guidance. Please try again later when the model is loaded.",
            "Math": f"I'm a Math Expert, but the Phi-3.5-MoE model is currently unavailable. For your question about '{query}', I would typically solve mathematical problems step-by-step. Please try again later when the model is loaded.",
            "Reasoning": f"I'm a Reasoning Expert, but the Phi-3.5-MoE model is currently unavailable. For your question about '{query}', I would typically provide logical analysis and systematic problem-solving. Please try again later when the model is loaded.",
            "Multilingual": f"I'm a Multilingual Expert, but the Phi-3.5-MoE model is currently unavailable. For your question about '{query}', I would typically help with translations and language learning. Please try again later when the model is loaded.",
            "General": f"I'm a General Expert, but the Phi-3.5-MoE model is currently unavailable. For your question about '{query}', I would typically provide helpful and informative responses. Please try again later when the model is loaded."
        }
        return fallback_messages.get(expert_type, fallback_messages["General"])

    @staticmethod
    def _extract_assistant_text(generated_text) -> str:
        """Normalize pipeline output into the assistant's reply text.

        Chat-style text-generation pipelines return ``generated_text`` as a
        list of {"role", "content"} message dicts (the assistant reply is the
        last entry); older/plain pipelines return a single string that may
        embed an "Assistant:" marker. Both shapes are handled here.
        """
        if isinstance(generated_text, list):
            last = generated_text[-1]
            if isinstance(last, dict):
                return str(last.get("content", "")).strip()
            return str(last).strip()
        text = str(generated_text)
        if "Assistant:" in text:
            return text.split("Assistant:")[-1].strip()
        return text

    def generate_response(
        self,
        query: str,
        max_tokens: int = 500,
        temperature: float = 0.7
    ) -> str:
        """Generate response using the model or fallback.

        Args:
            query: Raw user question.
            max_tokens: Upper bound on newly generated tokens.
            temperature: Sampling temperature passed to the pipeline.

        Returns:
            Markdown string containing the expert type and the reply, the
            fallback notice, or an error message (never raises).
        """
        try:
            # Classify query type to pick the expert persona.
            expert_type = ExpertClassifier.classify_query(query)
            # Model unavailable -> canned fallback, never an exception.
            if not self.model_loader.is_loaded:
                return f"**Expert Type:** {expert_type}\n\n**Response:**\n{self.generate_fallback_response(query, expert_type)}"
            # Get system message for expert type.
            system_message = self.SYSTEM_MESSAGES.get(expert_type, self.SYSTEM_MESSAGES["General"])
            # Chat-format messages for the text-generation pipeline.
            messages = [
                {"role": "system", "content": system_message},
                {"role": "user", "content": query}
            ]
            response = self.model_loader.pipeline(
                messages,
                max_new_tokens=max_tokens,
                temperature=temperature,
                do_sample=True,
                pad_token_id=self.model_loader.tokenizer.eos_token_id
            )
            # Handles both chat (list-of-messages) and plain-string outputs.
            assistant_response = self._extract_assistant_text(response[0]['generated_text'])
            return f"**Expert Type:** {expert_type}\n\n**Response:**\n{assistant_response}"
        except Exception as e:
            # logger.exception records the full traceback, unlike error(f"...").
            logger.exception("Error generating response")
            return f"❌ **Error generating response:** {str(e)}\n\nPlease try again or check the logs for more details."
class GradioInterface:
    """Main Gradio interface for the application.

    Composes a ``ModelLoader`` and ``ResponseGenerator`` into a Blocks UI
    with a query box, generation sliders, example prompts, and model info.
    """
    def __init__(self, model_loader: ModelLoader):
        # Loader state drives the status banner and the model-info panel.
        self.model_loader = model_loader
        self.response_generator = ResponseGenerator(model_loader)
        # Built lazily by create_interface(); launch() creates it on demand.
        self.demo: Optional[gr.Blocks] = None
    def create_interface(self) -> gr.Blocks:
        """Create the Gradio interface.

        NOTE: component-creation order inside the ``with`` blocks determines
        the on-screen layout, so statement order here is significant.

        Returns:
            The constructed ``gr.Blocks`` demo (also cached on ``self.demo``).
        """
        # Determine status message
        if self.model_loader.is_loaded:
            model_info = self.model_loader.get_model_info()
            status_msg = f"✅ **Model Status:** {model_info['model_id']} loaded successfully"
            # Abbreviate the pinned revision hash when one is reported.
            if model_info.get('revision'):
                status_msg += f" (revision: {model_info['revision'][:8]}...)"
        else:
            # Model failed to load: the UI still works via canned fallbacks.
            status_msg = "⚠️ **Model Status:** Running in fallback mode (model loading issues)"
        with gr.Blocks(
            title="Phi-3.5-MoE Expert Assistant",
            theme=gr.themes.Soft(),
            css=".status-box { background-color: #f0f0f0; padding: 10px; border-radius: 5px; margin: 10px 0; }"
        ) as demo:
            gr.Markdown("# 🤖 Phi-3.5-MoE Expert Assistant")
            gr.Markdown(status_msg, elem_classes=["status-box"])
            gr.Markdown("""
            This is a specialized AI assistant powered by Microsoft's Phi-3.5-MoE model.
            It automatically routes your queries to the most appropriate expert:
            - **Code Expert**: Programming, software development, algorithms
            - **Math Expert**: Mathematics, calculations, problem solving
            - **Reasoning Expert**: Logic, analysis, critical thinking
            - **Multilingual Expert**: Translation and language assistance
            - **General Expert**: General purpose assistance
            """)
            with gr.Row():
                # Left column: input controls.
                with gr.Column(scale=3):
                    query_input = gr.Textbox(
                        label="Your Question",
                        placeholder="Ask me anything...",
                        lines=3,
                        max_lines=10
                    )
                    with gr.Row():
                        # Slider values feed generate_response(query, max_tokens, temperature).
                        max_tokens = gr.Slider(
                            minimum=50,
                            maximum=1000,
                            value=500,
                            step=50,
                            label="Max Tokens"
                        )
                        temperature = gr.Slider(
                            minimum=0.1,
                            maximum=1.0,
                            value=0.7,
                            step=0.1,
                            label="Temperature"
                        )
                    submit_btn = gr.Button("Generate Response", variant="primary")
                    clear_btn = gr.Button("Clear", variant="secondary")
                # Right column: rendered markdown response.
                with gr.Column(scale=2):
                    response_output = gr.Markdown(
                        label="Response",
                        value="*Submit a question to get started...*"
                    )
            # Model information section
            with gr.Accordion("Model Information", open=False):
                # Snapshot of loader metadata at interface-build time.
                model_info_display = gr.JSON(
                    value=self.model_loader.get_model_info(),
                    label="Model Details"
                )
            # Example queries
            gr.Markdown("### 💡 Example Queries")
            # One example per expert type, clickable to fill the query box.
            examples = [
                "How do I implement a binary search algorithm in Python?",
                "What is the derivative of x² + 3x + 1?",
                "Explain the logical reasoning behind the Monty Hall problem",
                "Translate 'Hello, how are you?' to Spanish",
                "What are the benefits of renewable energy?"
            ]
            gr.Examples(
                examples=examples,
                inputs=query_input,
                label="Click an example to try it"
            )
            # Event handlers
            # Button click and Enter-in-textbox run the same generation
            # function with the slider values as extra inputs.
            # NOTE(review): newer Gradio versions prefer string values for
            # show_progress ("full"/"minimal"/"hidden") — confirm against the
            # installed version.
            submit_btn.click(
                fn=self.response_generator.generate_response,
                inputs=[query_input, max_tokens, temperature],
                outputs=response_output,
                show_progress=True
            )
            query_input.submit(
                fn=self.response_generator.generate_response,
                inputs=[query_input, max_tokens, temperature],
                outputs=response_output,
                show_progress=True
            )
            # Clear resets both the input box and the output placeholder.
            clear_btn.click(
                fn=lambda: ("", "*Submit a question to get started...*"),
                outputs=[query_input, response_output]
            )
        self.demo = demo
        return demo
    def launch(
        self,
        server_name: str = "0.0.0.0",
        server_port: int = 7860,
        **kwargs
    ) -> None:
        """Launch the Gradio interface.

        Args:
            server_name: Bind address; "0.0.0.0" exposes the app externally.
            server_port: TCP port for the web server.
            **kwargs: Forwarded verbatim to ``gr.Blocks.launch``.
        """
        # Build the UI on first use.
        if not self.demo:
            self.create_interface()
        logger.info(f"🚀 Launching Gradio interface on {server_name}:{server_port}")
        self.demo.launch(
            server_name=server_name,
            server_port=server_port,
            **kwargs
        )