""" Gradio interface module for the Phi-3.5-MoE application. """ import logging import gradio as gr from typing import Optional, Dict, Any from .model_loader import ModelLoader logger = logging.getLogger(__name__) class ExpertClassifier: """Classifies queries to determine expert specialization.""" EXPERT_KEYWORDS = { "Code": [ "programming", "software", "development", "coding", "algorithm", "python", "javascript", "java", "function", "code", "debug", "api", "framework", "library", "class", "method", "variable" ], "Math": [ "mathematics", "calculation", "equation", "formula", "statistics", "derivative", "integral", "algebra", "calculus", "math", "solve", "calculate", "probability", "geometry", "trigonometry" ], "Reasoning": [ "logic", "analysis", "reasoning", "problem-solving", "critical", "explain", "why", "how", "because", "analyze", "evaluate", "compare", "contrast", "deduce", "infer" ], "Multilingual": [ "translation", "language", "multilingual", "localization", "translate", "spanish", "french", "german", "chinese", "japanese", "korean", "arabic", "russian", "portuguese" ], "General": [ "general", "conversation", "assistance", "help", "hello", "hi", "what", "who", "when", "where", "tell", "describe", "explain" ] } @classmethod def classify_query(cls, query: str) -> str: """Classify query to determine expert specialization.""" if not query: return "General" query_lower = query.lower() scores = {} for expert, keywords in cls.EXPERT_KEYWORDS.items(): score = sum(1 for keyword in keywords if keyword in query_lower) scores[expert] = score if scores: best_expert = max(scores.items(), key=lambda x: x[1])[0] if scores[best_expert] > 0: return best_expert return "General" class ResponseGenerator: """Handles response generation with fallback support.""" SYSTEM_MESSAGES = { "Code": "You are an expert software engineer and programming assistant. Provide clear, well-commented code examples and explain programming concepts thoroughly.", "Math": "You are a mathematics expert. Solve problems step-by-step, show your work, and explain mathematical concepts clearly.", "Reasoning": "You are a logical reasoning expert. Break down complex problems, analyze them systematically, and provide clear explanations.", "Multilingual": "You are a multilingual expert. Help with translations, language learning, and cross-cultural communication.", "General": "You are a helpful AI assistant. Provide accurate, helpful, and informative responses to user questions." } def __init__(self, model_loader: ModelLoader): self.model_loader = model_loader def generate_fallback_response(self, query: str, expert_type: str) -> str: """Generate fallback response when model is unavailable.""" fallback_messages = { "Code": f"I'm a Code Expert, but the Phi-3.5-MoE model is currently unavailable. For your question about '{query}', I would typically provide detailed code examples and programming guidance. Please try again later when the model is loaded.", "Math": f"I'm a Math Expert, but the Phi-3.5-MoE model is currently unavailable. For your question about '{query}', I would typically solve mathematical problems step-by-step. Please try again later when the model is loaded.", "Reasoning": f"I'm a Reasoning Expert, but the Phi-3.5-MoE model is currently unavailable. For your question about '{query}', I would typically provide logical analysis and systematic problem-solving. Please try again later when the model is loaded.", "Multilingual": f"I'm a Multilingual Expert, but the Phi-3.5-MoE model is currently unavailable. 


class ResponseGenerator:
    """Handles response generation with fallback support."""

    SYSTEM_MESSAGES = {
        "Code": (
            "You are an expert software engineer and programming assistant. "
            "Provide clear, well-commented code examples and explain "
            "programming concepts thoroughly."
        ),
        "Math": (
            "You are a mathematics expert. Solve problems step-by-step, "
            "show your work, and explain mathematical concepts clearly."
        ),
        "Reasoning": (
            "You are a logical reasoning expert. Break down complex "
            "problems, analyze them systematically, and provide clear "
            "explanations."
        ),
        "Multilingual": (
            "You are a multilingual expert. Help with translations, "
            "language learning, and cross-cultural communication."
        ),
        "General": (
            "You are a helpful AI assistant. Provide accurate, helpful, "
            "and informative responses to user questions."
        ),
    }

    def __init__(self, model_loader: ModelLoader):
        self.model_loader = model_loader

    def generate_fallback_response(self, query: str, expert_type: str) -> str:
        """Generate a fallback response when the model is unavailable."""
        # What each expert would normally do, spliced into one shared template.
        expert_actions = {
            "Code": "provide detailed code examples and programming guidance",
            "Math": "solve mathematical problems step-by-step",
            "Reasoning": "provide logical analysis and systematic problem-solving",
            "Multilingual": "help with translations and language learning",
            "General": "provide helpful and informative responses",
        }
        if expert_type not in expert_actions:
            expert_type = "General"
        return (
            f"I'm a {expert_type} Expert, but the Phi-3.5-MoE model is "
            f"currently unavailable. For your question about '{query}', "
            f"I would typically {expert_actions[expert_type]}. "
            f"Please try again later when the model is loaded."
        )

    def generate_response(
        self,
        query: str,
        max_tokens: int = 500,
        temperature: float = 0.7,
    ) -> str:
        """Generate a response using the model, or a fallback if unavailable."""
        try:
            # Classify the query to select an expert persona.
            expert_type = ExpertClassifier.classify_query(query)

            # Use the canned fallback when the model is not loaded.
            if not self.model_loader.is_loaded:
                fallback = self.generate_fallback_response(query, expert_type)
                return (
                    f"**Expert Type:** {expert_type}\n\n"
                    f"**Response:**\n{fallback}"
                )

            # Build chat messages with the expert's system prompt.
            system_message = self.SYSTEM_MESSAGES.get(
                expert_type, self.SYSTEM_MESSAGES["General"]
            )
            messages = [
                {"role": "system", "content": system_message},
                {"role": "user", "content": query},
            ]

            # Generate a response using the text-generation pipeline.
            response = self.model_loader.pipeline(
                messages,
                max_new_tokens=max_tokens,
                temperature=temperature,
                do_sample=True,
                pad_token_id=self.model_loader.tokenizer.eos_token_id,
            )

            # Extract the assistant's reply. With chat-style input the
            # pipeline returns the conversation as a list of message dicts;
            # splitting on a literal "Assistant:" marker only applies to
            # plain-string outputs.
            generated_text = response[0]["generated_text"]
            if isinstance(generated_text, list):
                assistant_response = generated_text[-1]["content"].strip()
            elif "Assistant:" in generated_text:
                assistant_response = generated_text.split("Assistant:")[-1].strip()
            else:
                assistant_response = generated_text

            return (
                f"**Expert Type:** {expert_type}\n\n"
                f"**Response:**\n{assistant_response}"
            )

        except Exception as e:
            logger.error(f"Error generating response: {e}")
            return (
                f"❌ **Error generating response:** {e}\n\n"
                "Please try again or check the logs for more details."
            )
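

# Shape of the pipeline output handled in generate_response (a sketch; this
# assumes the Hugging Face text-generation pipeline with chat-style message
# input, where `generated_text` echoes the conversation plus the newly
# generated assistant turn. With a plain string prompt, `generated_text`
# would be a single string instead):
#
#     [{"generated_text": [
#         {"role": "system", "content": "You are ..."},
#         {"role": "user", "content": "How do I ..."},
#         {"role": "assistant", "content": "..."},  # extracted as the reply
#     ]}]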


class GradioInterface:
    """Main Gradio interface for the application."""

    def __init__(self, model_loader: ModelLoader):
        self.model_loader = model_loader
        self.response_generator = ResponseGenerator(model_loader)
        self.demo = None

    def create_interface(self) -> gr.Blocks:
        """Create the Gradio interface."""
        # Determine status message
        if self.model_loader.is_loaded:
            model_info = self.model_loader.get_model_info()
            status_msg = (
                f"✅ **Model Status:** {model_info['model_id']} "
                f"loaded successfully"
            )
            if model_info.get("revision"):
                status_msg += f" (revision: {model_info['revision'][:8]}...)"
        else:
            status_msg = (
                "⚠️ **Model Status:** Running in fallback mode "
                "(model loading issues)"
            )

        with gr.Blocks(
            title="Phi-3.5-MoE Expert Assistant",
            theme=gr.themes.Soft(),
            css=(
                ".status-box { background-color: #f0f0f0; padding: 10px; "
                "border-radius: 5px; margin: 10px 0; }"
            ),
        ) as demo:
            gr.Markdown("# 🤖 Phi-3.5-MoE Expert Assistant")
            gr.Markdown(status_msg, elem_classes=["status-box"])
            gr.Markdown(
                """
                This is a specialized AI assistant powered by Microsoft's
                Phi-3.5-MoE model. It automatically routes your queries to
                the most appropriate expert:

                - **Code Expert**: Programming, software development, algorithms
                - **Math Expert**: Mathematics, calculations, problem solving
                - **Reasoning Expert**: Logic, analysis, critical thinking
                - **Multilingual Expert**: Translation and language assistance
                - **General Expert**: General purpose assistance
                """
            )

            with gr.Row():
                with gr.Column(scale=3):
                    query_input = gr.Textbox(
                        label="Your Question",
                        placeholder="Ask me anything...",
                        lines=3,
                        max_lines=10,
                    )
                    with gr.Row():
                        max_tokens = gr.Slider(
                            minimum=50,
                            maximum=1000,
                            value=500,
                            step=50,
                            label="Max Tokens",
                        )
                        temperature = gr.Slider(
                            minimum=0.1,
                            maximum=1.0,
                            value=0.7,
                            step=0.1,
                            label="Temperature",
                        )
                    submit_btn = gr.Button("Generate Response", variant="primary")
                    clear_btn = gr.Button("Clear", variant="secondary")

                with gr.Column(scale=2):
                    response_output = gr.Markdown(
                        label="Response",
                        value="*Submit a question to get started...*",
                    )

            # Model information section
            with gr.Accordion("Model Information", open=False):
                model_info_display = gr.JSON(
                    value=self.model_loader.get_model_info(),
                    label="Model Details",
                )

            # Example queries
            gr.Markdown("### 💡 Example Queries")
            examples = [
                "How do I implement a binary search algorithm in Python?",
                "What is the derivative of x² + 3x + 1?",
                "Explain the logical reasoning behind the Monty Hall problem",
                "Translate 'Hello, how are you?' to Spanish",
                "What are the benefits of renewable energy?",
            ]
            gr.Examples(
                examples=examples,
                inputs=query_input,
                label="Click an example to try it",
            )

            # Event handlers
            submit_btn.click(
                fn=self.response_generator.generate_response,
                inputs=[query_input, max_tokens, temperature],
                outputs=response_output,
                show_progress=True,
            )
            query_input.submit(
                fn=self.response_generator.generate_response,
                inputs=[query_input, max_tokens, temperature],
                outputs=response_output,
                show_progress=True,
            )
            clear_btn.click(
                fn=lambda: ("", "*Submit a question to get started...*"),
                outputs=[query_input, response_output],
            )

        self.demo = demo
        return demo

    def launch(
        self,
        server_name: str = "0.0.0.0",
        server_port: int = 7860,
        **kwargs,
    ) -> None:
        """Launch the Gradio interface."""
        if not self.demo:
            self.create_interface()

        logger.info(f"🚀 Launching Gradio interface on {server_name}:{server_port}")
        self.demo.launch(
            server_name=server_name,
            server_port=server_port,
            **kwargs,
        )
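

# Minimal launch sketch (illustrative only; the module paths and the
# `ModelLoader()` / `load()` calls are assumptions about `.model_loader`,
# since this file only relies on `is_loaded`, `pipeline`, `tokenizer`,
# and `get_model_info()`):
#
#     from app.model_loader import ModelLoader
#     from app.interface import GradioInterface
#
#     loader = ModelLoader()
#     loader.load()                      # hypothetical method name
#     GradioInterface(loader).launch()   # serves on 0.0.0.0:7860 by default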