| """ | |
| Gradio interface module for the Phi-3.5-MoE application. | |
| """ | |
| import logging | |
| import gradio as gr | |
| from typing import Optional, Dict, Any | |
| from .model_loader import ModelLoader | |
| logger = logging.getLogger(__name__) | |


class ExpertClassifier:
    """Classifies queries to determine expert specialization."""

    EXPERT_KEYWORDS = {
        "Code": [
            "programming", "software", "development", "coding", "algorithm",
            "python", "javascript", "java", "function", "code", "debug",
            "api", "framework", "library", "class", "method", "variable"
        ],
        "Math": [
            "mathematics", "calculation", "equation", "formula", "statistics",
            "derivative", "integral", "algebra", "calculus", "math", "solve",
            "calculate", "probability", "geometry", "trigonometry"
        ],
        "Reasoning": [
            "logic", "analysis", "reasoning", "problem-solving", "critical",
            "explain", "why", "how", "because", "analyze", "evaluate",
            "compare", "contrast", "deduce", "infer"
        ],
        "Multilingual": [
            "translation", "language", "multilingual", "localization",
            "translate", "spanish", "french", "german", "chinese", "japanese",
            "korean", "arabic", "russian", "portuguese"
        ],
        "General": [
            "general", "conversation", "assistance", "help", "hello", "hi",
            "what", "who", "when", "where", "tell", "describe", "explain"
        ]
    }
    @classmethod  # the method takes cls, so it must be declared a classmethod
    def classify_query(cls, query: str) -> str:
        """Classify a query to determine its expert specialization."""
        if not query:
            return "General"

        query_lower = query.lower()
        scores = {
            expert: sum(1 for keyword in keywords if keyword in query_lower)
            for expert, keywords in cls.EXPERT_KEYWORDS.items()
        }

        # EXPERT_KEYWORDS is a non-empty class constant, so scores is never empty
        best_expert = max(scores, key=scores.get)
        if scores[best_expert] > 0:
            return best_expert
        return "General"


class ResponseGenerator:
    """Handles response generation with fallback support."""

    SYSTEM_MESSAGES = {
        "Code": (
            "You are an expert software engineer and programming assistant. "
            "Provide clear, well-commented code examples and explain "
            "programming concepts thoroughly."
        ),
        "Math": (
            "You are a mathematics expert. Solve problems step-by-step, show "
            "your work, and explain mathematical concepts clearly."
        ),
        "Reasoning": (
            "You are a logical reasoning expert. Break down complex problems, "
            "analyze them systematically, and provide clear explanations."
        ),
        "Multilingual": (
            "You are a multilingual expert. Help with translations, language "
            "learning, and cross-cultural communication."
        ),
        "General": (
            "You are a helpful AI assistant. Provide accurate, helpful, and "
            "informative responses to user questions."
        )
    }

    def __init__(self, model_loader: ModelLoader):
        self.model_loader = model_loader
    def generate_fallback_response(self, query: str, expert_type: str) -> str:
        """Generate a fallback response when the model is unavailable."""
        specialties = {
            "Code": "provide detailed code examples and programming guidance",
            "Math": "solve mathematical problems step-by-step",
            "Reasoning": "provide logical analysis and systematic problem-solving",
            "Multilingual": "help with translations and language learning",
            "General": "provide helpful and informative responses",
        }
        if expert_type not in specialties:
            expert_type = "General"
        return (
            f"I'm a {expert_type} Expert, but the Phi-3.5-MoE model is currently "
            f"unavailable. For your question about '{query}', I would typically "
            f"{specialties[expert_type]}. Please try again later when the model "
            f"is loaded."
        )
    def generate_response(
        self,
        query: str,
        max_tokens: int = 500,
        temperature: float = 0.7
    ) -> str:
        """Generate a response using the model, or a fallback if unavailable."""
        try:
            # Classify the query to pick an expert persona
            expert_type = ExpertClassifier.classify_query(query)

            # Fall back if the model is not loaded
            if not self.model_loader.is_loaded:
                fallback = self.generate_fallback_response(query, expert_type)
                return f"**Expert Type:** {expert_type}\n\n**Response:**\n{fallback}"

            # Build the chat with the expert-specific system message
            system_message = self.SYSTEM_MESSAGES.get(expert_type, self.SYSTEM_MESSAGES["General"])
            messages = [
                {"role": "system", "content": system_message},
                {"role": "user", "content": query}
            ]

            # Generate with the text-generation pipeline (Gradio sliders pass
            # floats, so cast max_tokens back to int)
            response = self.model_loader.pipeline(
                messages,
                max_new_tokens=int(max_tokens),
                temperature=temperature,
                do_sample=True,
                pad_token_id=self.model_loader.tokenizer.eos_token_id
            )

            # Extract the assistant's reply. With chat-formatted input, recent
            # transformers pipelines return the whole conversation as a list of
            # message dicts; older versions return a plain string.
            generated_text = response[0]["generated_text"]
            if isinstance(generated_text, list):
                assistant_response = generated_text[-1]["content"].strip()
            elif "Assistant:" in generated_text:
                assistant_response = generated_text.split("Assistant:")[-1].strip()
            else:
                assistant_response = generated_text

            return f"**Expert Type:** {expert_type}\n\n**Response:**\n{assistant_response}"
        except Exception as e:
            logger.error(f"Error generating response: {e}")
            return (
                f"❌ **Error generating response:** {str(e)}\n\n"
                "Please try again or check the logs for more details."
            )


class GradioInterface:
    """Main Gradio interface for the application."""

    def __init__(self, model_loader: ModelLoader):
        self.model_loader = model_loader
        self.response_generator = ResponseGenerator(model_loader)
        self.demo = None

    def create_interface(self) -> gr.Blocks:
        """Create the Gradio interface."""
        # Determine the status message
        if self.model_loader.is_loaded:
            model_info = self.model_loader.get_model_info()
            status_msg = f"✅ **Model Status:** {model_info['model_id']} loaded successfully"
            if model_info.get('revision'):
                status_msg += f" (revision: {model_info['revision'][:8]}...)"
        else:
            status_msg = "⚠️ **Model Status:** Running in fallback mode (model loading issues)"

        with gr.Blocks(
            title="Phi-3.5-MoE Expert Assistant",
            theme=gr.themes.Soft(),
            css=".status-box { background-color: #f0f0f0; padding: 10px; border-radius: 5px; margin: 10px 0; }"
        ) as demo:
            gr.Markdown("# 🤖 Phi-3.5-MoE Expert Assistant")
            gr.Markdown(status_msg, elem_classes=["status-box"])
            gr.Markdown("""
            This is a specialized AI assistant powered by Microsoft's Phi-3.5-MoE model.
            It automatically routes your queries to the most appropriate expert:

            - **Code Expert**: Programming, software development, algorithms
            - **Math Expert**: Mathematics, calculations, problem solving
            - **Reasoning Expert**: Logic, analysis, critical thinking
            - **Multilingual Expert**: Translation and language assistance
            - **General Expert**: General-purpose assistance
            """)

            with gr.Row():
                with gr.Column(scale=3):
                    query_input = gr.Textbox(
                        label="Your Question",
                        placeholder="Ask me anything...",
                        lines=3,
                        max_lines=10
                    )
                    with gr.Row():
                        max_tokens = gr.Slider(
                            minimum=50,
                            maximum=1000,
                            value=500,
                            step=50,
                            label="Max Tokens"
                        )
                        temperature = gr.Slider(
                            minimum=0.1,
                            maximum=1.0,
                            value=0.7,
                            step=0.1,
                            label="Temperature"
                        )
                    submit_btn = gr.Button("Generate Response", variant="primary")
                    clear_btn = gr.Button("Clear", variant="secondary")

                with gr.Column(scale=2):
                    response_output = gr.Markdown(
                        label="Response",
                        value="*Submit a question to get started...*"
                    )

            # Model information section
            with gr.Accordion("Model Information", open=False):
                model_info_display = gr.JSON(
                    value=self.model_loader.get_model_info(),
                    label="Model Details"
                )

            # Example queries
            gr.Markdown("### 💡 Example Queries")
            examples = [
                "How do I implement a binary search algorithm in Python?",
                "What is the derivative of x² + 3x + 1?",
                "Explain the logical reasoning behind the Monty Hall problem",
                "Translate 'Hello, how are you?' to Spanish",
                "What are the benefits of renewable energy?"
            ]
            gr.Examples(
                examples=examples,
                inputs=query_input,
                label="Click an example to try it"
            )

            # Event handlers
            submit_btn.click(
                fn=self.response_generator.generate_response,
                inputs=[query_input, max_tokens, temperature],
                outputs=response_output,
                show_progress=True
            )
            query_input.submit(
                fn=self.response_generator.generate_response,
                inputs=[query_input, max_tokens, temperature],
                outputs=response_output,
                show_progress=True
            )
            clear_btn.click(
                fn=lambda: ("", "*Submit a question to get started...*"),
                outputs=[query_input, response_output]
            )

        self.demo = demo
        return demo
    def launch(
        self,
        server_name: str = "0.0.0.0",
        server_port: int = 7860,
        **kwargs
    ) -> None:
        """Launch the Gradio interface."""
        if not self.demo:
            self.create_interface()
        logger.info(f"🚀 Launching Gradio interface on {server_name}:{server_port}")
        self.demo.launch(
            server_name=server_name,
            server_port=server_port,
            **kwargs
        )
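

# Minimal launch sketch (illustrative). ModelLoader's constructor and load()
# method are assumptions here; adapt them to the real model_loader module.
# Because of the relative import above, run this as a module, e.g.:
#   python -m app.gradio_interface
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    loader = ModelLoader()  # assumed zero-argument constructor
    loader.load()           # hypothetical: however the model is actually loaded
    GradioInterface(loader).launch()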