"""
Gradio interface module for the Phi-3.5-MoE application.
"""
import logging

import gradio as gr

from .model_loader import ModelLoader
logger = logging.getLogger(__name__)
class ExpertClassifier:
"""Classifies queries to determine expert specialization."""
EXPERT_KEYWORDS = {
"Code": [
"programming", "software", "development", "coding", "algorithm",
"python", "javascript", "java", "function", "code", "debug",
"api", "framework", "library", "class", "method", "variable"
],
"Math": [
"mathematics", "calculation", "equation", "formula", "statistics",
"derivative", "integral", "algebra", "calculus", "math", "solve",
"calculate", "probability", "geometry", "trigonometry"
],
"Reasoning": [
"logic", "analysis", "reasoning", "problem-solving", "critical",
"explain", "why", "how", "because", "analyze", "evaluate",
"compare", "contrast", "deduce", "infer"
],
"Multilingual": [
"translation", "language", "multilingual", "localization",
"translate", "spanish", "french", "german", "chinese", "japanese",
"korean", "arabic", "russian", "portuguese"
],
"General": [
"general", "conversation", "assistance", "help", "hello", "hi",
"what", "who", "when", "where", "tell", "describe", "explain"
]
}
@classmethod
def classify_query(cls, query: str) -> str:
"""Classify query to determine expert specialization."""
if not query:
return "General"
query_lower = query.lower()
scores = {}
for expert, keywords in cls.EXPERT_KEYWORDS.items():
score = sum(1 for keyword in keywords if keyword in query_lower)
scores[expert] = score
        # Ties resolve by dict insertion order: max() returns the first expert
        # with the highest score, so earlier entries (e.g. "Code") win ties.
        best_expert, best_score = max(scores.items(), key=lambda item: item[1])
        return best_expert if best_score > 0 else "General"
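# Illustrative behavior of the classifier above; these example queries are not
# from the original file. Note that matching is substring-based, so short
# keywords can fire inside longer words (e.g. "hi" matches inside "this"):
#
#   ExpertClassifier.classify_query("Help me debug this Python function")
#   # -> "Code" ("debug", "python", "function" give Code the top score)
#   ExpertClassifier.classify_query("Translate this sentence to French")
#   # -> "Multilingual"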
class ResponseGenerator:
"""Handles response generation with fallback support."""
SYSTEM_MESSAGES = {
"Code": "You are an expert software engineer and programming assistant. Provide clear, well-commented code examples and explain programming concepts thoroughly.",
"Math": "You are a mathematics expert. Solve problems step-by-step, show your work, and explain mathematical concepts clearly.",
"Reasoning": "You are a logical reasoning expert. Break down complex problems, analyze them systematically, and provide clear explanations.",
"Multilingual": "You are a multilingual expert. Help with translations, language learning, and cross-cultural communication.",
"General": "You are a helpful AI assistant. Provide accurate, helpful, and informative responses to user questions."
}
def __init__(self, model_loader: ModelLoader):
self.model_loader = model_loader
def generate_fallback_response(self, query: str, expert_type: str) -> str:
"""Generate fallback response when model is unavailable."""
fallback_messages = {
"Code": f"I'm a Code Expert, but the Phi-3.5-MoE model is currently unavailable. For your question about '{query}', I would typically provide detailed code examples and programming guidance. Please try again later when the model is loaded.",
"Math": f"I'm a Math Expert, but the Phi-3.5-MoE model is currently unavailable. For your question about '{query}', I would typically solve mathematical problems step-by-step. Please try again later when the model is loaded.",
"Reasoning": f"I'm a Reasoning Expert, but the Phi-3.5-MoE model is currently unavailable. For your question about '{query}', I would typically provide logical analysis and systematic problem-solving. Please try again later when the model is loaded.",
"Multilingual": f"I'm a Multilingual Expert, but the Phi-3.5-MoE model is currently unavailable. For your question about '{query}', I would typically help with translations and language learning. Please try again later when the model is loaded.",
"General": f"I'm a General Expert, but the Phi-3.5-MoE model is currently unavailable. For your question about '{query}', I would typically provide helpful and informative responses. Please try again later when the model is loaded."
}
return fallback_messages.get(expert_type, fallback_messages["General"])
def generate_response(
self,
query: str,
max_tokens: int = 500,
temperature: float = 0.7
) -> str:
"""Generate response using the model or fallback."""
try:
# Classify query type
expert_type = ExpertClassifier.classify_query(query)
# Check if model is available
if not self.model_loader.is_loaded:
return f"**Expert Type:** {expert_type}\n\n**Response:**\n{self.generate_fallback_response(query, expert_type)}"
# Get system message for expert type
system_message = self.SYSTEM_MESSAGES.get(expert_type, self.SYSTEM_MESSAGES["General"])
# Format messages
messages = [
{"role": "system", "content": system_message},
{"role": "user", "content": query}
]
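            # In recent transformers versions, the pipeline applies the
            # model's chat template to this OpenAI-style message list.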
# Generate response using pipeline
response = self.model_loader.pipeline(
messages,
max_new_tokens=max_tokens,
temperature=temperature,
do_sample=True,
pad_token_id=self.model_loader.tokenizer.eos_token_id
)
            # Extract the assistant's reply. With chat-style message input,
            # recent transformers pipelines return the whole conversation as a
            # list of message dicts; older versions return a plain string.
            generated_text = response[0]["generated_text"]
            if isinstance(generated_text, list):
                assistant_response = generated_text[-1]["content"].strip()
            elif "Assistant:" in generated_text:
                assistant_response = generated_text.split("Assistant:")[-1].strip()
            else:
                assistant_response = generated_text.strip()
return f"**Expert Type:** {expert_type}\n\n**Response:**\n{assistant_response}"
except Exception as e:
logger.error(f"Error generating response: {e}")
return f"❌ **Error generating response:** {str(e)}\n\nPlease try again or check the logs for more details."
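# Example of the Markdown string generate_response() returns (illustrative
# content, not from the original file):
#
#   **Expert Type:** Math
#
#   **Response:**
#   The derivative of x^2 + 3x + 1 is 2x + 3.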
class GradioInterface:
"""Main Gradio interface for the application."""
def __init__(self, model_loader: ModelLoader):
self.model_loader = model_loader
self.response_generator = ResponseGenerator(model_loader)
self.demo = None
def create_interface(self) -> gr.Blocks:
"""Create the Gradio interface."""
# Determine status message
if self.model_loader.is_loaded:
model_info = self.model_loader.get_model_info()
            status_msg = f"✅ **Model Status:** {model_info['model_id']} loaded successfully"
if model_info.get('revision'):
status_msg += f" (revision: {model_info['revision'][:8]}...)"
else:
status_msg = "⚠️ **Model Status:** Running in fallback mode (model loading issues)"
with gr.Blocks(
title="Phi-3.5-MoE Expert Assistant",
theme=gr.themes.Soft(),
css=".status-box { background-color: #f0f0f0; padding: 10px; border-radius: 5px; margin: 10px 0; }"
) as demo:
gr.Markdown("# πŸ€– Phi-3.5-MoE Expert Assistant")
gr.Markdown(status_msg, elem_classes=["status-box"])
gr.Markdown("""
This is a specialized AI assistant powered by Microsoft's Phi-3.5-MoE model.
It automatically routes your queries to the most appropriate expert:
- **Code Expert**: Programming, software development, algorithms
- **Math Expert**: Mathematics, calculations, problem solving
- **Reasoning Expert**: Logic, analysis, critical thinking
- **Multilingual Expert**: Translation and language assistance
- **General Expert**: General purpose assistance
""")
with gr.Row():
with gr.Column(scale=3):
query_input = gr.Textbox(
label="Your Question",
placeholder="Ask me anything...",
lines=3,
max_lines=10
)
with gr.Row():
max_tokens = gr.Slider(
minimum=50,
maximum=1000,
value=500,
step=50,
label="Max Tokens"
)
temperature = gr.Slider(
minimum=0.1,
maximum=1.0,
value=0.7,
step=0.1,
label="Temperature"
)
submit_btn = gr.Button("Generate Response", variant="primary")
clear_btn = gr.Button("Clear", variant="secondary")
with gr.Column(scale=2):
response_output = gr.Markdown(
label="Response",
value="*Submit a question to get started...*"
)
# Model information section
with gr.Accordion("Model Information", open=False):
model_info_display = gr.JSON(
value=self.model_loader.get_model_info(),
label="Model Details"
)
# Example queries
gr.Markdown("### πŸ’‘ Example Queries")
examples = [
"How do I implement a binary search algorithm in Python?",
"What is the derivative of xΒ² + 3x + 1?",
"Explain the logical reasoning behind the Monty Hall problem",
"Translate 'Hello, how are you?' to Spanish",
"What are the benefits of renewable energy?"
]
gr.Examples(
examples=examples,
inputs=query_input,
label="Click an example to try it"
)
# Event handlers
submit_btn.click(
fn=self.response_generator.generate_response,
inputs=[query_input, max_tokens, temperature],
outputs=response_output,
                show_progress="full"
)
query_input.submit(
fn=self.response_generator.generate_response,
inputs=[query_input, max_tokens, temperature],
outputs=response_output,
                show_progress="full"
)
clear_btn.click(
fn=lambda: ("", "*Submit a question to get started...*"),
outputs=[query_input, response_output]
)
self.demo = demo
return demo
def launch(
self,
server_name: str = "0.0.0.0",
server_port: int = 7860,
**kwargs
) -> None:
"""Launch the Gradio interface."""
if not self.demo:
self.create_interface()
        logger.info(f"🚀 Launching Gradio interface on {server_name}:{server_port}")
self.demo.launch(
server_name=server_name,
server_port=server_port,
**kwargs
)
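# Minimal usage sketch, not part of the original file. Only is_loaded,
# get_model_info(), pipeline, and tokenizer are confirmed ModelLoader members
# by the code above; the no-argument constructor below is an assumption.
# Because this module uses a relative import, run it as a package module:
#   python -m app.interface
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    loader = ModelLoader()  # hypothetical signature; the real loader may need args or an explicit load step
    GradioInterface(loader).launch()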