"""
Gradio interface module for the Phi-3.5-MoE application.
"""
import logging
import re
from typing import Optional, Dict, Any

import gradio as gr

from .model_loader import ModelLoader
logger = logging.getLogger(__name__)
class ExpertClassifier:
    """Classifies queries to determine expert specialization.

    Routing is keyword-based: the expert whose keyword list has the most
    whole-word hits in the query wins; ties go to the first expert listed
    in EXPERT_KEYWORDS (insertion order).
    """

    # Keyword lists matched case-insensitively as whole words.
    EXPERT_KEYWORDS = {
        "Code": [
            "programming", "software", "development", "coding", "algorithm",
            "python", "javascript", "java", "function", "code", "debug",
            "api", "framework", "library", "class", "method", "variable"
        ],
        "Math": [
            "mathematics", "calculation", "equation", "formula", "statistics",
            "derivative", "integral", "algebra", "calculus", "math", "solve",
            "calculate", "probability", "geometry", "trigonometry"
        ],
        "Reasoning": [
            "logic", "analysis", "reasoning", "problem-solving", "critical",
            "explain", "why", "how", "because", "analyze", "evaluate",
            "compare", "contrast", "deduce", "infer"
        ],
        "Multilingual": [
            "translation", "language", "multilingual", "localization",
            "translate", "spanish", "french", "german", "chinese", "japanese",
            "korean", "arabic", "russian", "portuguese"
        ],
        "General": [
            "general", "conversation", "assistance", "help", "hello", "hi",
            "what", "who", "when", "where", "tell", "describe", "explain"
        ]
    }

    @classmethod
    def classify_query(cls, query: str) -> str:
        """Classify *query* and return the matching expert name.

        Args:
            query: Raw user text; may be empty.

        Returns:
            One of the EXPERT_KEYWORDS keys, defaulting to "General" when
            the query is empty or no keyword matches.
        """
        if not query:
            return "General"
        query_lower = query.lower()
        # Whole-word matching: a plain substring test misfires badly —
        # e.g. "api" inside "capital" would credit the Code expert and
        # "hi" inside "this" would credit General.
        scores = {
            expert: sum(
                1 for keyword in keywords
                if re.search(rf"\b{re.escape(keyword)}\b", query_lower)
            )
            for expert, keywords in cls.EXPERT_KEYWORDS.items()
        }
        # max() returns the first key with the top score, preserving the
        # original tie-breaking behavior (dict insertion order).
        best_expert, best_score = max(scores.items(), key=lambda item: item[1])
        # All-zero scores mean no keyword fired; fall back to General.
        return best_expert if best_score > 0 else "General"
class ResponseGenerator:
    """Handles response generation with fallback support.

    Routes the query to an expert persona via ExpertClassifier, then either
    runs the model pipeline or, when the model is not loaded, returns a
    canned fallback message.
    """

    # Per-expert system prompts prepended to every model call.
    SYSTEM_MESSAGES = {
        "Code": "You are an expert software engineer and programming assistant. Provide clear, well-commented code examples and explain programming concepts thoroughly.",
        "Math": "You are a mathematics expert. Solve problems step-by-step, show your work, and explain mathematical concepts clearly.",
        "Reasoning": "You are a logical reasoning expert. Break down complex problems, analyze them systematically, and provide clear explanations.",
        "Multilingual": "You are a multilingual expert. Help with translations, language learning, and cross-cultural communication.",
        "General": "You are a helpful AI assistant. Provide accurate, helpful, and informative responses to user questions."
    }

    def __init__(self, model_loader: "ModelLoader"):
        """Store the loader whose pipeline/tokenizer back generation.

        The annotation is a forward reference so this class does not require
        ModelLoader to be resolvable at class-definition time.
        """
        self.model_loader = model_loader

    def generate_fallback_response(self, query: str, expert_type: str) -> str:
        """Generate fallback response when model is unavailable.

        Args:
            query: The user's question (echoed into the message).
            expert_type: Expert name chosen by the classifier.

        Returns:
            A static apology message tailored to the expert type; unknown
            expert types fall back to the "General" message.
        """
        fallback_messages = {
            "Code": f"I'm a Code Expert, but the Phi-3.5-MoE model is currently unavailable. For your question about '{query}', I would typically provide detailed code examples and programming guidance. Please try again later when the model is loaded.",
            "Math": f"I'm a Math Expert, but the Phi-3.5-MoE model is currently unavailable. For your question about '{query}', I would typically solve mathematical problems step-by-step. Please try again later when the model is loaded.",
            "Reasoning": f"I'm a Reasoning Expert, but the Phi-3.5-MoE model is currently unavailable. For your question about '{query}', I would typically provide logical analysis and systematic problem-solving. Please try again later when the model is loaded.",
            "Multilingual": f"I'm a Multilingual Expert, but the Phi-3.5-MoE model is currently unavailable. For your question about '{query}', I would typically help with translations and language learning. Please try again later when the model is loaded.",
            "General": f"I'm a General Expert, but the Phi-3.5-MoE model is currently unavailable. For your question about '{query}', I would typically provide helpful and informative responses. Please try again later when the model is loaded."
        }
        return fallback_messages.get(expert_type, fallback_messages["General"])

    @staticmethod
    def _extract_assistant_text(generated_text) -> str:
        """Normalize pipeline output into the assistant's reply text.

        Chat-style text-generation pipelines return ``generated_text`` as a
        list of {"role", "content"} message dicts (the assistant reply is the
        last entry); older/plain pipelines return a single string that may
        embed an "Assistant:" marker. Both shapes are handled here.
        """
        if isinstance(generated_text, list):
            last = generated_text[-1]
            if isinstance(last, dict):
                return str(last.get("content", "")).strip()
            return str(last).strip()
        text = str(generated_text)
        if "Assistant:" in text:
            return text.split("Assistant:")[-1].strip()
        return text

    def generate_response(
        self,
        query: str,
        max_tokens: int = 500,
        temperature: float = 0.7
    ) -> str:
        """Generate response using the model or fallback.

        Args:
            query: Raw user question.
            max_tokens: Upper bound on newly generated tokens.
            temperature: Sampling temperature passed to the pipeline.

        Returns:
            Markdown string containing the expert type and the reply, the
            fallback notice, or an error message (never raises).
        """
        try:
            # Classify query type to pick the expert persona.
            expert_type = ExpertClassifier.classify_query(query)
            # Model unavailable -> canned fallback, never an exception.
            if not self.model_loader.is_loaded:
                return f"**Expert Type:** {expert_type}\n\n**Response:**\n{self.generate_fallback_response(query, expert_type)}"
            # Get system message for expert type.
            system_message = self.SYSTEM_MESSAGES.get(expert_type, self.SYSTEM_MESSAGES["General"])
            # Chat-format messages for the text-generation pipeline.
            messages = [
                {"role": "system", "content": system_message},
                {"role": "user", "content": query}
            ]
            response = self.model_loader.pipeline(
                messages,
                max_new_tokens=max_tokens,
                temperature=temperature,
                do_sample=True,
                pad_token_id=self.model_loader.tokenizer.eos_token_id
            )
            # Handles both chat (list-of-messages) and plain-string outputs.
            assistant_response = self._extract_assistant_text(response[0]['generated_text'])
            return f"**Expert Type:** {expert_type}\n\n**Response:**\n{assistant_response}"
        except Exception as e:
            # logger.exception records the full traceback, unlike error(f"...").
            logger.exception("Error generating response")
            return f"❌ **Error generating response:** {str(e)}\n\nPlease try again or check the logs for more details."
class GradioInterface:
    """Main Gradio interface for the application.

    Composes a ``ModelLoader`` and ``ResponseGenerator`` into a Blocks UI
    with a query box, generation sliders, example prompts, and model info.
    """
    def __init__(self, model_loader: ModelLoader):
        # Loader state drives the status banner and the model-info panel.
        self.model_loader = model_loader
        self.response_generator = ResponseGenerator(model_loader)
        # Built lazily by create_interface(); launch() creates it on demand.
        self.demo: Optional[gr.Blocks] = None
    def create_interface(self) -> gr.Blocks:
        """Create the Gradio interface.

        NOTE: component-creation order inside the ``with`` blocks determines
        the on-screen layout, so statement order here is significant.

        Returns:
            The constructed ``gr.Blocks`` demo (also cached on ``self.demo``).
        """
        # Determine status message
        if self.model_loader.is_loaded:
            model_info = self.model_loader.get_model_info()
            status_msg = f"✅ **Model Status:** {model_info['model_id']} loaded successfully"
            # Abbreviate the pinned revision hash when one is reported.
            if model_info.get('revision'):
                status_msg += f" (revision: {model_info['revision'][:8]}...)"
        else:
            # Model failed to load: the UI still works via canned fallbacks.
            status_msg = "⚠️ **Model Status:** Running in fallback mode (model loading issues)"
        with gr.Blocks(
            title="Phi-3.5-MoE Expert Assistant",
            theme=gr.themes.Soft(),
            css=".status-box { background-color: #f0f0f0; padding: 10px; border-radius: 5px; margin: 10px 0; }"
        ) as demo:
            gr.Markdown("# 🤖 Phi-3.5-MoE Expert Assistant")
            gr.Markdown(status_msg, elem_classes=["status-box"])
            gr.Markdown("""
            This is a specialized AI assistant powered by Microsoft's Phi-3.5-MoE model.
            It automatically routes your queries to the most appropriate expert:
            - **Code Expert**: Programming, software development, algorithms
            - **Math Expert**: Mathematics, calculations, problem solving
            - **Reasoning Expert**: Logic, analysis, critical thinking
            - **Multilingual Expert**: Translation and language assistance
            - **General Expert**: General purpose assistance
            """)
            with gr.Row():
                # Left column: input controls.
                with gr.Column(scale=3):
                    query_input = gr.Textbox(
                        label="Your Question",
                        placeholder="Ask me anything...",
                        lines=3,
                        max_lines=10
                    )
                    with gr.Row():
                        # Slider values feed generate_response(query, max_tokens, temperature).
                        max_tokens = gr.Slider(
                            minimum=50,
                            maximum=1000,
                            value=500,
                            step=50,
                            label="Max Tokens"
                        )
                        temperature = gr.Slider(
                            minimum=0.1,
                            maximum=1.0,
                            value=0.7,
                            step=0.1,
                            label="Temperature"
                        )
                    submit_btn = gr.Button("Generate Response", variant="primary")
                    clear_btn = gr.Button("Clear", variant="secondary")
                # Right column: rendered markdown response.
                with gr.Column(scale=2):
                    response_output = gr.Markdown(
                        label="Response",
                        value="*Submit a question to get started...*"
                    )
            # Model information section
            with gr.Accordion("Model Information", open=False):
                # Snapshot of loader metadata at interface-build time.
                model_info_display = gr.JSON(
                    value=self.model_loader.get_model_info(),
                    label="Model Details"
                )
            # Example queries
            gr.Markdown("### 💡 Example Queries")
            # One example per expert type, clickable to fill the query box.
            examples = [
                "How do I implement a binary search algorithm in Python?",
                "What is the derivative of x² + 3x + 1?",
                "Explain the logical reasoning behind the Monty Hall problem",
                "Translate 'Hello, how are you?' to Spanish",
                "What are the benefits of renewable energy?"
            ]
            gr.Examples(
                examples=examples,
                inputs=query_input,
                label="Click an example to try it"
            )
            # Event handlers
            # Button click and Enter-in-textbox run the same generation
            # function with the slider values as extra inputs.
            # NOTE(review): newer Gradio versions prefer string values for
            # show_progress ("full"/"minimal"/"hidden") — confirm against the
            # installed version.
            submit_btn.click(
                fn=self.response_generator.generate_response,
                inputs=[query_input, max_tokens, temperature],
                outputs=response_output,
                show_progress=True
            )
            query_input.submit(
                fn=self.response_generator.generate_response,
                inputs=[query_input, max_tokens, temperature],
                outputs=response_output,
                show_progress=True
            )
            # Clear resets both the input box and the output placeholder.
            clear_btn.click(
                fn=lambda: ("", "*Submit a question to get started...*"),
                outputs=[query_input, response_output]
            )
        self.demo = demo
        return demo
    def launch(
        self,
        server_name: str = "0.0.0.0",
        server_port: int = 7860,
        **kwargs
    ) -> None:
        """Launch the Gradio interface.

        Args:
            server_name: Bind address; "0.0.0.0" exposes the app externally.
            server_port: TCP port for the web server.
            **kwargs: Forwarded verbatim to ``gr.Blocks.launch``.
        """
        # Build the UI on first use.
        if not self.demo:
            self.create_interface()
        logger.info(f"🚀 Launching Gradio interface on {server_name}:{server_port}")
        self.demo.launch(
            server_name=server_name,
            server_port=server_port,
            **kwargs
        )