"""
Gradio interface module for the Phi-3.5-MoE application.
"""
import logging

import gradio as gr

from .model_loader import ModelLoader
logger = logging.getLogger(__name__)
class ExpertClassifier:
"""Classifies queries to determine expert specialization."""
EXPERT_KEYWORDS = {
"Code": [
"programming", "software", "development", "coding", "algorithm",
"python", "javascript", "java", "function", "code", "debug",
"api", "framework", "library", "class", "method", "variable"
],
"Math": [
"mathematics", "calculation", "equation", "formula", "statistics",
"derivative", "integral", "algebra", "calculus", "math", "solve",
"calculate", "probability", "geometry", "trigonometry"
],
"Reasoning": [
"logic", "analysis", "reasoning", "problem-solving", "critical",
"explain", "why", "how", "because", "analyze", "evaluate",
"compare", "contrast", "deduce", "infer"
],
"Multilingual": [
"translation", "language", "multilingual", "localization",
"translate", "spanish", "french", "german", "chinese", "japanese",
"korean", "arabic", "russian", "portuguese"
],
"General": [
"general", "conversation", "assistance", "help", "hello", "hi",
"what", "who", "when", "where", "tell", "describe", "explain"
]
}
@classmethod
def classify_query(cls, query: str) -> str:
"""Classify query to determine expert specialization."""
if not query:
return "General"
query_lower = query.lower()
scores = {}
for expert, keywords in cls.EXPERT_KEYWORDS.items():
score = sum(1 for keyword in keywords if keyword in query_lower)
scores[expert] = score
        # Ties resolve by dict insertion order: max() returns the first expert
        # with the highest score, so earlier entries (e.g. "Code") win ties.
        best_expert, best_score = max(scores.items(), key=lambda item: item[1])
        return best_expert if best_score > 0 else "General"
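# Illustrative behavior of the classifier above; these example queries are not
# from the original file. Note that matching is substring-based, so short
# keywords can fire inside longer words (e.g. "hi" matches inside "this"):
#
#   ExpertClassifier.classify_query("Help me debug this Python function")
#   # -> "Code" ("debug", "python", "function" give Code the top score)
#   ExpertClassifier.classify_query("Translate this sentence to French")
#   # -> "Multilingual"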
class ResponseGenerator:
"""Handles response generation with fallback support."""
SYSTEM_MESSAGES = {
"Code": "You are an expert software engineer and programming assistant. Provide clear, well-commented code examples and explain programming concepts thoroughly.",
"Math": "You are a mathematics expert. Solve problems step-by-step, show your work, and explain mathematical concepts clearly.",
"Reasoning": "You are a logical reasoning expert. Break down complex problems, analyze them systematically, and provide clear explanations.",
"Multilingual": "You are a multilingual expert. Help with translations, language learning, and cross-cultural communication.",
"General": "You are a helpful AI assistant. Provide accurate, helpful, and informative responses to user questions."
}
def __init__(self, model_loader: ModelLoader):
self.model_loader = model_loader
def generate_fallback_response(self, query: str, expert_type: str) -> str:
"""Generate fallback response when model is unavailable."""
fallback_messages = {
"Code": f"I'm a Code Expert, but the Phi-3.5-MoE model is currently unavailable. For your question about '{query}', I would typically provide detailed code examples and programming guidance. Please try again later when the model is loaded.",
"Math": f"I'm a Math Expert, but the Phi-3.5-MoE model is currently unavailable. For your question about '{query}', I would typically solve mathematical problems step-by-step. Please try again later when the model is loaded.",
"Reasoning": f"I'm a Reasoning Expert, but the Phi-3.5-MoE model is currently unavailable. For your question about '{query}', I would typically provide logical analysis and systematic problem-solving. Please try again later when the model is loaded.",
"Multilingual": f"I'm a Multilingual Expert, but the Phi-3.5-MoE model is currently unavailable. For your question about '{query}', I would typically help with translations and language learning. Please try again later when the model is loaded.",
"General": f"I'm a General Expert, but the Phi-3.5-MoE model is currently unavailable. For your question about '{query}', I would typically provide helpful and informative responses. Please try again later when the model is loaded."
}
return fallback_messages.get(expert_type, fallback_messages["General"])
def generate_response(
self,
query: str,
max_tokens: int = 500,
temperature: float = 0.7
) -> str:
"""Generate response using the model or fallback."""
try:
# Classify query type
expert_type = ExpertClassifier.classify_query(query)
# Check if model is available
if not self.model_loader.is_loaded:
return f"**Expert Type:** {expert_type}\n\n**Response:**\n{self.generate_fallback_response(query, expert_type)}"
# Get system message for expert type
system_message = self.SYSTEM_MESSAGES.get(expert_type, self.SYSTEM_MESSAGES["General"])
# Format messages
messages = [
{"role": "system", "content": system_message},
{"role": "user", "content": query}
]
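            # In recent transformers versions, the pipeline applies the
            # model's chat template to this OpenAI-style message list.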
# Generate response using pipeline
response = self.model_loader.pipeline(
messages,
max_new_tokens=max_tokens,
temperature=temperature,
do_sample=True,
pad_token_id=self.model_loader.tokenizer.eos_token_id
)
            # Extract the assistant's reply. With chat-style message input,
            # recent transformers pipelines return the whole conversation as a
            # list of message dicts; older versions return a plain string.
            generated_text = response[0]["generated_text"]
            if isinstance(generated_text, list):
                assistant_response = generated_text[-1]["content"].strip()
            elif "Assistant:" in generated_text:
                assistant_response = generated_text.split("Assistant:")[-1].strip()
            else:
                assistant_response = generated_text.strip()
return f"**Expert Type:** {expert_type}\n\n**Response:**\n{assistant_response}"
except Exception as e:
logger.error(f"Error generating response: {e}")
return f"❌ **Error generating response:** {str(e)}\n\nPlease try again or check the logs for more details."
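# Example of the Markdown string generate_response() returns (illustrative
# content, not from the original file):
#
#   **Expert Type:** Math
#
#   **Response:**
#   The derivative of x^2 + 3x + 1 is 2x + 3.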
class GradioInterface:
"""Main Gradio interface for the application."""
def __init__(self, model_loader: ModelLoader):
self.model_loader = model_loader
self.response_generator = ResponseGenerator(model_loader)
self.demo = None
def create_interface(self) -> gr.Blocks:
"""Create the Gradio interface."""
# Determine status message
if self.model_loader.is_loaded:
model_info = self.model_loader.get_model_info()
            status_msg = f"✅ **Model Status:** {model_info['model_id']} loaded successfully"
if model_info.get('revision'):
status_msg += f" (revision: {model_info['revision'][:8]}...)"
else:
status_msg = "⚠️ **Model Status:** Running in fallback mode (model loading issues)"
with gr.Blocks(
title="Phi-3.5-MoE Expert Assistant",
theme=gr.themes.Soft(),
css=".status-box { background-color: #f0f0f0; padding: 10px; border-radius: 5px; margin: 10px 0; }"
) as demo:
gr.Markdown("# πŸ€– Phi-3.5-MoE Expert Assistant")
gr.Markdown(status_msg, elem_classes=["status-box"])
gr.Markdown("""
This is a specialized AI assistant powered by Microsoft's Phi-3.5-MoE model.
It automatically routes your queries to the most appropriate expert:
- **Code Expert**: Programming, software development, algorithms
- **Math Expert**: Mathematics, calculations, problem solving
- **Reasoning Expert**: Logic, analysis, critical thinking
- **Multilingual Expert**: Translation and language assistance
- **General Expert**: General purpose assistance
""")
with gr.Row():
with gr.Column(scale=3):
query_input = gr.Textbox(
label="Your Question",
placeholder="Ask me anything...",
lines=3,
max_lines=10
)
with gr.Row():
max_tokens = gr.Slider(
minimum=50,
maximum=1000,
value=500,
step=50,
label="Max Tokens"
)
temperature = gr.Slider(
minimum=0.1,
maximum=1.0,
value=0.7,
step=0.1,
label="Temperature"
)
submit_btn = gr.Button("Generate Response", variant="primary")
clear_btn = gr.Button("Clear", variant="secondary")
with gr.Column(scale=2):
response_output = gr.Markdown(
label="Response",
value="*Submit a question to get started...*"
)
# Model information section
with gr.Accordion("Model Information", open=False):
model_info_display = gr.JSON(
value=self.model_loader.get_model_info(),
label="Model Details"
)
# Example queries
gr.Markdown("### πŸ’‘ Example Queries")
examples = [
"How do I implement a binary search algorithm in Python?",
"What is the derivative of xΒ² + 3x + 1?",
"Explain the logical reasoning behind the Monty Hall problem",
"Translate 'Hello, how are you?' to Spanish",
"What are the benefits of renewable energy?"
]
gr.Examples(
examples=examples,
inputs=query_input,
label="Click an example to try it"
)
# Event handlers
submit_btn.click(
fn=self.response_generator.generate_response,
inputs=[query_input, max_tokens, temperature],
outputs=response_output,
                show_progress="full"
)
query_input.submit(
fn=self.response_generator.generate_response,
inputs=[query_input, max_tokens, temperature],
outputs=response_output,
                show_progress="full"
)
clear_btn.click(
fn=lambda: ("", "*Submit a question to get started...*"),
outputs=[query_input, response_output]
)
self.demo = demo
return demo
def launch(
self,
server_name: str = "0.0.0.0",
server_port: int = 7860,
**kwargs
) -> None:
"""Launch the Gradio interface."""
if not self.demo:
self.create_interface()
        logger.info(f"🚀 Launching Gradio interface on {server_name}:{server_port}")
self.demo.launch(
server_name=server_name,
server_port=server_port,
**kwargs
)
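# Minimal usage sketch, not part of the original file. Only is_loaded,
# get_model_info(), pipeline, and tokenizer are confirmed ModelLoader members
# by the code above; the no-argument constructor below is an assumption.
# Because this module uses a relative import, run it as a package module:
#   python -m app.interface
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    loader = ModelLoader()  # hypothetical signature; the real loader may need args or an explicit load step
    GradioInterface(loader).launch()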