File size: 11,924 Bytes
6510698
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
"""
Gradio interface module for the Phi-3.5-MoE application.
"""

import logging
import re
from typing import Optional, Dict, Any

import gradio as gr

from .model_loader import ModelLoader

logger = logging.getLogger(__name__)


class ExpertClassifier:
    """Classifies queries to determine expert specialization.

    Routing is keyword-based: each expert owns a keyword list, and the
    expert whose keywords appear most often in the query wins. Ties go to
    the expert declared first in ``EXPERT_KEYWORDS`` (dict order).
    """

    EXPERT_KEYWORDS = {
        "Code": [
            "programming", "software", "development", "coding", "algorithm", 
            "python", "javascript", "java", "function", "code", "debug",
            "api", "framework", "library", "class", "method", "variable"
        ],
        "Math": [
            "mathematics", "calculation", "equation", "formula", "statistics", 
            "derivative", "integral", "algebra", "calculus", "math", "solve", 
            "calculate", "probability", "geometry", "trigonometry"
        ],
        "Reasoning": [
            "logic", "analysis", "reasoning", "problem-solving", "critical", 
            "explain", "why", "how", "because", "analyze", "evaluate",
            "compare", "contrast", "deduce", "infer"
        ],
        "Multilingual": [
            "translation", "language", "multilingual", "localization", 
            "translate", "spanish", "french", "german", "chinese", "japanese",
            "korean", "arabic", "russian", "portuguese"
        ],
        "General": [
            "general", "conversation", "assistance", "help", "hello", "hi", 
            "what", "who", "when", "where", "tell", "describe", "explain"
        ]
    }

    @classmethod
    def classify_query(cls, query: str) -> str:
        """Classify ``query`` and return the best-matching expert name.

        Returns "General" for empty queries or when no keyword matches.
        Matching is case-insensitive and anchored at word starts: a
        keyword counts only when it begins a word, so stems still match
        ("math" catches "mathematics") but mid-word hits do not.  The
        previous plain-substring test let short keywords like "hi" fire
        inside "this"/"whichever"/"architecture", inflating the General
        score and mis-routing otherwise clear queries.
        """
        if not query:
            return "General"

        query_lower = query.lower()

        # Each keyword contributes at most 1 to its expert's score,
        # regardless of how many times it occurs in the query.
        scores = {
            expert: sum(
                1 for keyword in keywords
                if re.search(rf"\b{re.escape(keyword)}", query_lower)
            )
            for expert, keywords in cls.EXPERT_KEYWORDS.items()
        }

        # max() keeps the first expert in declaration order on ties,
        # preserving the original tie-break behavior.
        best_expert = max(scores, key=scores.get)
        return best_expert if scores[best_expert] > 0 else "General"


class ResponseGenerator:
    """Handles response generation with fallback support."""

    SYSTEM_MESSAGES = {
        "Code": "You are an expert software engineer and programming assistant. Provide clear, well-commented code examples and explain programming concepts thoroughly.",
        "Math": "You are a mathematics expert. Solve problems step-by-step, show your work, and explain mathematical concepts clearly.",
        "Reasoning": "You are a logical reasoning expert. Break down complex problems, analyze them systematically, and provide clear explanations.",
        "Multilingual": "You are a multilingual expert. Help with translations, language learning, and cross-cultural communication.",
        "General": "You are a helpful AI assistant. Provide accurate, helpful, and informative responses to user questions."
    }

    # Per-expert capability clause inserted into the shared fallback template.
    _FALLBACK_CAPABILITIES = {
        "Code": "provide detailed code examples and programming guidance",
        "Math": "solve mathematical problems step-by-step",
        "Reasoning": "provide logical analysis and systematic problem-solving",
        "Multilingual": "help with translations and language learning",
        "General": "provide helpful and informative responses",
    }

    def __init__(self, model_loader: ModelLoader):
        self.model_loader = model_loader

    def generate_fallback_response(self, query: str, expert_type: str) -> str:
        """Generate fallback response when model is unavailable.

        Unknown expert types fall back to the "General" wording, matching
        the behavior of a dict ``.get`` with a General default.
        """
        known = expert_type if expert_type in self._FALLBACK_CAPABILITIES else "General"
        return (
            f"I'm a {known} Expert, but the Phi-3.5-MoE model is currently "
            f"unavailable. For your question about '{query}', I would typically "
            f"{self._FALLBACK_CAPABILITIES[known]}. Please try again later when "
            f"the model is loaded."
        )

    def generate_response(
        self, 
        query: str, 
        max_tokens: int = 500, 
        temperature: float = 0.7
    ) -> str:
        """Generate response using the model or fallback.

        Routes the query to an expert, renders a fallback notice when the
        model is not loaded, and otherwise runs the chat pipeline. Any
        exception is caught, logged, and surfaced as an error string
        rather than raised to the UI.
        """
        try:
            expert_type = ExpertClassifier.classify_query(query)

            # Without a loaded model, answer with the canned expert notice.
            if not self.model_loader.is_loaded:
                notice = self.generate_fallback_response(query, expert_type)
                return f"**Expert Type:** {expert_type}\n\n**Response:**\n{notice}"

            system_message = self.SYSTEM_MESSAGES.get(
                expert_type, self.SYSTEM_MESSAGES["General"]
            )
            chat = [
                {"role": "system", "content": system_message},
                {"role": "user", "content": query},
            ]

            result = self.model_loader.pipeline(
                chat,
                max_new_tokens=max_tokens,
                temperature=temperature,
                do_sample=True,
                pad_token_id=self.model_loader.tokenizer.eos_token_id,
            )

            # NOTE(review): assumes the pipeline yields a string under
            # 'generated_text' — confirm against the loader's pipeline config.
            raw_text = result[0]['generated_text']

            # Keep only the text after the last "Assistant:" marker, if any.
            marker = "Assistant:"
            answer = (
                raw_text.split(marker)[-1].strip() if marker in raw_text else raw_text
            )

            return f"**Expert Type:** {expert_type}\n\n**Response:**\n{answer}"

        except Exception as e:
            logger.error(f"Error generating response: {e}")
            return f"❌ **Error generating response:** {str(e)}\n\nPlease try again or check the logs for more details."


class GradioInterface:
    """Main Gradio interface for the application.

    Wraps a ModelLoader and a ResponseGenerator, builds the Blocks UI on
    demand, and exposes launch() as the entry point.
    """
    
    def __init__(self, model_loader: ModelLoader):
        # Loader supplies model/tokenizer state; the generator wraps it for inference.
        self.model_loader = model_loader
        self.response_generator = ResponseGenerator(model_loader)
        # Built lazily by create_interface(); launch() creates it if missing.
        self.demo = None
    
    def create_interface(self) -> gr.Blocks:
        """Create the Gradio interface.

        Builds the full Blocks layout (status banner, query form with
        token/temperature sliders, response pane, model-info accordion,
        example queries) and wires the submit-button, enter-key and clear
        events. Stores the result on ``self.demo`` and returns it.
        """
        
        # Determine status message
        if self.model_loader.is_loaded:
            model_info = self.model_loader.get_model_info()
            status_msg = f"✅ **Model Status:** {model_info['model_id']} loaded successfully"
            # Show only a short prefix of the revision hash to keep the banner compact.
            if model_info.get('revision'):
                status_msg += f" (revision: {model_info['revision'][:8]}...)"
        else:
            status_msg = "⚠️ **Model Status:** Running in fallback mode (model loading issues)"
        
        with gr.Blocks(
            title="Phi-3.5-MoE Expert Assistant", 
            theme=gr.themes.Soft(),
            # Styles the status banner rendered via elem_classes below.
            css=".status-box { background-color: #f0f0f0; padding: 10px; border-radius: 5px; margin: 10px 0; }"
        ) as demo:
            
            gr.Markdown("# 🤖 Phi-3.5-MoE Expert Assistant")
            gr.Markdown(status_msg, elem_classes=["status-box"])
            
            gr.Markdown("""
            This is a specialized AI assistant powered by Microsoft's Phi-3.5-MoE model.
            It automatically routes your queries to the most appropriate expert:
            - **Code Expert**: Programming, software development, algorithms
            - **Math Expert**: Mathematics, calculations, problem solving
            - **Reasoning Expert**: Logic, analysis, critical thinking
            - **Multilingual Expert**: Translation and language assistance
            - **General Expert**: General purpose assistance
            """)
            
            # Input column (wider) on the left, response column on the right.
            with gr.Row():
                with gr.Column(scale=3):
                    query_input = gr.Textbox(
                        label="Your Question",
                        placeholder="Ask me anything...",
                        lines=3,
                        max_lines=10
                    )
                    
                    # Generation controls passed straight through to generate_response.
                    with gr.Row():
                        max_tokens = gr.Slider(
                            minimum=50,
                            maximum=1000,
                            value=500,
                            step=50,
                            label="Max Tokens"
                        )
                        temperature = gr.Slider(
                            minimum=0.1,
                            maximum=1.0,
                            value=0.7,
                            step=0.1,
                            label="Temperature"
                        )
                    
                    submit_btn = gr.Button("Generate Response", variant="primary")
                    clear_btn = gr.Button("Clear", variant="secondary")
                
                with gr.Column(scale=2):
                    # Markdown pane: generate_response returns markdown-formatted text.
                    response_output = gr.Markdown(
                        label="Response",
                        value="*Submit a question to get started...*"
                    )
            
            # Model information section
            with gr.Accordion("Model Information", open=False):
                # Snapshot taken at build time; not refreshed on later loads.
                model_info_display = gr.JSON(
                    value=self.model_loader.get_model_info(),
                    label="Model Details"
                )
            
            # Example queries
            gr.Markdown("### 💡 Example Queries")
            # One example per expert category, in routing order.
            examples = [
                "How do I implement a binary search algorithm in Python?",
                "What is the derivative of x² + 3x + 1?",
                "Explain the logical reasoning behind the Monty Hall problem",
                "Translate 'Hello, how are you?' to Spanish",
                "What are the benefits of renewable energy?"
            ]
            
            gr.Examples(
                examples=examples,
                inputs=query_input,
                label="Click an example to try it"
            )
            
            # Event handlers
            # Button click and textbox Enter both trigger the same generation call.
            submit_btn.click(
                fn=self.response_generator.generate_response,
                inputs=[query_input, max_tokens, temperature],
                outputs=response_output,
                show_progress=True
            )
            
            query_input.submit(
                fn=self.response_generator.generate_response,
                inputs=[query_input, max_tokens, temperature],
                outputs=response_output,
                show_progress=True
            )
            
            # Clear resets both the input box and the response placeholder.
            clear_btn.click(
                fn=lambda: ("", "*Submit a question to get started...*"),
                outputs=[query_input, response_output]
            )
        
        self.demo = demo
        return demo
    
    def launch(
        self, 
        server_name: str = "0.0.0.0", 
        server_port: int = 7860,
        **kwargs
    ) -> None:
        """Launch the Gradio interface.

        Builds the interface first if create_interface() has not been
        called; extra keyword arguments are forwarded to gr.Blocks.launch.
        """
        if not self.demo:
            self.create_interface()
        
        logger.info(f"🚀 Launching Gradio interface on {server_name}:{server_port}")
        self.demo.launch(
            server_name=server_name,
            server_port=server_port,
            **kwargs
        )