mgbam committed
Commit b942332 · verified · 1 Parent(s): 227089b

Upload 2 files

Files changed (2)
  1. core/knowledge_engine.py +148 -0
  2. core/model_router.py +363 -0
core/knowledge_engine.py ADDED
@@ -0,0 +1,148 @@
+ """
+ LlamaIndex Knowledge Engine - For $1,000 Prize
+
+ Enterprise RAG for connecting to company knowledge bases.
+ """
+
+ import os
+ from typing import List, Dict, Any, Optional
+ from pathlib import Path
+
+ try:
+     from llama_index.core import (
+         VectorStoreIndex,
+         SimpleDirectoryReader,
+         StorageContext,
+         load_index_from_storage,
+         Settings
+     )
+     from llama_index.embeddings.openai import OpenAIEmbedding
+     from llama_index.llms.anthropic import Anthropic
+     from llama_index.vector_stores.chroma import ChromaVectorStore
+     import chromadb
+     LLAMAINDEX_AVAILABLE = True
+ except ImportError:
+     LLAMAINDEX_AVAILABLE = False
+     print("[WARNING] LlamaIndex not installed")
+
+
+ class KnowledgeEngine:
+     """
+     Enterprise knowledge integration using LlamaIndex.
+
+     Prize Integration: LlamaIndex Category Award ($1,000)
+     - RAG for enterprise documents
+     - Multi-source knowledge integration
+     - Context-aware MCP generation
+     """
+
+     def __init__(self, persist_dir: str = "./chroma_db"):
+         self.persist_dir = Path(persist_dir)
+         self.persist_dir.mkdir(parents=True, exist_ok=True)
+
+         if not LLAMAINDEX_AVAILABLE:
+             self.index = None
+             return
+
+         # Configure LlamaIndex settings
+         Settings.embed_model = OpenAIEmbedding(
+             api_key=os.getenv("OPENAI_API_KEY"),
+             model="text-embedding-3-small"
+         )
+         Settings.llm = Anthropic(
+             api_key=os.getenv("ANTHROPIC_API_KEY"),
+             model="claude-sonnet-4-20250514"
+         )
+
+         # Initialize ChromaDB
+         self.chroma_client = chromadb.PersistentClient(path=str(self.persist_dir))
+         self.chroma_collection = self.chroma_client.get_or_create_collection("omnimind_knowledge")
+
+         # Vector store
+         self.vector_store = ChromaVectorStore(chroma_collection=self.chroma_collection)
+         self.storage_context = StorageContext.from_defaults(vector_store=self.vector_store)
+
+         # Try to load an existing index
+         try:
+             self.index = load_index_from_storage(self.storage_context)
+             print("[OK] Loaded existing knowledge base")
+         except Exception:
+             self.index = None
+             print("[INFO] No existing knowledge base - will create on first document add")
+
+     async def add_documents(self, documents_path: str) -> Dict[str, Any]:
+         """Add documents to the knowledge base"""
+         if not LLAMAINDEX_AVAILABLE:
+             return {"status": "unavailable", "message": "LlamaIndex not installed"}
+
+         reader = SimpleDirectoryReader(documents_path)
+         documents = reader.load_data()
+
+         if self.index is None:
+             self.index = VectorStoreIndex.from_documents(
+                 documents,
+                 storage_context=self.storage_context
+             )
+         else:
+             for doc in documents:
+                 self.index.insert(doc)
+
+         self.index.storage_context.persist()
+
+         return {
+             "status": "success",
+             "documents_added": len(documents),
+             "total_documents": len(self.chroma_collection.get()["ids"])
+         }
+
+     async def query(self, question: str, top_k: int = 3) -> Dict[str, Any]:
+         """Query the knowledge base"""
+         if not LLAMAINDEX_AVAILABLE or self.index is None:
+             return {
+                 "status": "unavailable",
+                 "answer": "Knowledge base not configured",
+                 "sources": []
+             }
+
+         query_engine = self.index.as_query_engine(similarity_top_k=top_k)
+         # Use the async query API so the event loop is not blocked
+         response = await query_engine.aquery(question)
+
+         return {
+             "status": "success",
+             "answer": str(response),
+             "sources": [
+                 {
+                     "text": node.node.text[:200] + "...",
+                     "score": node.score
+                 }
+                 for node in response.source_nodes
+             ]
+         }
+
+     async def get_context_for_mcp_generation(
+         self,
+         task_description: str
+     ) -> Optional[str]:
+         """
+         Get relevant context from the knowledge base for MCP generation.
+
+         This makes MCPs context-aware - they can use company-specific info.
+         """
+         if not LLAMAINDEX_AVAILABLE or self.index is None:
+             return None
+
+         result = await self.query(
+             f"Find information relevant to: {task_description}",
+             top_k=3
+         )
+
+         if result["status"] == "success":
+             context_parts = [result["answer"]]
+             context_parts.extend([s["text"] for s in result["sources"]])
+             return "\n\n".join(context_parts)
+
+         return None
+
+
+ # Global knowledge engine
+ knowledge = KnowledgeEngine()
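
Usage sketch (not part of this commit): a minimal driver showing how the module's global `knowledge` instance is meant to be called. The `./docs` folder and the sample question are hypothetical; the sketch assumes OPENAI_API_KEY and ANTHROPIC_API_KEY are set and the LlamaIndex extras imported above are installed.

import asyncio

from core.knowledge_engine import knowledge

async def main():
    # Index a local folder of documents (hypothetical path)
    added = await knowledge.add_documents("./docs")
    print(added["status"], added.get("documents_added"))

    # Ask a question grounded in the indexed documents
    result = await knowledge.query("What is our refund policy?", top_k=3)
    print(result["answer"])
    for source in result["sources"]:
        # Each source carries a similarity score and a 200-char snippet
        print(f'{source["score"]:.3f}: {source["text"]}')

asyncio.run(main())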
core/model_router.py ADDED
@@ -0,0 +1,363 @@
+ """
+ Multi-Model Router - Intelligent model selection for optimal performance
+ Integrates Claude, Gemini, and GPT-4 with automatic routing
+ """
+
+ import os
+ from typing import Dict, Any, List, Optional, Literal
+ from enum import Enum
+ import asyncio
+ from dotenv import load_dotenv
+ from anthropic import AsyncAnthropic
+ from openai import AsyncOpenAI
+ import google.generativeai as genai
+ from langchain_anthropic import ChatAnthropic
+ from langchain_openai import ChatOpenAI
+ from langchain_google_genai import ChatGoogleGenerativeAI
+ from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
+
+ # Load environment variables before initializing clients
+ load_dotenv()
+
+
+ class ModelType(Enum):
+     """Available AI models"""
+     CLAUDE_SONNET = "claude-sonnet-4-20250514"  # Best for reasoning, code generation
+     GEMINI_2_FLASH = "gemini-2.0-flash-exp"     # Best for multimodal tasks, speed
+     GPT4O_MINI = "gpt-4o-mini"                  # Best for planning, routing decisions
+
+
+ class TaskType(Enum):
+     """Task types for intelligent routing"""
+     REASONING = "reasoning"       # Complex logic, analysis
+     CODE_GEN = "code_generation"  # MCP server generation
+     MULTIMODAL = "multimodal"     # Images, audio, video
+     PLANNING = "planning"         # Task breakdown, routing
+     FAST_QUERY = "fast_query"     # Quick responses
+     VISION = "vision"             # Image analysis
+     AUDIO = "audio"               # Audio processing
+
+
+ class MultiModelRouter:
+     """
+     Intelligent multi-model router that selects the best AI model for each task.
+
+     Prize Integration:
+     - Google Gemini: $10K prize for multimodal capabilities
+     - Anthropic Claude: Core reasoning engine
+     - OpenAI GPT-4: Planning and routing
+     """
+
+     def __init__(self):
+         self.anthropic_key = os.getenv("ANTHROPIC_API_KEY")
+         self.openai_key = os.getenv("OPENAI_API_KEY")
+         self.google_key = os.getenv("GOOGLE_API_KEY")
+
+         # Initialize clients
+         self.anthropic_client = AsyncAnthropic(api_key=self.anthropic_key) if self.anthropic_key else None
+         self.openai_client = AsyncOpenAI(api_key=self.openai_key) if self.openai_key else None
+
+         if self.google_key:
+             genai.configure(api_key=self.google_key)
+
+         # LangChain clients for agent integration
+         self.claude_lc = ChatAnthropic(
+             model=ModelType.CLAUDE_SONNET.value,
+             api_key=self.anthropic_key,
+             temperature=0.7
+         ) if self.anthropic_key else None
+
+         self.gpt_lc = ChatOpenAI(
+             model=ModelType.GPT4O_MINI.value,
+             api_key=self.openai_key,
+             temperature=0.7
+         ) if self.openai_key else None
+
+         self.gemini_lc = ChatGoogleGenerativeAI(
+             model=ModelType.GEMINI_2_FLASH.value,
+             google_api_key=self.google_key,
+             temperature=0.7
+         ) if self.google_key else None
+
+         # Routing rules: task type -> best model
+         self.routing_rules = {
+             TaskType.REASONING: ModelType.CLAUDE_SONNET,
+             TaskType.CODE_GEN: ModelType.CLAUDE_SONNET,
+             TaskType.MULTIMODAL: ModelType.GEMINI_2_FLASH,
+             TaskType.PLANNING: ModelType.GPT4O_MINI,
+             TaskType.FAST_QUERY: ModelType.GEMINI_2_FLASH,
+             TaskType.VISION: ModelType.GEMINI_2_FLASH,
+             TaskType.AUDIO: ModelType.GEMINI_2_FLASH,
+         }
+
+         # Cost tracking (USD per 1M tokens)
+         self.model_costs = {
+             ModelType.CLAUDE_SONNET: {"input": 3.0, "output": 15.0},
+             ModelType.GEMINI_2_FLASH: {"input": 0.0, "output": 0.0},  # Free tier
+             ModelType.GPT4O_MINI: {"input": 0.15, "output": 0.60},
+         }
+
+         self.usage_stats = {
+             "claude": {"requests": 0, "tokens": 0, "cost": 0.0},
+             "gemini": {"requests": 0, "tokens": 0, "cost": 0.0},
+             "gpt4": {"requests": 0, "tokens": 0, "cost": 0.0},
+         }
+
+     def select_model(self, task_type: TaskType, prefer_cost_efficient: bool = False) -> ModelType:
+         """
+         Intelligently select the best model for a task.
+
+         Args:
+             task_type: Type of task to perform
+             prefer_cost_efficient: Prefer cheaper models when possible
+
+         Returns:
+             Selected model type
+         """
+         base_model = self.routing_rules.get(task_type, ModelType.CLAUDE_SONNET)
+
+         # In cost-efficient mode, prefer Gemini (free tier) or GPT-4o-mini
+         if prefer_cost_efficient:
+             if task_type in [TaskType.MULTIMODAL, TaskType.FAST_QUERY, TaskType.VISION]:
+                 return ModelType.GEMINI_2_FLASH
+             elif task_type == TaskType.PLANNING:
+                 return ModelType.GPT4O_MINI
+
+         return base_model
+
+     async def generate(
+         self,
+         prompt: str,
+         task_type: TaskType = TaskType.REASONING,
+         system_prompt: Optional[str] = None,
+         max_tokens: int = 4000,
+         temperature: float = 0.7,
+         image_url: Optional[str] = None,
+         audio_data: Optional[bytes] = None,
+         stream: bool = False,
+     ) -> Dict[str, Any]:
+         """
+         Generate a response using the best model for the task.
+
+         Args:
+             prompt: User prompt
+             task_type: Type of task
+             system_prompt: System instructions
+             max_tokens: Maximum response length
+             temperature: Creativity (0-1)
+             image_url: URL for image analysis (Gemini multimodal)
+             audio_data: Audio bytes for analysis (Gemini)
+             stream: Stream response tokens
+
+         Returns:
+             Dict with response, model used, tokens, and cost
+         """
+         model = self.select_model(task_type)
+
+         # Force Gemini for multimodal inputs
+         if image_url or audio_data:
+             model = ModelType.GEMINI_2_FLASH
+
+         try:
+             if model == ModelType.CLAUDE_SONNET:
+                 return await self._generate_claude(prompt, system_prompt, max_tokens, temperature, stream)
+             elif model == ModelType.GEMINI_2_FLASH:
+                 return await self._generate_gemini(prompt, system_prompt, max_tokens, temperature, image_url, audio_data)
+             else:
+                 return await self._generate_gpt(prompt, system_prompt, max_tokens, temperature, stream)
+         except Exception:
+             # Fall back to Claude if the primary model fails
+             if model != ModelType.CLAUDE_SONNET:
+                 return await self._generate_claude(prompt, system_prompt, max_tokens, temperature, stream)
+             raise
+
+     async def _generate_claude(
+         self,
+         prompt: str,
+         system_prompt: Optional[str],
+         max_tokens: int,
+         temperature: float,
+         stream: bool
+     ) -> Dict[str, Any]:
+         """Generate using Claude Sonnet"""
+         if not self.anthropic_client:
+             raise ValueError("Anthropic API key not configured")
+
+         messages = [{"role": "user", "content": prompt}]
+
+         response = await self.anthropic_client.messages.create(
+             model=ModelType.CLAUDE_SONNET.value,
+             max_tokens=max_tokens,
+             temperature=temperature,
+             system=system_prompt or "You are a helpful AI assistant.",
+             messages=messages,
+             stream=stream
+         )
+
+         if stream:
+             return {"response": response, "model": "claude", "streaming": True}
+
+         content = response.content[0].text
+         input_tokens = response.usage.input_tokens
+         output_tokens = response.usage.output_tokens
+         cost = self._calculate_cost(ModelType.CLAUDE_SONNET, input_tokens, output_tokens)
+
+         # Update stats
+         self.usage_stats["claude"]["requests"] += 1
+         self.usage_stats["claude"]["tokens"] += input_tokens + output_tokens
+         self.usage_stats["claude"]["cost"] += cost
+
+         return {
+             "response": content,
+             "model": "claude-sonnet-4",
+             "input_tokens": input_tokens,
+             "output_tokens": output_tokens,
+             "total_tokens": input_tokens + output_tokens,
+             "cost": cost,
+             "streaming": False
+         }
+
+     async def _generate_gemini(
+         self,
+         prompt: str,
+         system_prompt: Optional[str],
+         max_tokens: int,
+         temperature: float,
+         image_url: Optional[str] = None,
+         audio_data: Optional[bytes] = None
+     ) -> Dict[str, Any]:
+         """Generate using Gemini 2.0 Flash (multimodal support)"""
+         if not self.google_key:
+             raise ValueError("Google API key not configured")
+
+         model = genai.GenerativeModel(
+             ModelType.GEMINI_2_FLASH.value,
+             system_instruction=system_prompt
+         )
+
+         # Build multimodal content
+         content_parts = []
+         if image_url:
+             # Fetch the image for analysis (assumes a JPEG payload)
+             import httpx
+             async with httpx.AsyncClient() as client:
+                 img_response = await client.get(image_url)
+                 img_data = img_response.content
+             content_parts.append({"mime_type": "image/jpeg", "data": img_data})
+
+         if audio_data:
+             content_parts.append({"mime_type": "audio/wav", "data": audio_data})
+
+         content_parts.append(prompt)
+
+         response = await model.generate_content_async(
+             content_parts,
+             generation_config=genai.GenerationConfig(
+                 max_output_tokens=max_tokens,
+                 temperature=temperature
+             )
+         )
+
+         content = response.text
+
+         # Gemini free tier - no cost tracking
+         self.usage_stats["gemini"]["requests"] += 1
+
+         return {
+             "response": content,
+             "model": "gemini-2.0-flash",
+             "input_tokens": 0,  # Not reported on the free tier
+             "output_tokens": 0,
+             "total_tokens": 0,
+             "cost": 0.0,
+             "streaming": False,
+             "multimodal": bool(image_url or audio_data)
+         }
+
+     async def _generate_gpt(
+         self,
+         prompt: str,
+         system_prompt: Optional[str],
+         max_tokens: int,
+         temperature: float,
+         stream: bool
+     ) -> Dict[str, Any]:
+         """Generate using GPT-4o-mini"""
+         if not self.openai_client:
+             raise ValueError("OpenAI API key not configured")
+
+         messages = [
+             {"role": "system", "content": system_prompt or "You are a helpful AI assistant."},
+             {"role": "user", "content": prompt}
+         ]
+
+         response = await self.openai_client.chat.completions.create(
+             model=ModelType.GPT4O_MINI.value,
+             messages=messages,
+             max_tokens=max_tokens,
+             temperature=temperature,
+             stream=stream
+         )
+
+         if stream:
+             return {"response": response, "model": "gpt-4o-mini", "streaming": True}
+
+         content = response.choices[0].message.content
+         input_tokens = response.usage.prompt_tokens
+         output_tokens = response.usage.completion_tokens
+         cost = self._calculate_cost(ModelType.GPT4O_MINI, input_tokens, output_tokens)
+
+         # Update stats
+         self.usage_stats["gpt4"]["requests"] += 1
+         self.usage_stats["gpt4"]["tokens"] += input_tokens + output_tokens
+         self.usage_stats["gpt4"]["cost"] += cost
+
+         return {
+             "response": content,
+             "model": "gpt-4o-mini",
+             "input_tokens": input_tokens,
+             "output_tokens": output_tokens,
+             "total_tokens": input_tokens + output_tokens,
+             "cost": cost,
+             "streaming": False
+         }
+
+     def _calculate_cost(self, model: ModelType, input_tokens: int, output_tokens: int) -> float:
+         """Calculate cost for API usage"""
+         costs = self.model_costs[model]
+         input_cost = (input_tokens / 1_000_000) * costs["input"]
+         output_cost = (output_tokens / 1_000_000) * costs["output"]
+         return input_cost + output_cost
+
+     def get_usage_stats(self) -> Dict[str, Any]:
+         """Get usage statistics across all models"""
+         total_cost = sum(stats["cost"] for stats in self.usage_stats.values())
+         total_requests = sum(stats["requests"] for stats in self.usage_stats.values())
+
+         return {
+             "total_requests": total_requests,
+             "total_cost": round(total_cost, 4),
+             "by_model": self.usage_stats,
+             "cost_breakdown": {
+                 "claude": round(self.usage_stats["claude"]["cost"], 4),
+                 "gemini": round(self.usage_stats["gemini"]["cost"], 4),
+                 "gpt4": round(self.usage_stats["gpt4"]["cost"], 4),
+             }
+         }
+
+     def get_langchain_model(self, task_type: TaskType):
+         """Get a LangChain-compatible model for agent integration"""
+         model = self.select_model(task_type)
+
+         if model == ModelType.CLAUDE_SONNET:
+             return self.claude_lc
+         elif model == ModelType.GEMINI_2_FLASH:
+             return self.gemini_lc
+         elif model == ModelType.GPT4O_MINI:
+             return self.gpt_lc
+
+         return self.claude_lc  # Default fallback
+
+
+ # Global router instance
+ router = MultiModelRouter()
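
Usage sketch (not part of this commit): routing two task types through the global `router` and reading back the usage accounting; the prompts are placeholders, and the sketch assumes the relevant API keys are present in the environment or a .env file. For reference, `_calculate_cost` is linear in tokens: 1,000 input + 500 output tokens on Claude Sonnet costs (1000/1e6)*3.0 + (500/1e6)*15.0 = $0.0105.

import asyncio

from core.model_router import TaskType, router

async def main():
    # CODE_GEN routes to Claude Sonnet via routing_rules
    code = await router.generate(
        prompt="Write a Python function that reverses a string.",
        task_type=TaskType.CODE_GEN,
    )
    print(code["model"], f'${code["cost"]:.4f}')

    # FAST_QUERY routes to Gemini 2.0 Flash (free tier, cost 0.0)
    fast = await router.generate(
        prompt="Summarize the Model Context Protocol in one sentence.",
        task_type=TaskType.FAST_QUERY,
    )
    print(fast["model"], fast["response"][:80])

    # Aggregate requests and spend across all three models
    print(router.get_usage_stats())

asyncio.run(main())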