Spaces:
Running
Running
Upload app.py
Browse files
app.py
CHANGED
|
@@ -268,13 +268,17 @@ class UltimateModelLoader:
|
|
| 268 |
for model_name, config in self.model_configs.items():
|
| 269 |
# Skip resource-intensive models on limited systems
|
| 270 |
if not has_gpu and config["params"] > 500_000_000:
|
|
|
|
| 271 |
continue
|
| 272 |
-
if memory_gb <
|
|
|
|
| 273 |
continue
|
| 274 |
# More reasonable Mamba filtering - only skip very large models on low memory
|
| 275 |
if memory_gb < 12 and "mamba" in model_name.lower() and config["params"] > 1_000_000_000:
|
|
|
|
| 276 |
continue
|
| 277 |
|
|
|
|
| 278 |
available_models.append((model_name, config))
|
| 279 |
|
| 280 |
# Sort by preference and priority
|
|
@@ -867,8 +871,10 @@ class UltimateMambaSwarm:
|
|
| 867 |
|
| 868 |
# Generate response
|
| 869 |
if self.model_loaded:
|
|
|
|
| 870 |
response = self._generate_with_ultimate_model(prompt, max_length, temperature, top_p, domain)
|
| 871 |
else:
|
|
|
|
| 872 |
response = self._generate_ultimate_fallback(prompt, domain)
|
| 873 |
|
| 874 |
# Quality validation
|
|
@@ -1378,7 +1384,6 @@ Continued research, development, and practical application will likely yield add
|
|
| 1378 |
|
| 1379 |
**⚡ Mamba Swarm Performance:**
|
| 1380 |
- **Architecture**: Mamba Encoder Swarm (CPU Alternative Mode)
|
| 1381 |
-
- **Active Model**: {model_info}
|
| 1382 |
- **Model Size**: {routing_info['model_size'].title()}
|
| 1383 |
- **Selected Encoders**: {routing_info['total_active']}/100
|
| 1384 |
- **Hardware**: {self.model_loader.device}
|
|
|
|
| 268 |
for model_name, config in self.model_configs.items():
|
| 269 |
# Skip resource-intensive models on limited systems
|
| 270 |
if not has_gpu and config["params"] > 500_000_000:
|
| 271 |
+
print(f"⚠️ Skipping {config['display_name']} - too large for CPU ({config['params']:,} > 500M)")
|
| 272 |
continue
|
| 273 |
+
if memory_gb < 3 and config["params"] > 150_000_000:
|
| 274 |
+
print(f"⚠️ Skipping {config['display_name']} - insufficient RAM ({memory_gb:.1f}GB < 3GB for {config['params']:,})")
|
| 275 |
continue
|
| 276 |
# More reasonable Mamba filtering - only skip very large models on low memory
|
| 277 |
if memory_gb < 12 and "mamba" in model_name.lower() and config["params"] > 1_000_000_000:
|
| 278 |
+
print(f"⚠️ Skipping {config['display_name']} - large Mamba model needs more RAM")
|
| 279 |
continue
|
| 280 |
|
| 281 |
+
print(f"✅ Available: {config['display_name']} ({config['params']:,} params)")
|
| 282 |
available_models.append((model_name, config))
|
| 283 |
|
| 284 |
# Sort by preference and priority
|
|
|
|
| 871 |
|
| 872 |
# Generate response
|
| 873 |
if self.model_loaded:
|
| 874 |
+
print(f"🧠 Using actual model inference: {self.model_loader.model_name}")
|
| 875 |
response = self._generate_with_ultimate_model(prompt, max_length, temperature, top_p, domain)
|
| 876 |
else:
|
| 877 |
+
print(f"🔄 Using fallback response system (no model loaded)")
|
| 878 |
response = self._generate_ultimate_fallback(prompt, domain)
|
| 879 |
|
| 880 |
# Quality validation
|
|
|
|
| 1384 |
|
| 1385 |
**⚡ Mamba Swarm Performance:**
|
| 1386 |
- **Architecture**: Mamba Encoder Swarm (CPU Alternative Mode)
|
|
|
|
| 1387 |
- **Model Size**: {routing_info['model_size'].title()}
|
| 1388 |
- **Selected Encoders**: {routing_info['total_active']}/100
|
| 1389 |
- **Hardware**: {self.model_loader.device}
|