Commit 9e2cabe · Parent: f93db33
fix: Complete hardware allocation matrix with no gaps (1B-100B+)
Replaced string matching with regex-based model size extraction for accurate hardware allocation across all model sizes.
Modal GPU allocation:
- 49B-100B+: H200 (140GB VRAM)
- 25B-48B: A100-80GB (Gemma-27B, Kimi-48B, 30B, 34B)
- 13B-24B: A100-80GB (13B, 14B, 15B, 20B, 22B)
- 6B-12B: L40S 48GB (6B, 7B, 8B, 9B, 10B, 11B, 12B)
- 1B-5B: T4 16GB (1B, 2B, 3B, 4B, 5B)
- <1B: T4 16GB
HF Jobs allocation (limited GPU options):
- 13B-100B+: A100 large
- 6B-12B: A10G large
- 1B-5B: T4 small
- <1B: T4 small
Uses a regex to extract the model size from the model name (e.g., '8b', '27b', '0.5b'), ensuring no gaps in coverage.
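For illustration (not part of the commit), a minimal sketch of what the size pattern r'(\d+\.?\d*)b' pulls out of typical model IDs; the example names below are arbitrary and not taken from the patch:

import re

SIZE_RE = re.compile(r'(\d+\.?\d*)b')

# Arbitrary example model IDs, chosen only to show what the pattern captures
for name in ["meta-llama/Llama-3.1-8B-Instruct",
             "google/gemma-2-27b-it",
             "Qwen/Qwen2.5-0.5B"]:
    match = SIZE_RE.search(name.lower())
    size = float(match.group(1)) if match else None
    print(f"{name} -> {size}")  # 8.0, 27.0, 0.5

re.search returns the first run of digits (with an optional decimal part) immediately followed by 'b', so size tokens like '8b', '27b', or '0.5b' are picked up wherever they appear in the lowercased name.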
- utils/hf_jobs_submission.py +24 -25
- utils/modal_job_submission.py +29 -24
utils/hf_jobs_submission.py (CHANGED)

@@ -239,34 +239,33 @@ def _auto_select_hf_hardware(provider: str, model: str) -> str:
 
     # Local models need GPU - select based on model size
     # Conservative allocation for agentic tasks (model weights + KV cache + inference overhead)
+    # Memory estimation: ~4-5GB per 1B params for safe agentic execution
     model_lower = model.lower()
 
-    # Extract model size
-    … (removed string-matching logic truncated in this view)
-        # Small models < 3B: ~4-10GB needed -> T4 sufficient
-        return "t4-small"
+    # Extract model size using regex to capture the number before 'b'
+    import re
+    size_match = re.search(r'(\d+\.?\d*)b', model_lower)
+
+    if size_match:
+        model_size = float(size_match.group(1))
+
+        # Complete coverage from 0.5B to 100B+ with no gaps
+        # HF Jobs has limited GPU options: t4-small, a10g-large, a100-large
+        if model_size >= 13:
+            # 13B-100B+: A100 large (e.g., 13B, 14B, 27B, 30B, 48B, 70B)
+            return "a100-large"
+        elif model_size >= 6:
+            # 6B-12B: A10G large (e.g., 6B, 7B, 8B, 9B, 10B, 11B, 12B)
+            return "a10g-large"
+        elif model_size >= 1:
+            # 1B-5B: T4 small (e.g., 1B, 2B, 3B, 4B, 5B)
+            return "t4-small"
+        else:
+            # < 1B: T4 small
+            return "t4-small"
     else:
-        # …
-        return "…
+        # No size detected in model name - default to A100 (safe for agentic workloads)
+        return "a100-large"
 
 
 def check_job_status(hf_job_id: str, hf_token: Optional[str] = None) -> Dict:
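A minimal sanity-check sketch (not from the repo) that mirrors the HF Jobs tiering above; the helper name pick_hf_flavor and the example model IDs are made up for illustration:

import re

def pick_hf_flavor(model: str) -> str:
    # Mirrors the size-based branches of _auto_select_hf_hardware shown above
    m = re.search(r'(\d+\.?\d*)b', model.lower())
    if not m:
        return "a100-large"   # no size in the name -> assume a large model
    size = float(m.group(1))
    if size >= 13:
        return "a100-large"   # 13B-100B+
    if size >= 6:
        return "a10g-large"   # 6B-12B
    return "t4-small"         # under 6B, including <1B

assert pick_hf_flavor("Qwen/Qwen2.5-0.5B") == "t4-small"
assert pick_hf_flavor("mistralai/Mistral-7B-Instruct-v0.3") == "a10g-large"
assert pick_hf_flavor("meta-llama/Llama-2-13b-chat-hf") == "a100-large"

The 13B threshold is where the selection jumps from a10g-large to a100-large, matching the 6B-12B / 13B+ split in the commit message.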
utils/modal_job_submission.py (CHANGED)

@@ -378,30 +378,35 @@ def _auto_select_modal_hardware(provider: str, model: str) -> Optional[str]:
 
     # Local models need GPU - select based on model size
     # Conservative allocation for agentic tasks (model weights + KV cache + inference overhead)
+    # Memory estimation: ~4-5GB per 1B params for safe agentic execution
     model_lower = model.lower()
 
-    # Extract model size
-    … (removed string-matching logic truncated in this view)
+    # Extract model size using regex to capture the number before 'b'
+    import re
+    size_match = re.search(r'(\d+\.?\d*)b', model_lower)
+
+    if size_match:
+        model_size = float(size_match.group(1))
+
+        # Complete coverage from 0.5B to 100B+ with no gaps
+        if model_size >= 49:
+            # 49B-100B+: H200 (140GB VRAM)
+            return "H200"
+        elif model_size >= 25:
+            # 25B-48B: A100-80GB (e.g., Gemma-27B, Kimi-48B, 30B, 34B)
+            return "A100-80GB"
+        elif model_size >= 13:
+            # 13B-24B: A100-80GB (e.g., 13B, 14B, 15B, 20B, 22B)
+            return "A100-80GB"
+        elif model_size >= 6:
+            # 6B-12B: L40S 48GB (e.g., 6B, 7B, 8B, 9B, 10B, 11B, 12B)
+            return "L40S"
+        elif model_size >= 1:
+            # 1B-5B: T4 16GB (e.g., 1B, 2B, 3B, 4B, 5B)
+            return "T4"
+        else:
+            # < 1B: T4 16GB
+            return "T4"
     else:
-        # …
-        return "…
+        # No size detected in model name - default to L40S (safe middle ground)
+        return "L40S"
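Similarly, a hypothetical re-statement of the Modal tiers as a threshold table; the table form, the function name pick_modal_gpu, and the example IDs are not from the repo, while the thresholds and GPU names come from the patch:

import re

# Thresholds and GPU names taken from the diff above; everything else is illustrative
MODAL_TIERS = [
    (49, "H200"),        # 49B-100B+ (140GB VRAM)
    (25, "A100-80GB"),   # 25B-48B
    (13, "A100-80GB"),   # 13B-24B
    (6,  "L40S"),        # 6B-12B (48GB)
    (0,  "T4"),          # <6B, including <1B (16GB)
]

def pick_modal_gpu(model: str) -> str:
    m = re.search(r'(\d+\.?\d*)b', model.lower())
    if not m:
        return "L40S"    # no size detected -> safe middle ground, as in the patch
    size = float(m.group(1))
    return next(gpu for floor, gpu in MODAL_TIERS if size >= floor)

assert pick_modal_gpu("Qwen/Qwen2.5-0.5B") == "T4"
assert pick_modal_gpu("google/gemma-2-27b-it") == "A100-80GB"
assert pick_modal_gpu("meta-llama/Llama-3.3-70B-Instruct") == "H200"

Collapsing the 1B-5B and <1B branches into a single 0-floor entry is behavior-preserving here, since both return T4 in the patch.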