kshitijthakkar committed
Commit 9e2cabe · 1 Parent(s): f93db33

fix: Complete hardware allocation matrix with no gaps (1B-100B+)


Replaced string matching with regex-based model size extraction for
accurate hardware allocation across all model sizes.

Modal GPU allocation:
- 49B-100B+: H200 (140GB VRAM)
- 25B-48B: A100-80GB (Gemma-27B, Kimi-48B, 30B, 34B)
- 13B-24B: A100-80GB (13B, 14B, 15B, 20B, 22B)
- 6B-12B: L40S 48GB (6B, 7B, 8B, 9B, 10B, 11B, 12B)
- 1B-5B: T4 16GB (1B, 2B, 3B, 4B, 5B)
- <1B: T4 16GB

HF Jobs allocation (limited GPU options):
- 13B-100B+: A100 large
- 6B-12B: A10G large
- 1B-5B: T4 small
- <1B: T4 small

Uses a regex to extract the model size from the model name (e.g., '8b', '27b',
'0.5b'), ensuring no gaps in coverage.
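For illustration, here is a minimal standalone sketch of the size extraction described above. The pattern mirrors the one in the diffs below; the model names are hypothetical examples, not taken from the repo:

```python
import re

# Same pattern as in the diffs below: a number (optionally with a decimal part)
# immediately followed by 'b', searched in the lowercased model name.
SIZE_PATTERN = re.compile(r"(\d+\.?\d*)b")

def extract_size_billions(model_name: str) -> float | None:
    match = SIZE_PATTERN.search(model_name.lower())
    return float(match.group(1)) if match else None

# Hypothetical model names to show how the pattern behaves:
print(extract_size_billions("meta-llama/Llama-3.1-8B-Instruct"))  # 8.0 ('3.1' is skipped: no 'b' follows)
print(extract_size_billions("google/gemma-2-27b-it"))             # 27.0
print(extract_size_billions("Qwen/Qwen2.5-0.5B-Instruct"))        # 0.5
print(extract_size_billions("gpt-4o"))                            # None -> falls back to the default GPU
```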

utils/hf_jobs_submission.py CHANGED
@@ -239,34 +239,33 @@ def _auto_select_hf_hardware(provider: str, model: str) -> str:
 
     # Local models need GPU - select based on model size
     # Conservative allocation for agentic tasks (model weights + KV cache + inference overhead)
+    # Memory estimation: ~4-5GB per 1B params for safe agentic execution
     model_lower = model.lower()
 
-    # Extract model size in billions
-    # Check for explicit size markers
-    if "70b" in model_lower or "72b" in model_lower or "65b" in model_lower:
-        # 70B+ models: ~280-350GB needed -> A100 80GB (may need quantization)
-        return "a100-large"
-    elif "30b" in model_lower or "32b" in model_lower or "33b" in model_lower or "34b" in model_lower:
-        # 30-34B models: ~120-170GB needed -> A100 80GB required
-        return "a100-large"
-    elif "14b" in model_lower or "13b" in model_lower or "15b" in model_lower:
-        # 13-15B models: ~52-75GB needed -> A100 40GB or A100 80GB
-        return "a100-large"
-    elif "8b" in model_lower or "9b" in model_lower:
-        # 8-9B models: ~32-45GB needed -> A10G 24GB may OOM, use A100
-        return "a100-large"
-    elif "7b" in model_lower:
-        # 7B models: ~28-35GB needed -> A10G can work with quantization
-        return "a10g-large"
-    elif "3b" in model_lower or "4b" in model_lower:
-        # 3-4B models: ~12-20GB needed -> A10G safe
-        return "a10g-large"
-    elif "1b" in model_lower or "2b" in model_lower or "0.5b" in model_lower:
-        # Small models < 3B: ~4-10GB needed -> T4 sufficient
-        return "t4-small"
+    # Extract model size using regex to capture the number before 'b'
+    import re
+    size_match = re.search(r'(\d+\.?\d*)b', model_lower)
+
+    if size_match:
+        model_size = float(size_match.group(1))
+
+        # Complete coverage from 0.5B to 100B+ with no gaps
+        # HF Jobs has limited GPU options: t4-small, a10g-large, a100-large
+        if model_size >= 13:
+            # 13B-100B+: A100 large (e.g., 13B, 14B, 27B, 30B, 48B, 70B)
+            return "a100-large"
+        elif model_size >= 6:
+            # 6B-12B: A10G large (e.g., 6B, 7B, 8B, 9B, 10B, 11B, 12B)
+            return "a10g-large"
+        elif model_size >= 1:
+            # 1B-5B: T4 small (e.g., 1B, 2B, 3B, 4B, 5B)
+            return "t4-small"
+        else:
+            # < 1B: T4 small
+            return "t4-small"
     else:
-        # Default to A10G for unknown sizes (safer than T4)
-        return "a10g-large"
+        # No size detected in model name - default to A100 (safe for agentic workloads)
+        return "a100-large"
 
 
 def check_job_status(hf_job_id: str, hf_token: Optional[str] = None) -> Dict:
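As a quick sanity check of the HF Jobs thresholds above, the same tiering can be re-stated as a standalone function with a few boundary cases (illustrative only; this helper is not part of the repo):

```python
def hf_jobs_flavor(model_size_b: float) -> str:
    # Mirrors the thresholds in _auto_select_hf_hardware above
    if model_size_b >= 13:
        return "a100-large"   # 13B-100B+
    elif model_size_b >= 6:
        return "a10g-large"   # 6B-12B
    else:
        return "t4-small"     # <6B (the 1B-5B and <1B branches both return t4-small)

assert hf_jobs_flavor(0.5) == "t4-small"
assert hf_jobs_flavor(5) == "t4-small"
assert hf_jobs_flavor(6) == "a10g-large"
assert hf_jobs_flavor(12) == "a10g-large"
assert hf_jobs_flavor(13) == "a100-large"
assert hf_jobs_flavor(70) == "a100-large"
```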
utils/modal_job_submission.py CHANGED
@@ -378,30 +378,35 @@ def _auto_select_modal_hardware(provider: str, model: str) -> Optional[str]:
 
     # Local models need GPU - select based on model size
     # Conservative allocation for agentic tasks (model weights + KV cache + inference overhead)
+    # Memory estimation: ~4-5GB per 1B params for safe agentic execution
    model_lower = model.lower()
 
-    # Extract model size in billions
-    if "70b" in model_lower or "72b" in model_lower or "65b" in model_lower:
-        # 70B+ models: ~280-350GB needed -> H200 (140GB VRAM, faster throughput)
-        return "H200"
-    elif "30b" in model_lower or "32b" in model_lower or "33b" in model_lower or "34b" in model_lower:
-        # 30-34B models: ~120-170GB needed -> A100 80GB required
-        return "A100-80GB"
-    elif "14b" in model_lower or "13b" in model_lower or "15b" in model_lower:
-        # 13-15B models: ~52-75GB needed -> A100 40GB or A100 80GB
-        return "A100-80GB"
-    elif "8b" in model_lower or "9b" in model_lower:
-        # 8-9B models: ~32-45GB needed -> L40S (48GB VRAM)
-        return "L40S"
-    elif "7b" in model_lower:
-        # 7B models: ~28-35GB needed -> A10G can work with quantization
-        return "A10G"
-    elif "3b" in model_lower or "4b" in model_lower:
-        # 3-4B models: ~12-20GB needed -> A10G safe
-        return "A10G"
-    elif "1b" in model_lower or "2b" in model_lower or "0.5b" in model_lower:
-        # Small models < 3B: ~4-10GB needed -> T4 sufficient
-        return "T4"
+    # Extract model size using regex to capture the number before 'b'
+    import re
+    size_match = re.search(r'(\d+\.?\d*)b', model_lower)
+
+    if size_match:
+        model_size = float(size_match.group(1))
+
+        # Complete coverage from 0.5B to 100B+ with no gaps
+        if model_size >= 49:
+            # 49B-100B+: H200 (140GB VRAM)
+            return "H200"
+        elif model_size >= 25:
+            # 25B-48B: A100-80GB (e.g., Gemma-27B, Kimi-48B, 30B, 34B)
+            return "A100-80GB"
+        elif model_size >= 13:
+            # 13B-24B: A100-80GB (e.g., 13B, 14B, 15B, 20B, 22B)
+            return "A100-80GB"
+        elif model_size >= 6:
+            # 6B-12B: L40S 48GB (e.g., 6B, 7B, 8B, 9B, 10B, 11B, 12B)
+            return "L40S"
+        elif model_size >= 1:
+            # 1B-5B: T4 16GB (e.g., 1B, 2B, 3B, 4B, 5B)
+            return "T4"
+        else:
+            # < 1B: T4 16GB
+            return "T4"
     else:
+        # No size detected in model name - default to L40S (safe middle ground)
+        return "L40S"
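And an equivalent sketch for the Modal tiers, looping over representative sizes to show that everything from <1B to 100B+ lands on some GPU. This is a re-statement of the branch above for illustration, not code imported from the repo:

```python
def modal_gpu(model_size_b: float) -> str:
    # Mirrors the thresholds in _auto_select_modal_hardware above
    if model_size_b >= 49:
        return "H200"        # 49B-100B+
    elif model_size_b >= 13:
        return "A100-80GB"   # 13B-48B (the 25B-48B and 13B-24B branches return the same GPU)
    elif model_size_b >= 6:
        return "L40S"        # 6B-12B
    else:
        return "T4"          # <6B, including <1B

# Representative sizes spanning the whole range, including tier boundaries
for size in [0.5, 1, 5, 6, 12, 13, 24, 25, 48, 49, 70, 100]:
    print(f"{size}B -> {modal_gpu(size)}")
```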