kshitijthakkar commited on
Commit
b714b1e
·
1 Parent(s): 60c4817

feat: Add full support for HuggingFace Jobs hardware in cost estimator

Browse files

Fixed the 'Value not in list of choices' error by adding all HF Jobs hardware options:

**HuggingFace Jobs hardware added:**
- CPU: cpu-basic, cpu-upgrade
- GPU: t4-small, t4-medium, l4x1, l4x4, a10g-small, a10g-large,
a10g-largex2, a10g-largex4, a100-large
- TPU: v5e-1x1, v5e-2x2, v5e-2x4

**Changes:**
1. Updated docstring to document all supported hardware
2. Added HF Jobs per-second pricing estimates (based on typical hourly rates)
3. Updated CO2 estimates for all HF Jobs hardware
4. Changed auto-selection default: cpu-basic (for API models) / a10g-small (for local models)
5. Updated compute cost calculation to handle all hardware types
6. Updated pricing source metadata

Now cost estimation works for both Modal and HuggingFace Jobs hardware.

Files changed (1) hide show
  1. mcp_tools.py +48 -16
mcp_tools.py CHANGED
@@ -275,9 +275,9 @@ async def estimate_cost(
275
  model (str): Model identifier in litellm format (e.g., "openai/gpt-4", "meta-llama/Llama-3.1-8B")
276
  agent_type (str): Type of agent capabilities to test. Options: "tool", "code", "both"
277
  num_tests (int): Number of test cases to run. Must be between 10 and 1000. Default: 100
278
- hardware (str): Hardware type for HuggingFace Jobs. Options: "auto", "cpu", "gpu_a10", "gpu_h200". Default: "auto"
279
  Returns:
280
- str: Markdown-formatted cost estimate with breakdown of LLM costs, HF Jobs costs, duration, CO2 emissions, and optimization tips
281
  """
282
  try:
283
  # Initialize Gemini client with provided key or from environment
@@ -301,11 +301,11 @@ async def estimate_cost(
301
 
302
  # Auto-select hardware
303
  if hardware == "auto":
304
- hardware = "cpu" if is_api_model else "gpu_a10"
305
 
306
- # Modal compute costs (per second) - from Modal website
307
- modal_compute_costs = {
308
- # GPU Tasks
309
  "gpu_b200": 0.001736, # Nvidia B200
310
  "gpu_h200": 0.001261, # Nvidia H200
311
  "gpu_h100": 0.001097, # Nvidia H100
@@ -315,10 +315,26 @@ async def estimate_cost(
315
  "gpu_a10": 0.000306, # Nvidia A10
316
  "gpu_l4": 0.000222, # Nvidia L4
317
  "gpu_t4": 0.000164, # Nvidia T4
318
- # CPU (per core)
319
  "cpu": 0.0000131, # Physical core (2 vCPU equivalent)
320
- # Memory (per GiB)
321
- "memory": 0.00000222 # Per GiB
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
322
  }
323
 
324
  # Get model costs from pricing database
@@ -380,12 +396,12 @@ async def estimate_cost(
380
 
381
  total_duration_seconds = duration_per_test * num_tests
382
 
383
- # Calculate Modal compute costs (per second)
384
- compute_rate_per_sec = modal_compute_costs.get(hardware, modal_compute_costs["cpu"])
385
 
386
- # For CPU, estimate core usage (assume 2 cores for agent workload)
387
- # For GPU, direct cost
388
- if hardware == "cpu":
389
  num_cores = 2 # Estimate 2 cores for typical agent workload
390
  total_compute_cost = total_duration_seconds * compute_rate_per_sec * num_cores
391
  else:
@@ -393,6 +409,7 @@ async def estimate_cost(
393
 
394
  # Estimate CO2 (rough estimates in kg per hour)
395
  co2_per_hour = {
 
396
  "cpu": 0.05,
397
  "gpu_t4": 0.10,
398
  "gpu_l4": 0.12,
@@ -402,7 +419,22 @@ async def estimate_cost(
402
  "gpu_a100_80gb": 0.28,
403
  "gpu_h100": 0.30,
404
  "gpu_h200": 0.32,
405
- "gpu_b200": 0.35
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
406
  }
407
 
408
  total_co2_kg = (total_duration_seconds / 3600) * co2_per_hour.get(hardware, 0.05)
@@ -414,7 +446,7 @@ async def estimate_cost(
414
  "num_tests": num_tests,
415
  "hardware": hardware,
416
  "is_api_model": is_api_model,
417
- "pricing_source": "genai_otel pricing database + Modal compute costs",
418
  "estimates": {
419
  "llm_cost_usd": round(total_llm_cost, 6),
420
  "llm_cost_per_test": round(llm_cost_per_test, 6),
 
275
  model (str): Model identifier in litellm format (e.g., "openai/gpt-4", "meta-llama/Llama-3.1-8B")
276
  agent_type (str): Type of agent capabilities to test. Options: "tool", "code", "both"
277
  num_tests (int): Number of test cases to run. Must be between 10 and 1000. Default: 100
278
+ hardware (str): Hardware type for compute. Supports Modal (gpu_t4, gpu_a10, gpu_h200, etc.) and HuggingFace Jobs (cpu-basic, t4-small, a10g-small, a100-large, etc.). Default: "auto"
279
  Returns:
280
+ str: Markdown-formatted cost estimate with breakdown of LLM costs, compute costs, duration, CO2 emissions, and optimization tips
281
  """
282
  try:
283
  # Initialize Gemini client with provided key or from environment
 
301
 
302
  # Auto-select hardware
303
  if hardware == "auto":
304
+ hardware = "cpu-basic" if is_api_model else "a10g-small"
305
 
306
+ # Compute costs (per second) - Modal + HuggingFace Jobs
307
+ compute_costs = {
308
+ # Modal GPU Tasks (per second)
309
  "gpu_b200": 0.001736, # Nvidia B200
310
  "gpu_h200": 0.001261, # Nvidia H200
311
  "gpu_h100": 0.001097, # Nvidia H100
 
315
  "gpu_a10": 0.000306, # Nvidia A10
316
  "gpu_l4": 0.000222, # Nvidia L4
317
  "gpu_t4": 0.000164, # Nvidia T4
318
+ # Modal CPU (per core)
319
  "cpu": 0.0000131, # Physical core (2 vCPU equivalent)
320
+
321
+ # HuggingFace Jobs (estimated per second based on typical hourly rates)
322
+ # Note: HF Jobs pricing varies, these are estimates
323
+ "cpu-basic": 0.0000167, # ~$0.06/hour
324
+ "cpu-upgrade": 0.0000278, # ~$0.10/hour
325
+ "t4-small": 0.000167, # ~$0.60/hour
326
+ "t4-medium": 0.000278, # ~$1.00/hour
327
+ "l4x1": 0.000250, # ~$0.90/hour
328
+ "l4x4": 0.001000, # ~$3.60/hour
329
+ "a10g-small": 0.000333, # ~$1.20/hour
330
+ "a10g-large": 0.000556, # ~$2.00/hour
331
+ "a10g-largex2": 0.001111, # ~$4.00/hour
332
+ "a10g-largex4": 0.002222, # ~$8.00/hour
333
+ "a100-large": 0.001389, # ~$5.00/hour
334
+ # TPU (estimated)
335
+ "v5e-1x1": 0.000417, # ~$1.50/hour
336
+ "v5e-2x2": 0.001667, # ~$6.00/hour
337
+ "v5e-2x4": 0.003333 # ~$12.00/hour
338
  }
339
 
340
  # Get model costs from pricing database
 
396
 
397
  total_duration_seconds = duration_per_test * num_tests
398
 
399
+ # Calculate compute costs (per second)
400
+ compute_rate_per_sec = compute_costs.get(hardware, compute_costs.get("cpu-basic", 0.0000167))
401
 
402
+ # For CPU-based hardware, estimate core usage (assume 2 cores for agent workload)
403
+ # For GPU/TPU, direct cost
404
+ if hardware in ["cpu", "cpu-basic", "cpu-upgrade"]:
405
  num_cores = 2 # Estimate 2 cores for typical agent workload
406
  total_compute_cost = total_duration_seconds * compute_rate_per_sec * num_cores
407
  else:
 
409
 
410
  # Estimate CO2 (rough estimates in kg per hour)
411
  co2_per_hour = {
412
+ # Modal
413
  "cpu": 0.05,
414
  "gpu_t4": 0.10,
415
  "gpu_l4": 0.12,
 
419
  "gpu_a100_80gb": 0.28,
420
  "gpu_h100": 0.30,
421
  "gpu_h200": 0.32,
422
+ "gpu_b200": 0.35,
423
+ # HuggingFace Jobs
424
+ "cpu-basic": 0.03,
425
+ "cpu-upgrade": 0.04,
426
+ "t4-small": 0.08,
427
+ "t4-medium": 0.10,
428
+ "l4x1": 0.12,
429
+ "l4x4": 0.48,
430
+ "a10g-small": 0.13,
431
+ "a10g-large": 0.15,
432
+ "a10g-largex2": 0.30,
433
+ "a10g-largex4": 0.60,
434
+ "a100-large": 0.25,
435
+ "v5e-1x1": 0.18,
436
+ "v5e-2x2": 0.72,
437
+ "v5e-2x4": 1.44
438
  }
439
 
440
  total_co2_kg = (total_duration_seconds / 3600) * co2_per_hour.get(hardware, 0.05)
 
446
  "num_tests": num_tests,
447
  "hardware": hardware,
448
  "is_api_model": is_api_model,
449
+ "pricing_source": "genai_otel pricing database + Modal/HF Jobs compute costs",
450
  "estimates": {
451
  "llm_cost_usd": round(total_llm_cost, 6),
452
  "llm_cost_per_test": round(llm_cost_per_test, 6),