kshitijthakkar committed
Commit 83ebb04 · Parent: 8c679b3

feat: Complete Modal integration and fix cost estimation


- Fixed Modal GPU job execution with required packages (hf_transfer, nvidia-ml-py)
- Updated to latest non-deprecated CUDA image (12.6.0-cudnn-devel)
- Made Python version dynamic to match environment (HF Space uses 3.10)
- Added streaming output for real-time progress visibility in Modal logs
- Improved logging with GPU info and download progress indicators
- Fixed cost estimation to show actual hardware for both Modal and HF Jobs (sketched below)
- Auto-selection now displays: 'auto → **A100-80GB** (Modal)' or 'auto → **a10g-large** (HF Jobs)'
- Cost estimates now match actual job hardware selection
- Updated job submission instructions with realistic duration estimates
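
The cost-estimation fixes above boil down to a two-tier fallback: use historical leaderboard data when it exists, otherwise ask the MCP server for an AI-generated estimate. A condensed sketch of that flow, where `load_historical_runs` and `mcp_estimate` are hypothetical stand-ins for the real lookups (the actual implementation is `estimate_job_cost_with_mcp_fallback` in the app.py diff below):

```python
from statistics import mean

def load_historical_runs(model: str) -> list[dict]:
    """Hypothetical stand-in for the leaderboard lookup."""
    return []  # pretend there is no history, forcing the MCP fallback

def mcp_estimate(model: str, hardware: str) -> float:
    """Hypothetical stand-in for the MCP/Gemini cost estimator."""
    return 0.42

def estimate_cost_sketch(model: str, hardware: str) -> dict:
    runs = load_historical_runs(model)
    if runs:
        # Tier 1: average real costs from past leaderboard runs
        return {"source": "historical", "cost": mean(r["cost"] for r in runs)}
    # Tier 2: no history, so fall back to the AI-powered MCP estimator
    return {"source": "mcp", "cost": mcp_estimate(model, hardware)}

print(estimate_cost_sketch("openai/gpt-4.1-nano", "auto"))
# -> {'source': 'mcp', 'cost': 0.42}
```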

Files changed (3)
  1. app.py +77 -14
  2. requirements.txt +3 -0
  3. utils/modal_job_submission.py +176 -27
app.py CHANGED
@@ -2270,10 +2270,10 @@ with gr.Blocks(title="TraceMind-AI", theme=theme) as app:
 
     with gr.Row():
         eval_model = gr.Textbox(
-            value="openai/gpt-4",
+            value="openai/gpt-4.1-nano",
             label="Model",
-            info="Model ID (e.g., openai/gpt-4, meta-llama/Llama-3.1-8B-Instruct)",
-            placeholder="openai/gpt-4"
+            info="Model ID (e.g., openai/gpt-4.1-nano, meta-llama/Llama-3.1-8B-Instruct)",
+            placeholder="openai/gpt-4.1-nano"
         )
 
     eval_provider = gr.Dropdown(
@@ -2462,11 +2462,47 @@ with gr.Blocks(title="TraceMind-AI", theme=theme) as app:
     # Evaluation Helper Functions
     # ============================================================================
 
-    def estimate_job_cost_with_mcp_fallback(model, hardware):
+    def estimate_job_cost_with_mcp_fallback(model, hardware, provider="litellm", infrastructure="HuggingFace Jobs"):
         """
         Estimate cost using historical leaderboard data first,
         then fall back to MCP server if model not found
+
+        Args:
+            model: Model name
+            hardware: Hardware selection from UI
+            provider: Provider type (litellm, transformers, etc.)
+            infrastructure: Infrastructure provider (Modal, HuggingFace Jobs)
         """
+        # Handle auto-selection for both infrastructure providers
+        selected_hardware_display = None
+
+        if hardware == "auto":
+            if infrastructure == "Modal":
+                # Modal auto-selection
+                from utils.modal_job_submission import _auto_select_modal_hardware
+                modal_gpu = _auto_select_modal_hardware(provider, model)
+                selected_hardware_display = f"auto → **{modal_gpu or 'CPU'}** (Modal)"
+
+                # Map Modal GPU names to HF Jobs equivalent for cost estimation
+                modal_to_hf_map = {
+                    None: "cpu-basic",  # CPU
+                    "T4": "t4-small",
+                    "L4": "l4x1",
+                    "A10G": "a10g-small",
+                    "L40S": "a10g-large",
+                    "A100": "a100-large",
+                    "A100-80GB": "a100-large",  # Use a100-large as proxy for cost
+                    "H100": "a100-large",  # Use a100 as proxy
+                    "H200": "a100-large",  # Use a100 as proxy
+                }
+                hardware = modal_to_hf_map.get(modal_gpu, "a10g-small")
+            else:
+                # HuggingFace Jobs auto-selection
+                from utils.hf_jobs_submission import _auto_select_hf_hardware
+                hf_hardware = _auto_select_hf_hardware(provider, model)
+                selected_hardware_display = f"auto → **{hf_hardware}** (HF Jobs)"
+                hardware = hf_hardware
+
         try:
             # Try to get historical data from leaderboard
             df = data_loader.load_leaderboard()
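
The hunk above maps Modal GPU names onto HF Jobs hardware tiers so a single cost table can serve both platforms. A standalone rendition of that lookup for readers who want to try it: the `MODAL_TO_HF` dict mirrors the diff, while `resolve_auto_hardware` is an illustrative wrapper, not a function in the codebase.

```python
from typing import Optional

# Mirrors modal_to_hf_map from the hunk above (HF Jobs tiers used as cost proxies).
MODAL_TO_HF = {
    None: "cpu-basic", "T4": "t4-small", "L4": "l4x1", "A10G": "a10g-small",
    "L40S": "a10g-large", "A100": "a100-large", "A100-80GB": "a100-large",
    "H100": "a100-large", "H200": "a100-large",
}

def resolve_auto_hardware(modal_gpu: Optional[str]) -> tuple[str, str]:
    """Illustrative wrapper: returns (HF cost-proxy tier, UI display string)."""
    display = f"auto → **{modal_gpu or 'CPU'}** (Modal)"
    return MODAL_TO_HF.get(modal_gpu, "a10g-small"), display

print(resolve_auto_hardware("A100-80GB"))
# -> ('a100-large', 'auto → **A100-80GB** (Modal)')
```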
@@ -2480,13 +2516,16 @@ with gr.Blocks(title="TraceMind-AI", theme=theme) as app:
                 avg_duration = model_runs['avg_duration_ms'].mean()
                 has_cost_data = model_runs['total_cost_usd'].sum() > 0
 
-                return {
+                result = {
                     'source': 'historical',
                     'total_cost_usd': f"{avg_cost:.4f}",
                     'estimated_duration_minutes': f"{(avg_duration / 1000 / 60):.1f}",
                     'historical_runs': len(model_runs),
                     'has_cost_data': has_cost_data
                 }
+                if selected_hardware_display:
+                    result['hardware_display'] = selected_hardware_display
+                return result
             else:
                 # No historical data - use MCP tool
                 print(f"[INFO] No historical data for {model}, using MCP cost estimator")
@@ -2517,7 +2556,7 @@ with gr.Blocks(title="TraceMind-AI", theme=theme) as app:
                     extracted_duration = duration_match.group(0) if duration_match else 'See details below'
 
                     # Return with markdown content
-                    return {
+                    result_dict = {
                         'source': 'mcp',
                         'total_cost_usd': extracted_cost,
                         'estimated_duration_minutes': extracted_duration,
@@ -2525,9 +2564,12 @@ with gr.Blocks(title="TraceMind-AI", theme=theme) as app:
                         'has_cost_data': True,
                         'markdown_details': result  # Include full markdown response
                     }
+                    if selected_hardware_display:
+                        result_dict['hardware_display'] = selected_hardware_display
+                    return result_dict
                 else:
                     # Unexpected response type
-                    return {
+                    result_dict = {
                         'source': 'mcp',
                         'total_cost_usd': 'N/A',
                         'estimated_duration_minutes': 'N/A',
@@ -2535,12 +2577,15 @@ with gr.Blocks(title="TraceMind-AI", theme=theme) as app:
                         'has_cost_data': False,
                         'error': f'MCP returned unexpected type: {type(result)}'
                     }
+                    if selected_hardware_display:
+                        result_dict['hardware_display'] = selected_hardware_display
+                    return result_dict
             except Exception as mcp_error:
                 print(f"[ERROR] MCP cost estimation failed: {mcp_error}")
                 import traceback
                 traceback.print_exc()
                 # Return a result indicating MCP is unavailable
-                return {
+                result_dict = {
                     'source': 'mcp',
                     'total_cost_usd': 'N/A',
                     'estimated_duration_minutes': 'N/A',
@@ -2548,14 +2593,17 @@ with gr.Blocks(title="TraceMind-AI", theme=theme) as app:
                     'has_cost_data': False,
                     'error': str(mcp_error)
                 }
+                if selected_hardware_display:
+                    result_dict['hardware_display'] = selected_hardware_display
+                return result_dict
 
         except Exception as e:
             print(f"[ERROR] Cost estimation failed (leaderboard load): {e}")
             return None
 
-    def on_hardware_change(model, hardware):
+    def on_hardware_change(model, hardware, provider, infrastructure):
         """Update cost estimate when hardware selection changes"""
-        cost_est = estimate_job_cost_with_mcp_fallback(model, hardware)
+        cost_est = estimate_job_cost_with_mcp_fallback(model, hardware, provider, infrastructure)
 
         if cost_est is None:
             # Error occurred
@@ -2583,6 +2631,9 @@ No historical data available for **{model}**.
         cost_display = f"${cost_est['total_cost_usd']}" if cost_est['has_cost_data'] else "N/A (cost tracking not enabled)"
         duration = cost_est['estimated_duration_minutes']
 
+        # Use custom hardware display if available, otherwise show hardware as-is
+        hardware_display = cost_est.get('hardware_display', hardware.upper())
+
         return f"""## 💰 Cost Estimate
 
 **{source_label}**
@@ -2590,7 +2641,7 @@ No historical data available for **{model}**.
 | Metric | Value |
 |--------|-------|
 | **Model** | {model} |
-| **Hardware** | {hardware.upper()} |
+| **Hardware** | {hardware_display} |
 | **Estimated Cost** | {cost_display} |
 | **Duration** | {duration} minutes |
 
@@ -2602,13 +2653,18 @@ No historical data available for **{model}**.
         # MCP Cost Estimator - return the full markdown from MCP
         markdown_details = cost_est.get('markdown_details', '')
 
+        # Add hardware selection note if applicable
+        hardware_note = ""
+        if cost_est.get('hardware_display'):
+            hardware_note = f"\n\n**Hardware**: {cost_est['hardware_display']}\n\n"
+
         # Add header to identify the source
         header = f"""## 💰 Cost Estimate - AI Analysis
 
 **🤖 Powered by MCP Server + Gemini 2.5 Pro**
 
 *This estimate was generated by AI analysis since no historical data is available for this model.*
-
+{hardware_note}
 ---
 
 """
@@ -2697,13 +2753,14 @@ No historical data available for **{model}**.
             # Success - build success message
             job_id = result.get('job_id', 'unknown')
             hf_job_id = result.get('hf_job_id', job_id)  # Get actual HF job ID
+            modal_call_id = result.get('modal_call_id', None)  # Get Modal call ID if available
             job_platform = result.get('platform', infra_provider)
             job_hardware = result.get('hardware', hardware)
             job_status = result.get('status', 'submitted')
             job_message = result.get('message', '')
 
             # Estimate cost
-            cost_est = estimate_job_cost_with_mcp_fallback(model, hardware)
+            cost_est = estimate_job_cost_with_mcp_fallback(model, hardware, provider, infra_provider)
             has_cost_estimate = cost_est is not None
 
             cost_info_html = ""
@@ -2770,9 +2827,15 @@ No historical data available for **{model}**.
     <div style="background: rgba(255,255,255,0.15); padding: 15px; border-radius: 5px; margin: 15px 0;">
         <div style="font-size: 0.9em; opacity: 0.9; margin-bottom: 5px;">Run ID (SMOLTRACE)</div>
         <div style="font-family: monospace; font-size: 0.95em; font-weight: bold;">{job_id}</div>
+        {f'''
+        <div style="font-size: 0.9em; opacity: 0.9; margin-top: 10px; margin-bottom: 5px;">Modal Call ID</div>
+        <div style="font-family: monospace; font-size: 0.95em; font-weight: bold;">{modal_call_id}</div>
+        <div style="font-size: 0.8em; opacity: 0.8; margin-top: 8px;">View on Modal Dashboard: <a href="https://modal.com/apps" target="_blank" style="color: rgba(255,255,255,0.9);">https://modal.com/apps</a></div>
+        ''' if modal_call_id else f'''
         <div style="font-size: 0.9em; opacity: 0.9; margin-top: 10px; margin-bottom: 5px;">HF Job ID</div>
         <div style="font-family: monospace; font-size: 0.95em; font-weight: bold;">{hf_job_id}</div>
         <div style="font-size: 0.8em; opacity: 0.8; margin-top: 8px;">Use this ID to monitor: <code style="background: rgba(0,0,0,0.2); padding: 2px 6px; border-radius: 3px;">hf jobs inspect {hf_job_id}</code></div>
+        '''}
     </div>
 
     <div style="display: grid; grid-template-columns: 1fr 1fr 1fr; gap: 10px; margin-top: 15px;">
@@ -3646,7 +3709,7 @@ Result: {result}
 
     eval_estimate_btn.click(
         fn=on_hardware_change,
-        inputs=[eval_model, eval_hardware],
+        inputs=[eval_model, eval_hardware, eval_provider, eval_infrastructure],
         outputs=[eval_cost_estimate]
     )
 
 
requirements.txt CHANGED
@@ -35,3 +35,6 @@ smolagents>=1.22.0
 smolagents[mcp]>=1.22.0  # MCP client support
 google-generativeai>=0.3.0  # For Gemini integration
 litellm>=1.0.0  # For LiteLLM model support
+
+# Modal (for serverless GPU compute)
+modal>=0.64.0
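
A quick post-install sanity check (a sketch; it assumes MODAL_TOKEN_ID and MODAL_TOKEN_SECRET are configured the way the job-submission code expects):

```python
import os
from importlib.metadata import version

# Confirm the pinned SDK is importable and the auth env vars are present.
print("modal version:", version("modal"))  # expect >= 0.64.0
for var in ("MODAL_TOKEN_ID", "MODAL_TOKEN_SECRET"):
    print(f"{var} set:", bool(os.environ.get(var)))
```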
utils/modal_job_submission.py CHANGED
@@ -5,6 +5,7 @@ Handles submission of SMOLTRACE evaluation jobs to Modal's serverless compute pl
 """
 
 import os
+import sys
 import uuid
 from typing import Dict, Optional, List
 
@@ -156,13 +157,41 @@ def submit_modal_job(
     try:
         app = modal.App(f"smoltrace-eval-{job_id}")
 
-        # Define Modal function
-        image = modal.Image.debian_slim().pip_install([
-            "smoltrace[otel,gpu]",
-            "litellm",
-            "transformers",
-            "torch"
-        ])
+        # Detect current Python version dynamically (must match for serialized=True)
+        python_version = f"{sys.version_info.major}.{sys.version_info.minor}"
+
+        # Define Modal function with appropriate base image
+        # Note: Must match local Python version when using serialized=True
+        if modal_gpu:
+            # Use GPU-optimized image with CUDA for GPU jobs (using latest stable CUDA)
+            image = modal.Image.from_registry(
+                "nvidia/cuda:12.6.0-cudnn-devel-ubuntu22.04",
+                add_python=python_version  # Dynamically match current environment
+            ).pip_install([
+                "smoltrace",
+                "ddgs",  # DuckDuckGo search
+                "litellm",
+                "transformers",
+                "torch",
+                "accelerate",  # Required for GPU device_map
+                "bitsandbytes",  # For quantization support
+                "sentencepiece",  # For some tokenizers
+                "protobuf",  # For some models
+                "hf_transfer",  # Fast HuggingFace downloads
+                "nvidia-ml-py"  # GPU metrics collection
+            ]).env({
+                # Enable fast downloads and verbose logging
+                "HF_HUB_ENABLE_HF_TRANSFER": "1",
+                "TRANSFORMERS_VERBOSITY": "info",
+                "HF_HUB_VERBOSITY": "info"
+            })
+        else:
+            # Use lightweight image for CPU jobs
+            image = modal.Image.debian_slim(python_version=python_version).pip_install([
+                "smoltrace",
+                "ddgs",  # DuckDuckGo search
+                "litellm"
+            ])
 
         @app.function(
             image=image,
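
The image pattern above can be exercised in isolation before wiring it into job submission. A minimal smoke test (a sketch that assumes valid Modal credentials; the app name and function are hypothetical, but every API used here appears in the diff):

```python
import sys
import modal

app = modal.App("image-smoke-test")  # hypothetical app name
python_version = f"{sys.version_info.major}.{sys.version_info.minor}"

# Same debian_slim + pinned-Python pattern as the CPU branch above.
image = modal.Image.debian_slim(python_version=python_version).pip_install(["litellm"])

@app.function(image=image, timeout=300)
def report_python() -> str:
    import sys
    return sys.version  # should match the python_version pin above

if __name__ == "__main__":
    with app.run():
        print(report_python.remote())
```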
@@ -170,40 +199,160 @@
             secrets=[
                 modal.Secret.from_dict(env_vars)
             ],
-            timeout=3600  # 1 hour timeout
+            timeout=3600,  # 1 hour timeout
+            serialized=True  # Required for functions defined in local scope
         )
-        def run_evaluation():
+        def run_evaluation(command_to_run: str):
             """Run SMOLTRACE evaluation on Modal"""
             import subprocess
-            result = subprocess.run(command, shell=True, capture_output=True, text=True)
-            return {
-                "returncode": result.returncode,
-                "stdout": result.stdout,
-                "stderr": result.stderr
-            }
+            import sys
+            import os
+
+            print("=" * 80)
+            print(f"Starting SMOLTRACE evaluation on Modal")
+            print(f"Command: {command_to_run}")
+            print(f"Python version: {sys.version}")
+
+            # Show GPU info if available
+            try:
+                import torch
+                if torch.cuda.is_available():
+                    print(f"GPU: {torch.cuda.get_device_name(0)}")
+                    print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
+            except:
+                pass
+
+            print("=" * 80)
+            print("\nNote: Model download may take several minutes for large models (14B = ~28GB)")
+            print("Downloading and initializing model...\n")
+
+            try:
+                # Run with live output instead of capture_output so we can see progress
+                result = subprocess.run(
+                    command_to_run,
+                    shell=True,
+                    capture_output=False,  # Stream output in real-time
+                    text=True
+                )
+
+                # Since we're not capturing, create a success message
+                print("\n" + "=" * 80)
+                print("EVALUATION COMPLETED")
+                print(f"Return code: {result.returncode}")
+                print("=" * 80)
+
+                return {
+                    "returncode": result.returncode,
+                    "stdout": "Check Modal logs for full output (streaming mode)",
+                    "stderr": ""
+                }
+            except Exception as e:
+                error_msg = f"Error running evaluation: {str(e)}"
+                print("\n" + "=" * 80)
+                print("EVALUATION FAILED")
+                print(error_msg)
+                print("=" * 80)
+                import traceback
+                traceback.print_exc()
+                return {
+                    "returncode": -1,
+                    "stdout": "",
+                    "stderr": error_msg
+                }
+
+        # Submit the job using Modal's remote() in a background thread
+        # Note: spawn() doesn't work well with dynamically created apps
+        # remote() ensures the job actually executes, threading keeps UI responsive
+        import threading
+
+        # Store result in a shared dict since we're using threading
+        result_container = {"modal_call_id": None, "started": False}
+
+        def run_job_on_modal():
+            """Run the Modal job in background thread"""
+            try:
+                with app.run():
+                    # Use remote() instead of spawn() for dynamic apps
+                    # This ensures the function actually executes
+                    function_call = run_evaluation.remote(command)
+                    result_container["started"] = True
+                    print(f"Modal job completed with return code: {function_call.get('returncode', 'unknown')}")
+            except Exception as e:
+                print(f"Error running Modal job: {e}")
+                result_container["error"] = str(e)
+
+        # Start the job in a background thread so we don't block the UI
+        job_thread = threading.Thread(target=run_job_on_modal, daemon=True)
+        job_thread.start()
+
+        # Give Modal a moment to start the job and capture any immediate errors
+        import time
+        time.sleep(2)
 
-        # Submit the job
-        # Note: Modal doesn't have a direct "submit and return" API like HF Jobs
-        # For now, we'll return the command that should be run
-        # In production, you'd use Modal's async API or spawn the function
+        # Use job_id as the tracking ID since remote() doesn't give us a call_id
+        modal_call_id = f"modal-{job_id}"
 
         return {
             "success": True,
             "job_id": job_id,
+            "modal_call_id": modal_call_id,  # Modal's internal function call ID
             "platform": "Modal",
             "hardware": modal_gpu or "CPU",
             "command": command,
-            "status": "pending",
-            "message": "Modal job configured. Use Modal CLI to submit: modal run modal_job_submission.py",
-            "note": "Direct Modal API submission requires async handling. For now, use the generated command with Modal CLI."
+            "status": "submitted",
+            "message": f"Job successfully submitted to Modal (hardware: {modal_gpu or 'CPU'})",
+            "instructions": f"""
+✅ Job submitted successfully!
+
+**Job Details:**
+- Run ID: {job_id}
+- Modal Call ID: {modal_call_id}
+- Hardware: {modal_gpu or "CPU"}
+- Platform: Modal (serverless compute)
+
+**What happens next:**
+1. Job starts running on Modal infrastructure
+2. For GPU jobs: Model downloads first (14B models = ~28GB, can take 10-15 min)
+3. SMOLTRACE evaluates your model
+4. Results are automatically pushed to HuggingFace datasets
+5. They will appear in TraceMind leaderboard when complete
+
+**Monitoring**: Check Modal dashboard for real-time logs and progress:
+https://modal.com/apps
+
+**Expected Duration**:
+- CPU jobs (API models): 2-5 minutes
+- GPU jobs (local models): 15-30 minutes (includes model download)
+
+**Cost**: Modal charges per-second usage. Estimated cost: $0.01-1.00 depending on model size and hardware.
+""".strip()
         }
 
     except Exception as e:
-        return {
-            "success": False,
-            "error": f"Failed to create Modal job: {str(e)}",
-            "job_id": job_id
-        }
+        error_msg = str(e)
+
+        # Check for common Modal errors
+        if "MODAL_TOKEN_ID" in error_msg or "authentication" in error_msg.lower():
+            return {
+                "success": False,
+                "error": "Modal authentication failed. Please verify your MODAL_TOKEN_ID and MODAL_TOKEN_SECRET in Settings.",
+                "job_id": job_id,
+                "troubleshooting": """
+**Steps to fix:**
+1. Go to https://modal.com/settings/tokens
+2. Create a new token
+3. Copy Token ID (starts with 'ak-') and Token Secret (starts with 'as-')
+4. Add them to Settings in TraceMind
+5. Try again
+"""
+            }
+        else:
+            return {
+                "success": False,
+                "error": f"Failed to submit Modal job: {error_msg}",
+                "job_id": job_id,
+                "command": command
+            }
 
 
 def _auto_select_modal_hardware(provider: str, model: str) -> Optional[str]:
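
For context, here is a hypothetical caller for `submit_modal_job` showing how the result dict introduced in this commit would be consumed. The full signature sits outside these hunks, so the keyword arguments are assumptions; the result keys (`success`, `status`, `modal_call_id`, `instructions`, `troubleshooting`) come from the diff above.

```python
from utils.modal_job_submission import submit_modal_job

# Parameter names are assumptions; see the function definition for the real ones.
result = submit_modal_job(
    model="meta-llama/Llama-3.1-8B-Instruct",
    provider="transformers",
    hardware="auto",
)

if result["success"]:
    # Keys below come from the success dict added in this commit
    print(result["status"], result["modal_call_id"])  # e.g. submitted modal-<job_id>
    print(result["instructions"])
else:
    print(result["error"])
    print(result.get("troubleshooting", ""))
```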
 