| """Hardware configuration for local CUDA and HuggingFace Spaces GPU selection. | |
| This module provides: | |
| - Hardware mode selection (local CUDA vs Spaces GPU) | |
| - Persistent configuration via JSON file | |
| - HuggingFace Spaces GPU hardware options | |
| Spaces GPU pricing (as of Dec 2024): | |
| - ZeroGPU (H200): Free (PRO subscribers), dynamic allocation | |
| - T4-small: $0.40/hr, 16GB VRAM | |
| - T4-medium: $0.60/hr, 16GB VRAM | |
| - L4x1: $0.80/hr, 24GB VRAM | |
| - L4x4: $3.80/hr, 96GB VRAM | |
| - L40Sx1: $1.80/hr, 48GB VRAM | |
| - L40Sx4: $8.30/hr, 192GB VRAM | |
| - A10G-small: $1.00/hr, 24GB VRAM | |
| - A10G-large: $1.50/hr, 24GB VRAM | |
| - A100-large: $2.50/hr, 80GB VRAM | |
| """ | |

from __future__ import annotations

import json
import os
from dataclasses import dataclass
from pathlib import Path
from typing import Final, Literal

# Hardware mode: local CUDA or HuggingFace Spaces
HardwareMode = Literal["local", "spaces"]

# Spaces hardware flavors (from HF docs)
SpacesHardware = Literal[
    "zero-gpu",      # ZeroGPU (H200, dynamic, free for PRO)
    "t4-small",      # Nvidia T4 small
    "t4-medium",     # Nvidia T4 medium
    "l4x1",          # 1x Nvidia L4
    "l4x4",          # 4x Nvidia L4
    "l40s-x1",       # 1x Nvidia L40S
    "l40s-x4",       # 4x Nvidia L40S
    "a10g-small",    # Nvidia A10G small
    "a10g-large",    # Nvidia A10G large
    "a10g-largex2",  # 2x Nvidia A10G large
    "a10g-largex4",  # 4x Nvidia A10G large
    "a100-large",    # Nvidia A100 large (80GB)
]

# Hardware specs for display
SPACES_HARDWARE_SPECS: Final[dict[str, dict]] = {
    "zero-gpu": {
        "name": "ZeroGPU (H200)",
        "vram": "70GB",
        "price": "Free (PRO)",
        "description": "Dynamic allocation, best for demos",
    },
    "t4-small": {
        "name": "Nvidia T4 small",
        "vram": "16GB",
        "price": "$0.40/hr",
        "description": "4 vCPU, 15GB RAM",
    },
    "t4-medium": {
        "name": "Nvidia T4 medium",
        "vram": "16GB",
        "price": "$0.60/hr",
        "description": "8 vCPU, 30GB RAM",
    },
    "l4x1": {
        "name": "1x Nvidia L4",
        "vram": "24GB",
        "price": "$0.80/hr",
        "description": "8 vCPU, 30GB RAM",
    },
    "l4x4": {
        "name": "4x Nvidia L4",
        "vram": "96GB",
        "price": "$3.80/hr",
        "description": "48 vCPU, 186GB RAM",
    },
    "l40s-x1": {
        "name": "1x Nvidia L40S",
        "vram": "48GB",
        "price": "$1.80/hr",
        "description": "8 vCPU, 62GB RAM",
    },
    "l40s-x4": {
        "name": "4x Nvidia L40S",
        "vram": "192GB",
        "price": "$8.30/hr",
        "description": "48 vCPU, 382GB RAM",
    },
    "a10g-small": {
        "name": "Nvidia A10G small",
        "vram": "24GB",
        "price": "$1.00/hr",
        "description": "4 vCPU, 14GB RAM",
    },
    "a10g-large": {
        "name": "Nvidia A10G large",
        "vram": "24GB",
        "price": "$1.50/hr",
        "description": "12 vCPU, 46GB RAM",
    },
    "a10g-largex2": {
        "name": "2x Nvidia A10G large",
        "vram": "48GB",
        "price": "$3.00/hr",
        "description": "24 vCPU, 92GB RAM",
    },
    "a10g-largex4": {
        "name": "4x Nvidia A10G large",
        "vram": "96GB",
        "price": "$5.00/hr",
        "description": "48 vCPU, 184GB RAM",
    },
    "a100-large": {
        "name": "Nvidia A100 large",
        "vram": "80GB",
        "price": "$2.50/hr",
        "description": "12 vCPU, 142GB RAM, best for large models",
    },
}
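
# Example lookups (illustrative):
#   SPACES_HARDWARE_SPECS["a100-large"]["vram"]  -> "80GB"
#   SPACES_HARDWARE_SPECS["l4x4"]["price"]       -> "$3.80/hr"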

CONFIG_FILE: Final[Path] = Path(__file__).resolve().parent / ".hardware_config.json"


@dataclass
class HardwareConfig:
    """Persistent hardware configuration."""

    mode: HardwareMode = "local"
    spaces_hardware: SpacesHardware = "zero-gpu"
    spaces_duration: int = 180  # seconds for @spaces.GPU decorator
    local_device: str = "auto"  # auto, cuda, cpu, mps
    keep_model_on_device: bool = True

    def to_dict(self) -> dict:
        return {
            "mode": self.mode,
            "spaces_hardware": self.spaces_hardware,
            "spaces_duration": self.spaces_duration,
            "local_device": self.local_device,
            "keep_model_on_device": self.keep_model_on_device,
        }

    @classmethod
    def from_dict(cls, data: dict) -> "HardwareConfig":
        return cls(
            mode=data.get("mode", "local"),
            spaces_hardware=data.get("spaces_hardware", "zero-gpu"),
            spaces_duration=data.get("spaces_duration", 180),
            local_device=data.get("local_device", "auto"),
            keep_model_on_device=data.get("keep_model_on_device", True),
        )

    def save(self, path: Path = CONFIG_FILE) -> None:
        """Save configuration to JSON file."""
        path.write_text(json.dumps(self.to_dict(), indent=2))

    @classmethod
    def load(cls, path: Path = CONFIG_FILE) -> "HardwareConfig":
        """Load configuration from JSON file, or return defaults."""
        if path.exists():
            try:
                data = json.loads(path.read_text())
                return cls.from_dict(data)
            except Exception:
                pass  # corrupt or unreadable config: fall back to defaults
        return cls()
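
# Illustrative round trip (writes .hardware_config.json next to this module):
#
#   cfg = HardwareConfig(mode="spaces", spaces_hardware="a100-large")
#   cfg.save()
#   assert HardwareConfig.load().spaces_hardware == "a100-large"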


def get_hardware_choices() -> list[tuple[str, str]]:
    """Get hardware choices for a Gradio dropdown.

    Returns a list of (display_name, value) tuples.
    """
    choices = [
        ("🖥️ Local CUDA (auto-detect)", "local"),
    ]
    for hw_id, spec in SPACES_HARDWARE_SPECS.items():
        label = f"☁️ {spec['name']} - {spec['vram']} VRAM ({spec['price']})"
        choices.append((label, f"spaces:{hw_id}"))
    return choices


def parse_hardware_choice(choice: str) -> tuple[HardwareMode, SpacesHardware | None]:
    """Parse a hardware choice string into (mode, spaces hardware)."""
    if choice.startswith("spaces:"):
        # removeprefix strips only the leading tag, unlike str.replace,
        # which would also rewrite any later occurrence of "spaces:".
        hw = choice.removeprefix("spaces:")
        return "spaces", hw  # type: ignore[return-value]
    # "local" and any unrecognised value fall back to local mode
    return "local", None


def is_running_on_spaces() -> bool:
    """Check whether we're running on HuggingFace Spaces."""
    return os.getenv("SPACE_ID") is not None


def get_spaces_module():
    """Import and return the `spaces` module if available, else None."""
    try:
        import spaces

        return spaces
    except ImportError:
        return None
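
# Sketch of an intended call site (the `run_inference` name is hypothetical):
# wrap a GPU-bound function with @spaces.GPU only when the package exists, so
# the same code path also runs on local CUDA.
#
#   _spaces = get_spaces_module()
#   if _spaces is not None:
#       run_inference = _spaces.GPU(duration=get_config().spaces_duration)(run_inference)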

# Global config instance
_config: HardwareConfig | None = None


def get_config() -> HardwareConfig:
    """Get the global hardware configuration, loading it on first use."""
    global _config
    if _config is None:
        _config = HardwareConfig.load()
    return _config


def update_config(
    mode: HardwareMode | None = None,
    spaces_hardware: SpacesHardware | None = None,
    spaces_duration: int | None = None,
    local_device: str | None = None,
    keep_model_on_device: bool | None = None,
    save: bool = True,
) -> HardwareConfig:
    """Update and optionally save the hardware configuration."""
    global _config
    config = get_config()
    if mode is not None:
        config.mode = mode
    if spaces_hardware is not None:
        config.spaces_hardware = spaces_hardware
    if spaces_duration is not None:
        config.spaces_duration = spaces_duration
    if local_device is not None:
        config.local_device = local_device
    if keep_model_on_device is not None:
        config.keep_model_on_device = keep_model_on_device
    if save:
        config.save()
    _config = config
    return config
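

if __name__ == "__main__":
    # Minimal smoke test (illustrative): print the active configuration and
    # the dropdown choices the UI would offer.
    cfg = get_config()
    print(f"mode={cfg.mode}, spaces_hardware={cfg.spaces_hardware}, local_device={cfg.local_device}")
    print("running on Spaces:", is_running_on_spaces())
    for label, value in get_hardware_choices():
        print(f"  {value:20s} {label}")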