| """Hardware configuration for local CUDA and HuggingFace Spaces GPU selection. | |
| This module provides: | |
| - Hardware mode selection (local CUDA vs Spaces GPU) | |
| - Persistent configuration via JSON file | |
| - HuggingFace Spaces GPU hardware options | |
| Spaces GPU pricing (as of Dec 2024): | |
| - ZeroGPU (H200): Free (PRO subscribers), dynamic allocation | |
| - T4-small: $0.40/hr, 16GB VRAM | |
| - T4-medium: $0.60/hr, 16GB VRAM | |
| - L4x1: $0.80/hr, 24GB VRAM | |
| - L4x4: $3.80/hr, 96GB VRAM | |
| - L40Sx1: $1.80/hr, 48GB VRAM | |
| - L40Sx4: $8.30/hr, 192GB VRAM | |
| - A10G-small: $1.00/hr, 24GB VRAM | |
| - A10G-large: $1.50/hr, 24GB VRAM | |
| - A100-large: $2.50/hr, 80GB VRAM | |
| """ | |

from __future__ import annotations

import json
import os
from dataclasses import dataclass
from pathlib import Path
from typing import Final, Literal

# Hardware mode: local CUDA or HuggingFace Spaces
HardwareMode = Literal["local", "spaces"]

# Spaces hardware flavors (from HF docs)
SpacesHardware = Literal[
    "zero-gpu",      # ZeroGPU (H200, dynamic, free for PRO)
    "t4-small",      # Nvidia T4 small
    "t4-medium",     # Nvidia T4 medium
    "l4x1",          # 1x Nvidia L4
    "l4x4",          # 4x Nvidia L4
    "l40s-x1",       # 1x Nvidia L40S
    "l40s-x4",       # 4x Nvidia L40S
    "a10g-small",    # Nvidia A10G small
    "a10g-large",    # Nvidia A10G large
    "a10g-largex2",  # 2x Nvidia A10G large
    "a10g-largex4",  # 4x Nvidia A10G large
    "a100-large",    # Nvidia A100 large (80GB)
]

# Hardware specs for display
SPACES_HARDWARE_SPECS: Final[dict[str, dict]] = {
    "zero-gpu": {
        "name": "ZeroGPU (H200)",
        "vram": "70GB",
        "price": "Free (PRO)",
        "description": "Dynamic allocation, best for demos",
    },
    "t4-small": {
        "name": "Nvidia T4 small",
        "vram": "16GB",
        "price": "$0.40/hr",
        "description": "4 vCPU, 15GB RAM",
    },
    "t4-medium": {
        "name": "Nvidia T4 medium",
        "vram": "16GB",
        "price": "$0.60/hr",
        "description": "8 vCPU, 30GB RAM",
    },
    "l4x1": {
        "name": "1x Nvidia L4",
        "vram": "24GB",
        "price": "$0.80/hr",
        "description": "8 vCPU, 30GB RAM",
    },
    "l4x4": {
        "name": "4x Nvidia L4",
        "vram": "96GB",
        "price": "$3.80/hr",
        "description": "48 vCPU, 186GB RAM",
    },
    "l40s-x1": {
        "name": "1x Nvidia L40S",
        "vram": "48GB",
        "price": "$1.80/hr",
        "description": "8 vCPU, 62GB RAM",
    },
    "l40s-x4": {
        "name": "4x Nvidia L40S",
        "vram": "192GB",
        "price": "$8.30/hr",
        "description": "48 vCPU, 382GB RAM",
    },
    "a10g-small": {
        "name": "Nvidia A10G small",
        "vram": "24GB",
        "price": "$1.00/hr",
        "description": "4 vCPU, 14GB RAM",
    },
    "a10g-large": {
        "name": "Nvidia A10G large",
        "vram": "24GB",
        "price": "$1.50/hr",
        "description": "12 vCPU, 46GB RAM",
    },
    "a10g-largex2": {
        "name": "2x Nvidia A10G large",
        "vram": "48GB",
        "price": "$3.00/hr",
        "description": "24 vCPU, 92GB RAM",
    },
    "a10g-largex4": {
        "name": "4x Nvidia A10G large",
        "vram": "96GB",
        "price": "$5.00/hr",
        "description": "48 vCPU, 184GB RAM",
    },
    "a100-large": {
        "name": "Nvidia A100 large",
        "vram": "80GB",
        "price": "$2.50/hr",
        "description": "12 vCPU, 142GB RAM, best for large models",
    },
}
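
# Example lookups (illustrative):
#   SPACES_HARDWARE_SPECS["a100-large"]["vram"]  -> "80GB"
#   SPACES_HARDWARE_SPECS["l4x4"]["price"]       -> "$3.80/hr"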

CONFIG_FILE: Final[Path] = Path(__file__).resolve().parent / ".hardware_config.json"


@dataclass
class HardwareConfig:
    """Persistent hardware configuration."""

    mode: HardwareMode = "local"
    spaces_hardware: SpacesHardware = "zero-gpu"
    spaces_duration: int = 180  # seconds for @spaces.GPU decorator
    local_device: str = "auto"  # auto, cuda, cpu, mps
    keep_model_on_device: bool = True

    def to_dict(self) -> dict:
        return {
            "mode": self.mode,
            "spaces_hardware": self.spaces_hardware,
            "spaces_duration": self.spaces_duration,
            "local_device": self.local_device,
            "keep_model_on_device": self.keep_model_on_device,
        }

    @classmethod
    def from_dict(cls, data: dict) -> "HardwareConfig":
        return cls(
            mode=data.get("mode", "local"),
            spaces_hardware=data.get("spaces_hardware", "zero-gpu"),
            spaces_duration=data.get("spaces_duration", 180),
            local_device=data.get("local_device", "auto"),
            keep_model_on_device=data.get("keep_model_on_device", True),
        )

    def save(self, path: Path = CONFIG_FILE) -> None:
        """Save configuration to JSON file."""
        path.write_text(json.dumps(self.to_dict(), indent=2))

    @classmethod
    def load(cls, path: Path = CONFIG_FILE) -> "HardwareConfig":
        """Load configuration from JSON file, or return defaults."""
        if path.exists():
            try:
                data = json.loads(path.read_text())
                return cls.from_dict(data)
            except Exception:
                pass  # corrupt or unreadable config: fall back to defaults
        return cls()
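
# Illustrative round trip (writes .hardware_config.json next to this module):
#
#   cfg = HardwareConfig(mode="spaces", spaces_hardware="a100-large")
#   cfg.save()
#   assert HardwareConfig.load().spaces_hardware == "a100-large"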


def get_hardware_choices() -> list[tuple[str, str]]:
    """Get hardware choices for a Gradio dropdown.

    Returns a list of (display_name, value) tuples.
    """
    choices = [
        ("🖥️ Local CUDA (auto-detect)", "local"),
    ]
    for hw_id, spec in SPACES_HARDWARE_SPECS.items():
        label = f"☁️ {spec['name']} - {spec['vram']} VRAM ({spec['price']})"
        choices.append((label, f"spaces:{hw_id}"))
    return choices


def parse_hardware_choice(choice: str) -> tuple[HardwareMode, SpacesHardware | None]:
    """Parse a hardware choice string into (mode, spaces hardware)."""
    if choice.startswith("spaces:"):
        # removeprefix strips only the leading tag, unlike str.replace,
        # which would also rewrite any later occurrence of "spaces:".
        hw = choice.removeprefix("spaces:")
        return "spaces", hw  # type: ignore[return-value]
    # "local" and any unrecognised value fall back to local mode
    return "local", None


def is_running_on_spaces() -> bool:
    """Check whether we're running on HuggingFace Spaces."""
    return os.getenv("SPACE_ID") is not None


def get_spaces_module():
    """Import and return the `spaces` module if available, else None."""
    try:
        import spaces

        return spaces
    except ImportError:
        return None
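
# Sketch of an intended call site (the `run_inference` name is hypothetical):
# wrap a GPU-bound function with @spaces.GPU only when the package exists, so
# the same code path also runs on local CUDA.
#
#   _spaces = get_spaces_module()
#   if _spaces is not None:
#       run_inference = _spaces.GPU(duration=get_config().spaces_duration)(run_inference)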

# Global config instance
_config: HardwareConfig | None = None


def get_config() -> HardwareConfig:
    """Get the global hardware configuration, loading it on first use."""
    global _config
    if _config is None:
        _config = HardwareConfig.load()
    return _config


def update_config(
    mode: HardwareMode | None = None,
    spaces_hardware: SpacesHardware | None = None,
    spaces_duration: int | None = None,
    local_device: str | None = None,
    keep_model_on_device: bool | None = None,
    save: bool = True,
) -> HardwareConfig:
    """Update and optionally save the hardware configuration."""
    global _config
    config = get_config()
    if mode is not None:
        config.mode = mode
    if spaces_hardware is not None:
        config.spaces_hardware = spaces_hardware
    if spaces_duration is not None:
        config.spaces_duration = spaces_duration
    if local_device is not None:
        config.local_device = local_device
    if keep_model_on_device is not None:
        config.keep_model_on_device = keep_model_on_device
    if save:
        config.save()
    _config = config
    return config
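

if __name__ == "__main__":
    # Minimal smoke test (illustrative): print the active configuration and
    # the dropdown choices the UI would offer.
    cfg = get_config()
    print(f"mode={cfg.mode}, spaces_hardware={cfg.spaces_hardware}, local_device={cfg.local_device}")
    print("running on Spaces:", is_running_on_spaces())
    for label, value in get_hardware_choices():
        print(f"  {value:20s} {label}")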