ml-sharp

Sleeping

ml-sharp / mcp_server.py

Robin L. M. Cheung, MBA

feat: Add local CUDA support, MCP server, Spaces GPU selection, and stacking roadmap

01504c4 2 days ago

6.8 kB

	"""SHARP MCP Server for programmatic access to 3D Gaussian prediction.

	Run standalone:
	uv run python mcp_server.py

	Or integrate with MCP clients via stdio transport.
	"""

	from __future__ import annotations

	import json
	import os
	from pathlib import Path
	from typing import Literal

	import torch
	from mcp.server.fastmcp import FastMCP

	from model_utils import (
	DEFAULT_OUTPUTS_DIR,
	ModelWrapper,
	TrajectoryType,
	get_global_model,
	)

	# Port for FastMCP's network transports, overridable via SHARP_MCP_PORT.
	# NOTE(review): the stdio transport used by the __main__ block does not
	# listen on a port, so this only matters if the server is run over
	# SSE / streamable HTTP.
	MCP_PORT: int = int(os.getenv("SHARP_MCP_PORT", "49201"))

	# `instructions` is the FastMCP constructor field for the human-readable
	# server description; `description` is not a documented parameter.
	# MCP_PORT is passed through so the SHARP_MCP_PORT override actually
	# reaches the server settings instead of being parsed and discarded.
	mcp = FastMCP(
	    "sharp",
	    instructions="SHARP: Single-image 3D Gaussian scene prediction",
	    port=MCP_PORT,
	)

	# -----------------------------------------------------------------------------
	# Tools
	# -----------------------------------------------------------------------------


	@mcp.tool()
	def sharp_predict(
	    image_path: str,
	    render_video: bool = True,
	    trajectory_type: TrajectoryType = "rotate_forward",
	    num_frames: int = 60,
	    fps: int = 30,
	    output_long_side: int | None = None,
	) -> dict:
	    """Predict 3D Gaussians from a single image.

	    The image is handed to the shared model returned by
	    ``get_global_model()``, which exports a PLY file and, when requested,
	    renders a camera-trajectory video.

	    Args:
	        image_path: Absolute path to input image (jpg/png/webp).
	        render_video: Whether to render a camera trajectory video
	            (requires CUDA).
	        trajectory_type: Camera trajectory type
	            (swipe/shake/rotate/rotate_forward).
	        num_frames: Number of frames for video rendering.
	        fps: Frames per second for video.
	        output_long_side: Output resolution (longest side).
	            None = match input.

	    Returns:
	        dict with keys:
	        - ply_path: Path to exported PLY file
	        - video_path: Path to rendered MP4 (or null if not rendered)
	        - cuda_available: Whether CUDA was available

	    Raises:
	        FileNotFoundError: If ``image_path`` does not exist, or exists but
	            is not a regular file (for example a directory).
	    """
	    image_path_obj = Path(image_path)
	    if not image_path_obj.exists():
	        raise FileNotFoundError(f"Image not found: {image_path}")
	    # Reject directories and other non-file entries up front, before the
	    # model is loaded, rather than failing somewhere inside prediction.
	    if not image_path_obj.is_file():
	        raise FileNotFoundError(f"Image path is not a file: {image_path}")

	    model = get_global_model()
	    video_path, ply_path = model.predict_and_maybe_render(
	        image_path_obj,
	        trajectory_type=trajectory_type,
	        num_frames=num_frames,
	        fps=fps,
	        output_long_side=output_long_side,
	        render_video=render_video,
	    )

	    return {
	        "ply_path": str(ply_path),
	        # A falsy video path means no video was produced; report it as null.
	        "video_path": str(video_path) if video_path else None,
	        "cuda_available": torch.cuda.is_available(),
	    }


	@mcp.tool()
	def sharp_render(
	    ply_path: str,
	    trajectory_type: TrajectoryType = "rotate_forward",
	    num_frames: int = 60,
	    fps: int = 30,
	    output_long_side: int | None = None,
	) -> dict:
	    """Render a video from an existing PLY file (not yet implemented).

	    This tool is a placeholder: it performs no rendering and ignores every
	    argument. It always returns the same error payload directing callers to
	    ``sharp_predict`` with ``render_video=True``. Future versions may
	    support loading Gaussians from PLY.

	    Args:
	        ply_path: Path to PLY file (from previous prediction). Unused.
	        trajectory_type: Camera trajectory type. Unused.
	        num_frames: Number of frames. Unused.
	        fps: Frames per second. Unused.
	        output_long_side: Output resolution. Unused.

	    Returns:
	        dict with keys:
	        - error: Message stating the feature is not implemented
	        - hint: Short explanation of why PLY input is insufficient
	    """
	    return {
	        "error": "Rendering from PLY not yet implemented. Use sharp_predict with render_video=True.",
	        "hint": "PLY files store only point data, not the full Gaussian parameters needed for rendering.",
	    }


	@mcp.tool()
	def list_outputs() -> dict:
	    """Enumerate the PLY and MP4 files in the outputs directory.

	    Only files directly inside ``DEFAULT_OUTPUTS_DIR`` are matched (the
	    glob patterns are not recursive). Each list is sorted by path.

	    Returns:
	        dict with keys:
	        - outputs_dir: Path to outputs directory
	        - ply_files: List of PLY file paths
	        - video_files: List of MP4 file paths
	    """
	    root = DEFAULT_OUTPUTS_DIR

	    def matching(pattern: str) -> list[str]:
	        # Sort the path objects first, then stringify, so ordering follows
	        # path comparison rather than plain string comparison.
	        return [str(entry) for entry in sorted(root.glob(pattern))]

	    return {
	        "outputs_dir": str(root),
	        "ply_files": matching("*.ply"),
	        "video_files": matching("*.mp4"),
	    }


	# -----------------------------------------------------------------------------
	# Resources
	# -----------------------------------------------------------------------------


	@mcp.resource("sharp://info")
	def get_info() -> str:
	    """Report server status as a JSON document indented by two spaces.

	    The report covers CUDA availability, one entry per visible GPU (index,
	    name, total memory in GiB rounded to two decimals, compute capability),
	    the outputs directory, the list of checkpoint sources, and the current
	    values of the relevant environment variables.

	    Returns:
	        JSON-encoded string describing the server.
	    """
	    has_cuda = torch.cuda.is_available()
	    # Only query the device count when CUDA is usable; otherwise report 0.
	    device_total = torch.cuda.device_count() if has_cuda else 0
	    bytes_per_gib = 1024**3

	    def describe(idx: int) -> dict:
	        props = torch.cuda.get_device_properties(idx)
	        return {
	            "index": idx,
	            "name": props.name,
	            "total_memory_gb": round(props.total_memory / bytes_per_gib, 2),
	            "compute_capability": f"{props.major}.{props.minor}",
	        }

	    # (variable name, value reported when the variable is unset)
	    tracked_env = (
	        ("SHARP_CHECKPOINT_PATH", "(not set)"),
	        ("SHARP_KEEP_MODEL_ON_DEVICE", "1"),
	        ("CUDA_VISIBLE_DEVICES", "(not set)"),
	    )

	    payload = {
	        "model": "SHARP (Apple ml-sharp)",
	        "description": "Single-image 3D Gaussian scene prediction",
	        "cuda_available": has_cuda,
	        "cuda_device_count": device_total,
	        "gpus": [describe(idx) for idx in range(device_total)],
	        "outputs_dir": str(DEFAULT_OUTPUTS_DIR),
	        "checkpoint_sources": [
	            "SHARP_CHECKPOINT_PATH env var",
	            "HuggingFace Hub (apple/Sharp)",
	            "Upstream CDN (torch.hub)",
	        ],
	        "env_vars": {
	            name: os.getenv(name, fallback) for name, fallback in tracked_env
	        },
	    }
	    return json.dumps(payload, indent=2)


	@mcp.resource("sharp://help")
	def get_help() -> str:
	    """Get usage help for the SHARP MCP server.

	    The text is Markdown covering the registered tools (``sharp_predict``,
	    ``sharp_render``, ``list_outputs``), the resources, and the environment
	    variables.

	    Returns:
	        The help text, with no leading or trailing whitespace.
	    """
	    # Built from explicit lines rather than one triple-quoted literal so the
	    # source file's indentation can never leak into the emitted text.
	    help_lines = (
	        "# SHARP MCP Server",
	        "",
	        "## Tools",
	        "",
	        "### sharp_predict",
	        "Predict 3D Gaussians from a single image.",
	        "",
	        "Parameters:",
	        "- image_path (required): Absolute path to input image",
	        "- render_video: Whether to render MP4 (default: true, requires CUDA)",
	        "- trajectory_type: swipe | shake | rotate | rotate_forward (default: rotate_forward)",
	        "- num_frames: Number of video frames (default: 60)",
	        "- fps: Video frame rate (default: 30)",
	        "- output_long_side: Output resolution, null = match input",
	        "",
	        "### sharp_render",
	        "Render a video from an existing PLY file.",
	        "Not yet implemented: always returns an error. Use sharp_predict with",
	        "render_video=true instead.",
	        "",
	        "### list_outputs",
	        "List all generated PLY and MP4 files.",
	        "",
	        "## Resources",
	        "",
	        "### sharp://info",
	        "Server info, GPU status, configuration.",
	        "",
	        "### sharp://help",
	        "This help text.",
	        "",
	        "## Environment Variables",
	        "",
	        "- SHARP_MCP_PORT: MCP server port (default: 49201)",
	        "- SHARP_CHECKPOINT_PATH: Local checkpoint path override",
	        "- SHARP_KEEP_MODEL_ON_DEVICE: Keep model on GPU (default: 1)",
	        '- CUDA_VISIBLE_DEVICES: GPU selection (e.g., "0" or "0,1")',
	    )
	    return "\n".join(help_lines)


	# -----------------------------------------------------------------------------
	# Main
	# -----------------------------------------------------------------------------

	if __name__ == "__main__":
	    # Script entry point: serve MCP clients over standard input/output.
	    # "stdio" is FastMCP's default transport; it is spelled out for clarity.
	    mcp.run(transport="stdio")