"""SHARP MCP Server for programmatic access to 3D Gaussian prediction. Run standalone: uv run python mcp_server.py Or integrate with MCP clients via stdio transport. """ from __future__ import annotations import json import os from pathlib import Path from typing import Literal import torch from mcp.server.fastmcp import FastMCP from model_utils import ( DEFAULT_OUTPUTS_DIR, ModelWrapper, TrajectoryType, get_global_model, ) MCP_PORT: int = int(os.getenv("SHARP_MCP_PORT", "49201")) mcp = FastMCP( "sharp", description="SHARP: Single-image 3D Gaussian scene prediction", ) # ----------------------------------------------------------------------------- # Tools # ----------------------------------------------------------------------------- @mcp.tool() def sharp_predict( image_path: str, render_video: bool = True, trajectory_type: TrajectoryType = "rotate_forward", num_frames: int = 60, fps: int = 30, output_long_side: int | None = None, ) -> dict: """Predict 3D Gaussians from a single image. Args: image_path: Absolute path to input image (jpg/png/webp). render_video: Whether to render a camera trajectory video (requires CUDA). trajectory_type: Camera trajectory type (swipe/shake/rotate/rotate_forward). num_frames: Number of frames for video rendering. fps: Frames per second for video. output_long_side: Output resolution (longest side). None = match input. Returns: dict with keys: - ply_path: Path to exported PLY file - video_path: Path to rendered MP4 (or null if not rendered) - cuda_available: Whether CUDA was available """ image_path_obj = Path(image_path) if not image_path_obj.exists(): raise FileNotFoundError(f"Image not found: {image_path}") model = get_global_model() video_path, ply_path = model.predict_and_maybe_render( image_path_obj, trajectory_type=trajectory_type, num_frames=num_frames, fps=fps, output_long_side=output_long_side, render_video=render_video, ) return { "ply_path": str(ply_path), "video_path": str(video_path) if video_path else None, "cuda_available": torch.cuda.is_available(), } @mcp.tool() def sharp_render( ply_path: str, trajectory_type: TrajectoryType = "rotate_forward", num_frames: int = 60, fps: int = 30, output_long_side: int | None = None, ) -> dict: """Render a video from an existing PLY file. Note: This requires re-predicting from the original image since Gaussians are not stored in standard PLY format. For now, returns an error. Future versions may support loading Gaussians from PLY. Args: ply_path: Path to PLY file (from previous prediction). trajectory_type: Camera trajectory type. num_frames: Number of frames. fps: Frames per second. output_long_side: Output resolution. Returns: dict with error message (feature not yet implemented). """ return { "error": "Rendering from PLY not yet implemented. Use sharp_predict with render_video=True.", "hint": "PLY files store only point data, not the full Gaussian parameters needed for rendering.", } @mcp.tool() def list_outputs() -> dict: """List all generated output files (PLY and MP4). Returns: dict with keys: - outputs_dir: Path to outputs directory - ply_files: List of PLY file paths - video_files: List of MP4 file paths """ outputs_dir = DEFAULT_OUTPUTS_DIR ply_files = sorted(outputs_dir.glob("*.ply")) video_files = sorted(outputs_dir.glob("*.mp4")) return { "outputs_dir": str(outputs_dir), "ply_files": [str(f) for f in ply_files], "video_files": [str(f) for f in video_files], } # ----------------------------------------------------------------------------- # Resources # ----------------------------------------------------------------------------- @mcp.resource("sharp://info") def get_info() -> str: """Get SHARP server info including GPU status and configuration.""" cuda_available = torch.cuda.is_available() gpu_info = [] if cuda_available: for i in range(torch.cuda.device_count()): props = torch.cuda.get_device_properties(i) gpu_info.append({ "index": i, "name": props.name, "total_memory_gb": round(props.total_memory / (1024**3), 2), "compute_capability": f"{props.major}.{props.minor}", }) info = { "model": "SHARP (Apple ml-sharp)", "description": "Single-image 3D Gaussian scene prediction", "cuda_available": cuda_available, "cuda_device_count": torch.cuda.device_count() if cuda_available else 0, "gpus": gpu_info, "outputs_dir": str(DEFAULT_OUTPUTS_DIR), "checkpoint_sources": [ "SHARP_CHECKPOINT_PATH env var", "HuggingFace Hub (apple/Sharp)", "Upstream CDN (torch.hub)", ], "env_vars": { "SHARP_CHECKPOINT_PATH": os.getenv("SHARP_CHECKPOINT_PATH", "(not set)"), "SHARP_KEEP_MODEL_ON_DEVICE": os.getenv("SHARP_KEEP_MODEL_ON_DEVICE", "1"), "CUDA_VISIBLE_DEVICES": os.getenv("CUDA_VISIBLE_DEVICES", "(not set)"), }, } return json.dumps(info, indent=2) @mcp.resource("sharp://help") def get_help() -> str: """Get usage help for the SHARP MCP server.""" help_text = """ # SHARP MCP Server ## Tools ### sharp_predict Predict 3D Gaussians from a single image. Parameters: - image_path (required): Absolute path to input image - render_video: Whether to render MP4 (default: true, requires CUDA) - trajectory_type: swipe | shake | rotate | rotate_forward (default: rotate_forward) - num_frames: Number of video frames (default: 60) - fps: Video frame rate (default: 30) - output_long_side: Output resolution, null = match input ### list_outputs List all generated PLY and MP4 files. ## Resources ### sharp://info Server info, GPU status, configuration. ### sharp://help This help text. ## Environment Variables - SHARP_MCP_PORT: MCP server port (default: 49201) - SHARP_CHECKPOINT_PATH: Local checkpoint path override - SHARP_KEEP_MODEL_ON_DEVICE: Keep model on GPU (default: 1) - CUDA_VISIBLE_DEVICES: GPU selection (e.g., "0" or "0,1") """ return help_text.strip() # ----------------------------------------------------------------------------- # Main # ----------------------------------------------------------------------------- if __name__ == "__main__": # Run as stdio transport for MCP clients mcp.run()