Spaces:
Sleeping
Sleeping
Robin L. M. Cheung, MBA
feat: Add local CUDA support, MCP server, Spaces GPU selection, and stacking roadmap
01504c4
| """SHARP MCP Server for programmatic access to 3D Gaussian prediction. | |
| Run standalone: | |
| uv run python mcp_server.py | |
| Or integrate with MCP clients via stdio transport. | |
| """ | |
| from __future__ import annotations | |
| import json | |
| import os | |
| from pathlib import Path | |
| from typing import Literal | |
| import torch | |
| from mcp.server.fastmcp import FastMCP | |
| from model_utils import ( | |
| DEFAULT_OUTPUTS_DIR, | |
| ModelWrapper, | |
| TrajectoryType, | |
| get_global_model, | |
| ) | |
# Port for network transports, overridable through the SHARP_MCP_PORT
# environment variable. A non-integer value raises ValueError at import time.
# The stdio transport used by the __main__ entry point opens no socket, so the
# port only takes effect when the server is run over SSE / streamable HTTP.
MCP_PORT: int = int(os.getenv("SHARP_MCP_PORT", "49201"))

# Single FastMCP application object; tools and resources in this module attach
# to it. The human-readable summary is passed as ``instructions`` (the
# documented constructor parameter) rather than ``description``, and the
# configured port is forwarded so SHARP_MCP_PORT is no longer a dead setting.
mcp = FastMCP(
    "sharp",
    instructions="SHARP: Single-image 3D Gaussian scene prediction",
    port=MCP_PORT,
)
| # ----------------------------------------------------------------------------- | |
| # Tools | |
| # ----------------------------------------------------------------------------- | |
@mcp.tool()  # register with the FastMCP instance so clients can call it
def sharp_predict(
    image_path: str,
    render_video: bool = True,
    trajectory_type: TrajectoryType = "rotate_forward",
    num_frames: int = 60,
    fps: int = 30,
    output_long_side: int | None = None,
) -> dict:
    """Predict 3D Gaussians from a single image.

    Args:
        image_path: Absolute path to input image (jpg/png/webp).
        render_video: Whether to render a camera trajectory video (requires CUDA).
        trajectory_type: Camera trajectory type (swipe/shake/rotate/rotate_forward).
        num_frames: Number of frames for video rendering.
        fps: Frames per second for video.
        output_long_side: Output resolution (longest side). None = match input.

    Returns:
        dict with keys:
        - ply_path: Path to exported PLY file
        - video_path: Path to rendered MP4 (or null if not rendered)
        - cuda_available: Whether CUDA was available

    Raises:
        FileNotFoundError: If ``image_path`` does not point to an existing
            regular file.
    """
    image_path_obj = Path(image_path)
    # is_file() rather than exists(): a directory path would otherwise pass the
    # guard and only fail later, inside the model call.
    if not image_path_obj.is_file():
        raise FileNotFoundError(f"Image not found: {image_path}")

    model = get_global_model()
    video_path, ply_path = model.predict_and_maybe_render(
        image_path_obj,
        trajectory_type=trajectory_type,
        num_frames=num_frames,
        fps=fps,
        output_long_side=output_long_side,
        render_video=render_video,
    )

    return {
        "ply_path": str(ply_path),
        # A falsy video_path (e.g. None) means no video was produced.
        "video_path": str(video_path) if video_path else None,
        "cuda_available": torch.cuda.is_available(),
    }
@mcp.tool()  # register with the FastMCP instance, like the other tools
def sharp_render(
    ply_path: str,
    trajectory_type: TrajectoryType = "rotate_forward",
    num_frames: int = 60,
    fps: int = 30,
    output_long_side: int | None = None,
) -> dict:
    """Render a video from an existing PLY file (not yet implemented).

    Note: This requires re-predicting from the original image since Gaussians
    are not stored in standard PLY format. For now, returns an error.
    Future versions may support loading Gaussians from PLY.

    Args:
        ply_path: Path to PLY file (from previous prediction).
        trajectory_type: Camera trajectory type.
        num_frames: Number of frames.
        fps: Frames per second.
        output_long_side: Output resolution.

    Returns:
        dict with keys ``error`` and ``hint`` describing why rendering is
        unavailable and what to use instead.
    """
    # All arguments are accepted for forward compatibility but are unused:
    # the function always reports that the feature is missing.
    return {
        "error": "Rendering from PLY not yet implemented. Use sharp_predict with render_video=True.",
        "hint": "PLY files store only point data, not the full Gaussian parameters needed for rendering.",
    }
@mcp.tool()  # register with the FastMCP instance so clients can call it
def list_outputs() -> dict:
    """List all generated output files (PLY and MP4).

    Only the top level of the outputs directory is scanned (non-recursive
    ``*.ply`` / ``*.mp4`` globs); each list is sorted by path.

    Returns:
        dict with keys:
        - outputs_dir: Path to outputs directory
        - ply_files: List of PLY file paths
        - video_files: List of MP4 file paths
    """
    outputs_dir = DEFAULT_OUTPUTS_DIR
    return {
        "outputs_dir": str(outputs_dir),
        "ply_files": [str(path) for path in sorted(outputs_dir.glob("*.ply"))],
        "video_files": [str(path) for path in sorted(outputs_dir.glob("*.mp4"))],
    }
| # ----------------------------------------------------------------------------- | |
| # Resources | |
| # ----------------------------------------------------------------------------- | |
@mcp.resource("sharp://info")  # expose under the URI named in the help text
def get_info() -> str:
    """Get SHARP server info including GPU status and configuration.

    Returns:
        A JSON document (2-space indent) with the model name and description,
        CUDA availability, per-GPU properties, the outputs directory, the
        checkpoint lookup sources, and the current values of the relevant
        environment variables.
    """
    cuda_available = torch.cuda.is_available()
    # Query the device count once; without CUDA there is nothing to enumerate.
    device_count = torch.cuda.device_count() if cuda_available else 0

    gpu_info = []
    for index in range(device_count):
        props = torch.cuda.get_device_properties(index)
        gpu_info.append(
            {
                "index": index,
                "name": props.name,
                # total_memory is divided by 1024**3 to report GiB.
                "total_memory_gb": round(props.total_memory / (1024**3), 2),
                "compute_capability": f"{props.major}.{props.minor}",
            }
        )

    info = {
        "model": "SHARP (Apple ml-sharp)",
        "description": "Single-image 3D Gaussian scene prediction",
        "cuda_available": cuda_available,
        "cuda_device_count": device_count,
        "gpus": gpu_info,
        "outputs_dir": str(DEFAULT_OUTPUTS_DIR),
        "checkpoint_sources": [
            "SHARP_CHECKPOINT_PATH env var",
            "HuggingFace Hub (apple/Sharp)",
            "Upstream CDN (torch.hub)",
        ],
        # Unset variables are reported with the fallback shown here.
        "env_vars": {
            "SHARP_CHECKPOINT_PATH": os.getenv("SHARP_CHECKPOINT_PATH", "(not set)"),
            "SHARP_KEEP_MODEL_ON_DEVICE": os.getenv("SHARP_KEEP_MODEL_ON_DEVICE", "1"),
            "CUDA_VISIBLE_DEVICES": os.getenv("CUDA_VISIBLE_DEVICES", "(not set)"),
        },
    }
    return json.dumps(info, indent=2)
@mcp.resource("sharp://help")  # expose under the URI named in the help text
def get_help() -> str:
    """Get usage help for the SHARP MCP server.

    Returns:
        Markdown text describing the tools, resources and environment
        variables, with leading and trailing whitespace stripped.
    """
    # Kept flush-left so the literal carries no indentation into the output.
    help_text = """
# SHARP MCP Server
## Tools
### sharp_predict
Predict 3D Gaussians from a single image.
Parameters:
- image_path (required): Absolute path to input image
- render_video: Whether to render MP4 (default: true, requires CUDA)
- trajectory_type: swipe | shake | rotate | rotate_forward (default: rotate_forward)
- num_frames: Number of video frames (default: 60)
- fps: Video frame rate (default: 30)
- output_long_side: Output resolution, null = match input
### list_outputs
List all generated PLY and MP4 files.
## Resources
### sharp://info
Server info, GPU status, configuration.
### sharp://help
This help text.
## Environment Variables
- SHARP_MCP_PORT: MCP server port (default: 49201)
- SHARP_CHECKPOINT_PATH: Local checkpoint path override
- SHARP_KEEP_MODEL_ON_DEVICE: Keep model on GPU (default: 1)
- CUDA_VISIBLE_DEVICES: GPU selection (e.g., "0" or "0,1")
"""
    return help_text.strip()
| # ----------------------------------------------------------------------------- | |
| # Main | |
| # ----------------------------------------------------------------------------- | |
if __name__ == "__main__":
    # Serve over stdio (FastMCP's default transport, spelled out here) so an
    # MCP client can launch this script and talk to it via stdin/stdout.
    mcp.run(transport="stdio")