"""SHARP Gradio demo (minimal, responsive UI). This Space: - Runs Apple's SHARP model to predict a 3D Gaussian scene from a single image. - Exports a canonical `.ply` file for download. - Optionally renders a camera trajectory `.mp4` (CUDA / ZeroGPU only). Precompiled examples Place precompiled examples under `assets/examples/`. Recommended structure (matching stem): assets/examples/.jpg|png|webp assets/examples/.mp4 assets/examples/.ply Optional manifest (assets/examples/manifest.json): [ {"label": "Desk", "image": "desk.jpg", "video": "desk.mp4", "ply": "desk.ply"}, ... ] """ from __future__ import annotations import json from dataclasses import dataclass from pathlib import Path from typing import Final import gradio as gr import os from model_utils import ( TrajectoryType, predict_and_maybe_render_gpu, configure_gpu_mode, get_gpu_status, ) from hardware_config import ( get_hardware_choices, parse_hardware_choice, get_config, update_config, SPACES_HARDWARE_SPECS, is_running_on_spaces, ) # ----------------------------------------------------------------------------- # Paths & constants # ----------------------------------------------------------------------------- APP_DIR: Final[Path] = Path(__file__).resolve().parent OUTPUTS_DIR: Final[Path] = APP_DIR / "outputs" ASSETS_DIR: Final[Path] = APP_DIR / "assets" EXAMPLES_DIR: Final[Path] = ASSETS_DIR / "examples" IMAGE_EXTS: Final[tuple[str, ...]] = (".png", ".jpg", ".jpeg", ".webp") DEFAULT_QUEUE_MAX_SIZE: Final[int] = 32 DEFAULT_PORT: Final[int] = int(os.getenv("SHARP_PORT", "49200")) THEME: Final = gr.themes.Soft( primary_hue="indigo", secondary_hue="blue", neutral_hue="slate", ) CSS: Final[str] = """ /* Keep layout stable when scrollbars appear/disappear */ html { scrollbar-gutter: stable; } /* Use normal document flow (no fixed-height viewport shell) */ html, body { height: auto; } body { overflow: auto; } /* Comfortable max width; still fills small screens */ .gradio-container { max-width: 1400px; margin: 0 auto; padding: 0.75rem 1rem 1rem; box-sizing: border-box; } /* Make media components responsive without stretching */ #run-image, #run-video, #examples-image, #examples-video { width: 100%; } /* Keep aspect ratio and prevent runaway vertical growth on tall viewports */ #run-image img, #examples-image img { width: 100%; height: auto; max-height: 70vh; object-fit: contain; } #run-video video, #examples-video video { width: 100%; height: auto; max-height: 70vh; object-fit: contain; } /* On very small screens, reduce max media height a bit */ @media (max-width: 640px) { #run-image img, #examples-image img, #run-video video, #examples-video video { max-height: 55vh; } } /* Reduce extra whitespace in markdown blocks */ .gr-markdown > :first-child { margin-top: 0 !important; } .gr-markdown > :last-child { margin-bottom: 0 !important; } """ # ----------------------------------------------------------------------------- # Helpers # ----------------------------------------------------------------------------- def _ensure_dir(path: Path) -> Path: path.mkdir(parents=True, exist_ok=True) return path @dataclass(frozen=True, slots=True) class ExampleSpec: """A precompiled example bundle (image + optional mp4 + optional ply).""" label: str image: Path video: Path | None ply: Path | None def _normalize_key(path: str) -> str: """Normalize a path-like string for stable dictionary keys.""" try: return str(Path(path).resolve()) except Exception: return path def _load_manifest(manifest_path: Path) -> list[dict]: """Load manifest.json if present; return an empty list on errors.""" try: data = json.loads(manifest_path.read_text(encoding="utf-8")) if not isinstance(data, list): raise ValueError("manifest.json must contain a JSON list.") return [x for x in data if isinstance(x, dict)] except FileNotFoundError: return [] except Exception as e: # Manifest errors should not crash the app. print(f"[examples] Failed to parse manifest.json: {type(e).__name__}: {e}") return [] def discover_examples(examples_dir: Path) -> list[ExampleSpec]: """Discover example bundles under assets/examples/.""" _ensure_dir(examples_dir) manifest_rows = _load_manifest(examples_dir / "manifest.json") if manifest_rows: specs: list[ExampleSpec] = [] for row in manifest_rows: label = str(row.get("label") or "Example").strip() or "Example" image_rel = row.get("image") if not image_rel: continue image = (examples_dir / str(image_rel)).resolve() if not image.exists(): continue video = None ply = None if row.get("video"): v = (examples_dir / str(row["video"])).resolve() if v.exists(): video = v if row.get("ply"): p = (examples_dir / str(row["ply"])).resolve() if p.exists(): ply = p specs.append(ExampleSpec(label=label, image=image, video=video, ply=ply)) return specs # Fallback: infer bundles by filename stem images: list[Path] = [] for ext in IMAGE_EXTS: images.extend(sorted(examples_dir.glob(f"*{ext}"))) specs = [] for img in images: stem = img.stem video = examples_dir / f"{stem}.mp4" ply = examples_dir / f"{stem}.ply" specs.append( ExampleSpec( label=stem.replace("_", " ").strip() or stem, image=img.resolve(), video=video.resolve() if video.exists() else None, ply=ply.resolve() if ply.exists() else None, ) ) return specs _ensure_dir(OUTPUTS_DIR) EXAMPLE_SPECS: Final[list[ExampleSpec]] = discover_examples(EXAMPLES_DIR) EXAMPLE_INDEX_BY_PATH: Final[dict[str, ExampleSpec]] = { _normalize_key(str(s.image)): s for s in EXAMPLE_SPECS } EXAMPLE_INDEX_BY_NAME: Final[dict[str, ExampleSpec]] = { s.image.name: s for s in EXAMPLE_SPECS } def load_example_assets( image_path: str | None, ) -> tuple[str | None, str | None, str | None, str]: """Return (image, video, ply_path, status) for the selected example image.""" if not image_path: return None, None, None, "No example selected." spec = EXAMPLE_INDEX_BY_PATH.get(_normalize_key(image_path)) if spec is None: spec = EXAMPLE_INDEX_BY_NAME.get(Path(image_path).name) if spec is None: return image_path, None, None, "No matching example bundle found." video = str(spec.video) if spec.video is not None else None ply_path = str(spec.ply) if spec.ply is not None else None missing: list[str] = [] if video is None: missing.append("MP4") if ply_path is None: missing.append("PLY") msg = f"Loaded example: **{spec.label}**." if missing: msg += f" Missing: {', '.join(missing)}." return str(spec.image), video, ply_path, msg def _validate_image(image_path: str | None) -> None: if not image_path: raise gr.Error("Upload an image first.") # ----------------------------------------------------------------------------- # Hardware Configuration # ----------------------------------------------------------------------------- def _get_current_hardware_value() -> str: """Get current hardware choice value for dropdown.""" config = get_config() if config.mode == "local": return "local" return f"spaces:{config.spaces_hardware}" def _format_gpu_status() -> str: """Format GPU status as markdown.""" status = get_gpu_status() config = get_config() lines = ["### Current Status"] lines.append(f"- **Mode:** {'Local CUDA' if config.mode == 'local' else 'HuggingFace Spaces'}") if config.mode == "spaces": hw_spec = SPACES_HARDWARE_SPECS.get(config.spaces_hardware, {}) lines.append(f"- **Spaces Hardware:** {hw_spec.get('name', config.spaces_hardware)}") lines.append(f"- **VRAM:** {hw_spec.get('vram', 'N/A')}") lines.append(f"- **Price:** {hw_spec.get('price', 'N/A')}") lines.append(f"- **Duration:** {config.spaces_duration}s") else: lines.append(f"- **CUDA Available:** {'✅ Yes' if status['cuda_available'] else '❌ No'}") lines.append(f"- **Spaces Module:** {'✅ Installed' if status['spaces_available'] else '❌ Not installed'}") if status['devices']: lines.append("\n### Local GPUs") for dev in status['devices']: lines.append(f"- **GPU {dev['index']}:** {dev['name']} ({dev['total_memory_gb']}GB)") if is_running_on_spaces(): lines.append("\n⚠️ *Running on HuggingFace Spaces*") return "\n".join(lines) def _apply_hardware_config(choice: str, duration: int) -> str: """Apply hardware configuration and return status.""" mode, spaces_hw = parse_hardware_choice(choice) # Update config update_config( mode=mode, spaces_hardware=spaces_hw if spaces_hw else "zero-gpu", spaces_duration=duration, ) # Configure GPU mode in model_utils configure_gpu_mode( use_spaces=(mode == "spaces"), duration=duration, ) return _format_gpu_status() def run_sharp( image_path: str | None, trajectory_type: TrajectoryType, output_long_side: int, num_frames: int, fps: int, render_video: bool, ) -> tuple[str | None, str | None, str]: """Run SHARP inference and return (video_path, ply_path, status_markdown).""" _validate_image(image_path) out_long_side: int | None = ( None if int(output_long_side) <= 0 else int(output_long_side) ) try: video_path, ply_path = predict_and_maybe_render_gpu( image_path, trajectory_type=trajectory_type, num_frames=int(num_frames), fps=int(fps), output_long_side=out_long_side, render_video=bool(render_video), ) lines: list[str] = [f"**PLY:** `{ply_path.name}` (ready to download)"] if render_video: if video_path is None: lines.append("**Video:** not rendered (CUDA unavailable).") else: lines.append(f"**Video:** `{video_path.name}`") else: lines.append("**Video:** disabled.") return ( str(video_path) if video_path is not None else None, str(ply_path), "\n".join(lines), ) except gr.Error: raise except Exception as e: raise gr.Error(f"SHARP failed: {type(e).__name__}: {e}") from e # ----------------------------------------------------------------------------- # UI # ----------------------------------------------------------------------------- def build_demo() -> gr.Blocks: with gr.Blocks( title="SHARP • Single-Image 3D Gaussian Prediction", elem_id="sharp-root", fill_height=True, ) as demo: gr.Markdown("## SHARP\nSingle-image **3D Gaussian scene** prediction.") # Run tab components are referenced by Examples tab, so keep them in outer scope. with gr.Column(elem_id="tabs-shell"): with gr.Tabs(): with gr.Tab("Run", id="run"): with gr.Column(elem_id="run-panel"): with gr.Row(equal_height=True, elem_id="run-media-row"): with gr.Column( scale=5, min_width=360, elem_id="run-left-col" ): image_in = gr.Image( label="Input image", type="filepath", sources=["upload"], elem_id="run-image", ) with gr.Row(): trajectory = gr.Dropdown( label="Trajectory", choices=[ "swipe", "shake", "rotate", "rotate_forward", ], value="rotate_forward", ) output_res = gr.Dropdown( label="Output long side", info="0 = match input", choices=[ ("Match input", 0), ("512", 512), ("768", 768), ("1024", 1024), ("1280", 1280), ("1536", 1536), ], value=0, ) with gr.Row(): frames = gr.Slider( label="Frames", minimum=24, maximum=120, step=1, value=60, ) fps_in = gr.Slider( label="FPS", minimum=8, maximum=60, step=1, value=30, ) render_toggle = gr.Checkbox( label="Render MP4 (requires CUDA)", value=True, ) with gr.Column( scale=5, min_width=360, elem_id="run-right-col" ): video_out = gr.Video( label="Trajectory video (MP4)", elem_id="run-video", ) with gr.Row(elem_id="run-download-row"): ply_download = gr.DownloadButton( label="Download PLY (.ply)", value=None, visible=True, elem_id="run-ply-download", ) status_md = gr.Markdown("", elem_id="run-status") with gr.Row(elem_id="run-actions-row"): run_btn = gr.Button("Generate", variant="primary") clear_btn = gr.ClearButton( [image_in, video_out, ply_download, status_md], value="Clear", ) # Ensure clearing also clears any previous download target. clear_btn.click( fn=lambda: None, outputs=[ply_download], queue=False, ) run_btn.click( fn=run_sharp, inputs=[ image_in, trajectory, output_res, frames, fps_in, render_toggle, ], outputs=[video_out, ply_download, status_md], api_visibility="public", ) with gr.Tab("Examples", id="examples"): with gr.Column(elem_id="examples-panel"): if EXAMPLE_SPECS: gr.Markdown( "Click an example to preview precompiled outputs. " "The example image will also be loaded into the Run tab." ) # Define preview outputs first (unrendered), so we can reference them from gr.Examples. ex_img = gr.Image( label="Example image", type="filepath", interactive=False, render=False, height=360, elem_id="examples-image", ) ex_vid = gr.Video( label="Pre-rendered MP4", render=False, height=360, elem_id="examples-video", ) ex_ply = gr.DownloadButton( label="Download PLY (.ply)", value=None, visible=True, render=False, elem_id="examples-ply-download", ) ex_status = gr.Markdown( render=False, elem_id="examples-status" ) with gr.Row(equal_height=True): with gr.Column(scale=4, min_width=320): gr.Examples( examples=[ [str(s.image)] for s in EXAMPLE_SPECS ], example_labels=[s.label for s in EXAMPLE_SPECS], inputs=[image_in], outputs=[ex_img, ex_vid, ex_ply, ex_status], fn=load_example_assets, cache_examples=False, run_on_click=True, examples_per_page=10, label=None, ) with gr.Column(scale=6, min_width=360): ex_img.render() ex_vid.render() ex_ply.render() ex_status.render() gr.Markdown( "Add example bundles under `assets/examples/` " "(image + mp4 + ply) or provide a `manifest.json`." ) else: gr.Markdown( "No precompiled examples found.\n\n" "Add files under `assets/examples/`:\n" "- `example.jpg` (or png/webp)\n" "- `example.mp4`\n" "- `example.ply`\n\n" "Optionally add `assets/examples/manifest.json` to define labels and filenames." ) with gr.Tab("About", id="about"): with gr.Column(elem_id="about-panel"): gr.Markdown( """ *Sharp Monocular View Synthesis in Less Than a Second* (Apple, 2025) ```bibtex @inproceedings{Sharp2025:arxiv, title = {Sharp Monocular View Synthesis in Less Than a Second}, author = {Lars Mescheder and Wei Dong and Shiwei Li and Xuyang Bai and Marcel Santos and Peiyun Hu and Bruno Lecouat and Mingmin Zhen and Ama\\"{e}l Delaunoyand Tian Fang and Yanghai Tsin and Stephan R. Richter and Vladlen Koltun}, journal = {arXiv preprint arXiv:2512.10685}, year = {2025}, url = {https://arxiv.org/abs/2512.10685}, } ``` """.strip() ) with gr.Tab("⚙️ Settings", id="settings"): with gr.Column(elem_id="settings-panel"): gr.Markdown("### GPU Hardware Selection") gr.Markdown( "Select local CUDA or HuggingFace Spaces GPU for inference. " "Spaces GPUs require deploying to HuggingFace Spaces." ) with gr.Row(): with gr.Column(scale=3): hw_dropdown = gr.Dropdown( label="Hardware", choices=get_hardware_choices(), value=_get_current_hardware_value(), interactive=True, ) duration_slider = gr.Slider( label="Spaces GPU Duration (seconds)", info="Max time for @spaces.GPU decorator (ZeroGPU only)", minimum=60, maximum=300, step=30, value=get_config().spaces_duration, interactive=True, ) apply_btn = gr.Button("Apply & Save", variant="primary") with gr.Column(scale=2): hw_status = gr.Markdown( value=_format_gpu_status(), elem_id="hw-status", ) apply_btn.click( fn=_apply_hardware_config, inputs=[hw_dropdown, duration_slider], outputs=[hw_status], ) gr.Markdown( """ --- ### Spaces Hardware Reference | Hardware | VRAM | Price | Best For | |----------|------|-------|----------| | ZeroGPU (H200) | 70GB | Free (PRO) | Demos, dynamic allocation | | T4 small/medium | 16GB | $0.40-0.60/hr | Light workloads | | L4x1 | 24GB | $0.80/hr | Standard inference | | L40Sx1 | 48GB | $1.80/hr | Large models | | A10G large | 24GB | $1.50/hr | Balanced cost/performance | | A100 large | 80GB | $2.50/hr | Maximum VRAM | *Prices as of Dec 2024. See [HuggingFace Spaces GPU docs](https://huggingface.co/docs/hub/spaces-gpus).* """ ) demo.queue(max_size=DEFAULT_QUEUE_MAX_SIZE, default_concurrency_limit=1) return demo demo = build_demo() if __name__ == "__main__": demo.launch( theme=THEME, css=CSS, server_port=DEFAULT_PORT, show_api=True )