# ml-sharp / app.py
# (Hugging Face Space page residue, kept as comments:)
# RobinsAIWorld's picture — Update app.py — 15435e6 verified
"""SHARP Gradio demo (minimal, responsive UI).
This Space:
- Runs Apple's SHARP model to predict a 3D Gaussian scene from a single image.
- Exports a canonical `.ply` file for download.
- Optionally renders a camera trajectory `.mp4` (CUDA / ZeroGPU only).
Precompiled examples
Place precompiled examples under `assets/examples/`.
Recommended structure (matching stem):
assets/examples/<name>.jpg|png|webp
assets/examples/<name>.mp4
assets/examples/<name>.ply
Optional manifest (assets/examples/manifest.json):
[
{"label": "Desk", "image": "desk.jpg", "video": "desk.mp4", "ply": "desk.ply"},
...
]
"""
from __future__ import annotations
import json
from dataclasses import dataclass
from pathlib import Path
from typing import Final
import gradio as gr
import os
from model_utils import (
TrajectoryType,
predict_and_maybe_render_gpu,
configure_gpu_mode,
get_gpu_status,
)
from hardware_config import (
get_hardware_choices,
parse_hardware_choice,
get_config,
update_config,
SPACES_HARDWARE_SPECS,
is_running_on_spaces,
)
# -----------------------------------------------------------------------------
# Paths & constants
# -----------------------------------------------------------------------------
# All paths are anchored to this file's directory so the app works regardless
# of the current working directory.
APP_DIR: Final[Path] = Path(__file__).resolve().parent
OUTPUTS_DIR: Final[Path] = APP_DIR / "outputs"
ASSETS_DIR: Final[Path] = APP_DIR / "assets"
EXAMPLES_DIR: Final[Path] = ASSETS_DIR / "examples"
# Extensions recognized when discovering example bundles on disk.
IMAGE_EXTS: Final[tuple[str, ...]] = (".png", ".jpg", ".jpeg", ".webp")
# Upper bound on queued requests; see `demo.queue(...)` in `build_demo`.
DEFAULT_QUEUE_MAX_SIZE: Final[int] = 32
# Server port; override with the SHARP_PORT environment variable.
DEFAULT_PORT: Final[int] = int(os.getenv("SHARP_PORT", "49200"))
# Theme object consumed by the Gradio UI.
THEME: Final = gr.themes.Soft(
    primary_hue="indigo",
    secondary_hue="blue",
    neutral_hue="slate",
)
# Custom stylesheet consumed by the Gradio UI (responsive media sizing).
CSS: Final[str] = """
/* Keep layout stable when scrollbars appear/disappear */
html { scrollbar-gutter: stable; }
/* Use normal document flow (no fixed-height viewport shell) */
html, body { height: auto; }
body { overflow: auto; }
/* Comfortable max width; still fills small screens */
.gradio-container {
max-width: 1400px;
margin: 0 auto;
padding: 0.75rem 1rem 1rem;
box-sizing: border-box;
}
/* Make media components responsive without stretching */
#run-image, #run-video,
#examples-image, #examples-video {
width: 100%;
}
/* Keep aspect ratio and prevent runaway vertical growth on tall viewports */
#run-image img, #examples-image img {
width: 100%;
height: auto;
max-height: 70vh;
object-fit: contain;
}
#run-video video, #examples-video video {
width: 100%;
height: auto;
max-height: 70vh;
object-fit: contain;
}
/* On very small screens, reduce max media height a bit */
@media (max-width: 640px) {
#run-image img, #examples-image img,
#run-video video, #examples-video video {
max-height: 55vh;
}
}
/* Reduce extra whitespace in markdown blocks */
.gr-markdown > :first-child { margin-top: 0 !important; }
.gr-markdown > :last-child { margin-bottom: 0 !important; }
"""
# -----------------------------------------------------------------------------
# Helpers
# -----------------------------------------------------------------------------
def _ensure_dir(path: Path) -> Path:
path.mkdir(parents=True, exist_ok=True)
return path
@dataclass(frozen=True, slots=True)
class ExampleSpec:
    """A precompiled example bundle (image + optional mp4 + optional ply)."""
    # Human-readable label shown in the examples list.
    label: str
    # Path to the example's input image (always present).
    image: Path
    # Pre-rendered trajectory video, or None when the bundle has none.
    video: Path | None
    # Precomputed Gaussian .ply export, or None when the bundle has none.
    ply: Path | None
def _normalize_key(path: str) -> str:
"""Normalize a path-like string for stable dictionary keys."""
try:
return str(Path(path).resolve())
except Exception:
return path
def _load_manifest(manifest_path: Path) -> list[dict]:
"""Load manifest.json if present; return an empty list on errors."""
try:
data = json.loads(manifest_path.read_text(encoding="utf-8"))
if not isinstance(data, list):
raise ValueError("manifest.json must contain a JSON list.")
return [x for x in data if isinstance(x, dict)]
except FileNotFoundError:
return []
except Exception as e:
# Manifest errors should not crash the app.
print(f"[examples] Failed to parse manifest.json: {type(e).__name__}: {e}")
return []
def discover_examples(examples_dir: Path) -> list[ExampleSpec]:
    """Collect example bundles found under ``examples_dir``.

    A manifest.json, when present, takes precedence; otherwise bundles are
    inferred by pairing each image with same-stem ``.mp4``/``.ply`` files.
    """
    _ensure_dir(examples_dir)

    def _sibling(row: dict, key: str) -> Path | None:
        # Resolve an optional manifest entry relative to examples_dir;
        # None when the key is absent/falsy or the file does not exist.
        raw = row.get(key)
        if not raw:
            return None
        candidate = (examples_dir / str(raw)).resolve()
        return candidate if candidate.exists() else None

    rows = _load_manifest(examples_dir / "manifest.json")
    if rows:
        bundles: list[ExampleSpec] = []
        for row in rows:
            label = str(row.get("label") or "Example").strip() or "Example"
            rel = row.get("image")
            if not rel:
                continue
            image = (examples_dir / str(rel)).resolve()
            if not image.exists():
                continue
            bundles.append(
                ExampleSpec(
                    label=label,
                    image=image,
                    video=_sibling(row, "video"),
                    ply=_sibling(row, "ply"),
                )
            )
        return bundles

    # No manifest: infer bundles from images sharing a filename stem with
    # optional mp4/ply companions.
    images: list[Path] = []
    for ext in IMAGE_EXTS:
        images.extend(sorted(examples_dir.glob(f"*{ext}")))

    bundles = []
    for img in images:
        mp4 = examples_dir / f"{img.stem}.mp4"
        ply = examples_dir / f"{img.stem}.ply"
        bundles.append(
            ExampleSpec(
                label=img.stem.replace("_", " ").strip() or img.stem,
                image=img.resolve(),
                video=mp4.resolve() if mp4.exists() else None,
                ply=ply.resolve() if ply.exists() else None,
            )
        )
    return bundles
# Module-level initialization performed once at import time.
_ensure_dir(OUTPUTS_DIR)
# Discover precompiled example bundles shipped with the app.
EXAMPLE_SPECS: Final[list[ExampleSpec]] = discover_examples(EXAMPLES_DIR)
# Primary lookup table: resolved absolute image path -> example spec.
EXAMPLE_INDEX_BY_PATH: Final[dict[str, ExampleSpec]] = {
    _normalize_key(str(s.image)): s for s in EXAMPLE_SPECS
}
# Fallback lookup keyed by bare filename, used when the resolved-path
# lookup misses (see `load_example_assets`).
EXAMPLE_INDEX_BY_NAME: Final[dict[str, ExampleSpec]] = {
    s.image.name: s for s in EXAMPLE_SPECS
}
def load_example_assets(
    image_path: str | None,
) -> tuple[str | None, str | None, str | None, str]:
    """Look up the precompiled bundle for a selected example image.

    Returns ``(image, video, ply_path, status_markdown)``; video/ply are
    None when the bundle does not provide them.
    """
    if not image_path:
        return None, None, None, "No example selected."

    # Try the resolved-path index first, then fall back to the bare filename.
    spec = EXAMPLE_INDEX_BY_PATH.get(_normalize_key(image_path))
    if spec is None:
        spec = EXAMPLE_INDEX_BY_NAME.get(Path(image_path).name)
    if spec is None:
        return image_path, None, None, "No matching example bundle found."

    video = None if spec.video is None else str(spec.video)
    ply_path = None if spec.ply is None else str(spec.ply)

    missing = [tag for tag, val in (("MP4", video), ("PLY", ply_path)) if val is None]
    msg = f"Loaded example: **{spec.label}**."
    if missing:
        msg += f" Missing: {', '.join(missing)}."
    return str(spec.image), video, ply_path, msg
def _validate_image(image_path: str | None) -> None:
    """Raise a user-facing error when no input image was provided."""
    if image_path:
        return
    raise gr.Error("Upload an image first.")
# -----------------------------------------------------------------------------
# Hardware Configuration
# -----------------------------------------------------------------------------
def _get_current_hardware_value() -> str:
    """Return the dropdown value matching the persisted hardware config."""
    config = get_config()
    return "local" if config.mode == "local" else f"spaces:{config.spaces_hardware}"
def _format_gpu_status() -> str:
    """Render the current GPU/hardware configuration as a markdown summary."""
    status = get_gpu_status()
    config = get_config()
    is_local = config.mode == "local"

    parts: list[str] = ["### Current Status"]
    parts.append(f"- **Mode:** {'Local CUDA' if is_local else 'HuggingFace Spaces'}")

    if config.mode == "spaces":
        spec = SPACES_HARDWARE_SPECS.get(config.spaces_hardware, {})
        parts.append(f"- **Spaces Hardware:** {spec.get('name', config.spaces_hardware)}")
        parts.append(f"- **VRAM:** {spec.get('vram', 'N/A')}")
        parts.append(f"- **Price:** {spec.get('price', 'N/A')}")
        parts.append(f"- **Duration:** {config.spaces_duration}s")
    else:
        parts.append(f"- **CUDA Available:** {'✅ Yes' if status['cuda_available'] else '❌ No'}")
        parts.append(f"- **Spaces Module:** {'✅ Installed' if status['spaces_available'] else '❌ Not installed'}")
        if status["devices"]:
            parts.append("\n### Local GPUs")
            for dev in status["devices"]:
                parts.append(f"- **GPU {dev['index']}:** {dev['name']} ({dev['total_memory_gb']}GB)")

    if is_running_on_spaces():
        parts.append("\n⚠️ *Running on HuggingFace Spaces*")
    return "\n".join(parts)
def _apply_hardware_config(choice: str, duration: int) -> str:
    """Persist the selected hardware mode and return refreshed status markdown."""
    mode, spaces_hw = parse_hardware_choice(choice)
    # Persist the choice, defaulting the Spaces hardware when unspecified.
    update_config(
        mode=mode,
        spaces_hardware=spaces_hw or "zero-gpu",
        spaces_duration=duration,
    )
    # Propagate the choice to the inference layer.
    configure_gpu_mode(use_spaces=mode == "spaces", duration=duration)
    return _format_gpu_status()
def run_sharp(
    image_path: str | None,
    trajectory_type: TrajectoryType,
    output_long_side: int,
    num_frames: int,
    fps: int,
    render_video: bool,
) -> tuple[str | None, str | None, str]:
    """Run SHARP inference and return (video_path, ply_path, status_markdown).

    Raises gr.Error when no image is supplied or when inference fails.
    """
    _validate_image(image_path)

    # A non-positive long side means "match the input resolution".
    requested = int(output_long_side)
    out_long_side: int | None = requested if requested > 0 else None

    try:
        video_path, ply_path = predict_and_maybe_render_gpu(
            image_path,
            trajectory_type=trajectory_type,
            num_frames=int(num_frames),
            fps=int(fps),
            output_long_side=out_long_side,
            render_video=bool(render_video),
        )
    except gr.Error:
        raise
    except Exception as e:
        # Surface failures to the UI as a user-facing error.
        raise gr.Error(f"SHARP failed: {type(e).__name__}: {e}") from e

    status: list[str] = [f"**PLY:** `{ply_path.name}` (ready to download)"]
    if not render_video:
        status.append("**Video:** disabled.")
    elif video_path is None:
        status.append("**Video:** not rendered (CUDA unavailable).")
    else:
        status.append(f"**Video:** `{video_path.name}`")

    return (
        None if video_path is None else str(video_path),
        str(ply_path),
        "\n".join(status),
    )
# -----------------------------------------------------------------------------
# UI
# -----------------------------------------------------------------------------
def build_demo() -> gr.Blocks:
    """Build the full Gradio UI (Run / Examples / About / Settings tabs).

    Returns the wired `gr.Blocks` app with the queue configured.

    Fix: `THEME` and `CSS` are applied on the `gr.Blocks(...)` constructor
    here — `Blocks.launch()` does not accept `theme`/`css` keyword arguments,
    so passing them at launch time (as this file previously did) fails.
    """
    with gr.Blocks(
        title="SHARP • Single-Image 3D Gaussian Prediction",
        theme=THEME,
        css=CSS,
        elem_id="sharp-root",
        fill_height=True,
    ) as demo:
        gr.Markdown("## SHARP\nSingle-image **3D Gaussian scene** prediction.")
        # Run tab components are referenced by the Examples tab, so keep them
        # in outer scope.
        with gr.Column(elem_id="tabs-shell"):
            with gr.Tabs():
                with gr.Tab("Run", id="run"):
                    with gr.Column(elem_id="run-panel"):
                        with gr.Row(equal_height=True, elem_id="run-media-row"):
                            with gr.Column(
                                scale=5, min_width=360, elem_id="run-left-col"
                            ):
                                image_in = gr.Image(
                                    label="Input image",
                                    type="filepath",
                                    sources=["upload"],
                                    elem_id="run-image",
                                )
                                with gr.Row():
                                    trajectory = gr.Dropdown(
                                        label="Trajectory",
                                        choices=[
                                            "swipe",
                                            "shake",
                                            "rotate",
                                            "rotate_forward",
                                        ],
                                        value="rotate_forward",
                                    )
                                    output_res = gr.Dropdown(
                                        label="Output long side",
                                        info="0 = match input",
                                        choices=[
                                            ("Match input", 0),
                                            ("512", 512),
                                            ("768", 768),
                                            ("1024", 1024),
                                            ("1280", 1280),
                                            ("1536", 1536),
                                        ],
                                        value=0,
                                    )
                                with gr.Row():
                                    frames = gr.Slider(
                                        label="Frames",
                                        minimum=24,
                                        maximum=120,
                                        step=1,
                                        value=60,
                                    )
                                    fps_in = gr.Slider(
                                        label="FPS",
                                        minimum=8,
                                        maximum=60,
                                        step=1,
                                        value=30,
                                    )
                                render_toggle = gr.Checkbox(
                                    label="Render MP4 (requires CUDA)",
                                    value=True,
                                )
                            with gr.Column(
                                scale=5, min_width=360, elem_id="run-right-col"
                            ):
                                video_out = gr.Video(
                                    label="Trajectory video (MP4)",
                                    elem_id="run-video",
                                )
                                with gr.Row(elem_id="run-download-row"):
                                    ply_download = gr.DownloadButton(
                                        label="Download PLY (.ply)",
                                        value=None,
                                        visible=True,
                                        elem_id="run-ply-download",
                                    )
                                status_md = gr.Markdown("", elem_id="run-status")
                        with gr.Row(elem_id="run-actions-row"):
                            run_btn = gr.Button("Generate", variant="primary")
                            clear_btn = gr.ClearButton(
                                [image_in, video_out, ply_download, status_md],
                                value="Clear",
                            )
                        # Ensure clearing also clears any previous download target.
                        clear_btn.click(
                            fn=lambda: None,
                            outputs=[ply_download],
                            queue=False,
                        )
                        # NOTE(review): `api_visibility` requires a recent
                        # Gradio release — confirm the pinned version has it.
                        run_btn.click(
                            fn=run_sharp,
                            inputs=[
                                image_in,
                                trajectory,
                                output_res,
                                frames,
                                fps_in,
                                render_toggle,
                            ],
                            outputs=[video_out, ply_download, status_md],
                            api_visibility="public",
                        )
                with gr.Tab("Examples", id="examples"):
                    with gr.Column(elem_id="examples-panel"):
                        if EXAMPLE_SPECS:
                            gr.Markdown(
                                "Click an example to preview precompiled outputs. "
                                "The example image will also be loaded into the Run tab."
                            )
                            # Define preview outputs first (unrendered), so we
                            # can reference them from gr.Examples.
                            ex_img = gr.Image(
                                label="Example image",
                                type="filepath",
                                interactive=False,
                                render=False,
                                height=360,
                                elem_id="examples-image",
                            )
                            ex_vid = gr.Video(
                                label="Pre-rendered MP4",
                                render=False,
                                height=360,
                                elem_id="examples-video",
                            )
                            ex_ply = gr.DownloadButton(
                                label="Download PLY (.ply)",
                                value=None,
                                visible=True,
                                render=False,
                                elem_id="examples-ply-download",
                            )
                            ex_status = gr.Markdown(
                                render=False, elem_id="examples-status"
                            )
                            with gr.Row(equal_height=True):
                                with gr.Column(scale=4, min_width=320):
                                    gr.Examples(
                                        examples=[
                                            [str(s.image)] for s in EXAMPLE_SPECS
                                        ],
                                        example_labels=[s.label for s in EXAMPLE_SPECS],
                                        inputs=[image_in],
                                        outputs=[ex_img, ex_vid, ex_ply, ex_status],
                                        fn=load_example_assets,
                                        cache_examples=False,
                                        run_on_click=True,
                                        examples_per_page=10,
                                        label=None,
                                    )
                                with gr.Column(scale=6, min_width=360):
                                    ex_img.render()
                                    ex_vid.render()
                                    ex_ply.render()
                                    ex_status.render()
                            gr.Markdown(
                                "Add example bundles under `assets/examples/` "
                                "(image + mp4 + ply) or provide a `manifest.json`."
                            )
                        else:
                            gr.Markdown(
                                "No precompiled examples found.\n\n"
                                "Add files under `assets/examples/`:\n"
                                "- `example.jpg` (or png/webp)\n"
                                "- `example.mp4`\n"
                                "- `example.ply`\n\n"
                                "Optionally add `assets/examples/manifest.json` to define labels and filenames."
                            )
                with gr.Tab("About", id="about"):
                    with gr.Column(elem_id="about-panel"):
                        # Typo fixed in the BibTeX author list:
                        # "Delaunoyand" -> "Delaunoy and".
                        gr.Markdown(
                            """
*Sharp Monocular View Synthesis in Less Than a Second* (Apple, 2025)
```bibtex
@inproceedings{Sharp2025:arxiv,
title = {Sharp Monocular View Synthesis in Less Than a Second},
author = {Lars Mescheder and Wei Dong and Shiwei Li and Xuyang Bai and Marcel Santos and Peiyun Hu and Bruno Lecouat and Mingmin Zhen and Ama\\"{e}l Delaunoy and Tian Fang and Yanghai Tsin and Stephan R. Richter and Vladlen Koltun},
journal = {arXiv preprint arXiv:2512.10685},
year = {2025},
url = {https://arxiv.org/abs/2512.10685},
}
```
""".strip()
                        )
                with gr.Tab("⚙️ Settings", id="settings"):
                    with gr.Column(elem_id="settings-panel"):
                        gr.Markdown("### GPU Hardware Selection")
                        gr.Markdown(
                            "Select local CUDA or HuggingFace Spaces GPU for inference. "
                            "Spaces GPUs require deploying to HuggingFace Spaces."
                        )
                        with gr.Row():
                            with gr.Column(scale=3):
                                hw_dropdown = gr.Dropdown(
                                    label="Hardware",
                                    choices=get_hardware_choices(),
                                    value=_get_current_hardware_value(),
                                    interactive=True,
                                )
                                duration_slider = gr.Slider(
                                    label="Spaces GPU Duration (seconds)",
                                    info="Max time for @spaces.GPU decorator (ZeroGPU only)",
                                    minimum=60,
                                    maximum=300,
                                    step=30,
                                    value=get_config().spaces_duration,
                                    interactive=True,
                                )
                                apply_btn = gr.Button("Apply & Save", variant="primary")
                            with gr.Column(scale=2):
                                hw_status = gr.Markdown(
                                    value=_format_gpu_status(),
                                    elem_id="hw-status",
                                )
                        apply_btn.click(
                            fn=_apply_hardware_config,
                            inputs=[hw_dropdown, duration_slider],
                            outputs=[hw_status],
                        )
                        gr.Markdown(
                            """
---
### Spaces Hardware Reference
| Hardware | VRAM | Price | Best For |
|----------|------|-------|----------|
| ZeroGPU (H200) | 70GB | Free (PRO) | Demos, dynamic allocation |
| T4 small/medium | 16GB | $0.40-0.60/hr | Light workloads |
| L4x1 | 24GB | $0.80/hr | Standard inference |
| L40Sx1 | 48GB | $1.80/hr | Large models |
| A10G large | 24GB | $1.50/hr | Balanced cost/performance |
| A100 large | 80GB | $2.50/hr | Maximum VRAM |
*Prices as of Dec 2024. See [HuggingFace Spaces GPU docs](https://huggingface.co/docs/hub/spaces-gpus).*
"""
                        )
    # Serialize inference requests; the model is heavyweight.
    demo.queue(max_size=DEFAULT_QUEUE_MAX_SIZE, default_concurrency_limit=1)
    return demo
# Built at import time so `demo` is discoverable by Spaces runners.
demo = build_demo()

if __name__ == "__main__":
    # Fix: `Blocks.launch()` does not accept `theme`/`css` keyword arguments
    # (passing them raises TypeError); theme and stylesheet belong on the
    # `gr.Blocks(...)` constructor inside `build_demo()` instead.
    demo.launch(
        server_port=DEFAULT_PORT,
        show_api=True,
    )