# ml-sharp / app.py
# (Hugging Face Space page residue, kept as comments:)
# RobinsAIWorld's picture — Update app.py — 15435e6 verified
"""SHARP Gradio demo (minimal, responsive UI).
This Space:
- Runs Apple's SHARP model to predict a 3D Gaussian scene from a single image.
- Exports a canonical `.ply` file for download.
- Optionally renders a camera trajectory `.mp4` (CUDA / ZeroGPU only).
Precompiled examples
Place precompiled examples under `assets/examples/`.
Recommended structure (matching stem):
assets/examples/<name>.jpg|png|webp
assets/examples/<name>.mp4
assets/examples/<name>.ply
Optional manifest (assets/examples/manifest.json):
[
{"label": "Desk", "image": "desk.jpg", "video": "desk.mp4", "ply": "desk.ply"},
...
]
"""
from __future__ import annotations
import json
from dataclasses import dataclass
from pathlib import Path
from typing import Final
import gradio as gr
import os
from model_utils import (
TrajectoryType,
predict_and_maybe_render_gpu,
configure_gpu_mode,
get_gpu_status,
)
from hardware_config import (
get_hardware_choices,
parse_hardware_choice,
get_config,
update_config,
SPACES_HARDWARE_SPECS,
is_running_on_spaces,
)
# -----------------------------------------------------------------------------
# Paths & constants
# -----------------------------------------------------------------------------
# All paths are anchored to this file's directory so the app works regardless
# of the current working directory.
APP_DIR: Final[Path] = Path(__file__).resolve().parent
OUTPUTS_DIR: Final[Path] = APP_DIR / "outputs"
ASSETS_DIR: Final[Path] = APP_DIR / "assets"
EXAMPLES_DIR: Final[Path] = ASSETS_DIR / "examples"
# Extensions recognized when discovering example bundles on disk.
IMAGE_EXTS: Final[tuple[str, ...]] = (".png", ".jpg", ".jpeg", ".webp")
# Upper bound on queued requests; see `demo.queue(...)` in `build_demo`.
DEFAULT_QUEUE_MAX_SIZE: Final[int] = 32
# Server port; override with the SHARP_PORT environment variable.
DEFAULT_PORT: Final[int] = int(os.getenv("SHARP_PORT", "49200"))
# Theme object consumed by the Gradio UI.
THEME: Final = gr.themes.Soft(
    primary_hue="indigo",
    secondary_hue="blue",
    neutral_hue="slate",
)
# Custom stylesheet consumed by the Gradio UI (responsive media sizing).
CSS: Final[str] = """
/* Keep layout stable when scrollbars appear/disappear */
html { scrollbar-gutter: stable; }
/* Use normal document flow (no fixed-height viewport shell) */
html, body { height: auto; }
body { overflow: auto; }
/* Comfortable max width; still fills small screens */
.gradio-container {
max-width: 1400px;
margin: 0 auto;
padding: 0.75rem 1rem 1rem;
box-sizing: border-box;
}
/* Make media components responsive without stretching */
#run-image, #run-video,
#examples-image, #examples-video {
width: 100%;
}
/* Keep aspect ratio and prevent runaway vertical growth on tall viewports */
#run-image img, #examples-image img {
width: 100%;
height: auto;
max-height: 70vh;
object-fit: contain;
}
#run-video video, #examples-video video {
width: 100%;
height: auto;
max-height: 70vh;
object-fit: contain;
}
/* On very small screens, reduce max media height a bit */
@media (max-width: 640px) {
#run-image img, #examples-image img,
#run-video video, #examples-video video {
max-height: 55vh;
}
}
/* Reduce extra whitespace in markdown blocks */
.gr-markdown > :first-child { margin-top: 0 !important; }
.gr-markdown > :last-child { margin-bottom: 0 !important; }
"""
# -----------------------------------------------------------------------------
# Helpers
# -----------------------------------------------------------------------------
def _ensure_dir(path: Path) -> Path:
path.mkdir(parents=True, exist_ok=True)
return path
@dataclass(frozen=True, slots=True)
class ExampleSpec:
    """A precompiled example bundle (image + optional mp4 + optional ply)."""
    # Human-readable label shown in the examples list.
    label: str
    # Path to the example's input image (always present).
    image: Path
    # Pre-rendered trajectory video, or None when the bundle has none.
    video: Path | None
    # Precomputed Gaussian .ply export, or None when the bundle has none.
    ply: Path | None
def _normalize_key(path: str) -> str:
"""Normalize a path-like string for stable dictionary keys."""
try:
return str(Path(path).resolve())
except Exception:
return path
def _load_manifest(manifest_path: Path) -> list[dict]:
"""Load manifest.json if present; return an empty list on errors."""
try:
data = json.loads(manifest_path.read_text(encoding="utf-8"))
if not isinstance(data, list):
raise ValueError("manifest.json must contain a JSON list.")
return [x for x in data if isinstance(x, dict)]
except FileNotFoundError:
return []
except Exception as e:
# Manifest errors should not crash the app.
print(f"[examples] Failed to parse manifest.json: {type(e).__name__}: {e}")
return []
def discover_examples(examples_dir: Path) -> list[ExampleSpec]:
    """Collect example bundles found under ``examples_dir``.

    A manifest.json, when present, takes precedence; otherwise bundles are
    inferred by pairing each image with same-stem ``.mp4``/``.ply`` files.
    """
    _ensure_dir(examples_dir)

    def _sibling(row: dict, key: str) -> Path | None:
        # Resolve an optional manifest entry relative to examples_dir;
        # None when the key is absent/falsy or the file does not exist.
        raw = row.get(key)
        if not raw:
            return None
        candidate = (examples_dir / str(raw)).resolve()
        return candidate if candidate.exists() else None

    rows = _load_manifest(examples_dir / "manifest.json")
    if rows:
        bundles: list[ExampleSpec] = []
        for row in rows:
            label = str(row.get("label") or "Example").strip() or "Example"
            rel = row.get("image")
            if not rel:
                continue
            image = (examples_dir / str(rel)).resolve()
            if not image.exists():
                continue
            bundles.append(
                ExampleSpec(
                    label=label,
                    image=image,
                    video=_sibling(row, "video"),
                    ply=_sibling(row, "ply"),
                )
            )
        return bundles

    # No manifest: infer bundles from images sharing a filename stem with
    # optional mp4/ply companions.
    images: list[Path] = []
    for ext in IMAGE_EXTS:
        images.extend(sorted(examples_dir.glob(f"*{ext}")))

    bundles = []
    for img in images:
        mp4 = examples_dir / f"{img.stem}.mp4"
        ply = examples_dir / f"{img.stem}.ply"
        bundles.append(
            ExampleSpec(
                label=img.stem.replace("_", " ").strip() or img.stem,
                image=img.resolve(),
                video=mp4.resolve() if mp4.exists() else None,
                ply=ply.resolve() if ply.exists() else None,
            )
        )
    return bundles
# Module-level initialization performed once at import time.
_ensure_dir(OUTPUTS_DIR)
# Discover precompiled example bundles shipped with the app.
EXAMPLE_SPECS: Final[list[ExampleSpec]] = discover_examples(EXAMPLES_DIR)
# Primary lookup table: resolved absolute image path -> example spec.
EXAMPLE_INDEX_BY_PATH: Final[dict[str, ExampleSpec]] = {
    _normalize_key(str(s.image)): s for s in EXAMPLE_SPECS
}
# Fallback lookup keyed by bare filename, used when the resolved-path
# lookup misses (see `load_example_assets`).
EXAMPLE_INDEX_BY_NAME: Final[dict[str, ExampleSpec]] = {
    s.image.name: s for s in EXAMPLE_SPECS
}
def load_example_assets(
    image_path: str | None,
) -> tuple[str | None, str | None, str | None, str]:
    """Look up the precompiled bundle for a selected example image.

    Returns ``(image, video, ply_path, status_markdown)``; video/ply are
    None when the bundle does not provide them.
    """
    if not image_path:
        return None, None, None, "No example selected."

    # Try the resolved-path index first, then fall back to the bare filename.
    spec = EXAMPLE_INDEX_BY_PATH.get(_normalize_key(image_path))
    if spec is None:
        spec = EXAMPLE_INDEX_BY_NAME.get(Path(image_path).name)
    if spec is None:
        return image_path, None, None, "No matching example bundle found."

    video = None if spec.video is None else str(spec.video)
    ply_path = None if spec.ply is None else str(spec.ply)

    missing = [tag for tag, val in (("MP4", video), ("PLY", ply_path)) if val is None]
    msg = f"Loaded example: **{spec.label}**."
    if missing:
        msg += f" Missing: {', '.join(missing)}."
    return str(spec.image), video, ply_path, msg
def _validate_image(image_path: str | None) -> None:
    """Raise a user-facing error when no input image was provided."""
    if image_path:
        return
    raise gr.Error("Upload an image first.")
# -----------------------------------------------------------------------------
# Hardware Configuration
# -----------------------------------------------------------------------------
def _get_current_hardware_value() -> str:
    """Return the dropdown value matching the persisted hardware config."""
    config = get_config()
    return "local" if config.mode == "local" else f"spaces:{config.spaces_hardware}"
def _format_gpu_status() -> str:
    """Render the current GPU/hardware configuration as a markdown summary."""
    status = get_gpu_status()
    config = get_config()
    is_local = config.mode == "local"

    parts: list[str] = ["### Current Status"]
    parts.append(f"- **Mode:** {'Local CUDA' if is_local else 'HuggingFace Spaces'}")

    if config.mode == "spaces":
        spec = SPACES_HARDWARE_SPECS.get(config.spaces_hardware, {})
        parts.append(f"- **Spaces Hardware:** {spec.get('name', config.spaces_hardware)}")
        parts.append(f"- **VRAM:** {spec.get('vram', 'N/A')}")
        parts.append(f"- **Price:** {spec.get('price', 'N/A')}")
        parts.append(f"- **Duration:** {config.spaces_duration}s")
    else:
        parts.append(f"- **CUDA Available:** {'✅ Yes' if status['cuda_available'] else '❌ No'}")
        parts.append(f"- **Spaces Module:** {'✅ Installed' if status['spaces_available'] else '❌ Not installed'}")
        if status["devices"]:
            parts.append("\n### Local GPUs")
            for dev in status["devices"]:
                parts.append(f"- **GPU {dev['index']}:** {dev['name']} ({dev['total_memory_gb']}GB)")

    if is_running_on_spaces():
        parts.append("\n⚠️ *Running on HuggingFace Spaces*")
    return "\n".join(parts)
def _apply_hardware_config(choice: str, duration: int) -> str:
    """Persist the selected hardware mode and return refreshed status markdown."""
    mode, spaces_hw = parse_hardware_choice(choice)
    # Persist the choice, defaulting the Spaces hardware when unspecified.
    update_config(
        mode=mode,
        spaces_hardware=spaces_hw or "zero-gpu",
        spaces_duration=duration,
    )
    # Propagate the choice to the inference layer.
    configure_gpu_mode(use_spaces=mode == "spaces", duration=duration)
    return _format_gpu_status()
def run_sharp(
    image_path: str | None,
    trajectory_type: TrajectoryType,
    output_long_side: int,
    num_frames: int,
    fps: int,
    render_video: bool,
) -> tuple[str | None, str | None, str]:
    """Run SHARP inference and return (video_path, ply_path, status_markdown).

    Raises gr.Error when no image is supplied or when inference fails.
    """
    _validate_image(image_path)

    # A non-positive long side means "match the input resolution".
    requested = int(output_long_side)
    out_long_side: int | None = requested if requested > 0 else None

    try:
        video_path, ply_path = predict_and_maybe_render_gpu(
            image_path,
            trajectory_type=trajectory_type,
            num_frames=int(num_frames),
            fps=int(fps),
            output_long_side=out_long_side,
            render_video=bool(render_video),
        )
    except gr.Error:
        raise
    except Exception as e:
        # Surface failures to the UI as a user-facing error.
        raise gr.Error(f"SHARP failed: {type(e).__name__}: {e}") from e

    status: list[str] = [f"**PLY:** `{ply_path.name}` (ready to download)"]
    if not render_video:
        status.append("**Video:** disabled.")
    elif video_path is None:
        status.append("**Video:** not rendered (CUDA unavailable).")
    else:
        status.append(f"**Video:** `{video_path.name}`")

    return (
        None if video_path is None else str(video_path),
        str(ply_path),
        "\n".join(status),
    )
# -----------------------------------------------------------------------------
# UI
# -----------------------------------------------------------------------------
def build_demo() -> gr.Blocks:
    """Build the full Gradio UI (Run / Examples / About / Settings tabs).

    Returns the wired `gr.Blocks` app with the queue configured.

    Fix: `THEME` and `CSS` are applied on the `gr.Blocks(...)` constructor
    here — `Blocks.launch()` does not accept `theme`/`css` keyword arguments,
    so passing them at launch time (as this file previously did) fails.
    """
    with gr.Blocks(
        title="SHARP • Single-Image 3D Gaussian Prediction",
        theme=THEME,
        css=CSS,
        elem_id="sharp-root",
        fill_height=True,
    ) as demo:
        gr.Markdown("## SHARP\nSingle-image **3D Gaussian scene** prediction.")
        # Run tab components are referenced by the Examples tab, so keep them
        # in outer scope.
        with gr.Column(elem_id="tabs-shell"):
            with gr.Tabs():
                with gr.Tab("Run", id="run"):
                    with gr.Column(elem_id="run-panel"):
                        with gr.Row(equal_height=True, elem_id="run-media-row"):
                            with gr.Column(
                                scale=5, min_width=360, elem_id="run-left-col"
                            ):
                                image_in = gr.Image(
                                    label="Input image",
                                    type="filepath",
                                    sources=["upload"],
                                    elem_id="run-image",
                                )
                                with gr.Row():
                                    trajectory = gr.Dropdown(
                                        label="Trajectory",
                                        choices=[
                                            "swipe",
                                            "shake",
                                            "rotate",
                                            "rotate_forward",
                                        ],
                                        value="rotate_forward",
                                    )
                                    output_res = gr.Dropdown(
                                        label="Output long side",
                                        info="0 = match input",
                                        choices=[
                                            ("Match input", 0),
                                            ("512", 512),
                                            ("768", 768),
                                            ("1024", 1024),
                                            ("1280", 1280),
                                            ("1536", 1536),
                                        ],
                                        value=0,
                                    )
                                with gr.Row():
                                    frames = gr.Slider(
                                        label="Frames",
                                        minimum=24,
                                        maximum=120,
                                        step=1,
                                        value=60,
                                    )
                                    fps_in = gr.Slider(
                                        label="FPS",
                                        minimum=8,
                                        maximum=60,
                                        step=1,
                                        value=30,
                                    )
                                render_toggle = gr.Checkbox(
                                    label="Render MP4 (requires CUDA)",
                                    value=True,
                                )
                            with gr.Column(
                                scale=5, min_width=360, elem_id="run-right-col"
                            ):
                                video_out = gr.Video(
                                    label="Trajectory video (MP4)",
                                    elem_id="run-video",
                                )
                                with gr.Row(elem_id="run-download-row"):
                                    ply_download = gr.DownloadButton(
                                        label="Download PLY (.ply)",
                                        value=None,
                                        visible=True,
                                        elem_id="run-ply-download",
                                    )
                                status_md = gr.Markdown("", elem_id="run-status")
                        with gr.Row(elem_id="run-actions-row"):
                            run_btn = gr.Button("Generate", variant="primary")
                            clear_btn = gr.ClearButton(
                                [image_in, video_out, ply_download, status_md],
                                value="Clear",
                            )
                        # Ensure clearing also clears any previous download target.
                        clear_btn.click(
                            fn=lambda: None,
                            outputs=[ply_download],
                            queue=False,
                        )
                        # NOTE(review): `api_visibility` requires a recent
                        # Gradio release — confirm the pinned version has it.
                        run_btn.click(
                            fn=run_sharp,
                            inputs=[
                                image_in,
                                trajectory,
                                output_res,
                                frames,
                                fps_in,
                                render_toggle,
                            ],
                            outputs=[video_out, ply_download, status_md],
                            api_visibility="public",
                        )
                with gr.Tab("Examples", id="examples"):
                    with gr.Column(elem_id="examples-panel"):
                        if EXAMPLE_SPECS:
                            gr.Markdown(
                                "Click an example to preview precompiled outputs. "
                                "The example image will also be loaded into the Run tab."
                            )
                            # Define preview outputs first (unrendered), so we
                            # can reference them from gr.Examples.
                            ex_img = gr.Image(
                                label="Example image",
                                type="filepath",
                                interactive=False,
                                render=False,
                                height=360,
                                elem_id="examples-image",
                            )
                            ex_vid = gr.Video(
                                label="Pre-rendered MP4",
                                render=False,
                                height=360,
                                elem_id="examples-video",
                            )
                            ex_ply = gr.DownloadButton(
                                label="Download PLY (.ply)",
                                value=None,
                                visible=True,
                                render=False,
                                elem_id="examples-ply-download",
                            )
                            ex_status = gr.Markdown(
                                render=False, elem_id="examples-status"
                            )
                            with gr.Row(equal_height=True):
                                with gr.Column(scale=4, min_width=320):
                                    gr.Examples(
                                        examples=[
                                            [str(s.image)] for s in EXAMPLE_SPECS
                                        ],
                                        example_labels=[s.label for s in EXAMPLE_SPECS],
                                        inputs=[image_in],
                                        outputs=[ex_img, ex_vid, ex_ply, ex_status],
                                        fn=load_example_assets,
                                        cache_examples=False,
                                        run_on_click=True,
                                        examples_per_page=10,
                                        label=None,
                                    )
                                with gr.Column(scale=6, min_width=360):
                                    ex_img.render()
                                    ex_vid.render()
                                    ex_ply.render()
                                    ex_status.render()
                            gr.Markdown(
                                "Add example bundles under `assets/examples/` "
                                "(image + mp4 + ply) or provide a `manifest.json`."
                            )
                        else:
                            gr.Markdown(
                                "No precompiled examples found.\n\n"
                                "Add files under `assets/examples/`:\n"
                                "- `example.jpg` (or png/webp)\n"
                                "- `example.mp4`\n"
                                "- `example.ply`\n\n"
                                "Optionally add `assets/examples/manifest.json` to define labels and filenames."
                            )
                with gr.Tab("About", id="about"):
                    with gr.Column(elem_id="about-panel"):
                        # Typo fixed in the BibTeX author list:
                        # "Delaunoyand" -> "Delaunoy and".
                        gr.Markdown(
                            """
*Sharp Monocular View Synthesis in Less Than a Second* (Apple, 2025)
```bibtex
@inproceedings{Sharp2025:arxiv,
title = {Sharp Monocular View Synthesis in Less Than a Second},
author = {Lars Mescheder and Wei Dong and Shiwei Li and Xuyang Bai and Marcel Santos and Peiyun Hu and Bruno Lecouat and Mingmin Zhen and Ama\\"{e}l Delaunoy and Tian Fang and Yanghai Tsin and Stephan R. Richter and Vladlen Koltun},
journal = {arXiv preprint arXiv:2512.10685},
year = {2025},
url = {https://arxiv.org/abs/2512.10685},
}
```
""".strip()
                        )
                with gr.Tab("⚙️ Settings", id="settings"):
                    with gr.Column(elem_id="settings-panel"):
                        gr.Markdown("### GPU Hardware Selection")
                        gr.Markdown(
                            "Select local CUDA or HuggingFace Spaces GPU for inference. "
                            "Spaces GPUs require deploying to HuggingFace Spaces."
                        )
                        with gr.Row():
                            with gr.Column(scale=3):
                                hw_dropdown = gr.Dropdown(
                                    label="Hardware",
                                    choices=get_hardware_choices(),
                                    value=_get_current_hardware_value(),
                                    interactive=True,
                                )
                                duration_slider = gr.Slider(
                                    label="Spaces GPU Duration (seconds)",
                                    info="Max time for @spaces.GPU decorator (ZeroGPU only)",
                                    minimum=60,
                                    maximum=300,
                                    step=30,
                                    value=get_config().spaces_duration,
                                    interactive=True,
                                )
                                apply_btn = gr.Button("Apply & Save", variant="primary")
                            with gr.Column(scale=2):
                                hw_status = gr.Markdown(
                                    value=_format_gpu_status(),
                                    elem_id="hw-status",
                                )
                        apply_btn.click(
                            fn=_apply_hardware_config,
                            inputs=[hw_dropdown, duration_slider],
                            outputs=[hw_status],
                        )
                        gr.Markdown(
                            """
---
### Spaces Hardware Reference
| Hardware | VRAM | Price | Best For |
|----------|------|-------|----------|
| ZeroGPU (H200) | 70GB | Free (PRO) | Demos, dynamic allocation |
| T4 small/medium | 16GB | $0.40-0.60/hr | Light workloads |
| L4x1 | 24GB | $0.80/hr | Standard inference |
| L40Sx1 | 48GB | $1.80/hr | Large models |
| A10G large | 24GB | $1.50/hr | Balanced cost/performance |
| A100 large | 80GB | $2.50/hr | Maximum VRAM |
*Prices as of Dec 2024. See [HuggingFace Spaces GPU docs](https://huggingface.co/docs/hub/spaces-gpus).*
"""
                        )
    # Serialize inference requests; the model is heavyweight.
    demo.queue(max_size=DEFAULT_QUEUE_MAX_SIZE, default_concurrency_limit=1)
    return demo
# Built at import time so `demo` is discoverable by Spaces runners.
demo = build_demo()

if __name__ == "__main__":
    # Fix: `Blocks.launch()` does not accept `theme`/`css` keyword arguments
    # (passing them raises TypeError); theme and stylesheet belong on the
    # `gr.Blocks(...)` constructor inside `build_demo()` instead.
    demo.launch(
        server_port=DEFAULT_PORT,
        show_api=True,
    )