#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Interactive viser viewer for a trained SVRaster sparse-voxel model.

Created on Mon Oct 6 10:16:31 2025

@author: nibio

NOTE: importing this module has side effects. It prints the working
directory, sets ``OMP_NUM_THREADS``, and may invoke ``pip`` to build the
``svraster_cuda`` extension and/or install ``torch`` when they are missing.
"""
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.

import os

print(os.getcwd())
# Set before the numeric libraries further down are imported.
os.environ["OMP_NUM_THREADS"] = "1"

import importlib.util
import io
import subprocess
import sys

# --- Bootstrap: make sure the CUDA rasterizer extension is importable ---
try:
    import svraster_cuda
    print("[INFO] svraster_cuda loaded successfully ✅")
except Exception as e:
    print(f"[WARN] svraster_cuda not found ({e}); rebuilding for Python {sys.version_info.major}.{sys.version_info.minor} ...")
    os.environ["PYTHONPATH"] = sys.path[0] + ":" + os.environ.get("PYTHONPATH", "")
    os.environ["PIP_CACHE_DIR"] = "/home/user/app/.pip_cache"
    # Force rebuild from your CUDA setup. Use the running interpreter's pip so
    # the extension is installed for the Python version named in the message
    # above, not for whatever "pip" happens to be first on PATH.
    build_status = subprocess.run([
        sys.executable, "-m", "pip", "install", "./svraster/cuda",
        "--no-build-isolation", "--verbose",
    ]).returncode
    if build_status == 0:
        try:
            import svraster_cuda
            print("[INFO] ✅ svraster_cuda rebuilt and imported successfully")
        except Exception as err:
            print(f"[ERROR] Import after rebuild failed: {err}")
            print("[INFO] Falling back to CPU mode 🚧")
    else:
        print("[ERROR] CUDA build failed — running in CPU fallback mode 🚧")

# ensure torch installed before building CUDA extension
try:
    import torch
except ImportError:
    subprocess.run([
        sys.executable, "-m", "pip", "install",
        "torch==2.3.1", "torchvision==0.18.1", "torchaudio==2.3.1",
        "--extra-index-url", "https://download.pytorch.org/whl/cu121"
    ], check=True)

cuda_path = os.path.join(os.path.dirname(__file__), "svraster/cuda")
# Only (re)build when the extension is still not loaded: rebuilding on every
# start after a successful import costs a full compile for nothing.
if "svraster_cuda" not in sys.modules and os.path.exists(cuda_path):
    print("🧩 Installing svraster CUDA extension from source...")
    subprocess.run([
        sys.executable, "-m", "pip", "install",
        "--no-build-isolation", cuda_path
    ], check=True)

import time
import numpy as np
import imageio.v3 as iio
from scipy.spatial.transform import Rotation
from typing import Optional

import torch

from src.config import cfg, update_config
from src.dataloader.data_pack import DataPack
from src.sparse_voxel_model import SparseVoxelModel
from src.utils.image_utils import im_tensor2np, viz_tensordepth
from src.cameras import MiniCam

import viser
import viser.transforms as tf

# --- Hugging Face runtime detection ---
if os.environ.get("SPACE_RUNTIME_VERSION"):
    os.environ["PYCOLMAP_NO_CUDA"] = "1"
    os.environ["PYCOLMAP_NO_OPENGL"] = "1"
    print("[INFO] Hugging Face Space detected — disabling PyCOLMAP GPU/OpenGL features.")


# --- Patch for viser GUI API naming ---
def _safe_gui(gui):
    """Give *gui* the prefix-less ``add_*`` methods used by the viewer.

    For each ``add_<x>`` name the viewer calls, if *gui* lacks it but has
    ``add_gui_<x>``, the latter is aliased under the short name. Handles
    that already expose the short names are left untouched.

    Returns the same *gui* object (mutated in place).
    """
    aliases = {
        "add_markdown": "add_gui_markdown",
        "add_text": "add_gui_text",
        "add_slider": "add_gui_slider",
        "add_dropdown": "add_gui_dropdown",
        "add_checkbox": "add_gui_checkbox",
        "add_button": "add_gui_button",
    }
    for short_name, prefixed_name in aliases.items():
        if not hasattr(gui, short_name) and hasattr(gui, prefixed_name):
            setattr(gui, short_name, getattr(gui, prefixed_name))
    return gui


def matrix2wxyz(R: np.ndarray) -> np.ndarray:
    """Convert a 3x3 rotation matrix to a quaternion in (w, x, y, z) order."""
    # scipy returns (x, y, z, w); reorder to the (w, x, y, z) layout used here.
    return Rotation.from_matrix(R).as_quat()[[3, 0, 1, 2]]


def wxyz2matrix(wxyz: np.ndarray) -> np.ndarray:
    """Convert a (w, x, y, z) quaternion to a 3x3 rotation matrix."""
    wxyz = np.asarray(wxyz)
    return Rotation.from_quat(wxyz[[1, 2, 3, 0]]).as_matrix()


class SVRasterViewer:
    """Viser server that renders a frozen ``SparseVoxelModel`` per client.

    Camera poses exchanged with viser live in a "viewer frame" that equals
    the model's world frame shifted by ``-self.world_offset`` (zero until the
    "Recenter world to focus" button is used).
    """

    def __init__(self, cfg, model_path: Optional[str] = None, iteration: int = -1):
        """Load cameras and the model, then build the viser GUI.

        Args:
            cfg: Project config; ``cfg.data.*``, ``cfg.model.*`` and
                ``cfg.port`` are read.
            model_path: Directory passed to ``load_iteration``. Defaults to
                ``cfg.model_path``.
            iteration: Iteration passed to ``load_iteration``.

        Raises:
            ValueError: If no model path is given and ``cfg`` has none.
        """
        if model_path is None:
            try:
                model_path = cfg.model_path
            except (AttributeError, KeyError):
                model_path = None
        if model_path is None:
            raise ValueError("model_path was not given and cfg has no 'model_path'")

        # ---------- Data & model ----------
        data_pack = DataPack(
            source_path=cfg.data.source_path,
            image_dir_name=cfg.data.image_dir_name,
            res_downscale=cfg.data.res_downscale,
            res_width=cfg.data.res_width,
            skip_blend_alpha=cfg.data.skip_blend_alpha,
            alpha_is_white=cfg.model.white_background,
            data_device=cfg.data.data_device,
            use_test=cfg.data.eval,
            test_every=cfg.data.test_every,
            camera_params_only=True,
        )
        self.tr_cam_lst = data_pack.get_train_cameras()
        self.te_cam_lst = data_pack.get_test_cameras()

        # Mean training-camera position; origin when there are no cameras.
        self.scene_center = (
            np.mean([c.c2w[:3, 3].cpu().numpy() for c in self.tr_cam_lst], axis=0)
            if len(self.tr_cam_lst)
            else np.zeros(3, dtype=np.float32)
        )

        self.voxel_model = SparseVoxelModel(
            n_samp_per_vox=cfg.model.n_samp_per_vox,
            sh_degree=cfg.model.sh_degree,
            ss=cfg.model.ss,
            white_background=cfg.model.white_background,
            black_background=cfg.model.black_background,
        )
        # Previously this read a module-level ``args`` that only exists when
        # the file runs as a script.
        self.voxel_model.load_iteration(model_path, iteration)
        self.voxel_model.freeze_vox_geo()

        # ---------- state for world rebase / focus ----------
        self.world_offset = np.zeros(3, dtype=np.float32)
        # Viewer-frame point set by "Focus foreground"; None until then.
        self.focus_center: Optional[np.ndarray] = None

        # ---------- UI ----------
        self.server = viser.ViserServer(port=cfg.port)
        self.is_connected = False

        # --- Compatibility layer for viser ---
        # Builds with a ``server.gui`` namespace expose ``gui.add_*``; builds
        # without it expose ``server.add_gui_*`` directly on the server.
        gui = self.server.gui if hasattr(self.server, "gui") else self.server
        try:
            gui.set_panel_label("SVRaster viser")
        except Exception:
            # Cosmetic only; keep going without a panel label.
            print("[Warning] This viser build may not support GUI panel labels.")
        gui = _safe_gui(gui)
        if not hasattr(self.server, "scene"):
            # Same idea for scene calls: route ``server.scene.*`` to the server.
            self.server.scene = self.server

        self._build_controls(gui)
        self._build_frusta(gui)
        self._register_handlers()

        # ---------- Download ----------
        self.download_button = gui.add_button("Download view")

        @self.download_button.on_click
        def _(event: viser.GuiEvent):
            client = event.client
            if not client:
                return
            im = self.render_viser_camera(client.camera)[0]
            # Encode to PNG in memory; the previous empty-string target
            # produced no bytes to send.
            buf = io.BytesIO()
            iio.imwrite(buf, im, extension=".png")
            client.send_file_download("svraster_viser.png", buf.getvalue())

    # -------------- GUI construction --------------
    def _build_controls(self, gui):
        """Create the sliders, dropdowns and buttons (in display order)."""
        gui.add_markdown("**View control:**\n- Mouse drag + scroll\n- WASD + QE keys")
        self.fps = gui.add_text("Rendering FPS", initial_value="-1", disabled=True)

        self.active_sh_degree_slider = gui.add_slider(
            "active_sh_degree",
            min=0,
            max=self.voxel_model.max_sh_degree,
            step=1,
            initial_value=self.voxel_model.active_sh_degree,
        )
        self.ss_slider = gui.add_slider(
            "ss", min=0.5, max=2.0, step=0.05, initial_value=self.voxel_model.ss
        )
        self.width_slider = gui.add_slider(
            "width", min=64, max=2048, step=8, initial_value=1024
        )
        self.fovx_slider = gui.add_slider(
            "fovx", min=10, max=150, step=1, initial_value=70
        )
        self.near_slider = gui.add_slider(
            "near", min=0.02, max=10, step=0.01, initial_value=0.2
        )
        self.render_dropdown = gui.add_dropdown(
            "render mod",
            options=["all", "rgb only", "depth only", "normal only"],
            initial_value="all",
        )
        self.output_dropdown = gui.add_dropdown(
            "output",
            options=["rgb", "alpha", "dmean", "dmed", "dmean2n", "dmed2n", "n"],
            initial_value="rgb",
        )

        # ---- Focus & crop controls ----
        self.alpha_thr_slider = gui.add_slider(
            "alpha_threshold", min=0.0, max=0.95, step=0.01, initial_value=0.35
        )
        self.keep_closest_slider = gui.add_slider(
            "keep_closest_pct", min=0.2, max=1.0, step=0.05, initial_value=0.6
        )
        # NOTE(review): this checkbox is created but not read anywhere in
        # this file.
        self.hide_outside_checkbox = gui.add_checkbox(
            "hide_outside_focus", initial_value=False
        )
        self.center_btn = gui.add_button("Center on object")
        self.reset_btn = gui.add_button("Reset to first view")
        self.autoframe_btn = gui.add_button("Auto-frame (depth)")
        self.focus_btn = gui.add_button("Focus foreground")
        self.rebase_btn = gui.add_button("Recenter world to focus")

    def _build_frusta(self, gui):
        """Add one (initially hidden) frustum per camera plus the toggle."""
        self.tr_frust, self.te_frust = [], []

        def add_frustum(name, cam, color):
            c2w = cam.c2w.cpu().numpy()
            frame = self.server.scene.add_camera_frustum(
                name,
                fov=cam.fovy,
                aspect=cam.image_width / cam.image_height,
                scale=0.10,
                wxyz=matrix2wxyz(c2w[:3, :3]),
                position=c2w[:3, 3],
                color=color,
                visible=False,
            )

            @frame.on_click
            def _(event: viser.SceneNodePointerEvent):
                # Jump to the clicked camera, then turn towards the scene.
                client = event.client
                with client.atomic():
                    client.camera.wxyz = event.target.wxyz
                    client.camera.position = event.target.position
                    self._camera_lookat(client, self.scene_center)

            return frame

        for i, cam in enumerate(self.tr_cam_lst):
            self.tr_frust.append(add_frustum(f"/frustum/train/{i:04d}", cam, [0.0, 1.0, 0.0]))
        for i, cam in enumerate(self.te_cam_lst):
            self.te_frust.append(add_frustum(f"/frustum/test/{i:04d}", cam, [1.0, 0.0, 0.0]))

        self.show_cam_dropdown = gui.add_dropdown(
            "show cameras", options=["none", "train", "test", "all"], initial_value="none"
        )

        @self.show_cam_dropdown.on_update
        def _(_):
            choice = self.show_cam_dropdown.value
            show_train = choice in ["train", "all"]
            show_test = choice in ["test", "all"]
            for f in self.tr_frust:
                f.visible = show_train
            for f in self.te_frust:
                f.visible = show_test

    def _register_handlers(self):
        """Wire up the button callbacks and the client-connect callback."""

        @self.center_btn.on_click
        def _(event: viser.GuiEvent):
            if event.client:
                self._camera_lookat(event.client, self.scene_center)

        @self.reset_btn.on_click
        def _(event: viser.GuiEvent):
            client = event.client
            if not client:
                return
            with client.atomic():
                self._apply_initial_pose(client)
                self._camera_lookat(client, self.scene_center)

        @self.autoframe_btn.on_click
        def _(event: viser.GuiEvent):
            if event.client:
                self._auto_frame_by_depth(event.client)

        @self.focus_btn.on_click
        def _(event: viser.GuiEvent):
            if event.client:
                self._focus_foreground(event.client)

        @self.rebase_btn.on_click
        def _(event: viser.GuiEvent):
            client = event.client
            if not client or self.focus_center is None:
                print("[rebase] Run 'Focus foreground' first.")
                return
            delta = self.focus_center.astype(np.float32)
            self.world_offset = self.world_offset + delta
            # Shift everything expressed in the viewer frame by the same
            # amount so that the rendered picture does not change.
            for c in self.server.get_clients().values():
                with c.atomic():
                    c.camera.position = (
                        np.asarray(c.camera.position) - delta
                    ).astype(np.float32)
            for frust in self.tr_frust + self.te_frust:
                frust.position = np.asarray(frust.position) - delta
            self.scene_center = np.zeros(3, dtype=np.float32)
            # The focus point is now the origin; a second click is a no-op.
            self.focus_center = np.zeros(3, dtype=np.float32)
            print("[rebase] World recentered; new world_offset:", self.world_offset)

        # ---------- On connect ----------
        @self.server.on_client_connect
        def _(client: viser.ClientHandle):
            with client.atomic():
                self._apply_initial_pose(client)
                ok = self._auto_frame_by_depth(client, quiet=True)
                if not ok:
                    self._camera_lookat(client, self.scene_center)
            self.is_connected = True

    # -------------- Camera utility methods --------------
    def _apply_initial_pose(self, client: viser.ClientHandle) -> bool:
        """Move *client* to the first training camera.

        Returns False (and leaves the camera alone) when there are no
        training cameras.
        """
        if not len(self.tr_cam_lst):
            return False
        init = self.tr_cam_lst[0].c2w.cpu().numpy()
        client.camera.wxyz = matrix2wxyz(init[:3, :3])
        # Model-world position -> viewer frame.
        client.camera.position = init[:3, 3] - self.world_offset
        return True

    def _camera_lookat(self, client: viser.ClientHandle, target: np.ndarray, distance: Optional[float] = None):
        """Orient the client camera towards *target*.

        Args:
            client: Client whose camera is updated.
            target: 3-vector to look at (same frame as ``client.camera``).
            distance: Eye-to-target distance to enforce. ``None`` keeps the
                current distance (0.5 when the eye sits on the target).
        """
        target = np.asarray(target, dtype=np.float32)
        eye = np.asarray(client.camera.position, dtype=np.float32)
        offset = eye - target
        dist = float(np.linalg.norm(offset))
        if not np.isfinite(dist) or dist < 1e-6:
            # Eye coincides with the target (or is invalid): pick a default
            # direction and stand-off. The direction stays unit length.
            unit = np.array([0, 0, 1.0], dtype=np.float32)
            dist = 0.5
        else:
            unit = offset / dist
        d = float(dist if distance is None else distance)

        fwd = -unit
        up_guess = np.array([0, 1, 0], dtype=np.float32)
        if abs(np.dot(fwd, up_guess)) > 0.99:
            # Nearly parallel to the up guess: use another axis instead.
            up_guess = np.array([1, 0, 0], dtype=np.float32)
        right = np.cross(up_guess, fwd)
        right /= max(np.linalg.norm(right), 1e-6)
        up = np.cross(fwd, right)
        up /= max(np.linalg.norm(up), 1e-6)

        # Columns are the camera's x, y, z axes.
        R = np.stack([right, up, fwd], axis=1).astype(np.float32)
        new_pos = target - fwd * d
        with client.atomic():
            client.camera.wxyz = matrix2wxyz(R)
            client.camera.position = new_pos

    def _auto_frame_by_depth(self, client: viser.ClientHandle, quiet: bool = False) -> bool:
        """Render once, use center-pixel median depth to determine a good pivot.

        Returns True when the camera was re-aimed, False when the render
        failed or the center depth was not a positive finite number.
        """
        try:
            _, _, depth_med = self.render_viser_camera(client.camera, return_depth=True)
        except Exception as e:
            if not quiet:
                print("[auto-frame] render error:", e)
            return False

        H, W = depth_med.shape
        d = float(depth_med[H // 2, W // 2])
        if not np.isfinite(d) or d <= 0:
            if not quiet:
                print("[auto-frame] invalid depth; falling back")
            return False

        R = wxyz2matrix(client.camera.wxyz)
        fwd = R @ np.array([0, 0, 1], dtype=np.float32)
        target = np.asarray(client.camera.position, dtype=np.float32) + fwd * d
        self._camera_lookat(client, target, distance=d)
        if not quiet:
            print("[auto-frame] success; depth =", d)
        return True

    def _focus_foreground(self, client: viser.ClientHandle, quiet: bool = False) -> bool:
        """Aim the camera at the centroid of the nearby opaque pixels.

        Pixels are kept when their alpha (``1 - T``) exceeds the
        ``alpha_threshold`` slider and their depth is within the closest
        ``keep_closest_pct`` quantile. The kept pixels are back-projected and
        their mean is stored in ``self.focus_center`` (viewer frame).

        Returns True on success, False when nothing usable was rendered.
        """
        try:
            _, _, depth, T = self.render_viser_camera(
                client.camera, return_depth=True, return_T=True
            )
        except Exception as e:
            if not quiet:
                print("[focus] render error:", e)
            return False

        depth = np.asarray(depth, dtype=np.float32)
        T = np.asarray(T, dtype=np.float32)
        if depth.ndim != 2 or T.size != depth.size:
            if not quiet:
                print("[focus] unexpected buffer shapes; skipping")
            return False
        H, W = depth.shape
        alpha = 1.0 - T.reshape(H, W)

        mask = (alpha > float(self.alpha_thr_slider.value)) & np.isfinite(depth) & (depth > 0)
        if not mask.any():
            if not quiet:
                print("[focus] no foreground pixels above the alpha threshold")
            return False
        cutoff = np.quantile(depth[mask], float(self.keep_closest_slider.value))
        mask &= depth <= cutoff

        ys, xs = np.nonzero(mask)
        z = depth[ys, xs]
        # Same pinhole model as render_viser_camera (principal point at the
        # image center). NOTE(review): depth is treated as distance along the
        # camera z axis — confirm against the rasterizer's depth definition.
        tan_x = np.tan(0.5 * np.deg2rad(float(self.fovx_slider.value)))
        tan_y = tan_x * H / max(W, 1)
        pts_cam = np.stack(
            [
                ((xs + 0.5) / W * 2.0 - 1.0) * tan_x * z,
                ((ys + 0.5) / H * 2.0 - 1.0) * tan_y * z,
                z,
            ],
            axis=1,
        )
        R = wxyz2matrix(client.camera.wxyz)
        pts = pts_cam @ R.T + np.asarray(client.camera.position, dtype=np.float32)

        self.focus_center = pts.mean(axis=0).astype(np.float32)
        self._camera_lookat(client, self.focus_center)
        if not quiet:
            print("[focus] focus center:", self.focus_center)
        return True

    @staticmethod
    def _pack_render_result(im, eps, depth_med, T, return_depth, return_T):
        """Build the tuple ``(im, eps[, depth_med][, T])`` callers expect."""
        out = [im, eps]
        if return_depth:
            out.append(depth_med)
        if return_T:
            out.append(T)
        return tuple(out)

    @torch.no_grad()
    def render_viser_camera(
        self,
        camera: viser.CameraHandle,
        return_depth: bool = False,
        return_T: bool = False,
    ):
        """Render the model from a viser camera.

        Args:
            camera: Viser camera handle (pose and aspect are read).
            return_depth: Also return the median-depth map.
            return_T: Also return the ``T`` buffer from the render package.

        Returns:
            ``(im, eps)`` followed by ``depth_med`` and/or ``T`` when
            requested. ``im`` is the image for the selected output, ``eps``
            the render time in seconds (0.0 if the render raised). On
            failure ``im`` is white, ``depth_med`` is NaN and ``T`` is ones.
        """
        width = int(self.width_slider.value)
        aspect = max(1e-6, float(camera.aspect))
        height = max(1, int(round(width / aspect)))
        fovx = np.deg2rad(float(self.fovx_slider.value))
        # Pinhole relation for square pixels: tan(fovy/2) / tan(fovx/2) = H/W.
        # Scaling the angle itself linearly would stretch the image.
        fovy = 2.0 * np.arctan(np.tan(0.5 * fovx) * height / max(width, 1))
        near = float(self.near_slider.value)

        c2w = np.eye(4, dtype=np.float32)
        c2w[:3, :3] = wxyz2matrix(camera.wxyz)
        # Viewer frame -> model world (undo the world rebase).
        c2w[:3, 3] = np.asarray(camera.position, dtype=np.float32) + self.world_offset

        minicam = MiniCam(
            c2w,
            fovx=fovx,
            fovy=fovy,
            width=width,
            height=height,
            near=near,
        )

        self.voxel_model.active_sh_degree = int(self.active_sh_degree_slider.value)
        render_opt = {
            "ss": self.ss_slider.value,
            "output_T": True,
            "output_depth": True,
            "output_normal": True,
        }
        render_mode = self.render_dropdown.value
        if render_mode == "rgb only":
            render_opt["output_depth"] = False
            render_opt["output_normal"] = False
        elif render_mode == "depth only":
            render_opt["color_mode"] = "dontcare"
            render_opt["output_normal"] = False
        elif render_mode == "normal only":
            render_opt["color_mode"] = "dontcare"
            render_opt["output_depth"] = False

        def fallback(eps):
            # White frame with "no information" auxiliary buffers.
            return self._pack_render_result(
                np.ones((height, width, 3), dtype=np.uint8) * 255,
                eps,
                np.full((height, width), np.nan, dtype=np.float32),
                np.ones((height, width), dtype=np.float32),
                return_depth,
                return_T,
            )

        t0 = time.perf_counter()
        try:
            render_pkg = self.voxel_model.render(minicam, **render_opt)
        except RuntimeError as e:
            print("[render] RuntimeError:", e)
            return fallback(0.0)
        eps = time.perf_counter() - t0

        # Only the buffer needed by the selected output has to exist; a
        # missing depth map must not blank an RGB-only render.
        output = self.output_dropdown.value
        if output in ("dmean", "dmed", "dmean2n", "dmed2n"):
            needed = "depth"
        elif output == "n":
            needed = "normal"
        elif output == "alpha":
            needed = "T"
        else:
            needed = "color"
        if render_pkg.get(needed) is None:
            print(f"[render] '{needed}' output unavailable: returning white image.")
            return fallback(eps)

        # Select output image
        if output == "dmean":
            im = viz_tensordepth(render_pkg["depth"][0])
        elif output == "dmed":
            im = viz_tensordepth(render_pkg["depth"][2])
        elif output == "dmean2n":
            im = im_tensor2np(minicam.depth2normal(render_pkg["depth"][0]) * 0.5 + 0.5)
        elif output == "dmed2n":
            im = im_tensor2np(minicam.depth2normal(render_pkg["depth"][2]) * 0.5 + 0.5)
        elif output == "n":
            im = im_tensor2np(render_pkg["normal"] * 0.5 + 0.5)
        elif output == "alpha":
            im = im_tensor2np(1 - render_pkg["T"].repeat(3, 1, 1))
        else:
            im = im_tensor2np(render_pkg["color"])

        depth = render_pkg.get("depth")
        if depth is None:
            depth_med = np.full((height, width), np.nan, dtype=np.float32)
        else:
            depth_med = depth[2].detach().cpu().numpy()
        trans = render_pkg.get("T")
        if trans is None:
            T = np.ones((height, width), dtype=np.float32)
        else:
            T = trans.detach().cpu().numpy()
        # Drop the references to the render outputs before returning.
        del render_pkg, depth, trans

        return self._pack_render_result(im, eps, depth_med, T, return_depth, return_T)

    # ---------------- Update loop ----------------
    def update(self):
        """Render one frame for every connected client and refresh the FPS."""
        if not self.is_connected:
            return
        times = []
        for client in self.server.get_clients().values():
            im, eps = self.render_viser_camera(client.camera)
            times.append(eps)
            target = client.scene if hasattr(client, "scene") else client
            target.set_background_image(im, format="jpeg")
        mean_time = float(np.mean(times)) if times else 0.0
        # A failed render reports 0.0 s; skip the update rather than divide
        # by zero (round(inf) raises OverflowError).
        if mean_time > 0:
            self.fps.value = f"{round(1 / mean_time):4d}"


def main():
    """Launch the viewer with the hard-coded Hugging Face Space paths."""
    app_root = "/home/user/app"

    class Args:
        model_path = "Entimus_imperialis_out_model/2025-1008-1320-c3c8c5"
        iteration = -1
        port = 7860

    args = Args()

    print("[INFO] Launching SVRaster viewer on Hugging Face...")
    print(f"[INFO] Model path: {args.model_path}")

    # ✅ Detect correct config.yaml path
    config_path = os.path.join(app_root, args.model_path, "config.yaml")
    if not os.path.exists(config_path):
        raise FileNotFoundError(f"Config not found at {config_path}")

    print(f"[INFO] Using config path: {config_path}")
    update_config(config_path)
    cfg.port = args.port

    # ✅ Absolute paths confirmed by directory listing
    cfg.model_path = os.path.join(app_root, args.model_path)
    cfg.data_dir = os.path.join(app_root, "Entimus_imperialis_out")

    print("📌 Model path:", cfg.model_path)
    print("📌 Data path:", cfg.data_dir)
    print("📄 transforms.json exists?", os.path.exists(os.path.join(cfg.data_dir, "transforms.json")))
    print("📄 config.yaml exists?", os.path.exists(os.path.join(cfg.model_path, "config.yaml")))

    svraster_viewer = SVRasterViewer(cfg, model_path=args.model_path, iteration=args.iteration)

    while True:
        svraster_viewer.update()
        time.sleep(0.01)


if __name__ == "__main__":
    main()