Jiangsan123 committed
Commit 2098a77 · 0 parent(s)

Reinitialize clean repo without large files

This view is limited to 50 files because the commit contains too many changes.

Files changed (50)
  1. .gitattributes +37 -0
  2. README.md +13 -0
  3. app.py +497 -0
  4. requirements.txt +14 -0
  5. src/__pycache__/cameras.cpython-39.pyc +0 -0
  6. src/__pycache__/config.cpython-38.pyc +0 -0
  7. src/__pycache__/config.cpython-39.pyc +0 -0
  8. src/__pycache__/sparse_voxel_model.cpython-39.pyc +0 -0
  9. src/cameras.py +287 -0
  10. src/config.py +230 -0
  11. src/config_old.py +230 -0
  12. src/dataloader/__pycache__/data_pack.cpython-39.pyc +0 -0
  13. src/dataloader/__pycache__/reader_colmap_dataset.cpython-39.pyc +0 -0
  14. src/dataloader/__pycache__/reader_nerf_dataset.cpython-39.pyc +0 -0
  15. src/dataloader/data_pack.py +232 -0
  16. src/dataloader/reader_colmap_dataset.py +162 -0
  17. src/dataloader/reader_colmap_dataset_or.py +148 -0
  18. src/dataloader/reader_nerf_dataset.py +180 -0
  19. src/dataloader/reader_nerf_dataset_copy.py +170 -0
  20. src/sparse_voxel_gears/__pycache__/adaptive.cpython-39.pyc +0 -0
  21. src/sparse_voxel_gears/__pycache__/constructor.cpython-39.pyc +0 -0
  22. src/sparse_voxel_gears/__pycache__/io.cpython-39.pyc +0 -0
  23. src/sparse_voxel_gears/__pycache__/pooling.cpython-39.pyc +0 -0
  24. src/sparse_voxel_gears/__pycache__/properties.cpython-39.pyc +0 -0
  25. src/sparse_voxel_gears/__pycache__/renderer.cpython-39.pyc +0 -0
  26. src/sparse_voxel_gears/adaptive.py +296 -0
  27. src/sparse_voxel_gears/constructor.py +425 -0
  28. src/sparse_voxel_gears/io.py +156 -0
  29. src/sparse_voxel_gears/pooling.py +68 -0
  30. src/sparse_voxel_gears/properties.py +146 -0
  31. src/sparse_voxel_gears/renderer.py +178 -0
  32. src/sparse_voxel_gears/renderer_copy.py +178 -0
  33. src/sparse_voxel_model.py +67 -0
  34. src/sparse_voxel_model_copy.py +67 -0
  35. src/utils/__pycache__/activation_utils.cpython-39.pyc +0 -0
  36. src/utils/__pycache__/bounding_utils.cpython-39.pyc +0 -0
  37. src/utils/__pycache__/camera_utils.cpython-39.pyc +0 -0
  38. src/utils/__pycache__/colmap_utils.cpython-39.pyc +0 -0
  39. src/utils/__pycache__/fuser_utils.cpython-39.pyc +0 -0
  40. src/utils/__pycache__/image_utils.cpython-39.pyc +0 -0
  41. src/utils/__pycache__/loss_utils.cpython-39.pyc +0 -0
  42. src/utils/__pycache__/marching_cubes_utils.cpython-39.pyc +0 -0
  43. src/utils/__pycache__/mono_utils.cpython-39.pyc +0 -0
  44. src/utils/__pycache__/octree_utils.cpython-39.pyc +0 -0
  45. src/utils/__pycache__/system_utils.cpython-39.pyc +0 -0
  46. src/utils/activation_utils.py +49 -0
  47. src/utils/bounding_utils.py +102 -0
  48. src/utils/camera_utils.py +79 -0
  49. src/utils/colmap_utils.py +62 -0
  50. src/utils/fuser_utils.py +185 -0
.gitattributes ADDED
@@ -0,0 +1,37 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
+ *.ply filter=lfs diff=lfs merge=lfs -text
+ *.mp4 filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,13 @@
+ ---
+ title: Beetle Viz
+ emoji: 😻
+ colorFrom: red
+ colorTo: gray
+ sdk: gradio
+ sdk_version: 5.49.1
+ app_file: app.py
+ pinned: false
+ license: mit
+ ---
+
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
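The front matter above is what makes the Space start `app.py` as its entry point. A minimal local-launch sketch, assuming the model folder referenced in `app.py` and the packages from `requirements.txt` are installed (the viewer then serves viser on port 7860, as set in the script's `__main__` block):

```python
# Hedged sketch: run app.py the way the Space does, i.e. as the __main__ module.
# Assumes the model directory hard-coded in app.py and all requirements are available.
import runpy

runpy.run_path("app.py", run_name="__main__")  # blocks inside the viewer's update loop
```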
app.py ADDED
@@ -0,0 +1,497 @@
+ #!/usr/bin/env python3
+ # -*- coding: utf-8 -*-
+ """
+ Created on Mon Oct 6 10:16:31 2025
+
+ @author: nibio
+ """
+
+ # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+
+ import os, time
+ import numpy as np
+ import imageio.v3 as iio
+ from scipy.spatial.transform import Rotation
+ from typing import Optional
+
+ import torch
+
+ from src.config import cfg, update_config
+ from src.dataloader.data_pack import DataPack
+ from src.sparse_voxel_model import SparseVoxelModel
+ from src.utils.image_utils import im_tensor2np, viz_tensordepth
+ from src.cameras import MiniCam
+
+ import viser
+ import viser.transforms as tf
+
+
+ def matrix2wxyz(R: np.ndarray) -> np.ndarray:
+     return Rotation.from_matrix(R).as_quat()[[3, 0, 1, 2]]
+
+
+ def wxyz2matrix(wxyz: np.ndarray) -> np.ndarray:
+     return Rotation.from_quat(wxyz[[1, 2, 3, 0]]).as_matrix()
+
+
+ class SVRasterViewer:
+     def __init__(self, cfg):
+
+         # ---------- Data & model ----------
+         data_pack = DataPack(
+             source_path=cfg.data.source_path,
+             image_dir_name=cfg.data.image_dir_name,
+             res_downscale=cfg.data.res_downscale,
+             res_width=cfg.data.res_width,
+             skip_blend_alpha=cfg.data.skip_blend_alpha,
+             alpha_is_white=cfg.model.white_background,
+             data_device=cfg.data.data_device,
+             use_test=cfg.data.eval,
+             test_every=cfg.data.test_every,
+             camera_params_only=True,
+         )
+         self.tr_cam_lst = data_pack.get_train_cameras()
+         self.te_cam_lst = data_pack.get_test_cameras()
+
+         self.scene_center = (
+             np.mean([c.c2w[:3, 3].cpu().numpy() for c in self.tr_cam_lst], axis=0)
+             if len(self.tr_cam_lst)
+             else np.zeros(3, dtype=np.float32)
+         )
+
+         self.voxel_model = SparseVoxelModel(
+             n_samp_per_vox=cfg.model.n_samp_per_vox,
+             sh_degree=cfg.model.sh_degree,
+             ss=cfg.model.ss,
+             white_background=cfg.model.white_background,
+             black_background=cfg.model.black_background,
+         )
+         self.voxel_model.load_iteration(args.model_path, args.iteration)  # args from __main__
+         self.voxel_model.freeze_vox_geo()
+
+         # ---------- UI ----------
+         self.server = viser.ViserServer(port=cfg.port)
+         self.is_connected = False
+
+         self.server.gui.set_panel_label("SVRaster viser")
+         self.server.gui.add_markdown(
+             "**View control:**\n- Mouse drag + scroll\n- WASD + QE keys"
+         )
+         self.fps = self.server.gui.add_text("Rendering FPS", initial_value="-1", disabled=True)
+
+         self.active_sh_degree_slider = self.server.gui.add_slider(
+             "active_sh_degree", min=0, max=self.voxel_model.max_sh_degree, step=1,
+             initial_value=self.voxel_model.active_sh_degree
+         )
+         self.ss_slider = self.server.gui.add_slider("ss", min=0.5, max=2.0, step=0.05, initial_value=self.voxel_model.ss)
+         self.width_slider = self.server.gui.add_slider("width", min=64, max=2048, step=8, initial_value=1024)
+         self.fovx_slider = self.server.gui.add_slider("fovx", min=10, max=150, step=1, initial_value=70)
+         self.near_slider = self.server.gui.add_slider("near", min=0.02, max=10, step=0.01, initial_value=0.2)
+
+         self.render_dropdown = self.server.gui.add_dropdown(
+             "render mode", options=["all", "rgb only", "depth only", "normal only"], initial_value="all"
+         )
+         self.output_dropdown = self.server.gui.add_dropdown(
+             "output", options=["rgb", "alpha", "dmean", "dmed", "dmean2n", "dmed2n", "n"], initial_value="rgb"
+         )
+
+         # ---- Focus & crop controls ----
+         self.alpha_thr_slider = self.server.gui.add_slider(
+             "alpha_threshold", min=0.0, max=0.95, step=0.01, initial_value=0.35
+         )
+         self.keep_closest_slider = self.server.gui.add_slider(
+             "keep_closest_pct", min=0.2, max=1.0, step=0.05, initial_value=0.6
+         )
+         self.hide_outside_checkbox = self.server.gui.add_checkbox(
+             "hide_outside_focus", initial_value=False
+         )
+
+         self.center_btn = self.server.gui.add_button("Center on object")
+         self.reset_btn = self.server.gui.add_button("Reset to first view")
+         self.autoframe_btn = self.server.gui.add_button("Auto-frame (depth)")
+         self.focus_btn = self.server.gui.add_button("Focus foreground")
+         self.rebase_btn = self.server.gui.add_button("Recenter world to focus")
+
+         # ---- State for world rebase / focus mask ----
+         self.world_offset = np.zeros(3, dtype=np.float32)  # world translation applied during render
+         self.focus_center: Optional[np.ndarray] = None
+
+         # ---------- Camera frusta ----------
+         self.tr_frust, self.te_frust = [], []
+
+         def add_frustum(name, cam, color):
+             c2w = cam.c2w.cpu().numpy()
+             frame = self.server.scene.add_camera_frustum(
+                 name,
+                 fov=cam.fovy,
+                 aspect=cam.image_width / cam.image_height,
+                 scale=0.10,
+                 wxyz=matrix2wxyz(c2w[:3, :3]),
+                 position=c2w[:3, 3],
+                 color=color,
+                 visible=False,
+             )
+
+             @frame.on_click
+             def _(event: viser.SceneNodePointerEvent):
+                 client = event.client
+                 with client.atomic():
+                     client.camera.wxyz = event.target.wxyz
+                     client.camera.position = event.target.position
+                 self._camera_lookat(client, self.scene_center)
+
+             return frame
+
+         for i, cam in enumerate(self.tr_cam_lst):
+             self.tr_frust.append(add_frustum(f"/frustum/train/{i:04d}", cam, [0.0, 1.0, 0.0]))
+         for i, cam in enumerate(self.te_cam_lst):
+             self.te_frust.append(add_frustum(f"/frustum/test/{i:04d}", cam, [1.0, 0.0, 0.0]))
+
+         self.show_cam_dropdown = self.server.gui.add_dropdown(
+             "show cameras", options=["none", "train", "test", "all"], initial_value="none"
+         )
+
+         @self.show_cam_dropdown.on_update
+         def _(_):
+             for f in self.tr_frust:
+                 f.visible = self.show_cam_dropdown.value in ["train", "all"]
+             for f in self.te_frust:
+                 f.visible = self.show_cam_dropdown.value in ["test", "all"]
+
+         # ---------- Button handlers ----------
+         @self.center_btn.on_click
+         def _(event: viser.GuiEvent):
+             if event.client:
+                 self._camera_lookat(event.client, self.scene_center)
+
+         @self.reset_btn.on_click
+         def _(event: viser.GuiEvent):
+             client = event.client
+             if not client:
+                 return
+             init = self.tr_cam_lst[0].c2w.cpu().numpy()
+             with client.atomic():
+                 client.camera.wxyz = matrix2wxyz(init[:3, :3])
+                 client.camera.position = init[:3, 3]
+             self._camera_lookat(client, self.scene_center)
+
+         @self.autoframe_btn.on_click
+         def _(event: viser.GuiEvent):
+             if event.client:
+                 self._auto_frame_by_depth(event.client)
+
+         @self.focus_btn.on_click
+         def _(event: viser.GuiEvent):
+             if event.client:
+                 self._focus_foreground(event.client)
+
+         @self.rebase_btn.on_click
+         def _(event: viser.GuiEvent):
+             client = event.client
+             if not client or self.focus_center is None:
+                 print("[rebase] Run 'Focus foreground' first.")
+                 return
+             delta = self.focus_center.astype(np.float32)
+             self.world_offset = self.world_offset + delta  # accumulate translation
+             with client.atomic():
+                 client.camera.position = (np.asarray(client.camera.position) - delta).astype(np.float32)
+             self.scene_center = np.zeros(3, dtype=np.float32)
+             print("[rebase] World recentered; new world_offset:", self.world_offset)
+
+         # ---------- On connect ----------
+         @self.server.on_client_connect
+         def _(client: viser.ClientHandle):
+             init = self.tr_cam_lst[0].c2w.cpu().numpy()
+             with client.atomic():
+                 client.camera.wxyz = matrix2wxyz(init[:3, :3])
+                 client.camera.position = init[:3, 3]
+             ok = self._auto_frame_by_depth(client, quiet=True)
+             if not ok:
+                 self._camera_lookat(client, self.scene_center)
+             self.is_connected = True
+
+         # ---------- Download ----------
+         self.download_button = self.server.gui.add_button("Download view")
+
+         @self.download_button.on_click
+         def _(event: viser.GuiEvent):
+             im, _ = self.render_viser_camera(event.client.camera)
+             event.client.send_file_download(
+                 "svraster_viser.png", iio.imwrite("<bytes>", im, extension=".png")
+             )
+
+     # ---------------- camera utils ----------------
+     def _camera_lookat(
+         self,
+         client: viser.ClientHandle,
+         target: np.ndarray,
+         distance: Optional[float] = None,
+     ):
+         """
+         Point the camera at `target` by writing orientation (wxyz) and position directly.
+         Compatible with Viser builds where camera.look_at is not callable.
+         """
+         target = np.asarray(target, dtype=np.float32)
+         eye = np.asarray(client.camera.position, dtype=np.float32)
+
+         vec = eye - target  # target -> eye
+         norm = np.linalg.norm(vec)
+         if not np.isfinite(norm) or norm < 1e-6:
+             vec = np.array([0, 0, 1.0], dtype=np.float32)
+             norm = 0.5
+
+         d = float(norm if distance is None else distance)
+
+         # Orthonormal basis that looks at the target.
+         fwd = -(vec / max(norm, 1e-6))  # camera forward (eye -> target)
+         up_guess = np.array([0, 1, 0], dtype=np.float32)
+         if abs(np.dot(fwd, up_guess)) > 0.99:
+             up_guess = np.array([1, 0, 0], dtype=np.float32)
+         right = np.cross(up_guess, fwd)
+         right /= max(np.linalg.norm(right), 1e-6)
+         up = np.cross(fwd, right)
+         up /= max(np.linalg.norm(up), 1e-6)
+
+         R = np.stack([right, up, fwd], axis=1).astype(np.float32)
+         new_pos = target - fwd * d
+
+         with client.atomic():
+             client.camera.wxyz = matrix2wxyz(R)
+             client.camera.position = new_pos
+
+     def _auto_frame_by_depth(self, client: viser.ClientHandle, quiet: bool = False) -> bool:
+         """Render once, then use the center-pixel median depth to pick a good pivot."""
+         try:
+             _, _, depth_med = self.render_viser_camera(client.camera, return_depth=True)
+         except Exception as e:
+             if not quiet:
+                 print("[auto-frame] render error:", e)
+             return False
+
+         H, W = depth_med.shape
+         d = float(depth_med[H // 2, W // 2])
+         if not np.isfinite(d) or d <= 0:
+             if not quiet:
+                 print("[auto-frame] invalid depth; falling back")
+             return False
+
+         R = wxyz2matrix(client.camera.wxyz)
+         fwd = R @ np.array([0, 0, 1], dtype=np.float32)
+         target = np.asarray(client.camera.position, dtype=np.float32) + fwd * d
+         self._camera_lookat(client, target, distance=d)
+         if not quiet:
+             print("[auto-frame] success; depth =", d)
+         return True
+
+     # ----------- Focus only the foreground object -----------
+     def _focus_foreground(self, client: viser.ClientHandle):
+         """
+         Use alpha (1 - T) to mask the foreground, keep the closest depths,
+         back-project to world, compute a tight AABB, then center and fit the view.
+         Stores self.focus_center so 'Recenter world to focus' can use it.
+         """
+         try:
+             _, _, depth_med, T = self.render_viser_camera(client.camera, return_depth=True, return_T=True)
+         except Exception as e:
+             print("[focus] render error:", e)
+             return
+
+         alpha = 1.0 - T
+         thr = float(self.alpha_thr_slider.value)
+         mask = (alpha > thr) & np.isfinite(depth_med) & (depth_med > 0)
+
+         if mask.sum() < 50:
+             print("[focus] Not enough foreground; lower alpha_threshold or change view.")
+             return
+
+         # Keep only the closest K% of pixels to drop the outer ring
+         K = float(self.keep_closest_slider.value)
+         dvals = depth_med[mask]
+         q = np.quantile(dvals, K)
+         mask &= depth_med <= q
+         if mask.sum() < 50:
+             print("[focus] Too few pixels after depth filtering; raise keep_closest_pct.")
+             return
+
+         # Back-project masked pixels to world
+         width = int(self.width_slider.value)
+         aspect = max(1e-6, float(client.camera.aspect))
+         height = max(1, int(round(width / aspect)))
+         fovx = np.deg2rad(float(self.fovx_slider.value))
+         fovy = fovx * height / max(width, 1)
+
+         fx = width / (2.0 * np.tan(fovx * 0.5))
+         fy = height / (2.0 * np.tan(fovy * 0.5))
+         cx, cy = (width - 1) / 2.0, (height - 1) / 2.0
+
+         ys, xs = np.where(mask)
+         zs = depth_med[ys, xs].astype(np.float32)
+
+         x_cam = (xs - cx) / fx * zs
+         y_cam = (ys - cy) / fy * zs
+         z_cam = zs
+         P_cam = np.stack([x_cam, y_cam, z_cam], axis=0)  # (3, N)
+
+         R = wxyz2matrix(client.camera.wxyz)
+         t = np.asarray(client.camera.position, dtype=np.float32)[:, None]
+         # Apply the current world rebase so P_world matches what we render
+         t = (t - self.world_offset[:, None]).astype(np.float32)
+
+         P_world = (R @ P_cam) + t  # (3, N)
+
+         pmin = np.min(P_world, axis=1)
+         pmax = np.max(P_world, axis=1)
+         center = (pmin + pmax) * 0.5
+         extent = (pmax - pmin) * 0.5
+
+         # Save for rebase
+         self.focus_center = center.astype(np.float32)
+
+         # Choose a distance that fits the bbox into the view (larger FOV dimension)
+         fovx_deg = float(self.fovx_slider.value)
+         fovy_deg = fovx_deg * height / max(width, 1)
+         fov_rad = np.deg2rad(max(fovx_deg, fovy_deg))
+         radius = float(np.linalg.norm(extent, ord=np.inf))
+         dist = radius / np.tan(max(1e-4, fov_rad * 0.5)) * 1.25  # padding
+
+         # Update the logical scene center for orbiting, then go there
+         self.scene_center = center.astype(np.float32)
+         self._camera_lookat(client, self.scene_center, distance=dist)
+
+         print(f"[focus] bbox half-extent ~{extent}, distance {dist:.3f}")
+
+     # ---------------- rendering ----------------
+     @torch.no_grad()
+     def render_viser_camera(
+         self,
+         camera: viser.CameraHandle,
+         return_depth: bool = False,
+         return_T: bool = False,
+     ):
+         width = int(self.width_slider.value)
+         aspect = max(1e-6, float(camera.aspect))
+         height = max(1, int(round(width / aspect)))
+
+         fovx_deg = float(self.fovx_slider.value)
+         fovy_deg = fovx_deg * height / max(width, 1)
+         near = float(self.near_slider.value)
+
+         c2w = np.eye(4, dtype=np.float32)
+         c2w[:3, :3] = wxyz2matrix(camera.wxyz)
+         c2w[:3, 3] = camera.position
+         # Apply world rebase: moving the *world* by -offset is equivalent to moving the camera by -offset in world coords.
+         c2w[:3, 3] = c2w[:3, 3] - self.world_offset
+
+         minicam = MiniCam(
+             c2w, fovx=np.deg2rad(fovx_deg), fovy=np.deg2rad(fovy_deg),
+             width=width, height=height, near=near
+         )
+
+         self.voxel_model.active_sh_degree = int(self.active_sh_degree_slider.value)
+
+         render_opt = {
+             "ss": self.ss_slider.value,
+             "output_T": True,
+             "output_depth": True,
+             "output_normal": True,
+         }
+         if self.render_dropdown.value == "rgb only":
+             render_opt["output_depth"] = False
+             render_opt["output_normal"] = False
+         elif self.render_dropdown.value == "depth only":
+             render_opt["color_mode"] = "dontcare"
+             render_opt["output_normal"] = False
+         elif self.render_dropdown.value == "normal only":
+             render_opt["color_mode"] = "dontcare"
+             render_opt["output_depth"] = False
+
+         t0 = time.time()
+         try:
+             render_pkg = self.voxel_model.render(minicam, **render_opt)
+         except RuntimeError as e:
+             print("[render] RuntimeError:", e)
+             im = np.ones((height, width, 3), dtype=np.uint8) * 255
+             if return_depth and return_T:
+                 depth_med = np.full((height, width), np.nan, dtype=np.float32)
+                 T = np.ones((height, width), dtype=np.float32)
+                 return im, 0.0, depth_med, T
+             if return_depth:
+                 depth_med = np.full((height, width), np.nan, dtype=np.float32)
+                 return im, 0.0, depth_med
+             if return_T:
+                 T = np.ones((height, width), dtype=np.float32)
+                 return im, 0.0, T
+             return im, 0.0
+         torch.cuda.synchronize()
+         eps = time.time() - t0
+
+         # Choose the output image
+         if self.output_dropdown.value == "dmean":
+             im = viz_tensordepth(render_pkg["depth"][0])
+         elif self.output_dropdown.value == "dmed":
+             im = viz_tensordepth(render_pkg["depth"][2])
+         elif self.output_dropdown.value == "dmean2n":
+             im = im_tensor2np(minicam.depth2normal(render_pkg["depth"][0]) * 0.5 + 0.5)
+         elif self.output_dropdown.value == "dmed2n":
+             im = im_tensor2np(minicam.depth2normal(render_pkg["depth"][2]) * 0.5 + 0.5)
+         elif self.output_dropdown.value == "n":
+             im = im_tensor2np(render_pkg["normal"] * 0.5 + 0.5)
+         elif self.output_dropdown.value == "alpha":
+             im = im_tensor2np(1 - render_pkg["T"].repeat(3, 1, 1))
+         else:
+             im = im_tensor2np(render_pkg["color"])
+
+         depth_med = render_pkg["depth"][2].detach().cpu().numpy()
+         T = render_pkg["T"].detach().cpu().numpy()  # (H, W)
+
+         # Optional image-level masking to hide everything outside the focused object
+         if self.hide_outside_checkbox.value:
+             alpha = 1.0 - T
+             thr = float(self.alpha_thr_slider.value)
+             mask = (alpha > thr) & np.isfinite(depth_med) & (depth_med > 0)
+             if mask.any():
+                 K = float(self.keep_closest_slider.value)
+                 dvals = depth_med[mask]
+                 q = np.quantile(dvals, K)
+                 mask &= depth_med <= q
+             mask3 = np.repeat(mask[..., None], 3, axis=2)
+             bg = np.zeros_like(im)  # black background
+             im = np.where(mask3, im, bg)
+
+         del render_pkg
+
+         if return_depth and return_T:
+             return im, eps, depth_med, T
+         if return_depth:
+             return im, eps, depth_med
+         if return_T:
+             return im, eps, T
+         return im, eps
+
+     # ---------------- server tick ----------------
+     def update(self):
+         if not self.is_connected:
+             return
+         times = []
+         for client in self.server.get_clients().values():
+             im, eps = self.render_viser_camera(client.camera)
+             times.append(eps)
+             client.scene.set_background_image(im, format="jpeg")
+         if times:
+             self.fps.value = f"{round(1 / np.mean(times)):4d}"
+
+
+ if __name__ == "__main__":
+
+     class Args:
+         model_path = "Entimus_imperialis_out_model/2025-1008-1320-c3c8c5"
+         iteration = -1
+         port = 7860  # Hugging Face default port
+
+     args = Args()
+     print("[INFO] Launching SVRaster viewer on Hugging Face...")
+     print(f"[INFO] Model path: {args.model_path}")
+
+     update_config(os.path.join(args.model_path, "config.yaml"))
+     cfg.port = args.port
+
+     svraster_viewer = SVRasterViewer(cfg)
+
+     # Keep the process alive so Hugging Face doesn't stop it
+     while True:
+         svraster_viewer.update()
+         time.sleep(0.01)
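For reference, the `_focus_foreground` handler above derives pinhole intrinsics from the horizontal FOV (fx = W / (2 tan(fovx/2))) and lifts masked depth pixels into world space before fitting the view to their bounding box. A minimal standalone sketch of that back-projection step (the function name and arguments here are illustrative, not part of app.py):

```python
# Standalone sketch of the back-projection used by _focus_foreground (names are illustrative).
import numpy as np

def backproject_mask(depth, mask, fovx_deg, c2w):
    """Lift masked depth pixels to world space with FOV-derived pinhole intrinsics."""
    H, W = depth.shape
    fovx = np.deg2rad(fovx_deg)
    fovy = fovx * H / W                      # same aspect scaling as app.py
    fx = W / (2.0 * np.tan(fovx * 0.5))
    fy = H / (2.0 * np.tan(fovy * 0.5))
    cx, cy = (W - 1) / 2.0, (H - 1) / 2.0
    ys, xs = np.where(mask)
    z = depth[ys, xs]
    pts_cam = np.stack([(xs - cx) / fx * z, (ys - cy) / fy * z, z], axis=0)  # (3, N)
    return c2w[:3, :3] @ pts_cam + c2w[:3, 3:4]                              # (3, N) world points
```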
requirements.txt ADDED
@@ -0,0 +1,14 @@
+ torch
+ torchvision
+ torchaudio
+ numpy
+ scipy
+ imageio
+ open3d
+ trimesh
+ matplotlib
+ Pillow
+ tqdm
+ huggingface_hub
+ viser==0.1.30
+ gradio==5.2.0
src/__pycache__/cameras.cpython-39.pyc ADDED
Binary file (8.98 kB).
src/__pycache__/config.cpython-38.pyc ADDED
Binary file (3.64 kB).
src/__pycache__/config.cpython-39.pyc ADDED
Binary file (3.67 kB).
src/__pycache__/sparse_voxel_model.cpython-39.pyc ADDED
Binary file (1.85 kB).
src/cameras.py ADDED
@@ -0,0 +1,287 @@
+ # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+ #
+ # NVIDIA CORPORATION and its licensors retain all intellectual property
+ # and proprietary rights in and to this software, related documentation
+ # and any modifications thereto. Any use, reproduction, disclosure or
+ # distribution of this software and related documentation without an express
+ # license agreement from NVIDIA CORPORATION is strictly prohibited.
+
+ import numpy as np
+
+ import torch
+ import svraster_cuda
+
+
+ class CameraBase:
+
+     '''
+     Base class of perspective cameras.
+     '''
+
+     def __repr__(self):
+         clsname = self.__class__.__name__
+         fname = f"image_name='{self.image_name}'"
+         res = f"HW=({self.image_height}x{self.image_width})"
+         fov = f"fovx={np.rad2deg(self.fovx):.1f}deg"
+         return f"{clsname}({fname}, {res}, {fov})"
+
+     @property
+     def lookat(self):
+         return self.c2w[:3, 2]
+
+     @property
+     def position(self):
+         return self.c2w[:3, 3]
+
+     @property
+     def down(self):
+         return self.c2w[:3, 1]
+
+     @property
+     def right(self):
+         return self.c2w[:3, 0]
+
+     @property
+     def cx(self):
+         return self.image_width * self.cx_p
+
+     @property
+     def cy(self):
+         return self.image_height * self.cy_p
+
+     @property
+     def pix_size(self):
+         return 2 * self.tanfovx / self.image_width
+
+     @property
+     def tanfovx(self):
+         return np.tan(self.fovx * 0.5)
+
+     @property
+     def tanfovy(self):
+         return np.tan(self.fovy * 0.5)
+
+     def compute_rd(self, wh=None, cxcy=None, device=None):
+         '''Ray directions in world space.'''
+         if wh is None:
+             wh = (self.image_width, self.image_height)
+         if cxcy is None:
+             cxcy = (self.cx * wh[0] / self.image_width, self.cy * wh[1] / self.image_height)
+         rd = svraster_cuda.utils.compute_rd(
+             width=wh[0], height=wh[1],
+             cx=cxcy[0], cy=cxcy[1],
+             tanfovx=self.tanfovx, tanfovy=self.tanfovy,
+             c2w_matrix=self.c2w.cuda())
+         rd = rd.to(self.c2w.device if device is None else device)
+         return rd
+
+     def project(self, pts, return_depth=False):
+         # Return normalized image coordinates in [-1, 1]
+         cam_pts = pts @ self.w2c[:3, :3].T + self.w2c[:3, 3]
+         depth = cam_pts[:, [2]]
+         cam_uv = cam_pts[:, :2] / depth
+         scale_x = 1 / self.tanfovx
+         scale_y = 1 / self.tanfovy
+         shift_x = 2 * self.cx_p - 1
+         shift_y = 2 * self.cy_p - 1
+         cam_uv[:, 0] = cam_uv[:, 0] * scale_x + shift_x
+         cam_uv[:, 1] = cam_uv[:, 1] * scale_y + shift_y
+         if return_depth:
+             return cam_uv, depth
+         return cam_uv
+
+     def depth2pts(self, depth):
+         device = depth.device
+         h, w = depth.shape[-2:]
+         rd = self.compute_rd(wh=(w, h), device=device)
+         return self.position.view(3, 1, 1).to(device) + rd * depth
+
+     def depth2normal(self, depth, ks=3, tol_cos=-1):
+         assert ks % 2 == 1
+         pad = ks // 2
+         ks_1 = ks - 1
+         pts = self.depth2pts(depth)
+         normal_pseudo = torch.zeros_like(pts)
+         dx = pts[:, pad:-pad, ks_1:] - pts[:, pad:-pad, :-ks_1]
+         dy = pts[:, ks_1:, pad:-pad] - pts[:, :-ks_1, pad:-pad]
+         normal_pseudo[:, pad:-pad, pad:-pad] = torch.nn.functional.normalize(torch.cross(dx, dy, dim=0), dim=0)
+
+         if tol_cos > 0:
+             with torch.no_grad():
+                 pts_dir = torch.nn.functional.normalize(pts - self.position.view(3, 1, 1), dim=0)
+                 dot = (normal_pseudo * pts_dir).sum(0)
+                 mask = (dot > tol_cos)
+             normal_pseudo = normal_pseudo * mask
+
+         return normal_pseudo
+
+
+ class Camera(CameraBase):
+     def __init__(
+             self, image_name,
+             w2c, fovx, fovy, cx_p, cy_p,
+             near=0.02,
+             image=None, mask=None, depth=None,
+             sparse_pt=None):
+
+         self.image_name = image_name
+
+         # Camera parameters
+         self.w2c = torch.tensor(w2c, dtype=torch.float32, device="cuda")
+         self.c2w = self.w2c.inverse().contiguous()
+
+         self.fovx = fovx
+         self.fovy = fovy
+
+         # Load frame
+         self.image = image.cpu()
+
+         # Other camera parameters
+         self.image_width = self.image.shape[2]
+         self.image_height = self.image.shape[1]
+         self.cx_p = (0.5 if cx_p is None else cx_p)
+         self.cy_p = (0.5 if cy_p is None else cy_p)
+         self.near = near
+
+         # Load mask and depth if provided
+         self.mask = mask.cpu() if mask is not None else None
+         self.depth = depth.cpu() if depth is not None else None
+
+         # Load sparse depth
+         if sparse_pt is not None:
+             self.sparse_pt = torch.tensor(sparse_pt, dtype=torch.float32, device="cpu")
+         else:
+             self.sparse_pt = None
+
+     def to(self, device):
+         self.image = self.image.to(device)
+         if self.mask is not None:
+             self.mask = self.mask.to(device)
+         if self.depth is not None:
+             self.depth = self.depth.to(device)
+         return self
+
+     def auto_exposure_init(self):
+         self._exposure_A = torch.eye(3, dtype=torch.float32, device="cuda")
+         self._exposure_t = torch.zeros([3, 1, 1], dtype=torch.float32, device="cuda")
+         self.exposure_updated = False
+
+     def auto_exposure_apply(self, image):
+         if self.exposure_updated:
+             image = torch.einsum('ij,jhw->ihw', self._exposure_A, image) + self._exposure_t
+         return image
+
+     def auto_exposure_update(self, ren, ref):
+         self.exposure_updated = True
+         self._exposure_A.requires_grad_()
+         self._exposure_t.requires_grad_()
+         optim = torch.optim.Adam([self._exposure_A, self._exposure_t], lr=1e-3)
+         for _ in range(100):
+             loss = (self.auto_exposure_apply(ren).clamp(0, 1) - ref).abs().mean()
+             loss.backward()
+             optim.step()
+             optim.zero_grad(set_to_none=True)
+         self._exposure_A.requires_grad_(False)
+         self._exposure_t.requires_grad_(False)
+
+     def clone_mini(self):
+         return MiniCam(
+             c2w=self.c2w.clone(),
+             fovx=self.fovx, fovy=self.fovy,
+             width=self.image_width, height=self.image_height,
+             near=self.near,
+             cx_p=self.cx_p, cy_p=self.cy_p)
+
+
+ class MiniCam(CameraBase):
+     def __init__(self,
+                  c2w, fovx, fovy,
+                  width, height,
+                  near=0.02,
+                  cx_p=None, cy_p=None,
+                  image_name="minicam"):
+
+         self.image_name = image_name
+         self.c2w = torch.tensor(c2w).clone().cuda()
+         self.w2c = self.c2w.inverse()
+
+         self.fovx = fovx
+         self.fovy = fovy
+         self.image_width = width
+         self.image_height = height
+         self.cx_p = (0.5 if cx_p is None else cx_p)
+         self.cy_p = (0.5 if cy_p is None else cy_p)
+         self.near = near
+
+         self.depth = None
+         self.mask = None
+
+     def clone_mini(self):
+         return MiniCam(
+             c2w=self.c2w.clone(),
+             fovx=self.fovx, fovy=self.fovy,
+             width=self.image_width, height=self.image_height,
+             near=self.near,
+             cx_p=self.cx_p, cy_p=self.cy_p)
+
+     def move_forward(self, dist):
+         new_position = self.position + dist * self.lookat
+         self.c2w[:3, 3] = new_position
+         self.w2c = self.c2w.inverse()
+         return self
+
+     def move_up(self, dist):
+         return self.move_down(-dist)
+
+     def move_down(self, dist):
+         new_position = self.position + dist * self.down
+         self.c2w[:3, 3] = new_position
+         self.w2c = self.c2w.inverse()
+         return self
+
+     def move_right(self, dist):
+         new_position = self.position + dist * self.right
+         self.c2w[:3, 3] = new_position
+         self.w2c = self.c2w.inverse()
+         return self
+
+     def move_left(self, dist):
+         return self.move_right(-dist)
+
+     def rotate(self, R):
+         self.c2w[:3, :3] = (R @ self.w2c[:3, :3]).T
+         self.w2c = self.c2w.inverse()
+         return self
+
+     def rotate_x(self, rad=None, deg=None):
+         assert rad is None or deg is None, "Can only specify rotation by either rad or deg."
+         if rad is None:
+             rad = np.deg2rad(deg)
+         R = torch.tensor([
+             [1, 0, 0],
+             [0, np.cos(rad), -np.sin(rad)],
+             [0, np.sin(rad), np.cos(rad)],
+         ], dtype=torch.float32, device="cuda")
+         return self.rotate(R)
+
+     def rotate_y(self, rad=None, deg=None):
+         assert rad is None or deg is None, "Can only specify rotation by either rad or deg."
+         if rad is None:
+             rad = np.deg2rad(deg)
+         R = torch.tensor([
+             [np.cos(rad), 0, -np.sin(rad)],
+             [0, 1, 0],
+             [np.sin(rad), 0, np.cos(rad)],
+         ], dtype=torch.float32, device="cuda")
+         return self.rotate(R)
+
+     def rotate_z(self, rad=None, deg=None):
+         assert rad is None or deg is None, "Can only specify rotation by either rad or deg."
+         if rad is None:
+             rad = np.deg2rad(deg)
+         R = torch.tensor([
+             [np.cos(rad), -np.sin(rad), 0],
+             [np.sin(rad), np.cos(rad), 0],
+             [0, 0, 1],
+         ], dtype=torch.float32, device="cuda")
+         return self.rotate(R)
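The `MiniCam` motion helpers above all return `self`, so view manipulations chain naturally, and `project` maps world points to normalized image coordinates in [-1, 1]. A small usage sketch, assuming a CUDA device and an existing camera `cam` (e.g. one returned by `DataPack.get_train_cameras()`); the point tensor is illustrative:

```python
# Usage sketch (assumption: CUDA is available and `cam` is an existing Camera/MiniCam).
import torch

fly = (
    cam.clone_mini()        # detached copy that can be moved freely
       .move_forward(0.5)   # step along the look-at axis
       .rotate_y(deg=15)    # yaw by 15 degrees
       .move_up(0.1)        # raise the viewpoint slightly
)

pts = torch.rand(100, 3, device="cuda")          # illustrative world-space points
uv, depth = fly.project(pts, return_depth=True)  # normalized coords in [-1, 1] plus depth
```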
src/config.py ADDED
@@ -0,0 +1,230 @@
+ # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+ #
+ # NVIDIA CORPORATION and its licensors retain all intellectual property
+ # and proprietary rights in and to this software, related documentation
+ # and any modifications thereto. Any use, reproduction, disclosure or
+ # distribution of this software and related documentation without an express
+ # license agreement from NVIDIA CORPORATION is strictly prohibited.
+
+ import argparse
+ from yacs.config import CfgNode
+
+
+ cfg = CfgNode()
+
+ cfg.model = CfgNode(dict(
+     n_samp_per_vox = 1,        # Number of sampled points per visited voxel
+     sh_degree = 3,             # Use 3 * (k+1)^2 params per voxel for view-dependent colors
+     ss = 1.5,                  # Super-sampling rate for anti-aliasing
+     white_background = False,  # Assume white background
+     black_background = False,  # Assume black background
+ ))
+
+ cfg.data = CfgNode(dict(
+     source_path = "",
+     image_dir_name = "images",
+     mask_dir_name = "masks",
+     res_downscale = 0.,
+     res_width = 0,
+     skip_blend_alpha = False,
+     data_device = "cpu",
+     eval = False,
+     test_every = 8,
+ ))
+
+ cfg.bounding = CfgNode(dict(
+     # Define the main (inside) region bounding box.
+     # The default uses the bounding suggested by the dataset, if given.
+     # Otherwise, it automatically chooses between the forward and camera_median modes.
+     # See src/utils/bounding_utils.py for details.
+
+     # default | camera_median | camera_max | forward | pcd
+     bound_mode = "default",
+     bound_scale = 1.0,         # Scaling factor of the bound
+     forward_dist_scale = 1.0,  # For forward mode
+     pcd_density_rate = 0.1,    # For pcd mode
+
+     # Number of Octree levels outside the main foreground region
+     outside_level = 5,
+ ))
+
+ cfg.optimizer = CfgNode(dict(
+     geo_lr = 0.025,
+     sh0_lr = 0.010,
+     shs_lr = 0.00025,
+
+     optim_beta1 = 0.1,
+     optim_beta2 = 0.99,
+     optim_eps = 1e-15,
+
+     lr_decay_ckpt = [19000],
+     lr_decay_mult = 0.1,
+ ))
+
+ cfg.regularizer = CfgNode(dict(
+     # Main photometric loss
+     lambda_photo = 1.0,
+     use_l1 = False,
+     use_huber = False,
+     huber_thres = 0.03,
+
+     # SSIM loss
+     lambda_ssim = 0.02,
+
+     # Sparse depth loss
+     lambda_sparse_depth = 0.0,
+     sparse_depth_until = 10_000,
+
+     # Mask loss
+     lambda_mask = 0.0,
+
+     # Depth Anything v2 loss
+     lambda_depthanythingv2 = 0.0,
+     depthanythingv2_from = 3000,
+     depthanythingv2_end = 20000,
+     depthanythingv2_end_mult = 0.1,
+
+     # MASt3R metric depth loss
+     lambda_mast3r_metric_depth = 0.0,
+     mast3r_repo_path = '',
+     mast3r_metric_depth_from = 0,
+     mast3r_metric_depth_end = 20000,
+     mast3r_metric_depth_end_mult = 0.01,
+
+     # Final transmittance should concentrate to either 0 or 1
+     lambda_T_concen = 0.0,
+
+     # Final transmittance should be 0
+     lambda_T_inside = 0.0,
+
+     # Per-point rgb loss
+     lambda_R_concen = 0.01,
+
+     # Geometric regularization
+     lambda_ascending = 0.0,
+     ascending_from = 0,
+
+     # Distortion loss (encourages distribution concentration along the ray)
+     lambda_dist = 0.1,
+     dist_from = 10000,
+
+     # Consistency loss between rendered normal and normal derived from expected depth
+     lambda_normal_dmean = 0.0,
+     n_dmean_from = 10_000,
+     n_dmean_end = 20_000,
+     n_dmean_ks = 3,
+     n_dmean_tol_deg = 90.0,
+
+     # Consistency loss between rendered normal and normal derived from median depth
+     lambda_normal_dmed = 0.0,
+     n_dmed_from = 3000,
+     n_dmed_end = 20_000,
+
+     # Total variation loss of the density grid
+     lambda_tv_density = 1e-10,
+     tv_from = 0,
+     tv_until = 10000,
+
+     # Data augmentation
+     ss_aug_max = 1.5,
+     rand_bg = False,
+ ))
+
+ cfg.init = CfgNode(dict(
+     # Voxel property initialization
+     geo_init = -10.0,
+     sh0_init = 0.5,
+     shs_init = 0.0,
+
+     sh_degree_init = 3,
+
+     # Init the main inside region with dense voxels
+     init_n_level = 6,  # (2^6)^3 voxels
+
+     # Ratio of the number of voxels used for the outside (background) region
+     init_out_ratio = 2.0,
+ ))
+
+ cfg.procedure = CfgNode(dict(
+     # Schedule
+     n_iter = 20_000,
+     sche_mult = 1.0,
+     seed = 3721,
+
+     # Reset sh
+     reset_sh_ckpt = [-1],
+
+     # Adaptive general setup
+     adapt_from = 1000,
+     adapt_every = 1000,
+
+     # Adaptive voxel pruning
+     prune_until = 18000,
+     prune_thres_init = 0.0001,
+     prune_thres_final = 0.05,
+
+     # Adaptive voxel subdivision
+     subdivide_until = 15000,
+     subdivide_all_until = 0,
+     subdivide_samp_thres = 1.0,  # A voxel's max sampling rate should be larger than this.
+     subdivide_prop = 0.05,
+     subdivide_max_num = 10_000_000,
+ ))
+
+ cfg.auto_exposure = CfgNode(dict(
+     enable = False,
+     auto_exposure_upd_ckpt = [5000, 10000, 15000]
+ ))
+
+ for i_cfg in cfg.values():
+     i_cfg.set_new_allowed(True)
+
+
+ def everytype2bool(v):
+     if v.isnumeric():
+         return bool(int(v))
+     v = v.lower()
+     if v in ['n', 'no', 'none', 'false']:
+         return False
+     return True
+
+
+ def update_argparser(parser):
+     for name in cfg.keys():
+         group = parser.add_argument_group(name)
+         for key, value in getattr(cfg, name).items():
+             t = type(value)
+
+             if t == bool:
+                 group.add_argument(f"--{key}", action='store_false' if value else 'store_true')
+             elif t == list:
+                 group.add_argument(f"--{key}", default=value, type=type(value[0]), nargs="*")
+             elif t == tuple:
+                 group.add_argument(f"--{key}", default=value, type=type(value[0]), nargs=len(value))
+             else:
+                 group.add_argument(f"--{key}", default=value, type=t)
+
+
+ def update_config(cfg_files, cmd_lst=[]):
+     # Update from config files
+     if isinstance(cfg_files, str):
+         cfg_files = [cfg_files]
+     for cfg_path in cfg_files:
+         cfg.merge_from_file(cfg_path)
+
+     if len(cmd_lst) == 0:
+         return
+
+     # Parse the arguments from the command line
+     internal_parser = argparse.ArgumentParser()
+     update_argparser(internal_parser)
+     internal_args = internal_parser.parse_args(cmd_lst)
+
+     # Update from command-line args
+     for name in cfg.keys():
+         cfg_subgroup = getattr(cfg, name)
+         for key in cfg_subgroup.keys():
+             arg_val = getattr(internal_args, key)
+             # Check whether the default value was overridden
+             if internal_parser.get_default(key) != arg_val:
+                 cfg_subgroup[key] = arg_val
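`update_config` first merges YAML files into the global `cfg` and then applies only those command-line values that differ from the argparse defaults. A short usage sketch (the YAML path and override values are made up for illustration):

```python
# Usage sketch for the config system (the path and values are illustrative).
from src.config import cfg, update_config

# Merge a trained model's saved config, then override two fields as if passed on the CLI.
update_config("outputs/scene/config.yaml",
              cmd_lst=["--res_downscale", "2.0", "--sh_degree", "2"])
print(cfg.data.res_downscale, cfg.model.sh_degree)  # 2.0 2
```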
src/config_old.py ADDED
@@ -0,0 +1,230 @@
+ # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+ #
+ # NVIDIA CORPORATION and its licensors retain all intellectual property
+ # and proprietary rights in and to this software, related documentation
+ # and any modifications thereto. Any use, reproduction, disclosure or
+ # distribution of this software and related documentation without an express
+ # license agreement from NVIDIA CORPORATION is strictly prohibited.
+
+ import argparse
+ from yacs.config import CfgNode
+
+
+ cfg = CfgNode()
+
+ cfg.model = CfgNode(dict(
+     n_samp_per_vox = 1,        # Number of sampled points per visited voxel
+     sh_degree = 3,             # Use 3 * (k+1)^2 params per voxel for view-dependent colors
+     ss = 1.5,                  # Super-sampling rate for anti-aliasing
+     white_background = False,  # Assume white background
+     black_background = False,  # Assume black background
+ ))
+
+ cfg.data = CfgNode(dict(
+     source_path = "",
+     image_dir_name = "images",
+     res_downscale = 0.,
+     res_width = 0,
+     skip_blend_alpha = False,
+     data_device = "cpu",
+     eval = False,
+     test_every = 8,
+     alpha_is_white = True,
+ ))
+
+ cfg.bounding = CfgNode(dict(
+     # Define the main (inside) region bounding box.
+     # The default uses the bounding suggested by the dataset, if given.
+     # Otherwise, it automatically chooses between the forward and camera_median modes.
+     # See src/utils/bounding_utils.py for details.
+
+     # default | camera_median | camera_max | forward | pcd
+     bound_mode = "default",
+     bound_scale = 1.0,         # Scaling factor of the bound
+     forward_dist_scale = 1.0,  # For forward mode
+     pcd_density_rate = 0.1,    # For pcd mode
+
+     # Number of Octree levels outside the main foreground region
+     outside_level = 5,
+ ))
+
+ cfg.optimizer = CfgNode(dict(
+     geo_lr = 0.025,
+     sh0_lr = 0.010,
+     shs_lr = 0.00025,
+
+     optim_beta1 = 0.1,
+     optim_beta2 = 0.99,
+     optim_eps = 1e-15,
+
+     lr_decay_ckpt = [19000],
+     lr_decay_mult = 0.1,
+ ))
+
+ cfg.regularizer = CfgNode(dict(
+     # Main photometric loss
+     lambda_photo = 1.0,
+     use_l1 = False,
+     use_huber = False,
+     huber_thres = 0.03,
+
+     # SSIM loss
+     lambda_ssim = 0.02,
+
+     # Sparse depth loss
+     lambda_sparse_depth = 0.0,
+     sparse_depth_until = 10_000,
+
+     # Mask loss
+     lambda_mask = 0.0,
+
+     # Depth Anything v2 loss
+     lambda_depthanythingv2 = 0.0,
+     depthanythingv2_from = 3000,
+     depthanythingv2_end = 20000,
+     depthanythingv2_end_mult = 0.1,
+
+     # MASt3R metric depth loss
+     lambda_mast3r_metric_depth = 0.0,
+     mast3r_repo_path = '',
+     mast3r_metric_depth_from = 0,
+     mast3r_metric_depth_end = 20000,
+     mast3r_metric_depth_end_mult = 0.01,
+
+     # Final transmittance should concentrate to either 0 or 1
+     lambda_T_concen = 0.0,
+
+     # Final transmittance should be 0
+     lambda_T_inside = 0.0,
+
+     # Per-point rgb loss
+     lambda_R_concen = 0.01,
+
+     # Geometric regularization
+     lambda_ascending = 0.0,
+     ascending_from = 0,
+
+     # Distortion loss (encourages distribution concentration along the ray)
+     lambda_dist = 0.1,
+     dist_from = 10000,
+
+     # Consistency loss between rendered normal and normal derived from expected depth
+     lambda_normal_dmean = 0.0,
+     n_dmean_from = 10_000,
+     n_dmean_end = 20_000,
+     n_dmean_ks = 3,
+     n_dmean_tol_deg = 90.0,
+
+     # Consistency loss between rendered normal and normal derived from median depth
+     lambda_normal_dmed = 0.0,
+     n_dmed_from = 3000,
+     n_dmed_end = 20_000,
+
+     # Total variation loss of the density grid
+     lambda_tv_density = 1e-10,
+     tv_from = 0,
+     tv_until = 10000,
+
+     # Data augmentation
+     ss_aug_max = 1.5,
+     rand_bg = False,
+ ))
+
+ cfg.init = CfgNode(dict(
+     # Voxel property initialization
+     geo_init = -10.0,
+     sh0_init = 0.5,
+     shs_init = 0.0,
+
+     sh_degree_init = 3,
+
+     # Init the main inside region with dense voxels
+     init_n_level = 6,  # (2^6)^3 voxels
+
+     # Ratio of the number of voxels used for the outside (background) region
+     init_out_ratio = 2.0,
+ ))
+
+ cfg.procedure = CfgNode(dict(
+     # Schedule
+     n_iter = 20_000,
+     sche_mult = 1.0,
+     seed = 3721,
+
+     # Reset sh
+     reset_sh_ckpt = [-1],
+
+     # Adaptive general setup
+     adapt_from = 1000,
+     adapt_every = 1000,
+
+     # Adaptive voxel pruning
+     prune_until = 18000,
+     prune_thres_init = 0.0001,
+     prune_thres_final = 0.05,
+
+     # Adaptive voxel subdivision
+     subdivide_until = 15000,
+     subdivide_all_until = 0,
+     subdivide_samp_thres = 1.0,  # A voxel's max sampling rate should be larger than this.
+     subdivide_prop = 0.05,
+     subdivide_max_num = 10_000_000,
+ ))
+
+ cfg.auto_exposure = CfgNode(dict(
+     enable = False,
+     auto_exposure_upd_ckpt = [5000, 10000, 15000]
+ ))
+
+ for i_cfg in cfg.values():
+     i_cfg.set_new_allowed(True)
+
+
+ def everytype2bool(v):
+     if v.isnumeric():
+         return bool(int(v))
+     v = v.lower()
+     if v in ['n', 'no', 'none', 'false']:
+         return False
+     return True
+
+
+ def update_argparser(parser):
+     for name in cfg.keys():
+         group = parser.add_argument_group(name)
+         for key, value in getattr(cfg, name).items():
+             t = type(value)
+
+             if t == bool:
+                 group.add_argument(f"--{key}", action='store_false' if value else 'store_true')
+             elif t == list:
+                 group.add_argument(f"--{key}", default=value, type=type(value[0]), nargs="*")
+             elif t == tuple:
+                 group.add_argument(f"--{key}", default=value, type=type(value[0]), nargs=len(value))
+             else:
+                 group.add_argument(f"--{key}", default=value, type=t)
+
+
+ def update_config(cfg_files, cmd_lst=[]):
+     # Update from config files
+     if isinstance(cfg_files, str):
+         cfg_files = [cfg_files]
+     for cfg_path in cfg_files:
+         cfg.merge_from_file(cfg_path)
+
+     if len(cmd_lst) == 0:
+         return
+
+     # Parse the arguments from the command line
+     internal_parser = argparse.ArgumentParser()
+     update_argparser(internal_parser)
+     internal_args = internal_parser.parse_args(cmd_lst)
+
+     # Update from command-line args
+     for name in cfg.keys():
+         cfg_subgroup = getattr(cfg, name)
+         for key in cfg_subgroup.keys():
+             arg_val = getattr(internal_args, key)
+             # Check whether the default value was overridden
+             if internal_parser.get_default(key) != arg_val:
+                 cfg_subgroup[key] = arg_val
src/dataloader/__pycache__/data_pack.cpython-39.pyc ADDED
Binary file (5.62 kB).
src/dataloader/__pycache__/reader_colmap_dataset.cpython-39.pyc ADDED
Binary file (4.04 kB).
src/dataloader/__pycache__/reader_nerf_dataset.cpython-39.pyc ADDED
Binary file (3.97 kB).
src/dataloader/data_pack.py ADDED
@@ -0,0 +1,232 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
2
+ #
3
+ # NVIDIA CORPORATION and its licensors retain all intellectual property
4
+ # and proprietary rights in and to this software, related documentation
5
+ # and any modifications thereto. Any use, reproduction, disclosure or
6
+ # distribution of this software and related documentation without an express
7
+ # license agreement from NVIDIA CORPORATION is strictly prohibited.
8
+
9
+ import os
10
+ import time
11
+ import random
12
+ import numpy as np
13
+
14
+ import torch
15
+
16
+ from src.dataloader.reader_colmap_dataset import read_colmap_dataset
17
+ from src.dataloader.reader_nerf_dataset import read_nerf_dataset
18
+ from src.utils.camera_utils import interpolate_poses
19
+
20
+ from src.cameras import Camera, MiniCam
21
+
22
+
23
+ class DataPack:
24
+
25
+ def __init__(self,
26
+ source_path,
27
+ image_dir_name="images",
28
+ mask_dir_name="masks",
29
+ res_downscale=0.,
30
+ res_width=0,
31
+ skip_blend_alpha=False,
32
+ alpha_is_white=False,
33
+ data_device="cpu",
34
+ use_test=False,
35
+ test_every=8,
36
+ camera_params_only=False):
37
+
38
+ camera_creator = CameraCreator(
39
+ res_downscale=res_downscale,
40
+ res_width=res_width,
41
+ skip_blend_alpha=skip_blend_alpha,
42
+ alpha_is_white=alpha_is_white,
43
+ data_device=data_device,
44
+ camera_params_only=camera_params_only,
45
+ )
46
+
47
+ sparse_path = os.path.join(source_path, "sparse")
48
+ colmap_path = os.path.join(source_path, "colmap", "sparse")
49
+ meta_path1 = os.path.join(source_path, "transforms_train.json")
50
+ meta_path2 = os.path.join(source_path, "transforms.json")
51
+
52
+ # Read images concurrently
53
+ s_time = time.perf_counter()
54
+
55
+ if os.path.exists(sparse_path) or os.path.exists(colmap_path):
56
+ print("Read dataset in COLMAP format.")
57
+ dataset = read_colmap_dataset(
58
+ source_path=source_path,
59
+ image_dir_name=image_dir_name,
60
+ mask_dir_name=mask_dir_name,
61
+ use_test=use_test,
62
+ test_every=test_every,
63
+ camera_creator=camera_creator)
64
+ elif os.path.exists(meta_path1) or os.path.exists(meta_path2):
65
+ print("Read dataset in NeRF format.")
66
+ dataset = read_nerf_dataset(
67
+ source_path=source_path,
68
+ use_test=use_test,
69
+ test_every=test_every,
70
+ camera_creator=camera_creator)
71
+ else:
72
+ raise Exception("Unknown scene type!")
73
+
74
+ e_time = time.perf_counter()
75
+ print(f"Read dataset in {e_time - s_time:.3f} seconds.")
76
+
77
+ self._cameras = {
78
+ 'train': dataset['train_cam_lst'],
79
+ 'test': dataset['test_cam_lst'],
80
+ }
81
+
82
+ ##############################
83
+ # Read additional dataset info
84
+ ##############################
85
+ # If the dataset suggested a scene bound
86
+ self.suggested_bounding = dataset.get('suggested_bounding', None)
87
+
88
+ # If the dataset provide a transformation to other coordinate
89
+ self.to_world_matrix = None
90
+ to_world_path = os.path.join(source_path, 'to_world_matrix.txt')
91
+ if os.path.isfile(to_world_path):
92
+ self.to_world_matrix = np.loadtxt(to_world_path)
93
+
94
+ # If the dataset has a point cloud
95
+ self.point_cloud = dataset.get('point_cloud', None)
96
+
97
+ def get_train_cameras(self):
98
+ return self._cameras['train']
99
+
100
+ def get_test_cameras(self):
101
+ return self._cameras['test']
102
+
103
+ def interpolate_cameras(self, n_frames, starting_id=0, ids=[], step_forward=0):
104
+ cams = self.get_train_cameras()
105
+ if len(ids):
106
+ key_poses = [cams[i].c2w.cpu().numpy() for i in ids]
107
+ else:
108
+ assert starting_id >= 0
109
+ assert starting_id < len(cams)
110
+ cam_pos = torch.stack([cam.position for cam in cams])
111
+ ids = [starting_id]
112
+ for _ in range(3):
113
+ farthest_id = torch.cdist(cam_pos[ids], cam_pos).amin(0).argmax().item()
114
+ ids.append(farthest_id)
115
+ ids[1], ids[2] = ids[2], ids[1]
116
+ key_poses = [cams[i].c2w.cpu().numpy() for i in ids]
117
+
118
+ if step_forward != 0:
119
+ for i in range(len(key_poses)):
120
+ lookat = key_poses[i][:3, 2]
121
+ key_poses[i][:3, 3] += step_forward * lookat
122
+
123
+ interp_poses = interpolate_poses(key_poses, n_frame=n_frames, periodic=True)
124
+
125
+ base_cam = cams[ids[0]]
126
+ interp_cams = [
127
+ MiniCam(
128
+ c2w=pose,
129
+ fovx=base_cam.fovx, fovy=base_cam.fovy,
130
+ width=base_cam.image_width, height=base_cam.image_height)
131
+ for pose in interp_poses]
132
+ return interp_cams
133
+
134
+
135
+ # Create a random sequence of image indices
136
+ def compute_iter_idx(num_data, num_iter):
137
+ tr_iter_idx = []
138
+ while len(tr_iter_idx) < num_iter:
139
+ lst = list(range(num_data))
140
+ random.shuffle(lst)
141
+ tr_iter_idx.extend(lst)
142
+ return tr_iter_idx[:num_iter]
143
+
144
+
145
+ # Function that create Camera instances while parsing dataset
146
+ class CameraCreator:
147
+
148
+ warned = False
149
+
150
+ def __init__(self,
151
+ res_downscale=0.,
152
+ res_width=0,
153
+ skip_blend_alpha=False,
154
+ alpha_is_white=False,
155
+ data_device="cpu",
156
+ camera_params_only=False):
157
+
158
+ self.res_downscale = res_downscale
159
+ self.res_width = res_width
160
+ self.skip_blend_alpha = skip_blend_alpha
161
+ self.alpha_is_white = alpha_is_white
162
+ self.data_device = data_device
163
+ self.camera_params_only = camera_params_only
164
+
165
+ def __call__(self,
166
+ image,
167
+ w2c,
168
+ fovx,
169
+ fovy,
170
+ cx_p=0.5,
171
+ cy_p=0.5,
172
+ sparse_pt=None,
173
+ image_name="",
174
+ mask=None):
175
+
176
+ # Determine target resolution
177
+ if self.res_downscale > 0:
178
+ downscale = self.res_downscale
179
+ elif self.res_width > 0:
180
+ downscale = image.size[0] / self.res_width
181
+ else:
182
+ downscale = 1
183
+
184
+ total_pix = image.size[0] * image.size[1]
185
+ if total_pix > 1200 ** 2 and not self.warned:
186
+ self.warned = True
187
+ suggest_ds = (total_pix ** 0.5) / 1200
188
+ print(f"###################################################################")
189
+ print(f"Image too large. Suggest to use `--res_downscale {suggest_ds:.1f}`.")
190
+ print(f"###################################################################")
191
+
192
+ # Load camera parameters only
193
+ if self.camera_params_only:
194
+ return MiniCam(
195
+ c2w=np.linalg.inv(w2c),
196
+ fovx=fovx, fovy=fovy,
197
+ cx_p=cx_p, cy_p=cy_p,
198
+ width=round(image.size[0] / downscale),
199
+ height=round(image.size[1] / downscale),
200
+ image_name=image_name)
201
+
202
+ # Resize image if needed
203
+ if downscale != 1:
204
+ size = (round(image.size[0] / downscale), round(image.size[1] / downscale))
205
+ image = image.resize(size)
206
+
207
+ # Convert image to tensor
208
+ tensor = torch.tensor(np.array(image), dtype=torch.float32).moveaxis(-1, 0) / 255.0
209
+ if tensor.shape[0] == 4:
210
+ # Blend alpha channel
211
+ tensor, mask = tensor.split([3, 1], dim=0)
212
+ if not self.skip_blend_alpha:
213
+ tensor = tensor * mask + int(self.alpha_is_white) * (1 - mask)
214
+
215
+ # Convert the mask to a tensor if one is provided
216
+ if mask is not None:
217
+ size = tensor.shape[-2:][::-1]
218
+ if mask.size != size:
219
+ mask = mask.resize(size)
220
+ mask = torch.tensor(np.array(mask), dtype=torch.float32) / 255.0
221
+ if len(mask.shape) == 3:
222
+ mask = mask.mean(-1)
223
+ mask = mask[None]
224
+
225
+ return Camera(
226
+ w2c=w2c,
227
+ fovx=fovx, fovy=fovy,
228
+ cx_p=cx_p, cy_p=cy_p,
229
+ image=tensor,
230
+ mask=mask,
231
+ sparse_pt=sparse_pt,
232
+ image_name=image_name)
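Note: a small usage sketch of CameraCreator and compute_iter_idx, assuming a PIL image and a 4x4 world-to-camera matrix are already in hand; the resolution, FoV values, and file name are placeholders.

    import numpy as np
    from PIL import Image

    creator = CameraCreator(res_downscale=2.0, data_device="cpu")
    cam = creator(
        image=Image.new("RGB", (800, 600)),      # stand-in for a loaded photo
        w2c=np.eye(4, dtype=np.float32),
        fovx=1.2, fovy=0.9,
        image_name="frame_000")

    # Shuffled schedule of 1000 training iterations over 100 images.
    tr_iter_idx = compute_iter_idx(num_data=100, num_iter=1000)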
src/dataloader/reader_colmap_dataset.py ADDED
@@ -0,0 +1,162 @@
1
+ # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
2
+ #
3
+ # NVIDIA CORPORATION and its licensors retain all intellectual property
4
+ # and proprietary rights in and to this software, related documentation
5
+ # and any modifications thereto. Any use, reproduction, disclosure or
6
+ # distribution of this software and related documentation without an express
7
+ # license agreement from NVIDIA CORPORATION is strictly prohibited.
8
+
9
+ import os
10
+ import json
11
+ import natsort
12
+ import pycolmap
13
+ import numpy as np
14
+ from PIL import Image
15
+ from pathlib import Path
16
+ import concurrent.futures
17
+
18
+ from src.utils.colmap_utils import parse_colmap_pts
19
+ from src.utils.camera_utils import focal2fov
20
+
21
+
22
+ def read_colmap_dataset(source_path, image_dir_name, mask_dir_name, use_test, test_every, camera_creator):
23
+ """
24
+ Read a COLMAP dataset and return cameras, intrinsics, extrinsics, and optional masks.
25
+
26
+ Fixes:
27
+ - Safe image/mask opening using `with Image.open(...)` (no file leaks).
28
+ - Compatible with both old/new pycolmap APIs.
29
+ - Returns PIL.Image objects (for backward compatibility with DataPack).
30
+ """
31
+
32
+ source_path = Path(source_path)
33
+
34
+ # ---------------- Parse COLMAP reconstruction ----------------
35
+ sparse_path = source_path / "sparse" / "0"
36
+ if not sparse_path.exists():
37
+ sparse_path = source_path / "colmap" / "sparse" / "0"
38
+ if not sparse_path.exists():
39
+ raise Exception("Cannot find COLMAP reconstruction (expected sparse/0 or colmap/sparse/0).")
40
+
41
+ sfm = pycolmap.Reconstruction(sparse_path)
42
+ point_cloud = parse_colmap_pts(sfm)
43
+ correspondent = point_cloud.corr
44
+
45
+ # ---------------- Sort key by filename ----------------
46
+ keys = natsort.natsorted(sfm.images.keys(), key=lambda k: sfm.images[k].name)
47
+
48
+ # ---------------- Load all frames ----------------
49
+ todo_lst = []
50
+ for key in keys:
51
+ frame = sfm.images[key]
52
+
53
+ # ---- Load RGB image safely ----
54
+ image_path = source_path / image_dir_name / frame.name
55
+ if not image_path.exists():
56
+ image_path = image_path.with_suffix(".png")
57
+ if not image_path.exists():
58
+ image_path = image_path.with_suffix(".jpg")
59
+ if not image_path.exists():
60
+ image_path = image_path.with_suffix(".JPG")
61
+ if not image_path.exists():
62
+ raise Exception(f"File not found: {str(image_path)}")
63
+
64
+ # safely open and immediately copy to new PIL object (closed after copy)
65
+ with Image.open(image_path) as img:
66
+ image = img.copy() # copy keeps data in memory, closes file handle
67
+
68
+ # ---- Load intrinsics ----
69
+ if frame.camera.model.name == "SIMPLE_PINHOLE":
70
+ focal_x, cx, cy = frame.camera.params
71
+ fovx = focal2fov(focal_x, frame.camera.width)
72
+ fovy = focal2fov(focal_x, frame.camera.height)
73
+ cx_p = cx / frame.camera.width
74
+ cy_p = cy / frame.camera.height
75
+ elif frame.camera.model.name == "PINHOLE":
76
+ focal_x, focal_y, cx, cy = frame.camera.params
77
+ fovx = focal2fov(focal_x, frame.camera.width)
78
+ fovy = focal2fov(focal_y, frame.camera.height)
79
+ cx_p = cx / frame.camera.width
80
+ cy_p = cy / frame.camera.height
81
+ else:
82
+ raise ValueError(
83
+ f"Unsupported COLMAP camera model: {frame.camera.model.name}. "
84
+ "Only undistorted SIMPLE_PINHOLE and PINHOLE are supported."
85
+ )
86
+
87
+ # ---- Load extrinsics (support both pycolmap APIs) ----
88
+ w2c = np.eye(4, dtype=np.float32)
89
+ cam_from_world = getattr(frame, "cam_from_world", None)
90
+ if cam_from_world is not None:
91
+ if callable(cam_from_world):
92
+ # Old pycolmap API
93
+ w2c[:3] = cam_from_world().matrix()
94
+ else:
95
+ # New pycolmap API (Rigid3d object)
96
+ w2c[:3] = cam_from_world.matrix()
97
+ else:
98
+ raise RuntimeError("Cannot find cam_from_world attribute in COLMAP frame.")
99
+
100
+ # ---- Sparse point correspondence ----
101
+ sparse_pt = point_cloud.points[correspondent[frame.name]]
102
+
103
+ # ---- Optional mask ----
104
+ mask = None
105
+ if mask_dir_name is not None:
106
+ mask_path = (source_path / mask_dir_name / frame.name).with_suffix(".png")
107
+ if mask_path.exists():
108
+ with Image.open(mask_path) as m:
109
+ mask = m.copy() # keep PIL.Image for DataPack
110
+
111
+ # ---- Store frame data ----
112
+ todo_lst.append(dict(
113
+ image=image,
114
+ w2c=w2c,
115
+ fovx=fovx,
116
+ fovy=fovy,
117
+ cx_p=cx_p,
118
+ cy_p=cy_p,
119
+ sparse_pt=sparse_pt,
120
+ image_name=image_path.name,
121
+ mask=mask,
122
+ ))
123
+
124
+ # ---------------- Create cameras concurrently ----------------
125
+ import torch
126
+ torch.inverse(torch.eye(3, device="cuda")) # fix PyTorch lazy init bug
127
+
128
+ with concurrent.futures.ThreadPoolExecutor() as executor:
129
+ futures = [executor.submit(camera_creator, **todo) for todo in todo_lst]
130
+ cam_lst = [f.result() for f in futures]
131
+
132
+ # ---------------- Split train/test ----------------
133
+ if use_test:
134
+ train_cam_lst = [cam for i, cam in enumerate(cam_lst) if i % test_every != 0]
135
+ test_cam_lst = [cam for i, cam in enumerate(cam_lst) if i % test_every == 0]
136
+ else:
137
+ train_cam_lst = cam_lst
138
+ test_cam_lst = []
139
+
140
+ # ---------------- Optional bounding box ----------------
141
+ nerf_normalization_path = source_path / "nerf_normalization.json"
142
+ if nerf_normalization_path.is_file():
143
+ with open(nerf_normalization_path) as f:
144
+ nerf_norm = json.load(f)
145
+ suggested_center = np.array(nerf_norm["center"], dtype=np.float32)
146
+ suggested_radius = np.array(nerf_norm["radius"], dtype=np.float32)
147
+ suggested_bounding = np.stack([
148
+ suggested_center - suggested_radius,
149
+ suggested_center + suggested_radius,
150
+ ])
151
+ else:
152
+ suggested_bounding = None
153
+
154
+ # ---------------- Return dataset ----------------
155
+ dataset = {
156
+ "train_cam_lst": train_cam_lst,
157
+ "test_cam_lst": test_cam_lst,
158
+ "suggested_bounding": suggested_bounding,
159
+ "point_cloud": point_cloud,
160
+ }
161
+ return dataset
162
+
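Note: a sketch of how this reader is typically invoked, assuming a COLMAP-processed scene under ./data/scene with an images/ folder and a CUDA device available (the reader warms up torch.inverse on the GPU); the paths and CameraCreator options are placeholders.

    creator = CameraCreator(res_downscale=2.0)
    dataset = read_colmap_dataset(
        source_path="./data/scene",
        image_dir_name="images",
        mask_dir_name=None,          # e.g. "masks" if per-image PNG masks exist
        use_test=True,
        test_every=8,                # every 8th image becomes a test view
        camera_creator=creator)
    train_cams = dataset["train_cam_lst"]
    test_cams = dataset["test_cam_lst"]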
src/dataloader/reader_colmap_dataset_or.py ADDED
@@ -0,0 +1,148 @@
1
+ # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
2
+ #
3
+ # NVIDIA CORPORATION and its licensors retain all intellectual property
4
+ # and proprietary rights in and to this software, related documentation
5
+ # and any modifications thereto. Any use, reproduction, disclosure or
6
+ # distribution of this software and related documentation without an express
7
+ # license agreement from NVIDIA CORPORATION is strictly prohibited.
8
+
9
+ import os
10
+ import json
11
+ import natsort
12
+ import pycolmap
13
+ import numpy as np
14
+ from PIL import Image
15
+ from pathlib import Path
16
+ import concurrent.futures
17
+
18
+ from src.utils.colmap_utils import parse_colmap_pts
19
+ from src.utils.camera_utils import focal2fov
20
+
21
+
22
+ def read_colmap_dataset(source_path, image_dir_name, mask_dir_name, use_test, test_every, camera_creator):
23
+
24
+ source_path = Path(source_path)
25
+
26
+ # Parse colmap meta data
27
+ sparse_path = source_path / "sparse" / "0"
28
+ if not sparse_path.exists():
29
+ sparse_path = source_path / "colmap" / "sparse" / "0"
30
+ if not sparse_path.exists():
31
+ raise Exception("Can not find COLMAP reconstruction.")
32
+
33
+ sfm = pycolmap.Reconstruction(sparse_path)
34
+ point_cloud = parse_colmap_pts(sfm)
35
+ correspondent = point_cloud.corr
36
+
37
+ # Sort key by filename
38
+ keys = natsort.natsorted(
39
+ sfm.images.keys(),
40
+ key = lambda k : sfm.images[k].name)
41
+
42
+ # Load all images and cameras
43
+ todo_lst = []
44
+ for key in keys:
45
+
46
+ frame = sfm.images[key]
47
+
48
+ # Load image
49
+ image_path = source_path / image_dir_name / frame.name
50
+ if not image_path.exists():
51
+ image_path = image_path.with_suffix('.png')
52
+ if not image_path.exists():
53
+ image_path = image_path.with_suffix('.jpg')
54
+ if not image_path.exists():
55
+ image_path = image_path.with_suffix('.JPG')
56
+ if not image_path.exists():
57
+ raise Exception(f"File not found: {str(image_path)}")
58
+ image = Image.open(image_path)
59
+
60
+ # Load camera intrinsic
61
+ if frame.camera.model.name == "SIMPLE_PINHOLE":
62
+ focal_x, cx, cy = frame.camera.params
63
+ fovx = focal2fov(focal_x, frame.camera.width)
64
+ fovy = focal2fov(focal_x, frame.camera.height)
65
+ cx_p = cx / frame.camera.width
66
+ cy_p = cy / frame.camera.height
67
+ elif frame.camera.model.name == "PINHOLE":
68
+ focal_x, focal_y, cx, cy = frame.camera.params
69
+ fovx = focal2fov(focal_x, frame.camera.width)
70
+ fovy = focal2fov(focal_y, frame.camera.height)
71
+ cx_p = cx / frame.camera.width
72
+ cy_p = cy / frame.camera.height
73
+ else:
74
+ assert False, "Colmap camera model not handled: only undistorted datasets (PINHOLE or SIMPLE_PINHOLE cameras) supported!"
75
+
76
+ # Load camera extrinsic
77
+ w2c = np.eye(4, dtype=np.float32)
78
+ try:
79
+ w2c[:3] = frame.cam_from_world().matrix()
80
+ except:
81
+ # Older version of pycolmap
82
+ w2c[:3] = frame.cam_from_world.matrix()
83
+
84
+ # Load sparse point
85
+ sparse_pt = point_cloud.points[correspondent[frame.name]]
86
+
87
+ # Load mask if there is
88
+ mask_path = (source_path / mask_dir_name / frame.name).with_suffix('.png')
89
+ if mask_path.exists():
90
+ mask = Image.open(mask_path)
91
+ else:
92
+ mask = None
93
+
94
+ todo_lst.append(dict(
95
+ image=image,
96
+ w2c=w2c,
97
+ fovx=fovx,
98
+ fovy=fovy,
99
+ cx_p=cx_p,
100
+ cy_p=cy_p,
101
+ sparse_pt=sparse_pt,
102
+ image_name=image_path.name,
103
+ mask=mask,
104
+ ))
105
+
106
+ # Load all cameras concurrently
107
+ import torch
108
+ torch.inverse(torch.eye(3, device="cuda")) # Fix module lazy loading bug:
109
+ # https://github.com/pytorch/pytorch/issues/90613
110
+
111
+ with concurrent.futures.ThreadPoolExecutor() as executor:
112
+ futures = [executor.submit(camera_creator, **todo) for todo in todo_lst]
113
+ cam_lst = [f.result() for f in futures]
114
+
115
+ # Split train/test
116
+ if use_test:
117
+ train_cam_lst = [
118
+ cam for i, cam in enumerate(cam_lst)
119
+ if i % test_every != 0]
120
+ test_cam_lst = [
121
+ cam for i, cam in enumerate(cam_lst)
122
+ if i % test_every == 0]
123
+ else:
124
+ train_cam_lst = cam_lst
125
+ test_cam_lst = []
126
+
127
+ # Parse main scene bound if there is
128
+ nerf_normalization_path = os.path.join(source_path, "nerf_normalization.json")
129
+ if os.path.isfile(nerf_normalization_path):
130
+ with open(nerf_normalization_path) as f:
131
+ nerf_normalization = json.load(f)
132
+ suggested_center = np.array(nerf_normalization["center"], dtype=np.float32)
133
+ suggested_radius = np.array(nerf_normalization["radius"], dtype=np.float32)
134
+ suggested_bounding = np.stack([
135
+ suggested_center - suggested_radius,
136
+ suggested_center + suggested_radius,
137
+ ])
138
+ else:
139
+ suggested_bounding = None
140
+
141
+ # Pack dataset
142
+ dataset = {
143
+ 'train_cam_lst': train_cam_lst,
144
+ 'test_cam_lst': test_cam_lst,
145
+ 'suggested_bounding': suggested_bounding,
146
+ 'point_cloud': point_cloud,
147
+ }
148
+ return dataset
src/dataloader/reader_nerf_dataset.py ADDED
@@ -0,0 +1,180 @@
1
+ # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
2
+ #
3
+ # NVIDIA CORPORATION and its licensors retain all intellectual property
4
+ # and proprietary rights in and to this software, related documentation
5
+ # and any modifications thereto. Any use, reproduction, disclosure or
6
+ # distribution of this software and related documentation without an express
7
+ # license agreement from NVIDIA CORPORATION is strictly prohibited.
8
+
9
+ import os
10
+ import json
11
+ import pycolmap
12
+ import numpy as np
13
+ from PIL import Image
14
+ from pathlib import Path
15
+ import concurrent.futures
16
+
17
+ from src.utils.colmap_utils import parse_colmap_pts
18
+ from src.utils.camera_utils import fov2focal, focal2fov
19
+
20
+
21
+ def read_nerf_dataset(source_path, test_every, use_test, camera_creator):
22
+
23
+ source_path = Path(source_path)
24
+
25
+ # Load training cameras
26
+ if (source_path / "transforms_train.json").exists():
27
+ train_cam_lst, point_cloud = read_cameras_from_json(
28
+ source_path=source_path,
29
+ meta_fname="transforms_train.json",
30
+ camera_creator=camera_creator)
31
+ else:
32
+ train_cam_lst, point_cloud = read_cameras_from_json(
33
+ source_path=source_path,
34
+ meta_fname="transforms.json",
35
+ camera_creator=camera_creator)
36
+
37
+ # Load testing cameras
38
+ if (source_path / "transforms_test.json").exists():
39
+ test_cam_lst, _ = read_cameras_from_json(
40
+ source_path=source_path,
41
+ meta_fname="transforms_test.json",
42
+ camera_creator=camera_creator)
43
+ elif use_test:
44
+ test_cam_lst = [
45
+ cam for i, cam in enumerate(train_cam_lst)
46
+ if i % test_every == 0]
47
+ train_cam_lst = [
48
+ cam for i, cam in enumerate(train_cam_lst)
49
+ if i % test_every != 0]
50
+ else:
51
+ test_cam_lst = []
52
+
53
+ # Parse the main scene bound if one is provided
54
+ nerf_normalization_path = os.path.join(source_path, "nerf_normalization.json")
55
+ if os.path.isfile(nerf_normalization_path):
56
+ with open(nerf_normalization_path) as f:
57
+ nerf_normalization = json.load(f)
58
+ suggested_center = np.array(nerf_normalization["center"], dtype=np.float32)
59
+ suggested_radius = np.array(nerf_normalization["radius"], dtype=np.float32)
60
+ suggested_bounding = np.stack([
61
+ suggested_center - suggested_radius,
62
+ suggested_center + suggested_radius,
63
+ ])
64
+ else:
65
+ # Assume synthetic blender scene bound
66
+ suggested_bounding = np.array([
67
+ [-1.5, -1.5, -1.5],
68
+ [1.5, 1.5, 1.5],
69
+ ], dtype=np.float32)
70
+
71
+ # Pack dataset
72
+ dataset = {
73
+ 'train_cam_lst': train_cam_lst,
74
+ 'test_cam_lst': test_cam_lst,
75
+ 'suggested_bounding': suggested_bounding,
76
+ 'point_cloud': point_cloud,
77
+ }
78
+ return dataset
79
+
80
+
81
+ def read_cameras_from_json(source_path, meta_fname, camera_creator):
82
+
83
+ with open(source_path / meta_fname) as f:
84
+ meta = json.load(f)
85
+
86
+ # Load COLMAP points if available
87
+ if "colmap" in meta:
88
+ sfm = pycolmap.Reconstruction(source_path / meta["colmap"]["path"])
89
+ if "transform" in meta["colmap"]:
90
+ transform = np.array(meta["colmap"]["transform"])
91
+ else:
92
+ transform = None
93
+ point_cloud = parse_colmap_pts(sfm, transform)
94
+ correspondent = point_cloud.corr
95
+ else:
96
+ point_cloud = None
97
+ correspondent = None
98
+
99
+ # Load global setup
100
+ global_fovx = meta.get("camera_angle_x", 0)
101
+ global_fovy = meta.get("camera_angle_y", 0)
102
+ global_cx_p = parse_principle_point(meta, is_cx=True)
103
+ global_cy_p = parse_principle_point(meta, is_cx=False)
104
+
105
+ # Load all images and cameras
106
+ todo_lst = []
107
+ for frame in meta["frames"]:
108
+
109
+ # Guess the rgb image path and load image
110
+ path_candidates = [
111
+ source_path / frame["file_path"],
112
+ source_path / (frame["file_path"] + '.png'),
113
+ source_path / (frame["file_path"] + '.jpg'),
114
+ source_path / (frame["file_path"] + '.JPG'),
115
+ ]
116
+ for image_path in path_candidates:
117
+ if image_path.exists():
118
+ break
119
+
120
+ if frame.get('heldout', False):
121
+ image = Image.new('RGB', (frame['w'], frame['h']))
122
+ elif image_path.exists():
123
+ image = Image.open(image_path)
124
+ else:
125
+ raise Exception(f"File not found: {str(image_path)}")
126
+
127
+ # Load camera intrinsic
128
+ fovx = frame.get('camera_angle_x', global_fovx)
129
+ cx_p = frame.get('cx_p', global_cx_p)
130
+ cy_p = frame.get('cy_p', global_cy_p)
131
+
132
+ if 'camera_angle_y' in frame:
133
+ fovy = frame['camera_angle_y']
134
+ elif global_fovy > 0:
135
+ fovy = global_fovy
136
+ else:
137
+ fovy = focal2fov(fov2focal(fovx, image.size[0]), image.size[1])
138
+
139
+ # Load camera pose
140
+ c2w = np.array(frame["transform_matrix"])
141
+ c2w[:3, 1:3] *= -1 # from opengl y-up-z-back to colmap y-down-z-forward
142
+ w2c = np.linalg.inv(c2w).astype(np.float32)
143
+
144
+ # Load sparse point
145
+ if point_cloud is not None:
146
+ sparse_pt = point_cloud.points[correspondent[image_path.name]]
147
+ else:
148
+ sparse_pt = None
149
+
150
+ todo_lst.append(dict(
151
+ image=image,
152
+ w2c=w2c,
153
+ fovx=fovx,
154
+ fovy=fovy,
155
+ cx_p=cx_p,
156
+ cy_p=cy_p,
157
+ sparse_pt=sparse_pt,
158
+ image_name=image_path.name,
159
+ ))
160
+
161
+ # Load all cameras concurrently
162
+ import torch
163
+ torch.inverse(torch.eye(3, device="cuda")) # Fix module lazy loading bug:
164
+ # https://github.com/pytorch/pytorch/issues/90613
165
+
166
+ with concurrent.futures.ThreadPoolExecutor() as executor:
167
+ futures = [executor.submit(camera_creator, **todo) for todo in todo_lst]
168
+ cam_lst = [f.result() for f in futures]
169
+
170
+ return cam_lst, point_cloud
171
+
172
+
173
+ def parse_principle_point(info, is_cx):
174
+ key = "cx" if is_cx else "cy"
175
+ key_res = "w" if is_cx else "h"
176
+ if f"{key}_p" in info:
177
+ return info[f"{key}_p"]
178
+ if key in info and key_res in info:
179
+ return info[key] / info[key_res]
180
+ return None
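Note: a worked example of the two per-frame conversions performed by this reader, assuming `fov2focal`/`focal2fov` implement the standard pinhole relation focal = W / (2 * tan(fov / 2)); the numbers are illustrative.

    import math
    import numpy as np

    # transforms.json stores camera-to-world in the OpenGL/Blender convention
    # (y up, camera looks down -z); flipping the y and z columns gives the
    # COLMAP convention (y down, camera looks down +z) used downstream.
    c2w = np.eye(4)
    c2w[:3, 3] = [0.0, 0.0, 4.0]           # camera 4 units from the origin
    c2w[:3, 1:3] *= -1
    w2c = np.linalg.inv(c2w).astype(np.float32)

    # fovy is derived from fovx through the shared focal length.
    fovx, W, H = math.radians(60.0), 800, 600
    focal = W / (2 * math.tan(fovx / 2))    # fov2focal(fovx, W)
    fovy = 2 * math.atan(H / (2 * focal))   # focal2fov(focal, H)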
src/dataloader/reader_nerf_dataset_copy.py ADDED
@@ -0,0 +1,170 @@
1
+ # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
2
+ #
3
+ # NVIDIA CORPORATION and its licensors retain all intellectual property
4
+ # and proprietary rights in and to this software, related documentation
5
+ # and any modifications thereto. Any use, reproduction, disclosure or
6
+ # distribution of this software and related documentation without an express
7
+ # license agreement from NVIDIA CORPORATION is strictly prohibited.
8
+
9
+ import os
10
+ import json
11
+ import pycolmap
12
+ import numpy as np
13
+ from PIL import Image
14
+ from pathlib import Path
15
+
16
+ from src.utils.colmap_utils import parse_colmap_pts
17
+ from src.utils.camera_utils import fov2focal, focal2fov
18
+
19
+
20
+ def read_nerf_dataset(source_path, test_every, use_test, camera_creator):
21
+
22
+ source_path = Path(source_path)
23
+
24
+ # Load training cameras
25
+ if (source_path / "transforms_train.json").exists():
26
+ train_cam_lst, point_cloud = read_cameras_from_json(
27
+ source_path=source_path,
28
+ meta_fname="transforms_train.json",
29
+ camera_creator=camera_creator)
30
+ else:
31
+ train_cam_lst, point_cloud = read_cameras_from_json(
32
+ source_path=source_path,
33
+ meta_fname="transforms.json",
34
+ camera_creator=camera_creator)
35
+
36
+ # Load testing cameras
37
+ if (source_path / "transforms_test.json").exists():
38
+ test_cam_lst, _ = read_cameras_from_json(
39
+ source_path=source_path,
40
+ meta_fname="transforms_test.json",
41
+ camera_creator=camera_creator)
42
+ elif use_test:
43
+ test_cam_lst = [
44
+ cam for i, cam in enumerate(train_cam_lst)
45
+ if i % test_every == 0]
46
+ train_cam_lst = [
47
+ cam for i, cam in enumerate(train_cam_lst)
48
+ if i % test_every != 0]
49
+ else:
50
+ test_cam_lst = []
51
+
52
+ # Parse main scene bound if there is
53
+ nerf_normalization_path = os.path.join(source_path, "nerf_normalization.json")
54
+ if os.path.isfile(nerf_normalization_path):
55
+ with open(nerf_normalization_path) as f:
56
+ nerf_normalization = json.load(f)
57
+ suggested_center = np.array(nerf_normalization["center"], dtype=np.float32)
58
+ suggested_radius = np.array(nerf_normalization["radius"], dtype=np.float32)
59
+ suggested_bounding = np.stack([
60
+ suggested_center - suggested_radius,
61
+ suggested_center + suggested_radius,
62
+ ])
63
+ else:
64
+ # Assume synthetic blender scene bound
65
+ suggested_bounding = np.array([
66
+ [-1.5, -1.5, -1.5],
67
+ [1.5, 1.5, 1.5],
68
+ ], dtype=np.float32)
69
+
70
+ # Pack dataset
71
+ dataset = {
72
+ 'train_cam_lst': train_cam_lst,
73
+ 'test_cam_lst': test_cam_lst,
74
+ 'suggested_bounding': suggested_bounding,
75
+ 'point_cloud': point_cloud,
76
+ }
77
+ return dataset
78
+
79
+
80
+ def read_cameras_from_json(source_path, meta_fname, camera_creator):
81
+
82
+ with open(source_path / meta_fname) as f:
83
+ meta = json.load(f)
84
+
85
+ # Load COLMAP points if there is
86
+ if "colmap" in meta:
87
+ sfm = pycolmap.Reconstruction(source_path / meta["colmap"]["path"])
88
+ if "transform" in meta["colmap"]:
89
+ transform = np.array(meta["colmap"]["transform"])
90
+ else:
91
+ transform = None
92
+ point_cloud = parse_colmap_pts(sfm, transform)
93
+ correspondent = point_cloud.corr
94
+ else:
95
+ point_cloud = None
96
+ correspondent = None
97
+
98
+ # Load global setup
99
+ global_fovx = meta.get("camera_angle_x", 0)
100
+ global_fovy = meta.get("camera_angle_y", 0)
101
+ global_cx_p = parse_principle_point(meta, is_cx=True)
102
+ global_cy_p = parse_principle_point(meta, is_cx=False)
103
+
104
+ # Load all images and cameras
105
+ cam_lst = []
106
+ for frame in meta["frames"]:
107
+
108
+ # Guess the rgb image path and load image
109
+ path_candidates = [
110
+ source_path / frame["file_path"],
111
+ source_path / (frame["file_path"] + '.png'),
112
+ source_path / (frame["file_path"] + '.jpg'),
113
+ source_path / (frame["file_path"] + '.JPG'),
114
+ ]
115
+ for image_path in path_candidates:
116
+ if image_path.exists():
117
+ break
118
+
119
+ if frame.get('heldout', False):
120
+ image = Image.new('RGB', (frame['w'], frame['h']))
121
+ elif image_path.exists():
122
+ image = Image.open(image_path)
123
+ else:
124
+ raise Exception(f"File not found: {str(image_path)}")
125
+
126
+ # Load camera intrinsic
127
+ fovx = frame.get('camera_angle_x', global_fovx)
128
+ cx_p = frame.get('cx_p', global_cx_p)
129
+ cy_p = frame.get('cy_p', global_cy_p)
130
+
131
+ if 'camera_angle_y' in frame:
132
+ fovy = frame['camera_angle_y']
133
+ elif global_fovy > 0:
134
+ fovy = global_fovy
135
+ else:
136
+ fovy = focal2fov(fov2focal(fovx, image.size[0]), image.size[1])
137
+
138
+ # Load camera pose
139
+ c2w = np.array(frame["transform_matrix"])
140
+ c2w[:3, 1:3] *= -1 # from opengl y-up-z-back to colmap y-down-z-forward
141
+ w2c = np.linalg.inv(c2w).astype(np.float32)
142
+
143
+ # Load sparse point
144
+ if point_cloud is not None:
145
+ sparse_pt = point_cloud.points[correspondent[image_path.name]]
146
+ else:
147
+ sparse_pt = None
148
+
149
+ cam_lst.append(camera_creator(
150
+ image=image,
151
+ w2c=w2c,
152
+ fovx=fovx,
153
+ fovy=fovy,
154
+ cx_p=cx_p,
155
+ cy_p=cy_p,
156
+ sparse_pt=sparse_pt,
157
+ image_name=image_path.name,
158
+ ))
159
+
160
+ return cam_lst, point_cloud
161
+
162
+
163
+ def parse_principle_point(info, is_cx):
164
+ key = "cx" if is_cx else "cy"
165
+ key_res = "w" if is_cx else "h"
166
+ if f"{key}_p" in info:
167
+ return info[f"{key}_p"]
168
+ if key in info and key_res in info:
169
+ return info[key] / info[key_res]
170
+ return None
src/sparse_voxel_gears/__pycache__/adaptive.cpython-39.pyc ADDED
Binary file (6.77 kB). View file
 
src/sparse_voxel_gears/__pycache__/constructor.cpython-39.pyc ADDED
Binary file (8 kB). View file
 
src/sparse_voxel_gears/__pycache__/io.cpython-39.pyc ADDED
Binary file (4.76 kB). View file
 
src/sparse_voxel_gears/__pycache__/pooling.cpython-39.pyc ADDED
Binary file (1.81 kB). View file
 
src/sparse_voxel_gears/__pycache__/properties.cpython-39.pyc ADDED
Binary file (5.06 kB). View file
 
src/sparse_voxel_gears/__pycache__/renderer.cpython-39.pyc ADDED
Binary file (3.74 kB). View file
 
src/sparse_voxel_gears/adaptive.py ADDED
@@ -0,0 +1,296 @@
1
+ # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
2
+ #
3
+ # NVIDIA CORPORATION and its licensors retain all intellectual property
4
+ # and proprietary rights in and to this software, related documentation
5
+ # and any modifications thereto. Any use, reproduction, disclosure or
6
+ # distribution of this software and related documentation without an express
7
+ # license agreement from NVIDIA CORPORATION is strictly prohibited.
8
+
9
+ import torch
10
+
11
+ from src.utils import octree_utils
12
+
13
+ '''
14
+ Adaptive sparse voxel pruning and subdivision.
15
+ There are three types of data modes to handle.
16
+
17
+ 1. Per-voxel attribute:
18
+ Each voxel has its own non-trainable data field.
19
+
20
+ 2. Per-voxel parameters:
21
+ Similar to per-voxel attributes, but these are trainable parameters.
22
+
23
+ 3. Grid points parameters:
24
+ The trainable parameters are attached to the eight grid points of each voxel.
25
+ A grid point parameter can be shared by adjacent voxels.
26
+ '''
27
+
28
+ class SVAdaptive:
29
+
30
+ @torch.no_grad()
31
+ def pruning(self, prune_mask):
32
+ '''
33
+ Prune sparse voxels. The grid points are updated accordingly.
34
+
35
+ Input:
36
+ @prune_mask [N] Mask indicating the voxels to prune.
37
+ '''
38
+ if len(prune_mask.shape) == 2:
39
+ assert prune_mask.shape[1] == 1
40
+ prune_mask = prune_mask.squeeze(1)
41
+ assert prune_mask.shape == (self.num_voxels, )
42
+ kept_idx = (~prune_mask).argwhere().squeeze(1)
43
+ if len(kept_idx) == 0:
44
+ return
45
+
46
+ old_vox_key = self.vox_key.clone()
47
+
48
+ # Prune non-trainable per-voxel attributes.
49
+ for name in self.per_voxel_attr_lst:
50
+ ori_attr = getattr(self, name)
51
+ new_attr = mask_cat_perm(ori_attr, kept_idx=kept_idx)
52
+ setattr(self, name, new_attr)
53
+ if name == '_subdiv_p' and ori_attr.grad is not None:
54
+ self._subdiv_p.grad = mask_cat_perm(ori_attr.grad, kept_idx=kept_idx)
55
+ self._subdiv_p.requires_grad_()
56
+ del ori_attr
57
+ torch.cuda.empty_cache()
58
+
59
+ # Prune trainable per-voxel parameters.
60
+ for name in self.per_voxel_param_lst:
61
+ ori_param = getattr(self, name).detach()
62
+ new_param = mask_cat_perm(
63
+ ori_param,
64
+ kept_idx=kept_idx).requires_grad_()
65
+ setattr(self, name, new_param)
66
+ del ori_param, new_param
67
+ torch.cuda.empty_cache()
68
+
69
+ # Prune trainable grid points parameters (on voxel corners).
70
+ for name in self.grid_pts_param_lst:
71
+ ori_grid_pts = getattr(self, name).detach()
72
+
73
+ # Update parameter
74
+ ori_vox_grid_pts_val = ori_grid_pts[old_vox_key]
75
+ new_vox_val = mask_cat_perm(
76
+ ori_vox_grid_pts_val,
77
+ kept_idx=kept_idx)
78
+ new_param = agg_voxel_into_grid_pts(
79
+ self.num_grid_pts, # It's the updated one
80
+ self.vox_key,
81
+ new_vox_val).requires_grad_()
82
+ setattr(self, name, new_param)
83
+ del ori_grid_pts, ori_vox_grid_pts_val, new_vox_val, new_param
84
+ torch.cuda.empty_cache()
85
+
86
+ @torch.no_grad()
87
+ def subdividing(self, subdivide_mask):
88
+ '''
89
+ Subdivide sparse voxels. The grid points are updated accordingly.
90
+
91
+ Input:
92
+ @subdivide_mask [N] Mask indicating the voxels to subdivide.
93
+ '''
94
+ # Compute voxel indices to keep and to subdivide
95
+ if len(subdivide_mask.shape) == 2:
96
+ assert subdivide_mask.shape[1] == 1
97
+ subdivide_mask = subdivide_mask.squeeze(1)
98
+ assert subdivide_mask.shape == (self.num_voxels, )
99
+ kept_idx = (~subdivide_mask).argwhere().squeeze(1)
100
+ subdivide_idx = subdivide_mask.argwhere().squeeze(1)
101
+ if len(subdivide_idx) == 0:
102
+ return
103
+
104
+ old_vox_key = self.vox_key.clone()
105
+
106
+ # Subdivide non-trainable per-voxel attributes.
107
+ octpath, octlevel = octree_utils.gen_children(
108
+ self.octpath[subdivide_idx],
109
+ self.octlevel[subdivide_idx])
110
+
111
+ special_subdiv = dict(
112
+ octpath=octpath,
113
+ octlevel=octlevel,
114
+ )
115
+
116
+ for name in self.per_voxel_attr_lst:
117
+ ori_attr = getattr(self, name)
118
+ if name in special_subdiv:
119
+ subdiv_attr = special_subdiv.pop(name)
120
+ else:
121
+ subdiv_attr = ori_attr[subdivide_idx].repeat_interleave(8, dim=0)
122
+ new_attr = mask_cat_perm(
123
+ ori_attr,
124
+ kept_idx=kept_idx,
125
+ cat_tensor=subdiv_attr)
126
+ setattr(self, name, new_attr)
127
+ if name == '_subdiv_p' and ori_attr.grad is not None:
128
+ self._subdiv_p.grad = mask_cat_perm(
129
+ ori_attr.grad,
130
+ kept_idx=kept_idx,
131
+ cat_tensor=subdiv_attr)
132
+ self._subdiv_p.requires_grad_()
133
+ del ori_attr, subdiv_attr
134
+
135
+ assert len(special_subdiv) == 0
136
+ torch.cuda.empty_cache()
137
+
138
+ # Subdivide trainable per-voxel parameters.
139
+ for name in self.per_voxel_param_lst:
140
+ ori_param = getattr(self, name).detach()
141
+
142
+ # Update parameter
143
+ subdiv_param = ori_param[subdivide_idx].repeat_interleave(8, dim=0)
144
+ new_param = mask_cat_perm(
145
+ ori_param,
146
+ kept_idx=kept_idx,
147
+ cat_tensor=subdiv_param).requires_grad_()
148
+ setattr(self, name, new_param)
149
+ del ori_param, subdiv_param, new_param
150
+ torch.cuda.empty_cache()
151
+
152
+ # Subdivide grid points parameters (on voxel corners).
153
+ for name in self.grid_pts_param_lst:
154
+ ori_grid_pts = getattr(self, name).detach()
155
+
156
+ # Update parameter
157
+ # First, gather the grid_pts values into each voxel.
158
+ # The voxel is then subdivided by trilinear interpolation.
159
+ # Finally, we gather voxel values back to the grid_pts.
160
+ ori_vox_grid_pts_val = ori_grid_pts[old_vox_key]
161
+ subdiv_vox_grid_pts_val = subdivide_by_interp(
162
+ ori_vox_grid_pts_val[subdivide_idx])
163
+ new_vox_val = mask_cat_perm(
164
+ ori_vox_grid_pts_val,
165
+ kept_idx=kept_idx,
166
+ cat_tensor=subdiv_vox_grid_pts_val)
167
+ del ori_grid_pts, ori_vox_grid_pts_val, subdiv_vox_grid_pts_val
168
+
169
+ new_param = agg_voxel_into_grid_pts(
170
+ self.num_grid_pts, # It's the updated one
171
+ self.vox_key,
172
+ new_vox_val).cuda().requires_grad_()
173
+ setattr(self, name, new_param)
174
+ del new_vox_val, new_param
175
+ torch.cuda.empty_cache()
176
+
177
+ @torch.no_grad()
178
+ def sh_degree_add1(self):
179
+ if self.active_sh_degree < self.max_sh_degree:
180
+ self.active_sh_degree += 1
181
+
182
+ @torch.no_grad()
183
+ def compute_training_stat(self, camera_lst):
184
+ '''
185
+ Compute the following statistic of each voxel from the given cameras.
186
+ 1. max_w: the maximum blending weight.
187
+ 2. min_samp_interval: the minimum sampling interval (inverse of maximum sampling rate).
188
+ 3. view_cnt: number of cameras with non-zero blending weight.
189
+
190
+ Input:
191
+ @camera_lst [Camera, ...] A list of cameras.
192
+ '''
193
+ self.freeze_vox_geo()
194
+ max_w = torch.zeros([self.num_voxels, 1], dtype=torch.float32, device="cuda")
195
+ min_samp_interval = torch.full([self.num_voxels, 1], 1e30, dtype=torch.float32, device="cuda")
196
+ view_cnt = torch.zeros([self.num_voxels, 1], dtype=torch.float32, device="cuda")
197
+ for camera in camera_lst:
198
+ max_w_i = self.render(camera, color_mode='dontcare', track_max_w=True)['max_w']
199
+ max_w = torch.maximum(max_w, max_w_i)
200
+
201
+ vis_idx = (max_w_i > 0).squeeze().argwhere().squeeze()
202
+ zdist = ((self.vox_center[vis_idx] - camera.position) * camera.lookat).sum(-1, keepdims=True)
203
+ samp_interval = zdist * camera.pix_size
204
+ min_samp_interval[vis_idx] = torch.minimum(min_samp_interval[vis_idx], samp_interval)
205
+
206
+ view_cnt[vis_idx] += 1
207
+
208
+ stat_pkg = {
209
+ 'max_w': max_w,
210
+ 'min_samp_interval': min_samp_interval,
211
+ 'view_cnt': view_cnt,
212
+ }
213
+ self.unfreeze_vox_geo()
214
+ return stat_pkg
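Note: a sketch of how these statistics are commonly combined with the pruning/subdividing calls above, assuming `model` mixes in SVAdaptive and `train_cameras` is the camera list from the data loader; the thresholds are illustrative, not values prescribed by this repo.

    stat = model.compute_training_stat(train_cameras)
    model.pruning(stat['max_w'] < 1e-4)        # drop voxels that never contribute

    stat = model.compute_training_stat(train_cameras)
    model.subdividing(stat['max_w'] > 0.5)     # refine voxels with strong blending weight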
215
+
216
+
217
+ # Some helpful functions
218
+ def mask_cat_perm(tensor, kept_idx=None, cat_tensor=None, perm=None):
219
+ '''
220
+ Perform tensor masking, concatenation, and permutation.
221
+ '''
222
+ if kept_idx is None and cat_tensor is None and perm is None:
223
+ raise Exception("No op for mask_cat_perm??")
224
+ device = tensor.device
225
+ if kept_idx is not None:
226
+ tensor = tensor[kept_idx.to(device)]
227
+ if cat_tensor is not None:
228
+ tensor = torch.cat([tensor, cat_tensor.to(device)])
229
+ if perm is not None:
230
+ assert len(perm) == len(tensor)
231
+ tensor = tensor[perm.to(device)]
232
+ return tensor.contiguous()
233
+
234
+ def agg_voxel_into_grid_pts(num_grid_pts, vox_key, vox_val, reduce='mean'):
235
+ '''
236
+ Aggregate per-voxel data into their eight grid points.
237
+ Input:
238
+ @num_grid_pts Number of final grid points.
239
+ @vox_key [N, 8] Index to the eight grid points of each voxel.
240
+ @vox_val [N, 8, *] Data of the eight grid points of each voxel.
241
+ Output:
242
+ @new_param [num_grid_pts, *] Grid points data aggregated from vox_val.
243
+ '''
244
+ ch = vox_val.shape[2:]
245
+ device = vox_val.device
246
+ vox_key = vox_key.to(device)
247
+ new_param = torch.zeros([num_grid_pts, *ch], dtype=torch.float32, device=device)
248
+ new_param.index_reduce_(
249
+ dim=0,
250
+ index=vox_key.flatten(),
251
+ source=vox_val.flatten(0,1),
252
+ reduce=reduce,
253
+ include_self=False)
254
+ # Equivalent implementation by old API
255
+ # new_param /= vox_key.flatten().bincount(minlength=num_grid_pts).unsqueeze(-1)
256
+ # new_param.nan_to_num_()
257
+ return new_param.contiguous()
258
+
259
+ def subdivide_by_interp(vox_val):
260
+ '''
261
+ Subdivide grid point data by trilinear interpolation.
262
+ The subdivided children order is the same as those from `_subdivide_attr` and `gen_children`.
263
+ Input:
264
+ @vox_val [N, 8, *] Data of the eight grid points of each voxel.
265
+ Output:
266
+ @new_vox_val [8N, 8, *] Data of the eight grid points of the subdivided voxel.
267
+ '''
268
+ vox_val = vox_val.contiguous()
269
+ main_idx = torch.arange(8, dtype=torch.int64, device=vox_val.device)
270
+ new_vox_val = torch.zeros([len(vox_val), 8, *vox_val.shape[1:]], device=vox_val.device)
271
+ new_vox_val[:, main_idx, main_idx] = vox_val
272
+ new_vox_val[:, main_idx, main_idx^0b001] = 0.5 * (vox_val + vox_val[:, main_idx^0b001])
273
+ new_vox_val[:, main_idx, main_idx^0b010] = 0.5 * (vox_val + vox_val[:, main_idx^0b010])
274
+ new_vox_val[:, main_idx, main_idx^0b100] = 0.5 * (vox_val + vox_val[:, main_idx^0b100])
275
+ new_vox_val[:, main_idx, main_idx^0b011] = 0.25 * (
276
+ vox_val + \
277
+ vox_val[:, main_idx^0b001] + \
278
+ vox_val[:, main_idx^0b010] + \
279
+ vox_val[:, main_idx^0b011]
280
+ )
281
+ new_vox_val[:, main_idx, main_idx^0b101] = 0.25 * (
282
+ vox_val + \
283
+ vox_val[:, main_idx^0b001] + \
284
+ vox_val[:, main_idx^0b100] + \
285
+ vox_val[:, main_idx^0b101]
286
+ )
287
+ new_vox_val[:, main_idx, main_idx^0b110] = 0.25 * (
288
+ vox_val + \
289
+ vox_val[:, main_idx^0b010] + \
290
+ vox_val[:, main_idx^0b100] + \
291
+ vox_val[:, main_idx^0b110]
292
+ )
293
+ new_vox_val[:, main_idx, main_idx^0b111] = vox_val.mean(1, keepdim=True)
294
+
295
+ new_vox_val = new_vox_val.reshape(len(vox_val)*8, *vox_val.shape[1:])
296
+ return new_vox_val.contiguous()
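Note: a tiny numeric check of the corner bookkeeping above; the corner index follows the documented order [0,0,0], [0,0,1], [0,1,0], ..., so XOR-ing an index with 0b111 addresses the diagonally opposite corner. Runs on CPU.

    import torch

    # One parent voxel whose eight corner values are just the corner indices 0..7.
    vox_val = torch.arange(8, dtype=torch.float32).view(1, 8, 1)
    children = subdivide_by_interp(vox_val)            # shape [8, 8, 1]

    # Each child keeps the parent's value at the corner it shares with the parent,
    # and the corner shared by all eight children averages the whole parent cell.
    assert children[3, 3, 0] == vox_val[0, 3, 0]
    assert torch.isclose(children[3, 3 ^ 0b111, 0], vox_val.mean())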
src/sparse_voxel_gears/constructor.py ADDED
@@ -0,0 +1,425 @@
1
+ # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
2
+ #
3
+ # NVIDIA CORPORATION and its licensors retain all intellectual property
4
+ # and proprietary rights in and to this software, related documentation
5
+ # and any modifications thereto. Any use, reproduction, disclosure or
6
+ # distribution of this software and related documentation without an express
7
+ # license agreement from NVIDIA CORPORATION is strictly prohibited.
8
+
9
+ import torch
10
+ import numpy as np
11
+ import svraster_cuda
12
+
13
+ from src.utils.activation_utils import rgb2shzero
14
+ from src.utils import octree_utils
15
+
16
+ class SVConstructor:
17
+
18
+ def model_init(self,
19
+ bounding, # Scene bound [min_xyz, max_xyz]
20
+ outside_level, # Number of Octree levels for background
21
+ init_n_level=6, # Starting from (2^init_n_level)^3 voxels
22
+ init_out_ratio=2.0, # Ratio of outside (background) voxel count to inside voxel count
23
+ sh_degree_init=3, # Initial activated sh degree
24
+ geo_init=-10.0, # Init pre-activation density
25
+ sh0_init=0.5, # Init voxel colors in range [0,1]
26
+ shs_init=0.0, # Init coefficients of higher-degree sh
27
+ cameras=None, # Cameras that help voxel allocation
28
+ ):
29
+
30
+ assert outside_level <= svraster_cuda.meta.MAX_NUM_LEVELS
31
+
32
+ # Define scene bound
33
+ center = (bounding[0] + bounding[1]) * 0.5
34
+ extent = max(bounding[1] - bounding[0])
35
+ self.scene_center, self.scene_extent, self.inside_extent = get_scene_bound_tensor(
36
+ center=center, extent=extent, outside_level=outside_level)
37
+
38
+ # Init voxel layout.
39
+ # The world is separated into inside (main foreground) and outside (background) regions.
40
+ in_path, in_level = octlayout_inside_uniform(
41
+ scene_center=self.scene_center,
42
+ scene_extent=self.scene_extent,
43
+ outside_level=outside_level,
44
+ n_level=init_n_level,
45
+ cameras=cameras,
46
+ filter_zero_visiblity=(cameras is not None),
47
+ filter_near=-1)
48
+
49
+ if outside_level == 0:
50
+ # Object centric bounded scenes
51
+ ou_path = torch.empty([0, 1], dtype=in_path.dtype, device="cuda")
52
+ ou_level = torch.empty([0, 1], dtype=in_level.dtype, device="cuda")
53
+ else:
54
+ min_num = len(in_path) * init_out_ratio
55
+ max_level = outside_level + init_n_level
56
+ ou_path, ou_level = octlayout_outside_heuristic(
57
+ scene_center=self.scene_center,
58
+ scene_extent=self.scene_extent,
59
+ outside_level=outside_level,
60
+ cameras=cameras,
61
+ min_num=min_num,
62
+ max_level=max_level,
63
+ filter_near=-1)
64
+
65
+ self.octpath = torch.cat([ou_path, in_path])
66
+ self.octlevel = torch.cat([ou_level, in_level])
67
+
68
+ self.active_sh_degree = min(sh_degree_init, self.max_sh_degree)
69
+
70
+ # Init trainable parameters
71
+ self._geo_grid_pts = torch.full(
72
+ [self.num_grid_pts, 1], geo_init,
73
+ dtype=torch.float32, device="cuda").requires_grad_()
74
+
75
+ self._sh0 = torch.full(
76
+ [self.num_voxels, 3], rgb2shzero(sh0_init),
77
+ dtype=torch.float32, device="cuda").requires_grad_()
78
+
79
+ self._shs = torch.full(
80
+ [self.num_voxels, (self.max_sh_degree+1)**2 - 1, 3], shs_init,
81
+ dtype=torch.float32, device="cuda").requires_grad_()
82
+
83
+ # Subdivision priority tracker
84
+ self._subdiv_p = torch.ones(
85
+ [self.num_voxels, 1],
86
+ dtype=torch.float32, device="cuda").requires_grad_()
87
+
88
+ def octpath_init(self,
89
+ scene_center,
90
+ scene_extent,
91
+ octpath, # Nx1 octpath.
92
+ octlevel, # Nx1 or scalar for the Octree level of each voxel.
93
+
94
+ # The following are model parameters.
95
+ # If the input are tensors, the gradient of rendering can be backprop to them.
96
+ # Otherwise, it creates new trainable tensors.
97
+ rgb=0.5, # Nx3 or scalar for voxel color in range of 0~1.
98
+ shs=0.0, # NxDx3 or scalar for voxel higher-deg sh coefficient.
99
+ density=-10., # Nx8 or Ngridx1 or scalar for voxel density field.
100
+ # The order is [0,0,0] => [0,0,1] => [0,1,0] => [0,1,1] ...
101
+ reduce_density=False, # Whether to merge grid points if density is Nx8.
102
+ ):
103
+
104
+ self.scene_center, self.scene_extent, self.inside_extent = get_scene_bound_tensor(
105
+ center=scene_center, extent=scene_extent)
106
+
107
+ assert torch.is_tensor(octpath)
108
+ octlevel = get_octlevel_tensor(octlevel, num_voxels=len(octpath))
109
+
110
+ self.octpath = octpath.view(-1, 1).contiguous()
111
+ self.octlevel = octlevel.view(-1, 1).contiguous()
112
+ assert len(self.octpath) == len(self.octlevel)
113
+
114
+ # Subdivision priority tracker
115
+ self._subdiv_p = torch.ones(
116
+ [self.num_voxels, 1],
117
+ dtype=torch.float32, device="cuda").requires_grad_()
118
+
119
+ # Set up appearance parameters
120
+ if torch.is_tensor(rgb):
121
+ assert rgb.shape == (self.num_voxels, 3)
122
+ self._sh0 = rgb2shzero(rgb.contiguous().cuda())
123
+ else:
124
+ self._sh0 = torch.full(
125
+ [self.num_voxels, 3], rgb2shzero(rgb),
126
+ dtype=torch.float32, device="cuda").requires_grad_()
127
+
128
+ if torch.is_tensor(shs):
129
+ assert shs.shape == (self.num_voxels, (self.max_sh_degree+1)**2 - 1, 3)
130
+ self._shs = shs.contiguous().cuda()
131
+ else:
132
+ self._shs = torch.full(
133
+ [self.num_voxels, (self.max_sh_degree+1)**2 - 1, 3], shs,
134
+ dtype=torch.float32, device="cuda").requires_grad_()
135
+
136
+ # Setup geometry parameters
137
+ if torch.is_tensor(density):
138
+ if density.shape == (self.num_grid_pts, 1):
139
+ self._geo_grid_pts = density.contiguous().cuda()
140
+ elif density.shape == (self.num_voxels, 8):
141
+ if reduce_density:
142
+ self._geo_grid_pts = torch.zeros(
143
+ [self.num_grid_pts, 1], dtype=torch.float32, device="cuda")
144
+ self._geo_grid_pts.index_reduce_(
145
+ dim=0,
146
+ index=self.vox_key.flatten(),
147
+ source=density.reshape(-1, 1), # keep 2D so the shape matches the [num_grid_pts, 1] target
148
+ reduce="mean",
149
+ include_self=False)
150
+ else:
151
+ self.frozen_vox_geo = density.contiguous().cuda()
152
+ else:
153
+ raise Exception(f"Unexpected density shape. "
154
+ f"It should be either {(self.num_grid_pts,1)} or {(self.num_voxels,8)}")
155
+ else:
156
+ self._geo_grid_pts = torch.full(
157
+ [self.num_grid_pts, 1], density,
158
+ dtype=torch.float32, device="cuda").requires_grad_()
159
+
160
+ def ijkl_init(self,
161
+ scene_center,
162
+ scene_extent,
163
+ ijk, # Nx3 integer coordinates of each voxel.
164
+ octlevel, # Nx1 or scalar for the Octree level of each voxel.
165
+
166
+ # The following are model parameters.
167
+ # If the input are tensors, the gradient of rendering can be backprop to them.
168
+ # Otherwise, it creates new trainable tensors.
169
+ rgb=0.5, # Nx3 or scalar for voxel color in range of 0~1.
170
+ shs=0.0, # NxDx3 or scalar for voxel higher-deg sh coefficient.
171
+ density=-10., # Nx8 or Ngridx1 or scalar for voxel density field.
172
+ # The order is [0,0,0] => [0,0,1] => [0,1,0] => [0,1,1] ...
173
+ reduce_density=False, # Whether to merge grid points if density is Nx8.
174
+ ):
175
+
176
+ scene_center, scene_extent, _ = get_scene_bound_tensor(
177
+ center=scene_center, extent=scene_extent)
178
+
179
+ # Convert ijk integer coordinates plus octlevel to octpath
180
+ octlevel = get_octlevel_tensor(octlevel, num_voxels=len(ijk))
181
+
182
+ assert torch.is_tensor(ijk)
183
+ assert len(ijk.shape) == 2 and ijk.shape[1] == 3
184
+ assert len(ijk) == len(octlevel)
185
+ ijk = ijk.long()
186
+ if (ijk < 0).any():
187
+ raise Exception("xyz out of scene bound")
188
+ if (ijk >= (1 << octlevel.long())).any():
189
+ raise Exception("xyz out of scene bound")
190
+ octpath = svraster_cuda.utils.ijk_2_octpath(ijk, octlevel)
191
+
192
+ self.octpath_init(
193
+ scene_center=scene_center,
194
+ scene_extent=scene_extent,
195
+ octpath=octpath,
196
+ octlevel=octlevel,
197
+ rgb=rgb,
198
+ shs=shs,
199
+ density=density,
200
+ reduce_density=reduce_density)
201
+
202
+ def points_init(self,
203
+ scene_center,
204
+ scene_extent,
205
+ xyz, # Nx3 point coordinates in world space.
206
+ octlevel=None, # Nx1 or scalar for the Octree level of each voxel.
207
+ expected_vox_size=None,
208
+ level_round_mode='nearest',
209
+
210
+ # The following are model parameters.
211
+ # If the input are tensors, the gradient of rendering can be backprop to them.
212
+ # Otherwise, it creates new trainable tensors.
213
+ rgb=0.5, # Nx3 or scalar for voxel color in range of 0~1.
214
+ shs=0.0, # NxDx3 or scalar for voxel higher-deg sh coefficient.
215
+ density=-10., # Nx8 or scalar for voxel density field.
216
+ # The order is [0,0,0] => [0,0,1] => [0,1,0] => [0,1,1] ...
217
+ reduce_density=False, # Whether to merge grid points if density is Nx8.
218
+ ):
219
+
220
+ scene_center, scene_extent, _ = get_scene_bound_tensor(center=scene_center, extent=scene_extent)
221
+
222
+ # Compute voxel level
223
+ if octlevel is not None:
224
+ assert expected_vox_size is None
225
+ octlevel = get_octlevel_tensor(octlevel, num_voxels=len(xyz))
226
+ elif expected_vox_size is not None:
227
+ octlevel_fp32 = octree_utils.vox_size_2_level(scene_extent, expected_vox_size)
228
+ if level_round_mode == "nearest":
229
+ octlevel_fp32 = octlevel_fp32.round()
230
+ elif level_round_mode == "down":
231
+ octlevel_fp32 = octlevel_fp32.floor()
232
+ elif level_round_mode == "up":
233
+ octlevel_fp32 = octlevel_fp32.ceil()
234
+ else:
235
+ raise Exception("Unknonw level_round_mode")
236
+ octlevel_fp32 = octlevel_fp32.clamp(1, svraster_cuda.meta.MAX_NUM_LEVELS)
237
+ octlevel = get_octlevel_tensor(octlevel_fp32.to(torch.int8), num_voxels=len(xyz))
238
+ else:
239
+ raise Exception("Either octlevel or expected_vox_size should be given.")
240
+
241
+ # Transform point to ijk integer coordinate
242
+ scene_min_xyz = scene_center - 0.5 * scene_extent
243
+ vox_size = octree_utils.level_2_vox_size(scene_extent, octlevel)
244
+ ijk = ((xyz - scene_min_xyz) / vox_size).long()
245
+
246
+ # Reduce duplicated tensor
247
+ ijkl = torch.cat([ijk, octlevel], dim=1)
248
+ ijkl_unq, invmap = ijkl.unique(dim=0, return_inverse=True)
249
+ ijk, octlevel = ijkl_unq.split([3, 1], dim=1)
250
+ octlevel = octlevel.to(torch.int8)
251
+
252
+ if torch.is_tensor(rgb):
253
+ assert rgb.shape == (len(invmap), 3)
254
+ new_shape = (len(ijk), 3)
255
+ rgb = torch.zeros(new_shape, dtype=torch.float32, device="cuda").index_reduce_(
256
+ dim=0,
257
+ index=invmap,
258
+ source=rgb,
259
+ reduce="mean",
260
+ include_self=False)
261
+
262
+ if torch.is_tensor(shs):
263
+ assert shs.shape == (len(invmap), (self.max_sh_degree+1)**2 - 1, 3)
264
+ new_shape = (len(ijk), (self.max_sh_degree+1)**2 - 1, 3)
265
+ shs = torch.zeros(new_shape, dtype=torch.float32, device="cuda").index_reduce_(
266
+ dim=0,
267
+ index=invmap,
268
+ source=shs,
269
+ reduce="mean",
270
+ include_self=False)
271
+
272
+ if torch.is_tensor(density):
273
+ assert density.shape == (len(invmap), 8)
274
+ new_shape = (len(ijk), 8)
275
+ density = torch.zeros(new_shape, dtype=torch.float32, device="cuda").index_reduce_(
276
+ dim=0,
277
+ index=invmap,
278
+ source=density,
279
+ reduce="mean",
280
+ include_self=False)
281
+
282
+ # Allocate voxel using ijkl coordinate
283
+ self.ijkl_init(
284
+ scene_center=scene_center,
285
+ scene_extent=scene_extent,
286
+ ijk=ijk,
287
+ octlevel=octlevel,
288
+ rgb=rgb,
289
+ shs=shs,
290
+ density=density,
291
+ reduce_density=reduce_density)
292
+
293
+
294
+ #################################################
295
+ # Helper function
296
+ #################################################
297
+ def get_scene_bound_tensor(center, extent, outside_level=0):
298
+ if torch.is_tensor(center):
299
+ scene_center = center.float().clone().cuda()
300
+ else:
301
+ scene_center = torch.tensor(center, dtype=torch.float32, device="cuda")
302
+
303
+ if torch.is_tensor(extent):
304
+ inside_extent = extent.float().clone().cuda()
305
+ else:
306
+ inside_extent = torch.tensor(extent, dtype=torch.float32, device="cuda")
307
+
308
+ scene_extent = inside_extent * (2 ** outside_level)
309
+
310
+ assert scene_center.shape == (3,)
311
+ assert scene_extent.numel() == 1
312
+
313
+ return scene_center, scene_extent, inside_extent
314
+
315
+ def get_octlevel_tensor(octlevel, num_voxels=None):
316
+ if not torch.is_tensor(octlevel):
317
+ assert np.all(octlevel > 0)
318
+ assert np.all(octlevel <= svraster_cuda.meta.MAX_NUM_LEVELS)
319
+ octlevel = torch.tensor(octlevel, dtype=torch.int8, device="cuda")
320
+ if octlevel.numel() == 1:
321
+ octlevel = octlevel.view(1, 1).repeat(num_voxels, 1).contiguous()
322
+ octlevel = octlevel.reshape(-1, 1)
323
+ assert octlevel.dtype == torch.int8
324
+ assert num_voxels is None or octlevel.numel() == num_voxels
325
+
326
+ return octlevel
327
+
328
+
329
+ #################################################
330
+ # Octree layout construction heuristic
331
+ #################################################
332
+ def octlayout_filtering(octpath, octlevel, scene_center, scene_extent, cameras=None, filter_zero_visiblity=True, filter_near=-1):
333
+
334
+ vox_center, vox_size = octree_utils.octpath_decoding(
335
+ octpath, octlevel,
336
+ scene_center, scene_extent)
337
+
338
+ # Filtering
339
+ kept_mask = torch.ones([len(octpath)], dtype=torch.bool, device="cuda")
340
+ if filter_zero_visiblity:
341
+ assert cameras is not None, "Cameras should be given to filter invisible voxels"
342
+ rate = svraster_cuda.renderer.mark_max_samp_rate(
343
+ cameras, octpath, vox_center, vox_size)
344
+ kept_mask &= (rate > 0)
345
+ if filter_near > 0:
346
+ is_near = svraster_cuda.renderer.mark_near(
347
+ cameras, octpath, vox_center, vox_size, near=filter_near)
348
+ kept_mask &= (~is_near)
349
+ kept_idx = torch.where(kept_mask)[0]
350
+ octpath = octpath[kept_idx]
351
+ octlevel = octlevel[kept_idx]
352
+ return octpath, octlevel
353
+
354
+
355
+ def octlayout_inside_uniform(scene_center, scene_extent, outside_level, n_level, cameras=None, filter_zero_visiblity=True, filter_near=-1):
356
+ octpath, octlevel = octree_utils.gen_octpath_dense(
357
+ outside_level=outside_level,
358
+ n_level_inside=n_level)
359
+
360
+ octpath, octlevel = octlayout_filtering(
361
+ octpath=octpath,
362
+ octlevel=octlevel,
363
+ scene_center=scene_center,
364
+ scene_extent=scene_extent,
365
+ cameras=cameras,
366
+ filter_zero_visiblity=filter_zero_visiblity,
367
+ filter_near=filter_near)
368
+ return octpath, octlevel
369
+
370
+
371
+ def octlayout_outside_heuristic(scene_center, scene_extent, outside_level, cameras, min_num, max_level, filter_near=-1):
372
+
373
+ assert cameras is not None, "Cameras should provided in this mode."
374
+
375
+ # Init by adding one sub-level in each shell level
376
+ octpath = []
377
+ octlevel = []
378
+ for lv in range(1, 1+outside_level):
379
+ path, lv = octree_utils.gen_octpath_shell(
380
+ shell_level=lv,
381
+ n_level_inside=1)
382
+ octpath.append(path)
383
+ octlevel.append(lv)
384
+ octpath = torch.cat(octpath)
385
+ octlevel = torch.cat(octlevel)
386
+
387
+ # Iteratively subdivide voxels with maximum sampling rate
388
+ while True:
389
+ vox_center, vox_size = octree_utils.octpath_decoding(
390
+ octpath, octlevel, scene_center, scene_extent)
391
+ samp_rate = svraster_cuda.renderer.mark_max_samp_rate(
392
+ cameras, octpath, vox_center, vox_size)
393
+
394
+ kept_idx = torch.where((samp_rate > 0))[0]
395
+ octpath = octpath[kept_idx]
396
+ octlevel = octlevel[kept_idx]
397
+ octlevel_mask = (octlevel.squeeze(1) < max_level)
398
+ samp_rate = samp_rate[kept_idx] * octlevel_mask
399
+ vox_size = vox_size[kept_idx]
400
+ still_need_n = (min_num - len(octpath)) // 7
401
+ still_need_n = min(len(octpath), round(still_need_n))
402
+ if still_need_n <= 0:
403
+ break
404
+ rank = samp_rate * (octlevel.squeeze(1) < svraster_cuda.meta.MAX_NUM_LEVELS)
405
+ subdiv_mask = (rank >= rank.sort().values[-still_need_n])
406
+ subdiv_mask &= (octlevel.squeeze(1) < svraster_cuda.meta.MAX_NUM_LEVELS)
407
+ subdiv_mask &= octlevel_mask
408
+ samp_rate *= subdiv_mask
409
+ subdiv_mask &= (samp_rate >= samp_rate.quantile(0.9)) # Subdivide only 10% each iteration
410
+ if subdiv_mask.sum() == 0:
411
+ break
412
+ octpath_children, octlevel_children = octree_utils.gen_children(
413
+ octpath[subdiv_mask], octlevel[subdiv_mask])
414
+ octpath = torch.cat([octpath[~subdiv_mask], octpath_children])
415
+ octlevel = torch.cat([octlevel[~subdiv_mask], octlevel_children])
416
+
417
+ octpath, octlevel = octlayout_filtering(
418
+ octpath=octpath,
419
+ octlevel=octlevel,
420
+ scene_center=scene_center,
421
+ scene_extent=scene_extent,
422
+ cameras=cameras,
423
+ filter_zero_visiblity=True,
424
+ filter_near=filter_near)
425
+ return octpath, octlevel
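Note: a sketch of seeding a model from a point cloud with points_init, assuming `model` is an instance of the sparse-voxel model class elsewhere in this repo that mixes in SVConstructor (its constructor arguments are omitted here), and that a CUDA device plus the svraster_cuda extension are available; the point cloud, scene bound, and voxel size are illustrative.

    import torch

    xyz = torch.rand(10_000, 3, device="cuda") * 2.0 - 1.0   # toy points in [-1, 1]^3
    model.points_init(
        scene_center=[0.0, 0.0, 0.0],
        scene_extent=3.0,
        xyz=xyz,
        expected_vox_size=0.05,     # target voxel edge length; rounded to an Octree level
        rgb=0.5,                    # scalar -> new trainable per-voxel colors
        density=-10.0)              # scalar -> new trainable grid-point densities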
src/sparse_voxel_gears/io.py ADDED
@@ -0,0 +1,156 @@
1
+ # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
2
+ #
3
+ # NVIDIA CORPORATION and its licensors retain all intellectual property
4
+ # and proprietary rights in and to this software, related documentation
5
+ # and any modifications thereto. Any use, reproduction, disclosure or
6
+ # distribution of this software and related documentation without an express
7
+ # license agreement from NVIDIA CORPORATION is strictly prohibited.
8
+
9
+ import os
10
+ import re
11
+ import torch
12
+
13
+ from src.utils import octree_utils
14
+
15
+ class SVInOut:
16
+
17
+ def save(self, path, quantize=False):
18
+ '''
19
+ Save the necessary attributes and parameters for reproducing rendering.
20
+ '''
21
+ os.makedirs(os.path.dirname(path), exist_ok=True)
22
+ state_dict = {
23
+ 'active_sh_degree': self.active_sh_degree,
24
+ 'scene_center': self.scene_center.data.contiguous(),
25
+ 'inside_extent': self.inside_extent.data.contiguous(),
26
+ 'scene_extent': self.scene_extent.data.contiguous(),
27
+ 'octpath': self.octpath.data.contiguous(),
28
+ 'octlevel': self.octlevel.data.contiguous(),
29
+ '_geo_grid_pts': self._geo_grid_pts.data.contiguous(),
30
+ '_sh0': self._sh0.data.contiguous(),
31
+ '_shs': self._shs.data.contiguous(),
32
+ }
33
+
34
+ if quantize:
35
+ quantize_state_dict(state_dict)
36
+ state_dict['quantized'] = True
37
+ else:
38
+ state_dict['quantized'] = False
39
+
40
+ for k, v in state_dict.items():
41
+ if torch.is_tensor(v):
42
+ state_dict[k] = v.cpu()
43
+ torch.save(state_dict, path)
44
+ self.latest_save_path = path
45
+
46
+ def load(self, path):
47
+ '''
48
+ Load the saved models.
49
+ '''
50
+ self.loaded_path = path
51
+ state_dict = torch.load(path, map_location="cpu", weights_only=False)
52
+
53
+ if state_dict.get('quantized', False):
54
+ dequantize_state_dict(state_dict)
55
+
56
+ self.active_sh_degree = state_dict['active_sh_degree']
57
+
58
+ self.scene_center = state_dict['scene_center'].cuda()
59
+ self.inside_extent = state_dict['inside_extent'].cuda()
60
+ self.scene_extent = state_dict['scene_extent'].cuda()
61
+
62
+ self.octpath = state_dict['octpath'].cuda()
63
+ self.octlevel = state_dict['octlevel'].cuda().to(torch.int8)
64
+
65
+ self._geo_grid_pts = state_dict['_geo_grid_pts'].cuda().requires_grad_()
66
+ self._sh0 = state_dict['_sh0'].cuda().requires_grad_()
67
+ self._shs = state_dict['_shs'].cuda().requires_grad_()
68
+
69
+ # Subdivision priority tracker
70
+ self._subdiv_p = torch.ones(
71
+ [self.num_voxels, 1],
72
+ dtype=torch.float32, device="cuda").requires_grad_()
73
+
74
+ def save_iteration(self, model_path, iteration, quantize=False):
75
+ path = os.path.join(model_path, "checkpoints", f"iter{iteration:06d}_model.pt")
76
+ self.save(path, quantize=quantize)
77
+ self.latest_save_iter = iteration
78
+
79
+ def load_iteration(self, model_path, iteration=-1):
80
+ if iteration == -1:
81
+ # Find the maximum iteration if it is -1.
82
+ fnames = os.listdir(os.path.join(model_path, "checkpoints"))
83
+ loaded_iter = max(int(re.sub("[^0-9]", "", fname)) for fname in fnames)
84
+ else:
85
+ loaded_iter = iteration
86
+
87
+ path = os.path.join(model_path, "checkpoints", f"iter{loaded_iter:06d}_model.pt")
88
+ self.load(path)
89
+
90
+ self.loaded_iter = loaded_iter
91
+
92
+ return loaded_iter
93
+
94
+
95
+ # Quantization utilities to reduce the model size when saving.
96
+ # They can reduce the model size by ~70% with a minor PSNR drop.
97
+ def quantize_state_dict(state_dict):
98
+ state_dict['_geo_grid_pts'] = quantization(state_dict['_geo_grid_pts'])
99
+ state_dict['_sh0'] = [quantization(v) for v in state_dict['_sh0'].split(1, dim=1)]
100
+ state_dict['_shs'] = [quantization(v) for v in state_dict['_shs'].split(1, dim=1)]
101
+
102
+ def dequantize_state_dict(state_dict):
103
+ state_dict['_geo_grid_pts'] = dequantization(state_dict['_geo_grid_pts'])
104
+ state_dict['_sh0'] = torch.cat(
105
+ [dequantization(v) for v in state_dict['_sh0']], dim=1)
106
+ state_dict['_shs'] = torch.cat(
107
+ [dequantization(v) for v in state_dict['_shs']], dim=1)
108
+
109
+ def quantization(src_tensor, max_iter=10):
110
+ src_shape = src_tensor.shape
111
+ src_vals = src_tensor.flatten().contiguous()
112
+ order = src_vals.argsort()
113
+ quantile_ind = (torch.linspace(0,1,257) * (len(order) - 1)).long().clamp_(0, len(order)-1)
114
+ codebook = src_vals[order[quantile_ind]].contiguous()
115
+ codebook[0] = -torch.inf
116
+ ind = torch.searchsorted(codebook, src_vals)
117
+
118
+ codebook = codebook[1:]
119
+ ind = (ind - 1).clamp_(0, 255)
120
+
121
+ diff_l = (src_vals - codebook[ind-1]).abs()
122
+ diff_m = (src_vals - codebook[ind]).abs()
123
+ ind = ind - 1 + (diff_m < diff_l)
124
+ ind.clamp_(0, 255)
125
+
126
+ for _ in range(max_iter):
127
+ codebook = torch.zeros_like(codebook).index_reduce_(
128
+ dim=0,
129
+ index=ind,
130
+ source=src_vals,
131
+ reduce='mean',
132
+ include_self=False)
133
+ diff_l = (src_vals - codebook[ind-1]).abs()
134
+ diff_r = (src_vals - codebook[(ind+1).clamp_max_(255)]).abs()
135
+ diff_m = (src_vals - codebook[ind]).abs()
136
+ upd_mask = torch.minimum(diff_l, diff_r) < diff_m
137
+ if upd_mask.sum() == 0:
138
+ break
139
+ shift = (diff_r < diff_l) * 2 - 1
140
+ ind[upd_mask] += shift[upd_mask]
141
+ ind.clamp_(0, 255)
142
+
143
+ codebook = torch.zeros_like(codebook).index_reduce_(
144
+ dim=0,
145
+ index=ind,
146
+ source=src_vals,
147
+ reduce='mean',
148
+ include_self=False)
149
+
150
+ return dict(
151
+ index=ind.reshape(src_shape).to(torch.uint8),
152
+ codebook=codebook,
153
+ )
154
+
155
+ def dequantization(quant_dict):
156
+ return quant_dict['codebook'][quant_dict['index'].long()]
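For reference, a minimal sketch of exercising the 8-bit codebook quantization above on a standalone tensor, assuming the helpers in this file are importable (the tensor shape and variable names are illustrative):

import torch
from src.sparse_voxel_gears.io import quantization, dequantization

x = torch.randn(10000, 4)         # stand-in for a per-voxel parameter such as _sh0
q = quantization(x)               # dict with a uint8 'index' tensor and a 256-entry 'codebook'
x_hat = dequantization(q)         # reconstruction restricted to at most 256 distinct values

orig_bytes = x.numel() * 4                                    # float32 storage
quant_bytes = q['index'].numel() + q['codebook'].numel() * 4  # 1 byte per element + codebook
print(orig_bytes, quant_bytes, (x - x_hat).abs().mean().item())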
src/sparse_voxel_gears/pooling.py ADDED
@@ -0,0 +1,68 @@
1
+ # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
2
+ #
3
+ # NVIDIA CORPORATION and its licensors retain all intellectual property
4
+ # and proprietary rights in and to this software, related documentation
5
+ # and any modifications thereto. Any use, reproduction, disclosure or
6
+ # distribution of this software and related documentation without an express
7
+ # license agreement from NVIDIA CORPORATION is strictly prohibited.
8
+
9
+ import torch
10
+ import svraster_cuda
11
+
12
+ from src.utils import octree_utils
13
+
14
+
15
+ class SVPooling:
16
+
17
+ def pooling_to_level(self, max_level, octpath=None, octlevel=None):
18
+ octpath = self.octpath if octpath is None else octpath
19
+ octlevel = self.octlevel if octlevel is None else octlevel
20
+
21
+ num_bit_to_mask = 3 * max(0, svraster_cuda.meta.MAX_NUM_LEVELS - max_level)
22
+ octpath = (octpath >> num_bit_to_mask) << num_bit_to_mask
23
+ octlevel = octlevel.clamp_max(max_level)
24
+ octpack, invmap = torch.stack([octpath, octlevel]).unique(sorted=True, dim=1, return_inverse=True)
25
+ octpath, octlevel = octpack
26
+ octlevel = octlevel.to(torch.int8)
27
+
28
+ vox_center, vox_size = octree_utils.octpath_decoding(
29
+ octpath, octlevel, self.scene_center, self.scene_extent)
30
+
31
+ return dict(
32
+ invmap=invmap,
33
+ octpath=octpath,
34
+ octlevel=octlevel,
35
+ vox_center=vox_center,
36
+ vox_size=vox_size,
37
+ )
38
+
39
+ def pooling_to_rate(self, cameras, max_rate, octpath=None, octlevel=None):
40
+ octpath = self.octpath.clone() if octpath is None else octpath
41
+ octlevel = self.octlevel.clone() if octlevel is None else octlevel
42
+ invmap = torch.arange(len(octpath), device="cuda")
43
+
44
+ for _ in range(svraster_cuda.meta.MAX_NUM_LEVELS):
45
+ vox_center, vox_size = octree_utils.octpath_decoding(octpath, octlevel, self.scene_center, self.scene_extent)
46
+ samp_rate = svraster_cuda.renderer.mark_max_samp_rate(cameras, octpath, vox_center, vox_size)
47
+ pool_mask = (samp_rate < max_rate) & (octlevel.squeeze(1) > 1)
48
+ if pool_mask.sum() == 0:
49
+ break
50
+ octlevel[pool_mask] = octlevel[pool_mask] - 1
51
+ num_bit_to_mask = 3 * (svraster_cuda.meta.MAX_NUM_LEVELS - octlevel[pool_mask])
52
+ octpath[pool_mask] = octpath[pool_mask] >> num_bit_to_mask << num_bit_to_mask
53
+
54
+ octpack, cur_invmap = torch.stack([octpath, octlevel]).unique(sorted=True, dim=1, return_inverse=True)
55
+ octpath, octlevel = octpack
56
+ octlevel = octlevel.to(torch.int8)
57
+ invmap = cur_invmap[invmap]
58
+
59
+ vox_center, vox_size = octree_utils.octpath_decoding(
60
+ octpath, octlevel, self.scene_center, self.scene_extent)
61
+
62
+ return dict(
63
+ invmap=invmap,
64
+ octpath=octpath,
65
+ octlevel=octlevel,
66
+ vox_center=vox_center,
67
+ vox_size=vox_size,
68
+ )
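As a usage sketch of the pooling helpers above (assuming `model` is an already-populated `SparseVoxelModel` and `cameras` a list of training cameras; the level and rate values are illustrative): `pooling_to_level` caps the octree depth, `pooling_to_rate` merges voxels whose on-screen sampling rate falls below a target, and both return an `invmap` from each original voxel to its merged voxel:

import torch

pooled = model.pooling_to_level(max_level=10)
# pooled['invmap'][i] is the index of the coarse voxel that original voxel i maps to,
# so per-voxel quantities can be aggregated onto the pooled layout:
counts = torch.zeros(len(pooled['octpath']), dtype=torch.long, device="cuda")
counts.index_add_(0, pooled['invmap'], torch.ones_like(pooled['invmap']))

pooled_lowres = model.pooling_to_rate(cameras, max_rate=1.0)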
src/sparse_voxel_gears/properties.py ADDED
@@ -0,0 +1,146 @@
1
+ # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
2
+ #
3
+ # NVIDIA CORPORATION and its licensors retain all intellectual property
4
+ # and proprietary rights in and to this software, related documentation
5
+ # and any modifications thereto. Any use, reproduction, disclosure or
6
+ # distribution of this software and related documentation without an express
7
+ # license agreement from NVIDIA CORPORATION is strictly prohibited.
8
+
9
+ import torch
10
+
11
+ from src.utils import octree_utils
12
+ from src.utils.fuser_utils import rgb_fusion
13
+ from src.utils.activation_utils import rgb2shzero
14
+
15
+ import svraster_cuda
16
+
17
+
18
+ class SVProperties:
19
+
20
+ @property
21
+ def num_voxels(self):
22
+ return len(self.octpath)
23
+
24
+ @property
25
+ def num_grid_pts(self):
26
+ return len(self.grid_pts_key)
27
+
28
+ @property
29
+ def scene_min(self):
30
+ return self.scene_center - 0.5 * self.scene_extent
31
+
32
+ @property
33
+ def scene_max(self):
34
+ return self.scene_center + 0.5 * self.scene_extent
35
+
36
+ @property
37
+ def inside_min(self):
38
+ return self.scene_center - 0.5 * self.inside_extent
39
+
40
+ @property
41
+ def inside_max(self):
42
+ return self.scene_center + 0.5 * self.inside_extent
43
+
44
+ @property
45
+ def outside_level(self):
46
+ return (self.scene_extent / self.inside_extent).log2().round().long().item()
47
+
48
+ @property
49
+ def bounding(self):
50
+ return torch.stack([self.scene_min, self.scene_max])
51
+
52
+ @property
53
+ def inside_mask(self):
54
+ isin = ((self.inside_min < self.vox_center) & (self.vox_center < self.inside_max)).all(1)
55
+ return isin
56
+
57
+ @property
58
+ def sh0(self):
59
+ return self._sh0
60
+
61
+ @property
62
+ def shs(self):
63
+ return self._shs
64
+
65
+ @property
66
+ def subdivision_priority(self):
67
+ return self._subdiv_p.grad
68
+
69
+ def reset_subdivision_priority(self):
70
+ self._subdiv_p.grad = None
71
+
72
+ @property
73
+ def signature(self):
74
+ # Signature to check if the voxel grid layout is updated
75
+ return (self.num_voxels, id(self.octpath), id(self.octlevel))
76
+
77
+ def _check_derived_voxel_attr(self):
78
+ # Lazy computation of derived voxel attributes (centers, sizes, grid-point links)
79
+ signature = self.signature
80
+ need_recompute = not hasattr(self, '_check_derived_voxel_attr_signature') or \
81
+ self._check_derived_voxel_attr_signature != signature
82
+ if need_recompute:
83
+ self._vox_center, self._vox_size = octree_utils.octpath_decoding(
84
+ self.octpath, self.octlevel, self.scene_center, self.scene_extent)
85
+ self._grid_pts_key, self._vox_key = octree_utils.build_grid_pts_link(self.octpath, self.octlevel)
86
+ self._check_derived_voxel_attr_signature = signature
87
+
88
+ @property
89
+ def vox_center(self):
90
+ self._check_derived_voxel_attr()
91
+ return self._vox_center
92
+
93
+ @property
94
+ def vox_size(self):
95
+ self._check_derived_voxel_attr()
96
+ return self._vox_size
97
+
98
+ @property
99
+ def grid_pts_key(self):
100
+ self._check_derived_voxel_attr()
101
+ return self._grid_pts_key
102
+
103
+ @property
104
+ def vox_key(self):
105
+ self._check_derived_voxel_attr()
106
+ return self._vox_key
107
+
108
+ @property
109
+ def vox_size_inv(self):
110
+ # Lazy computation of inverse voxel sizes
111
+ signature = self.signature
112
+ need_recompute = not hasattr(self, '_vox_size_inv_signature') or \
113
+ self._vox_size_inv_signature != signature
114
+ if need_recompute:
115
+ self._vox_size_inv = 1 / self.vox_size
116
+ self._vox_size_inv_signature = signature
117
+ return self._vox_size_inv
118
+
119
+ @property
120
+ def grid_pts_xyz(self):
121
+ # Lazy computation of grid points xyz
122
+ signature = self.signature
123
+ need_recompute = not hasattr(self, '_grid_pts_xyz_signature') or \
124
+ self._grid_pts_xyz_signature != signature
125
+ if need_recompute:
126
+ self._grid_pts_xyz = octree_utils.compute_gridpoints_xyz(
127
+ self.grid_pts_key, self.scene_center, self.scene_extent)
128
+ self._grid_pts_xyz_signature = signature
129
+ return self._grid_pts_xyz
130
+
131
+ @torch.no_grad()
132
+ def reset_sh_from_cameras(self, cameras):
133
+ self._sh0.data.copy_(rgb2shzero(rgb_fusion(self, cameras)))
134
+ self._shs.data.zero_()
135
+
136
+ def apply_tv_on_density_field(self, lambda_tv_density):
137
+ if self._geo_grid_pts.grad is None:
138
+ self._geo_grid_pts.grad = torch.zeros_like(self._geo_grid_pts.data)
139
+ svraster_cuda.grid_loss_bw.total_variation(
140
+ grid_pts=self._geo_grid_pts,
141
+ vox_key=self.vox_key,
142
+ weight=lambda_tv_density,
143
+ vox_size_inv=self.vox_size_inv,
144
+ no_tv_s=True,
145
+ tv_sparse=False,
146
+ grid_pts_grad=self._geo_grid_pts.grad)
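The derived-attribute properties above (`vox_center`, `vox_size`, `grid_pts_key`, `vox_key`, `vox_size_inv`, `grid_pts_xyz`) all use the same signature-based lazy caching: the cached value is recomputed only when `(num_voxels, id(octpath), id(octlevel))` changes. A small self-contained illustration of that pattern (not part of the codebase):

class LazyDerivedExample:
    def __init__(self, data):
        self.data = data

    @property
    def signature(self):
        # Cheap fingerprint of everything the derived value depends on.
        return (len(self.data), id(self.data))

    @property
    def doubled(self):
        sig = self.signature
        if getattr(self, '_doubled_sig', None) != sig:
            self._doubled = [2 * v for v in self.data]   # stand-in for an expensive derivation
            self._doubled_sig = sig
        return self._doubled

obj = LazyDerivedExample([1, 2, 3])
print(obj.doubled)   # computed once
print(obj.doubled)   # served from the cache until obj.data is rebound to a new object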
src/sparse_voxel_gears/renderer.py ADDED
@@ -0,0 +1,178 @@
1
+ # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
2
+ #
3
+ # NVIDIA CORPORATION and its licensors retain all intellectual property
4
+ # and proprietary rights in and to this software, related documentation
5
+ # and any modifications thereto. Any use, reproduction, disclosure or
6
+ # distribution of this software and related documentation without an express
7
+ # license agreement from NVIDIA CORPORATION is strictly prohibited.
8
+
9
+ import torch
10
+ import svraster_cuda
11
+
12
+ from src.utils.image_utils import resize_rendering
13
+
14
+ class SVRenderer:
15
+
16
+ def freeze_vox_geo(self):
17
+ '''
18
+ Freeze grid points parameter and pre-gather them to each voxel.
19
+ '''
20
+ with torch.no_grad():
21
+ self.frozen_vox_geo = svraster_cuda.renderer.GatherGeoParams.apply(
22
+ self.vox_key,
23
+ torch.arange(self.num_voxels, device="cuda"),
24
+ self._geo_grid_pts
25
+ )
26
+ self._geo_grid_pts.requires_grad = False
27
+
28
+ def unfreeze_vox_geo(self):
29
+ '''
30
+ Unfreeze grid points parameter.
31
+ '''
32
+ del self.frozen_vox_geo
33
+ self._geo_grid_pts.requires_grad = True
34
+
35
+ def vox_fn(self, idx, cam_pos, color_mode=None, viewdir=None):
36
+ '''
37
+ Per-frame voxel property processing. Two important operations:
38
+ 1. Gather grid points parameter into each voxel.
39
+ 2. Compute view-dependent color of each voxel.
40
+
41
+ Input:
42
+ @idx Indices for active voxel for current frame.
43
+ @cam_pos Camera position.
44
+ Output:
45
+ @vox_params A dictionary of the pre-processed voxel properties.
46
+ '''
47
+
48
+ # Gather the density values at the eight corners of each voxel.
49
+ # It defines a trilinear density field.
50
+ # The final tensor has shape [#vox, 8]
51
+ if hasattr(self, 'frozen_vox_geo'):
52
+ geos = self.frozen_vox_geo
53
+ else:
54
+ geos = svraster_cuda.renderer.GatherGeoParams.apply(
55
+ self.vox_key,
56
+ idx,
57
+ self._geo_grid_pts
58
+ )
59
+
60
+ # Compute voxel colors
61
+ if color_mode is None or color_mode == "sh":
62
+ active_sh_degree = self.active_sh_degree
63
+ color_mode = "sh"
64
+ elif color_mode.startswith("sh"):
65
+ active_sh_degree = int(color_mode[2])
66
+ color_mode = "sh"
67
+
68
+ if color_mode == "sh":
69
+ rgbs = svraster_cuda.renderer.SH_eval.apply(
70
+ active_sh_degree,
71
+ idx,
72
+ self.vox_center,
73
+ cam_pos,
74
+ viewdir, # Ignore above two when viewdir is not None
75
+ self.sh0,
76
+ self.shs,
77
+ )
78
+ elif color_mode == "rand":
79
+ rgbs = torch.rand([self.num_voxels, 3], dtype=torch.float32, device="cuda")
80
+ elif color_mode == "dontcare":
81
+ rgbs = torch.empty([self.num_voxels, 3], dtype=torch.float32, device="cuda")
82
+ else:
83
+ raise NotImplementedError
84
+
85
+ # Pack everything
86
+ vox_params = {
87
+ 'geos': geos,
88
+ 'rgbs': rgbs,
89
+ 'subdiv_p': self._subdiv_p, # Dummy param to record subdivision priority
90
+ }
91
+ if vox_params['subdiv_p'] is None:
92
+ vox_params['subdiv_p'] = torch.ones([self.num_voxels, 1], device="cuda")
93
+
94
+ return vox_params
95
+
96
+ def render(
97
+ self,
98
+ camera,
99
+ color_mode=None,
100
+ track_max_w=False,
101
+ ss=None,
102
+ output_depth=False,
103
+ output_normal=False,
104
+ output_T=False,
105
+ rand_bg=False,
106
+ use_auto_exposure=False,
107
+ **other_opt):
108
+
109
+ ###################################
110
+ # Pre-processing
111
+ ###################################
112
+ if ss is None:
113
+ ss = self.ss
114
+ w_src, h_src = camera.image_width, camera.image_height
115
+ w, h = round(w_src * ss), round(h_src * ss)
116
+ w_ss, h_ss = w / w_src, h / h_src
117
+ if ss != 1.0 and 'gt_color' in other_opt:
118
+ other_opt['gt_color'] = resize_rendering(other_opt['gt_color'], size=(h, w))
119
+
120
+ n_samp_per_vox = other_opt.pop('n_samp_per_vox', self.n_samp_per_vox)
121
+
122
+ ###################################
123
+ # Call low-level rasterization API
124
+ ###################################
125
+ raster_settings = svraster_cuda.renderer.RasterSettings(
126
+ color_mode=color_mode,
127
+ n_samp_per_vox=n_samp_per_vox,
128
+ image_width=w,
129
+ image_height=h,
130
+ tanfovx=camera.tanfovx,
131
+ tanfovy=camera.tanfovy,
132
+ cx=camera.cx * w_ss,
133
+ cy=camera.cy * h_ss,
134
+ w2c_matrix=camera.w2c,
135
+ c2w_matrix=camera.c2w,
136
+ bg_color=float(self.white_background),
137
+ near=camera.near,
138
+ need_depth=output_depth,
139
+ need_normal=output_normal,
140
+ track_max_w=track_max_w,
141
+ **other_opt)
142
+ color, depth, normal, T, max_w = svraster_cuda.renderer.rasterize_voxels(
143
+ raster_settings,
144
+ self.octpath,
145
+ self.vox_center,
146
+ self.vox_size,
147
+ self.vox_fn)
148
+
149
+ ###################################
150
+ # Post-processing and pack output
151
+ ###################################
152
+ if rand_bg:
153
+ color = color + T * torch.rand_like(color, requires_grad=False)
154
+ elif not self.white_background and not self.black_background:
155
+ color = color + T * color.mean((1,2), keepdim=True)
156
+
157
+ if use_auto_exposure:
158
+ color = camera.auto_exposure_apply(color)
159
+
160
+ render_pkg = {
161
+ 'color': color,
162
+ 'depth': depth if output_depth else None,
163
+ 'normal': normal if output_normal else None,
164
+ 'T': T if output_T else None,
165
+ 'max_w': max_w,
166
+ }
167
+
168
+ for k in ['color', 'depth', 'normal', 'T']:
169
+ render_pkg[f'raw_{k}'] = render_pkg[k]
170
+
171
+ # Post process super-sampling
172
+ if render_pkg[k] is not None and render_pkg[k].shape[-2:] != (h_src, w_src):
173
+ render_pkg[k] = resize_rendering(render_pkg[k], size=(h_src, w_src))
174
+
175
+ # Clip intensity
176
+ render_pkg['color'] = render_pkg['color'].clamp(0, 1)
177
+
178
+ return render_pkg
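A hedged usage sketch of `render` above (it assumes `model` is an already-populated `SparseVoxelModel` and `cam` a camera object such as those produced by `src/cameras.py`; argument values are illustrative):

render_pkg = model.render(cam, output_depth=True, output_normal=True, ss=1.0)

color = render_pkg['color']      # [3, H, W] at the camera resolution, clamped to [0, 1]
depth = render_pkg['depth']      # present because output_depth=True
normal = render_pkg['normal']    # present because output_normal=True
raw = render_pkg['raw_color']    # un-clamped output before resizing back to the source resolution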
src/sparse_voxel_gears/renderer_copy.py ADDED
@@ -0,0 +1,178 @@
1
+ # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
2
+ #
3
+ # NVIDIA CORPORATION and its licensors retain all intellectual property
4
+ # and proprietary rights in and to this software, related documentation
5
+ # and any modifications thereto. Any use, reproduction, disclosure or
6
+ # distribution of this software and related documentation without an express
7
+ # license agreement from NVIDIA CORPORATION is strictly prohibited.
8
+
9
+ import torch
10
+ import svraster_cuda
11
+
12
+ from src.utils.image_utils import resize_rendering
13
+
14
+ class SVRenderer:
15
+
16
+ def freeze_vox_geo(self):
17
+ '''
18
+ Freeze grid points parameter and pre-gather them to each voxel.
19
+ '''
20
+ with torch.no_grad():
21
+ self.frozen_vox_geo = svraster_cuda.renderer.GatherGeoParams.apply(
22
+ self.vox_key,
23
+ torch.arange(self.num_voxels, device="cuda"),
24
+ self._geo_grid_pts
25
+ )
26
+ self._geo_grid_pts.requires_grad = False
27
+
28
+ def unfreeze_vox_geo(self):
29
+ '''
30
+ Unfreeze grid points parameter.
31
+ '''
32
+ del self.frozen_vox_geo
33
+ self._geo_grid_pts.requires_grad = True
34
+
35
+ def vox_fn(self, idx, cam_pos, color_mode=None, viewdir=None):
36
+ '''
37
+ Per-frame voxel property processing. Two important operations:
38
+ 1. Gather grid points parameter into each voxel.
39
+ 2. Compute view-dependent color of each voxel.
40
+
41
+ Input:
42
+ @idx Indices for active voxel for current frame.
43
+ @cam_pos Camera position.
44
+ Output:
45
+ @vox_params A dictionary of the pre-processed voxel properties.
46
+ '''
47
+
48
+ # Gather the density values at the eight corners of each voxel.
49
+ # It defines a trilinear density field.
50
+ # The final tensor has shape [#vox, 8]
51
+ if hasattr(self, 'frozen_vox_geo'):
52
+ geos = self.frozen_vox_geo
53
+ else:
54
+ geos = svraster_cuda.renderer.GatherGeoParams.apply(
55
+ self.vox_key,
56
+ idx,
57
+ self._geo_grid_pts
58
+ )
59
+
60
+ # Compute voxel colors
61
+ if color_mode is None or color_mode == "sh":
62
+ active_sh_degree = self.active_sh_degree
63
+ color_mode = "sh"
64
+ elif color_mode.startswith("sh"):
65
+ active_sh_degree = int(color_mode[2])
66
+ color_mode = "sh"
67
+
68
+ if color_mode == "sh":
69
+ rgbs = svraster_cuda.renderer.SH_eval.apply(
70
+ active_sh_degree,
71
+ idx,
72
+ self.vox_center,
73
+ cam_pos,
74
+ viewdir, # Ignore above two when viewdir is not None
75
+ self.sh0,
76
+ self.shs,
77
+ )
78
+ elif color_mode == "rand":
79
+ rgbs = torch.rand([self.num_voxels, 3], dtype=torch.float32, device="cuda")
80
+ elif color_mode == "dontcare":
81
+ rgbs = torch.empty([self.num_voxels, 3], dtype=torch.float32, device="cuda")
82
+ else:
83
+ raise NotImplementedError
84
+
85
+ # Pack everything
86
+ vox_params = {
87
+ 'geos': geos,
88
+ 'rgbs': rgbs,
89
+ 'subdiv_p': self._subdiv_p, # Dummy param to record subdivision priority
90
+ }
91
+ if vox_params['subdiv_p'] is None:
92
+ vox_params['subdiv_p'] = torch.ones([self.num_voxels, 1], device="cuda")
93
+
94
+ return vox_params
95
+
96
+ def render(
97
+ self,
98
+ camera,
99
+ color_mode=None,
100
+ track_max_w=False,
101
+ ss=None,
102
+ output_depth=False,
103
+ output_normal=False,
104
+ output_T=False,
105
+ rand_bg=False,
106
+ use_auto_exposure=False,
107
+ **other_opt):
108
+
109
+ ###################################
110
+ # Pre-processing
111
+ ###################################
112
+ if ss is None:
113
+ ss = self.ss
114
+ w_src, h_src = camera.image_width, camera.image_height
115
+ w, h = round(w_src * ss), round(h_src * ss)
116
+ w_ss, h_ss = w / w_src, h / h_src
117
+ if ss != 1.0 and 'gt_color' in other_opt:
118
+ other_opt['gt_color'] = resize_rendering(other_opt['gt_color'], size=(h, w))
119
+
120
+ n_samp_per_vox = other_opt.pop('n_samp_per_vox', self.n_samp_per_vox)
121
+
122
+ ###################################
123
+ # Call low-level rasterization API
124
+ ###################################
125
+ raster_settings = svraster_cuda.renderer.RasterSettings(
126
+ color_mode=color_mode,
127
+ n_samp_per_vox=n_samp_per_vox,
128
+ image_width=w,
129
+ image_height=h,
130
+ tanfovx=camera.tanfovx,
131
+ tanfovy=camera.tanfovy,
132
+ cx=camera.cx * w_ss,
133
+ cy=camera.cy * h_ss,
134
+ w2c_matrix=camera.w2c,
135
+ c2w_matrix=camera.c2w,
136
+ bg_color=float(self.white_background),
137
+ near=camera.near,
138
+ need_depth=output_depth,
139
+ need_normal=output_normal,
140
+ track_max_w=track_max_w,
141
+ **other_opt)
142
+ color, depth, normal, T, max_w = svraster_cuda.renderer.rasterize_voxels(
143
+ raster_settings,
144
+ self.octpath,
145
+ self.vox_center,
146
+ self.vox_size,
147
+ self.vox_fn)
148
+
149
+ ###################################
150
+ # Post-processing and pack output
151
+ ###################################
152
+ if rand_bg:
153
+ color = color + T * torch.rand_like(color, requires_grad=False)
154
+ elif not self.white_background and not self.black_background:
155
+ color = color + T * color.mean((1,2), keepdim=True)
156
+
157
+ if use_auto_exposure:
158
+ color = camera.auto_exposure_apply(color)
159
+
160
+ render_pkg = {
161
+ 'color': color,
162
+ 'depth': depth if output_depth else None,
163
+ 'normal': normal if output_normal else None,
164
+ 'T': T if output_T else None,
165
+ 'max_w': max_w,
166
+ }
167
+
168
+ for k in ['color', 'depth', 'normal', 'T']:
169
+ render_pkg[f'raw_{k}'] = render_pkg[k]
170
+
171
+ # Post process super-sampling
172
+ if render_pkg[k] is not None and render_pkg[k].shape[-2:] != (h_src, w_src):
173
+ render_pkg[k] = resize_rendering(render_pkg[k], size=(h_src, w_src))
174
+
175
+ # Clip intensity
176
+ render_pkg['color'] = render_pkg['color'].clamp(0, 1)
177
+
178
+ return render_pkg
src/sparse_voxel_model.py ADDED
@@ -0,0 +1,67 @@
1
+ # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
2
+ #
3
+ # NVIDIA CORPORATION and its licensors retain all intellectual property
4
+ # and proprietary rights in and to this software, related documentation
5
+ # and any modifications thereto. Any use, reproduction, disclosure or
6
+ # distribution of this software and related documentation without an express
7
+ # license agreement from NVIDIA CORPORATION is strictly prohibited.
8
+
9
+ from src.sparse_voxel_gears.constructor import SVConstructor
10
+ from src.sparse_voxel_gears.properties import SVProperties
11
+ from src.sparse_voxel_gears.renderer import SVRenderer
12
+ from src.sparse_voxel_gears.adaptive import SVAdaptive
13
+ from src.sparse_voxel_gears.io import SVInOut
14
+ from src.sparse_voxel_gears.pooling import SVPooling
15
+
16
+
17
+ class SparseVoxelModel(SVConstructor, SVProperties, SVRenderer, SVAdaptive, SVInOut, SVPooling):
18
+
19
+ def __init__(self,
20
+ n_samp_per_vox=1, # Number of sampled points per visited voxel
21
+ sh_degree=3, # Use 3 * (k+1)^2 params per voxel for view-dependent colors
22
+ ss=1.5, # Super-sampling rate for anti-aliasing
23
+ white_background=False, # Assume a white background
24
+ black_background=False, # Assume a black background
25
+ ):
26
+ '''
27
+ Setup of the model meta. At this point, no voxel is allocated.
28
+ Use the following methods to allocate voxels and parameters.
29
+
30
+ 1. `model_load` defined in `src/sparse_voxel_gears/io.py`.
31
+ Load the saved models from a given path.
32
+
33
+ 2. `model_init` defined in `src/sparse_voxel_gears/constructor.py`.
34
+ Heuristically initialize the sparse grid layout and parameters from the training data.
35
+ '''
36
+ super().__init__()
37
+
38
+ self.n_samp_per_vox = n_samp_per_vox
39
+ self.max_sh_degree = sh_degree
40
+ self.ss = ss
41
+ self.white_background = white_background
42
+ self.black_background = black_background
43
+
44
+ # List the variable names
45
+ self.per_voxel_attr_lst = [
46
+ 'octpath', 'octlevel',
47
+ '_subdiv_p',
48
+ ]
49
+ self.per_voxel_param_lst = [
50
+ '_sh0', '_shs',
51
+ ]
52
+ self.grid_pts_param_lst = [
53
+ '_geo_grid_pts',
54
+ ]
55
+
56
+ # To be init from model_init
57
+ self.scene_center = None
58
+ self.scene_extent = None
59
+ self.inside_extent = None
60
+ self.octpath = None
61
+ self.octlevel = None
62
+ self.active_sh_degree = sh_degree
63
+
64
+ self._geo_grid_pts = None
65
+ self._sh0 = None
66
+ self._shs = None
67
+ self._subdiv_p = None
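As a construction sketch (the keyword values shown are the defaults documented above; the checkpoint path is illustrative, and voxel allocation is assumed to happen through the loader in `io.py` or the constructor in `constructor.py`):

model = SparseVoxelModel(
    n_samp_per_vox=1,        # samples per visited voxel along each ray
    sh_degree=3,             # view-dependent color capacity
    ss=1.5,                  # super-sampling rate for anti-aliasing
    white_background=False,
    black_background=False,
)
# No voxels are allocated yet; populate the model from a saved checkpoint ...
model.load_iteration("output/scene", iteration=-1)   # -1 loads the latest checkpoint
# ... or initialize it heuristically from the training data (see constructor.py).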
src/sparse_voxel_model_copy.py ADDED
@@ -0,0 +1,67 @@
1
+ # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
2
+ #
3
+ # NVIDIA CORPORATION and its licensors retain all intellectual property
4
+ # and proprietary rights in and to this software, related documentation
5
+ # and any modifications thereto. Any use, reproduction, disclosure or
6
+ # distribution of this software and related documentation without an express
7
+ # license agreement from NVIDIA CORPORATION is strictly prohibited.
8
+
9
+ from src.sparse_voxel_gears.constructor import SVConstructor
10
+ from src.sparse_voxel_gears.properties import SVProperties
11
+ from src.sparse_voxel_gears.renderer import SVRenderer
12
+ from src.sparse_voxel_gears.adaptive import SVAdaptive
13
+ from src.sparse_voxel_gears.io import SVInOut
14
+ from src.sparse_voxel_gears.pooling import SVPooling
15
+
16
+
17
+ class SparseVoxelModel(SVConstructor, SVProperties, SVRenderer, SVAdaptive, SVInOut, SVPooling):
18
+
19
+ def __init__(self,
20
+ n_samp_per_vox=1, # Number of sampled points per visited voxel
21
+ sh_degree=3, # Use 3 * (k+1)^2 params per voxel for view-dependent colors
22
+ ss=1.5, # Super-sampling rate for anti-aliasing
23
+ white_background=False, # Assume a white background
24
+ black_background=False, # Assume a black background
25
+ ):
26
+ '''
27
+ Setup of the model meta. At this point, no voxel is allocated.
28
+ Use the following methods to allocate voxels and parameters.
29
+
30
+ 1. `model_load` defined in `src/sparse_voxel_gears/io.py`.
31
+ Load the saved models from a given path.
32
+
33
+ 2. `model_init` defined in `src/sparse_voxel_gears/constructor.py`.
34
+ Heuristically initialize the sparse grid layout and parameters from the training data.
35
+ '''
36
+ super().__init__()
37
+
38
+ self.n_samp_per_vox = n_samp_per_vox
39
+ self.max_sh_degree = sh_degree
40
+ self.ss = ss
41
+ self.white_background = white_background
42
+ self.black_background = black_background
43
+
44
+ # List the variable names
45
+ self.per_voxel_attr_lst = [
46
+ 'octpath', 'octlevel',
47
+ '_subdiv_p',
48
+ ]
49
+ self.per_voxel_param_lst = [
50
+ '_sh0', '_shs',
51
+ ]
52
+ self.grid_pts_param_lst = [
53
+ '_geo_grid_pts',
54
+ ]
55
+
56
+ # To be init from model_init
57
+ self.scene_center = None
58
+ self.scene_extent = None
59
+ self.inside_extent = None
60
+ self.octpath = None
61
+ self.octlevel = None
62
+ self.active_sh_degree = sh_degree
63
+
64
+ self._geo_grid_pts = None
65
+ self._sh0 = None
66
+ self._shs = None
67
+ self._subdiv_p = None
src/utils/__pycache__/activation_utils.cpython-39.pyc ADDED
Binary file (2.16 kB). View file
 
src/utils/__pycache__/bounding_utils.cpython-39.pyc ADDED
Binary file (3.05 kB). View file
 
src/utils/__pycache__/camera_utils.cpython-39.pyc ADDED
Binary file (2.38 kB). View file
 
src/utils/__pycache__/colmap_utils.cpython-39.pyc ADDED
Binary file (1.77 kB). View file
 
src/utils/__pycache__/fuser_utils.cpython-39.pyc ADDED
Binary file (3.87 kB). View file
 
src/utils/__pycache__/image_utils.cpython-39.pyc ADDED
Binary file (2.51 kB). View file
 
src/utils/__pycache__/loss_utils.cpython-39.pyc ADDED
Binary file (8.78 kB). View file
 
src/utils/__pycache__/marching_cubes_utils.cpython-39.pyc ADDED
Binary file (25.1 kB). View file
 
src/utils/__pycache__/mono_utils.cpython-39.pyc ADDED
Binary file (4.88 kB). View file
 
src/utils/__pycache__/octree_utils.cpython-39.pyc ADDED
Binary file (7.49 kB). View file
 
src/utils/__pycache__/system_utils.cpython-39.pyc ADDED
Binary file (372 Bytes). View file
 
src/utils/activation_utils.py ADDED
@@ -0,0 +1,49 @@
1
+ # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
2
+ #
3
+ # NVIDIA CORPORATION and its licensors retain all intellectual property
4
+ # and proprietary rights in and to this software, related documentation
5
+ # and any modifications thereto. Any use, reproduction, disclosure or
6
+ # distribution of this software and related documentation without an express
7
+ # license agreement from NVIDIA CORPORATION is strictly prohibited.
8
+
9
+ import torch
10
+ from svraster_cuda.meta import STEP_SZ_SCALE
11
+
12
+ def softplus(x):
13
+ return torch.nn.functional.softplus(x)
14
+
15
+ def exp_linear_10(x):
16
+ return torch.where(x > 1, x, torch.exp(x - 1))
17
+
18
+ def exp_linear_11(x):
19
+ return torch.where(x > 1.1, x, torch.exp(0.909090909091 * x - 0.904689820196))
20
+
21
+ def exp_linear_20(x):
22
+ return torch.where(x > 2.0, x, torch.exp(0.5 * x - 0.30685281944))
23
+
24
+ def softplus_inverse(y):
25
+ return y + torch.log(-torch.expm1(-y))
26
+
27
+ def exp_linear_10_inverse(y):
28
+ return torch.where(y > 1, y, torch.log(y) + 1)
29
+
30
+ def exp_linear_11_inverse(y):
31
+ return torch.where(y > 1.1, y, (torch.log(y) + 0.904689820196) / 0.909090909091)
32
+
33
+ def exp_linear_20_inverse(y):
34
+ return torch.where(y > 2.0, y, (torch.log(y) + 0.30685281944) / 0.5)
35
+
36
+ def smooth_clamp_max(x, max_val):
37
+ return max_val - torch.nn.functional.softplus(max_val - x)
38
+
39
+ def density2alpha(density, interval):
40
+ return 1 - torch.exp(-STEP_SZ_SCALE * interval * density)
41
+
42
+ def alpha2density(alpha, interval):
43
+ return torch.log(1 - alpha) / (-STEP_SZ_SCALE * interval)
44
+
45
+ def rgb2shzero(x):
46
+ return (x - 0.5) / 0.28209479177387814
47
+
48
+ def shzero2rgb(x):
49
+ return x * 0.28209479177387814 + 0.5
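The paired activations above are meant to be exact inverses on their shared domain; a quick sanity-check sketch (assuming `svraster_cuda` is installed so the module imports cleanly):

import torch
from src.utils.activation_utils import (
    exp_linear_11, exp_linear_11_inverse, rgb2shzero, shzero2rgb)

x = torch.linspace(-3.0, 3.0, steps=13)
assert torch.allclose(exp_linear_11_inverse(exp_linear_11(x)), x, atol=1e-5)

rgb = torch.rand(4, 3)
assert torch.allclose(shzero2rgb(rgb2shzero(rgb)), rgb, atol=1e-6)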
src/utils/bounding_utils.py ADDED
@@ -0,0 +1,102 @@
1
+ # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
2
+ #
3
+ # NVIDIA CORPORATION and its licensors retain all intellectual property
4
+ # and proprietary rights in and to this software, related documentation
5
+ # and any modifications thereto. Any use, reproduction, disclosure or
6
+ # distribution of this software and related documentation without an express
7
+ # license agreement from NVIDIA CORPORATION is strictly prohibited.
8
+
9
+ import numpy as np
10
+
11
+
12
+ def decide_main_bounding(bound_mode="default",
13
+ forward_dist_scale=1.0, # For "forward" mode
14
+ pcd_density_rate=0.1, # For "pcd" mode
15
+ bound_scale=1.0, # Scaling of the bounding
16
+ tr_cams=None, # Cameras
17
+ pcd=None, # Point cloud
18
+ suggested_bounding=None):
19
+ if bound_mode == "default" and suggested_bounding is not None:
20
+ print("Use suggested bounding")
21
+ center = suggested_bounding.mean(0)
22
+ radius = (suggested_bounding[1] - suggested_bounding[0]) * 0.5
23
+ elif bound_mode in ["camera_max", "camera_median"]:
24
+ center, radius = main_scene_bound_camera_heuristic(
25
+ cams=tr_cams, bound_mode=bound_mode)
26
+ elif bound_mode == "forward":
27
+ center, radius = main_scene_bound_forward_heuristic(
28
+ cams=tr_cams, forward_dist_scale=forward_dist_scale)
29
+ elif bound_mode == "pcd":
30
+ center, radius = main_scene_bound_pcd_heuristic(
31
+ pcd=pcd, pcd_density_rate=pcd_density_rate)
32
+ elif bound_mode == "default":
33
+ cam_lookats = np.stack([cam.lookat.tolist() for cam in tr_cams])
34
+ lookat_dots = (cam_lookats[:,None] * cam_lookats).sum(-1)
35
+ is_forward_facing = lookat_dots.min() > 0
36
+
37
+ if is_forward_facing:
38
+ center, radius = main_scene_bound_forward_heuristic(
39
+ cams=tr_cams, forward_dist_scale=forward_dist_scale)
40
+ else:
41
+ center, radius = main_scene_bound_camera_heuristic(
42
+ cams=tr_cams, bound_mode="camera_median")
43
+ else:
44
+ raise NotImplementedError
45
+
46
+ radius = radius * bound_scale
47
+
48
+ bounding = np.array([
49
+ center - radius,
50
+ center + radius,
51
+ ], dtype=np.float32)
52
+ return bounding
53
+
54
+
55
+ def main_scene_bound_camera_heuristic(cams, bound_mode):
56
+ print("Heuristic bounding:", bound_mode)
57
+ cam_positions = np.stack([cam.position.tolist() for cam in cams])
58
+ center = cam_positions.mean(0)
59
+ dists = np.linalg.norm(cam_positions - center, axis=1)
60
+ if bound_mode == "camera_max":
61
+ radius = np.max(dists)
62
+ elif bound_mode == "camera_median":
63
+ radius = np.median(dists)
64
+ else:
65
+ raise NotImplementedError
66
+ return center, radius
67
+
68
+
69
+ def main_scene_bound_forward_heuristic(cams, forward_dist_scale):
70
+ print("Heuristic bounding: forward")
71
+ positions = np.stack([cam.position.tolist() for cam in cams])
72
+ cam_center = positions.mean(0)
73
+ cam_lookat = np.stack([cam.lookat.tolist() for cam in cams]).mean(0)
74
+ cam_lookat /= np.linalg.norm(cam_lookat)
75
+ cam_extent = 2 * np.linalg.norm(positions - cam_center, axis=1).max()
76
+
77
+ center = cam_center + forward_dist_scale * cam_extent * cam_lookat
78
+ radius = 0.8 * forward_dist_scale * cam_extent
79
+
80
+ return center, radius
81
+
82
+
83
+ def main_scene_bound_pcd_heuristic(pcd, pcd_density_rate):
84
+ print("Heuristic bounding: pcd")
85
+ center = np.median(pcd.points, axis=0)
86
+ dist = np.abs(pcd.points - center).max(axis=1)
87
+ dist = np.sort(dist)
88
+ density = (1 + np.arange(len(dist))) * (dist > 0) / ((2 * dist) ** 3 + 1e-6)
89
+
90
+ # Should cover at least 5% of the points
91
+ begin_idx = round(len(density) * 0.05)
92
+
93
+ # Find the radius with maximum point density
94
+ max_idx = begin_idx + density[begin_idx:].argmax()
95
+
96
+ # Find the smallest radius with point density equal to pcd_density_rate of maximum
97
+ target_density = pcd_density_rate * density[max_idx]
98
+ target_idx = max_idx + np.where(density[max_idx:] < target_density)[0][0]
99
+
100
+ radius = dist[target_idx]
101
+
102
+ return center, radius
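A brief usage sketch of the bounding heuristics above (the training cameras `tr_cams` and the optional point cloud `pcd` are assumed to come from the data loader; `bound_mode="default"` picks the forward or camera_median heuristic automatically):

import numpy as np

bounding = decide_main_bounding(
    bound_mode="default",
    bound_scale=1.0,
    tr_cams=tr_cams,
    pcd=pcd,              # only consulted when bound_mode="pcd"
)
center = bounding.mean(0)                     # [3] center of the main region
extent = (bounding[1] - bounding[0]).max()    # scalar extent of the main region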
src/utils/camera_utils.py ADDED
@@ -0,0 +1,79 @@
1
+ # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
2
+ #
3
+ # NVIDIA CORPORATION and its licensors retain all intellectual property
4
+ # and proprietary rights in and to this software, related documentation
5
+ # and any modifications thereto. Any use, reproduction, disclosure or
6
+ # distribution of this software and related documentation without an express
7
+ # license agreement from NVIDIA CORPORATION is strictly prohibited.
8
+
9
+ import numpy as np
10
+ from scipy.interpolate import make_interp_spline
11
+
12
+
13
+ def fov2focal(fov, pixels):
14
+ return pixels / (2 * np.tan(0.5 * fov))
15
+
16
+ def focal2fov(focal, pixels):
17
+ return 2 * np.arctan(pixels / (2 * focal))
18
+
19
+
20
+ def interpolate_poses(poses, n_frame, periodic=True):
21
+
22
+ assert len(poses) > 1
23
+
24
+ poses = list(poses)
25
+ bc_type = None
26
+
27
+ if periodic:
28
+ poses.append(poses[0])
29
+ bc_type = "periodic"
30
+
31
+ pos_lst = np.stack([pose[:3, 3] for pose in poses])
32
+ lookat_lst = np.stack([pose[:3, 2] for pose in poses])
33
+ right_lst = np.stack([pose[:3, 0] for pose in poses])
34
+
35
+ ts = np.linspace(0, 1, len(poses))
36
+ pos_interp_f = make_interp_spline(ts, pos_lst, bc_type=bc_type)
37
+ lookat_interp_f = make_interp_spline(ts, lookat_lst, bc_type=bc_type)
38
+ right_interp_f = make_interp_spline(ts, right_lst, bc_type=bc_type)
39
+
40
+ samps = np.linspace(0, 1, n_frame+1)[:n_frame]
41
+ pos_video = pos_interp_f(samps)
42
+ lookat_video = lookat_interp_f(samps)
43
+ right_video = right_interp_f(samps)
44
+ interp_poses = []
45
+ for i in range(n_frame):
46
+ pos = pos_video[i]
47
+ lookat = lookat_video[i] / np.linalg.norm(lookat_video[i])
48
+ right_ = right_video[i] / np.linalg.norm(right_video[i])
49
+ down = np.cross(lookat, right_)
50
+ right = np.cross(down, lookat)
51
+ c2w = np.eye(4, dtype=np.float32)
52
+ c2w[:3, 0] = right
53
+ c2w[:3, 1] = down
54
+ c2w[:3, 2] = lookat
55
+ c2w[:3, 3] = pos
56
+ interp_poses.append(c2w)
57
+
58
+ return interp_poses
59
+
60
+
61
+ def gen_circular_poses(radius,
62
+ n_frame,
63
+ starting=1.5 * np.pi, # Starting from -z
64
+ ):
65
+ poses = []
66
+ for rad in np.linspace(starting, starting + 2 * np.pi, n_frame):
67
+ pos = radius * np.array([np.cos(rad), 0, np.sin(rad)])
68
+ lookat = -pos / np.linalg.norm(pos)
69
+ down = np.array([0, 1, 0])
70
+ right = np.cross(down, lookat)
71
+ right = right / np.linalg.norm(right)
72
+ down = np.cross(lookat, right)
73
+ c2w = np.eye(4, dtype=np.float32)
74
+ c2w[:3, 0] = right
75
+ c2w[:3, 1] = down
76
+ c2w[:3, 2] = lookat
77
+ c2w[:3, 3] = pos
78
+ poses.append(c2w)
79
+ return poses
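For reference, `fov2focal` and `focal2fov` above are inverses of each other; a quick check with illustrative numbers:

import numpy as np
from src.utils.camera_utils import fov2focal, focal2fov

width = 800
fov_x = np.deg2rad(60.0)
focal = fov2focal(fov_x, width)              # ~692.8 pixels for a 60-degree horizontal FoV
assert np.isclose(focal2fov(focal, width), fov_x)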
src/utils/colmap_utils.py ADDED
@@ -0,0 +1,62 @@
1
+ # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
2
+ #
3
+ # NVIDIA CORPORATION and its licensors retain all intellectual property
4
+ # and proprietary rights in and to this software, related documentation
5
+ # and any modifications thereto. Any use, reproduction, disclosure or
6
+ # distribution of this software and related documentation without an express
7
+ # license agreement from NVIDIA CORPORATION is strictly prohibited.
8
+
9
+ import pycolmap
10
+ import numpy as np
11
+
12
+ from typing import NamedTuple
13
+
14
+
15
+ class PointCloud(NamedTuple):
16
+ points: np.array
17
+ colors: np.array
18
+ errors: np.array
19
+ corr: dict
20
+
21
+
22
+ def parse_colmap_pts(sfm: pycolmap.Reconstruction, transform: np.ndarray = None):
23
+ """
24
+ Parse COLMAP points and their correspondences.
25
+
26
+ Input:
27
+ @sfm Reconstruction from COLMAP.
28
+ @transform 3x3 matrix to transform xyz.
29
+ Output:
30
+ @xyz Nx3 point positions.
31
+ @rgb Nx3 point colors.
32
+ @err N errors.
33
+ @corr Dictionary from file name to point indices.
34
+ """
35
+
36
+ xyz = []
37
+ rgb = []
38
+ err = []
39
+ points_id = []
40
+ for k, v in sfm.points3D.items():
41
+ points_id.append(k)
42
+ xyz.append(v.xyz)
43
+ rgb.append(v.color)
44
+ err.append(v.error)
45
+ if transform is not None:
46
+ xyz[-1] = transform @ xyz[-1]
47
+
48
+ xyz = np.array(xyz)
49
+ rgb = np.array(rgb)
50
+ err = np.array(err)
51
+ points_id = np.array(points_id)
52
+
53
+ points_idmap = np.full([points_id.max()+2], -1, dtype=np.int64)
54
+ points_idmap[points_id] = np.arange(len(xyz))
55
+
56
+ corr = {}
57
+ for image in sfm.images.values():
58
+ idx = np.array([p.point3D_id for p in image.points2D if p.has_point3D()])
59
+ corr[image.name] = points_idmap[idx]
60
+ assert corr[image.name].min() >= 0 and corr[image.name].max() < len(xyz)
61
+
62
+ return PointCloud(points=xyz, colors=rgb, errors=err, corr=corr)
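A sketch of calling `parse_colmap_pts` on a COLMAP reconstruction (the path is illustrative; `pycolmap` must be installed):

import pycolmap
from src.utils.colmap_utils import parse_colmap_pts

sfm = pycolmap.Reconstruction("data/scene/sparse/0")
pcd = parse_colmap_pts(sfm)

print(pcd.points.shape, pcd.colors.shape, pcd.errors.shape)  # (N, 3), (N, 3), (N,)
print(len(pcd.corr))   # one array of 3D point indices per registered image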
src/utils/fuser_utils.py ADDED
@@ -0,0 +1,185 @@
1
+ # Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
2
+ #
3
+ # NVIDIA CORPORATION and its licensors retain all intellectual property
4
+ # and proprietary rights in and to this software, related documentation
5
+ # and any modifications thereto. Any use, reproduction, disclosure or
6
+ # distribution of this software and related documentation without an express
7
+ # license agreement from NVIDIA CORPORATION is strictly prohibited.
8
+
9
+ '''
10
+ Reference: KinectFusion algorithm.
11
+ '''
12
+
13
+ import numpy as np
14
+
15
+ import torch
16
+
17
+
18
+ class Fuser:
19
+ def __init__(self,
20
+ xyz,
21
+ bandwidth,
22
+ use_trunc=True,
23
+ fuse_tsdf=True,
24
+ feat_dim=0,
25
+ alpha_thres=0.5,
26
+ crop_border=0.0,
27
+ normal_weight=False,
28
+ depth_weight=False,
29
+ border_weight=False,
30
+ max_norm_dist=10.,
31
+ use_half=False):
32
+ assert len(xyz.shape) == 2
33
+ assert xyz.shape[1] == 3
34
+ self.xyz = xyz
35
+ self.bandwidth = bandwidth
36
+ self.use_trunc = use_trunc
37
+ self.fuse_tsdf = fuse_tsdf
38
+ self.feat_dim = feat_dim
39
+ self.alpha_thres = alpha_thres
40
+ self.crop_border = crop_border
41
+ self.normal_weight = normal_weight
42
+ self.depth_weight = depth_weight
43
+ self.border_weight = border_weight
44
+ self.max_norm_dist = max_norm_dist
45
+
46
+ self.dtype = torch.float16 if use_half else torch.float32
47
+ self.weight = torch.zeros([len(xyz), 1], dtype=self.dtype, device="cuda")
48
+ self.feat = torch.zeros([len(xyz), feat_dim], dtype=self.dtype, device="cuda")
49
+ if self.fuse_tsdf:
50
+ self.sd_val = torch.zeros([len(xyz), 1], dtype=self.dtype, device="cuda")
51
+ else:
52
+ self.sd_val = None
53
+
54
+ def integrate(self, cam, depth, feat=None, alpha=None):
55
+ # Project grid points to image
56
+ xyz_uv = cam.project(self.xyz)
57
+ xyz_front = ((self.xyz - cam.position) @ cam.lookat) > cam.near
58
+
59
+ # Filter points projected outside
60
+ filter_idx = torch.where((xyz_uv.abs() <= 1-self.crop_border).all(-1) & xyz_front)[0]
61
+ valid_idx = filter_idx
62
+ valid_xyz = self.xyz[filter_idx]
63
+ valid_uv = xyz_uv[filter_idx]
64
+
65
+ # Compute projective sdf
66
+ valid_frame_depth = torch.nn.functional.grid_sample(
67
+ depth.view(1,1,*depth.shape[-2:]),
68
+ valid_uv.view(1,1,-1,2),
69
+ mode='bilinear',
70
+ align_corners=False).flatten()
71
+ valid_xyz_depth = (valid_xyz - cam.position) @ cam.lookat
72
+ valid_sdf = valid_frame_depth - valid_xyz_depth
73
+
74
+ if torch.is_tensor(self.bandwidth):
75
+ bandwidth = self.bandwidth[valid_idx]
76
+ else:
77
+ bandwidth = self.bandwidth
78
+
79
+ valid_sdf *= (1 / bandwidth)
80
+
81
+ if self.use_trunc:
82
+ # Filter occluded
83
+ filter_idx = torch.where(valid_sdf >= -1)[0]
84
+ valid_idx = valid_idx[filter_idx]
85
+ valid_uv = valid_uv[filter_idx]
86
+ valid_frame_depth = valid_frame_depth[filter_idx]
87
+ valid_sdf = valid_sdf[filter_idx]
88
+ valid_sdf = valid_sdf.clamp_(-1, 1)
89
+
90
+ # Init weighting
91
+ w = torch.ones_like(valid_frame_depth)
92
+ else:
93
+ norm_dist = valid_sdf.abs()
94
+ w = torch.exp(-norm_dist.clamp_max(self.max_norm_dist))
95
+
96
+ # Alpha filtering
97
+ if alpha is not None:
98
+ valid_alpha = torch.nn.functional.grid_sample(
99
+ alpha.view(1,1,*alpha.shape[-2:]),
100
+ valid_uv.view(1,1,-1,2),
101
+ mode='bilinear',
102
+ align_corners=False).flatten()
103
+ w *= valid_alpha
104
+
105
+ filter_idx = torch.where(valid_alpha >= self.alpha_thres)[0]
106
+ valid_idx = valid_idx[filter_idx]
107
+ valid_uv = valid_uv[filter_idx]
108
+ valid_frame_depth = valid_frame_depth[filter_idx]
109
+ valid_sdf = valid_sdf[filter_idx]
110
+ w = w[filter_idx]
111
+
112
+ # Compute geometric weighting
113
+ if self.depth_weight:
114
+ w *= 1 / valid_frame_depth.clamp_min(0.1)
115
+
116
+ if self.normal_weight:
117
+ normal = cam.depth2normal(depth)
118
+ rd = torch.nn.functional.normalize(cam.depth2pts(depth) - cam.position.view(3,1,1), dim=0)
119
+ cos_theta = (normal * rd).sum(0).clamp_min(0)
120
+ valid_cos_theta = torch.nn.functional.grid_sample(
121
+ cos_theta.view(1,1,*cos_theta.shape[-2:]),
122
+ valid_uv.view(1,1,-1,2),
123
+ mode='bilinear',
124
+ align_corners=False).flatten()
125
+ w *= valid_cos_theta
126
+
127
+ if self.border_weight:
128
+ # The image center gets weight 1.0; the corners get 0.1
129
+ w *= 1 / (1 + 9/np.sqrt(2) * valid_uv.square().sum(1).sqrt())
130
+
131
+ # Reshape integration weight
132
+ w = w.unsqueeze(-1).to(self.dtype)
133
+
134
+ # Integrate weight
135
+ self.weight[valid_idx] += w
136
+
137
+ # Integrate tsdf
138
+ if self.fuse_tsdf:
139
+ valid_sdf = valid_sdf.unsqueeze(-1).to(self.dtype)
140
+ self.sd_val[valid_idx] += w * valid_sdf
141
+
142
+ # Sample feature
143
+ if self.feat_dim > 0:
144
+ valid_feat = torch.nn.functional.grid_sample(
145
+ feat.view(1,self.feat_dim,*feat.shape[-2:]).to(self.dtype),
146
+ valid_uv.view(1,1,-1,2).to(self.dtype),
147
+ mode='bilinear',
148
+ align_corners=False)[0,:,0].T
149
+ self.feat[valid_idx] += w * valid_feat
150
+
151
+ @property
152
+ def feature(self):
153
+ return self.feat / self.weight
154
+
155
+ @property
156
+ def tsdf(self):
157
+ return self.sd_val / self.weight
158
+
159
+
160
+ @torch.no_grad()
161
+ def rgb_fusion(voxel_model, cameras):
162
+
163
+ from .octree_utils import level_2_vox_size
164
+
165
+ # Define volume integrator
166
+ finest_vox_size = level_2_vox_size(voxel_model.scene_extent, voxel_model.octlevel.max()).item()
167
+ feat_volume = Fuser(
168
+ xyz=voxel_model.vox_center,
169
+ bandwidth=10 * finest_vox_size,
170
+ use_trunc=False,
171
+ fuse_tsdf=False,
172
+ feat_dim=3,
173
+ crop_border=0.,
174
+ normal_weight=False,
175
+ depth_weight=False,
176
+ border_weight=False,
177
+ use_half=True)
178
+
179
+ # Run RGB image fusion over the training views
180
+ for cam in cameras:
181
+ render_pkg = voxel_model.render(cam, color_mode="dontcare", output_depth=True)
182
+ depth = render_pkg['depth'][2]
183
+ feat_volume.integrate(cam=cam, feat=cam.image.cuda(), depth=depth)
184
+
185
+ return feat_volume.feature.nan_to_num_(0.5).float()
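Beyond the RGB fusion helper above, the `Fuser` can also be driven directly for TSDF-style integration. A hedged sketch (a populated `voxel_model` and a list of `cameras` are assumed; the bandwidth value is illustrative):

import torch

fuser = Fuser(
    xyz=voxel_model.vox_center,   # one query point per voxel center
    bandwidth=0.05,               # truncation band in world units (illustrative)
    use_trunc=True,
    fuse_tsdf=True,
    feat_dim=3,                   # also fuse per-pixel RGB
)

for cam in cameras:
    render_pkg = voxel_model.render(cam, color_mode="dontcare", output_depth=True)
    fuser.integrate(cam=cam, depth=render_pkg['depth'][2], feat=cam.image.cuda())

tsdf = fuser.tsdf.nan_to_num_(1.0)       # NaN where no observation contributed
rgb = fuser.feature.nan_to_num_(0.5)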