Spaces:
Runtime error
Runtime error
// File-local shims that provide fmodf / fmod under these names by
// delegating to the std::fmod overloads.
namespace {

/// Single-precision floating-point remainder of a / b (see std::fmod).
inline float fmodf(float a, float b) {
    const float remainder = std::fmod(a, b);
    return remainder;
}

/// Double-precision floating-point remainder of a / b (see std::fmod).
inline double fmod(double a, double b) {
    const double remainder = std::fmod(a, b);
    return remainder;
}

} // namespace
// Make std::isfinite callable without the std:: qualifier.
using std::isfinite;

// Real is the scalar type used by most of the internal computation.
// PyTorch interfaces, OptiX Prime queries and Embree queries use float
// directly rather than Real.
using Real = float;
/// Return x multiplied by itself (x^2).
template <typename T>
DEVICE
inline T square(const T &x) {
    const T squared = x * x;
    return squared;
}
/// Return x raised to the third power, evaluated as (x * x) * x.
template <typename T>
DEVICE
inline T cubic(const T &x) {
    const T cubed = x * x * x;
    return cubed;
}
/// Restrict v to the range [lo, hi].
///
/// Returns lo when v < lo, hi when v > hi, and v otherwise. The lower
/// bound is tested first.
template <typename T>
DEVICE
inline T clamp(const T &v, const T &lo, const T &hi) {
    return (v < lo) ? lo
         : (v > hi) ? hi
                    : v;
}
/// Integer remainder of a / b, shifted up by b when the built-in %
/// yields a negative value (so the result is non-negative for b > 0).
DEVICE
inline int modulo(int a, int b) {
    const int rem = a % b;
    if (rem < 0) {
        return rem + b;
    }
    return rem;
}
/// Floating-point remainder of a / b, shifted up by b when fmodf yields a
/// negative value. For b > 0 the result lies in [0, b).
DEVICE
inline float modulo(float a, float b) {
    float r = ::fmodf(a, b);
    if (r < 0.0f) {
        r += b;
        // When r is a tiny negative number, r + b rounds to exactly b,
        // which would fall outside the half-open range [0, b). Wrap that
        // case around to 0 (equivalent to b under the modulus).
        if (b > 0.0f && r >= b) {
            r = 0.0f;
        }
    }
    return r;
}
/// Floating-point remainder of a / b, shifted up by b when fmod yields a
/// negative value. For b > 0 the result lies in [0, b).
DEVICE
inline double modulo(double a, double b) {
    double r = ::fmod(a, b);
    if (r < 0.0) {
        r += b;
        // When r is a tiny negative number, r + b rounds to exactly b,
        // which would fall outside the half-open range [0, b). Wrap that
        // case around to 0 (equivalent to b under the modulus).
        if (b > 0.0 && r >= b) {
            r = 0.0;
        }
    }
    return r;
}
/// Return the larger of a and b; b is returned when a is not strictly
/// greater than b.
template <typename T>
DEVICE
inline T max(const T &a, const T &b) {
    if (a > b) {
        return a;
    }
    return b;
}
/// Return the smaller of a and b; b is returned when a is not strictly
/// less than b.
template <typename T>
DEVICE
inline T min(const T &a, const T &b) {
    if (a < b) {
        return a;
    }
    return b;
}
/// Return ceil(x/y) for integers x and y.
///
/// y must be non-zero. The result is computed from the truncated quotient
/// and the remainder rather than as (x + y - 1) / y, so it is also correct
/// for negative operands and does not overflow when x is close to INT_MAX.
inline int idiv_ceil(int x, int y) {
    const int quotient = x / y;   // truncates toward zero
    const int remainder = x % y;  // carries the sign of x
    // The exact quotient is positive and fractional exactly when the
    // remainder is non-zero and has the same sign as the divisor; only
    // then does truncation fall below the ceiling.
    const bool round_up = (remainder != 0) && ((remainder > 0) == (y > 0));
    return round_up ? quotient + 1 : quotient;
}
/// Exchange the values of a and b using one copy and two assignments.
template <typename T>
DEVICE
inline void swap_(T &a, T &b) {
    T held = a;  // keep the old value of a while it is overwritten
    a = b;
    b = held;
}
/// Base-2 logarithm of x, computed as ln(x) / ln(2).
inline double log2(double x) {
    // The divisor is written as a double literal so that the double
    // overload of log is always selected; passing a float here would
    // limit the divisor to single precision wherever a log(float)
    // overload is visible.
    return log(x) / log(2.0);
}
/// Arc cosine with the argument clamped to [-1, 1].
///
/// Returns 0 for x >= 1 and pi for x <= -1, so inputs slightly outside
/// the domain do not reach acos; everything else is passed to acos.
template <typename T>
DEVICE
inline T safe_acos(const T &x) {
    if (x <= -1) {
        return T(M_PI);
    }
    if (x >= 1) {
        return T(0);
    }
    return acos(x);
}
// For Morton code computation. This can be made faster.
//
// Spreads the low 10 bits of x apart by inserting one zero bit after each
// of them: input bit k (k = 0..9, counted from the least significant bit)
// ends up at output bit 2k. Input bits above bit 9 are ignored.
DEVICE
inline uint32_t expand_bits(uint32_t x) {
    constexpr uint32_t num_bits = 10;
    uint32_t spread = 0;
    for (uint32_t k = 0; k < num_bits; ++k) {
        // Isolate bit k, then shift it left by k so it lands on bit 2k.
        const uint32_t bit = x & (uint32_t(1) << k);
        spread |= bit << k;
    }
    return spread;
}
| // DEVICE | |
| // inline int clz(uint64_t x) { | |
| // #ifdef __CUDA_ARCH__ | |
| // return __clzll(x); | |
| // #else | |
| // // TODO: use _BitScanReverse in windows | |
| // return x == 0 ? 64 : __builtin_clzll(x); | |
| // #endif | |
| // } | |
| // DEVICE | |
| // inline int ffs(uint8_t x) { | |
| // #ifdef __CUDA_ARCH__ | |
| // return __ffs(x); | |
| // #else | |
| // // TODO: use _BitScanReverse in windows | |
| // return __builtin_ffs(x); | |
| // #endif | |
| // } | |
| // DEVICE | |
| // inline int popc(uint8_t x) { | |
| // #ifdef __CUDA_ARCH__ | |
| // return __popc(x); | |
| // #else | |
| // // TODO: use _popcnt in windows | |
| // return __builtin_popcount(x); | |
| // #endif | |
| // } | |