Spaces:

ma-xu
/

LIVE

Runtime error

LIVE / diffvg.h

Xu Ma

update

1c3c0d9 over 3 years ago

3.18 kB

	#pragma once

	// DEVICE is placed in front of functions in this header. When the header is
	// compiled by NVCC it expands to `__device__ __host__`, so the function is
	// built for both host and device; with any other compiler it expands to
	// nothing and the function is an ordinary host function.
	#ifdef __NVCC__
	#define DEVICE __device__ __host__
	#else
	#define DEVICE
	#endif

	// Host-only section (skipped when compiling with NVCC): include <cmath> and
	// declare fmodf, fmod and isfinite so they can be named without `std::`.
	// NOTE(review): if <cmath> already declares ::fmodf / ::fmod in the global
	// namespace, qualified calls such as `::fmodf(...)` will presumably resolve
	// to those rather than to the wrappers below -- confirm per toolchain.
	#ifndef __NVCC__
	#include <cmath>
	namespace {
	// Single-precision floating-point remainder; forwards to std::fmod.
	inline float fmodf(float a, float b) {
	return std::fmod(a, b);
	}
	// Double-precision floating-point remainder; forwards to std::fmod.
	inline double fmod(double a, double b) {
	return std::fmod(a, b);
	}
	}
	// Make std::isfinite usable as an unqualified name.
	using std::isfinite;
	#endif

	// Fallback definition of pi, used only when no earlier header has already
	// defined M_PI.
	#ifndef M_PI
	#define M_PI 3.14159265358979323846
	#endif

	#include <cstdint>
	#include <atomic>

	// We use Real for most of the internal computation.
	// However, for PyTorch interfaces, Optix Prime and Embree queries
	// we use float
	// Real is currently an alias for float (single precision).
	using Real = float;

	// Returns x multiplied by itself (x^2).
	template <typename T>
	DEVICE
	inline T square(const T &x) {
	    T squared = x * x;
	    return squared;
	}

	// Returns x raised to the third power, evaluated left to right as (x * x) * x.
	template <typename T>
	DEVICE
	inline T cubic(const T &x) {
	    T cubed = x * x * x;
	    return cubed;
	}

	// Limits v by the bounds lo and hi: a value below lo yields lo, a value
	// above hi yields hi, and anything else is returned unchanged. The lower
	// bound is tested first.
	template <typename T>
	DEVICE
	inline T clamp(const T &v, const T &lo, const T &hi) {
	    return (v < lo) ? lo : ((v > hi) ? hi : v);
	}

	// Integer remainder of a / b, shifted up by b when the built-in remainder
	// is negative, so the result is non-negative for a positive b.
	DEVICE
	inline int modulo(int a, int b) {
	    int rem = a % b;
	    if (rem < 0) {
	        rem += b;
	    }
	    return rem;
	}

	// Single-precision remainder of a / b via ::fmodf, shifted up by b when the
	// remainder is negative, so the result is non-negative for a positive b.
	DEVICE
	inline float modulo(float a, float b) {
	    float rem = ::fmodf(a, b);
	    if (rem < 0.0f) {
	        rem += b;
	    }
	    return rem;
	}

	// Double-precision remainder of a / b via ::fmod, shifted up by b when the
	// remainder is negative, so the result is non-negative for a positive b.
	DEVICE
	inline double modulo(double a, double b) {
	    double rem = ::fmod(a, b);
	    if (rem < 0.0) {
	        rem += b;
	    }
	    return rem;
	}

	// Returns a when a compares greater than b; otherwise (including when the
	// two compare equal) returns b.
	template <typename T>
	DEVICE
	inline T max(const T &a, const T &b) {
	    if (a > b) {
	        return a;
	    }
	    return b;
	}

	// Returns a when a compares less than b; otherwise (including when the two
	// compare equal) returns b.
	template <typename T>
	DEVICE
	inline T min(const T &a, const T &b) {
	    if (a < b) {
	        return a;
	    }
	    return b;
	}

	/// Return ceil(x/y) for integers x and y.
	///
	/// y must be non-zero. The result is built from the truncated quotient and
	/// the remainder instead of (x + y - 1) / y, so it does not overflow when x
	/// is close to INT_MAX and it also rounds correctly for negative operands.
	inline int idiv_ceil(int x, int y) {
	    const int quotient = x / y;
	    const int remainder = x % y;
	    // Integer division truncates toward zero. That already equals the
	    // ceiling when the exact quotient is negative; when the division is
	    // inexact and the exact quotient is positive (the remainder has the
	    // same sign as the divisor) one more step up is needed.
	    const bool round_up = (remainder != 0) && ((remainder < 0) == (y < 0));
	    return round_up ? quotient + 1 : quotient;
	}

	// Exchanges the values of a and b by copying through a temporary.
	template <typename T>
	DEVICE
	inline void swap_(T &a, T &b) {
	    T saved = a;  // keep the old value of a while it is overwritten
	    a = b;
	    b = saved;
	}

	// Base-2 logarithm of x, computed as log(x) / log(2).
	//
	// The divisor is written as the double literal 2.0 so that both logarithms
	// are evaluated in double precision. Passing a float (as Real(2) would)
	// could select a single-precision log overload on toolchains that expose
	// one at global scope, limiting the result to float accuracy.
	inline double log2(double x) {
	    return log(x) / log(2.0);
	}

	// Arc cosine that tolerates arguments outside [-1, 1]: anything at or above
	// 1 gives 0, anything at or below -1 gives pi, and every other argument is
	// passed on to acos.
	template <typename T>
	DEVICE
	inline T safe_acos(const T &x) {
	    if (x >= 1) {
	        return T(0);
	    }
	    if (x <= -1) {
	        return T(M_PI);
	    }
	    return acos(x);
	}

	// For Morton code computation. This can be made faster.
	//
	// Insert one zero after every bit given a 10-bit integer: input bit i
	// (0 <= i < 10) is moved to output bit 2*i, and every other output bit is
	// zero. Bits of x above the low 10 are ignored.
	DEVICE
	inline uint32_t expand_bits(uint32_t x) {
	    constexpr uint32_t num_bits = 10u;
	    uint32_t result = 0u;
	    // Walk upward from the least-significant bit (bit 0): isolate bit i and
	    // shift it left by i more places so that it lands on bit 2*i.
	    for (uint32_t i = 0u; i < num_bits; ++i) {
	        result |= (x & (uint32_t(1) << i)) << i;
	    }
	    return result;
	}

	// DEVICE
	// inline int clz(uint64_t x) {
	// #ifdef __CUDA_ARCH__
	// return __clzll(x);
	// #else
	// // TODO: use _BitScanReverse in windows
	// return x == 0 ? 64 : __builtin_clzll(x);
	// #endif
	// }

	// DEVICE
	// inline int ffs(uint8_t x) {
	// #ifdef __CUDA_ARCH__
	// return __ffs(x);
	// #else
	// // TODO: use _BitScanReverse in windows
	// return __builtin_ffs(x);
	// #endif
	// }

	// DEVICE
	// inline int popc(uint8_t x) {
	// #ifdef __CUDA_ARCH__
	// return __popc(x);
	// #else
	// // TODO: use _popcnt in windows
	// return __builtin_popcount(x);
	// #endif
	// }