15 namespace faiss {
namespace gpu {
namespace utils {
/// Ceiling division: evaluates `(a + b - 1) / b` in the common type of `a`
/// and `b` (i.e. the type of `a + b`).
///
/// For integer arguments with `a >= 0` and `b > 0` this is the number of
/// chunks of size `b` needed to cover `a` items.
///
/// `b` must be non-zero, and the intermediate sum `a + b - 1` must be
/// representable in the result type.
template <typename U, typename V>
constexpr __host__ __device__ auto divUp(U a, V b) -> decltype(a + b) {
    return (a + b - 1) / b;
}
// NOTE(review): only the declaration of roundDown is visible in this excerpt;
// its body and closing brace are missing. The visible signature takes two
// values of possibly different types and returns the type of `a + b`.
// By name this presumably rounds `a` down to a multiple of `b` — confirm
// against the full file before relying on it.
22 template <
typename U,
typename V>
23 constexpr __host__ __device__
auto roundDown(U a, V b) -> decltype(a + b) {
/// Rounds `a` up to a multiple of `b`: returns `divUp(a, b) * b`, in the
/// common type of `a` and `b`.
///
/// Inherits the preconditions of `divUp` (non-zero `b`, no overflow in the
/// intermediate sum); the final product must also fit in the result type.
template <typename U, typename V>
constexpr __host__ __device__ auto roundUp(U a, V b) -> decltype(a + b) {
    return divUp(a, b) * b;
}
/// Compile-time integer power: returns `n` multiplied by itself `power` times.
///
/// Any `power <= 0` yields 1. The recursion depth is linear in `power`, so
/// this is intended for small exponents. Kept as a single return expression
/// so it remains a valid C++11 constexpr function.
template <typename T>
constexpr __host__ __device__ T pow(T n, T power) {
    return (power > 0 ? n * pow(n, power - 1) : 1);
}
// NOTE(review): only the signature of pow2 is visible in this excerpt; the
// template header, body and closing brace are missing. The static_assert that
// follows expects pow2(8) == 256, so this presumably returns 2 raised to the
// power `n` — confirm against the full file.
38 constexpr __host__ __device__ T pow2(T n) {
// Compile-time sanity check for pow2.
static_assert(pow2(8) == 256, "pow2");
/// Compile-time integer base-2 logarithm, rounded down.
///
/// Halves `n` until it is <= 1 and counts the steps, so for `n >= 1` the
/// result is floor(log2(n)). Any `n <= 1` (including 0) returns `p` unchanged.
///
/// `p` is the running step count used by the recursion; callers normally
/// leave it at its default of 0.
template <typename T>
constexpr __host__ __device__ int log2(T n, int p = 0) {
    return (n <= 1) ? p : log2(n / 2, p + 1);
}
// Compile-time sanity checks for log2 (result is rounded down).
static_assert(log2(2) == 1, "log2");
static_assert(log2(3) == 1, "log2");
static_assert(log2(4) == 2, "log2");
/// Returns true when `v` is a non-zero value with exactly one bit set.
///
/// `v & (v - 1)` clears the lowest set bit, so the result is zero only when
/// at most one bit was set; the leading `v &&` rejects zero.
template <typename T>
constexpr __host__ __device__ bool isPowerOf2(T v) {
    return (v && !(v & (v - 1)));
}
// Compile-time sanity checks for isPowerOf2.
static_assert(isPowerOf2(2048), "isPowerOf2");
static_assert(!isPowerOf2(3333), "isPowerOf2");
/// Returns a power of two strictly greater than `v`.
///
/// If `v` is already a power of two the result is `2 * v`; otherwise it is
/// `1 << (log2(v) + 1)`, i.e. the first power of two above `v`
/// (e.g. 1 -> 2, 3 -> 4, 16 -> 32, 17 -> 32).
///
/// The result must be representable in `T`; the shift and the multiply are
/// both performed in `T`.
template <typename T>
constexpr __host__ __device__ T nextHighestPowerOf2(T v) {
    return (isPowerOf2(v) ? (T) 2 * v : ((T) 1 << (log2(v) + 1)));
}
// Compile-time sanity checks for nextHighestPowerOf2: the result is always
// strictly greater than the argument, even when the argument is itself a
// power of two.
static_assert(nextHighestPowerOf2(1) == 2, "nextHighestPowerOf2");
static_assert(nextHighestPowerOf2(2) == 4, "nextHighestPowerOf2");
static_assert(nextHighestPowerOf2(3) == 4, "nextHighestPowerOf2");
static_assert(nextHighestPowerOf2(4) == 8, "nextHighestPowerOf2");

static_assert(nextHighestPowerOf2(15) == 16, "nextHighestPowerOf2");
static_assert(nextHighestPowerOf2(16) == 32, "nextHighestPowerOf2");
static_assert(nextHighestPowerOf2(17) == 32, "nextHighestPowerOf2");

// Wider types: unsigned 32-bit and size_t arguments.
static_assert(
        nextHighestPowerOf2(1536000000u) == 2147483648u,
        "nextHighestPowerOf2");
static_assert(
        nextHighestPowerOf2((size_t) 2147483648ULL) == (size_t) 4294967296ULL,
        "nextHighestPowerOf2");