14 namespace faiss {
namespace gpu {
namespace utils {
/// Ceiling division: the quotient `a / b` rounded up to the next integer.
///
/// The result has the type of `a + b`, i.e. the usual arithmetic conversions
/// are applied to the two operand types.
///
/// The value is computed as `(a + b - 1) / b`, so it equals the mathematical
/// ceiling only for `a >= 0` and `b > 0`; `b == 0` divides by zero, and the
/// intermediate sum `a + b - 1` has to be representable in the result type.
template <typename U, typename V>
constexpr __host__ __device__ auto divUp(U a, V b) -> decltype(a + b) {
    // Single return statement keeps the function usable as a C++11 constexpr.
    return (a + b - 1) / b;
}
// NOTE(review): only the signature of roundDown is visible here; its body is
// not part of this view. The declared result type is that of `a + b`. Judging
// by the name and by the sibling roundUp, it presumably returns `a` rounded
// down to a multiple of `b` -- confirm against the full definition.
21 template <
typename U,
typename V>
22 constexpr __host__ __device__
auto roundDown(U a, V b) -> decltype(a + b) {
/// Rounds `a` up to a multiple of `b`.
///
/// Implemented as `divUp(a, b) * b`, so the preconditions of `divUp` apply
/// (it produces a ceiling only for `a >= 0`, `b > 0`), and the product must
/// additionally be representable in the result type, which is that of `a + b`.
template <typename U, typename V>
constexpr __host__ __device__ auto roundUp(U a, V b) -> decltype(a + b) {
    return divUp(a, b) * b;
}
/// Integer power: `n` multiplied by itself `power` times.
///
/// Returns 1 whenever `power <= 0` (so negative exponents are not inverted).
/// The recursion depth is linear in `power`, which matters when this is
/// evaluated as a constant expression with a large exponent. No overflow
/// checking is performed.
template <typename T>
constexpr __host__ __device__ T pow(T n, T power) {
    // Recursive single-expression form keeps this valid as a C++11 constexpr.
    return (power > 0 ? n * pow(n, power - 1) : 1);
}
// NOTE(review): the body of pow2 is not visible in this view. The compile-time
// check below pins pow2(8) == 256, which is consistent with pow2 computing
// 2 raised to the power `n` -- confirm against the full definition.
37 constexpr __host__ __device__ T pow2(T n) {
// Compile-time check: pow2(8) must evaluate to 256.
41 static_assert(pow2(8) == 256,
"pow2");
/// Base-2 logarithm of an integer, rounded down.
///
/// `n` is halved repeatedly until it is <= 1, counting the halvings. `p` is
/// the running count and is what the recursion returns; callers normally
/// leave it at its default of 0, any other starting value is simply added to
/// the result. Every `n <= 1` (including 0 and negative values) returns `p`
/// unchanged.
template <typename T>
constexpr __host__ __device__ int log2(T n, int p = 0) {
    // Recursive single-expression form keeps this valid as a C++11 constexpr.
    return (n <= 1) ? p : log2(n / 2, p + 1);
}

// Compile-time checks: results are floored, so log2(3) is 1.
static_assert(log2(2) == 1, "log2");
static_assert(log2(3) == 1, "log2");
static_assert(log2(4) == 2, "log2");
/// Returns true when `v` is a power of two, i.e. has exactly one bit set.
///
/// Zero is rejected explicitly by the `v &&` term; `v & (v - 1)` clears the
/// lowest set bit, so it is zero only if that bit was the sole one.
template <typename T>
constexpr __host__ __device__ bool isPowerOf2(T v) {
    return (v && !(v & (v - 1)));
}

// Compile-time checks for one power of two and one non-power of two.
static_assert(isPowerOf2(2048), "isPowerOf2");
static_assert(!isPowerOf2(3333), "isPowerOf2");
/// Returns the smallest power of two that is strictly greater than `v`.
///
/// A `v` that is already a power of two is therefore doubled rather than
/// returned unchanged (e.g. 16 -> 32). For any other `v`, the result is
/// `1 << (log2(v) + 1)`, one bit above the highest set bit of `v`.
/// Note that `v == 0` yields 2, because `log2(0)` evaluates to 0.
/// The result must be representable in `T`; no overflow check is performed.
template <typename T>
constexpr __host__ __device__ T nextHighestPowerOf2(T v) {
    return (isPowerOf2(v) ? (T) 2 * v : ((T) 1 << (log2(v) + 1)));
}

// Compile-time checks: small values, both sides of a power of two.
static_assert(nextHighestPowerOf2(1) == 2, "nextHighestPowerOf2");
static_assert(nextHighestPowerOf2(2) == 4, "nextHighestPowerOf2");
static_assert(nextHighestPowerOf2(3) == 4, "nextHighestPowerOf2");
static_assert(nextHighestPowerOf2(4) == 8, "nextHighestPowerOf2");

static_assert(nextHighestPowerOf2(15) == 16, "nextHighestPowerOf2");
static_assert(nextHighestPowerOf2(16) == 32, "nextHighestPowerOf2");
static_assert(nextHighestPowerOf2(17) == 32, "nextHighestPowerOf2");

// Compile-time checks: results at and beyond the 32-bit boundary.
static_assert(
        nextHighestPowerOf2(1536000000u) == 2147483648u,
        "nextHighestPowerOf2");
static_assert(
        nextHighestPowerOf2((size_t) 2147483648ULL) ==
                (size_t) 4294967296ULL,
        "nextHighestPowerOf2");