faiss/gpu/utils/DeviceDefs.cuh

47 lines
1.1 KiB
Plaintext

/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#pragma once
namespace faiss { namespace gpu {
#ifdef __CUDA_ARCH__
#if __CUDA_ARCH__ <= 750
constexpr int kWarpSize = 32;
#else
#error Unknown __CUDA_ARCH__; please define parameters for compute capability
#endif // __CUDA_ARCH__ types
#endif // __CUDA_ARCH__
#ifndef __CUDA_ARCH__
// dummy value for host compiler
constexpr int kWarpSize = 32;
#endif // !__CUDA_ARCH__
// This is a memory barrier for intra-warp writes to shared memory.
__forceinline__ __device__ void warpFence() {
#if CUDA_VERSION >= 9000
__syncwarp();
#else
// For the time being, assume synchronicity.
// __threadfence_block();
#endif
}
#if CUDA_VERSION > 9000
// Based on the CUDA version (we assume what version of nvcc/ptxas we were
// compiled with), the register allocation algorithm is much better, so only
// enable the 2048 selection code if we are above 9.0 (9.2 seems to be ok)
#define GPU_MAX_SELECTION_K 2048
#else
#define GPU_MAX_SELECTION_K 1024
#endif
} } // namespace