Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
DeviceDefs.cuh
1 /**
2  * Copyright (c) Facebook, Inc. and its affiliates.
3  *
4  * This source code is licensed under the MIT license found in the
5  * LICENSE file in the root directory of this source tree.
6  */
7 
8 
9 #pragma once
10 
11 namespace faiss { namespace gpu {
12 
// Number of threads in a warp.
//
// NVIDIA documents a warp size of 32 for every compute capability, so a
// single definition serves both the host compilation pass (where
// __CUDA_ARCH__ is undefined) and every device compilation pass. No
// per-architecture switch is needed, and building for architectures newer
// than the ones known when this header was written no longer fails.
constexpr int kWarpSize = 32;
25 
// This is a memory barrier for intra-warp writes to shared memory.
//
// When compiled with CUDA 9.0 or newer it issues __syncwarp() with its
// default mask. On older toolkits it compiles to nothing and assumes the
// warp executes synchronously.
//
// The toolkit version is taken from CUDA_VERSION, which is only defined once
// <cuda.h> has been included, or from nvcc's built-in __CUDACC_VER_MAJOR__.
// Checking both keeps the barrier from being silently compiled out in a
// translation unit that reaches this header without <cuda.h>: an undefined
// CUDA_VERSION evaluates to 0 inside #if.
__forceinline__ __device__ void warpFence() {
#if (defined(CUDA_VERSION) && CUDA_VERSION >= 9000) || \
    (defined(__CUDACC_VER_MAJOR__) && __CUDACC_VER_MAJOR__ >= 9)
  __syncwarp();
#else
  // For the time being, assume synchronicity.
  // __threadfence_block();
#endif
}
36 
// Based on the CUDA version (we assume what version of nvcc/ptxas we were
// compiled with), the register allocation algorithm is much better, so only
// enable the 2048 selection code if we are above 9.0 (9.2 seems to be ok).
//
// The version is read from CUDA_VERSION when <cuda.h> has been included, and
// otherwise from nvcc's built-in __CUDACC_VER_MAJOR__ / __CUDACC_VER_MINOR__.
// Checking both keeps the limit the same in every translation unit compiled
// by a given toolkit, independent of include order: an undefined
// CUDA_VERSION evaluates to 0 inside #if and would select 1024.
#if (defined(CUDA_VERSION) && CUDA_VERSION > 9000) ||  \
    (defined(__CUDACC_VER_MAJOR__) &&                   \
     (__CUDACC_VER_MAJOR__ > 9 ||                       \
      (__CUDACC_VER_MAJOR__ == 9 && __CUDACC_VER_MINOR__ > 0)))
#define GPU_MAX_SELECTION_K 2048
#else
#define GPU_MAX_SELECTION_K 1024
#endif
45 
46 } } // namespace