Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
PtxUtils.cuh
1 /**
2  * Copyright (c) Facebook, Inc. and its affiliates.
3  *
4  * This source code is licensed under the MIT license found in the
5  * LICENSE file in the root directory of this source tree.
6  */
7 
8 
9 #pragma once
10 
11 #include <cuda.h>
12 
13 namespace faiss { namespace gpu {
14 
// Extracts a bit field from a 32-bit value using the PTX `bfe.u32`
// instruction.
//
// val: source word
// pos: index of the first (least significant) bit of the field
// len: width of the field in bits
//
// Returns the extracted field in the low-order bits of the result.
__device__ __forceinline__ unsigned int
getBitfield(unsigned int val, int pos, int len) {
  unsigned int field;
  asm("bfe.u32 %0, %1, %2, %3;"
      : "=r"(field)
      : "r"(val), "r"(pos), "r"(len));
  return field;
}
21 
// Extracts a bit field from a 64-bit-capable value using the PTX `bfe.u64`
// instruction.
//
// val: source word
// pos: index of the first (least significant) bit of the field
// len: width of the field in bits
//
// Returns the extracted field in the low-order bits of the result.
__device__ __forceinline__
unsigned long getBitfield(unsigned long val, int pos, int len) {
  // The "l" constraint binds a 64-bit register, but `unsigned long` is only
  // 32 bits wide on LLP64 targets (e.g. Windows). Route the operands through
  // `unsigned long long`, which is 64 bits everywhere, so the asm operand
  // widths always match `bfe.u64`. On LP64 targets these conversions are
  // no-ops and the result is unchanged.
  unsigned long long wideVal = val;
  unsigned long long wideRet;
  asm("bfe.u64 %0, %1, %2, %3;"
      : "=l"(wideRet)
      : "l"(wideVal), "r"(pos), "r"(len));
  return static_cast<unsigned long>(wideRet);
}
28 
// Inserts a bit field into a 32-bit value using the PTX `bfi.b32`
// instruction.
//
// val:      base word that receives the field
// toInsert: value whose low-order `len` bits are inserted
// pos:      index of the first (least significant) destination bit
// len:      width of the field in bits
//
// Returns `val` with the selected bit range replaced by `toInsert`.
__device__ __forceinline__ unsigned int
setBitfield(unsigned int val, unsigned int toInsert, int pos, int len) {
  unsigned int merged;
  // bfi operand order is (field to insert, base word, position, length).
  asm("bfi.b32 %0, %1, %2, %3, %4;"
      : "=r"(merged)
      : "r"(toInsert), "r"(val), "r"(pos), "r"(len));
  return merged;
}
37 
// Returns the calling thread's lane index within its warp, read from the
// PTX special register %laneid.
__device__ __forceinline__ int getLaneId() {
  int laneId;
  // In an asm statement with operands, a literal '%' must be written as
  // "%%"; this also matches how the %lanemask_* registers are read elsewhere
  // in this file.
  asm("mov.s32 %0, %%laneid;" : "=r"(laneId));
  return laneId;
}
43 
// Reads the PTX special register %lanemask_lt: a 32-bit mask with a bit set
// for every lane whose index is less than the calling thread's lane.
__device__ __forceinline__ unsigned getLaneMaskLt() {
  unsigned lowerLanes;
  asm("mov.u32 %0, %%lanemask_lt;"
      : "=r"(lowerLanes));
  return lowerLanes;
}
49 
// Reads the PTX special register %lanemask_le: a 32-bit mask with a bit set
// for every lane whose index is less than or equal to the calling thread's
// lane.
__device__ __forceinline__ unsigned getLaneMaskLe() {
  unsigned lowerOrSameLanes;
  asm("mov.u32 %0, %%lanemask_le;"
      : "=r"(lowerOrSameLanes));
  return lowerOrSameLanes;
}
55 
// Reads the PTX special register %lanemask_gt: a 32-bit mask with a bit set
// for every lane whose index is greater than the calling thread's lane.
__device__ __forceinline__ unsigned getLaneMaskGt() {
  unsigned higherLanes;
  asm("mov.u32 %0, %%lanemask_gt;"
      : "=r"(higherLanes));
  return higherLanes;
}
61 
// Reads the PTX special register %lanemask_ge: a 32-bit mask with a bit set
// for every lane whose index is greater than or equal to the calling
// thread's lane.
__device__ __forceinline__ unsigned getLaneMaskGe() {
  unsigned higherOrSameLanes;
  asm("mov.u32 %0, %%lanemask_ge;"
      : "=r"(higherOrSameLanes));
  return higherOrSameLanes;
}
67 
// Issues the PTX `bar.sync` instruction on a named barrier.
//
// name:       barrier identifier passed as the first `bar.sync` operand
// numThreads: thread count passed as the second `bar.sync` operand
//
// The asm is volatile with a "memory" clobber, so the compiler will neither
// drop it nor move memory accesses across it.
__device__ __forceinline__ void namedBarrierWait(int name, int numThreads) {
  asm volatile("bar.sync %0, %1;"
               : /* no outputs */
               : "r"(name), "r"(numThreads)
               : "memory");
}
71 
// Issues the PTX `bar.arrive` instruction on a named barrier.
//
// name:       barrier identifier passed as the first `bar.arrive` operand
// numThreads: thread count passed as the second `bar.arrive` operand
//
// The asm is volatile with a "memory" clobber, so the compiler will neither
// drop it nor move memory accesses across it.
__device__ __forceinline__ void namedBarrierArrived(int name, int numThreads) {
  asm volatile("bar.arrive %0, %1;"
               : /* no outputs */
               : "r"(name), "r"(numThreads)
               : "memory");
}
75 
// Issues a PTX `prefetch.global.L2` for the address `p`.
//
// FIXME: prefetch does nothing (in SASS) on Maxwell
__device__ __forceinline__ void prefetchL2(const void *p) {
  asm volatile("prefetch.global.L2 [%0];"
               : /* no outputs */
               : "l"(p));
}
80 
// Issues a PTX `prefetch.global.L1` for the address `p`.
__device__ __forceinline__ void prefetchL1(const void *p) {
  asm volatile("prefetch.global.L1 [%0];"
               : /* no outputs */
               : "l"(p));
}
84 
85 } } // namespace