Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
PtxUtils.cuh
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 
10 #pragma once
11 
12 #include <cuda.h>
13 
14 namespace faiss { namespace gpu {
15 
/// Extracts an unsigned bit field from a 32-bit word using the PTX
/// `bfe.u32` instruction.
///
/// @param val  word to read the field from
/// @param pos  index of the lowest bit of the field
/// @param len  width of the field, in bits
/// @return the extracted field as produced by `bfe.u32`
__device__ __forceinline__
unsigned int getBitfield(unsigned int val, int pos, int len) {
  unsigned int field;
  // Operand order for bfe: destination, source, start bit, length.
  asm("bfe.u32 %0, %1, %2, %3;"
      : "=r"(field)
      : "r"(val), "r"(pos), "r"(len));
  return field;
}
22 
/// 64-bit overload: extracts an unsigned bit field from `val` using the PTX
/// `bfe.u64` instruction.
///
/// NOTE(review): the "l" constraint binds a 64-bit register, so this
/// presumably relies on `unsigned long` being 64 bits wide -- confirm for
/// ABIs where it is not.
///
/// @param val  word to read the field from
/// @param pos  index of the lowest bit of the field
/// @param len  width of the field, in bits
/// @return the extracted field as produced by `bfe.u64`
__device__ __forceinline__
unsigned long getBitfield(unsigned long val, int pos, int len) {
  unsigned long field;
  // Operand order for bfe: destination, source, start bit, length.
  asm("bfe.u64 %0, %1, %2, %3;"
      : "=l"(field)
      : "l"(val), "r"(pos), "r"(len));
  return field;
}
29 
/// Inserts a bit field into a 32-bit word using the PTX `bfi.b32`
/// instruction and returns the resulting word; `val` itself is not modified.
///
/// @param val       base word that receives the field
/// @param toInsert  value whose low bits supply the field
/// @param pos       index of the lowest bit of the field within `val`
/// @param len       width of the field, in bits
/// @return the word produced by `bfi.b32`
__device__ __forceinline__
unsigned int setBitfield(unsigned int val,
                         unsigned int toInsert, int pos, int len) {
  unsigned int merged;
  // Operand order for bfi: destination, field source, base word, start bit,
  // length -- hence toInsert is passed before val.
  asm("bfi.b32 %0, %1, %2, %3, %4;"
      : "=r"(merged)
      : "r"(toInsert), "r"(val), "r"(pos), "r"(len));
  return merged;
}
38 
/// Returns the calling thread's lane index within its warp, read from the
/// PTX special register `%laneid`.
///
/// @return the value of `%laneid` for the calling thread
__device__ __forceinline__ int getLaneId() {
  int laneId;
  // The asm template has operands, so a literal '%' must be escaped as "%%";
  // a bare "%laneid" is an invalid escape for stricter front ends (clang).
  // %laneid is a .u32 special register, so it is read with mov.u32, matching
  // the lane-mask helpers in this file.
  asm("mov.u32 %0, %%laneid;" : "=r"(laneId));
  return laneId;
}
44 
/// Reads the PTX special register `%lanemask_lt`: a 32-bit mask with a bit
/// set for every lane whose index is less than the calling thread's lane.
///
/// @return the value of `%lanemask_lt` for the calling thread
__device__ __forceinline__ unsigned getLaneMaskLt() {
  unsigned lanes;
  asm("mov.u32 %0, %%lanemask_lt;"
      : "=r"(lanes));
  return lanes;
}
50 
/// Reads the PTX special register `%lanemask_le`: a 32-bit mask with a bit
/// set for every lane whose index is less than or equal to the calling
/// thread's lane.
///
/// @return the value of `%lanemask_le` for the calling thread
__device__ __forceinline__ unsigned getLaneMaskLe() {
  unsigned lanes;
  asm("mov.u32 %0, %%lanemask_le;"
      : "=r"(lanes));
  return lanes;
}
56 
/// Reads the PTX special register `%lanemask_gt`: a 32-bit mask with a bit
/// set for every lane whose index is greater than the calling thread's lane.
///
/// @return the value of `%lanemask_gt` for the calling thread
__device__ __forceinline__ unsigned getLaneMaskGt() {
  unsigned lanes;
  asm("mov.u32 %0, %%lanemask_gt;"
      : "=r"(lanes));
  return lanes;
}
62 
/// Reads the PTX special register `%lanemask_ge`: a 32-bit mask with a bit
/// set for every lane whose index is greater than or equal to the calling
/// thread's lane.
///
/// @return the value of `%lanemask_ge` for the calling thread
__device__ __forceinline__ unsigned getLaneMaskGe() {
  unsigned lanes;
  asm("mov.u32 %0, %%lanemask_ge;"
      : "=r"(lanes));
  return lanes;
}
68 
/// Issues a PTX `bar.sync` on a named barrier.
///
/// The statement is `volatile` and carries a "memory" clobber, so the
/// compiler neither removes it nor moves memory accesses across it.
///
/// @param name        barrier identifier passed as the first `bar.sync` operand
/// @param numThreads  thread count passed as the second `bar.sync` operand
__device__ __forceinline__ void namedBarrierWait(int name, int numThreads) {
  asm volatile("bar.sync %0, %1;"
               : /* no outputs */
               : "r"(name), "r"(numThreads)
               : "memory");
}
72 
/// Issues a PTX `bar.arrive` on a named barrier.
///
/// The statement is `volatile` and carries a "memory" clobber, so the
/// compiler neither removes it nor moves memory accesses across it.
///
/// @param name        barrier identifier passed as the first `bar.arrive`
///                    operand
/// @param numThreads  thread count passed as the second `bar.arrive` operand
__device__ __forceinline__ void namedBarrierArrived(int name, int numThreads) {
  asm volatile("bar.arrive %0, %1;"
               : /* no outputs */
               : "r"(name), "r"(numThreads)
               : "memory");
}
76 
/// Issues a PTX `prefetch.global.L2` for the address `p`.
///
/// FIXME: prefetch does nothing (in SASS) on Maxwell
///
/// @param p  address handed to the prefetch instruction as a 64-bit operand
__device__ __forceinline__ void prefetchL2(const void *p) {
  asm volatile("prefetch.global.L2 [%0];"
               : /* no outputs */
               : "l"(p));
}
81 
/// Issues a PTX `prefetch.global.L1` for the address `p`.
///
/// @param p  address handed to the prefetch instruction as a 64-bit operand
__device__ __forceinline__ void prefetchL1(const void *p) {
  asm volatile("prefetch.global.L1 [%0];"
               : /* no outputs */
               : "l"(p));
}
85 
86 } } // namespace