Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
PtxUtils.cuh
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 // Copyright 2004-present Facebook. All Rights Reserved.
10 
11 #pragma once
12 
13 #include <cuda.h>
14 
15 namespace faiss { namespace gpu {
16 
// Extracts a bit field from a 32-bit word via the PTX `bfe.u32` instruction.
//
//   val - word to read the field from
//   pos - bit index at which the field starts
//   len - width of the field, in bits
//
// Returns the selected bits, right-aligned in the result.
__device__ __forceinline__ unsigned int
getBitfield(unsigned int val, int pos, int len) {
  unsigned int field;
  asm("bfe.u32 %0, %1, %2, %3;"
      : "=r"(field)
      : "r"(val), "r"(pos), "r"(len));
  return field;
}
23 
// Extracts a bit field from an `unsigned long` via the PTX `bfe.u64`
// instruction.
//
//   val - word to read the field from
//   pos - bit index at which the field starts
//   len - width of the field, in bits
//
// Returns the selected bits, right-aligned in the result.
__device__ __forceinline__
unsigned long getBitfield(unsigned long val, int pos, int len) {
  // The "l" constraint binds a 64-bit register, but `unsigned long` is only
  // 32 bits wide on LLP64 targets (e.g. Windows). Stage the value through
  // `unsigned long long` so the operand width always matches the constraint;
  // on LP64 targets the conversions are no-ops.
  unsigned long long wide = val;
  unsigned long long field;
  asm("bfe.u64 %0, %1, %2, %3;"
      : "=l"(field)
      : "l"(wide), "r"(pos), "r"(len));
  return static_cast<unsigned long>(field);
}
30 
// Inserts a bit field into a 32-bit word via the PTX `bfi.b32` instruction.
//
//   val      - word that receives the field
//   toInsert - source of the bits to insert (taken from its low-order end)
//   pos      - bit index in `val` at which the field starts
//   len      - width of the field, in bits
//
// Returns `val` with the field replaced; `val` itself is not modified.
__device__ __forceinline__ unsigned int
setBitfield(unsigned int val, unsigned int toInsert, int pos, int len) {
  unsigned int merged;
  // Operand order follows the instruction: source bits first, then base word.
  asm("bfi.b32 %0, %1, %2, %3, %4;"
      : "=r"(merged)
      : "r"(toInsert), "r"(val), "r"(pos), "r"(len));
  return merged;
}
39 
// Returns the calling thread's lane index within its warp, read from the PTX
// special register `%laneid`.
__device__ __forceinline__ int getLaneId() {
  int laneId;
  // In an asm statement that has operands, a literal '%' must be written as
  // "%%"; this matches how the %lanemask_* registers are read in this file.
  asm("mov.s32 %0, %%laneid;" : "=r"(laneId));
  return laneId;
}
45 
// Reads the PTX special register `%lanemask_lt`: a 32-bit mask with a bit set
// for every lane whose index is lower than the calling thread's lane.
__device__ __forceinline__ unsigned getLaneMaskLt() {
  unsigned lowerLanes;
  asm("mov.u32 %0, %%lanemask_lt;"
      : "=r"(lowerLanes));
  return lowerLanes;
}
51 
// Reads the PTX special register `%lanemask_le`: a 32-bit mask with a bit set
// for the calling thread's lane and every lane with a lower index.
__device__ __forceinline__ unsigned getLaneMaskLe() {
  unsigned lanesUpToSelf;
  asm("mov.u32 %0, %%lanemask_le;"
      : "=r"(lanesUpToSelf));
  return lanesUpToSelf;
}
57 
// Reads the PTX special register `%lanemask_gt`: a 32-bit mask with a bit set
// for every lane whose index is higher than the calling thread's lane.
__device__ __forceinline__ unsigned getLaneMaskGt() {
  unsigned higherLanes;
  asm("mov.u32 %0, %%lanemask_gt;"
      : "=r"(higherLanes));
  return higherLanes;
}
63 
// Reads the PTX special register `%lanemask_ge`: a 32-bit mask with a bit set
// for the calling thread's lane and every lane with a higher index.
__device__ __forceinline__ unsigned getLaneMaskGe() {
  unsigned lanesFromSelf;
  asm("mov.u32 %0, %%lanemask_ge;"
      : "=r"(lanesFromSelf));
  return lanesFromSelf;
}
69 
// Issues the PTX `bar.sync` instruction on a named barrier.
//
//   name       - barrier identifier passed as the first operand
//   numThreads - thread count passed as the second operand
//
// The asm is `volatile` with a "memory" clobber so the compiler neither
// removes it nor moves memory accesses across it.
__device__ __forceinline__
void namedBarrierWait(int name, int numThreads) {
  asm volatile("bar.sync %0, %1;"
               : /* no outputs */
               : "r"(name), "r"(numThreads)
               : "memory");
}
73 
// Issues the PTX `bar.arrive` instruction on a named barrier.
//
//   name       - barrier identifier passed as the first operand
//   numThreads - thread count passed as the second operand
//
// The asm is `volatile` with a "memory" clobber so the compiler neither
// removes it nor moves memory accesses across it.
__device__ __forceinline__
void namedBarrierArrived(int name, int numThreads) {
  asm volatile("bar.arrive %0, %1;"
               : /* no outputs */
               : "r"(name), "r"(numThreads)
               : "memory");
}
77 
// Issues a PTX `prefetch.global.L2` for the address `p`.
// The pointer is bound with the 64-bit "l" constraint; nothing is returned.
// FIXME: prefetch does nothing (in SASS) on Maxwell
__device__ __forceinline__
void prefetchL2(const void *p) {
  asm volatile("prefetch.global.L2 [%0];"
               : /* no outputs */
               : "l"(p));
}
82 
// Issues a PTX `prefetch.global.L1` for the address `p`.
// The pointer is bound with the 64-bit "l" constraint; nothing is returned.
__device__ __forceinline__
void prefetchL1(const void *p) {
  asm volatile("prefetch.global.L1 [%0];"
               : /* no outputs */
               : "l"(p));
}
86 
87 } } // namespace