Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
PtxUtils.cuh
1 
2 /**
3  * Copyright (c) 2015-present, Facebook, Inc.
4  * All rights reserved.
5  *
6  * This source code is licensed under the CC-by-NC license found in the
7  * LICENSE file in the root directory of this source tree.
8  */
9 
10 // Copyright 2004-present Facebook. All Rights Reserved.
11 
12 #pragma once
13 
14 #include <cuda.h>
15 
16 namespace faiss { namespace gpu {
17 
// Extracts a bit field from a 32-bit value with the PTX `bfe.u32`
// instruction.
//
// val: the word to read from
// pos: starting bit position of the field (passed straight to `bfe`)
// len: width of the field in bits (passed straight to `bfe`)
//
// Returns whatever `bfe.u32` produces for those operands; no range checking
// of `pos` / `len` is done here.
__device__ __forceinline__
unsigned int getBitfield(unsigned int val, int pos, int len) {
  unsigned int field;
  asm("bfe.u32 %0, %1, %2, %3;"
      : "=r"(field)
      : "r"(val), "r"(pos), "r"(len));
  return field;
}
24 
// 64-bit overload: extracts a bit field from `val` with the PTX `bfe.u64`
// instruction.
//
// val: the word to read from
// pos: starting bit position of the field (passed straight to `bfe`)
// len: width of the field in bits (passed straight to `bfe`)
//
// NOTE(review): the "l" constraint binds a 64-bit register, so this assumes
// `unsigned long` is 64 bits wide on the target ABI -- confirm for platforms
// where `long` is 32 bits.
__device__ __forceinline__
unsigned long getBitfield(unsigned long val, int pos, int len) {
  unsigned long field;
  asm("bfe.u64 %0, %1, %2, %3;"
      : "=l"(field)
      : "l"(val), "r"(pos), "r"(len));
  return field;
}
31 
// Inserts a bit field into a 32-bit value with the PTX `bfi.b32`
// instruction.
//
// val:      the word that receives the field
// toInsert: the bits to insert
// pos:      starting bit position in `val` (passed straight to `bfi`)
// len:      width of the field in bits (passed straight to `bfi`)
//
// Returns the result of `bfi.b32`; `val` itself is not modified.
__device__ __forceinline__
unsigned int setBitfield(unsigned int val,
                         unsigned int toInsert, int pos, int len) {
  unsigned int merged;
  // Operand order for bfi is (dest, field-to-insert, base word, pos, len),
  // hence `toInsert` precedes `val` in the input list.
  asm("bfi.b32 %0, %1, %2, %3, %4;"
      : "=r"(merged)
      : "r"(toInsert), "r"(val), "r"(pos), "r"(len));
  return merged;
}
40 
// Returns the calling thread's lane index within its warp, read from the
// PTX special register `%laneid`.
__device__ __forceinline__ int getLaneId() {
  int laneId;
  // Because this asm statement has operands, a literal '%' in the template
  // must be written as "%%"; a bare "%laneid" is only accepted by lenient
  // compilers. This matches the "%%lanemask_*" reads elsewhere in this file.
  asm("mov.s32 %0, %%laneid;" : "=r"(laneId));
  return laneId;
}
46 
// Reads the PTX special register `%lanemask_lt`: per the PTX ISA, a 32-bit
// mask with bits set for the lanes whose index is less than the caller's.
__device__ __forceinline__ unsigned getLaneMaskLt() {
  unsigned lanesBelow;
  asm("mov.u32 %0, %%lanemask_lt;"
      : "=r"(lanesBelow));
  return lanesBelow;
}
52 
// Reads the PTX special register `%lanemask_le`: per the PTX ISA, a 32-bit
// mask with bits set for the lanes whose index is less than or equal to the
// caller's.
__device__ __forceinline__ unsigned getLaneMaskLe() {
  unsigned lanesAtOrBelow;
  asm("mov.u32 %0, %%lanemask_le;"
      : "=r"(lanesAtOrBelow));
  return lanesAtOrBelow;
}
58 
// Reads the PTX special register `%lanemask_gt`: per the PTX ISA, a 32-bit
// mask with bits set for the lanes whose index is greater than the caller's.
__device__ __forceinline__ unsigned getLaneMaskGt() {
  unsigned lanesAbove;
  asm("mov.u32 %0, %%lanemask_gt;"
      : "=r"(lanesAbove));
  return lanesAbove;
}
64 
// Reads the PTX special register `%lanemask_ge`: per the PTX ISA, a 32-bit
// mask with bits set for the lanes whose index is greater than or equal to
// the caller's.
__device__ __forceinline__ unsigned getLaneMaskGe() {
  unsigned lanesAtOrAbove;
  asm("mov.u32 %0, %%lanemask_ge;"
      : "=r"(lanesAtOrAbove));
  return lanesAtOrAbove;
}
70 
// Issues a PTX `bar.sync` on the named barrier `name` with a participant
// count of `numThreads`.
//
// The statement is `volatile` and declares a "memory" clobber, so the
// compiler will neither remove it nor move memory accesses across it.
// Valid ranges for `name` and `numThreads` are those imposed by the PTX
// `bar` instruction; nothing is validated here.
__device__ __forceinline__ void namedBarrierWait(int name, int numThreads) {
  asm volatile("bar.sync %0, %1;"
               : /* no outputs */
               : "r"(name), "r"(numThreads)
               : "memory");
}
74 
// Issues a PTX `bar.arrive` on the named barrier `name` with a participant
// count of `numThreads`, i.e. signals arrival at the barrier (the
// non-waiting counterpart of `bar.sync`).
//
// The statement is `volatile` and declares a "memory" clobber, so the
// compiler will neither remove it nor move memory accesses across it.
// Valid ranges for `name` and `numThreads` are those imposed by the PTX
// `bar` instruction; nothing is validated here.
__device__ __forceinline__ void namedBarrierArrived(int name, int numThreads) {
  asm volatile("bar.arrive %0, %1;"
               : /* no outputs */
               : "r"(name), "r"(numThreads)
               : "memory");
}
78 
// Emits a PTX `prefetch.global.L2` for the address `p`.
//
// p: address to prefetch; the instruction uses the `.global` state space, so
//    `p` is expected to refer to global memory.
//
// FIXME: prefetch does nothing (in SASS) on Maxwell
__device__ __forceinline__ void prefetchL2(const void *p) {
  // "l" passes the pointer as a 64-bit register operand.
  asm volatile("prefetch.global.L2 [%0];"
               : /* no outputs */
               : "l"(p));
}
83 
// Emits a PTX `prefetch.global.L1` for the address `p`.
//
// p: address to prefetch; the instruction uses the `.global` state space, so
//    `p` is expected to refer to global memory.
__device__ __forceinline__ void prefetchL1(const void *p) {
  // "l" passes the pointer as a 64-bit register operand.
  asm volatile("prefetch.global.L1 [%0];"
               : /* no outputs */
               : "l"(p));
}
87 
88 } } // namespace