/**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
|
|
|
|
|
|
|
|
|
|
|
|
#pragma once
|
|
|
|
|
|
|
|
#include <cuda.h>
|
|
|
|
|
|
|
|
namespace faiss { namespace gpu {
|
|
|
|
|
|
|
|
// Extracts a bit field from a 32-bit value using the PTX `bfe.u32`
// instruction.
//
// val: word to read from.
// pos: bit offset of the field's least-significant bit.
// len: field width in bits.
//
// Returns the extracted field in the low bits of the result (unsigned
// extract, so the upper bits are zero-filled). Handling of out-of-range
// `pos` / `len` is whatever `bfe` defines; see the PTX ISA.
__device__ __forceinline__
unsigned int getBitfield(unsigned int val, int pos, int len) {
  unsigned int field;
  asm("bfe.u32 %0, %1, %2, %3;"
      : "=r"(field)
      : "r"(val), "r"(pos), "r"(len));
  return field;
}
|
|
|
|
|
|
|
|
// 64-bit overload: extracts a bit field from `val` using the PTX `bfe.u64`
// instruction.
//
// val: word to read from.
// pos: bit offset of the field's least-significant bit.
// len: field width in bits.
//
// Returns the extracted field in the low bits of the result.
//
// NOTE(review): the "l" constraint binds a 64-bit register, so this
// overload assumes `unsigned long` is 64 bits wide (LP64). Confirm before
// building on a data model where `unsigned long` is 32 bits.
__device__ __forceinline__
unsigned long getBitfield(unsigned long val, int pos, int len) {
  unsigned long field;
  asm("bfe.u64 %0, %1, %2, %3;"
      : "=l"(field)
      : "l"(val), "r"(pos), "r"(len));
  return field;
}
|
|
|
|
|
|
|
|
// Inserts a bit field into a 32-bit value using the PTX `bfi.b32`
// instruction.
//
// val:      word that receives the field.
// toInsert: source of the bits to insert (taken from its low bits).
// pos:      bit offset in `val` where the field starts.
// len:      field width in bits.
//
// Returns a copy of `val` with the selected field replaced; `val` itself is
// passed by value and is not modified.
__device__ __forceinline__
unsigned int setBitfield(unsigned int val,
                         unsigned int toInsert, int pos, int len) {
  unsigned int merged;
  // Operand order follows `bfi`: the bits to insert come first, then the
  // word they are inserted into.
  asm("bfi.b32 %0, %1, %2, %3, %4;"
      : "=r"(merged)
      : "r"(toInsert), "r"(val), "r"(pos), "r"(len));
  return merged;
}
|
|
|
|
|
|
|
|
// Returns the calling thread's lane index within its warp, read from the
// PTX special register `%laneid`.
__device__ __forceinline__ int getLaneId() {
  int laneId;
  // In an asm template that has operands, a single '%' introduces an operand
  // reference, so the literal '%' of the special register name must be
  // written as "%%".
  asm("mov.u32 %0, %%laneid;" : "=r"(laneId));
  return laneId;
}
|
|
|
|
|
|
|
|
// Returns the value of the PTX special register `%lanemask_lt`: a 32-bit
// mask with a bit set for every lane whose index is lower than the calling
// thread's lane.
__device__ __forceinline__ unsigned getLaneMaskLt() {
  unsigned lanesBelow;
  asm("mov.u32 %0, %%lanemask_lt;"
      : "=r"(lanesBelow));
  return lanesBelow;
}
|
|
|
|
|
|
|
|
// Returns the value of the PTX special register `%lanemask_le`: a 32-bit
// mask with a bit set for every lane whose index is lower than or equal to
// the calling thread's lane.
__device__ __forceinline__ unsigned getLaneMaskLe() {
  unsigned lanesUpToSelf;
  asm("mov.u32 %0, %%lanemask_le;"
      : "=r"(lanesUpToSelf));
  return lanesUpToSelf;
}
|
|
|
|
|
|
|
|
// Returns the value of the PTX special register `%lanemask_gt`: a 32-bit
// mask with a bit set for every lane whose index is higher than the calling
// thread's lane.
__device__ __forceinline__ unsigned getLaneMaskGt() {
  unsigned lanesAbove;
  asm("mov.u32 %0, %%lanemask_gt;"
      : "=r"(lanesAbove));
  return lanesAbove;
}
|
|
|
|
|
|
|
|
// Returns the value of the PTX special register `%lanemask_ge`: a 32-bit
// mask with a bit set for every lane whose index is higher than or equal to
// the calling thread's lane.
__device__ __forceinline__ unsigned getLaneMaskGe() {
  unsigned lanesFromSelf;
  asm("mov.u32 %0, %%lanemask_ge;"
      : "=r"(lanesFromSelf));
  return lanesFromSelf;
}
|
|
|
|
|
|
|
|
// Issues the PTX instruction `bar.sync name, numThreads`, i.e. arrives at
// the barrier identified by `name` and waits on it.
//
// name:       barrier identifier passed as the first `bar.sync` operand.
// numThreads: thread count passed as the second `bar.sync` operand.
//
// The statement is `volatile` and carries a "memory" clobber, so the
// compiler neither removes it nor moves memory accesses across it.
// NOTE(review): the PTX ISA places limits on both operands (valid barrier
// ids, thread count relative to the warp size) -- confirm callers respect
// them.
__device__ __forceinline__ void namedBarrierWait(int name, int numThreads) {
  asm volatile("bar.sync %0, %1;"
               :
               : "r"(name), "r"(numThreads)
               : "memory");
}
|
|
|
|
|
|
|
|
// Issues the PTX instruction `bar.arrive name, numThreads`, i.e. signals
// arrival at the barrier identified by `name`.
//
// name:       barrier identifier passed as the first `bar.arrive` operand.
// numThreads: thread count passed as the second `bar.arrive` operand.
//
// The statement is `volatile` and carries a "memory" clobber, so the
// compiler neither removes it nor moves memory accesses across it.
// NOTE(review): the PTX ISA places limits on both operands (valid barrier
// ids, thread count relative to the warp size) -- confirm callers respect
// them.
__device__ __forceinline__ void namedBarrierArrived(int name, int numThreads) {
  asm volatile("bar.arrive %0, %1;"
               :
               : "r"(name), "r"(numThreads)
               : "memory");
}
|
|
|
|
|
|
|
|
} } // namespace
|