faiss/gpu/utils/DeviceUtils.cu

207 lines
4.5 KiB
Plaintext

/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <faiss/gpu/utils/DeviceUtils.h>
#include <faiss/gpu/utils/DeviceDefs.cuh>
#include <faiss/impl/FaissAssert.h>
#include <mutex>
#include <unordered_map>
#include <cuda_profiler_api.h>
namespace faiss { namespace gpu {
int getCurrentDevice() {
int dev = -1;
CUDA_VERIFY(cudaGetDevice(&dev));
FAISS_ASSERT(dev != -1);
return dev;
}
void setCurrentDevice(int device) {
CUDA_VERIFY(cudaSetDevice(device));
}
int getNumDevices() {
int numDev = -1;
cudaError_t err = cudaGetDeviceCount(&numDev);
if (cudaErrorNoDevice == err) {
numDev = 0;
} else {
CUDA_VERIFY(err);
}
FAISS_ASSERT(numDev != -1);
return numDev;
}
void profilerStart() {
CUDA_VERIFY(cudaProfilerStart());
}
void profilerStop() {
CUDA_VERIFY(cudaProfilerStop());
}
void synchronizeAllDevices() {
for (int i = 0; i < getNumDevices(); ++i) {
DeviceScope scope(i);
CUDA_VERIFY(cudaDeviceSynchronize());
}
}
const cudaDeviceProp& getDeviceProperties(int device) {
static std::mutex mutex;
static std::unordered_map<int, cudaDeviceProp> properties;
std::lock_guard<std::mutex> guard(mutex);
auto it = properties.find(device);
if (it == properties.end()) {
cudaDeviceProp prop;
CUDA_VERIFY(cudaGetDeviceProperties(&prop, device));
properties[device] = prop;
it = properties.find(device);
}
return it->second;
}
const cudaDeviceProp& getCurrentDeviceProperties() {
return getDeviceProperties(getCurrentDevice());
}
int getMaxThreads(int device) {
return getDeviceProperties(device).maxThreadsPerBlock;
}
int getMaxThreadsCurrentDevice() {
return getMaxThreads(getCurrentDevice());
}
size_t getMaxSharedMemPerBlock(int device) {
return getDeviceProperties(device).sharedMemPerBlock;
}
size_t getMaxSharedMemPerBlockCurrentDevice() {
return getMaxSharedMemPerBlock(getCurrentDevice());
}
int getDeviceForAddress(const void* p) {
if (!p) {
return -1;
}
cudaPointerAttributes att;
cudaError_t err = cudaPointerGetAttributes(&att, p);
FAISS_ASSERT_FMT(err == cudaSuccess ||
err == cudaErrorInvalidValue,
"unknown error %d", (int) err);
if (err == cudaErrorInvalidValue) {
// Make sure the current thread error status has been reset
err = cudaGetLastError();
FAISS_ASSERT_FMT(err == cudaErrorInvalidValue,
"unknown error %d", (int) err);
return -1;
} else if (att.memoryType == cudaMemoryTypeHost) {
return -1;
} else {
return att.device;
}
}
bool getFullUnifiedMemSupport(int device) {
const auto& prop = getDeviceProperties(device);
return (prop.major >= 6);
}
bool getFullUnifiedMemSupportCurrentDevice() {
return getFullUnifiedMemSupport(getCurrentDevice());
}
bool getTensorCoreSupport(int device) {
const auto& prop = getDeviceProperties(device);
return (prop.major >= 7);
}
bool getTensorCoreSupportCurrentDevice() {
return getTensorCoreSupport(getCurrentDevice());
}
int getMaxKSelection() {
// Don't use the device at the moment, just base this based on the CUDA SDK
// that we were compiled with
return GPU_MAX_SELECTION_K;
}
DeviceScope::DeviceScope(int device) {
prevDevice_ = getCurrentDevice();
if (prevDevice_ != device) {
setCurrentDevice(device);
} else {
prevDevice_ = -1;
}
}
DeviceScope::~DeviceScope() {
if (prevDevice_ != -1) {
setCurrentDevice(prevDevice_);
}
}
CublasHandleScope::CublasHandleScope() {
auto blasStatus = cublasCreate(&blasHandle_);
FAISS_ASSERT(blasStatus == CUBLAS_STATUS_SUCCESS);
}
CublasHandleScope::~CublasHandleScope() {
auto blasStatus = cublasDestroy(blasHandle_);
FAISS_ASSERT(blasStatus == CUBLAS_STATUS_SUCCESS);
}
CudaEvent::CudaEvent(cudaStream_t stream)
: event_(0) {
CUDA_VERIFY(cudaEventCreateWithFlags(&event_, cudaEventDisableTiming));
CUDA_VERIFY(cudaEventRecord(event_, stream));
}
CudaEvent::CudaEvent(CudaEvent&& event) noexcept
: event_(std::move(event.event_)) {
event.event_ = 0;
}
CudaEvent::~CudaEvent() {
if (event_) {
CUDA_VERIFY(cudaEventDestroy(event_));
}
}
CudaEvent&
CudaEvent::operator=(CudaEvent&& event) noexcept {
event_ = std::move(event.event_);
event.event_ = 0;
return *this;
}
void
CudaEvent::streamWaitOnEvent(cudaStream_t stream) {
CUDA_VERIFY(cudaStreamWaitEvent(stream, event_, 0));
}
void
CudaEvent::cpuWaitOnEvent() {
CUDA_VERIFY(cudaEventSynchronize(event_));
}
} } // namespace