9 #include "DeviceUtils.h"
10 #include "DeviceDefs.cuh"
11 #include "../../FaissAssert.h"
13 #include <unordered_map>
15 namespace faiss {
namespace gpu {
// Queries the current CUDA device with cudaGetDevice and asserts that the
// reported index is not -1.
// NOTE(review): the declaration of `dev` and the return statement are not
// visible here; presumably `dev` is what gets returned. Confirm.
17 int getCurrentDevice() {
19 CUDA_VERIFY(cudaGetDevice(&dev));
// -1 is treated as "no valid device was reported".
20 FAISS_ASSERT(dev != -1);
// Makes `device` the current CUDA device by calling cudaSetDevice; the call
// is wrapped in CUDA_VERIFY.
25 void setCurrentDevice(
int device) {
26 CUDA_VERIFY(cudaSetDevice(device));
31 cudaError_t err = cudaGetDeviceCount(&numDev);
32 if (cudaErrorNoDevice == err) {
37 FAISS_ASSERT(numDev != -1);
// Issues one cudaDeviceSynchronize() call per device index in
// [0, getNumDevices()).
42 void synchronizeAllDevices() {
// getNumDevices() sits in the loop condition, so it is re-evaluated on every
// iteration.
43 for (
int i = 0; i < getNumDevices(); ++i) {
// NOTE(review): nothing visible here makes device `i` current before the
// synchronize call; confirm against the full file that the device is
// selected first.
46 CUDA_VERIFY(cudaDeviceSynchronize());
// Looks up the cudaDeviceProp for `device` in a function-local cache and
// queries the CUDA runtime on a cache miss.
// NOTE(review): the return statement is not visible here; presumably the
// cached entry that `it` points at is returned by reference. Confirm.
50 const cudaDeviceProp& getDeviceProperties(
int device) {
// Function-local statics: a single cache shared by all callers, protected by
// `mutex`.
51 static std::mutex mutex;
52 static std::unordered_map<int, cudaDeviceProp> properties;
// The guard holds the lock for the rest of the scope, so lookup and insertion
// are serialized.
54 std::lock_guard<std::mutex> guard(mutex);
56 auto it = properties.find(device);
57 if (it == properties.end()) {
// Cache miss: fetch the properties from the runtime and store them.
59 CUDA_VERIFY(cudaGetDeviceProperties(&prop, device));
61 properties[device] = prop;
// Look the key up again so `it` refers to the newly stored entry.
62 it = properties.find(device);
// Convenience wrapper: returns getDeviceProperties() for the device reported
// by getCurrentDevice().
68 const cudaDeviceProp& getCurrentDeviceProperties() {
69 return getDeviceProperties(getCurrentDevice());
// Returns the maxThreadsPerBlock field of the properties of `device`.
72 int getMaxThreads(
int device) {
73 return getDeviceProperties(device).maxThreadsPerBlock;
// Convenience wrapper: getMaxThreads() for the device reported by
// getCurrentDevice().
76 int getMaxThreadsCurrentDevice() {
77 return getMaxThreads(getCurrentDevice());
// Returns the sharedMemPerBlock field of the properties of `device`.
80 size_t getMaxSharedMemPerBlock(
int device) {
81 return getDeviceProperties(device).sharedMemPerBlock;
// Convenience wrapper: getMaxSharedMemPerBlock() for the device reported by
// getCurrentDevice().
84 size_t getMaxSharedMemPerBlockCurrentDevice() {
85 return getMaxSharedMemPerBlock(getCurrentDevice());
// Inspects pointer `p` with cudaPointerGetAttributes and branches on the
// outcome: query rejected as an invalid value, host memory, or anything else.
// NOTE(review): the value returned on each branch is not visible here;
// confirm against the full file.
88 int getDeviceForAddress(
const void* p) {
93 cudaPointerAttributes att;
94 cudaError_t err = cudaPointerGetAttributes(&att, p);
// Only success or cudaErrorInvalidValue is tolerated; any other status trips
// the assertion.
95 FAISS_ASSERT(err == cudaSuccess ||
96 err == cudaErrorInvalidValue);
98 if (err == cudaErrorInvalidValue) {
// cudaGetLastError() reads and clears the recorded error, so the failed
// query does not surface again on a later CUDA call.
100 err = cudaGetLastError();
101 FAISS_ASSERT(err == cudaErrorInvalidValue);
103 }
// The query succeeded; check whether the pointer is classified as host memory.
else if (att.memoryType == cudaMemoryTypeHost) {
// Returns true when the `major` field of the properties of `device` is at
// least 6.
110 bool getFullUnifiedMemSupport(
int device) {
111 const auto& prop = getDeviceProperties(device);
112 return (prop.major >= 6);
// Convenience wrapper: getFullUnifiedMemSupport() for the device reported by
// getCurrentDevice().
115 bool getFullUnifiedMemSupportCurrentDevice() {
116 return getFullUnifiedMemSupport(getCurrentDevice());
// Returns GPU_MAX_SELECTION_K, a constant defined outside this listing.
119 int getMaxKSelection() {
122 return GPU_MAX_SELECTION_K;
// Remembers the device that is current at construction time in prevDevice_
// and switches to `device` when the two differ.
125 DeviceScope::DeviceScope(
int device) {
126 prevDevice_ = getCurrentDevice();
// Skip the switch when the requested device is already current.
128 if (prevDevice_ != device) {
129 setCurrentDevice(device);
// Switches back to the device stored in prevDevice_, unless prevDevice_ is -1.
135 DeviceScope::~DeviceScope() {
// NOTE(review): -1 looks like a "nothing to restore" sentinel; the place
// where it would be assigned is not visible here. Confirm.
136 if (prevDevice_ != -1) {
137 setCurrentDevice(prevDevice_);
// Creates a cuBLAS handle into blasHandle_ and asserts that creation reported
// CUBLAS_STATUS_SUCCESS.
141 CublasHandleScope::CublasHandleScope() {
142 auto blasStatus = cublasCreate(&blasHandle_);
143 FAISS_ASSERT(blasStatus == CUBLAS_STATUS_SUCCESS);
// Destroys the cuBLAS handle held in blasHandle_ and asserts that destruction
// reported CUBLAS_STATUS_SUCCESS.
146 CublasHandleScope::~CublasHandleScope() {
147 auto blasStatus = cublasDestroy(blasHandle_);
148 FAISS_ASSERT(blasStatus == CUBLAS_STATUS_SUCCESS);
153 CUDA_VERIFY(cudaEventCreateWithFlags(&event_, cudaEventDisableTiming));
154 CUDA_VERIFY(cudaEventRecord(event_, stream));
158 : event_(std::move(event.event_)) {
// Destroys the CUDA event held in event_.
// NOTE(review): the embedded numbering jumps from 162 to 164, so a line
// between the header and the destroy call is not visible (possibly a guard on
// event_). Confirm.
162 CudaEvent::~CudaEvent() {
164 CUDA_VERIFY(cudaEventDestroy(event_));
// Move assignment: takes over the event handle held by `event`.
169 CudaEvent::operator=(CudaEvent&& event) noexcept {
// NOTE(review): nothing visible here releases the handle previously stored in
// event_ before it is overwritten, and the moved-from object's handle is not
// visibly reset. Confirm against the full file.
170 event_ = std::move(event.event_);
178 CUDA_VERIFY(cudaStreamWaitEvent(stream, event_, 0));
183 CUDA_VERIFY(cudaEventSynchronize(event_));
void cpuWaitOnEvent()
Have the CPU wait for the completion of this event.
void streamWaitOnEvent(cudaStream_t stream)
Make the given stream wait on this event.
CudaEvent(cudaStream_t stream)
Creates an event and records it in this stream.