Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
DeviceUtils.cu
1 /**
2  * Copyright (c) Facebook, Inc. and its affiliates.
3  *
4  * This source code is licensed under the MIT license found in the
5  * LICENSE file in the root directory of this source tree.
6  */
7 
8 
9 #include "DeviceUtils.h"
10 #include "DeviceDefs.cuh"
11 #include "../../FaissAssert.h"
12 #include <mutex>
13 #include <unordered_map>
14 
15 namespace faiss { namespace gpu {
16 
// Returns the device ordinal reported by cudaGetDevice.
// A runtime error is handed to CUDA_VERIFY; we additionally assert that the
// runtime actually overwrote our sentinel value.
int getCurrentDevice() {
  // -1 is a sentinel meaning "not written by the runtime"
  int dev = -1;

  CUDA_VERIFY(cudaGetDevice(&dev));
  FAISS_ASSERT(dev != -1);
  return dev;
}
24 
// Selects `device` through cudaSetDevice; the returned status is checked by
// CUDA_VERIFY.
void setCurrentDevice(int device) {
  CUDA_VERIFY(cudaSetDevice(device));
}
28 
// Returns the device count reported by cudaGetDeviceCount.
// cudaErrorNoDevice is not treated as a failure: it is reported to the
// caller as a count of zero. Any other status goes through CUDA_VERIFY.
int getNumDevices() {
  // -1 is a sentinel so we can tell whether a count was ever produced
  int numDev = -1;
  cudaError_t err = cudaGetDeviceCount(&numDev);

  if (err != cudaErrorNoDevice) {
    CUDA_VERIFY(err);
  } else {
    // No devices present; report that as an empty set rather than an error
    numDev = 0;
  }

  FAISS_ASSERT(numDev != -1);
  return numDev;
}
41 
// Calls cudaDeviceSynchronize() once for every device index in
// [0, getNumDevices()). Each device is made current for the duration of its
// iteration via a DeviceScope.
void synchronizeAllDevices() {
  // The device count does not change while we iterate; query it once
  // instead of issuing a runtime call on every loop test.
  const int numDevices = getNumDevices();

  for (int i = 0; i < numDevices; ++i) {
    DeviceScope scope(i);

    CUDA_VERIFY(cudaDeviceSynchronize());
  }
}
49 
// Returns the cudaDeviceProp for `device`, querying the runtime with
// cudaGetDeviceProperties only the first time a given device is requested.
// Results are kept in a function-local static map; all access to that map
// happens while holding a function-local static mutex.
const cudaDeviceProp& getDeviceProperties(int device) {
  static std::mutex mutex;
  static std::unordered_map<int, cudaDeviceProp> properties;

  std::lock_guard<std::mutex> guard(mutex);

  // Fast path: this device has already been queried
  auto cached = properties.find(device);
  if (cached != properties.end()) {
    return cached->second;
  }

  // First request for this device: ask the runtime and remember the answer
  cudaDeviceProp prop;
  CUDA_VERIFY(cudaGetDeviceProperties(&prop, device));

  return properties.emplace(device, prop).first->second;
}
67 
// Same as getDeviceProperties(), applied to whatever getCurrentDevice()
// returns.
const cudaDeviceProp& getCurrentDeviceProperties() {
  const int current = getCurrentDevice();
  return getDeviceProperties(current);
}
71 
// Returns the `maxThreadsPerBlock` field of the properties of `device`.
int getMaxThreads(int device) {
  const cudaDeviceProp& prop = getDeviceProperties(device);
  return prop.maxThreadsPerBlock;
}
75 
// Same as getMaxThreads(), applied to whatever getCurrentDevice() returns.
int getMaxThreadsCurrentDevice() {
  const int current = getCurrentDevice();
  return getMaxThreads(current);
}
79 
// Returns the `sharedMemPerBlock` field of the properties of `device`.
size_t getMaxSharedMemPerBlock(int device) {
  const cudaDeviceProp& prop = getDeviceProperties(device);
  return prop.sharedMemPerBlock;
}
83 
// Same as getMaxSharedMemPerBlock(), applied to whatever getCurrentDevice()
// returns.
size_t getMaxSharedMemPerBlockCurrentDevice() {
  const int current = getCurrentDevice();
  return getMaxSharedMemPerBlock(current);
}
87 
// Returns the device on which the memory at `p` resides, or -1 if `p` is
// null, is not known to the CUDA runtime, or refers to host memory.
//
// The pointer is classified with cudaPointerGetAttributes. Only cudaSuccess
// and cudaErrorInvalidValue are accepted from that call; anything else
// trips FAISS_ASSERT.
int getDeviceForAddress(const void* p) {
  if (!p) {
    return -1;
  }

  cudaPointerAttributes att;
  cudaError_t err = cudaPointerGetAttributes(&att, p);
  FAISS_ASSERT(err == cudaSuccess ||
               err == cudaErrorInvalidValue);

  if (err == cudaErrorInvalidValue) {
    // Make sure the current thread error status has been reset
    err = cudaGetLastError();
    FAISS_ASSERT(err == cudaErrorInvalidValue);
    return -1;
  }

#if defined(CUDART_VERSION) && CUDART_VERSION >= 10000
  // cudaPointerAttributes::memoryType was deprecated in CUDA 10 and removed
  // in CUDA 11; `type` is its replacement. Newer runtimes also report plain
  // (unregistered) host pointers with cudaSuccess, so we must positively
  // identify device-resident memory rather than merely exclude
  // cudaMemoryTypeHost. Managed allocations keep reporting their device, as
  // they did through the old memoryType field.
  if (att.type == cudaMemoryTypeDevice ||
      att.type == cudaMemoryTypeManaged) {
    return att.device;
  }

  return -1;
#else
  if (att.memoryType == cudaMemoryTypeHost) {
    return -1;
  }

  return att.device;
#endif
}
109 
// Reports whether `device` is considered to have full unified memory
// support, which this function decides solely from the `major` field of
// the device properties being at least 6.
bool getFullUnifiedMemSupport(int device) {
  return getDeviceProperties(device).major >= 6;
}
114 
// Same as getFullUnifiedMemSupport(), applied to whatever
// getCurrentDevice() returns.
bool getFullUnifiedMemSupportCurrentDevice() {
  const int current = getCurrentDevice();
  return getFullUnifiedMemSupport(current);
}
118 
// Returns the compile-time constant GPU_MAX_SELECTION_K.
int getMaxKSelection() {
  // No device is consulted here for now; the limit comes purely from the
  // CUDA SDK this code was built against.
  return GPU_MAX_SELECTION_K;
}
124 
// Makes `device` current for the lifetime of this object.
// prevDevice_ records the device to switch back to on destruction, or -1
// when `device` was already current and no switch took place.
DeviceScope::DeviceScope(int device) {
  const int current = getCurrentDevice();

  if (current == device) {
    // Already on the requested device; nothing to restore later
    prevDevice_ = -1;
    return;
  }

  prevDevice_ = current;
  setCurrentDevice(device);
}
134 
// Switches back to the device recorded at construction, if the constructor
// changed devices at all.
DeviceScope::~DeviceScope() {
  // -1 means the constructor did not switch devices
  if (prevDevice_ == -1) {
    return;
  }

  setCurrentDevice(prevDevice_);
}
140 
// Creates a cuBLAS handle with cublasCreate and stores it in blasHandle_;
// asserts that creation succeeded.
CublasHandleScope::CublasHandleScope() {
  cublasStatus_t blasStatus = cublasCreate(&blasHandle_);

  FAISS_ASSERT(blasStatus == CUBLAS_STATUS_SUCCESS);
}
145 
// Destroys the cuBLAS handle held in blasHandle_ with cublasDestroy;
// asserts that destruction succeeded.
CublasHandleScope::~CublasHandleScope() {
  cublasStatus_t blasStatus = cublasDestroy(blasHandle_);

  FAISS_ASSERT(blasStatus == CUBLAS_STATUS_SUCCESS);
}
150 
// Creates an event and records it in `stream`.
// The event is created with cudaEventDisableTiming.
CudaEvent::CudaEvent(cudaStream_t stream)
    : event_(0) {
  // Step 1: create the event itself
  CUDA_VERIFY(cudaEventCreateWithFlags(&event_, cudaEventDisableTiming));

  // Step 2: record it in the caller's stream
  CUDA_VERIFY(cudaEventRecord(event_, stream));
}
156 
// Move constructor: takes over the event handle held by `event` and clears
// the source's handle so that its destructor will not destroy it.
CudaEvent::CudaEvent(CudaEvent&& event) noexcept
    : event_(event.event_) {
  // The source no longer owns the handle
  event.event_ = 0;
}
161 
// Destroys the held event with cudaEventDestroy, unless the handle is zero
// (as it is after this object has been moved from).
CudaEvent::~CudaEvent() {
  if (!event_) {
    return;
  }

  CUDA_VERIFY(cudaEventDestroy(event_));
}
167 
// Move assignment: takes over the event handle held by `event`.
// Any event this object already owns is destroyed first so that it is not
// leaked. Assigning an object to itself leaves it untouched.
CudaEvent&
CudaEvent::operator=(CudaEvent&& event) noexcept {
  if (this != &event) {
    // Release our current event before overwriting the handle; otherwise
    // nothing would ever call cudaEventDestroy on it.
    if (event_) {
      CUDA_VERIFY(cudaEventDestroy(event_));
    }

    event_ = event.event_;

    // The source no longer owns the handle
    event.event_ = 0;
  }

  return *this;
}
175 
// Wait on this event in this stream.
// Issues cudaStreamWaitEvent on `stream` for the held event, with flags 0.
void
CudaEvent::streamWaitOnEvent(cudaStream_t stream) {
  CUDA_VERIFY(cudaStreamWaitEvent(stream, event_, 0));
}
180 
// Have the CPU wait for the completion of this event.
// Calls cudaEventSynchronize on the held event.
void
CudaEvent::cpuWaitOnEvent() {
  CUDA_VERIFY(cudaEventSynchronize(event_));
}
185 
186 } } // namespace
void cpuWaitOnEvent()
Have the CPU wait for the completion of this event.
Definition: DeviceUtils.cu:182
void streamWaitOnEvent(cudaStream_t stream)
Wait on this event in this stream.
Definition: DeviceUtils.cu:177
CudaEvent(cudaStream_t stream)
Creates an event and records it in this stream.
Definition: DeviceUtils.cu:151