Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
DeviceUtils.cpp
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 
10 #include "DeviceUtils.h"
11 #include "../../FaissAssert.h"
12 #include <mutex>
13 #include <unordered_map>
14 
15 namespace faiss { namespace gpu {
16 
17 int getCurrentDevice() {
18  int dev = -1;
19  CUDA_VERIFY(cudaGetDevice(&dev));
20  FAISS_ASSERT(dev != -1);
21 
22  return dev;
23 }
24 
25 void setCurrentDevice(int device) {
26  CUDA_VERIFY(cudaSetDevice(device));
27 }
28 
29 int getNumDevices() {
30  int numDev = -1;
31  CUDA_VERIFY(cudaGetDeviceCount(&numDev));
32  FAISS_ASSERT(numDev != -1);
33 
34  return numDev;
35 }
36 
37 void synchronizeAllDevices() {
38  for (int i = 0; i < getNumDevices(); ++i) {
39  DeviceScope scope(i);
40 
41  CUDA_VERIFY(cudaDeviceSynchronize());
42  }
43 }
44 
45 const cudaDeviceProp& getDeviceProperties(int device) {
46  static std::mutex mutex;
47  static std::unordered_map<int, cudaDeviceProp> properties;
48 
49  std::lock_guard<std::mutex> guard(mutex);
50 
51  auto it = properties.find(device);
52  if (it == properties.end()) {
53  cudaDeviceProp prop;
54  CUDA_VERIFY(cudaGetDeviceProperties(&prop, device));
55 
56  properties[device] = prop;
57  it = properties.find(device);
58  }
59 
60  return it->second;
61 }
62 
63 const cudaDeviceProp& getCurrentDeviceProperties() {
64  return getDeviceProperties(getCurrentDevice());
65 }
66 
67 int getMaxThreads(int device) {
68  return getDeviceProperties(device).maxThreadsPerBlock;
69 }
70 
71 int getMaxThreadsCurrentDevice() {
72  return getMaxThreads(getCurrentDevice());
73 }
74 
75 size_t getMaxSharedMemPerBlock(int device) {
76  return getDeviceProperties(device).sharedMemPerBlock;
77 }
78 
79 size_t getMaxSharedMemPerBlockCurrentDevice() {
80  return getMaxSharedMemPerBlock(getCurrentDevice());
81 }
82 
83 int getDeviceForAddress(const void* p) {
84  if (!p) {
85  return -1;
86  }
87 
88  cudaPointerAttributes att;
89  cudaError_t err = cudaPointerGetAttributes(&att, p);
90  FAISS_ASSERT(err == cudaSuccess ||
91  err == cudaErrorInvalidValue);
92 
93  if (err == cudaErrorInvalidValue) {
94  // Make sure the current thread error status has been reset
95  err = cudaGetLastError();
96  FAISS_ASSERT(err == cudaErrorInvalidValue);
97  return -1;
98  } else if (att.memoryType == cudaMemoryTypeHost) {
99  return -1;
100  } else {
101  return att.device;
102  }
103 }
104 
105 bool getFullUnifiedMemSupport(int device) {
106  const auto& prop = getDeviceProperties(device);
107  return (prop.major >= 6);
108 }
109 
110 bool getFullUnifiedMemSupportCurrentDevice() {
111  return getFullUnifiedMemSupport(getCurrentDevice());
112 }
113 
114 DeviceScope::DeviceScope(int device) {
115  prevDevice_ = getCurrentDevice();
116 
117  if (prevDevice_ != device) {
118  setCurrentDevice(device);
119  } else {
120  prevDevice_ = -1;
121  }
122 }
123 
124 DeviceScope::~DeviceScope() {
125  if (prevDevice_ != -1) {
126  setCurrentDevice(prevDevice_);
127  }
128 }
129 
130 CublasHandleScope::CublasHandleScope() {
131  auto blasStatus = cublasCreate(&blasHandle_);
132  FAISS_ASSERT(blasStatus == CUBLAS_STATUS_SUCCESS);
133 }
134 
135 CublasHandleScope::~CublasHandleScope() {
136  auto blasStatus = cublasDestroy(blasHandle_);
137  FAISS_ASSERT(blasStatus == CUBLAS_STATUS_SUCCESS);
138 }
139 
140 CudaEvent::CudaEvent(cudaStream_t stream)
141  : event_(0) {
142  CUDA_VERIFY(cudaEventCreateWithFlags(&event_, cudaEventDisableTiming));
143  CUDA_VERIFY(cudaEventRecord(event_, stream));
144 }
145 
146 CudaEvent::CudaEvent(CudaEvent&& event) noexcept
147  : event_(std::move(event.event_)) {
148  event.event_ = 0;
149 }
150 
151 CudaEvent::~CudaEvent() {
152  if (event_) {
153  CUDA_VERIFY(cudaEventDestroy(event_));
154  }
155 }
156 
157 CudaEvent&
158 CudaEvent::operator=(CudaEvent&& event) noexcept {
159  event_ = std::move(event.event_);
160  event.event_ = 0;
161 
162  return *this;
163 }
164 
165 void
166 CudaEvent::streamWaitOnEvent(cudaStream_t stream) {
167  CUDA_VERIFY(cudaStreamWaitEvent(stream, event_, 0));
168 }
169 
170 void
172  CUDA_VERIFY(cudaEventSynchronize(event_));
173 }
174 
175 } } // namespace
void cpuWaitOnEvent()
Have the CPU wait for the completion of this event.
void streamWaitOnEvent(cudaStream_t stream)
Wait on this event in this stream.
CudaEvent(cudaStream_t stream)
Creates an event and records it in this stream.