Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
DeviceUtils.cpp
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 // Copyright 2004-present Facebook. All Rights Reserved.
10 
11 #include "DeviceUtils.h"
12 #include "../../FaissAssert.h"
13 #include <mutex>
14 #include <unordered_map>
15 
16 namespace faiss { namespace gpu {
17 
18 int getCurrentDevice() {
19  int dev = -1;
20  CUDA_VERIFY(cudaGetDevice(&dev));
21  FAISS_ASSERT(dev != -1);
22 
23  return dev;
24 }
25 
26 void setCurrentDevice(int device) {
27  CUDA_VERIFY(cudaSetDevice(device));
28 }
29 
30 int getNumDevices() {
31  int numDev = -1;
32  CUDA_VERIFY(cudaGetDeviceCount(&numDev));
33  FAISS_ASSERT(numDev != -1);
34 
35  return numDev;
36 }
37 
38 void synchronizeAllDevices() {
39  for (int i = 0; i < getNumDevices(); ++i) {
40  DeviceScope scope(i);
41 
42  CUDA_VERIFY(cudaDeviceSynchronize());
43  }
44 }
45 
46 const cudaDeviceProp& getDeviceProperties(int device) {
47  static std::mutex mutex;
48  static std::unordered_map<int, cudaDeviceProp> properties;
49 
50  std::lock_guard<std::mutex> guard(mutex);
51 
52  auto it = properties.find(device);
53  if (it == properties.end()) {
54  cudaDeviceProp prop;
55  CUDA_VERIFY(cudaGetDeviceProperties(&prop, device));
56 
57  properties[device] = prop;
58  it = properties.find(device);
59  }
60 
61  return it->second;
62 }
63 
64 const cudaDeviceProp& getCurrentDeviceProperties() {
65  return getDeviceProperties(getCurrentDevice());
66 }
67 
68 int getMaxThreads(int device) {
69  return getDeviceProperties(device).maxThreadsPerBlock;
70 }
71 
72 int getMaxThreadsCurrentDevice() {
73  return getMaxThreads(getCurrentDevice());
74 }
75 
76 size_t getMaxSharedMemPerBlock(int device) {
77  return getDeviceProperties(device).sharedMemPerBlock;
78 }
79 
80 size_t getMaxSharedMemPerBlockCurrentDevice() {
81  return getMaxSharedMemPerBlock(getCurrentDevice());
82 }
83 
84 int getDeviceForAddress(const void* p) {
85  if (!p) {
86  return -1;
87  }
88 
89  cudaPointerAttributes att;
90  cudaError_t err = cudaPointerGetAttributes(&att, p);
91  FAISS_ASSERT(err == cudaSuccess ||
92  err == cudaErrorInvalidValue);
93 
94  if (err == cudaErrorInvalidValue) {
95  // Make sure the current thread error status has been reset
96  err = cudaGetLastError();
97  FAISS_ASSERT(err == cudaErrorInvalidValue);
98  return -1;
99  } else if (att.memoryType == cudaMemoryTypeHost) {
100  return -1;
101  } else {
102  return att.device;
103  }
104 }
105 
106 bool getFullUnifiedMemSupport(int device) {
107  const auto& prop = getDeviceProperties(device);
108  return (prop.major >= 6);
109 }
110 
111 bool getFullUnifiedMemSupportCurrentDevice() {
112  return getFullUnifiedMemSupport(getCurrentDevice());
113 }
114 
115 DeviceScope::DeviceScope(int device) {
116  prevDevice_ = getCurrentDevice();
117 
118  if (prevDevice_ != device) {
119  setCurrentDevice(device);
120  } else {
121  prevDevice_ = -1;
122  }
123 }
124 
125 DeviceScope::~DeviceScope() {
126  if (prevDevice_ != -1) {
127  setCurrentDevice(prevDevice_);
128  }
129 }
130 
131 CublasHandleScope::CublasHandleScope() {
132  auto blasStatus = cublasCreate(&blasHandle_);
133  FAISS_ASSERT(blasStatus == CUBLAS_STATUS_SUCCESS);
134 }
135 
136 CublasHandleScope::~CublasHandleScope() {
137  auto blasStatus = cublasDestroy(blasHandle_);
138  FAISS_ASSERT(blasStatus == CUBLAS_STATUS_SUCCESS);
139 }
140 
141 CudaEvent::CudaEvent(cudaStream_t stream)
142  : event_(0) {
143  CUDA_VERIFY(cudaEventCreateWithFlags(&event_, cudaEventDisableTiming));
144  CUDA_VERIFY(cudaEventRecord(event_, stream));
145 }
146 
147 CudaEvent::CudaEvent(CudaEvent&& event) noexcept
148  : event_(std::move(event.event_)) {
149  event.event_ = 0;
150 }
151 
152 CudaEvent::~CudaEvent() {
153  if (event_) {
154  CUDA_VERIFY(cudaEventDestroy(event_));
155  }
156 }
157 
158 CudaEvent&
159 CudaEvent::operator=(CudaEvent&& event) noexcept {
160  event_ = std::move(event.event_);
161  event.event_ = 0;
162 
163  return *this;
164 }
165 
166 void
167 CudaEvent::streamWaitOnEvent(cudaStream_t stream) {
168  CUDA_VERIFY(cudaStreamWaitEvent(stream, event_, 0));
169 }
170 
171 void
173  CUDA_VERIFY(cudaEventSynchronize(event_));
174 }
175 
176 } } // namespace
void cpuWaitOnEvent()
Have the CPU wait for the completion of this event.
void streamWaitOnEvent(cudaStream_t stream)
Wait on this event in this stream.
CudaEvent(cudaStream_t stream)
Creates an event and records it in this stream.