Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
DeviceUtils.cpp
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 // Copyright 2004-present Facebook. All Rights Reserved.
10 
11 #include "DeviceUtils.h"
12 #include "../../FaissAssert.h"
13 #include <mutex>
14 #include <unordered_map>
15 
16 namespace faiss { namespace gpu {
17 
18 int getCurrentDevice() {
19  int dev = -1;
20  CUDA_VERIFY(cudaGetDevice(&dev));
21  FAISS_ASSERT(dev != -1);
22 
23  return dev;
24 }
25 
26 void setCurrentDevice(int device) {
27  CUDA_VERIFY(cudaSetDevice(device));
28 }
29 
30 int getNumDevices() {
31  int numDev = -1;
32  CUDA_VERIFY(cudaGetDeviceCount(&numDev));
33  FAISS_ASSERT(numDev != -1);
34 
35  return numDev;
36 }
37 
38 void synchronizeAllDevices() {
39  for (int i = 0; i < getNumDevices(); ++i) {
40  DeviceScope scope(i);
41 
42  CUDA_VERIFY(cudaDeviceSynchronize());
43  }
44 }
45 
46 cudaDeviceProp& getDeviceProperties(int device) {
47  static std::mutex mutex;
48  static std::unordered_map<int, cudaDeviceProp> properties;
49 
50  std::lock_guard<std::mutex> guard(mutex);
51 
52  auto it = properties.find(device);
53  if (it == properties.end()) {
54  cudaDeviceProp prop;
55  CUDA_VERIFY(cudaGetDeviceProperties(&prop, device));
56 
57  properties[device] = prop;
58  it = properties.find(device);
59  }
60 
61  return it->second;
62 }
63 
64 int getMaxThreads(int device) {
65  return getDeviceProperties(device).maxThreadsPerBlock;
66 }
67 
68 int getMaxThreadsCurrentDevice() {
69  return getMaxThreads(getCurrentDevice());
70 }
71 
72 size_t getMaxSharedMemPerBlock(int device) {
73  return getDeviceProperties(device).sharedMemPerBlock;
74 }
75 
76 size_t getMaxSharedMemPerBlockCurrentDevice() {
77  return getMaxSharedMemPerBlock(getCurrentDevice());
78 }
79 
80 int getDeviceForAddress(const void* p) {
81  if (!p) {
82  return -1;
83  }
84 
85  cudaPointerAttributes att;
86  cudaError_t err = cudaPointerGetAttributes(&att, p);
87  FAISS_ASSERT(err == cudaSuccess ||
88  err == cudaErrorInvalidValue);
89 
90  if (err == cudaErrorInvalidValue) {
91  // Make sure the current thread error status has been reset
92  err = cudaGetLastError();
93  FAISS_ASSERT(err == cudaErrorInvalidValue);
94  return -1;
95  } else if (att.memoryType == cudaMemoryTypeHost) {
96  return -1;
97  } else {
98  return att.device;
99  }
100 }
101 
102 bool getFullUnifiedMemSupport(int device) {
103  const auto& prop = getDeviceProperties(device);
104  return (prop.major >= 6);
105 }
106 
107 bool getFullUnifiedMemSupportCurrentDevice() {
108  return getFullUnifiedMemSupport(getCurrentDevice());
109 }
110 
111 DeviceScope::DeviceScope(int device) {
112  prevDevice_ = getCurrentDevice();
113 
114  if (prevDevice_ != device) {
115  setCurrentDevice(device);
116  } else {
117  prevDevice_ = -1;
118  }
119 }
120 
121 DeviceScope::~DeviceScope() {
122  if (prevDevice_ != -1) {
123  setCurrentDevice(prevDevice_);
124  }
125 }
126 
127 CublasHandleScope::CublasHandleScope() {
128  auto blasStatus = cublasCreate(&blasHandle_);
129  FAISS_ASSERT(blasStatus == CUBLAS_STATUS_SUCCESS);
130 }
131 
132 CublasHandleScope::~CublasHandleScope() {
133  auto blasStatus = cublasDestroy(blasHandle_);
134  FAISS_ASSERT(blasStatus == CUBLAS_STATUS_SUCCESS);
135 }
136 
137 CudaEvent::CudaEvent(cudaStream_t stream)
138  : event_(0) {
139  CUDA_VERIFY(cudaEventCreateWithFlags(&event_, cudaEventDisableTiming));
140  CUDA_VERIFY(cudaEventRecord(event_, stream));
141 }
142 
143 CudaEvent::CudaEvent(CudaEvent&& event) noexcept
144  : event_(std::move(event.event_)) {
145  event.event_ = 0;
146 }
147 
148 CudaEvent::~CudaEvent() {
149  if (event_) {
150  CUDA_VERIFY(cudaEventDestroy(event_));
151  }
152 }
153 
154 CudaEvent&
155 CudaEvent::operator=(CudaEvent&& event) noexcept {
156  event_ = std::move(event.event_);
157  event.event_ = 0;
158 
159  return *this;
160 }
161 
162 void
163 CudaEvent::streamWaitOnEvent(cudaStream_t stream) {
164  CUDA_VERIFY(cudaStreamWaitEvent(stream, event_, 0));
165 }
166 
167 void
169  CUDA_VERIFY(cudaEventSynchronize(event_));
170 }
171 
172 } } // namespace
void cpuWaitOnEvent()
Have the CPU wait for the completion of this event.
void streamWaitOnEvent(cudaStream_t stream)
Wait on this event in this stream.
CudaEvent(cudaStream_t stream)
Creates an event and records it in this stream.