Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
DeviceUtils.h
1 /**
2  * Copyright (c) Facebook, Inc. and its affiliates.
3  *
4  * This source code is licensed under the MIT license found in the
5  * LICENSE file in the root directory of this source tree.
6  */
7 
8 
9 #pragma once
10 
11 #include "../../FaissAssert.h"
12 #include <cuda_runtime.h>
13 #include <cublas_v2.h>
14 #include <vector>
15 
16 namespace faiss { namespace gpu {
17 
/// Reports which GPU device is current for the calling thread
/// (the setting is thread-local)
int getCurrentDevice();

/// Makes `device` the current GPU device for the calling thread
/// (the setting is thread-local)
void setCurrentDevice(int device);

/// Reports how many GPU devices are available
int getNumDevices();

/// Makes the CPU wait on every device; equivalent to calling
/// cudaDeviceSynchronize for each device
void synchronizeAllDevices();
30 
31 /// Returns a cached cudaDeviceProp for the given device
32 const cudaDeviceProp& getDeviceProperties(int device);
33 
34 /// Returns the cached cudaDeviceProp for the current device
35 const cudaDeviceProp& getCurrentDeviceProperties();
36 
37 /// Returns the maximum number of threads available for the given GPU
38 /// device
39 int getMaxThreads(int device);
40 
41 /// Equivalent to getMaxThreads(getCurrentDevice())
42 int getMaxThreadsCurrentDevice();
43 
44 /// Returns the maximum smem available for the given GPU device
45 size_t getMaxSharedMemPerBlock(int device);
46 
47 /// Equivalent to getMaxSharedMemPerBlock(getCurrentDevice())
48 size_t getMaxSharedMemPerBlockCurrentDevice();
49 
/// Classifies where the pointer `p` lives: the result is the id of the
/// owning device (a value >= 0) when it is located on a device, or -1
/// when it is located on the host.
int getDeviceForAddress(const void* p);

/// Reports whether `device` supports full unified memory sharing of
/// host memory
bool getFullUnifiedMemSupport(int device);

/// Shorthand for getFullUnifiedMemSupport(getCurrentDevice())
bool getFullUnifiedMemSupportCurrentDevice();

/// Reports the largest k-selection value supported, which depends on the
/// CUDA SDK this library was compiled with. Intended for non-CUDA files;
/// .cu files can use DeviceDefs.cuh instead.
int getMaxKSelection();
65 
/// Scoped device switch (RAII): constructing it sets the current device,
/// and destroying it restores the device that was previously current
class DeviceScope {
  // Device to restore upon destruction
  int prevDevice_;

 public:
  explicit DeviceScope(int device);
  ~DeviceScope();
};
76 
77 /// RAII object to manage a cublasHandle_t
79  public:
82 
83  cublasHandle_t get() { return blasHandle_; }
84 
85  private:
86  cublasHandle_t blasHandle_;
87 };
88 
89 // RAII object to manage a cudaEvent_t
90 class CudaEvent {
91  public:
92  /// Creates an event and records it in this stream
93  explicit CudaEvent(cudaStream_t stream);
94  CudaEvent(const CudaEvent& event) = delete;
95  CudaEvent(CudaEvent&& event) noexcept;
96  ~CudaEvent();
97 
98  inline cudaEvent_t get() { return event_; }
99 
100  /// Wait on this event in this stream
101  void streamWaitOnEvent(cudaStream_t stream);
102 
103  /// Have the CPU wait for the completion of this event
104  void cpuWaitOnEvent();
105 
106  CudaEvent& operator=(CudaEvent&& event) noexcept;
107  CudaEvent& operator=(CudaEvent& event) = delete;
108 
109  private:
110  cudaEvent_t event_;
111 };
112 
/// Checks the return status of a CUDA call: X is evaluated exactly once
/// and asserted to equal cudaSuccess. On failure the assertion message
/// reports the numeric error code together with the text returned by
/// cudaGetErrorString for it.
#define CUDA_VERIFY(X)                                            \
  do {                                                            \
    auto err__ = (X);                                             \
    FAISS_ASSERT_FMT(err__ == cudaSuccess, "CUDA error %d %s",    \
                     static_cast<int>(err__),                     \
                     cudaGetErrorString(err__));                  \
  } while (0)
120 
/// Probe for pending CUDA errors. By default this verifies the result of
/// cudaGetLastError(); when FAISS_GPU_SYNC_ERROR is defined (uncomment the
/// line below) it verifies cudaDeviceSynchronize() instead, which makes
/// the probe synchronous.
// #define FAISS_GPU_SYNC_ERROR 1

// CUDA_VERIFY already expands to a single do { ... } while (0) statement,
// so it is used directly as the expansion.
#ifdef FAISS_GPU_SYNC_ERROR
#define CUDA_TEST_ERROR() CUDA_VERIFY(cudaDeviceSynchronize())
#else
#define CUDA_TEST_ERROR() CUDA_VERIFY(cudaGetLastError())
#endif
135 
136 /// Call for a collection of streams to wait on
137 template <typename L1, typename L2>
138 void streamWaitBase(const L1& listWaiting, const L2& listWaitOn) {
139  // For all the streams we are waiting on, create an event
140  std::vector<cudaEvent_t> events;
141  for (auto& stream : listWaitOn) {
142  cudaEvent_t event;
143  CUDA_VERIFY(cudaEventCreateWithFlags(&event, cudaEventDisableTiming));
144  CUDA_VERIFY(cudaEventRecord(event, stream));
145  events.push_back(event);
146  }
147 
148  // For all the streams that are waiting, issue a wait
149  for (auto& stream : listWaiting) {
150  for (auto& event : events) {
151  CUDA_VERIFY(cudaStreamWaitEvent(stream, event, 0));
152  }
153  }
154 
155  for (auto& event : events) {
156  CUDA_VERIFY(cudaEventDestroy(event));
157  }
158 }
159 
160 /// These versions allow usage of initializer_list as arguments, since
161 /// otherwise {...} doesn't have a type
162 template <typename L1>
163 void streamWait(const L1& a,
164  const std::initializer_list<cudaStream_t>& b) {
165  streamWaitBase(a, b);
166 }
167 
168 template <typename L2>
169 void streamWait(const std::initializer_list<cudaStream_t>& a,
170  const L2& b) {
171  streamWaitBase(a, b);
172 }
173 
174 inline void streamWait(const std::initializer_list<cudaStream_t>& a,
175  const std::initializer_list<cudaStream_t>& b) {
176  streamWaitBase(a, b);
177 }
178 
179 } } // namespace
void cpuWaitOnEvent()
Have the CPU wait for the completion of this event.
Definition: DeviceUtils.cu:182
void streamWaitOnEvent(cudaStream_t stream)
Wait on this event in this stream.
Definition: DeviceUtils.cu:177
CudaEvent(cudaStream_t stream)
Creates an event and records it in this stream.
Definition: DeviceUtils.cu:151
RAII object to manage a cublasHandle_t.
Definition: DeviceUtils.h:78