Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
DeviceUtils.h
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 
10 #pragma once
11 
12 #include "../../FaissAssert.h"
13 #include <cuda_runtime.h>
14 #include <cublas_v2.h>
15 #include <vector>
16 
17 namespace faiss { namespace gpu {
18 
19 /// Returns the GPU device that is current for the calling thread (thread-local)
20 int getCurrentDevice();
21 
22 /// Sets the GPU device that is current for the calling thread (thread-local)
23 void setCurrentDevice(int device);
24 
25 /// Returns the number of available GPU devices
26 int getNumDevices();
27 
28 /// Synchronizes the CPU against all devices (equivalent to calling
29 /// cudaDeviceSynchronize for each device)
30 void synchronizeAllDevices();
31 
32 /// Returns a reference to the cached cudaDeviceProp for the given device
33 const cudaDeviceProp& getDeviceProperties(int device);
34 
35 /// Returns a reference to the cached cudaDeviceProp for the current device
36 const cudaDeviceProp& getCurrentDeviceProperties();
37 
38 /// Returns the maximum number of threads available for the given GPU
39 /// device
40 int getMaxThreads(int device);
41 
42 /// Equivalent to getMaxThreads(getCurrentDevice())
43 int getMaxThreadsCurrentDevice();
44 
45 /// Returns the maximum shared memory (smem) per block available for the given GPU device
46 size_t getMaxSharedMemPerBlock(int device);
47 
48 /// Equivalent to getMaxSharedMemPerBlock(getCurrentDevice())
49 size_t getMaxSharedMemPerBlockCurrentDevice();
50 
51 /// For a given pointer, returns the id of the device it is located on
52 /// (deviceId >= 0), or -1 if it is located on the host.
53 int getDeviceForAddress(const void* p);
54 
55 /// Returns whether the given device supports full unified memory,
56 /// sharing host memory
57 bool getFullUnifiedMemSupport(int device);
58 
59 /// Equivalent to getFullUnifiedMemSupport(getCurrentDevice())
60 bool getFullUnifiedMemSupportCurrentDevice();
61 
62 /// RAII object that sets the current device on construction, and restores
63 /// the previous device upon destruction
64 class DeviceScope {
65  public:
66  explicit DeviceScope(int device);  // device: the device to make current while this object lives
67  ~DeviceScope();  // restores the previous device, per the class description
68 
69  private:
70  int prevDevice_;  // presumably the device that was current before construction; the code that sets it is not in this header
71 };
72 
73 /// RAII object to manage a cublasHandle_t
75  public:
78  // NOTE(review): listing lines 74, 76 and 77 are absent from this extract -- presumably the class header and its constructor/destructor declarations; confirm against the original DeviceUtils.h
79  cublasHandle_t get() { return blasHandle_; }  // returns the stored handle
80 
81  private:
82  cublasHandle_t blasHandle_;  // the cuBLAS handle held by this object
83 };
84 
85 /// RAII object to manage a cudaEvent_t. Move-only: copy construction and copy assignment are deleted
86 class CudaEvent {
87  public:
88  /// Creates an event and records it in the given stream
89  explicit CudaEvent(cudaStream_t stream);
90  CudaEvent(const CudaEvent& event) = delete;
91  CudaEvent(CudaEvent&& event) noexcept;
92  ~CudaEvent();
93  /// Returns the cudaEvent_t held by this object
94  cudaEvent_t get() { return event_; }
95 
96  /// Makes the given stream wait on this event
97  void streamWaitOnEvent(cudaStream_t stream);
98 
99  /// Have the CPU wait for the completion of this event
100  void cpuWaitOnEvent();
101 
102  CudaEvent& operator=(CudaEvent&& event) noexcept;
103  CudaEvent& operator=(const CudaEvent& event) = delete;  // canonical const-ref form, matching the deleted copy constructor
104 
105  private:
106  cudaEvent_t event_;  // the event handle held by this object
107 };
108 
109 /// Wrapper to test return status of CUDA functions: evaluates X once and asserts it equals cudaSuccess, reporting the numeric code and the cudaGetErrorString() text on failure
110 #define CUDA_VERIFY(X) \
111  do { \
112  auto err__ = (X); \
113  FAISS_ASSERT_FMT(err__ == cudaSuccess, "CUDA error %d %s", (int) err__, cudaGetErrorString(err__)); \
114  } while (0)
115 
116 /// Probes for CUDA errors via CUDA_VERIFY; uncomment the define below to probe synchronously
117 // #define FAISS_GPU_SYNC_ERROR 1
118 
119 #if !defined(FAISS_GPU_SYNC_ERROR)  // default: only check cudaGetLastError()
120 #define CUDA_TEST_ERROR() \
121  do { \
122  CUDA_VERIFY(cudaGetLastError()); \
123  } while (0)
124 #else  // FAISS_GPU_SYNC_ERROR defined: check the result of cudaDeviceSynchronize()
125 #define CUDA_TEST_ERROR() \
126  do { \
127  CUDA_VERIFY(cudaDeviceSynchronize()); \
128  } while (0)
129 #endif
130 
131 /// Makes each stream in listWaiting wait on an event recorded in each stream of listWaitOn
132 template <typename L1, typename L2>
133 void streamWaitBase(const L1& listWaiting, const L2& listWaitOn) {
134  // Create and record one timing-disabled event per stream being waited on
135  std::vector<cudaEvent_t> markers;
136  for (const auto& producer : listWaitOn) {
137  cudaEvent_t marker;
138  CUDA_VERIFY(cudaEventCreateWithFlags(&marker, cudaEventDisableTiming));
139  CUDA_VERIFY(cudaEventRecord(marker, producer));
140  markers.push_back(marker);
141  }
142 
143  // Issue a wait on every recorded event from every waiting stream
144  for (const auto& consumer : listWaiting) {
145  for (cudaEvent_t marker : markers) {
146  CUDA_VERIFY(cudaStreamWaitEvent(consumer, marker, 0));
147  }
148  }
149  // Destroy the events created above
150  for (cudaEvent_t marker : markers) {
151  CUDA_VERIFY(cudaEventDestroy(marker));
152  }
153 }
154 
155 /// streamWait overloads that forward to streamWaitBase(a, b), where `a` holds the waiting streams and `b` the streams waited on.
156 /// They allow usage of initializer_list as arguments, since otherwise {...} doesn't have a type. This overload: `b` is an initializer_list
157 template <typename L1>
158 void streamWait(const L1& a,
159  const std::initializer_list<cudaStream_t>& b) {
160  streamWaitBase(a, b);
161 }
162 /// Overload where the waiting streams `a` are given as an initializer_list
163 template <typename L2>
164 void streamWait(const std::initializer_list<cudaStream_t>& a,
165  const L2& b) {
166  streamWaitBase(a, b);
167 }
168 /// Overload where both `a` and `b` are given as initializer_lists (non-template, hence declared inline in this header)
169 inline void streamWait(const std::initializer_list<cudaStream_t>& a,
170  const std::initializer_list<cudaStream_t>& b) {
171  streamWaitBase(a, b);
172 }
173 
174 } } // namespace
void cpuWaitOnEvent()
Have the CPU wait for the completion of this event.
void streamWaitOnEvent(cudaStream_t stream)
Wait on this event in this stream.
CudaEvent(cudaStream_t stream)
Creates an event and records it in this stream.
RAII object to manage a cublasHandle_t.
Definition: DeviceUtils.h:74