Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
DeviceUtils.h
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 // Copyright 2004-present Facebook. All Rights Reserved.
10 
11 #pragma once
12 
13 #include "../../FaissAssert.h"
14 #include <cuda_runtime.h>
15 #include <cublas_v2.h>
16 #include <vector>
17 
18 namespace faiss { namespace gpu {
19 
20 /// Returns the current thread-local GPU device
21 int getCurrentDevice();
22 
23 /// Sets the current thread-local GPU device
24 void setCurrentDevice(int device);
25 
26 /// Returns the number of available GPU devices
27 int getNumDevices();
28 
29 /// Synchronizes the CPU against all devices (equivalent to
30 /// cudaDeviceSynchronize for each device)
31 void synchronizeAllDevices();
32 
33 /// Returns a cached cudaDeviceProp for the given device
34 const cudaDeviceProp& getDeviceProperties(int device);
35 
36 /// Returns the cached cudaDeviceProp for the current device
37 const cudaDeviceProp& getCurrentDeviceProperties();
38 
39 /// Returns the maximum number of threads available for the given GPU
40 /// device
41 int getMaxThreads(int device);
42 
43 /// Equivalent to getMaxThreads(getCurrentDevice())
44 int getMaxThreadsCurrentDevice();
45 
46 /// Returns the maximum shared memory (smem) per block available for the given GPU device
47 size_t getMaxSharedMemPerBlock(int device);
48 
49 /// Equivalent to getMaxSharedMemPerBlock(getCurrentDevice())
50 size_t getMaxSharedMemPerBlockCurrentDevice();
51 
52 /// For a given pointer, returns the id of the device on which it is
53 /// located (deviceId >= 0), or -1 if it is located on the host.
54 int getDeviceForAddress(const void* p);
55 
56 /// Returns whether the given device supports full unified memory
57 /// sharing with host memory
58 bool getFullUnifiedMemSupport(int device);
59 
60 /// Equivalent to getFullUnifiedMemSupport(getCurrentDevice())
61 bool getFullUnifiedMemSupportCurrentDevice();
62 
/// Scoped device selection: an instance sets the current device for its
/// lifetime, and the previously current device is restored when the
/// instance is destroyed
class DeviceScope {
  // Remembered so the destructor can restore it (presumably the device
  // that was current at construction time; see the implementation file)
  int prevDevice_;

 public:
  /// @param device the device to make current for the scope's lifetime
  explicit DeviceScope(int device);

  /// Restores the previous device
  ~DeviceScope();
};
73 
74 /// RAII object to manage a cublasHandle_t
76  public:
79 
80  cublasHandle_t get() { return blasHandle_; }
81 
82  private:
83  cublasHandle_t blasHandle_;
84 };
85 
86 // RAII object to manage a cudaEvent_t
87 class CudaEvent {
88  public:
89  /// Creates an event and records it in this stream
90  explicit CudaEvent(cudaStream_t stream);
91  CudaEvent(const CudaEvent& event) = delete;
92  CudaEvent(CudaEvent&& event) noexcept;
93  ~CudaEvent();
94 
95  inline cudaEvent_t get() { return event_; }
96 
97  /// Wait on this event in this stream
98  void streamWaitOnEvent(cudaStream_t stream);
99 
100  /// Have the CPU wait for the completion of this event
101  void cpuWaitOnEvent();
102 
103  CudaEvent& operator=(CudaEvent&& event) noexcept;
104  CudaEvent& operator=(CudaEvent& event) = delete;
105 
106  private:
107  cudaEvent_t event_;
108 };
109 
/// Wrapper to test return status of CUDA functions.
/// Evaluates X exactly once and asserts (via FAISS_ASSERT_FMT) that the
/// result equals cudaSuccess. On failure the message contains both the
/// numeric error code and the description from cudaGetErrorString().
/// The static_cast keeps the macro usable with any status expression that
/// was previously accepted by the comparison against cudaSuccess.
#define CUDA_VERIFY(X)                                                  \
  do {                                                                  \
    auto err__ = (X);                                                   \
    FAISS_ASSERT_FMT(err__ == cudaSuccess, "CUDA error %d %s",          \
                     (int) err__,                                       \
                     cudaGetErrorString(static_cast<cudaError_t>(err__))); \
  } while (0)
116 
/// CUDA_TEST_ERROR() probes for a CUDA error and checks it with
/// CUDA_VERIFY.
/// By default it verifies the result of cudaGetLastError(). When
/// FAISS_GPU_SYNC_ERROR is defined (e.g. by uncommenting the line below),
/// it verifies the result of cudaDeviceSynchronize() instead, i.e. it
/// probes for errors synchronously.
// #define FAISS_GPU_SYNC_ERROR 1

#if !defined(FAISS_GPU_SYNC_ERROR)
#define CUDA_TEST_ERROR()            \
  do {                               \
    CUDA_VERIFY(cudaGetLastError()); \
  } while (0)
#else
#define CUDA_TEST_ERROR()                 \
  do {                                    \
    CUDA_VERIFY(cudaDeviceSynchronize()); \
  } while (0)
#endif
131 
132 /// Call for a collection of streams to wait on
133 template <typename L1, typename L2>
134 void streamWaitBase(const L1& listWaiting, const L2& listWaitOn) {
135  // For all the streams we are waiting on, create an event
136  std::vector<cudaEvent_t> events;
137  for (auto& stream : listWaitOn) {
138  cudaEvent_t event;
139  CUDA_VERIFY(cudaEventCreateWithFlags(&event, cudaEventDisableTiming));
140  CUDA_VERIFY(cudaEventRecord(event, stream));
141  events.push_back(event);
142  }
143 
144  // For all the streams that are waiting, issue a wait
145  for (auto& stream : listWaiting) {
146  for (auto& event : events) {
147  CUDA_VERIFY(cudaStreamWaitEvent(stream, event, 0));
148  }
149  }
150 
151  for (auto& event : events) {
152  CUDA_VERIFY(cudaEventDestroy(event));
153  }
154 }
155 
156 /// These versions allow usage of initializer_list as arguments, since
157 /// otherwise {...} doesn't have a type
158 template <typename L1>
159 void streamWait(const L1& a,
160  const std::initializer_list<cudaStream_t>& b) {
161  streamWaitBase(a, b);
162 }
163 
164 template <typename L2>
165 void streamWait(const std::initializer_list<cudaStream_t>& a,
166  const L2& b) {
167  streamWaitBase(a, b);
168 }
169 
170 inline void streamWait(const std::initializer_list<cudaStream_t>& a,
171  const std::initializer_list<cudaStream_t>& b) {
172  streamWaitBase(a, b);
173 }
174 
175 } } // namespace
void cpuWaitOnEvent()
Have the CPU wait for the completion of this event.
void streamWaitOnEvent(cudaStream_t stream)
Wait on this event in this stream.
CudaEvent(cudaStream_t stream)
Creates an event and records it in this stream.
RAII object to manage a cublasHandle_t.
Definition: DeviceUtils.h:75