Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
DeviceUtils.h
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 // Copyright 2004-present Facebook. All Rights Reserved.
10 
11 #pragma once
12 
13 #include "../../FaissAssert.h"
14 #include <cuda_runtime.h>
15 #include <cublas_v2.h>
16 #include <vector>
17 
18 namespace faiss { namespace gpu {
19 
20 /// Returns the current thread-local GPU device
21 int getCurrentDevice();
22 
23 /// Sets the current thread-local GPU device
24 void setCurrentDevice(int device);
25 
26 /// Returns the number of available GPU devices
27 int getNumDevices();
28 
29 /// Synchronizes the CPU against all devices (equivalent to
30 /// cudaDeviceSynchronize for each device)
31 void synchronizeAllDevices();
32 
33 /// Returns a cached cudaDeviceProp for the given device
34 cudaDeviceProp& getDeviceProperties(int device);
35 
36 /// Returns the maximum number of threads available for the given GPU
37 /// device
38 int getMaxThreads(int device);
39 
40 /// Equivalent to getMaxThreads(getCurrentDevice())
41 int getMaxThreadsCurrentDevice();
42 
43 /// Returns the maximum smem available for the given GPU device
44 size_t getMaxSharedMemPerBlock(int device);
45 
46 /// Equivalent to getMaxSharedMemPerBlock(getCurrentDevice())
47 size_t getMaxSharedMemPerBlockCurrentDevice();
48 
49 /// For a given pointer, returns whether or not it is located on
50 /// a device (deviceId >= 0) or the host (-1).
51 int getDeviceForAddress(const void* p);
52 
53 /// Does the given device support full unified memory sharing host
54 /// memory?
55 bool getFullUnifiedMemSupport(int device);
56 
57 /// Equivalent to getFullUnifiedMemSupport(getCurrentDevice())
58 bool getFullUnifiedMemSupportCurrentDevice();
59 
/// RAII object to set the current device, and restore the previous
/// device upon destruction.
///
/// The guard is non-copyable: a copy would carry the same saved device and
/// restore it a second time when it is destroyed, possibly at an unrelated
/// point in the program. This matches CudaEvent, which also deletes its
/// copy operations.
class DeviceScope {
 public:
  /// Makes `device` the current device for the lifetime of this object
  explicit DeviceScope(int device);

  /// Restores the device that was current before this object was created
  ~DeviceScope();

  DeviceScope(const DeviceScope&) = delete;
  DeviceScope& operator=(const DeviceScope&) = delete;

 private:
  /// The device to restore upon destruction
  int prevDevice_;
};
70 
71 /// RAII object to manage a cublasHandle_t
73  public:
76 
77  cublasHandle_t get() { return blasHandle_; }
78 
79  private:
80  cublasHandle_t blasHandle_;
81 };
82 
83 // RAII object to manage a cudaEvent_t
84 class CudaEvent {
85  public:
86  /// Creates an event and records it in this stream
87  explicit CudaEvent(cudaStream_t stream);
88  CudaEvent(const CudaEvent& event) = delete;
89  CudaEvent(CudaEvent&& event) noexcept;
90  ~CudaEvent();
91 
92  inline cudaEvent_t get() { return event_; }
93 
94  /// Wait on this event in this stream
95  void streamWaitOnEvent(cudaStream_t stream);
96 
97  /// Have the CPU wait for the completion of this event
98  void cpuWaitOnEvent();
99 
100  CudaEvent& operator=(CudaEvent&& event) noexcept;
101  CudaEvent& operator=(CudaEvent& event) = delete;
102 
103  private:
104  cudaEvent_t event_;
105 };
106 
/// Wrapper to test return status of CUDA functions.
///
/// Evaluates X exactly once and asserts, through FAISS_ASSERT_FMT, that the
/// result equals cudaSuccess. On failure the message carries the numeric
/// error code followed by the description from cudaGetErrorString.
///
/// The local variable ends in a single underscore because identifiers that
/// contain a double underscore are reserved for the implementation.
#define CUDA_VERIFY(X)                                                    \
  do {                                                                    \
    auto cudaVerifyErr_ = (X);                                            \
    FAISS_ASSERT_FMT(                                                     \
        cudaVerifyErr_ == cudaSuccess,                                    \
        "CUDA error %d %s",                                               \
        (int) cudaVerifyErr_,                                             \
        cudaGetErrorString(static_cast<cudaError_t>(cudaVerifyErr_)));    \
  } while (0)
113 
/// Probes for a CUDA error and checks the result with CUDA_VERIFY.
///
/// By default the probe is cudaGetLastError(). When FAISS_GPU_SYNC_ERROR is
/// defined (for instance by uncommenting the line below), the probe is
/// cudaDeviceSynchronize() instead, i.e. errors are probed synchronously.
// #define FAISS_GPU_SYNC_ERROR 1

// CUDA_VERIFY already expands to a single do { ... } while (0) statement,
// so it is used directly as the body of the macro.
#ifndef FAISS_GPU_SYNC_ERROR
#define CUDA_TEST_ERROR() CUDA_VERIFY(cudaGetLastError())
#else
#define CUDA_TEST_ERROR() CUDA_VERIFY(cudaDeviceSynchronize())
#endif
128 
129 /// Call for a collection of streams to wait on
130 template <typename L1, typename L2>
131 void streamWaitBase(const L1& listWaiting, const L2& listWaitOn) {
132  // For all the streams we are waiting on, create an event
133  std::vector<cudaEvent_t> events;
134  for (auto& stream : listWaitOn) {
135  cudaEvent_t event;
136  CUDA_VERIFY(cudaEventCreateWithFlags(&event, cudaEventDisableTiming));
137  CUDA_VERIFY(cudaEventRecord(event, stream));
138  events.push_back(event);
139  }
140 
141  // For all the streams that are waiting, issue a wait
142  for (auto& stream : listWaiting) {
143  for (auto& event : events) {
144  CUDA_VERIFY(cudaStreamWaitEvent(stream, event, 0));
145  }
146  }
147 
148  for (auto& event : events) {
149  CUDA_VERIFY(cudaEventDestroy(event));
150  }
151 }
152 
153 /// These versions allow usage of initializer_list as arguments, since
154 /// otherwise {...} doesn't have a type
155 template <typename L1>
156 void streamWait(const L1& a,
157  const std::initializer_list<cudaStream_t>& b) {
158  streamWaitBase(a, b);
159 }
160 
161 template <typename L2>
162 void streamWait(const std::initializer_list<cudaStream_t>& a,
163  const L2& b) {
164  streamWaitBase(a, b);
165 }
166 
167 inline void streamWait(const std::initializer_list<cudaStream_t>& a,
168  const std::initializer_list<cudaStream_t>& b) {
169  streamWaitBase(a, b);
170 }
171 
172 } } // namespace
void cpuWaitOnEvent()
Have the CPU wait for the completion of this event.
void streamWaitOnEvent(cudaStream_t stream)
Wait on this event in this stream.
CudaEvent(cudaStream_t stream)
Creates an event and records it in this stream.
faiss::gpu::CublasHandleScope
RAII object to manage a cublasHandle_t.
Definition: DeviceUtils.h:72