Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
DeviceUtils.h
1 
2 /**
3  * Copyright (c) 2015-present, Facebook, Inc.
4  * All rights reserved.
5  *
6  * This source code is licensed under the CC-by-NC license found in the
7  * LICENSE file in the root directory of this source tree.
8  */
9 
10 // Copyright 2004-present Facebook. All Rights Reserved.
11 
12 #pragma once
13 
14 #include <cuda_runtime.h>
15 #include <cublas_v2.h>
16 #include <vector>
17 
18 namespace faiss { namespace gpu {
19 
20 /// Returns the current thread-local GPU device
21 int getCurrentDevice();
22 
23 /// Sets the current thread-local GPU device
24 void setCurrentDevice(int device);
25 
26 /// Returns the number of available GPU devices
27 int getNumDevices();
28 
29 /// Synchronizes the CPU against all devices (equivalent to
30 /// cudaDeviceSynchronize for each device)
31 void synchronizeAllDevices();
32 
33 /// Returns a cached cudaDeviceProp for the given device
34 cudaDeviceProp& getDeviceProperties(int device);
35 
36 /// Returns the maximum number of threads available for the given GPU
37 /// device
38 int getMaxThreads(int device);
39 
40 /// Equivalent to getMaxThreads(getCurrentDevice())
41 int getMaxThreadsCurrentDevice();
42 
43 /// Returns the maximum smem available for the given GPU device
44 size_t getMaxSharedMemPerBlock(int device);
45 
46 /// Equivalent to getMaxSharedMemPerBlock(getCurrentDevice())
47 size_t getMaxSharedMemPerBlockCurrentDevice();
48 
49 /// For a given pointer, returns whether or not it is located on
50 /// a device (deviceId >= 0) or the host (-1).
51 int getDeviceForAddress(const void* p);
52 
/// Scoped device selection: constructing the object sets the current
/// device, and destroying it restores the device that was current
/// beforehand
class DeviceScope {
 public:
  /// Sets `device` as the current device for the lifetime of this object
  explicit DeviceScope(int device);

  /// Restores the previously current device
  ~DeviceScope();

 private:
  /// Device to restore upon destruction
  int prevDevice_;
};
63 
64 /// RAII object to manage a cublasHandle_t
66  public:
69 
70  cublasHandle_t get() { return blasHandle_; }
71 
72  private:
73  cublasHandle_t blasHandle_;
74 };
75 
76 // RAII object to manage a cudaEvent_t
77 class CudaEvent {
78  public:
79  /// Creates an event and records it in this stream
80  explicit CudaEvent(cudaStream_t stream);
81  CudaEvent(const CudaEvent& event) = delete;
82  CudaEvent(CudaEvent&& event) noexcept;
83  ~CudaEvent();
84 
85  inline cudaEvent_t get() { return event_; }
86 
87  /// Wait on this event in this stream
88  void streamWaitOnEvent(cudaStream_t stream);
89 
90  /// Have the CPU wait for the completion of this event
91  void cpuWaitOnEvent();
92 
93  CudaEvent& operator=(CudaEvent&& event) noexcept;
94  CudaEvent& operator=(CudaEvent& event) = delete;
95 
96  private:
97  cudaEvent_t event_;
98 };
99 
/// Wrapper to test return status of CUDA functions.
///
/// X is evaluated exactly once in either configuration. When DEBUG is
/// set, the resulting status is asserted to be cudaSuccess via
/// FAISS_ASSERT; otherwise the status is explicitly discarded and no
/// check is performed.
#if DEBUG
#define CUDA_VERIFY(X)                                  \
  do {                                                  \
    /* unusual name so that X may itself mention `err` */ \
    auto cudaVerifyStatus_ = (X);                       \
    FAISS_ASSERT(cudaVerifyStatus_ == cudaSuccess);     \
  } while (0)
#else
#define CUDA_VERIFY(X) do { (void) (X); } while (0)
#endif
110 
111 /// Call for a collection of streams to wait on
112 template <typename L1, typename L2>
113 void streamWaitBase(const L1& listWaiting, const L2& listWaitOn) {
114  // For all the streams we are waiting on, create an event
115  std::vector<cudaEvent_t> events;
116  for (auto& stream : listWaitOn) {
117  cudaEvent_t event;
118  CUDA_VERIFY(cudaEventCreateWithFlags(&event, cudaEventDisableTiming));
119  CUDA_VERIFY(cudaEventRecord(event, stream));
120  events.push_back(event);
121  }
122 
123  // For all the streams that are waiting, issue a wait
124  for (auto& stream : listWaiting) {
125  for (auto& event : events) {
126  CUDA_VERIFY(cudaStreamWaitEvent(stream, event, 0));
127  }
128  }
129 
130  for (auto& event : events) {
131  CUDA_VERIFY(cudaEventDestroy(event));
132  }
133 }
134 
135 /// These versions allow usage of initializer_list as arguments, since
136 /// otherwise {...} doesn't have a type
137 template <typename L1>
138 void streamWait(const L1& a,
139  const std::initializer_list<cudaStream_t>& b) {
140  streamWaitBase(a, b);
141 }
142 
143 template <typename L2>
144 void streamWait(const std::initializer_list<cudaStream_t>& a,
145  const L2& b) {
146  streamWaitBase(a, b);
147 }
148 
149 inline void streamWait(const std::initializer_list<cudaStream_t>& a,
150  const std::initializer_list<cudaStream_t>& b) {
151  streamWaitBase(a, b);
152 }
153 
154 } } // namespace
void cpuWaitOnEvent()
Have the CPU wait for the completion of this event.
void streamWaitOnEvent(cudaStream_t stream)
Wait on this event in this stream.
CudaEvent(cudaStream_t stream)
Creates an event and records it in this stream.
RAII object to manage a cublasHandle_t.
Definition: DeviceUtils.h:65