14 #include <cuda_runtime.h>
15 #include <cublas_v2.h>
18 namespace faiss {
namespace gpu {
/// Returns an int device identifier.
/// Judging by the name this is the device currently active for the caller;
/// the definition is not visible here — confirm against the implementation.
21 int getCurrentDevice();
/// Takes an int device identifier and returns nothing.
/// Presumably makes `device` the current device for subsequent calls;
/// the definition is not visible here — confirm against the implementation.
24 void setCurrentDevice(
int device);
/// Takes no arguments and returns nothing.
/// Presumably blocks until outstanding work on every device has finished;
/// the definition is not visible here — confirm against the implementation.
31 void synchronizeAllDevices();
/// Returns a non-const reference to a cudaDeviceProp for the given device.
/// NOTE(review): because a reference is returned, the referenced object must
/// outlive the call — presumably it lives in storage owned by the
/// implementation (e.g. a cache); confirm against the definition.
34 cudaDeviceProp& getDeviceProperties(
int device);
/// Returns an int thread limit for the given device.
/// NOTE(review): which limit this is (e.g. per block) is not visible from
/// the declaration — confirm against the definition.
38 int getMaxThreads(
int device);
/// Zero-argument counterpart of getMaxThreads(int).
/// Presumably equivalent to calling it with the current device — confirm
/// against the definition.
41 int getMaxThreadsCurrentDevice();
/// Returns a size_t shared-memory-per-block limit for the given device.
/// NOTE(review): presumably expressed in bytes — confirm units against the
/// definition.
44 size_t getMaxSharedMemPerBlock(
int device);
/// Zero-argument counterpart of getMaxSharedMemPerBlock(int).
/// Presumably equivalent to calling it with the current device — confirm
/// against the definition.
47 size_t getMaxSharedMemPerBlockCurrentDevice();
/// Returns an int device identifier for the memory address `p`.
/// NOTE(review): the value returned for addresses that do not belong to any
/// device (e.g. ordinary host memory) is not visible from the declaration —
/// confirm against the definition before relying on it.
51 int getDeviceForAddress(
const void* p);
/// Accessor: returns the stored cuBLAS handle (blasHandle_) by value.
/// The handle itself is not created or modified here.
70 cublasHandle_t
get() {
return blasHandle_; }
/// The cuBLAS handle held by this object.
/// NOTE(review): where it is created and destroyed is not visible in this
/// listing — confirm against the enclosing class.
73 cublasHandle_t blasHandle_;
/// Accessor: returns the stored CUDA event (event_) by value.
/// The event itself is not recorded, waited on, or destroyed here.
85 inline cudaEvent_t
get() {
return event_; }
102 #define CUDA_VERIFY(X) \
105 FAISS_ASSERT(err == cudaSuccess); \
108 #define CUDA_VERIFY(X) do { (X); } while (0)
/// For each stream in `listWaitOn`, creates an event and records it on that
/// stream; then, for each stream in `listWaiting`, enqueues a wait on every
/// one of those events; finally destroys all of the events.
/// Both arguments only need to be iterable with a range-for whose elements
/// can be passed where the CUDA calls below expect a stream.
/// NOTE(review): this listing appears to have dropped some original lines
/// (the declaration of `event` and the closing braces are not visible) —
/// confirm against the upstream header.
112 template <
typename L1,
typename L2>
113 void streamWaitBase(
const L1& listWaiting,
const L2& listWaitOn) {
// Collects one event per stream in listWaitOn
115 std::vector<cudaEvent_t> events;
116 for (
auto& stream : listWaitOn) {
// Event is created with timing disabled, recorded on this stream, then
// remembered so the waiting streams can be pointed at it below
118 CUDA_VERIFY(cudaEventCreateWithFlags(&event, cudaEventDisableTiming));
119 CUDA_VERIFY(cudaEventRecord(event, stream));
120 events.push_back(event);
// Every waiting stream waits on every recorded event
124 for (
auto& stream : listWaiting) {
125 for (
auto& event : events) {
126 CUDA_VERIFY(cudaStreamWaitEvent(stream, event, 0));
// The events are no longer referenced by this function after this point
130 for (
auto& event : events) {
131 CUDA_VERIFY(cudaEventDestroy(event));
/// Overload for a generic first argument and a braced list of streams as
/// the second argument; forwards both unchanged to streamWaitBase(a, b).
/// NOTE(review): the closing brace is not visible in this listing.
137 template <
typename L1>
138 void streamWait(
const L1& a,
139 const std::initializer_list<cudaStream_t>& b) {
140 streamWaitBase(a, b);
/// Overload for a braced list of streams as the first argument; forwards
/// `a` and `b` unchanged to streamWaitBase(a, b).
/// NOTE(review): the line declaring the second parameter `b` (presumably of
/// type `const L2&`) and the closing brace are missing from this listing —
/// confirm against the upstream header.
143 template <
typename L2>
144 void streamWait(
const std::initializer_list<cudaStream_t>& a,
146 streamWaitBase(a, b);
/// Non-template overload for the case where both arguments are braced lists
/// of streams; forwards both unchanged to streamWaitBase(a, b).
/// NOTE(review): the closing brace is not visible in this listing.
149 inline void streamWait(
const std::initializer_list<cudaStream_t>& a,
150 const std::initializer_list<cudaStream_t>& b) {
151 streamWaitBase(a, b);
void cpuWaitOnEvent()
Have the CPU wait for the completion of this event.
void streamWaitOnEvent(cudaStream_t stream)
Wait on this event in this stream.
CudaEvent(cudaStream_t stream)
Creates an event and records it in this stream.
RAII object to manage a cublasHandle_t.