#include "../../FaissAssert.h"
#include <cuda_runtime.h>
#include <cublas_v2.h>
#include <initializer_list>
#include <vector>
16 namespace faiss {
namespace gpu {
19 int getCurrentDevice();
22 void setCurrentDevice(
int device);
29 void synchronizeAllDevices();
32 const cudaDeviceProp& getDeviceProperties(
int device);
35 const cudaDeviceProp& getCurrentDeviceProperties();
39 int getMaxThreads(
int device);
42 int getMaxThreadsCurrentDevice();
45 size_t getMaxSharedMemPerBlock(
int device);
48 size_t getMaxSharedMemPerBlockCurrentDevice();
52 int getDeviceForAddress(
const void* p);
56 bool getFullUnifiedMemSupport(
int device);
59 bool getFullUnifiedMemSupportCurrentDevice();
64 int getMaxKSelection();
83 cublasHandle_t
get() {
return blasHandle_; }
86 cublasHandle_t blasHandle_;
98 inline cudaEvent_t
get() {
return event_; }
/// Evaluates the CUDA runtime expression X exactly once and asserts, through
/// FAISS_ASSERT_FMT, that it yielded cudaSuccess. On failure the numeric
/// error code and the cudaGetErrorString() text are passed to the assertion
/// message.
///
/// The body is wrapped in do { ... } while (0) so that `CUDA_VERIFY(x);`
/// behaves as a single statement (safe inside an unbraced if/else).
#define CUDA_VERIFY(X)                                            \
  do {                                                            \
    auto err__ = (X);                                             \
    FAISS_ASSERT_FMT(err__ == cudaSuccess, "CUDA error %d %s",    \
                     (int) err__, cudaGetErrorString(err__));     \
  } while (0)
/// Checks for a pending CUDA error at the point of use.
///
/// - With FAISS_GPU_SYNC_ERROR defined: verifies the result of
///   cudaDeviceSynchronize(), i.e. waits for the device so that errors from
///   asynchronous work are surfaced here (slow; intended for debugging).
/// - Otherwise: verifies the result of cudaGetLastError(), which reports
///   (and clears) the last recorded error without synchronizing.
///
/// Both variants go through CUDA_VERIFY and are wrapped in
/// do { ... } while (0) so they act as a single statement.
#ifdef FAISS_GPU_SYNC_ERROR
#define CUDA_TEST_ERROR()                     \
  do {                                        \
    CUDA_VERIFY(cudaDeviceSynchronize());     \
  } while (0)
#else
#define CUDA_TEST_ERROR()                     \
  do {                                        \
    CUDA_VERIFY(cudaGetLastError());          \
  } while (0)
#endif
137 template <
typename L1,
typename L2>
138 void streamWaitBase(
const L1& listWaiting,
const L2& listWaitOn) {
140 std::vector<cudaEvent_t> events;
141 for (
auto& stream : listWaitOn) {
143 CUDA_VERIFY(cudaEventCreateWithFlags(&event, cudaEventDisableTiming));
144 CUDA_VERIFY(cudaEventRecord(event, stream));
145 events.push_back(event);
149 for (
auto& stream : listWaiting) {
150 for (
auto& event : events) {
151 CUDA_VERIFY(cudaStreamWaitEvent(stream, event, 0));
155 for (
auto& event : events) {
156 CUDA_VERIFY(cudaEventDestroy(event));
162 template <
typename L1>
163 void streamWait(
const L1& a,
164 const std::initializer_list<cudaStream_t>& b) {
165 streamWaitBase(a, b);
168 template <
typename L2>
169 void streamWait(
const std::initializer_list<cudaStream_t>& a,
171 streamWaitBase(a, b);
174 inline void streamWait(
const std::initializer_list<cudaStream_t>& a,
175 const std::initializer_list<cudaStream_t>& b) {
176 streamWaitBase(a, b);
// void cpuWaitOnEvent()
//   Have the CPU wait for the completion of this event.
// void streamWaitOnEvent(cudaStream_t stream)
//   Wait on this event in this stream.
// CudaEvent(cudaStream_t stream)
//   Creates an event and records it in this stream.
// RAII object to manage a cublasHandle_t.