/** * Copyright (c) 2015-present, Facebook, Inc. * All rights reserved. * * This source code is licensed under the BSD+Patents license found in the * LICENSE file in the root directory of this source tree. */ #pragma once #include "GpuResources.h" #include "utils/StackDeviceMemory.h" #include "utils/DeviceUtils.h" #include #include namespace faiss { namespace gpu { /// Default implementation of GpuResources that allocates a cuBLAS /// stream and 2 streams for use, as well as temporary memory class StandardGpuResources : public GpuResources { public: StandardGpuResources(); ~StandardGpuResources() override; /// Disable allocation of temporary memory; all temporary memory /// requests will call cudaMalloc / cudaFree at the point of use void noTempMemory(); /// Specify that we wish to use a certain fixed size of memory on /// all devices as temporary memory. This is the upper bound for the GPU /// memory that we will reserve. We will never go above 1.5 GiB on any GPU; /// smaller GPUs (with <= 4 GiB or <= 8 GiB) will use less memory than that. /// To avoid any temporary memory allocation, pass 0. void setTempMemory(size_t size); /// Set amount of pinned memory to allocate, for async GPU <-> CPU /// transfers void setPinnedMemory(size_t size); /// Called to change the stream for work ordering void setDefaultStream(int device, cudaStream_t stream); /// Called to change the work ordering streams to the null stream /// for all devices void setDefaultNullStreamAllDevices(); /// Enable or disable the warning about not having enough temporary memory /// when cudaMalloc gets called void setCudaMallocWarning(bool b); public: /// Internal system calls /// Initialize resources for this device void initializeForDevice(int device) override; cublasHandle_t getBlasHandle(int device) override; cudaStream_t getDefaultStream(int device) override; std::vector getAlternateStreams(int device) override; DeviceMemory& getMemoryManager(int device) override; std::pair getPinnedMemory() override; cudaStream_t getAsyncCopyStream(int device) override; private: /// Have GPU resources been initialized for this device yet? bool isInitialized(int device) const; /// Adjust the default temporary memory allocation based on the total GPU /// memory size static size_t getDefaultTempMemForGPU(int device, size_t requested); private: /// Our default stream that work is ordered on, one per each device std::unordered_map defaultStreams_; /// This contains particular streams as set by the user for /// ordering, if any std::unordered_map userDefaultStreams_; /// Other streams we can use, per each device std::unordered_map > alternateStreams_; /// Async copy stream to use for GPU <-> CPU pinned memory copies std::unordered_map asyncCopyStreams_; /// cuBLAS handle for each device std::unordered_map blasHandles_; /// Temporary memory provider, per each device std::unordered_map > memory_; /// Pinned memory allocation for use with this GPU void* pinnedMemAlloc_; size_t pinnedMemAllocSize_; /// Another option is to use a specified amount of memory on all /// devices size_t tempMemSize_; /// Amount of pinned memory we should allocate size_t pinnedMemSize_; /// Whether or not a warning upon cudaMalloc is generated bool cudaMallocWarning_; }; } } // namespace