115 lines
3.5 KiB
C++
115 lines
3.5 KiB
C++
/**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
|
|
|
|
|
|
#pragma once
|
|
|
|
#include <faiss/gpu/GpuResources.h>
#include <faiss/gpu/utils/StackDeviceMemory.h>
#include <faiss/gpu/utils/DeviceUtils.h>
#include <memory>
#include <unordered_map>
#include <utility>
#include <vector>
|
|
|
|
namespace faiss { namespace gpu {
|
|
|
|
/// Default implementation of GpuResources that allocates a cuBLAS
|
|
/// stream and 2 streams for use, as well as temporary memory
|
|
class StandardGpuResources : public GpuResources {
|
|
public:
|
|
StandardGpuResources();
|
|
|
|
~StandardGpuResources() override;
|
|
|
|
/// Disable allocation of temporary memory; all temporary memory
|
|
/// requests will call cudaMalloc / cudaFree at the point of use
|
|
void noTempMemory();
|
|
|
|
/// Specify that we wish to use a certain fixed size of memory on
|
|
/// all devices as temporary memory. This is the upper bound for the GPU
|
|
/// memory that we will reserve. We will never go above 1.5 GiB on any GPU;
|
|
/// smaller GPUs (with <= 4 GiB or <= 8 GiB) will use less memory than that.
|
|
/// To avoid any temporary memory allocation, pass 0.
|
|
void setTempMemory(size_t size);
|
|
|
|
/// Set amount of pinned memory to allocate, for async GPU <-> CPU
|
|
/// transfers
|
|
void setPinnedMemory(size_t size);
|
|
|
|
/// Called to change the stream for work ordering
|
|
void setDefaultStream(int device, cudaStream_t stream);
|
|
|
|
/// Called to change the work ordering streams to the null stream
|
|
/// for all devices
|
|
void setDefaultNullStreamAllDevices();
|
|
|
|
/// Enable or disable the warning about not having enough temporary memory
|
|
/// when cudaMalloc gets called
|
|
void setCudaMallocWarning(bool b);
|
|
|
|
public:
|
|
/// Internal system calls
|
|
|
|
/// Initialize resources for this device
|
|
void initializeForDevice(int device) override;
|
|
|
|
cublasHandle_t getBlasHandle(int device) override;
|
|
|
|
cudaStream_t getDefaultStream(int device) override;
|
|
|
|
std::vector<cudaStream_t> getAlternateStreams(int device) override;
|
|
|
|
DeviceMemory& getMemoryManager(int device) override;
|
|
|
|
std::pair<void*, size_t> getPinnedMemory() override;
|
|
|
|
cudaStream_t getAsyncCopyStream(int device) override;
|
|
|
|
private:
|
|
/// Have GPU resources been initialized for this device yet?
|
|
bool isInitialized(int device) const;
|
|
|
|
/// Adjust the default temporary memory allocation based on the total GPU
|
|
/// memory size
|
|
static size_t getDefaultTempMemForGPU(int device, size_t requested);
|
|
|
|
private:
|
|
/// Our default stream that work is ordered on, one per each device
|
|
std::unordered_map<int, cudaStream_t> defaultStreams_;
|
|
|
|
/// This contains particular streams as set by the user for
|
|
/// ordering, if any
|
|
std::unordered_map<int, cudaStream_t> userDefaultStreams_;
|
|
|
|
/// Other streams we can use, per each device
|
|
std::unordered_map<int, std::vector<cudaStream_t> > alternateStreams_;
|
|
|
|
/// Async copy stream to use for GPU <-> CPU pinned memory copies
|
|
std::unordered_map<int, cudaStream_t> asyncCopyStreams_;
|
|
|
|
/// cuBLAS handle for each device
|
|
std::unordered_map<int, cublasHandle_t> blasHandles_;
|
|
|
|
/// Temporary memory provider, per each device
|
|
std::unordered_map<int, std::unique_ptr<StackDeviceMemory> > memory_;
|
|
|
|
/// Pinned memory allocation for use with this GPU
|
|
void* pinnedMemAlloc_;
|
|
size_t pinnedMemAllocSize_;
|
|
|
|
/// Another option is to use a specified amount of memory on all
|
|
/// devices
|
|
size_t tempMemSize_;
|
|
|
|
/// Amount of pinned memory we should allocate
|
|
size_t pinnedMemSize_;
|
|
|
|
/// Whether or not a warning upon cudaMalloc is generated
|
|
bool cudaMallocWarning_;
|
|
};
|
|
|
|
} } // namespace
|