Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
StandardGpuResources.h
1 /**
2  * Copyright (c) Facebook, Inc. and its affiliates.
3  *
4  * This source code is licensed under the MIT license found in the
5  * LICENSE file in the root directory of this source tree.
6  */
7 
8 
9 #pragma once
10 
11 #include "GpuResources.h"
12 #include "utils/StackDeviceMemory.h"
13 #include "utils/DeviceUtils.h"
14 #include <unordered_map>
15 #include <vector>
16 
17 namespace faiss { namespace gpu {
18 
19 /// Default implementation of GpuResources that allocates a cuBLAS
20 /// handle and 2 streams for use, as well as temporary memory
21 class StandardGpuResources : public GpuResources {
22  public:
23  StandardGpuResources();
24 
25  ~StandardGpuResources() override;
26 
27  /// Disable allocation of temporary memory; all temporary memory
28  /// requests will call cudaMalloc / cudaFree at the point of use
29  void noTempMemory();
30 
31  /// Specify that we wish to use a certain fixed size of memory on
32  /// all devices as temporary memory. This is the upper bound for the GPU
33  /// memory that we will reserve. We will never go above 1.5 GiB on any GPU;
34  /// smaller GPUs (with <= 4 GiB or <= 8 GiB) will use less memory than that.
35  /// To avoid any temporary memory allocation, pass 0.
36  void setTempMemory(size_t size);
37 
38  /// Set amount of pinned memory to allocate, for async GPU <-> CPU
39  /// transfers
40  void setPinnedMemory(size_t size);
41 
42  /// Called to change the stream for work ordering
43  void setDefaultStream(int device, cudaStream_t stream);
44 
45  /// Called to change the work ordering streams to the null stream
46  /// for all devices
47  void setDefaultNullStreamAllDevices();
48 
49  /// Enable or disable the warning about not having enough temporary memory
50  /// when cudaMalloc gets called
51  void setCudaMallocWarning(bool b);
52 
53  public:
54  /// Internal system calls
55 
56  /// Initialize resources for this device
57  void initializeForDevice(int device) override;
58 
59  cublasHandle_t getBlasHandle(int device) override;
60 
61  cudaStream_t getDefaultStream(int device) override;
62 
63  std::vector<cudaStream_t> getAlternateStreams(int device) override;
64 
65  DeviceMemory& getMemoryManager(int device) override;
66 
67  std::pair<void*, size_t> getPinnedMemory() override;
68 
69  cudaStream_t getAsyncCopyStream(int device) override;
70 
71  private:
72  /// Have GPU resources been initialized for this device yet?
73  bool isInitialized(int device) const;
74 
75  /// Adjust the default temporary memory allocation based on the total GPU
76  /// memory size
77  static size_t getDefaultTempMemForGPU(int device, size_t requested);
78 
79  private:
80  /// Our default stream that work is ordered on, one per each device
81  std::unordered_map<int, cudaStream_t> defaultStreams_;
82 
83  /// This contains particular streams as set by the user for
84  /// ordering, if any
85  std::unordered_map<int, cudaStream_t> userDefaultStreams_;
86 
87  /// Other streams we can use, per each device
88  std::unordered_map<int, std::vector<cudaStream_t> > alternateStreams_;
89 
90  /// Async copy stream to use for GPU <-> CPU pinned memory copies
91  std::unordered_map<int, cudaStream_t> asyncCopyStreams_;
92 
93  /// cuBLAS handle for each device
94  std::unordered_map<int, cublasHandle_t> blasHandles_;
95 
96  /// Temporary memory provider, per each device
97  std::unordered_map<int, std::unique_ptr<StackDeviceMemory> > memory_;
98 
99  /// Pinned memory allocation for use with this GPU
100  void* pinnedMemAlloc_;
101  size_t pinnedMemAllocSize_;
102 
103  /// Another option is to use a specified amount of memory on all
104  /// devices
105  size_t tempMemSize_;
106 
107  /// Amount of pinned memory we should allocate
108  size_t pinnedMemSize_;
109 
110  /// Whether or not a warning upon cudaMalloc is generated
111  bool cudaMallocWarning_;
112 };
113 
114 } } // namespace
void setDefaultStream(int device, cudaStream_t stream)
Called to change the stream for work ordering.
cublasHandle_t getBlasHandle(int device) override
Returns the cuBLAS handle that we use for the given device.
void initializeForDevice(int device) override
Internal system calls.
cudaStream_t getAsyncCopyStream(int device) override
Returns the stream on which we perform async CPU <-> GPU copies.
DeviceMemory & getMemoryManager(int device) override
Returns the temporary memory manager for the given device.
cudaStream_t getDefaultStream(int device) override
Returns the stream that we order all computation on for the given device.
faiss::gpu::DeviceMemory
Manages temporary memory allocations on a GPU device.
Definition: DeviceMemory.h:44
std::pair< void *, size_t > getPinnedMemory() override
Returns the available CPU pinned memory buffer.
std::vector< cudaStream_t > getAlternateStreams(int device) override
Returns the set of alternative streams that we use for the given device.