Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
StandardGpuResources.h
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 
10 #pragma once
11 
12 #include "GpuResources.h"
13 #include "utils/StackDeviceMemory.h"
14 #include "utils/DeviceUtils.h"
15 #include <unordered_map>
16 #include <vector>
17 
18 namespace faiss { namespace gpu {
19 
20 /// Default implementation of GpuResources that allocates a cuBLAS
21 /// stream and 2 streams for use, as well as temporary memory
23  public:
25 
26  ~StandardGpuResources() override;
27 
28  /// Disable allocation of temporary memory; all temporary memory
29  /// requests will call cudaMalloc / cudaFree at the point of use
30  void noTempMemory();
31 
32  /// Specify that we wish to use a certain fixed size of memory on
33  /// all devices as temporary memory
34  void setTempMemory(size_t size);
35 
36  /// Specify that we wish to use a certain fraction of memory on
37  /// all devices as temporary memory
38  void setTempMemoryFraction(float fraction);
39 
40  /// Set amount of pinned memory to allocate, for async GPU <-> CPU
41  /// transfers
42  void setPinnedMemory(size_t size);
43 
44  /// Called to change the stream for work ordering
45  void setDefaultStream(int device, cudaStream_t stream);
46 
47  /// Called to change the work ordering streams to the null stream
48  /// for all devices
50 
51  /// Enable or disable the warning about not having enough temporary memory
52  /// when cudaMalloc gets called
53  void setCudaMallocWarning(bool b);
54 
55  public:
56  /// Internal system calls
57  void initializeForDevice(int device) override;
58 
59  cublasHandle_t getBlasHandle(int device) override;
60 
61  cudaStream_t getDefaultStream(int device) override;
62 
63  std::vector<cudaStream_t> getAlternateStreams(int device) override;
64 
65  DeviceMemory& getMemoryManager(int device) override;
66 
67  std::pair<void*, size_t> getPinnedMemory() override;
68 
69  cudaStream_t getAsyncCopyStream(int device) override;
70 
71  private:
72  /// Our default stream that work is ordered on, one per each device
73  std::unordered_map<int, cudaStream_t> defaultStreams_;
74 
75  /// This contains particular streams as set by the user for
76  /// ordering, if any
77  std::unordered_map<int, cudaStream_t> userDefaultStreams_;
78 
79  /// Other streams we can use, per each device
80  std::unordered_map<int, std::vector<cudaStream_t> > alternateStreams_;
81 
82  /// Async copy stream to use for GPU <-> CPU pinned memory copies
83  std::unordered_map<int, cudaStream_t> asyncCopyStreams_;
84 
85  /// cuBLAS handle for each device
86  std::unordered_map<int, cublasHandle_t> blasHandles_;
87 
88  /// Temporary memory provider, per each device
89  std::unordered_map<int, std::unique_ptr<StackDeviceMemory> > memory_;
90 
91  /// Pinned memory allocation for use with this GPU
92  void* pinnedMemAlloc_;
93  size_t pinnedMemAllocSize_;
94 
95  /// By default, we reserve this fraction of memory on all devices
96  float tempMemFraction_;
97 
98  /// Another option is to use a specified amount of memory on all
99  /// devices
100  size_t tempMemSize_;
101 
102  /// Whether we look at tempMemFraction_ or tempMemSize_
103  bool useFraction_;
104 
105  /// Amount of pinned memory we should allocate
106  size_t pinnedMemSize_;
107 
108  /// Whether or not a warning upon cudaMalloc is generated
109  bool cudaMallocWarning_;
110 };
111 
112 } } // namespace
void setDefaultStream(int device, cudaStream_t stream)
Called to change the stream for work ordering.
cublasHandle_t getBlasHandle(int device) override
Returns the cuBLAS handle that we use for the given device.
void initializeForDevice(int device) override
Internal system calls.
cudaStream_t getAsyncCopyStream(int device) override
Returns the stream on which we perform async CPU <-> GPU copies.
DeviceMemory & getMemoryManager(int device) override
Returns the temporary memory manager for the given device.
cudaStream_t getDefaultStream(int device) override
Returns the default stream that work is ordered on for the given device.
DeviceMemory
Manages temporary memory allocations on a GPU device.
Definition: DeviceMemory.h:45
std::pair< void *, size_t > getPinnedMemory() override
Returns the available CPU pinned memory buffer.
std::vector< cudaStream_t > getAlternateStreams(int device) override
Returns the set of alternative streams that we use for the given device.