Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
StandardGpuResources.h
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the CC-by-NC license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 // Copyright 2004-present Facebook. All Rights Reserved.
10 
11 #pragma once
12 
13 #include "GpuResources.h"
14 #include "utils/StackDeviceMemory.h"
15 #include "utils/DeviceUtils.h"
16 #include <unordered_map>
17 #include <vector>
18 
19 namespace faiss { namespace gpu {
20 
21 /// Default implementation of GpuResources that allocates a cuBLAS
22 /// stream and 2 streams for use, as well as temporary memory
24  public:
26 
27  ~StandardGpuResources() override;
28 
29  /// Disable allocation of temporary memory; all temporary memory
30  /// requests will call cudaMalloc / cudaFree at the point of use
31  void noTempMemory();
32 
33  /// Specify that we wish to use a certain fixed size of memory on
34  /// all devices as temporary memory
35  void setTempMemory(size_t size);
36 
37  /// Specify that we wish to use a certain fraction of memory on
38  /// all devices as temporary memory
39  void setTempMemoryFraction(float fraction);
40 
41  /// Set amount of pinned memory to allocate, for async GPU <-> CPU
42  /// transfers
43  void setPinnedMemory(size_t size);
44 
45  public:
46  /// Internal system calls
47  void initializeForDevice(int device) override;
48 
49  cublasHandle_t getBlasHandle(int device) override;
50 
51  cudaStream_t getDefaultStream(int device) override;
52 
53  std::vector<cudaStream_t> getAlternateStreams(int device) override;
54 
55  DeviceMemory& getMemoryManager(int device) override;
56 
57  std::pair<void*, size_t> getPinnedMemory() override;
58 
59  cudaStream_t getAsyncCopyStream(int device) override;
60 
61  private:
62  /// Our default stream that work is ordered on, one per each device
63  std::unordered_map<int, cudaStream_t> defaultStreams_;
64 
65  /// Other streams we can use, per each device
66  std::unordered_map<int, std::vector<cudaStream_t> > alternateStreams_;
67 
68  /// Async copy stream to use for GPU <-> CPU pinned memory copies
69  std::unordered_map<int, cudaStream_t> asyncCopyStreams_;
70 
71  /// cuBLAS handle for each device
72  std::unordered_map<int, cublasHandle_t> blasHandles_;
73 
74  /// Temporary memory provider, per each device
75  std::unordered_map<int, std::unique_ptr<StackDeviceMemory> > memory_;
76 
77  /// Pinned memory allocation for use with this GPU
78  void* pinnedMemAlloc_;
79  size_t pinnedMemAllocSize_;
80 
81  /// By default, we reserve this fraction of memory on all devices
82  float tempMemFraction_;
83 
84  /// Another option is to use a specified amount of memory on all
85  /// devices
86  size_t tempMemSize_;
87 
88  /// Whether we look at tempMemFraction_ or tempMemSize_
89  bool useFraction_;
90 
91  /// Amount of pinned memory we should allocate
92  size_t pinnedMemSize_;
93 };
94 
95 } } // namespace
StandardGpuResources::initializeForDevice
void initializeForDevice(int device) override
Internal system calls.
DeviceMemory
Manages temporary memory allocations on a GPU device.
Definition: DeviceMemory.h:46