Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
StandardGpuResources.h
1 
2 /**
3  * Copyright (c) 2015-present, Facebook, Inc.
4  * All rights reserved.
5  *
6  * This source code is licensed under the CC-by-NC license found in the
7  * LICENSE file in the root directory of this source tree.
8  */
9 
10 // Copyright 2004-present Facebook. All Rights Reserved.
11 
12 #pragma once
13 
14 #include "GpuResources.h"
15 #include "utils/StackDeviceMemory.h"
16 #include "utils/DeviceUtils.h"
17 #include <unordered_map>
18 #include <vector>
19 
20 namespace faiss { namespace gpu {
21 
22 /// Default implementation of GpuResources that allocates a cuBLAS
23 /// stream and 2 streams for use, as well as temporary memory
24 class StandardGpuResources : public GpuResources {
25  public:
26  StandardGpuResources();
27 
28  ~StandardGpuResources() override;
29 
30  /// Disable allocation of temporary memory; all temporary memory
31  /// requests will call cudaMalloc / cudaFree at the point of use
32  void noTempMemory();
33 
34  /// Specify that we wish to use a certain fixed size of memory on
35  /// all devices as temporary memory
36  void setTempMemory(size_t size);
37 
38  /// Specify that we wish to use a certain fraction of memory on
39  /// all devices as temporary memory
40  void setTempMemoryFraction(float fraction);
41 
42  /// Set amount of pinned memory to allocate, for async GPU <-> CPU
43  /// transfers
44  void setPinnedMemory(size_t size);
45 
46  public:
47  /// Internal system calls
48  void initializeForDevice(int device) override;
49 
50  cublasHandle_t getBlasHandle(int device) override;
51 
52  cudaStream_t getDefaultStream(int device) override;
53 
54  std::vector<cudaStream_t> getAlternateStreams(int device) override;
55 
56  DeviceMemory& getMemoryManager(int device) override;
57 
58  std::pair<void*, size_t> getPinnedMemory() override;
59 
60  cudaStream_t getAsyncCopyStream(int device) override;
61 
62  private:
63  /// The default stream on which work is ordered, one per device
64  std::unordered_map<int, cudaStream_t> defaultStreams_;
65 
66  /// Additional streams we can use, per device
67  std::unordered_map<int, std::vector<cudaStream_t>> alternateStreams_;
68 
69  /// Async copy stream to use for GPU <-> CPU pinned memory copies
70  std::unordered_map<int, cudaStream_t> asyncCopyStreams_;
71 
72  /// cuBLAS handle for each device
73  std::unordered_map<int, cublasHandle_t> blasHandles_;
74 
75  /// Temporary memory provider, one per device
76  std::unordered_map<int, std::unique_ptr<StackDeviceMemory>> memory_;
77 
78  /// Pinned memory allocation (a single buffer, not one per device) and its size
79  void* pinnedMemAlloc_;
80  size_t pinnedMemAllocSize_;
81 
82  /// By default, we reserve this fraction of memory on all devices
83  float tempMemFraction_;
84 
85  /// Another option is to use a specified amount of memory on all
86  /// devices
87  size_t tempMemSize_;
88 
89  /// Whether we look at tempMemFraction_ or tempMemSize_
90  bool useFraction_;
91 
92  /// Amount of pinned memory we should allocate
93  size_t pinnedMemSize_;
94 };
95 
96 } } // namespace
void initializeForDevice(int device) override
Internal system calls.
Manages temporary memory allocations on a GPU device.
Definition: DeviceMemory.h:47