Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
GpuIndexFlat.h
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 
10 #pragma once
11 
12 #include "GpuIndex.h"
13 
14 namespace faiss {
15 
16 struct IndexFlat;
17 struct IndexFlatL2;
18 struct IndexFlatIP;
19 
20 } // namespace faiss
21 
22 namespace faiss { namespace gpu {
23 
24 struct FlatIndex;
25 
26 struct GpuIndexFlatConfig : public GpuIndexConfig {
27  inline GpuIndexFlatConfig()
28  : useFloat16(false),
29  useFloat16Accumulator(false),
30  storeTransposed(false) {
31  }
32 
33  /// Whether or not data is stored as float16
34  bool useFloat16;
35 
36  /// Whether or not all math is performed in float16, if useFloat16 is
37  /// specified. If true, we use cublasHgemm, supported only on CC
38  /// 5.3+. Otherwise, we use cublasSgemmEx.
39  bool useFloat16Accumulator;
40 
41  /// Whether or not data is stored (transparently) in a transposed
42  /// layout, enabling use of the NN GEMM call, which is ~10% faster.
43  /// This will improve the speed of the flat index, but will
44  /// substantially slow down any add() calls made, as all data must
45  /// be transposed, and will increase storage requirements (we store
46  /// data in both transposed and non-transposed layouts).
47  bool storeTransposed;
48 };
49 
50 /// Wrapper around the GPU implementation that looks like
51 /// faiss::IndexFlat; copies over centroid data from a given
52 /// faiss::IndexFlat
53 class GpuIndexFlat : public GpuIndex {
54  public:
55  /// Construct from a pre-existing faiss::IndexFlat instance, copying
56  /// data over to the given GPU
57  GpuIndexFlat(GpuResources* resources,
58  const faiss::IndexFlat* index,
60 
61  /// Construct an empty instance that can be added to
62  GpuIndexFlat(GpuResources* resources,
63  int dims,
64  faiss::MetricType metric,
66 
67  ~GpuIndexFlat() override;
68 
69  /// Set the minimum data size for searches (in MiB) for which we use
70  /// CPU -> GPU paging
71  void setMinPagingSize(size_t size);
72 
73  /// Returns the current minimum data size for paged searches
74  size_t getMinPagingSize() const;
75 
76  /// Initialize ourselves from the given CPU index; will overwrite
77  /// all data in ourselves
78  void copyFrom(const faiss::IndexFlat* index);
79 
80  /// Copy ourselves to the given CPU index; will overwrite all data
81  /// in the index instance
82  void copyTo(faiss::IndexFlat* index) const;
83 
84  /// Returns the number of vectors we contain
85  size_t getNumVecs() const;
86 
87  /// Clears all vectors from this index
88  void reset() override;
89 
90  /// This index is not trained, so this does nothing
91  void train(Index::idx_t n, const float* x) override;
92 
93  /// Overrides to avoid excessive copies
94  void add(faiss::Index::idx_t, const float* x) override;
95 
96  /// `x`, `distances` and `labels` can be resident on the CPU or any
97  /// GPU; copies are performed as needed
98  /// We have our own implementation here which handles CPU async
99  /// copies; searchImpl_ is not called
100  /// FIXME: move paged impl into GpuIndex
101  void search(
103  const float* x,
105  float* distances,
106  faiss::Index::idx_t* labels) const override;
107 
108  /// Reconstruction methods; prefer the batch reconstruct as it will
109  /// be more efficient
110  void reconstruct(faiss::Index::idx_t key, float* out) const override;
111 
112  /// Batch reconstruction method
113  void reconstruct_n(
116  float* out) const override;
117 
118  /// For internal access
119  inline FlatIndex* getGpuData() { return data_; }
120 
121  protected:
122  /// Called from GpuIndex for add
123  void addImpl_(
125  const float* x,
126  const faiss::Index::idx_t* ids) override;
127 
128  /// Should not be called (we have our own implementation)
129  void searchImpl_(
131  const float* x,
133  float* distances,
134  faiss::Index::idx_t* labels) const override;
135 
136  /// Called from search when the input data is on the CPU;
137  /// potentially allows for pinned memory usage
138  void searchFromCpuPaged_(int n,
139  const float* x,
140  int k,
141  float* outDistancesData,
142  int* outIndicesData) const;
143 
144  void searchNonPaged_(int n,
145  const float* x,
146  int k,
147  float* outDistancesData,
148  int* outIndicesData) const;
149 
150  private:
151  /// Checks user settings for consistency
152  void verifySettings_() const;
153 
154  protected:
155  /// Our config object
157 
158  /// Size above which we page copies from the CPU to GPU
160 
161  /// Holds our GPU data containing the list of vectors; is managed via raw
162  /// pointer so as to allow non-CUDA compilers to see this header
164 };
165 
166 /// Wrapper around the GPU implementation that looks like
167 /// faiss::IndexFlatL2; copies over centroid data from a given
168 /// faiss::IndexFlat
169 class GpuIndexFlatL2 : public GpuIndexFlat {
170  public:
171  /// Construct from a pre-existing faiss::IndexFlatL2 instance, copying
172  /// data over to the given GPU
173  GpuIndexFlatL2(GpuResources* resources,
174  faiss::IndexFlatL2* index,
175  GpuIndexFlatConfig config = GpuIndexFlatConfig());
176 
177  /// Construct an empty instance that can be added to
178  GpuIndexFlatL2(GpuResources* resources,
179  int dims,
180  GpuIndexFlatConfig config = GpuIndexFlatConfig());
181 
182  /// Initialize ourselves from the given CPU index; will overwrite
183  /// all data in ourselves
184  void copyFrom(faiss::IndexFlatL2* index);
185 
186  /// Copy ourselves to the given CPU index; will overwrite all data
187  /// in the index instance
188  void copyTo(faiss::IndexFlatL2* index);
189 };
190 
191 /// Wrapper around the GPU implementation that looks like
192 /// faiss::IndexFlatIP; copies over centroid data from a given
193 /// faiss::IndexFlat
194 class GpuIndexFlatIP : public GpuIndexFlat {
195  public:
196  /// Construct from a pre-existing faiss::IndexFlatIP instance, copying
197  /// data over to the given GPU
198  GpuIndexFlatIP(GpuResources* resources,
199  faiss::IndexFlatIP* index,
200  GpuIndexFlatConfig config = GpuIndexFlatConfig());
201 
202  /// Construct an empty instance that can be added to
203  GpuIndexFlatIP(GpuResources* resources,
204  int dims,
205  GpuIndexFlatConfig config = GpuIndexFlatConfig());
206 
207  /// Initialize ourselves from the given CPU index; will overwrite
208  /// all data in ourselves
209  void copyFrom(faiss::IndexFlatIP* index);
210 
211  /// Copy ourselves to the given CPU index; will overwrite all data
212  /// in the index instance
213  void copyTo(faiss::IndexFlatIP* index);
214 };
215 
216 } } // namespace
void copyFrom(faiss::IndexFlatL2 *index)
void copyTo(faiss::IndexFlat *index) const
void reconstruct_n(faiss::Index::idx_t i0, faiss::Index::idx_t num, float *out) const override
Batch reconstruction method.
size_t getMinPagingSize() const
Returns the current minimum data size for paged searches.
Definition: GpuIndexFlat.cu:82
Holder of GPU resources for a particular flat index.
Definition: FlatIndex.cuh:22
FlatIndex * getGpuData()
For internal access.
Definition: GpuIndexFlat.h:119
void copyTo(faiss::IndexFlatL2 *index)
void searchFromCpuPaged_(int n, const float *x, int k, float *outDistancesData, int *outIndicesData) const
void addImpl_(faiss::Index::idx_t n, const float *x, const faiss::Index::idx_t *ids) override
Called from GpuIndex for add.
void searchImpl_(faiss::Index::idx_t n, const float *x, faiss::Index::idx_t k, float *distances, faiss::Index::idx_t *labels) const override
Should not be called (we have our own implementation)
size_t getNumVecs() const
Returns the number of vectors we contain.
GpuIndexFlat(GpuResources *resources, const faiss::IndexFlat *index, GpuIndexFlatConfig config=GpuIndexFlatConfig())
Definition: GpuIndexFlat.cu:33
void setMinPagingSize(size_t size)
Definition: GpuIndexFlat.cu:77
bool useFloat16
Whether or not data is stored as float16.
Definition: GpuIndexFlat.h:34
GpuIndexFlatL2(GpuResources *resources, faiss::IndexFlatL2 *index, GpuIndexFlatConfig config=GpuIndexFlatConfig())
void reconstruct(faiss::Index::idx_t key, float *out) const override
void copyTo(faiss::IndexFlatIP *index)
GpuIndexFlatIP(GpuResources *resources, faiss::IndexFlatIP *index, GpuIndexFlatConfig config=GpuIndexFlatConfig())
long idx_t
all indices are this type
Definition: Index.h:64
void copyFrom(const faiss::IndexFlat *index)
Definition: GpuIndexFlat.cu:87
size_t minPagedSize_
Size above which we page copies from the CPU to GPU.
Definition: GpuIndexFlat.h:159
const GpuIndexFlatConfig config_
Our config object.
Definition: GpuIndexFlat.h:156
void add(faiss::Index::idx_t, const float *x) override
Overrides to avoid excessive copies.
void reset() override
Clears all vectors from this index.
void copyFrom(faiss::IndexFlatIP *index)
void search(faiss::Index::idx_t n, const float *x, faiss::Index::idx_t k, float *distances, faiss::Index::idx_t *labels) const override
void train(Index::idx_t n, const float *x) override
This index is not trained, so this does nothing.
MetricType
Some algorithms support both an inner product version and a L2 search version.
Definition: Index.h:45