Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
GpuIndexFlat.h
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 // Copyright 2004-present Facebook. All Rights Reserved.
10 
11 #pragma once
12 
13 #include "GpuIndex.h"
14 
15 namespace faiss {
16 
17 struct IndexFlat;
18 struct IndexFlatL2;
19 struct IndexFlatIP;
20 
21 }
22 
23 namespace faiss { namespace gpu {
24 
25 struct FlatIndex;
26 
28  inline GpuIndexFlatConfig()
29  : useFloat16(false),
30  useFloat16Accumulator(false),
31  storeTransposed(false) {
32  }
33 
34  /// Whether or not data is stored as float16
35  bool useFloat16;
36 
37  /// Whether or not all math is performed in float16, if useFloat16 is
38  /// specified. If true, we use cublasHgemm, supported only on CC
39  /// 5.3+. Otherwise, we use cublasSgemmEx.
41 
42  /// Whether or not data is stored (transparently) in a transposed
43  /// layout, enabling use of the NN GEMM call, which is ~10% faster.
44  /// This will improve the speed of the flat index, but will
45  /// substantially slow down any add() calls made, as all data must
46  /// be transposed, and will increase storage requirements (we store
47  /// data in both transposed and non-transposed layouts).
49 };
50 
51 /// Wrapper around the GPU implementation that looks like
52 /// faiss::IndexFlat; copies over centroid data from a given
53 /// faiss::IndexFlat
54 class GpuIndexFlat : public GpuIndex {
55  public:
56  /// Construct from a pre-existing faiss::IndexFlat instance, copying
57  /// data over to the given GPU
58  GpuIndexFlat(GpuResources* resources,
59  const faiss::IndexFlat* index,
61 
62  /// Construct an empty instance that can be added to
63  GpuIndexFlat(GpuResources* resources,
64  int dims,
65  faiss::MetricType metric,
67 
68  ~GpuIndexFlat() override;
69 
70  /// Set the minimum data size for searches (in MiB) for which we use
71  /// CPU -> GPU paging
72  void setMinPagingSize(size_t size);
73 
74  /// Returns the current minimum data size for paged searches
75  size_t getMinPagingSize() const;
76 
77  /// Initialize ourselves from the given CPU index; will overwrite
78  /// all data in ourselves
79  void copyFrom(const faiss::IndexFlat* index);
80 
81  /// Copy ourselves to the given CPU index; will overwrite all data
82  /// in the index instance
83  void copyTo(faiss::IndexFlat* index) const;
84 
85  /// Returns the number of vectors we contain
86  size_t getNumVecs() const;
87 
88  /// Clears all vectors from this index
89  void reset() override;
90 
91  /// This index is not trained, so this does nothing
92  void train(Index::idx_t n, const float* x) override;
93 
94  /// Overrides to avoid excessive copies
95  void add(faiss::Index::idx_t, const float* x) override;
96 
97  /// `x`, `distances` and `labels` can be resident on the CPU or any
98  /// GPU; copies are performed as needed
99  /// We have our own implementation here which handles CPU async
100  /// copies; searchImpl_ is not called
101  /// FIXME: move paged impl into GpuIndex
102  void search(
104  const float* x,
106  float* distances,
107  faiss::Index::idx_t* labels) const override;
108 
109  /// Reconstruction methods; prefer the batch reconstruct as it will
110  /// be more efficient
111  void reconstruct(faiss::Index::idx_t key, float* out) const override;
112 
113  /// Batch reconstruction method
114  void reconstruct_n(
117  float* out) const override;
118 
119  /// For internal access
120  inline FlatIndex* getGpuData() { return data_; }
121 
122  protected:
123  /// Called from GpuIndex for add
124  void addImpl_(
126  const float* x,
127  const faiss::Index::idx_t* ids) override;
128 
129  /// Should not be called (we have our own implementation)
130  void searchImpl_(
132  const float* x,
134  float* distances,
135  faiss::Index::idx_t* labels) const override;
136 
137  /// Called from search when the input data is on the CPU;
138  /// potentially allows for pinned memory usage
139  void searchFromCpuPaged_(int n,
140  const float* x,
141  int k,
142  float* outDistancesData,
143  int* outIndicesData) const;
144 
145  void searchNonPaged_(int n,
146  const float* x,
147  int k,
148  float* outDistancesData,
149  int* outIndicesData) const;
150 
151  private:
152  /// Checks user settings for consistency
153  void verifySettings_() const;
154 
155  protected:
156  /// Our config object
158 
159  /// Size above which we page copies from the CPU to GPU
161 
162  /// Holds our GPU data containing the list of vectors
164 };
165 
166 /// Wrapper around the GPU implementation that looks like
167 /// faiss::IndexFlatL2; copies over centroid data from a given
168 /// faiss::IndexFlatL2
169 class GpuIndexFlatL2 : public GpuIndexFlat {
170  public:
171  /// Construct from a pre-existing faiss::IndexFlatL2 instance, copying
172  /// data over to the given GPU
173  GpuIndexFlatL2(GpuResources* resources,
174  faiss::IndexFlatL2* index,
176 
177  /// Construct an empty instance that can be added to
178  GpuIndexFlatL2(GpuResources* resources,
179  int dims,
181 
182  /// Initialize ourselves from the given CPU index; will overwrite
183  /// all data in ourselves
184  void copyFrom(faiss::IndexFlatL2* index);
185 
186  /// Copy ourselves to the given CPU index; will overwrite all data
187  /// in the index instance
188  void copyTo(faiss::IndexFlatL2* index);
189 };
190 
191 /// Wrapper around the GPU implementation that looks like
192 /// faiss::IndexFlatIP; copies over centroid data from a given
193 /// faiss::IndexFlatIP
194 class GpuIndexFlatIP : public GpuIndexFlat {
195  public:
196  /// Construct from a pre-existing faiss::IndexFlatIP instance, copying
197  /// data over to the given GPU
198  GpuIndexFlatIP(GpuResources* resources,
199  faiss::IndexFlatIP* index,
201 
202  /// Construct an empty instance that can be added to
203  GpuIndexFlatIP(GpuResources* resources,
204  int dims,
206 
207  /// Initialize ourselves from the given CPU index; will overwrite
208  /// all data in ourselves
209  void copyFrom(faiss::IndexFlatIP* index);
210 
211  /// Copy ourselves to the given CPU index; will overwrite all data
212  /// in the index instance
213  void copyTo(faiss::IndexFlatIP* index);
214 };
215 
216 } } // namespace
void copyFrom(faiss::IndexFlatL2 *index)
void copyTo(faiss::IndexFlat *index) const
void reconstruct_n(faiss::Index::idx_t i0, faiss::Index::idx_t num, float *out) const override
Batch reconstruction method.
size_t getMinPagingSize() const
Returns the current minimum data size for paged searches.
Definition: GpuIndexFlat.cu:82
Holder of GPU resources for a particular flat index.
Definition: FlatIndex.cuh:23
FlatIndex * getGpuData()
For internal access.
Definition: GpuIndexFlat.h:120
void copyTo(faiss::IndexFlatL2 *index)
void searchFromCpuPaged_(int n, const float *x, int k, float *outDistancesData, int *outIndicesData) const
void addImpl_(faiss::Index::idx_t n, const float *x, const faiss::Index::idx_t *ids) override
Called from GpuIndex for add.
void searchImpl_(faiss::Index::idx_t n, const float *x, faiss::Index::idx_t k, float *distances, faiss::Index::idx_t *labels) const override
Should not be called (we have our own implementation)
size_t getNumVecs() const
Returns the number of vectors we contain.
GpuIndexFlat(GpuResources *resources, const faiss::IndexFlat *index, GpuIndexFlatConfig config=GpuIndexFlatConfig())
Definition: GpuIndexFlat.cu:33
void setMinPagingSize(size_t size)
Definition: GpuIndexFlat.cu:77
bool useFloat16
Whether or not data is stored as float16.
Definition: GpuIndexFlat.h:35
GpuIndexFlatL2(GpuResources *resources, faiss::IndexFlatL2 *index, GpuIndexFlatConfig config=GpuIndexFlatConfig())
void reconstruct(faiss::Index::idx_t key, float *out) const override
void copyTo(faiss::IndexFlatIP *index)
GpuIndexFlatIP(GpuResources *resources, faiss::IndexFlatIP *index, GpuIndexFlatConfig config=GpuIndexFlatConfig())
long idx_t
all indices are this type
Definition: Index.h:62
void copyFrom(const faiss::IndexFlat *index)
Definition: GpuIndexFlat.cu:87
size_t minPagedSize_
Size above which we page copies from the CPU to GPU.
Definition: GpuIndexFlat.h:160
const GpuIndexFlatConfig config_
Our config object.
Definition: GpuIndexFlat.h:157
void add(faiss::Index::idx_t, const float *x) override
Overrides to avoid excessive copies.
void reset() override
Clears all vectors from this index.
void copyFrom(faiss::IndexFlatIP *index)
void search(faiss::Index::idx_t n, const float *x, faiss::Index::idx_t k, float *distances, faiss::Index::idx_t *labels) const override
void train(Index::idx_t n, const float *x) override
This index is not trained, so this does nothing.
MetricType
Some algorithms support both an inner product version and an L2 search version.
Definition: Index.h:43
FlatIndex * data_
Holds our GPU data containing the list of vectors.
Definition: GpuIndexFlat.h:163