Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
GpuIndexFlat.cu
1 /**
2  * Copyright (c) Facebook, Inc. and its affiliates.
3  *
4  * This source code is licensed under the MIT license found in the
5  * LICENSE file in the root directory of this source tree.
6  */
7 
8 
9 #include "GpuIndexFlat.h"
10 #include "../IndexFlat.h"
11 #include "GpuResources.h"
12 #include "impl/FlatIndex.cuh"
13 #include "utils/ConversionOperators.cuh"
14 #include "utils/CopyUtils.cuh"
15 #include "utils/DeviceUtils.h"
16 #include "utils/Float16.cuh"
17 #include "utils/StaticUtils.h"
18 
19 #include <thrust/execution_policy.h>
20 #include <thrust/transform.h>
21 #include <limits>
22 
23 namespace faiss { namespace gpu {
24 
// Construct a GPU flat index as a copy of an existing CPU IndexFlat.
// The data is copied to the GPU immediately via copyFrom().
GpuIndexFlat::GpuIndexFlat(GpuResources* resources,
                           const faiss::IndexFlat* index,
                           GpuIndexFlatConfig config) :
    GpuIndex(resources, index->d, index->metric_type, config),
    config_(std::move(config)),
    data_(nullptr) {
  verifySettings_();

  // Flat index doesn't need training
  this->is_trained = true;

  copyFrom(index);
}
38 
// Construct an empty GPU flat index of the given dimension and metric.
GpuIndexFlat::GpuIndexFlat(GpuResources* resources,
                           int dims,
                           faiss::MetricType metric,
                           GpuIndexFlatConfig config) :
    GpuIndex(resources, dims, metric, config),
    config_(std::move(config)),
    data_(nullptr) {
  verifySettings_();

  // Flat index doesn't need training
  this->is_trained = true;

  // Construct index
  DeviceScope scope(device_);
  data_ = new FlatIndex(resources,
                        dims,
                        metric == faiss::METRIC_L2,
                        config_.useFloat16,
                        config_.useFloat16Accumulator,
                        config_.storeTransposed,
                        memorySpace_);
}
61 
// Frees the device-side vector storage.
GpuIndexFlat::~GpuIndexFlat() {
  delete data_;
}
65 
// Copies all state (dimension, metric, vectors) from a CPU IndexFlat
// to the GPU, replacing any existing device-side storage.
// Throws if the CPU index holds more vectors than the GPU's 32-bit
// indexing can address.
void
GpuIndexFlat::copyFrom(const faiss::IndexFlat* index) {
  DeviceScope scope(device_);

  this->d = index->d;
  this->metric_type = index->metric_type;

  // GPU code has 32 bit indices
  FAISS_THROW_IF_NOT_FMT(index->ntotal <=
                         (faiss::Index::idx_t) std::numeric_limits<int>::max(),
                         "GPU index only supports up to %zu indices; "
                         "attempting to copy CPU index with %zu parameters",
                         (size_t) std::numeric_limits<int>::max(),
                         (size_t) index->ntotal);
  this->ntotal = index->ntotal;

  // Rebuild the device index from scratch with the (possibly new) settings
  delete data_;
  data_ = new FlatIndex(resources_,
                        this->d,
                        index->metric_type == faiss::METRIC_L2,
                        config_.useFloat16,
                        config_.useFloat16Accumulator,
                        config_.storeTransposed,
                        memorySpace_);

  // The index could be empty
  if (index->ntotal > 0) {
    data_->add(index->xb.data(),
               index->ntotal,
               resources_->getDefaultStream(device_));
  }
}
98 
// Copies all state (dimension, metric, vectors) from the GPU back to a
// CPU IndexFlat. If data is stored on-device as float16, it is converted
// back to float32 first.
void
GpuIndexFlat::copyTo(faiss::IndexFlat* index) const {
  DeviceScope scope(device_);

  index->d = this->d;
  index->ntotal = this->ntotal;
  index->metric_type = this->metric_type;

  FAISS_ASSERT(data_);
  FAISS_ASSERT(data_->getSize() == this->ntotal);
  index->xb.resize(this->ntotal * this->d);

  auto stream = resources_->getDefaultStream(device_);

  if (this->ntotal > 0) {
    if (config_.useFloat16) {
      // Data is stored as float16 on-device; convert to float32 first
      auto vecFloat32 = data_->getVectorsFloat32Copy(stream);
      fromDevice(vecFloat32, index->xb.data(), stream);
    } else {
      fromDevice(data_->getVectorsFloat32Ref(), index->xb.data(), stream);
    }
  }
}
122 
// Returns the number of vectors we contain.
size_t
GpuIndexFlat::getNumVecs() const {
  return this->ntotal;
}
127 
// Clears all vectors from this index, freeing the underlying GPU memory.
void
GpuIndexFlat::reset() {
  DeviceScope scope(device_);

  // Free the underlying memory
  data_->reset();
  this->ntotal = 0;
}
136 
// Flat indices require no training; this is a no-op.
void
GpuIndexFlat::train(Index::idx_t n, const float* x) {
  // nothing to do
}
141 
// Adds n vectors of dimension d to the index.
// Overrides GpuIndex::add to avoid excessive copies: in float32 mode the
// input need not be resident on our device, so we add directly; in float16
// mode we defer to the base class, which pages data through the device.
void
GpuIndexFlat::add(Index::idx_t n, const float* x) {
  FAISS_THROW_IF_NOT_MSG(this->is_trained, "Index not trained");

  // For now, only support <= max int results
  FAISS_THROW_IF_NOT_FMT(n <= (Index::idx_t) std::numeric_limits<int>::max(),
                         "GPU index only supports up to %d indices",
                         std::numeric_limits<int>::max());

  if (n == 0) {
    // nothing to add
    return;
  }

  DeviceScope scope(device_);

  // To avoid multiple re-allocations, ensure we have enough storage
  // available
  data_->reserve(this->ntotal + n, resources_->getDefaultStream(device_));

  // If we're not operating in float16 mode, we don't need the input
  // data to be resident on our device; we can add directly.
  if (!config_.useFloat16) {
    addImpl_(n, x, nullptr);
  } else {
    // Otherwise, perform the paging
    GpuIndex::add(n, x);
  }
}
171 
// Flat index does not require IDs as there is no storage available for them.
bool
GpuIndexFlat::addImplRequiresIDs_() const {
  return false;
}
176 
// Called from GpuIndex::add with device-resident (or directly addable) data.
// ids must be null: flat indices have no ID storage.
void
GpuIndexFlat::addImpl_(int n,
                       const float* x,
                       const Index::idx_t* ids) {
  FAISS_ASSERT(data_);
  FAISS_ASSERT(n > 0);

  // We do not support add_with_ids
  FAISS_THROW_IF_NOT_MSG(!ids, "add_with_ids not supported");

  // Due to GPU indexing in int32, we can't store more than this
  // number of vectors on a GPU
  FAISS_THROW_IF_NOT_FMT(this->ntotal + n <=
                         (faiss::Index::idx_t) std::numeric_limits<int>::max(),
                         "GPU index only supports up to %zu indices",
                         (size_t) std::numeric_limits<int>::max());

  data_->add(x, n, resources_->getDefaultStream(device_));
  this->ntotal += n;
}
197 
// Called from GpuIndex::search; all pointers are already device-resident.
// Runs the brute-force k-NN query, then widens FlatIndex's int32 result
// labels to idx_t on the stream.
void
GpuIndexFlat::searchImpl_(int n,
                          const float* x,
                          int k,
                          float* distances,
                          Index::idx_t* labels) const {
  auto stream = resources_->getDefaultStream(device_);

  // Input and output data are already resident on the GPU
  Tensor<float, 2, true> queries(const_cast<float*>(x), {n, (int) this->d});
  Tensor<float, 2, true> outDistances(distances, {n, k});
  Tensor<Index::idx_t, 2, true> outLabels(labels, {n, k});

  // FlatIndex only supports int indices
  DeviceTensor<int, 2, true> outIntLabels(
    resources_->getMemoryManagerCurrentDevice(), {n, k}, stream);

  data_->query(queries, k, outDistances, outIntLabels, true);

  // Convert int to idx_t
  thrust::transform(thrust::cuda::par.on(stream),
                    outIntLabels.data(),
                    outIntLabels.end(),
                    outLabels.data(),
                    IntToIdxType());
}
224 
// Copies the single stored vector at index `key` from the device into the
// host buffer `out` (which must hold d floats).
void
GpuIndexFlat::reconstruct(faiss::Index::idx_t key,
                          float* out) const {
  DeviceScope scope(device_);

  FAISS_THROW_IF_NOT_MSG(key < this->ntotal, "index out of bounds");
  auto stream = resources_->getDefaultStream(device_);

  if (config_.useFloat16) {
    // Stored as float16; convert a single vector back to float32
    auto vec = data_->getVectorsFloat32Copy(key, 1, stream);
    fromDevice(vec.data(), out, this->d, stream);
  } else {
    auto vec = data_->getVectorsFloat32Ref()[key];
    fromDevice(vec.data(), out, this->d, stream);
  }
}
241 
// Batch reconstruction: copies `num` consecutive vectors starting at `i0`
// from the device into the host buffer `out` (num * d floats).
void
GpuIndexFlat::reconstruct_n(faiss::Index::idx_t i0,
                            faiss::Index::idx_t num,
                            float* out) const {
  DeviceScope scope(device_);

  FAISS_THROW_IF_NOT_MSG(i0 < this->ntotal, "index out of bounds");
  FAISS_THROW_IF_NOT_MSG(i0 + num - 1 < this->ntotal, "num out of bounds");
  auto stream = resources_->getDefaultStream(device_);

  if (config_.useFloat16) {
    // Stored as float16; convert the range back to float32
    auto vec = data_->getVectorsFloat32Copy(i0, num, stream);
    fromDevice(vec.data(), out, num * this->d, stream);
  } else {
    auto vec = data_->getVectorsFloat32Ref()[i0];
    fromDevice(vec.data(), out, this->d * num, stream);
  }
}
260 
// Validates the config: useFloat16Accumulator (Hgemm) requires float16
// storage mode, a device with half-precision math support, and a build
// compiled with FAISS_USE_FLOAT16.
void
GpuIndexFlat::verifySettings_() const {
  // If we want Hgemm, ensure that it is supported on this device
  if (config_.useFloat16Accumulator) {
#ifdef FAISS_USE_FLOAT16
    FAISS_THROW_IF_NOT_MSG(config_.useFloat16,
                           "useFloat16Accumulator can only be enabled "
                           "with useFloat16");

    FAISS_THROW_IF_NOT_FMT(getDeviceSupportsFloat16Math(config_.device),
                           "Device %d does not support Hgemm "
                           "(useFloat16Accumulator)",
                           config_.device);
#else
    FAISS_THROW_IF_NOT_MSG(false, "not compiled with float16 support");
#endif
  }
}
279 
280 //
281 // GpuIndexFlatL2
282 //
283 
// Construct from a pre-existing CPU IndexFlatL2 (L2 metric is implied).
GpuIndexFlatL2::GpuIndexFlatL2(GpuResources* resources,
                               faiss::IndexFlatL2* index,
                               GpuIndexFlatConfig config) :
    GpuIndexFlat(resources, index, config) {
}
289 
// Construct an empty L2 flat index of the given dimension.
GpuIndexFlatL2::GpuIndexFlatL2(GpuResources* resources,
                               int dims,
                               GpuIndexFlatConfig config) :
    GpuIndexFlat(resources, dims, faiss::METRIC_L2, config) {
}
295 
// Copies state from a CPU IndexFlatL2; delegates to the base class.
void
GpuIndexFlatL2::copyFrom(faiss::IndexFlatL2* index) {
  GpuIndexFlat::copyFrom(index);
}
300 
// Copies state back to a CPU IndexFlatL2; delegates to the base class.
void
GpuIndexFlatL2::copyTo(faiss::IndexFlatL2* index) {
  GpuIndexFlat::copyTo(index);
}
305 
306 //
307 // GpuIndexFlatIP
308 //
309 
// Construct from a pre-existing CPU IndexFlatIP (inner-product metric).
GpuIndexFlatIP::GpuIndexFlatIP(GpuResources* resources,
                               faiss::IndexFlatIP* index,
                               GpuIndexFlatConfig config) :
    GpuIndexFlat(resources, index, config) {
}
315 
// Construct an empty inner-product flat index of the given dimension.
GpuIndexFlatIP::GpuIndexFlatIP(GpuResources* resources,
                               int dims,
                               GpuIndexFlatConfig config) :
    GpuIndexFlat(resources, dims, faiss::METRIC_INNER_PRODUCT, config) {
}
321 
// Copies state from a CPU IndexFlatIP; delegates to the base class.
void
GpuIndexFlatIP::copyFrom(faiss::IndexFlatIP* index) {
  GpuIndexFlat::copyFrom(index);
}
326 
// Copies state back to a CPU IndexFlatIP; delegates to the base class.
void
GpuIndexFlatIP::copyTo(faiss::IndexFlatIP* index) {
  GpuIndexFlat::copyTo(index);
}
331 
332 } } // namespace
DeviceTensor< float, 2, true > getVectorsFloat32Copy(cudaStream_t stream)
Definition: FlatIndex.cu:89
void copyFrom(faiss::IndexFlatL2 *index)
void copyTo(faiss::IndexFlat *index) const
void reconstruct_n(faiss::Index::idx_t i0, faiss::Index::idx_t num, float *out) const override
Batch reconstruction method.
int getSize() const
Returns the number of vectors we contain.
Definition: FlatIndex.cu:45
Holder of GPU resources for a particular flat index.
Definition: FlatIndex.cuh:21
void copyTo(faiss::IndexFlatL2 *index)
size_t getNumVecs() const
Returns the number of vectors we contain.
void searchImpl_(int n, const float *x, int k, float *distances, faiss::Index::idx_t *labels) const override
Called from GpuIndex for search.
GpuIndexFlat(GpuResources *resources, const faiss::IndexFlat *index, GpuIndexFlatConfig config=GpuIndexFlatConfig())
Definition: GpuIndexFlat.cu:25
bool useFloat16
Whether or not data is stored as float16.
Definition: GpuIndexFlat.h:33
int device
GPU device on which the index is resident.
Definition: GpuIndex.h:25
GpuIndexFlatL2(GpuResources *resources, faiss::IndexFlatL2 *index, GpuIndexFlatConfig config=GpuIndexFlatConfig())
virtual cudaStream_t getDefaultStream(int device)=0
int d
vector dimension
Definition: Index.h:66
long idx_t
all indices are this type
Definition: Index.h:62
void reconstruct(faiss::Index::idx_t key, float *out) const override
DeviceMemory & getMemoryManagerCurrentDevice()
Calls getMemoryManager for the current device.
const int device_
The GPU device we are resident on.
Definition: GpuIndex.h:126
void copyTo(faiss::IndexFlatIP *index)
GpuIndexFlatIP(GpuResources *resources, faiss::IndexFlatIP *index, GpuIndexFlatConfig config=GpuIndexFlatConfig())
GpuResources * resources_
Manages streams, cuBLAS handles and scratch memory for devices.
Definition: GpuIndex.h:123
void addImpl_(int n, const float *x, const Index::idx_t *ids) override
Called from GpuIndex for add.
void reserve(size_t numVecs, cudaStream_t stream)
Reserve storage that can contain at least this many vectors.
Definition: FlatIndex.cu:66
void add(const float *data, int numVecs, cudaStream_t stream)
Definition: FlatIndex.cu:196
idx_t ntotal
total nb of indexed vectors
Definition: Index.h:67
void add(faiss::Index::idx_t, const float *x) override
Definition: GpuIndex.cu:79
void copyFrom(const faiss::IndexFlat *index)
Definition: GpuIndexFlat.cu:67
Our tensor type.
Definition: Tensor.cuh:28
const MemorySpace memorySpace_
The memory space of our primary storage on the GPU.
Definition: GpuIndex.h:129
bool addImplRequiresIDs_() const override
Flat index does not require IDs as there is no storage available for them.
const GpuIndexFlatConfig config_
Our config object.
Definition: GpuIndexFlat.h:123
MetricType metric_type
type of metric this index uses for search
Definition: Index.h:74
void add(faiss::Index::idx_t, const float *x) override
Overrides to avoid excessive copies.
Tensor< float, 2, true > & getVectorsFloat32Ref()
Returns a reference to our vectors currently in use.
Definition: FlatIndex.cu:77
void reset() override
Clears all vectors from this index.
void copyFrom(faiss::IndexFlatIP *index)
bool is_trained
set if the Index does not require training, or if training is done already
Definition: Index.h:71
void reset()
Free all storage.
Definition: FlatIndex.cu:270
std::vector< float > xb
database vectors, size ntotal * d
Definition: IndexFlat.h:23
void train(Index::idx_t n, const float *x) override
This index is not trained, so this does nothing.
MetricType
Some algorithms support both an inner product version and a L2 search version.
Definition: Index.h:44