GpuIndex.cu
/**
 * Copyright (c) 2015-present, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under the CC-by-NC license found in the
 * LICENSE file in the root directory of this source tree.
 */

// Copyright 2004-present Facebook. All Rights Reserved.

#include "GpuIndex.h"
#include "../FaissAssert.h"
#include "GpuResources.h"
#include "utils/DeviceUtils.h"
#include <algorithm>
#include <stdio.h>
namespace faiss { namespace gpu {

// Default size for which we page add or search
constexpr size_t kAddPageSize = (size_t) 256 * 1024 * 1024;
constexpr size_t kSearchPageSize = (size_t) 256 * 1024 * 1024;

// Or, maximum number of vectors to consider per page of add or search
constexpr size_t kAddVecSize = (size_t) 512 * 1024;

// Use a smaller search size, as precomputed code usage on IVFPQ
// requires substantial amounts of memory
// FIXME: parameterize based on algorithm need
constexpr size_t kSearchVecSize = (size_t) 32 * 1024;
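
// Illustrative arithmetic, using a hypothetical dimension of d = 128: one
// 256 MiB add page then holds 268435456 / (128 * sizeof(float)) = 524288
// vectors, which is exactly kAddVecSize, while search tiles are further
// capped at kSearchVecSize = 32768 vectors.
constexpr size_t kExampleDim = 128; // hypothetical, for illustration only
static_assert(kAddPageSize / (kExampleDim * sizeof(float)) == kAddVecSize,
              "a 256 MiB page of d = 128 float vectors holds 512 * 1024 vectors");
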
GpuIndex::GpuIndex(GpuResources* resources,
                   int dims,
                   faiss::MetricType metric,
                   GpuIndexConfig config) :
    Index(dims, metric),
    resources_(resources),
    device_(config.device),
    memorySpace_(config.memorySpace) {
  FAISS_THROW_IF_NOT_FMT(device_ < getNumDevices(),
                         "Invalid GPU device %d", device_);

  FAISS_THROW_IF_NOT_MSG(dims > 0, "Invalid number of dimensions");

#ifdef FAISS_UNIFIED_MEM
  FAISS_THROW_IF_NOT_FMT(
    memorySpace_ == MemorySpace::Device ||
    (memorySpace_ == MemorySpace::Unified &&
     getFullUnifiedMemSupport(device_)),
    "Device %d does not support full CUDA 8 Unified Memory (CC 6.0+)",
    config.device);
#else
  FAISS_THROW_IF_NOT_MSG(memorySpace_ == MemorySpace::Device,
                         "Must compile with CUDA 8+ for Unified Memory support");
#endif

  FAISS_ASSERT(resources_);
  resources_->initializeForDevice(device_);
}

void
GpuIndex::add(Index::idx_t n, const float* x) {
  addInternal_(n, x, nullptr);
}

void
GpuIndex::add_with_ids(Index::idx_t n,
                       const float* x,
                       const Index::idx_t* ids) {
  addInternal_(n, x, ids);
}

void
GpuIndex::addInternal_(Index::idx_t n,
                       const float* x,
                       const Index::idx_t* ids) {
  DeviceScope scope(device_);

  FAISS_THROW_IF_NOT_MSG(this->is_trained, "Index not trained");

  if (n > 0) {
    size_t totalSize = n * (size_t) this->d * sizeof(float);

    if (totalSize > kAddPageSize || n > kAddVecSize) {
      // How many vectors fit into kAddPageSize?
      size_t maxNumVecsForPageSize =
        kAddPageSize / ((size_t) this->d * sizeof(float));

      // Always add at least 1 vector, if we have huge vectors
      maxNumVecsForPageSize = std::max(maxNumVecsForPageSize, (size_t) 1);

      size_t tileSize = std::min((size_t) n, maxNumVecsForPageSize);
      tileSize = std::min(tileSize, kSearchVecSize);

      for (size_t i = 0; i < n; i += tileSize) {
        size_t curNum = std::min(tileSize, n - i);

        addImpl_(curNum,
                 x + i * (size_t) this->d,
                 ids ? ids + i : nullptr);
      }
    } else {
      addImpl_(n, x, ids);
    }
  }
}
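
// Worked example with hypothetical sizes: adding n = 1000000 vectors of
// dimension d = 256 needs 1000000 * 256 * sizeof(float) = ~1.02 GB, which
// exceeds kAddPageSize, so the add above is paged. maxNumVecsForPageSize is
// 268435456 / 1024 = 262144, the tile is then clamped to kSearchVecSize =
// 32768, and addImpl_ is invoked 31 times: 30 full tiles of 32768 vectors
// plus a final tile of 16960.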

void
GpuIndex::search(Index::idx_t n,
                 const float* x,
                 Index::idx_t k,
                 float* distances,
                 Index::idx_t* labels) const {
  DeviceScope scope(device_);

  FAISS_THROW_IF_NOT_MSG(this->is_trained, "Index not trained");

  if (n > 0) {
    size_t totalSize = n * (size_t) this->d * sizeof(float);

    if ((totalSize > kSearchPageSize) || (n > kSearchVecSize)) {
      // How many vectors fit into kSearchPageSize?
      // Just consider `x`, not the size of `distances` or `labels`
      // since they should be small, relatively speaking
      size_t maxNumVecsForPageSize =
        kSearchPageSize / ((size_t) this->d * sizeof(float));

      // Always search at least 1 vector, if we have huge vectors
      maxNumVecsForPageSize = std::max(maxNumVecsForPageSize, (size_t) 1);

      size_t tileSize = std::min((size_t) n, maxNumVecsForPageSize);
      tileSize = std::min(tileSize, kSearchVecSize);

      for (size_t i = 0; i < n; i += tileSize) {
        size_t curNum = std::min(tileSize, n - i);

        searchImpl_(curNum,
                    x + i * (size_t) this->d,
                    k,
                    distances + i * k,
                    labels + i * k);
      }
    } else {
      searchImpl_(n, x, k, distances, labels);
    }
  }
}

} } // namespace
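
For context, the sketch below shows how these paged add() and search() entry points are typically reached through a concrete subclass. It is only an illustration: GpuIndexFlatL2 and StandardGpuResources are assumed to come from the same source tree, include paths and constructor signatures may differ between Faiss versions, subclasses may further specialize these entry points, and the sizes are hypothetical, chosen to cross the paging thresholds above.

#include "gpu/GpuIndexFlat.h"          // assumed path, relative to the Faiss root
#include "gpu/StandardGpuResources.h"  // assumed path, relative to the Faiss root
#include <vector>

int main() {
  const int d = 128;                            // hypothetical dimension
  const faiss::Index::idx_t nb = 600 * 1024;    // exceeds kAddVecSize
  const faiss::Index::idx_t nq = 64 * 1024;     // exceeds kSearchVecSize
  const faiss::Index::idx_t k = 10;

  std::vector<float> xb(nb * d, 1.0f);          // database vectors
  std::vector<float> xq(nq * d, 1.0f);          // query vectors
  std::vector<float> distances(nq * k);
  std::vector<faiss::Index::idx_t> labels(nq * k);

  faiss::gpu::StandardGpuResources resources;       // GpuResources implementation
  faiss::gpu::GpuIndexFlatL2 index(&resources, d);  // a GpuIndex subclass

  index.add(nb, xb.data());  // dispatched through the faiss::Index interface
  index.search(nq, xq.data(), k, distances.data(), labels.data());
  return 0;
}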