Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
IVFBase.cu
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 
10 #include "IVFBase.cuh"
11 #include "../GpuResources.h"
12 #include "FlatIndex.cuh"
13 #include "InvertedListAppend.cuh"
14 #include "RemapIndices.h"
15 #include "../utils/DeviceDefs.cuh"
16 #include "../utils/DeviceUtils.h"
17 #include "../utils/HostTensor.cuh"
18 #include <limits>
19 #include <thrust/host_vector.h>
20 #include <unordered_map>
21 
22 namespace faiss { namespace gpu {
23 
25  FlatIndex* quantizer,
26  int bytesPerVector,
27  IndicesOptions indicesOptions,
28  MemorySpace space) :
29  resources_(resources),
30  quantizer_(quantizer),
31  bytesPerVector_(bytesPerVector),
32  indicesOptions_(indicesOptions),
33  space_(space),
34  dim_(quantizer->getDim()),
35  numLists_(quantizer->getSize()),
36  maxListLength_(0) {
37  reset();
38 }
39 
40 IVFBase::~IVFBase() {
41 }
42 
43 void
44 IVFBase::reserveMemory(size_t numVecs) {
45  size_t vecsPerList = numVecs / deviceListData_.size();
46  if (vecsPerList < 1) {
47  return;
48  }
49 
51 
52  size_t bytesPerDataList = vecsPerList * bytesPerVector_;
53  for (auto& list : deviceListData_) {
54  list->reserve(bytesPerDataList, stream);
55  }
56 
57  if ((indicesOptions_ == INDICES_32_BIT) ||
58  (indicesOptions_ == INDICES_64_BIT)) {
59  // Reserve for index lists as well
60  size_t bytesPerIndexList = vecsPerList *
61  (indicesOptions_ == INDICES_32_BIT ? sizeof(int) : sizeof(long));
62 
63  for (auto& list : deviceListIndices_) {
64  list->reserve(bytesPerIndexList, stream);
65  }
66  }
67 
68  // Update device info for all lists, since the base pointers may
69  // have changed
70  updateDeviceListInfo_(stream);
71 }
72 
73 void
75  deviceListData_.clear();
76  deviceListIndices_.clear();
79  deviceListLengths_.clear();
80  listOffsetToUserIndex_.clear();
81 
82  for (size_t i = 0; i < numLists_; ++i) {
83  deviceListData_.emplace_back(
84  std::unique_ptr<DeviceVector<unsigned char>>(
86  deviceListIndices_.emplace_back(
87  std::unique_ptr<DeviceVector<unsigned char>>(
89  listOffsetToUserIndex_.emplace_back(std::vector<long>());
90  }
91 
92  deviceListDataPointers_.resize(numLists_, nullptr);
93  deviceListIndexPointers_.resize(numLists_, nullptr);
94  deviceListLengths_.resize(numLists_, 0);
95  maxListLength_ = 0;
96 }
97 
98 int
99 IVFBase::getDim() const {
100  return dim_;
101 }
102 
103 size_t
105  // Reclaim all unused memory exactly
106  return reclaimMemory_(true);
107 }
108 
109 size_t
111  auto stream = resources_->getDefaultStreamCurrentDevice();
112 
113  size_t totalReclaimed = 0;
114 
115  for (int i = 0; i < deviceListData_.size(); ++i) {
116  auto& data = deviceListData_[i];
117  totalReclaimed += data->reclaim(exact, stream);
118 
119  deviceListDataPointers_[i] = data->data();
120  }
121 
122  for (int i = 0; i < deviceListIndices_.size(); ++i) {
123  auto& indices = deviceListIndices_[i];
124  totalReclaimed += indices->reclaim(exact, stream);
125 
126  deviceListIndexPointers_[i] = indices->data();
127  }
128 
129  // Update device info for all lists, since the base pointers may
130  // have changed
131  updateDeviceListInfo_(stream);
132 
133  return totalReclaimed;
134 }
135 
136 void
137 IVFBase::updateDeviceListInfo_(cudaStream_t stream) {
138  std::vector<int> listIds(deviceListData_.size());
139  for (int i = 0; i < deviceListData_.size(); ++i) {
140  listIds[i] = i;
141  }
142 
143  updateDeviceListInfo_(listIds, stream);
144 }
145 
146 void
147 IVFBase::updateDeviceListInfo_(const std::vector<int>& listIds,
148  cudaStream_t stream) {
150 
152  hostListsToUpdate({(int) listIds.size()});
154  hostNewListLength({(int) listIds.size()});
156  hostNewDataPointers({(int) listIds.size()});
158  hostNewIndexPointers({(int) listIds.size()});
159 
160  for (int i = 0; i < listIds.size(); ++i) {
161  auto listId = listIds[i];
162  auto& data = deviceListData_[listId];
163  auto& indices = deviceListIndices_[listId];
164 
165  hostListsToUpdate[i] = listId;
166  hostNewListLength[i] = data->size() / bytesPerVector_;
167  hostNewDataPointers[i] = data->data();
168  hostNewIndexPointers[i] = indices->data();
169  }
170 
171  // Copy the above update sets to the GPU
172  DeviceTensor<int, 1, true> listsToUpdate(
173  mem, hostListsToUpdate, stream);
174  DeviceTensor<int, 1, true> newListLength(
175  mem, hostNewListLength, stream);
176  DeviceTensor<void*, 1, true> newDataPointers(
177  mem, hostNewDataPointers, stream);
178  DeviceTensor<void*, 1, true> newIndexPointers(
179  mem, hostNewIndexPointers, stream);
180 
181  // Update all pointers to the lists on the device that may have
182  // changed
183  runUpdateListPointers(listsToUpdate,
184  newListLength,
185  newDataPointers,
186  newIndexPointers,
190  stream);
191 }
192 
193 size_t
195  return numLists_;
196 }
197 
198 int
199 IVFBase::getListLength(int listId) const {
200  FAISS_ASSERT(listId < deviceListLengths_.size());
201 
202  return deviceListLengths_[listId];
203 }
204 
205 std::vector<long>
206 IVFBase::getListIndices(int listId) const {
207  FAISS_ASSERT(listId < numLists_);
208 
209  if (indicesOptions_ == INDICES_32_BIT) {
210  FAISS_ASSERT(listId < deviceListIndices_.size());
211 
212  auto intInd = deviceListIndices_[listId]->copyToHost<int>(
214 
215  std::vector<long> out(intInd.size());
216  for (size_t i = 0; i < intInd.size(); ++i) {
217  out[i] = (long) intInd[i];
218  }
219 
220  return out;
221  } else if (indicesOptions_ == INDICES_64_BIT) {
222  FAISS_ASSERT(listId < deviceListIndices_.size());
223 
224  return deviceListIndices_[listId]->copyToHost<long>(
226  } else if (indicesOptions_ == INDICES_CPU) {
227  FAISS_ASSERT(listId < deviceListData_.size());
228  FAISS_ASSERT(listId < listOffsetToUserIndex_.size());
229 
230  auto& userIds = listOffsetToUserIndex_[listId];
231  FAISS_ASSERT(userIds.size() ==
232  deviceListData_[listId]->size() / bytesPerVector_);
233 
234  // this will return a copy
235  return userIds;
236  } else {
237  // unhandled indices type (includes INDICES_IVF)
238  FAISS_ASSERT(false);
239  return std::vector<long>();
240  }
241 }
242 
243 void
245  const long* indices,
246  size_t numVecs) {
247  auto stream = resources_->getDefaultStreamCurrentDevice();
248 
249  auto& listIndices = deviceListIndices_[listId];
250  auto prevIndicesData = listIndices->data();
251 
252  if (indicesOptions_ == INDICES_32_BIT) {
253  // Make sure that all indices are in bounds
254  std::vector<int> indices32(numVecs);
255  for (size_t i = 0; i < numVecs; ++i) {
256  auto ind = indices[i];
257  FAISS_ASSERT(ind <= (long) std::numeric_limits<int>::max());
258  indices32[i] = (int) ind;
259  }
260 
261  listIndices->append((unsigned char*) indices32.data(),
262  numVecs * sizeof(int),
263  stream,
264  true /* exact reserved size */);
265  } else if (indicesOptions_ == INDICES_64_BIT) {
266  listIndices->append((unsigned char*) indices,
267  numVecs * sizeof(long),
268  stream,
269  true /* exact reserved size */);
270  } else if (indicesOptions_ == INDICES_CPU) {
271  // indices are stored on the CPU
272  FAISS_ASSERT(listId < listOffsetToUserIndex_.size());
273 
274  auto& userIndices = listOffsetToUserIndex_[listId];
275  userIndices.insert(userIndices.begin(), indices, indices + numVecs);
276  } else {
277  // indices are not stored
278  FAISS_ASSERT(indicesOptions_ == INDICES_IVF);
279  }
280 
281  if (prevIndicesData != listIndices->data()) {
282  deviceListIndexPointers_[listId] = listIndices->data();
283  }
284 }
285 
286 } } // namespace
const int numLists_
Number of inverted lists we maintain.
Definition: IVFBase.cuh:90
int maxListLength_
Maximum list length seen.
Definition: IVFBase.cuh:114
cudaStream_t getDefaultStreamCurrentDevice()
Calls getDefaultStream with the current device.
IVFBase(GpuResources *resources, FlatIndex *quantizer, int bytesPerVector, IndicesOptions indicesOptions, MemorySpace space)
Definition: IVFBase.cu:24
std::vector< std::vector< long > > listOffsetToUserIndex_
Definition: IVFBase.cuh:126
Holder of GPU resources for a particular flat index.
Definition: FlatIndex.cuh:22
int getDim() const
Return the number of dimensions we are indexing.
Definition: IVFBase.cu:99
int getListLength(int listId) const
Definition: IVFBase.cu:199
void reserveMemory(size_t numVecs)
Reserve GPU memory in our inverted lists for this number of vectors.
Definition: IVFBase.cu:44
size_t reclaimMemory_(bool exact)
Definition: IVFBase.cu:110
thrust::device_vector< int > deviceListLengths_
Definition: IVFBase.cuh:111
thrust::device_vector< void * > deviceListIndexPointers_
Definition: IVFBase.cuh:107
DeviceMemory & getMemoryManagerCurrentDevice()
Calls getMemoryManager for the current device.
thrust::device_vector< void * > deviceListDataPointers_
Definition: IVFBase.cuh:103
GpuResources * resources_
Collection of GPU resources that we use.
Definition: IVFBase.cuh:81
const int bytesPerVector_
Number of bytes per vector in the list.
Definition: IVFBase.cuh:93
void updateDeviceListInfo_(cudaStream_t stream)
Update all device-side list pointer and size information.
Definition: IVFBase.cu:137
std::vector< long > getListIndices(int listId) const
Return the list indices of a particular list back to the CPU.
Definition: IVFBase.cu:206
const IndicesOptions indicesOptions_
How are user indices stored on the GPU?
Definition: IVFBase.cuh:96
const MemorySpace space_
What memory space our inverted list storage is in.
Definition: IVFBase.cuh:99
std::vector< std::unique_ptr< DeviceVector< unsigned char > > > deviceListData_
Definition: IVFBase.cuh:120
const int dim_
Expected dimensionality of the vectors.
Definition: IVFBase.cuh:87
void addIndicesFromCpu_(int listId, const long *indices, size_t numVecs)
Shared function to copy indices from CPU to GPU.
Definition: IVFBase.cu:244
size_t reclaimMemory()
Definition: IVFBase.cu:104
size_t getNumLists() const
Returns the number of inverted lists.
Definition: IVFBase.cu:194