Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
IVFBase.cu
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 // Copyright 2004-present Facebook. All Rights Reserved.
10 
11 #include "IVFBase.cuh"
12 #include "../GpuResources.h"
13 #include "FlatIndex.cuh"
14 #include "InvertedListAppend.cuh"
15 #include "RemapIndices.h"
16 #include "../utils/DeviceDefs.cuh"
17 #include "../utils/DeviceUtils.h"
18 #include "../utils/HostTensor.cuh"
19 #include <limits>
20 #include <thrust/host_vector.h>
21 #include <unordered_map>
22 
23 namespace faiss { namespace gpu {
24 
// Builds the state shared by the IVF implementations.
//
// resources      - GPU resource holder; the pointer is stored as given
// quantizer      - coarse quantizer; the pointer is stored as given and is
//                  queried for the vector dimension (getDim()) and the
//                  number of inverted lists (getSize())
// bytesPerVector - number of bytes each encoded vector occupies in a list
// indicesOptions - how user indices are stored
// space          - memory space used for the inverted list storage
//
// The constructor finishes by calling reset().
// NOTE(review): quantizer is dereferenced in the initializer list, so it must
// be non-null; nothing here checks that.
IVFBase::IVFBase(GpuResources* resources,
                 FlatIndex* quantizer,
                 int bytesPerVector,
                 IndicesOptions indicesOptions,
                 MemorySpace space) :
    resources_(resources),
    quantizer_(quantizer),
    bytesPerVector_(bytesPerVector),
    indicesOptions_(indicesOptions),
    space_(space),
    dim_(quantizer->getDim()),
    numLists_(quantizer->getSize()),
    maxListLength_(0) {
  reset();
}
40 
// No manual cleanup is performed: members are destroyed by their own
// destructors, and the raw resources_ / quantizer_ pointers are not freed
// here.
IVFBase::~IVFBase() = default;
43 
// Reserves storage in every inverted list for an even share of `numVecs`
// vectors (numVecs / number of lists, rounded down).
//
// Does nothing when there are no lists or when the per-list share is zero.
// When indices are stored in the index lists (INDICES_32_BIT or
// INDICES_64_BIT), the index lists are reserved as well. Afterwards the
// device-side list information is refreshed, since reserving may move the
// lists' base pointers.
void
IVFBase::reserveMemory(size_t numVecs) {
  // Without any inverted lists there is nothing to reserve, and the division
  // below would be a division by zero.
  if (deviceListData_.empty()) {
    return;
  }

  size_t vecsPerList = numVecs / deviceListData_.size();
  if (vecsPerList < 1) {
    return;
  }

  auto stream = resources_->getDefaultStreamCurrentDevice();

  size_t bytesPerDataList = vecsPerList * bytesPerVector_;
  for (auto& list : deviceListData_) {
    list->reserve(bytesPerDataList, stream);
  }

  if ((indicesOptions_ == INDICES_32_BIT) ||
      (indicesOptions_ == INDICES_64_BIT)) {
    // Reserve for index lists as well
    size_t bytesPerIndexList = vecsPerList *
      (indicesOptions_ == INDICES_32_BIT ? sizeof(int) : sizeof(long));

    for (auto& list : deviceListIndices_) {
      list->reserve(bytesPerIndexList, stream);
    }
  }

  // Update device info for all lists, since the base pointers may
  // have changed
  updateDeviceListInfo_(stream);
}
73 
// Discards all inverted-list state and rebuilds it empty: clears the per-list
// containers, creates one data list, one index list and one CPU-side user-id
// vector per inverted list (numLists_ of each), sizes the per-list pointer and
// length arrays to numLists_ (null pointers, zero lengths) and resets
// maxListLength_ to 0.
// NOTE(review): this text is a documentation listing with dropped lines. The
// line carrying the function name (listing line 75) is absent; presumably
// this is IVFBase::reset(), which the constructor calls -- confirm against
// the real source file.
74 void
76  deviceListData_.clear();
77  deviceListIndices_.clear();
// NOTE(review): listing lines 78-79 are absent here; verify what else is
// cleared at this point.
80  deviceListLengths_.clear();
81  listOffsetToUserIndex_.clear();
82 
83  for (size_t i = 0; i < numLists_; ++i) {
84  deviceListData_.emplace_back(
85  std::unique_ptr<DeviceVector<unsigned char>>(
// NOTE(review): listing line 86 (the expression that creates the
// DeviceVector) is absent.
87  deviceListIndices_.emplace_back(
88  std::unique_ptr<DeviceVector<unsigned char>>(
// NOTE(review): listing line 89 (the expression that creates the
// DeviceVector) is absent.
90  listOffsetToUserIndex_.emplace_back(std::vector<long>());
91  }
92 
93  deviceListDataPointers_.resize(numLists_, nullptr);
94  deviceListIndexPointers_.resize(numLists_, nullptr);
95  deviceListLengths_.resize(numLists_, 0);
96  maxListLength_ = 0;
97 }
98 
// Returns the number of dimensions of the vectors being indexed (dim_).
int
IVFBase::getDim() const {
  return dim_;
}
103 
// Releases unused reserved memory in all inverted lists by forwarding to
// reclaimMemory_ with exact == true, and returns whatever that call returns.
size_t
IVFBase::reclaimMemory() {
  // Reclaim all unused memory exactly
  return reclaimMemory_(true);
}
109 
// Calls reclaim(exact, stream) on every data list and every index list,
// re-records each list's base pointer, refreshes the device-side list
// information, and returns the sum of the values reported by the reclaim()
// calls (presumably a byte count -- confirm against DeviceVector::reclaim).
//
// exact - forwarded unchanged to each list's reclaim()
size_t
IVFBase::reclaimMemory_(bool exact) {
  auto stream = resources_->getDefaultStreamCurrentDevice();

  size_t totalReclaimed = 0;

  // size_t counters: the bounds are container sizes, so avoid comparing a
  // signed int against an unsigned size.
  for (size_t i = 0; i < deviceListData_.size(); ++i) {
    auto& data = deviceListData_[i];
    totalReclaimed += data->reclaim(exact, stream);

    deviceListDataPointers_[i] = data->data();
  }

  for (size_t i = 0; i < deviceListIndices_.size(); ++i) {
    auto& indices = deviceListIndices_[i];
    totalReclaimed += indices->reclaim(exact, stream);

    deviceListIndexPointers_[i] = indices->data();
  }

  // Update device info for all lists, since the base pointers may
  // have changed
  updateDeviceListInfo_(stream);

  return totalReclaimed;
}
136 
// Refreshes the device-side information of every inverted list: builds the
// id sequence 0 .. (number of data lists - 1) and forwards it, together with
// `stream`, to the overload that takes an explicit list of ids.
void
IVFBase::updateDeviceListInfo_(cudaStream_t stream) {
  const size_t listCount = deviceListData_.size();

  std::vector<int> allListIds;
  allListIds.reserve(listCount);
  for (size_t id = 0; id < listCount; ++id) {
    allListIds.push_back((int) id);
  }

  updateDeviceListInfo_(allListIds, stream);
}
146 
// Pushes the current length and base pointers of the lists named in
// `listIds` to the device.
//
// For each entry of listIds this stages, on the host, the list id, the
// number of vectors in the list (data size divided by bytesPerVector_) and
// the data() pointers of the list's data and index storage. The staged
// values are then copied into device tensors using `stream` and handed to
// runUpdateListPointers.
// NOTE(review): this text is a documentation listing with dropped lines
// (150, 152, 154, 156, 158 and 188-190). The declaration of `mem`, the types
// of the four host* tensors and three of the runUpdateListPointers arguments
// are therefore not visible here -- consult the real source file.
147 void
148 IVFBase::updateDeviceListInfo_(const std::vector<int>& listIds,
149  cudaStream_t stream) {
151 
// One host-side staging tensor per updated field, each with listIds.size()
// entries.
153  hostListsToUpdate({(int) listIds.size()});
155  hostNewListLength({(int) listIds.size()});
157  hostNewDataPointers({(int) listIds.size()});
159  hostNewIndexPointers({(int) listIds.size()});
160 
161  for (int i = 0; i < listIds.size(); ++i) {
162  auto listId = listIds[i];
163  auto& data = deviceListData_[listId];
164  auto& indices = deviceListIndices_[listId];
165 
166  hostListsToUpdate[i] = listId;
// The list length is expressed in vectors, not bytes.
167  hostNewListLength[i] = data->size() / bytesPerVector_;
168  hostNewDataPointers[i] = data->data();
169  hostNewIndexPointers[i] = indices->data();
170  }
171 
172  // Copy the above update sets to the GPU
173  DeviceTensor<int, 1, true> listsToUpdate(
174  mem, hostListsToUpdate, stream);
175  DeviceTensor<int, 1, true> newListLength(
176  mem, hostNewListLength, stream);
177  DeviceTensor<void*, 1, true> newDataPointers(
178  mem, hostNewDataPointers, stream);
179  DeviceTensor<void*, 1, true> newIndexPointers(
180  mem, hostNewIndexPointers, stream);
181 
182  // Update all pointers to the lists on the device that may have
183  // changed
184  runUpdateListPointers(listsToUpdate,
185  newListLength,
186  newDataPointers,
187  newIndexPointers,
// NOTE(review): listing lines 188-190 (further arguments) are absent here.
191  stream);
192 }
193 
// Returns the number of inverted lists (numLists_).
size_t
IVFBase::getNumLists() const {
  return numLists_;
}
198 
// Returns the length recorded in deviceListLengths_ for inverted list
// `listId`, after asserting that the id is inside that array.
// NOTE(review): deviceListLengths_ is a thrust::device_vector, so this
// single-element read presumably involves a device-to-host transfer.
int
IVFBase::getListLength(int listId) const {
  FAISS_ASSERT(listId < deviceListLengths_.size());

  const int length = deviceListLengths_[listId];
  return length;
}
205 
// Returns the user indices of inverted list `listId` as a host-side
// std::vector<long>. The source of the indices depends on indicesOptions_:
//   INDICES_32_BIT - the list's stored indices are copied to the host as int
//                    and each one is widened to long
//   INDICES_64_BIT - the list's stored indices are copied to the host as long
//   INDICES_CPU    - a copy of listOffsetToUserIndex_[listId] is returned
//   anything else  - asserts, then returns an empty vector
// NOTE(review): this text is a documentation listing with dropped lines; the
// arguments of both copyToHost calls (listing lines 214 and 226) are not
// visible here -- consult the real source file.
206 std::vector<long>
207 IVFBase::getListIndices(int listId) const {
208  FAISS_ASSERT(listId < numLists_);
209 
210  if (indicesOptions_ == INDICES_32_BIT) {
211  FAISS_ASSERT(listId < deviceListIndices_.size());
212 
213  auto intInd = deviceListIndices_[listId]->copyToHost<int>(
215 
// Widen the 32-bit indices to the long type of the return value.
216  std::vector<long> out(intInd.size());
217  for (size_t i = 0; i < intInd.size(); ++i) {
218  out[i] = (long) intInd[i];
219  }
220 
221  return out;
222  } else if (indicesOptions_ == INDICES_64_BIT) {
223  FAISS_ASSERT(listId < deviceListIndices_.size());
224 
225  return deviceListIndices_[listId]->copyToHost<long>(
227  } else if (indicesOptions_ == INDICES_CPU) {
228  FAISS_ASSERT(listId < deviceListData_.size());
229  FAISS_ASSERT(listId < listOffsetToUserIndex_.size());
230 
231  auto& userIds = listOffsetToUserIndex_[listId];
// The CPU-side id list must have exactly one entry per stored vector.
232  FAISS_ASSERT(userIds.size() ==
233  deviceListData_[listId]->size() / bytesPerVector_);
234 
235  // this will return a copy
236  return userIds;
237  } else {
238  // unhandled indices type (includes INDICES_IVF)
239  FAISS_ASSERT(false);
240  return std::vector<long>();
241  }
242 }
243 
// Appends `numVecs` user indices, read from the host array `indices`, to the
// index storage of inverted list `listId`.
//
// Where the indices end up depends on indicesOptions_:
//   INDICES_32_BIT - narrowed to int (asserting each value fits) and appended
//                    to the list's index storage
//   INDICES_64_BIT - appended to the list's index storage as long
//   INDICES_CPU    - appended to the host-side listOffsetToUserIndex_[listId]
//   INDICES_IVF    - not stored (any other value fails the assertion)
//
// If appending moved the list's index storage, the recorded base pointer in
// deviceListIndexPointers_ is updated.
void
IVFBase::addIndicesFromCpu_(int listId,
                            const long* indices,
                            size_t numVecs) {
  auto stream = resources_->getDefaultStreamCurrentDevice();

  auto& listIndices = deviceListIndices_[listId];
  auto prevIndicesData = listIndices->data();

  if (indicesOptions_ == INDICES_32_BIT) {
    // Make sure that all indices are in bounds (both ends of the int range,
    // so that the narrowing below can never silently change a value)
    std::vector<int> indices32(numVecs);
    for (size_t i = 0; i < numVecs; ++i) {
      auto ind = indices[i];
      FAISS_ASSERT(ind <= (long) std::numeric_limits<int>::max());
      FAISS_ASSERT(ind >= (long) std::numeric_limits<int>::min());
      indices32[i] = (int) ind;
    }

    listIndices->append((unsigned char*) indices32.data(),
                        numVecs * sizeof(int),
                        stream,
                        true /* exact reserved size */);
  } else if (indicesOptions_ == INDICES_64_BIT) {
    listIndices->append((unsigned char*) indices,
                        numVecs * sizeof(long),
                        stream,
                        true /* exact reserved size */);
  } else if (indicesOptions_ == INDICES_CPU) {
    // indices are stored on the CPU
    FAISS_ASSERT(listId < listOffsetToUserIndex_.size());

    // Append at the end, like the GPU-side branches above, so that the
    // position of an id keeps matching the offset of its vector in the list
    // even when the list already holds entries.
    auto& userIndices = listOffsetToUserIndex_[listId];
    userIndices.insert(userIndices.end(), indices, indices + numVecs);
  } else {
    // indices are not stored
    FAISS_ASSERT(indicesOptions_ == INDICES_IVF);
  }

  if (prevIndicesData != listIndices->data()) {
    deviceListIndexPointers_[listId] = listIndices->data();
  }
}
286 
287 } } // namespace
const int numLists_
Number of inverted lists we maintain.
Definition: IVFBase.cuh:91
int maxListLength_
Maximum list length seen.
Definition: IVFBase.cuh:115
cudaStream_t getDefaultStreamCurrentDevice()
Calls getDefaultStream with the current device.
IVFBase(GpuResources *resources, FlatIndex *quantizer, int bytesPerVector, IndicesOptions indicesOptions, MemorySpace space)
Definition: IVFBase.cu:25
std::vector< std::vector< long > > listOffsetToUserIndex_
Definition: IVFBase.cuh:127
FlatIndex
Holder of GPU resources for a particular flat index.
Definition: FlatIndex.cuh:23
int getDim() const
Return the number of dimensions we are indexing.
Definition: IVFBase.cu:100
int getListLength(int listId) const
Definition: IVFBase.cu:200
void reserveMemory(size_t numVecs)
Reserve GPU memory in our inverted lists for this number of vectors.
Definition: IVFBase.cu:45
size_t reclaimMemory_(bool exact)
Definition: IVFBase.cu:111
thrust::device_vector< int > deviceListLengths_
Definition: IVFBase.cuh:112
thrust::device_vector< void * > deviceListIndexPointers_
Definition: IVFBase.cuh:108
DeviceMemory & getMemoryManagerCurrentDevice()
Calls getMemoryManager for the current device.
thrust::device_vector< void * > deviceListDataPointers_
Definition: IVFBase.cuh:104
GpuResources * resources_
Collection of GPU resources that we use.
Definition: IVFBase.cuh:82
const int bytesPerVector_
Number of bytes per vector in the list.
Definition: IVFBase.cuh:94
void updateDeviceListInfo_(cudaStream_t stream)
Update all device-side list pointer and size information.
Definition: IVFBase.cu:138
std::vector< long > getListIndices(int listId) const
Return the list indices of a particular list back to the CPU.
Definition: IVFBase.cu:207
const IndicesOptions indicesOptions_
How are user indices stored on the GPU?
Definition: IVFBase.cuh:97
const MemorySpace space_
What memory space our inverted list storage is in.
Definition: IVFBase.cuh:100
std::vector< std::unique_ptr< DeviceVector< unsigned char > > > deviceListData_
Definition: IVFBase.cuh:121
const int dim_
Expected dimensionality of the vectors.
Definition: IVFBase.cuh:88
void addIndicesFromCpu_(int listId, const long *indices, size_t numVecs)
Shared function to copy indices from CPU to GPU.
Definition: IVFBase.cu:245
size_t reclaimMemory()
Definition: IVFBase.cu:105
size_t getNumLists() const
Returns the number of inverted lists.
Definition: IVFBase.cu:195