Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
IVFBase.cu
1 /**
2  * Copyright (c) Facebook, Inc. and its affiliates.
3  *
4  * This source code is licensed under the MIT license found in the
5  * LICENSE file in the root directory of this source tree.
6  */
7 
8 
9 #include "IVFBase.cuh"
10 #include "../GpuResources.h"
11 #include "FlatIndex.cuh"
12 #include "InvertedListAppend.cuh"
13 #include "RemapIndices.h"
14 #include "../utils/DeviceDefs.cuh"
15 #include "../utils/DeviceUtils.h"
16 #include "../utils/HostTensor.cuh"
17 #include <limits>
18 #include <thrust/host_vector.h>
19 #include <unordered_map>
20 
21 namespace faiss { namespace gpu {
22 
// Constructs the shared IVF state on top of an existing FlatIndex quantizer.
//
// The member initializers store the arguments as given and read the vector
// dimensionality (getDim) and the number of inverted lists (getSize) from
// `quantizer`; the pointer is dereferenced here, so it must be non-null.
// maxListLength_ starts at 0 and reset() is then called from the body.
23 IVFBase::IVFBase(GpuResources* resources,
24  FlatIndex* quantizer,
25  int bytesPerVector,
26  IndicesOptions indicesOptions,
27  MemorySpace space) :
28  resources_(resources),
29  quantizer_(quantizer),
30  bytesPerVector_(bytesPerVector),
31  indicesOptions_(indicesOptions),
32  space_(space),
33  dim_(quantizer->getDim()),
34  numLists_(quantizer->getSize()),
35  maxListLength_(0) {
36  reset();
37 }
38 
// Destructor with an empty body; no explicit cleanup is performed here.
39 IVFBase::~IVFBase() {
40 }
41 
// Reserve GPU memory in our inverted lists for this number of vectors.
//
// numVecs is split evenly (integer division) across all lists; if that
// yields less than one vector per list, nothing is reserved. Index storage
// is reserved as well, but only for the 32-bit and 64-bit index options.
// NOTE(review): the division assumes deviceListData_ is non-empty - confirm
// that at least one inverted list always exists when this is called.
42 void
43 IVFBase::reserveMemory(size_t numVecs) {
44  size_t vecsPerList = numVecs / deviceListData_.size();
45  if (vecsPerList < 1) {
46  return;
47  }
48 
49  auto stream = resources_->getDefaultStreamCurrentDevice();
50 
51  size_t bytesPerDataList = vecsPerList * bytesPerVector_;
52  for (auto& list : deviceListData_) {
53  list->reserve(bytesPerDataList, stream);
54  }
55 
56  if ((indicesOptions_ == INDICES_32_BIT) ||
57  (indicesOptions_ == INDICES_64_BIT)) {
58  // Reserve for index lists as well
59  size_t bytesPerIndexList = vecsPerList *
60  (indicesOptions_ == INDICES_32_BIT ? sizeof(int) : sizeof(long));
61 
62  for (auto& list : deviceListIndices_) {
63  list->reserve(bytesPerIndexList, stream);
64  }
65  }
66 
67  // Update device info for all lists, since the base pointers may
68  // have changed
69  updateDeviceListInfo_(stream);
70 }
71 
// Clears the per-list containers and rebuilds them with numLists_ entries.
//
// NOTE(review): listing line 73 (the function header) is absent from this
// extract. The constructor calls reset(), so this is presumably
// IVFBase::reset() - confirm against the original file.
// NOTE(review): listing lines 76-77, 84 and 87 are absent as well; the two
// std::unique_ptr<...>( expressions below are left unclosed as a result.
72 void
74  deviceListData_.clear();
75  deviceListIndices_.clear();
78  deviceListLengths_.clear();
79  listOffsetToUserIndex_.clear();
80 
// One data list, one index list and one CPU-side id vector per inverted list.
81  for (size_t i = 0; i < numLists_; ++i) {
82  deviceListData_.emplace_back(
83  std::unique_ptr<DeviceVector<unsigned char>>(
85  deviceListIndices_.emplace_back(
86  std::unique_ptr<DeviceVector<unsigned char>>(
88  listOffsetToUserIndex_.emplace_back(std::vector<long>());
89  }
90 
// Pointer tables start as nullptr and lengths as 0 for every list.
91  deviceListDataPointers_.resize(numLists_, nullptr);
92  deviceListIndexPointers_.resize(numLists_, nullptr);
93  deviceListLengths_.resize(numLists_, 0);
94  maxListLength_ = 0;
95 }
96 
// Return the number of dimensions we are indexing (the stored dim_ value,
// which the constructor reads from the quantizer).
97 int
98 IVFBase::getDim() const {
99  return dim_;
100 }
101 
// Public entry point for releasing unused inverted-list memory.
// Delegates to reclaimMemory_ with exact == true and returns whatever
// that call reports.
102 size_t
103 IVFBase::reclaimMemory() {
104  // Reclaim all unused memory exactly
105  return reclaimMemory_(true);
106 }
107 
// Calls reclaim(exact, stream) on every data list and every index list,
// using the default stream of the current device, and returns the sum of
// the values those calls report.
//
// The cached data/index pointers are refreshed after each reclaim, and the
// device-side list info is updated once at the end.
108 size_t
109 IVFBase::reclaimMemory_(bool exact) {
110  auto stream = resources_->getDefaultStreamCurrentDevice();
111 
112  size_t totalReclaimed = 0;
113 
114  for (int i = 0; i < deviceListData_.size(); ++i) {
115  auto& data = deviceListData_[i];
116  totalReclaimed += data->reclaim(exact, stream);
117 
118  deviceListDataPointers_[i] = data->data();
119  }
120 
121  for (int i = 0; i < deviceListIndices_.size(); ++i) {
122  auto& indices = deviceListIndices_[i];
123  totalReclaimed += indices->reclaim(exact, stream);
124 
125  deviceListIndexPointers_[i] = indices->data();
126  }
127 
128  // Update device info for all lists, since the base pointers may
129  // have changed
130  updateDeviceListInfo_(stream);
131 
132  return totalReclaimed;
133 }
134 
// Update all device-side list pointer and size information.
// Builds the id sequence 0 .. (number of data lists - 1) and forwards it to
// the overload that takes an explicit list of ids.
135 void
136 IVFBase::updateDeviceListInfo_(cudaStream_t stream) {
137  std::vector<int> everyList(deviceListData_.size());
138  for (int id = 0; id < everyList.size(); ++id) {
139  everyList[id] = id;
140  }
141 
142  updateDeviceListInfo_(everyList, stream);
143 }
144 
// Pushes the current length and data/index base pointers of the given lists
// to the device.
//
// For each id in listIds, the length is computed as the data list's size()
// divided by bytesPerVector_; the values are staged on the host, copied
// into DeviceTensor objects on `stream`, and handed to
// runUpdateListPointers.
//
// NOTE(review): this extract is missing listing lines 148, 150, 152, 154,
// 156 and 186-188. Line 148 presumably declares `mem` (used below), the
// even-numbered lines presumably carry the types of the four host-side
// tensors, and 186-188 are three further arguments to
// runUpdateListPointers - confirm all of these against the original file.
145 void
146 IVFBase::updateDeviceListInfo_(const std::vector<int>& listIds,
147  cudaStream_t stream) {
149 
// Host-side staging buffers, one entry per list being updated.
151  hostListsToUpdate({(int) listIds.size()});
153  hostNewListLength({(int) listIds.size()});
155  hostNewDataPointers({(int) listIds.size()});
157  hostNewIndexPointers({(int) listIds.size()});
158 
159  for (int i = 0; i < listIds.size(); ++i) {
160  auto listId = listIds[i];
161  auto& data = deviceListData_[listId];
162  auto& indices = deviceListIndices_[listId];
163 
164  hostListsToUpdate[i] = listId;
165  hostNewListLength[i] = data->size() / bytesPerVector_;
166  hostNewDataPointers[i] = data->data();
167  hostNewIndexPointers[i] = indices->data();
168  }
169 
170  // Copy the above update sets to the GPU
171  DeviceTensor<int, 1, true> listsToUpdate(
172  mem, hostListsToUpdate, stream);
173  DeviceTensor<int, 1, true> newListLength(
174  mem, hostNewListLength, stream);
175  DeviceTensor<void*, 1, true> newDataPointers(
176  mem, hostNewDataPointers, stream);
177  DeviceTensor<void*, 1, true> newIndexPointers(
178  mem, hostNewIndexPointers, stream);
179 
180  // Update all pointers to the lists on the device that may have
181  // changed
182  runUpdateListPointers(listsToUpdate,
183  newListLength,
184  newDataPointers,
185  newIndexPointers,
189  stream);
190 }
191 
// Returns the number of inverted lists (the stored numLists_ value, which
// the constructor takes from the quantizer's getSize()).
192 size_t
193 IVFBase::getNumLists() const {
194  return numLists_;
195 }
196 
// Returns the length entry stored for list `listId`.
// Asserts that listId is below the size of deviceListLengths_ first.
// NOTE(review): the symbol index of this listing declares
// deviceListLengths_ as thrust::device_vector<int>, so the element read
// below presumably fetches one int back from the device - confirm.
197 int
198 IVFBase::getListLength(int listId) const {
199  FAISS_ASSERT(listId < deviceListLengths_.size());
200 
201  return deviceListLengths_[listId];
202 }
203 
// Return the list indices of a particular list back to the CPU.
//
// - INDICES_32_BIT: the list is copied to the host as int and each value
//   is widened to long.
// - INDICES_64_BIT: the list is copied to the host as long and returned.
// - INDICES_CPU: a copy of the host-side id vector for the list is returned,
//   after asserting its size equals the data list size / bytesPerVector_.
// - any other option: asserts and returns an empty vector.
// Device copies are issued on the default stream of the current device.
204 std::vector<long>
205 IVFBase::getListIndices(int listId) const {
206  FAISS_ASSERT(listId < numLists_);
207 
208  if (indicesOptions_ == INDICES_32_BIT) {
209  FAISS_ASSERT(listId < deviceListIndices_.size());
210 
211  auto intInd = deviceListIndices_[listId]->copyToHost<int>(
212  resources_->getDefaultStreamCurrentDevice());
213 
214  std::vector<long> out(intInd.size());
215  for (size_t i = 0; i < intInd.size(); ++i) {
216  out[i] = (long) intInd[i];
217  }
218 
219  return out;
220  } else if (indicesOptions_ == INDICES_64_BIT) {
221  FAISS_ASSERT(listId < deviceListIndices_.size());
222 
223  return deviceListIndices_[listId]->copyToHost<long>(
224  resources_->getDefaultStreamCurrentDevice());
225  } else if (indicesOptions_ == INDICES_CPU) {
226  FAISS_ASSERT(listId < deviceListData_.size());
227  FAISS_ASSERT(listId < listOffsetToUserIndex_.size());
228 
229  auto& userIds = listOffsetToUserIndex_[listId];
230  FAISS_ASSERT(userIds.size() ==
231  deviceListData_[listId]->size() / bytesPerVector_);
232 
233  // this will return a copy
234  return userIds;
235  } else {
236  // unhandled indices type (includes INDICES_IVF)
237  FAISS_ASSERT(false);
238  return std::vector<long>();
239  }
240 }
241 
// Shared function to copy indices from CPU to GPU.
//
// Adds `numVecs` user ids from the host array `indices` to list `listId`,
// according to indicesOptions_:
// - INDICES_32_BIT: each id is asserted to be <= INT_MAX, narrowed to int
//   and appended to the device index list.
// - INDICES_64_BIT: the ids are appended to the device index list as-is.
// - INDICES_CPU: the ids are appended to the host-side vector for the list.
// - otherwise the option is asserted to be INDICES_IVF and nothing is stored.
// The cached device index pointer for the list is refreshed if the append
// moved the underlying storage.
// NOTE(review): listId is only range-checked in the INDICES_CPU branch;
// deviceListIndices_[listId] is accessed unconditionally - confirm callers
// always pass a valid list id.
242 void
243 IVFBase::addIndicesFromCpu_(int listId,
244  const long* indices,
245  size_t numVecs) {
246  auto stream = resources_->getDefaultStreamCurrentDevice();
247 
248  auto& listIndices = deviceListIndices_[listId];
249  auto prevIndicesData = listIndices->data();
250 
251  if (indicesOptions_ == INDICES_32_BIT) {
252  // Make sure that all indices are in bounds
253  std::vector<int> indices32(numVecs);
254  for (size_t i = 0; i < numVecs; ++i) {
255  auto ind = indices[i];
256  FAISS_ASSERT(ind <= (long) std::numeric_limits<int>::max());
257  indices32[i] = (int) ind;
258  }
259 
260  listIndices->append((unsigned char*) indices32.data(),
261  numVecs * sizeof(int),
262  stream,
263  true /* exact reserved size */);
264  } else if (indicesOptions_ == INDICES_64_BIT) {
265  listIndices->append((unsigned char*) indices,
266  numVecs * sizeof(long),
267  stream,
268  true /* exact reserved size */);
269  } else if (indicesOptions_ == INDICES_CPU) {
270  // indices are stored on the CPU
271  FAISS_ASSERT(listId < listOffsetToUserIndex_.size());
272 
// Append (not prepend) so existing ids keep their positions, matching the
// append used by the GPU-resident branches above.
273  auto& userIndices = listOffsetToUserIndex_[listId];
274  userIndices.insert(userIndices.end(), indices, indices + numVecs);
275  } else {
276  // indices are not stored
277  FAISS_ASSERT(indicesOptions_ == INDICES_IVF);
278  }
279 
280  if (prevIndicesData != listIndices->data()) {
281  deviceListIndexPointers_[listId] = listIndices->data();
282  }
283 }
284 
285 } } // namespace
const int numLists_
Number of inverted lists we maintain.
Definition: IVFBase.cuh:89
int maxListLength_
Maximum list length seen.
Definition: IVFBase.cuh:113
cudaStream_t getDefaultStreamCurrentDevice()
Calls getDefaultStream with the current device.
IVFBase(GpuResources *resources, FlatIndex *quantizer, int bytesPerVector, IndicesOptions indicesOptions, MemorySpace space)
Definition: IVFBase.cu:23
std::vector< std::vector< long > > listOffsetToUserIndex_
Definition: IVFBase.cuh:125
Holder of GPU resources for a particular flat index.
Definition: FlatIndex.cuh:21
int getDim() const
Return the number of dimensions we are indexing.
Definition: IVFBase.cu:98
int getListLength(int listId) const
Definition: IVFBase.cu:198
void reserveMemory(size_t numVecs)
Reserve GPU memory in our inverted lists for this number of vectors.
Definition: IVFBase.cu:43
size_t reclaimMemory_(bool exact)
Definition: IVFBase.cu:109
thrust::device_vector< int > deviceListLengths_
Definition: IVFBase.cuh:110
thrust::device_vector< void * > deviceListIndexPointers_
Definition: IVFBase.cuh:106
DeviceMemory & getMemoryManagerCurrentDevice()
Calls getMemoryManager for the current device.
thrust::device_vector< void * > deviceListDataPointers_
Definition: IVFBase.cuh:102
GpuResources * resources_
Collection of GPU resources that we use.
Definition: IVFBase.cuh:80
const int bytesPerVector_
Number of bytes per vector in the list.
Definition: IVFBase.cuh:92
void updateDeviceListInfo_(cudaStream_t stream)
Update all device-side list pointer and size information.
Definition: IVFBase.cu:136
std::vector< long > getListIndices(int listId) const
Return the list indices of a particular list back to the CPU.
Definition: IVFBase.cu:205
const IndicesOptions indicesOptions_
How are user indices stored on the GPU?
Definition: IVFBase.cuh:95
const MemorySpace space_
What memory space our inverted list storage is in.
Definition: IVFBase.cuh:98
std::vector< std::unique_ptr< DeviceVector< unsigned char > > > deviceListData_
Definition: IVFBase.cuh:119
const int dim_
Expected dimensionality of the vectors.
Definition: IVFBase.cuh:86
void addIndicesFromCpu_(int listId, const long *indices, size_t numVecs)
Shared function to copy indices from CPU to GPU.
Definition: IVFBase.cu:243
size_t reclaimMemory()
Definition: IVFBase.cu:103
size_t getNumLists() const
Returns the number of inverted lists.
Definition: IVFBase.cu:193