Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
FlatIndex.cu
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 // Copyright 2004-present Facebook. All Rights Reserved.
10 
11 #include "FlatIndex.cuh"
12 #include "Distance.cuh"
13 #include "L2Norm.cuh"
14 #include "../utils/CopyUtils.cuh"
15 #include "../utils/DeviceUtils.h"
16 #include "../utils/Transpose.cuh"
17 
18 namespace faiss { namespace gpu {
19 
// Constructs a flat (brute-force) GPU index over `dim`-dimensional vectors.
// `l2Distance` selects L2 vs. inner-product search; `useFloat16` stores
// vectors as half precision; `storeTransposed` keeps an extra transposed
// copy of the vectors for faster distance kernels.
FlatIndex::FlatIndex(GpuResources* res,
                     int dim,
                     bool l2Distance,
                     bool useFloat16,
                     bool useFloat16Accumulator,
                     bool storeTransposed,
                     MemorySpace space) :
    resources_(res),
    dim_(dim),
    useFloat16_(useFloat16),
    useFloat16Accumulator_(useFloat16Accumulator),
    storeTransposed_(storeTransposed),
    l2Distance_(l2Distance),
    space_(space),
    num_(0),
    rawData_(space) {
#ifndef FAISS_USE_FLOAT16
  // half-precision storage was requested, but this build has no fp16 support
  FAISS_ASSERT(!useFloat16_);
#endif
}
40 
// Reports whether this index stores its vectors as float16 on the device.
bool
FlatIndex::getUseFloat16() const {
  return useFloat16_;
}
45 
/// Returns the number of vectors we contain
int FlatIndex::getSize() const {
  int numVecs = vectors_.getSize(0);

#ifdef FAISS_USE_FLOAT16
  // In half mode the float32 tensor is empty; read the half tensor instead
  if (useFloat16_) {
    numVecs = vectorsHalf_.getSize(0);
  }
#endif

  return numVecs;
}
56 
// Returns the dimensionality of the stored vectors, as reported by
// whichever tensor (half or float32) is currently in use.
int FlatIndex::getDim() const {
  int dim = vectors_.getSize(1);

#ifdef FAISS_USE_FLOAT16
  if (useFloat16_) {
    dim = vectorsHalf_.getSize(1);
  }
#endif

  return dim;
}
66 
// Reserve storage that can contain at least this many vectors, sized by
// the element width of the active storage type (half or float32).
void
FlatIndex::reserve(size_t numVecs, cudaStream_t stream) {
  if (!useFloat16_) {
    rawData_.reserve(numVecs * dim_ * sizeof(float), stream);
    return;
  }

#ifdef FAISS_USE_FLOAT16
  rawData_.reserve(numVecs * dim_ * sizeof(half), stream);
#endif
}
77 
// Returns a reference to our vectors currently in use.
// NOTE(review): the extracted listing dropped the signature lines; the
// return type below is reconstructed from the documented declaration
// `Tensor<float, 2, true>& getVectorsFloat32Ref()`.
Tensor<float, 2, true>&
FlatIndex::getVectorsFloat32Ref() {
  return vectors_;
}
82 
#ifdef FAISS_USE_FLOAT16
// Returns a reference to our float16 vectors currently in use.
// NOTE(review): the extracted listing dropped the return-type line; it is
// reconstructed from the type of `vectorsHalf_` — confirm against FlatIndex.cuh.
Tensor<half, 2, true>&
FlatIndex::getVectorsFloat16Ref() {
  return vectorsHalf_;
}
#endif
89 
// Returns a float32 copy of every vector we contain, converting from
// half precision if needed.
DeviceTensor<float, 2, true>
FlatIndex::getVectorsFloat32Copy(cudaStream_t stream) {
  // Delegate to the ranged overload over the full set [0, num_)
  return getVectorsFloat32Copy(0, num_, stream);
}
94 
// Returns a float32 copy of vectors [from, from + num), converting from
// half precision if needed.
// NOTE(review): the extracted listing dropped the return-type line; it is
// reconstructed to match the single-argument overload above.
DeviceTensor<float, 2, true>
FlatIndex::getVectorsFloat32Copy(int from, int num, cudaStream_t stream) {
  DeviceTensor<float, 2, true> vecFloat32({num, dim_}, space_);

  if (useFloat16_) {
#ifdef FAISS_USE_FLOAT16
    runConvertToFloat32(vecFloat32.data(),
                        vectorsHalf_[from].data(),
                        num * dim_, stream);
#endif
  } else {
    // Bug fix: previously this copied all of `vectors_`, ignoring
    // `from`/`num`; copyTo requires matching sizes, so any partial request
    // was incorrect. Narrow to the requested range first.
    auto vectorsRange = vectors_.narrowOutermost(from, num);
    vectorsRange.copyTo(vecFloat32, stream);
  }

  return vecFloat32;
}
111 
// Performs a k-nearest-neighbor query of `input` against our stored
// vectors, writing distances and indices of the best k per query row.
// If `exactDistance` is false, distances may be left in an approximate
// (unreduced) form for downstream use.
void
FlatIndex::query(Tensor<float, 2, true>& input,
                 int k,
                 Tensor<float, 2, true>& outDistances,
                 Tensor<int, 2, true>& outIndices,
                 bool exactDistance) {
  auto stream = resources_->getDefaultStreamCurrentDevice();
  auto& mem = resources_->getMemoryManagerCurrentDevice();

  if (!useFloat16_) {
    // Native float32 path
    if (l2Distance_) {
      runL2Distance(resources_,
                    vectors_,
                    storeTransposed_ ? &vectorsTransposed_ : nullptr,
                    &norms_,
                    input,
                    k,
                    outDistances,
                    outIndices,
                    // FIXME
                    !exactDistance);
    } else {
      runIPDistance(resources_,
                    vectors_,
                    storeTransposed_ ? &vectorsTransposed_ : nullptr,
                    input,
                    k,
                    outDistances,
                    outIndices);
    }

    return;
  }

#ifdef FAISS_USE_FLOAT16
  // Half path: convert the query to float16 and dispatch to the half
  // overload, then convert the distances back if exact values are needed
  auto inputHalf = toHalf<2>(resources_, stream, input);

  DeviceTensor<half, 2, true> outDistancesHalf(
    mem, {outDistances.getSize(0), outDistances.getSize(1)}, stream);

  query(inputHalf, k, outDistancesHalf, outIndices, exactDistance);

  if (exactDistance) {
    // Convert outDistances back
    fromHalf<2>(stream, outDistancesHalf, outDistances);
  }
#endif
}
159 
#ifdef FAISS_USE_FLOAT16
// Half-precision variant of query(); only valid when the index stores
// vectors as float16.
void
FlatIndex::query(Tensor<half, 2, true>& input,
                 int k,
                 Tensor<half, 2, true>& outDistances,
                 Tensor<int, 2, true>& outIndices,
                 bool exactDistance) {
  FAISS_ASSERT(useFloat16_);

  // Pass the transposed copy only if we maintain one
  auto transposedVecs =
    storeTransposed_ ? &vectorsHalfTransposed_ : nullptr;

  if (l2Distance_) {
    runL2Distance(resources_,
                  vectorsHalf_,
                  transposedVecs,
                  &normsHalf_,
                  input,
                  k,
                  outDistances,
                  outIndices,
                  useFloat16Accumulator_,
                  // FIXME
                  !exactDistance);
  } else {
    runIPDistance(resources_,
                  vectorsHalf_,
                  transposedVecs,
                  input,
                  k,
                  outDistances,
                  outIndices,
                  useFloat16Accumulator_);
  }
}
#endif
193 
// Appends `numVecs` float32 vectors (host or device pointer) to our
// storage, converting to float16 if that is our storage type, then
// rebuilds the tensor views, the optional transposed copy, and (for L2)
// the precomputed vector norms.
// NOTE(review): the extracted listing dropped the declaration line of the
// float32 `vectors` tensor below; it is reconstructed by symmetry with the
// half branch.
void
FlatIndex::add(const float* data, int numVecs, cudaStream_t stream) {
  if (numVecs == 0) {
    return;
  }

  if (useFloat16_) {
#ifdef FAISS_USE_FLOAT16
    // Make sure that `data` is on our device; we'll run the
    // conversion on our device
    auto devData = toDevice<float, 2>(resources_,
                                      getCurrentDevice(),
                                      (float*) data,
                                      stream,
                                      {numVecs, dim_});

    auto devDataHalf = toHalf<2>(resources_, stream, devData);

    rawData_.append((char*) devDataHalf.data(),
                    devDataHalf.getSizeInBytes(),
                    stream,
                    true /* reserve exactly */);
#endif
  } else {
    rawData_.append((char*) data,
                    (size_t) dim_ * numVecs * sizeof(float),
                    stream,
                    true /* reserve exactly */);
  }

  num_ += numVecs;

  // Rebuild our tensor views, since append() may have reallocated the
  // underlying raw storage
  if (useFloat16_) {
#ifdef FAISS_USE_FLOAT16
    DeviceTensor<half, 2, true> vectorsHalf(
      (half*) rawData_.data(), {(int) num_, dim_}, space_);
    vectorsHalf_ = std::move(vectorsHalf);
#endif
  } else {
    DeviceTensor<float, 2, true> vectors(
      (float*) rawData_.data(), {(int) num_, dim_}, space_);
    vectors_ = std::move(vectors);
  }

  if (storeTransposed_) {
    // Maintain a dim x num transposed copy for the distance kernels
    if (useFloat16_) {
#ifdef FAISS_USE_FLOAT16
      vectorsHalfTransposed_ =
        std::move(DeviceTensor<half, 2, true>({dim_, (int) num_}, space_));
      runTransposeAny(vectorsHalf_, 0, 1, vectorsHalfTransposed_, stream);
#endif
    } else {
      vectorsTransposed_ =
        std::move(DeviceTensor<float, 2, true>({dim_, (int) num_}, space_));
      runTransposeAny(vectors_, 0, 1, vectorsTransposed_, stream);
    }
  }

  if (l2Distance_) {
    // Precompute L2 norms of our database
    if (useFloat16_) {
#ifdef FAISS_USE_FLOAT16
      DeviceTensor<half, 1, true> normsHalf({(int) num_}, space_);
      runL2Norm(vectorsHalf_, normsHalf, true, stream);
      normsHalf_ = std::move(normsHalf);
#endif
    } else {
      DeviceTensor<float, 1, true> norms({(int) num_}, space_);
      runL2Norm(vectors_, norms, true, stream);
      norms_ = std::move(norms);
    }
  }
}
267 
// Free all storage and return the index to its empty state.
// NOTE(review): the extracted listing dropped the `FlatIndex::reset() {`
// line; it is reconstructed from the documented definition site.
void
FlatIndex::reset() {
  rawData_.clear();
  vectors_ = std::move(DeviceTensor<float, 2, true>());
  vectorsTransposed_ = std::move(DeviceTensor<float, 2, true>());
  norms_ = std::move(DeviceTensor<float, 1, true>());
#ifdef FAISS_USE_FLOAT16
  // Bug fix: previously the float16 views were left intact, so a
  // half-precision index still reported its old size after reset()
  vectorsHalf_ = std::move(DeviceTensor<half, 2, true>());
  vectorsHalfTransposed_ = std::move(DeviceTensor<half, 2, true>());
  normsHalf_ = std::move(DeviceTensor<half, 1, true>());
#endif
  num_ = 0;
}
275 
276 } }
DeviceTensor< float, 2, true > getVectorsFloat32Copy(cudaStream_t stream)
Definition: FlatIndex.cu:91
cudaStream_t getDefaultStreamCurrentDevice()
Calls getDefaultStream with the current device.
int getSize() const
Returns the number of vectors we contain.
Definition: FlatIndex.cu:47
DeviceMemory & getMemoryManagerCurrentDevice()
Calls getMemoryManager for the current device.
void reserve(size_t numVecs, cudaStream_t stream)
Reserve storage that can contain at least this many vectors.
Definition: FlatIndex.cu:68
void add(const float *data, int numVecs, cudaStream_t stream)
Definition: FlatIndex.cu:195
__host__ __device__ IndexT getSize(int i) const
Definition: Tensor.cuh:224
__host__ void copyTo(Tensor< T, Dim, InnerContig, IndexT, PtrTraits > &t, cudaStream_t stream)
Copies ourselves into a tensor; sizes must match.
Definition: Tensor-inl.cuh:171
Our tensor type.
Definition: Tensor.cuh:30
Tensor< float, 2, true > & getVectorsFloat32Ref()
Returns a reference to our vectors currently in use.
Definition: FlatIndex.cu:79
void reset()
Free all storage.
Definition: FlatIndex.cu:269