Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
FlatIndex.cu
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 // Copyright 2004-present Facebook. All Rights Reserved.
10 
11 #include "FlatIndex.cuh"
12 #include "Distance.cuh"
13 #include "L2Norm.cuh"
14 #include "../utils/CopyUtils.cuh"
15 #include "../utils/DeviceUtils.h"
16 #include "../utils/Transpose.cuh"
17 
18 namespace faiss { namespace gpu {
19 
// Constructs an empty flat index over `dim`-dimensional vectors.
// `l2Distance` selects L2 vs. inner-product search at query time;
// `useFloat16` stores the database in half precision; `storeTransposed`
// additionally keeps a transposed copy of the vectors for the distance
// kernels (built in add()). `space` selects the memory space for all
// device allocations.
FlatIndex::FlatIndex(GpuResources* res,
                     int dim,
                     bool l2Distance,
                     bool useFloat16,
                     bool useFloat16Accumulator,
                     bool storeTransposed,
                     MemorySpace space) :
    resources_(res),
    dim_(dim),
    useFloat16_(useFloat16),
    useFloat16Accumulator_(useFloat16Accumulator),
    storeTransposed_(storeTransposed),
    l2Distance_(l2Distance),
    space_(space),
    num_(0),
    rawData_(space) {
  // Half-precision storage is only legal when compiled with
  // FAISS_USE_FLOAT16 support.
#ifndef FAISS_USE_FLOAT16
  FAISS_ASSERT(!useFloat16_);
#endif
}
40 
/// Returns whether the database vectors are stored as float16.
bool
FlatIndex::getUseFloat16() const {
  return useFloat16_;
}
45 
/// Returns the number of vectors we contain
int FlatIndex::getSize() const {
#ifdef FAISS_USE_FLOAT16
  // When stored as half, the float16 tensor is authoritative.
  if (useFloat16_) {
    return vectorsHalf_.getSize(0);
  }
#endif
  // float32 storage (or float16 disabled at compile time).
  return vectors_.getSize(0);
}
56 
/// Returns the dimensionality of the stored vectors.
int FlatIndex::getDim() const {
#ifdef FAISS_USE_FLOAT16
  // When stored as half, the float16 tensor is authoritative.
  if (useFloat16_) {
    return vectorsHalf_.getSize(1);
  }
#endif
  // float32 storage (or float16 disabled at compile time).
  return vectors_.getSize(1);
}
66 
/// Reserves raw backing storage for at least `numVecs` vectors on
/// `stream`, sized by the element type we actually store.
void
FlatIndex::reserve(size_t numVecs, cudaStream_t stream) {
  if (!useFloat16_) {
    rawData_.reserve(numVecs * dim_ * sizeof(float), stream);
    return;
  }

#ifdef FAISS_USE_FLOAT16
  rawData_.reserve(numVecs * dim_ * sizeof(half), stream);
#endif
}
77 
/// Returns a reference to our vectors currently in use.
// NOTE(review): the signature lines were dropped by the doc extraction;
// restored from the cross-reference (`Tensor<float, 2, true>&
// getVectorsFloat32Ref()`, FlatIndex.cu:79).
Tensor<float, 2, true>&
FlatIndex::getVectorsFloat32Ref() {
  return vectors_;
}
82 
#ifdef FAISS_USE_FLOAT16
/// Returns a reference to our vectors currently in use (float16 storage).
// NOTE(review): the return-type line was dropped by the doc extraction;
// restored to match the type of vectorsHalf_ (half, 2-d) — confirm
// against FlatIndex.cuh.
Tensor<half, 2, true>&
FlatIndex::getVectorsFloat16Ref() {
  return vectorsHalf_;
}
#endif
89 
/// Returns a float32 copy of the entire set of stored vectors,
/// converting from float16 storage if necessary; work is enqueued on
/// `stream`.
DeviceTensor<float, 2, true>
FlatIndex::getVectorsFloat32Copy(cudaStream_t stream) {
  // Delegate to the ranged overload covering all num_ vectors.
  return getVectorsFloat32Copy(0, num_, stream);
}
94 
/// Returns a float32 copy of `num` stored vectors starting at `from`,
/// converting from float16 storage if necessary; work is enqueued on
/// `stream`.
// NOTE(review): the return-type line was dropped by the doc extraction;
// restored to match the declaration in the cross-reference
// (`DeviceTensor<float, 2, true> getVectorsFloat32Copy(...)`).
DeviceTensor<float, 2, true>
FlatIndex::getVectorsFloat32Copy(int from, int num, cudaStream_t stream) {
  DeviceTensor<float, 2, true> vecFloat32({num, dim_}, space_);

  if (useFloat16_) {
#ifdef FAISS_USE_FLOAT16
    // Convert the requested slice [from, from + num) to float32.
    runConvertToFloat32(vecFloat32.data(),
                        vectorsHalf_[from].data(),
                        num * dim_, stream);
#endif
  } else {
    // NOTE(review): this copies all of vectors_ and ignores `from`/`num`;
    // copyTo requires matching sizes, so a partial copy (num != num_)
    // looks wrong here — confirm against Tensor::copyTo semantics.
    vectors_.copyTo(vecFloat32, stream);
  }

  return vecFloat32;
}
111 
/// Performs a k-nearest-neighbor search of `input` (queries x dim)
/// against the stored vectors, writing results into the caller-provided
/// `outDistances` / `outIndices`. If we store float16, the query is
/// converted and dispatched to the half overload; otherwise the float32
/// distance kernels run directly.
void
FlatIndex::query(Tensor<float, 2, true>& input,
                 int k,
                 Tensor<float, 2, true>& outDistances,
                 Tensor<int, 2, true>& outIndices,
                 bool exactDistance,
                 int tileSize) {
  auto stream = resources_->getDefaultStreamCurrentDevice();
  auto& mem = resources_->getMemoryManagerCurrentDevice();

  if (useFloat16_) {
    // We need to convert to float16
#ifdef FAISS_USE_FLOAT16
    auto inputHalf = toHalf<2>(resources_, stream, input);

    // Temporary half-precision output buffer from the stream-scoped
    // memory manager.
    DeviceTensor<half, 2, true> outDistancesHalf(
      mem, {outDistances.getSize(0), outDistances.getSize(1)}, stream);

    query(inputHalf, k, outDistancesHalf, outIndices, exactDistance, tileSize);

    if (exactDistance) {
      // Convert outDistances back
      // NOTE(review): when exactDistance is false, outDistances is left
      // unwritten on this path — confirm callers only read indices then.
      fromHalf<2>(stream, outDistancesHalf, outDistances);
    }
#endif
  } else {
    if (l2Distance_) {
      runL2Distance(resources_,
                    vectors_,
                    storeTransposed_ ? &vectorsTransposed_ : nullptr,
                    &norms_,
                    input,
                    k,
                    outDistances,
                    outIndices,
                    // FIXME
                    !exactDistance,
                    tileSize);
    } else {
      runIPDistance(resources_,
                    vectors_,
                    storeTransposed_ ? &vectorsTransposed_ : nullptr,
                    input,
                    k,
                    outDistances,
                    outIndices,
                    tileSize);
    }
  }
}
162 
#ifdef FAISS_USE_FLOAT16
/// Half-precision k-NN query against the float16-stored database.
/// Only valid when the index was constructed with useFloat16 = true.
/// `useFloat16Accumulator_` is forwarded to the distance kernels to
/// select the accumulation precision.
void
FlatIndex::query(Tensor<half, 2, true>& input,
                 int k,
                 Tensor<half, 2, true>& outDistances,
                 Tensor<int, 2, true>& outIndices,
                 bool exactDistance,
                 int tileSize) {
  FAISS_ASSERT(useFloat16_);

  if (l2Distance_) {
    runL2Distance(resources_,
                  vectorsHalf_,
                  storeTransposed_ ? &vectorsHalfTransposed_ : nullptr,
                  &normsHalf_,
                  input,
                  k,
                  outDistances,
                  outIndices,
                  useFloat16Accumulator_,
                  // FIXME
                  !exactDistance,
                  tileSize);
  } else {
    runIPDistance(resources_,
                  vectorsHalf_,
                  storeTransposed_ ? &vectorsHalfTransposed_ : nullptr,
                  input,
                  k,
                  outDistances,
                  outIndices,
                  useFloat16Accumulator_,
                  tileSize);
  }
}
#endif
199 
/// Appends `numVecs` float32 vectors from `data` (host or device
/// pointer) to the index on `stream`, converting to float16 if that is
/// our storage format, then rebuilds the derived state: the tensor
/// views over rawData_, the optional transposed copy, and (for L2
/// search) the per-vector norms.
// NOTE(review): the declaration line `DeviceTensor<float, 2, true>
// vectors(` was dropped by the doc extraction (source line 237);
// restored by symmetry with the float16 branch just above.
void
FlatIndex::add(const float* data, int numVecs, cudaStream_t stream) {
  if (numVecs == 0) {
    return;
  }

  if (useFloat16_) {
#ifdef FAISS_USE_FLOAT16
    // Make sure that `data` is on our device; we'll run the
    // conversion on our device
    auto devData = toDevice<float, 2>(resources_,
                                      getCurrentDevice(),
                                      (float*) data,
                                      stream,
                                      {numVecs, dim_});

    auto devDataHalf = toHalf<2>(resources_, stream, devData);

    rawData_.append((char*) devDataHalf.data(),
                    devDataHalf.getSizeInBytes(),
                    stream);
#endif
  } else {
    rawData_.append((char*) data,
                    (size_t) dim_ * numVecs * sizeof(float),
                    stream);
  }

  num_ += numVecs;

  // Re-wrap rawData_ (which may have been reallocated by append) in
  // tensor views of the new size.
  if (useFloat16_) {
#ifdef FAISS_USE_FLOAT16
    DeviceTensor<half, 2, true> vectorsHalf(
      (half*) rawData_.data(), {(int) num_, dim_}, space_);
    vectorsHalf_ = std::move(vectorsHalf);
#endif
  } else {
    DeviceTensor<float, 2, true> vectors(
      (float*) rawData_.data(), {(int) num_, dim_}, space_);
    vectors_ = std::move(vectors);
  }

  if (storeTransposed_) {
    if (useFloat16_) {
#ifdef FAISS_USE_FLOAT16
      vectorsHalfTransposed_ =
        std::move(DeviceTensor<half, 2, true>({dim_, (int) num_}, space_));
      runTransposeAny(vectorsHalf_, 0, 1, vectorsHalfTransposed_, stream);
#endif
    } else {
      vectorsTransposed_ =
        std::move(DeviceTensor<float, 2, true>({dim_, (int) num_}, space_));
      runTransposeAny(vectors_, 0, 1, vectorsTransposed_, stream);
    }
  }

  if (l2Distance_) {
    // Precompute L2 norms of our database
    if (useFloat16_) {
#ifdef FAISS_USE_FLOAT16
      DeviceTensor<half, 1, true> normsHalf({(int) num_}, space_);
      runL2Norm(vectorsHalf_, normsHalf, true, stream);
      normsHalf_ = std::move(normsHalf);
#endif
    } else {
      DeviceTensor<float, 1, true> norms({(int) num_}, space_);
      runL2Norm(vectors_, norms, true, stream);
      norms_ = std::move(norms);
    }
  }
}
271 
/// Frees all storage and resets the index to empty.
// NOTE(review): the `FlatIndex::reset() {` signature line was dropped by
// the doc extraction; restored from the cross-reference
// (`void reset()`, FlatIndex.cu:273).
void
FlatIndex::reset() {
  rawData_.clear();
  // Replace the views with default (empty) tensors so they no longer
  // alias the freed raw storage.
  vectors_ = std::move(DeviceTensor<float, 2, true>());
  norms_ = std::move(DeviceTensor<float, 1, true>());
  num_ = 0;
}
279 
280 } }
DeviceTensor< float, 2, true > getVectorsFloat32Copy(cudaStream_t stream)
Definition: FlatIndex.cu:91
cudaStream_t getDefaultStreamCurrentDevice()
Calls getDefaultStream with the current device.
int getSize() const
Returns the number of vectors we contain.
Definition: FlatIndex.cu:47
DeviceMemory & getMemoryManagerCurrentDevice()
Calls getMemoryManager for the current device.
__host__ void copyTo(Tensor< T, Dim, Contig, IndexT, PtrTraits > &t, cudaStream_t stream)
Copies ourselves into a tensor; sizes must match.
Definition: Tensor-inl.cuh:139
void reserve(size_t numVecs, cudaStream_t stream)
Reserve storage that can contain at least this many vectors.
Definition: FlatIndex.cu:68
void add(const float *data, int numVecs, cudaStream_t stream)
Definition: FlatIndex.cu:201
Our tensor type.
Definition: Tensor.cuh:30
__host__ __device__ IndexT getSize(int i) const
Definition: Tensor.cuh:221
Tensor< float, 2, true > & getVectorsFloat32Ref()
Returns a reference to our vectors currently in use.
Definition: FlatIndex.cu:79
void reset()
Free all storage.
Definition: FlatIndex.cu:273