FlatIndex.cu
/**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */

#include "FlatIndex.cuh"
#include "Distance.cuh"
#include "L2Norm.cuh"
#include "../utils/CopyUtils.cuh"
#include "../utils/DeviceUtils.h"
#include "../utils/Transpose.cuh"

namespace faiss { namespace gpu {

FlatIndex::FlatIndex(GpuResources* res,
                     int dim,
                     bool l2Distance,
                     bool useFloat16,
                     bool useFloat16Accumulator,
                     bool storeTransposed,
                     MemorySpace space) :
    resources_(res),
    dim_(dim),
    useFloat16_(useFloat16),
    useFloat16Accumulator_(useFloat16Accumulator),
    storeTransposed_(storeTransposed),
    l2Distance_(l2Distance),
    space_(space),
    num_(0),
    rawData_(space) {
#ifndef FAISS_USE_FLOAT16
  FAISS_ASSERT(!useFloat16_);
#endif
}

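/// Returns whether or not vectors are stored internally as float16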
bool
FlatIndex::getUseFloat16() const {
  return useFloat16_;
}

/// Returns the number of vectors we contain
int FlatIndex::getSize() const {
#ifdef FAISS_USE_FLOAT16
  if (useFloat16_) {
    return vectorsHalf_.getSize(0);
  }
#endif

  return vectors_.getSize(0);
}

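/// Returns the dimensionality of the vectors we contain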
int FlatIndex::getDim() const {
#ifdef FAISS_USE_FLOAT16
  if (useFloat16_) {
    return vectorsHalf_.getSize(1);
  }
#endif

  return vectors_.getSize(1);
}

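/// Reserve storage that can contain at least this many vectors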
void
FlatIndex::reserve(size_t numVecs, cudaStream_t stream) {
  if (useFloat16_) {
#ifdef FAISS_USE_FLOAT16
    rawData_.reserve(numVecs * dim_ * sizeof(half), stream);
#endif
  } else {
    rawData_.reserve(numVecs * dim_ * sizeof(float), stream);
  }
}

/// Returns a reference to our vectors currently in use
Tensor<float, 2, true>&
FlatIndex::getVectorsFloat32Ref() {
  return vectors_;
}

#ifdef FAISS_USE_FLOAT16
/// Returns a reference to our vectors currently in use (float16 mode)
Tensor<half, 2, true>&
FlatIndex::getVectorsFloat16Ref() {
  return vectorsHalf_;
}
#endif

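/// Returns a float32 copy of our vectors, converting from float16 if
/// that is how they are stored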
DeviceTensor<float, 2, true>
FlatIndex::getVectorsFloat32Copy(cudaStream_t stream) {
  return getVectorsFloat32Copy(0, num_, stream);
}

DeviceTensor<float, 2, true>
FlatIndex::getVectorsFloat32Copy(int from, int num, cudaStream_t stream) {
  DeviceTensor<float, 2, true> vecFloat32({num, dim_}, space_);

  if (useFloat16_) {
#ifdef FAISS_USE_FLOAT16
    runConvertToFloat32(vecFloat32.data(),
                        vectorsHalf_[from].data(),
                        num * dim_, stream);
#endif
  } else {
    // Copy only the requested [from, from + num) slice, so partial
    // copies work as well as full ones
    auto vectorsSlice = vectors_.narrowOutermost(from, num);
    vectorsSlice.copyTo(vecFloat32, stream);
  }

  return vecFloat32;
}

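/// For each query vector in `input`, finds the `k` nearest stored
/// vectors, writing distances and indices to the output tensors;
/// dispatches to the float16 path if that is how vectors are stored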
void
FlatIndex::query(Tensor<float, 2, true>& input,
                 int k,
                 Tensor<float, 2, true>& outDistances,
                 Tensor<int, 2, true>& outIndices,
                 bool exactDistance) {
  auto stream = resources_->getDefaultStreamCurrentDevice();
  auto& mem = resources_->getMemoryManagerCurrentDevice();

  if (useFloat16_) {
    // We need to convert to float16
#ifdef FAISS_USE_FLOAT16
    auto inputHalf = toHalf<2>(resources_, stream, input);

    DeviceTensor<half, 2, true> outDistancesHalf(
        mem, {outDistances.getSize(0), outDistances.getSize(1)}, stream);

    query(inputHalf, k, outDistancesHalf, outIndices, exactDistance);

    if (exactDistance) {
      // Convert outDistances back
      fromHalf<2>(stream, outDistancesHalf, outDistances);
    }
#endif
  } else {
    if (l2Distance_) {
      runL2Distance(resources_,
                    storeTransposed_ ? vectorsTransposed_ : vectors_,
                    !storeTransposed_, // is vectors row major?
                    &norms_,
                    input,
                    true, // input is row major
                    k,
                    outDistances,
                    outIndices,
                    !exactDistance);
    } else {
      runIPDistance(resources_,
                    storeTransposed_ ? vectorsTransposed_ : vectors_,
                    !storeTransposed_, // is vectors row major?
                    input,
                    true, // input is row major
                    k,
                    outDistances,
                    outIndices);
    }
  }
}

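/// float16 query path: both the input queries and the returned
/// distances are half precision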
#ifdef FAISS_USE_FLOAT16
void
FlatIndex::query(Tensor<half, 2, true>& input,
                 int k,
                 Tensor<half, 2, true>& outDistances,
                 Tensor<int, 2, true>& outIndices,
                 bool exactDistance) {
  FAISS_ASSERT(useFloat16_);

  if (l2Distance_) {
    runL2Distance(resources_,
                  storeTransposed_ ? vectorsHalfTransposed_ : vectorsHalf_,
                  !storeTransposed_, // is vectors row major?
                  &normsHalf_,
                  input,
                  true, // input is row major
                  k,
                  outDistances,
                  outIndices,
                  useFloat16Accumulator_,
                  // FIXME
                  !exactDistance);
  } else {
    runIPDistance(resources_,
                  storeTransposed_ ? vectorsHalfTransposed_ : vectorsHalf_,
                  !storeTransposed_, // is vectors row major?
                  input,
                  true, // input is row major
                  k,
                  outDistances,
                  outIndices,
                  useFloat16Accumulator_);
  }
}
#endif

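/// Appends numVecs vectors of dimension dim_ from `data` (which may
/// reside on the host or the device), then rebuilds our derived state
/// (tensor views, transposed copies, L2 norms)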
void
FlatIndex::add(const float* data, int numVecs, cudaStream_t stream) {
  if (numVecs == 0) {
    return;
  }

  if (useFloat16_) {
#ifdef FAISS_USE_FLOAT16
    // Make sure that `data` is on our device; we'll run the
    // conversion on our device
    auto devData = toDevice<float, 2>(resources_,
                                      getCurrentDevice(),
                                      (float*) data,
                                      stream,
                                      {numVecs, dim_});

    auto devDataHalf = toHalf<2>(resources_, stream, devData);

    rawData_.append((char*) devDataHalf.data(),
                    devDataHalf.getSizeInBytes(),
                    stream,
                    true /* reserve exactly */);
#endif
  } else {
    rawData_.append((char*) data,
                    (size_t) dim_ * numVecs * sizeof(float),
                    stream,
                    true /* reserve exactly */);
  }

  num_ += numVecs;

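  // rawData_ may have been reallocated by append, and num_ has grown;
  // re-point our non-owning tensor views at the raw storage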
  if (useFloat16_) {
#ifdef FAISS_USE_FLOAT16
    DeviceTensor<half, 2, true> vectorsHalf(
        (half*) rawData_.data(), {(int) num_, dim_}, space_);
    vectorsHalf_ = std::move(vectorsHalf);
#endif
  } else {
    DeviceTensor<float, 2, true> vectors(
        (float*) rawData_.data(), {(int) num_, dim_}, space_);
    vectors_ = std::move(vectors);
  }

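  // If requested, also maintain a transposed copy of the vectors, so the
  // distance matrix multiplications can consume them without a
  // transposition at query time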
  if (storeTransposed_) {
    if (useFloat16_) {
#ifdef FAISS_USE_FLOAT16
      vectorsHalfTransposed_ =
          std::move(DeviceTensor<half, 2, true>({dim_, (int) num_}, space_));
      runTransposeAny(vectorsHalf_, 0, 1, vectorsHalfTransposed_, stream);
#endif
    } else {
      vectorsTransposed_ =
          std::move(DeviceTensor<float, 2, true>({dim_, (int) num_}, space_));
      runTransposeAny(vectors_, 0, 1, vectorsTransposed_, stream);
    }
  }

  if (l2Distance_) {
    // Precompute L2 norms of our database
    if (useFloat16_) {
#ifdef FAISS_USE_FLOAT16
      DeviceTensor<half, 1, true> normsHalf({(int) num_}, space_);
      runL2Norm(vectorsHalf_, true, normsHalf, true, stream);
      normsHalf_ = std::move(normsHalf);
#endif
    } else {
      DeviceTensor<float, 1, true> norms({(int) num_}, space_);
      runL2Norm(vectors_, true, norms, true, stream);
      norms_ = std::move(norms);
    }
  }
}

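/// Free all storage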
void
FlatIndex::reset() {
  rawData_.clear();
  vectors_ = std::move(DeviceTensor<float, 2, true>());
  norms_ = std::move(DeviceTensor<float, 1, true>());
  num_ = 0;
}

} } // namespace
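
For orientation, here is a minimal sketch of one plausible way to drive FlatIndex directly, assuming a GPU build of Faiss. The helper name exampleFlatQuery, the host-side pointers, and the StandardGpuResources setup are hypothetical scaffolding, not part of this file; only the FlatIndex calls themselves come from the code above.

// Illustrative sketch only: construct a float32 L2 FlatIndex, add
// vectors from the host, and run an exact k-NN query.
#include "FlatIndex.cuh"
#include "../StandardGpuResources.h"
#include "../utils/CopyUtils.cuh"
#include "../utils/DeviceUtils.h"

void exampleFlatQuery(const float* hostVecs, int numVecs,
                      const float* hostQueries, int numQueries,
                      int dim, int k) {
  using namespace faiss::gpu;

  StandardGpuResources res;
  res.initializeForDevice(getCurrentDevice());
  auto stream = res.getDefaultStreamCurrentDevice();

  FlatIndex index(&res, dim,
                  true,  /* l2Distance */
                  false, /* useFloat16 */
                  false, /* useFloat16Accumulator */
                  false, /* storeTransposed */
                  MemorySpace::Device);

  // add() accepts a host (or device) pointer; data is copied to the GPU
  index.add(hostVecs, numVecs, stream);

  // Queries and outputs must be device-resident tensors
  auto queries = toDevice<float, 2>(&res, getCurrentDevice(),
                                    (float*) hostQueries, stream,
                                    {numQueries, dim});
  DeviceTensor<float, 2, true> outDistances({numQueries, k},
                                            MemorySpace::Device);
  DeviceTensor<int, 2, true> outIndices({numQueries, k},
                                        MemorySpace::Device);

  index.query(queries, k, outDistances, outIndices, true /* exact */);
}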