Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
FlatIndex.cu
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 
10 #include "FlatIndex.cuh"
11 #include "Distance.cuh"
12 #include "L2Norm.cuh"
13 #include "../utils/CopyUtils.cuh"
14 #include "../utils/DeviceUtils.h"
15 #include "../utils/Transpose.cuh"
16 
17 namespace faiss { namespace gpu {
18 
/// Constructor. Records configuration and sets up empty storage;
/// no device memory is allocated until reserve()/add() is called.
///
/// @param res                  GPU resource manager (streams, temp memory)
/// @param dim                  dimensionality of each vector
/// @param l2Distance           true: L2 distance; false: inner product
/// @param useFloat16           store vectors as float16 instead of float32
/// @param useFloat16Accumulator accumulate distance math in float16 as well
/// @param storeTransposed      also keep a dim x num transposed copy
/// @param space                memory space for all device allocations
FlatIndex::FlatIndex(GpuResources* res,
                     int dim,
                     bool l2Distance,
                     bool useFloat16,
                     bool useFloat16Accumulator,
                     bool storeTransposed,
                     MemorySpace space) :
    resources_(res),
    dim_(dim),
    useFloat16_(useFloat16),
    useFloat16Accumulator_(useFloat16Accumulator),
    storeTransposed_(storeTransposed),
    l2Distance_(l2Distance),
    space_(space),
    num_(0),          // no vectors yet
    rawData_(space) {
  // float16 storage can only be requested when the library was compiled
  // with FAISS_USE_FLOAT16 support; otherwise this is a hard error
#ifndef FAISS_USE_FLOAT16
  FAISS_ASSERT(!useFloat16_);
#endif
}
39 
/// Whether our vectors are stored internally as float16 (versus float32)
bool
FlatIndex::getUseFloat16() const {
  return useFloat16_;
}
44 
/// Returns the number of vectors we contain
int FlatIndex::getSize() const {
#ifdef FAISS_USE_FLOAT16
  // Size comes from whichever tensor view is actually in use
  return useFloat16_ ? vectorsHalf_.getSize(0) : vectors_.getSize(0);
#else
  return vectors_.getSize(0);
#endif
}
55 
/// Returns the dimensionality of the vectors we contain
int FlatIndex::getDim() const {
#ifdef FAISS_USE_FLOAT16
  // Dimension comes from whichever tensor view is actually in use
  return useFloat16_ ? vectorsHalf_.getSize(1) : vectors_.getSize(1);
#else
  return vectors_.getSize(1);
#endif
}
65 
/// Reserve raw storage for at least `numVecs` vectors ahead of add()
void
FlatIndex::reserve(size_t numVecs, cudaStream_t stream) {
  if (!useFloat16_) {
    rawData_.reserve(numVecs * dim_ * sizeof(float), stream);
  } else {
    // half-sized elements; only available with float16 support compiled in
#ifdef FAISS_USE_FLOAT16
    rawData_.reserve(numVecs * dim_ * sizeof(half), stream);
#endif
  }
}
76 
/// Returns a reference to our vectors currently in use (float32 storage).
/// NOTE: the signature lines were lost in extraction; reconstructed from
/// this file's own doxygen index (Tensor<float, 2, true>& at line 78).
Tensor<float, 2, true>&
FlatIndex::getVectorsFloat32Ref() {
  return vectors_;
}
81 
#ifdef FAISS_USE_FLOAT16
/// Returns a reference to our vectors currently in use (float16 storage).
/// NOTE: the return-type line was lost in extraction; reconstructed to
/// mirror getVectorsFloat32Ref() over the half-precision tensor.
Tensor<half, 2, true>&
FlatIndex::getVectorsFloat16Ref() {
  return vectorsHalf_;
}
#endif
88 
/// Returns a float32 copy of all of our vectors, converting from
/// float16 storage if necessary
DeviceTensor<float, 2, true>
FlatIndex::getVectorsFloat32Copy(cudaStream_t stream) {
  // Delegate to the ranged overload, covering the full range [0, num_)
  return getVectorsFloat32Copy(0, num_, stream);
}
93 
95 FlatIndex::getVectorsFloat32Copy(int from, int num, cudaStream_t stream) {
96  DeviceTensor<float, 2, true> vecFloat32({num, dim_}, space_);
97 
98  if (useFloat16_) {
99 #ifdef FAISS_USE_FLOAT16
100  runConvertToFloat32(vecFloat32.data(),
101  vectorsHalf_[from].data(),
102  num * dim_, stream);
103 #endif
104  } else {
105  vectors_.copyTo(vecFloat32, stream);
106  }
107 
108  return vecFloat32;
109 }
110 
/// k-nearest-neighbor query against our stored vectors, writing the best
/// k distances and indices per query row into outDistances/outIndices.
/// When `exactDistance` is false the reported distances may be
/// approximate (see the FIXME pass-through below).
void
FlatIndex::query(Tensor<float, 2, true>& input,
                 int k,
                 Tensor<float, 2, true>& outDistances,
                 Tensor<int, 2, true>& outIndices,
                 bool exactDistance) {
  auto stream = resources_->getDefaultStreamCurrentDevice();
  auto& mem = resources_->getMemoryManagerCurrentDevice();

  if (useFloat16_) {
    // We need to convert to float16 and run the half-precision query path
#ifdef FAISS_USE_FLOAT16
    auto inputHalf = toHalf<2>(resources_, stream, input);

    // Temporary half-precision distance buffer, allocated from the
    // per-device temporary memory manager
    DeviceTensor<half, 2, true> outDistancesHalf(
      mem, {outDistances.getSize(0), outDistances.getSize(1)}, stream);

    query(inputHalf, k, outDistancesHalf, outIndices, exactDistance);

    if (exactDistance) {
      // Convert outDistances back to float32.
      // NOTE(review): when !exactDistance the float32 outDistances is left
      // untouched — presumably callers ignore distances in that mode; confirm.
      fromHalf<2>(stream, outDistancesHalf, outDistances);
    }
#endif
  } else {
    if (l2Distance_) {
      runL2Distance(resources_,
                    vectors_,
                    storeTransposed_ ? &vectorsTransposed_ : nullptr,
                    &norms_,
                    input,
                    k,
                    outDistances,
                    outIndices,
                    // FIXME
                    !exactDistance);
    } else {
      // Inner-product distance; exactDistance has no effect on this path
      runIPDistance(resources_,
                    vectors_,
                    storeTransposed_ ? &vectorsTransposed_ : nullptr,
                    input,
                    k,
                    outDistances,
                    outIndices);
    }
  }
}
158 
#ifdef FAISS_USE_FLOAT16
/// Half-precision variant of query(); only valid when this index was
/// constructed with useFloat16 == true (asserted below). Distances are
/// produced in float16; accumulation precision is controlled by
/// useFloat16Accumulator_.
void
FlatIndex::query(Tensor<half, 2, true>& input,
                 int k,
                 Tensor<half, 2, true>& outDistances,
                 Tensor<int, 2, true>& outIndices,
                 bool exactDistance) {
  // This entry point only services float16 storage
  FAISS_ASSERT(useFloat16_);

  if (l2Distance_) {
    runL2Distance(resources_,
                  vectorsHalf_,
                  storeTransposed_ ? &vectorsHalfTransposed_ : nullptr,
                  &normsHalf_,
                  input,
                  k,
                  outDistances,
                  outIndices,
                  useFloat16Accumulator_,
                  // FIXME
                  !exactDistance);
  } else {
    // Inner-product distance; exactDistance has no effect on this path
    runIPDistance(resources_,
                  vectorsHalf_,
                  storeTransposed_ ? &vectorsHalfTransposed_ : nullptr,
                  input,
                  k,
                  outDistances,
                  outIndices,
                  useFloat16Accumulator_);
  }
}
#endif
192 
/// Appends `numVecs` vectors of dimension dim_ (contiguous float32 data)
/// to our storage, converting to float16 if required, then rebuilds all
/// derived state: the tensor views over rawData_, the optional transposed
/// copy, and (for L2 indices) the precomputed vector norms.
/// NOTE: the declaration line of the float32 DeviceTensor view (original
/// line 232) was lost in extraction and is reconstructed here.
void
FlatIndex::add(const float* data, int numVecs, cudaStream_t stream) {
  if (numVecs == 0) {
    return;
  }

  if (useFloat16_) {
#ifdef FAISS_USE_FLOAT16
    // Make sure that `data` is on our device; we'll run the
    // conversion on our device
    auto devData = toDevice<float, 2>(resources_,
                                      getCurrentDevice(),
                                      (float*) data,
                                      stream,
                                      {numVecs, dim_});

    auto devDataHalf = toHalf<2>(resources_, stream, devData);

    rawData_.append((char*) devDataHalf.data(),
                    devDataHalf.getSizeInBytes(),
                    stream,
                    true /* reserve exactly */);
#endif
  } else {
    rawData_.append((char*) data,
                    (size_t) dim_ * numVecs * sizeof(float),
                    stream,
                    true /* reserve exactly */);
  }

  num_ += numVecs;

  // Re-point our tensor views at the (possibly reallocated) raw storage
  if (useFloat16_) {
#ifdef FAISS_USE_FLOAT16
    DeviceTensor<half, 2, true> vectorsHalf(
      (half*) rawData_.data(), {(int) num_, dim_}, space_);
    vectorsHalf_ = std::move(vectorsHalf);
#endif
  } else {
    DeviceTensor<float, 2, true> vectors(
      (float*) rawData_.data(), {(int) num_, dim_}, space_);
    vectors_ = std::move(vectors);
  }

  if (storeTransposed_) {
    // Maintain a dim_ x num_ transposed copy of the database
    if (useFloat16_) {
#ifdef FAISS_USE_FLOAT16
      vectorsHalfTransposed_ =
        std::move(DeviceTensor<half, 2, true>({dim_, (int) num_}, space_));
      runTransposeAny(vectorsHalf_, 0, 1, vectorsHalfTransposed_, stream);
#endif
    } else {
      vectorsTransposed_ =
        std::move(DeviceTensor<float, 2, true>({dim_, (int) num_}, space_));
      runTransposeAny(vectors_, 0, 1, vectorsTransposed_, stream);
    }
  }

  if (l2Distance_) {
    // Precompute L2 norms of our database for use during distance queries
    if (useFloat16_) {
#ifdef FAISS_USE_FLOAT16
      DeviceTensor<half, 1, true> normsHalf({(int) num_}, space_);
      runL2Norm(vectorsHalf_, normsHalf, true, stream);
      normsHalf_ = std::move(normsHalf);
#endif
    } else {
      DeviceTensor<float, 1, true> norms({(int) num_}, space_);
      runL2Norm(vectors_, norms, true, stream);
      norms_ = std::move(norms);
    }
  }
}
266 
/// Free all storage and return this index to the empty state.
/// NOTE: the signature line (original line 268) was lost in extraction and
/// is reconstructed from this file's doxygen index (`void reset()`).
void
FlatIndex::reset() {
  rawData_.clear();
  vectors_ = std::move(DeviceTensor<float, 2, true>());
  vectorsTransposed_ = std::move(DeviceTensor<float, 2, true>());
  norms_ = std::move(DeviceTensor<float, 1, true>());
#ifdef FAISS_USE_FLOAT16
  // Bug fix: the float16 views were previously left untouched, so after
  // reset() a float16 index would still report a stale getSize() and
  // query against freed raw storage
  vectorsHalf_ = std::move(DeviceTensor<half, 2, true>());
  vectorsHalfTransposed_ = std::move(DeviceTensor<half, 2, true>());
  normsHalf_ = std::move(DeviceTensor<half, 1, true>());
#endif
  num_ = 0;
}
274 
275 } }
DeviceTensor< float, 2, true > getVectorsFloat32Copy(cudaStream_t stream)
Definition: FlatIndex.cu:90
cudaStream_t getDefaultStreamCurrentDevice()
Calls getDefaultStream with the current device.
int getSize() const
Returns the number of vectors we contain.
Definition: FlatIndex.cu:46
DeviceMemory & getMemoryManagerCurrentDevice()
Calls getMemoryManager for the current device.
void reserve(size_t numVecs, cudaStream_t stream)
Reserve storage that can contain at least this many vectors.
Definition: FlatIndex.cu:67
void add(const float *data, int numVecs, cudaStream_t stream)
Definition: FlatIndex.cu:194
__host__ __device__ IndexT getSize(int i) const
Definition: Tensor.cuh:223
__host__ void copyTo(Tensor< T, Dim, InnerContig, IndexT, PtrTraits > &t, cudaStream_t stream)
Copies ourselves into a tensor; sizes must match.
Definition: Tensor-inl.cuh:170
Our tensor type.
Definition: Tensor.cuh:29
Tensor< float, 2, true > & getVectorsFloat32Ref()
Returns a reference to our vectors currently in use.
Definition: FlatIndex.cu:78
void reset()
Free all storage.
Definition: FlatIndex.cu:268