#include "FlatIndex.cuh"
#include "Distance.cuh"
#include "L2Norm.cuh"
#include "../utils/CopyUtils.cuh"
#include "../utils/DeviceUtils.h"
#include "../utils/Transpose.cuh"

namespace faiss { namespace gpu {

FlatIndex::FlatIndex(GpuResources* res, int dim, bool l2Distance,
                     bool useFloat16, bool useFloat16Accumulator,
                     bool storeTransposed, MemorySpace space) :
    resources_(res), dim_(dim), useFloat16_(useFloat16),
    useFloat16Accumulator_(useFloat16Accumulator),
    storeTransposed_(storeTransposed), l2Distance_(l2Distance),
    num_(0), space_(space), rawData_(space) {
#ifndef FAISS_USE_FLOAT16
  // float16 storage can only be requested if float16 support is compiled in
  FAISS_ASSERT(!useFloat16_);
#endif
}

bool
FlatIndex::getUseFloat16() const {
  return useFloat16_;
}

// Returns the number of vectors we contain
int FlatIndex::getSize() const {
#ifdef FAISS_USE_FLOAT16
  if (useFloat16_) {
    return vectorsHalf_.getSize(0);
  }
#endif
  return vectors_.getSize(0);
}

int FlatIndex::getDim() const {
#ifdef FAISS_USE_FLOAT16
  if (useFloat16_) {
    return vectorsHalf_.getSize(1);
  }
#endif
  return vectors_.getSize(1);
}

// Reserve storage that can contain at least this many vectors
void
FlatIndex::reserve(size_t numVecs, cudaStream_t stream) {
  if (useFloat16_) {
#ifdef FAISS_USE_FLOAT16
    rawData_.reserve(numVecs * dim_ * sizeof(half), stream);
#endif
  } else {
    rawData_.reserve(numVecs * dim_ * sizeof(float), stream);
  }
}

#ifdef FAISS_USE_FLOAT16
Tensor<half, 2, true>&
FlatIndex::getVectorsFloat16Ref() {
  return vectorsHalf_;
}
#endif

DeviceTensor<float, 2, true>
FlatIndex::getVectorsFloat32Copy(cudaStream_t stream) {
  return getVectorsFloat32Copy(0, num_, stream);
}

DeviceTensor<float, 2, true>
FlatIndex::getVectorsFloat32Copy(int from, int num, cudaStream_t stream) {
  DeviceTensor<float, 2, true> vecFloat32({num, dim_}, space_);

  if (useFloat16_) {
#ifdef FAISS_USE_FLOAT16
    // Convert the half-precision storage back to float32
    runConvertToFloat32(vecFloat32.data(),
                        vectorsHalf_[from].data(), num * dim_, stream);
#endif
  } else {
    vectors_.copyTo(vecFloat32, stream);
  }

  return vecFloat32;
}

void
FlatIndex::query(Tensor<float, 2, true>& input, int k,
                 Tensor<float, 2, true>& outDistances,
                 Tensor<int, 2, true>& outIndices, bool exactDistance) {
  auto stream = resources_->getDefaultStreamCurrentDevice();
  auto& mem = resources_->getMemoryManagerCurrentDevice();

  if (useFloat16_) {
#ifdef FAISS_USE_FLOAT16
    // Convert the queries to float16 and run the half-precision path
    auto inputHalf = toHalf<2>(resources_, stream, input);
    DeviceTensor<half, 2, true> outDistancesHalf(
      mem, {outDistances.getSize(0), outDistances.getSize(1)}, stream);

    query(inputHalf, k, outDistancesHalf, outIndices, exactDistance);

    if (exactDistance) {
      // Convert the half-precision distances back to float32
      fromHalf<2>(stream, outDistancesHalf, outDistances);
    }
#endif
  } else if (l2Distance_) {
    runL2Distance(resources_, vectors_,
                  storeTransposed_ ? &vectorsTransposed_ : nullptr, &norms_,
                  input, k, outDistances, outIndices, !exactDistance);
  } else {
    runIPDistance(resources_, vectors_,
                  storeTransposed_ ? &vectorsTransposed_ : nullptr,
                  input, k, outDistances, outIndices);
  }
}

#ifdef FAISS_USE_FLOAT16
void
FlatIndex::query(Tensor<half, 2, true>& input, int k,
                 Tensor<half, 2, true>& outDistances,
                 Tensor<int, 2, true>& outIndices, bool exactDistance) {
  FAISS_ASSERT(useFloat16_);

  if (l2Distance_) {
    runL2Distance(resources_, vectorsHalf_,
                  storeTransposed_ ? &vectorsHalfTransposed_ : nullptr,
                  &normsHalf_, input, k, outDistances, outIndices,
                  useFloat16Accumulator_, !exactDistance);
  } else {
    runIPDistance(resources_, vectorsHalf_,
                  storeTransposed_ ? &vectorsHalfTransposed_ : nullptr,
                  input, k, outDistances, outIndices,
                  useFloat16Accumulator_);
  }
}
#endif

void
FlatIndex::add(const float* data, int numVecs, cudaStream_t stream) {
  if (numVecs == 0) {
    return;
  }

  if (useFloat16_) {
#ifdef FAISS_USE_FLOAT16
    // Make sure the data is on our device, then convert it to float16
    auto devData = toDevice<float, 2>(resources_, getCurrentDevice(),
                                      (float*) data, stream, {numVecs, dim_});
    auto devDataHalf = toHalf<2>(resources_, stream, devData);

    rawData_.append((char*) devDataHalf.data(),
                    devDataHalf.getSizeInBytes(), stream);
#endif
  } else {
    rawData_.append((char*) data,
                    (size_t) dim_ * numVecs * sizeof(float), stream);
  }

  num_ += numVecs;

  // Rebuild the tensor views over the raw storage
  if (useFloat16_) {
#ifdef FAISS_USE_FLOAT16
    DeviceTensor<half, 2, true> vectorsHalf(
      (half*) rawData_.data(), {(int) num_, dim_}, space_);
    vectorsHalf_ = std::move(vectorsHalf);
#endif
  } else {
    DeviceTensor<float, 2, true> vectors(
      (float*) rawData_.data(), {(int) num_, dim_}, space_);
    vectors_ = std::move(vectors);
  }

  if (storeTransposed_) {
    if (useFloat16_) {
#ifdef FAISS_USE_FLOAT16
      vectorsHalfTransposed_ =
        DeviceTensor<half, 2, true>({dim_, (int) num_}, space_);
      runTransposeAny(vectorsHalf_, 0, 1, vectorsHalfTransposed_, stream);
#endif
    } else {
      vectorsTransposed_ =
        DeviceTensor<float, 2, true>({dim_, (int) num_}, space_);
      runTransposeAny(vectors_, 0, 1, vectorsTransposed_, stream);
    }
  }

  if (l2Distance_) {
    // Precompute the L2 norms of the stored vectors
    if (useFloat16_) {
#ifdef FAISS_USE_FLOAT16
      DeviceTensor<half, 1, true> normsHalf({(int) num_}, space_);
      runL2Norm(vectorsHalf_, normsHalf, true, stream);
      normsHalf_ = std::move(normsHalf);
#endif
    } else {
      DeviceTensor<float, 1, true> norms({(int) num_}, space_);
      runL2Norm(vectors_, norms, true, stream);
      norms_ = std::move(norms);
    }
  }
}
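
// Usage sketch: roughly how a caller such as GpuIndexFlat might drive this
// class. The GpuResources pointer, tensors, stream, and sizes below are
// assumed to be set up by the caller; the names are illustrative only.
//
//   FlatIndex index(res, dim, /*l2Distance=*/true, /*useFloat16=*/false,
//                   /*useFloat16Accumulator=*/false, /*storeTransposed=*/false,
//                   MemorySpace::Device);
//   index.reserve(numVecs, stream);           // pre-size the raw storage
//   index.add(vecs, numVecs, stream);         // append vectors, rebuild views
//   index.query(queries, k, outDistances, outIndices, /*exactDistance=*/true);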