#include "FlatIndex.cuh"
#include "Distance.cuh"
#include "L2Norm.cuh"
#include "../utils/CopyUtils.cuh"
#include "../utils/DeviceUtils.h"
#include "../utils/Transpose.cuh"

namespace faiss { namespace gpu {
FlatIndex::FlatIndex(GpuResources* res,
                     int dim,
                     bool l2Distance,
                     bool useFloat16,
                     bool storeTransposed) :
    resources_(res),
    dim_(dim),
    useFloat16_(useFloat16),
    storeTransposed_(storeTransposed),
    l2Distance_(l2Distance),
    num_(0) {
#ifndef FAISS_USE_FLOAT16
  // Half-precision storage requires a build with float16 support
  FAISS_ASSERT(!useFloat16_);
#endif
}
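// Illustrative sketch (not part of this file): FlatIndex is normally created
// and configured through the public GPU wrapper rather than directly. The
// names below (StandardGpuResources, GpuIndexFlatConfig, GpuIndexFlatL2) are
// from the standard faiss::gpu API of this era; exact config fields may
// differ across versions.
//
//   faiss::gpu::StandardGpuResources res;
//   faiss::gpu::GpuIndexFlatConfig config;
//   config.useFloat16 = true;       // store database vectors as half
//   config.storeTransposed = true;  // also keep a transposed (d x n) copy
//   faiss::gpu::GpuIndexFlatL2 index(&res, 128, config);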
bool
FlatIndex::getUseFloat16() const {
  return useFloat16_;
}

// Returns the number of vectors we contain
int FlatIndex::getSize() const {
#ifdef FAISS_USE_FLOAT16
  if (useFloat16_) {
    return vectorsHalf_.getSize(0);
  }
#endif
  return vectors_.getSize(0);
}
int FlatIndex::getDim() const {
#ifdef FAISS_USE_FLOAT16
  if (useFloat16_) {
    return vectorsHalf_.getSize(1);
  }
#endif
  return vectors_.getSize(1);
}
// Returns a reference to our vectors currently in use
Tensor<float, 2, true>&
FlatIndex::getVectorsFloat32Ref() {
  return vectors_;
}

#ifdef FAISS_USE_FLOAT16
Tensor<half, 2, true>&
FlatIndex::getVectorsFloat16Ref() {
  return vectorsHalf_;
}
#endif
DeviceTensor<float, 2, true>
FlatIndex::getVectorsFloat32Copy(int from, int num, cudaStream_t stream) {
  DeviceTensor<float, 2, true> vecFloat32({num, dim_});

  if (useFloat16_) {
#ifdef FAISS_USE_FLOAT16
    // Convert our half-precision storage back to float32
    runConvertToFloat32(vecFloat32.data(),
                        vectorsHalf_[from].data(),
                        num * dim_, stream);
#endif
  } else {
    // Already stored as float32; just copy
    vectors_.copyTo(vecFloat32, stream);
  }

  return vecFloat32;
}
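// Illustrative sketch (not part of this file): callers that need the stored
// vectors in float32, regardless of the storage precision, can copy them out
// via the accessor above; `flatIndex` and `resources` are hypothetical.
//
//   auto stream = resources->getDefaultStreamCurrentDevice();
//   DeviceTensor<float, 2, true> vecs =
//     flatIndex.getVectorsFloat32Copy(0, flatIndex.getSize(), stream);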
void
FlatIndex::query(Tensor<float, 2, true>& input,
                 int k,
                 Tensor<float, 2, true>& outDistances,
                 Tensor<int, 2, true>& outIndices,
                 bool exactDistance,
                 int tileSize) {
  auto stream = resources_->getDefaultStreamCurrentDevice();
  auto& mem = resources_->getMemoryManagerCurrentDevice();

  if (useFloat16_) {
#ifdef FAISS_USE_FLOAT16
    // Our vectors are stored as half; convert the queries, run the
    // half-precision path, then convert the distances back if needed
    auto inputHalf = toHalf<2>(resources_, stream, input);

    DeviceTensor<half, 2, true> outDistancesHalf(
      mem, {outDistances.getSize(0), outDistances.getSize(1)}, stream);

    query(inputHalf, k, outDistancesHalf, outIndices, exactDistance, tileSize);

    if (exactDistance) {
      fromHalf<2>(stream, outDistancesHalf, outDistances);
    }
#endif
  } else {
    if (l2Distance_) {
      runL2Distance(resources_,
                    vectors_,
                    storeTransposed_ ? &vectorsTransposed_ : nullptr,
                    &norms_,
                    input, k, outDistances, outIndices,
                    !exactDistance, // skip exact distance output if not needed
                    tileSize);
    } else {
      runIPDistance(resources_,
                    vectors_,
                    storeTransposed_ ? &vectorsTransposed_ : nullptr,
                    input, k, outDistances, outIndices, tileSize);
    }
  }
}
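// Illustrative sketch (not part of this file): driving the float32 query
// path directly. The tensor construction from the device memory manager
// mirrors the pattern used above; `flatIndex`, `queries`, `numQueries` and
// the tiling value are hypothetical.
//
//   auto stream = resources->getDefaultStreamCurrentDevice();
//   auto& mem = resources->getMemoryManagerCurrentDevice();
//
//   DeviceTensor<float, 2, true> distances(mem, {numQueries, k}, stream);
//   DeviceTensor<int, 2, true> indices(mem, {numQueries, k}, stream);
//
//   flatIndex.query(queries, k, distances, indices,
//                   true, // exactDistance: report true L2 values
//                   -1);  // tiling hint forwarded to the distance kernels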
#ifdef FAISS_USE_FLOAT16
void
FlatIndex::query(Tensor<half, 2, true>& input,
                 int k,
                 Tensor<half, 2, true>& outDistances,
                 Tensor<int, 2, true>& outIndices,
                 bool exactDistance,
                 int tileSize) {
  FAISS_ASSERT(useFloat16_);

  if (l2Distance_) {
    runL2Distance(resources_,
                  vectorsHalf_,
                  storeTransposed_ ? &vectorsHalfTransposed_ : nullptr,
                  &normsHalf_,
                  input, k, outDistances, outIndices,
                  !exactDistance, // skip exact distance output if not needed
                  tileSize);
  } else {
    runIPDistance(resources_,
                  vectorsHalf_,
                  storeTransposed_ ? &vectorsHalfTransposed_ : nullptr,
                  input, k, outDistances, outIndices, tileSize);
  }
}
#endif
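// Note on the precomputed norms (norms_ / normsHalf_, built in add() below):
// runL2Distance expands the squared L2 distance as
//
//   ||q - v||^2 = ||q||^2 + ||v||^2 - 2 <q, v>
//
// so the dominant cost is a single matrix multiplication of the queries
// against the stored vectors (or their transposed copy), with the cached
// ||v||^2 terms added afterwards. When exactDistance is false, only the
// ranking within each query row is needed, so the constant ||q||^2 term can
// be dropped.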
void
FlatIndex::add(const float* data, int numVecs, cudaStream_t stream) {
  if (useFloat16_) {
#ifdef FAISS_USE_FLOAT16
    // Make sure the data is resident on our device, convert it to half,
    // then append it to the raw storage
    auto devData = toDevice<float, 2>(resources_,
                                      getCurrentDevice(),
                                      (float*) data,
                                      stream,
                                      {numVecs, dim_});
    auto devDataHalf = toHalf<2>(resources_, stream, devData);

    rawData_.append((char*) devDataHalf.data(),
                    devDataHalf.getSizeInBytes(),
                    stream);
#endif
  } else {
    rawData_.append((char*) data,
                    (size_t) dim_ * numVecs * sizeof(float),
                    stream);
  }

  num_ += numVecs;

  // Rebuild our tensor views over the (possibly reallocated) raw storage
  if (useFloat16_) {
#ifdef FAISS_USE_FLOAT16
    DeviceTensor<half, 2, true> vectorsHalf(
      (half*) rawData_.data(), {(int) num_, dim_});
    vectorsHalf_ = std::move(vectorsHalf);
#endif
  } else {
    DeviceTensor<float, 2, true> vectors(
      (float*) rawData_.data(), {(int) num_, dim_});
    vectors_ = std::move(vectors);
  }

  if (storeTransposed_) {
    // Also keep a transposed (dim x num) copy for the distance kernels
    if (useFloat16_) {
#ifdef FAISS_USE_FLOAT16
      vectorsHalfTransposed_ =
        DeviceTensor<half, 2, true>({dim_, (int) num_});
      runTransposeAny(vectorsHalf_, 0, 1, vectorsHalfTransposed_, stream);
#endif
    } else {
      vectorsTransposed_ =
        DeviceTensor<float, 2, true>({dim_, (int) num_});
      runTransposeAny(vectors_, 0, 1, vectorsTransposed_, stream);
    }
  }

  if (l2Distance_) {
    // Precompute the (squared) L2 norms of our database vectors
    if (useFloat16_) {
#ifdef FAISS_USE_FLOAT16
      DeviceTensor<half, 1, true> normsHalf({(int) num_});
      runL2Norm(vectorsHalf_, normsHalf, true, stream);
      normsHalf_ = std::move(normsHalf);
#endif
    } else {
      DeviceTensor<float, 1, true> norms({(int) num_});
      runL2Norm(vectors_, norms, true, stream);
      norms_ = std::move(norms);
    }
  }
}

} } // namespace
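// ---------------------------------------------------------------------------
// Illustrative end-to-end sketch (not part of this file): exercising the flat
// index through the public faiss::gpu API, which wraps FlatIndex internally.
// Assumes the standard StandardGpuResources / GpuIndexFlatL2 interface and
// the header layout of an installed Faiss; data and sizes are made up.
//
//   #include <faiss/gpu/StandardGpuResources.h>
//   #include <faiss/gpu/GpuIndexFlat.h>
//   #include <vector>
//   #include <random>
//
//   int main() {
//     int d = 64, nb = 100000, nq = 16, k = 5;
//     std::mt19937 rng(123);
//     std::uniform_real_distribution<float> dist;
//
//     std::vector<float> xb(size_t(nb) * d), xq(size_t(nq) * d);
//     for (auto& v : xb) { v = dist(rng); }
//     for (auto& v : xq) { v = dist(rng); }
//
//     faiss::gpu::StandardGpuResources res;
//     faiss::gpu::GpuIndexFlatL2 index(&res, d);   // brute-force L2 on GPU
//     index.add(nb, xb.data());                    // -> FlatIndex::add
//
//     std::vector<float> distances(size_t(nq) * k);
//     std::vector<faiss::Index::idx_t> labels(size_t(nq) * k);
//     index.search(nq, xq.data(), k,
//                  distances.data(), labels.data()); // -> FlatIndex::query
//     return 0;
//   }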