12 #include "FlatIndex.cuh"
13 #include "Distance.cuh"
15 #include "../utils/CopyUtils.cuh"
16 #include "../utils/DeviceUtils.h"
18 namespace faiss {
namespace gpu {
20 FlatIndex::FlatIndex(GpuResources* res,
26 useFloat16_(useFloat16),
27 l2Distance_(l2Distance),
29 #ifndef FAISS_USE_FLOAT16
30 FAISS_ASSERT(!useFloat16_);
35 FlatIndex::getUseFloat16()
const {
41 #ifdef FAISS_USE_FLOAT16
43 return vectorsHalf_.getSize(0);
50 int FlatIndex::getDim()
const {
51 #ifdef FAISS_USE_FLOAT16
53 return vectorsHalf_.getSize(1);
60 Tensor<float, 2, true>&
65 #ifdef FAISS_USE_FLOAT16
67 FlatIndex::getVectorsFloat16Ref() {
72 DeviceTensor<float, 2, true>
82 #ifdef FAISS_USE_FLOAT16
83 runConvertToFloat32(vecFloat32.data(),
84 vectorsHalf_[from].data(),
88 vectors_.
copyTo(vecFloat32, stream);
101 auto stream = resources_->getDefaultStreamCurrentDevice();
102 auto& mem = resources_->getMemoryManagerCurrentDevice();
106 #ifdef FAISS_USE_FLOAT16
107 auto vecsHalf = toHalf<2>(resources_, stream, vecs);
112 query(vecsHalf, k, outDistancesHalf, outIndices, exactDistance, tileSize);
116 fromHalf<2>(stream, outDistancesHalf, outDistances);
121 runL2Distance(resources_,
132 runIPDistance(resources_,
143 #ifdef FAISS_USE_FLOAT16
145 FlatIndex::query(Tensor<half, 2, true>& vecs,
147 Tensor<half, 2, true>& outDistances,
148 Tensor<int, 2, true>& outIndices,
151 FAISS_ASSERT(useFloat16_);
154 runL2Distance(resources_,
165 runIPDistance(resources_,
183 #ifdef FAISS_USE_FLOAT16
186 auto devData = toDevice<float, 2>(resources_,
192 auto devDataHalf = toHalf<2>(resources_, stream, devData);
194 rawData_.append((
char*) devDataHalf.data(),
195 devDataHalf.getSizeInBytes(),
199 rawData_.append((
char*) data,
200 (
size_t) dim_ * numVecs *
sizeof(
float),
207 #ifdef FAISS_USE_FLOAT16
209 (half*) rawData_.data(), {(int) num_, dim_});
210 vectorsHalf_ = std::move(vectorsHalf);
214 (
float*) rawData_.data(), {(int) num_, dim_});
215 vectors_ = std::move(vectors);
221 #ifdef FAISS_USE_FLOAT16
223 runL2Norm(vectorsHalf_, normsHalf,
true, stream);
224 normsHalf_ = std::move(normsHalf);
228 runL2Norm(vectors_, norms,
true, stream);
229 norms_ = std::move(norms);
DeviceTensor< float, 2, true > getVectorsFloat32Copy(cudaStream_t stream)
int getSize() const
Returns the number of vectors we contain.
__host__ void copyTo(Tensor< T, Dim, Contig, IndexT, PtrTraits > &t, cudaStream_t stream)
Copies ourselves into a tensor; sizes must match.
void add(const float *data, int numVecs, cudaStream_t stream)
__host__ __device__ IndexT getSize(int i) const
Tensor< float, 2, true > & getVectorsFloat32Ref()
Returns a reference to our vectors currently in use.
void reset()
Free all storage.