11 #include "FlatIndex.cuh"
12 #include "Distance.cuh"
14 #include "../utils/CopyUtils.cuh"
15 #include "../utils/DeviceUtils.h"
16 #include "../utils/Transpose.cuh"
18 namespace faiss {
namespace gpu {
20 FlatIndex::FlatIndex(GpuResources* res,
24 bool useFloat16Accumulator,
29 useFloat16_(useFloat16),
30 useFloat16Accumulator_(useFloat16Accumulator),
31 storeTransposed_(storeTransposed),
32 l2Distance_(l2Distance),
36 #ifndef FAISS_USE_FLOAT16
37 FAISS_ASSERT(!useFloat16_);
42 FlatIndex::getUseFloat16()
const {
48 #ifdef FAISS_USE_FLOAT16
50 return vectorsHalf_.getSize(0);
57 int FlatIndex::getDim()
const {
58 #ifdef FAISS_USE_FLOAT16
60 return vectorsHalf_.getSize(1);
70 #ifdef FAISS_USE_FLOAT16
71 rawData_.reserve(numVecs * dim_ *
sizeof(half), stream);
74 rawData_.reserve(numVecs * dim_ *
sizeof(
float), stream);
83 #ifdef FAISS_USE_FLOAT16
85 FlatIndex::getVectorsFloat16Ref() {
90 DeviceTensor<float, 2, true>
100 #ifdef FAISS_USE_FLOAT16
101 runConvertToFloat32(vecFloat32.data(),
102 vectorsHalf_[from].data(),
106 vectors_.
copyTo(vecFloat32, stream);
124 #ifdef FAISS_USE_FLOAT16
125 auto inputHalf = toHalf<2>(resources_, stream, input);
130 query(inputHalf, k, outDistancesHalf, outIndices, exactDistance, tileSize);
134 fromHalf<2>(stream, outDistancesHalf, outDistances);
139 runL2Distance(resources_,
141 storeTransposed_ ? &vectorsTransposed_ :
nullptr,
151 runIPDistance(resources_,
153 storeTransposed_ ? &vectorsTransposed_ :
nullptr,
163 #ifdef FAISS_USE_FLOAT16
165 FlatIndex::query(Tensor<half, 2, true>& input,
167 Tensor<half, 2, true>& outDistances,
168 Tensor<int, 2, true>& outIndices,
171 FAISS_ASSERT(useFloat16_);
174 runL2Distance(resources_,
176 storeTransposed_ ? &vectorsHalfTransposed_ :
nullptr,
182 useFloat16Accumulator_,
187 runIPDistance(resources_,
189 storeTransposed_ ? &vectorsHalfTransposed_ :
nullptr,
194 useFloat16Accumulator_,
207 #ifdef FAISS_USE_FLOAT16
210 auto devData = toDevice<float, 2>(resources_,
216 auto devDataHalf = toHalf<2>(resources_, stream, devData);
218 rawData_.append((
char*) devDataHalf.data(),
219 devDataHalf.getSizeInBytes(),
223 rawData_.append((
char*) data,
224 (
size_t) dim_ * numVecs *
sizeof(
float),
231 #ifdef FAISS_USE_FLOAT16
233 (half*) rawData_.data(), {(int) num_, dim_}, space_);
234 vectorsHalf_ = std::move(vectorsHalf);
238 (
float*) rawData_.data(), {(int) num_, dim_}, space_);
239 vectors_ = std::move(vectors);
242 if (storeTransposed_) {
244 #ifdef FAISS_USE_FLOAT16
245 vectorsHalfTransposed_ =
247 runTransposeAny(vectorsHalf_, 0, 1, vectorsHalfTransposed_, stream);
252 runTransposeAny(vectors_, 0, 1, vectorsTransposed_, stream);
259 #ifdef FAISS_USE_FLOAT16
261 runL2Norm(vectorsHalf_, normsHalf,
true, stream);
262 normsHalf_ = std::move(normsHalf);
266 runL2Norm(vectors_, norms,
true, stream);
267 norms_ = std::move(norms);
DeviceTensor<float, 2, true> getVectorsFloat32Copy(cudaStream_t stream)
cudaStream_t getDefaultStreamCurrentDevice()
Calls getDefaultStream with the current device.
int getSize() const
Returns the number of vectors we contain.
DeviceMemory& getMemoryManagerCurrentDevice()
Calls getMemoryManager for the current device.
__host__ void copyTo(Tensor<T, Dim, Contig, IndexT, PtrTraits>& t, cudaStream_t stream)
Copies ourselves into a tensor; sizes must match.
void reserve(size_t numVecs, cudaStream_t stream)
Reserve storage that can contain at least this many vectors.
void add(const float *data, int numVecs, cudaStream_t stream)
__host__ __device__ IndexT getSize(int i) const
Tensor< float, 2, true > & getVectorsFloat32Ref()
Returns a reference to our vectors currently in use.
void reset()
Free all storage.