11 #include "FlatIndex.cuh"
12 #include "Distance.cuh"
14 #include "../utils/CopyUtils.cuh"
15 #include "../utils/DeviceUtils.h"
16 #include "../utils/Transpose.cuh"
18 namespace faiss {
namespace gpu {
FlatIndex::FlatIndex(GpuResources* res,
                     int dim,
                     bool l2Distance,
                     bool useFloat16,
                     bool useFloat16Accumulator,
                     bool storeTransposed,
                     MemorySpace space) :
    resources_(res), dim_(dim),
    useFloat16_(useFloat16),
    useFloat16Accumulator_(useFloat16Accumulator),
    storeTransposed_(storeTransposed),
    l2Distance_(l2Distance),
    space_(space), num_(0), rawData_(space) {
#ifndef FAISS_USE_FLOAT16
  // float16 storage requires a build with FAISS_USE_FLOAT16 defined
  FAISS_ASSERT(!useFloat16_);
#endif
}
bool
FlatIndex::getUseFloat16() const {
  return useFloat16_;
}

// Returns the number of vectors we contain
int FlatIndex::getSize() const {
#ifdef FAISS_USE_FLOAT16
  if (useFloat16_) { return vectorsHalf_.getSize(0); }
#endif
  return vectors_.getSize(0);
}

int FlatIndex::getDim() const {
#ifdef FAISS_USE_FLOAT16
  if (useFloat16_) { return vectorsHalf_.getSize(1); }
#endif
  return vectors_.getSize(1);
}
void
FlatIndex::reserve(size_t numVecs, cudaStream_t stream) {
  if (useFloat16_) {
#ifdef FAISS_USE_FLOAT16
    rawData_.reserve(numVecs * dim_ * sizeof(half), stream);
#endif
  } else {
    rawData_.reserve(numVecs * dim_ * sizeof(float), stream);
  }
}
#ifdef FAISS_USE_FLOAT16
Tensor<half, 2, true>&
FlatIndex::getVectorsFloat16Ref() {
  return vectorsHalf_;
}
#endif

DeviceTensor<float, 2, true>
FlatIndex::getVectorsFloat32Copy(cudaStream_t stream) {
  return getVectorsFloat32Copy(0, num_, stream);
}

DeviceTensor<float, 2, true>
FlatIndex::getVectorsFloat32Copy(int from, int num, cudaStream_t stream) {
  DeviceTensor<float, 2, true> vecFloat32({num, dim_}, space_);

  if (useFloat16_) {
#ifdef FAISS_USE_FLOAT16
    // Convert the requested range of float16 vectors back to float32
    runConvertToFloat32(vecFloat32.data(),
                        vectorsHalf_[from].data(),
                        num * dim_, stream);
#endif
  } else {
    vectors_.copyTo(vecFloat32, stream);
  }

  return vecFloat32;
}
void
FlatIndex::query(Tensor<float, 2, true>& input,
                 int k,
                 Tensor<float, 2, true>& outDistances,
                 Tensor<int, 2, true>& outIndices,
                 bool exactDistance) {
  auto stream = resources_->getDefaultStreamCurrentDevice();
  auto& mem = resources_->getMemoryManagerCurrentDevice();

  if (useFloat16_) {
#ifdef FAISS_USE_FLOAT16
    // Convert the queries to float16 and compare against the float16 storage
    auto inputHalf = toHalf<2>(resources_, stream, input);

    DeviceTensor<half, 2, true> outDistancesHalf(
      mem, {outDistances.getSize(0), outDistances.getSize(1)}, stream);

    query(inputHalf, k, outDistancesHalf, outIndices, exactDistance);

    if (exactDistance) {
      // Convert the half-precision distances back to float32
      fromHalf<2>(stream, outDistancesHalf, outDistances);
    }
#endif
  } else if (l2Distance_) {
    runL2Distance(resources_,
                  vectors_,
                  storeTransposed_ ? &vectorsTransposed_ : nullptr,
                  &norms_,
                  input, k, outDistances, outIndices,
                  !exactDistance /* approximate distances allowed */);
  } else {
    runIPDistance(resources_,
                  vectors_,
                  storeTransposed_ ? &vectorsTransposed_ : nullptr,
                  input, k, outDistances, outIndices);
  }
}
#ifdef FAISS_USE_FLOAT16
void
FlatIndex::query(Tensor<half, 2, true>& input,
                 int k,
                 Tensor<half, 2, true>& outDistances,
                 Tensor<int, 2, true>& outIndices,
                 bool exactDistance) {
  FAISS_ASSERT(useFloat16_);

  if (l2Distance_) {
    runL2Distance(resources_,
                  vectorsHalf_,
                  storeTransposed_ ? &vectorsHalfTransposed_ : nullptr,
                  &normsHalf_,
                  input, k, outDistances, outIndices,
                  useFloat16Accumulator_,
                  !exactDistance /* approximate distances allowed */);
  } else {
    runIPDistance(resources_,
                  vectorsHalf_,
                  storeTransposed_ ? &vectorsHalfTransposed_ : nullptr,
                  input, k, outDistances, outIndices,
                  useFloat16Accumulator_);
  }
}
#endif
void
FlatIndex::add(const float* data, int numVecs, cudaStream_t stream) {
  if (numVecs == 0) {
    return;
  }

  if (useFloat16_) {
#ifdef FAISS_USE_FLOAT16
    // Ensure the input is resident on the device, then convert to float16
    auto devData = toDevice<float, 2>(resources_,
                                      getCurrentDevice(),
                                      (float*) data,
                                      stream,
                                      {numVecs, dim_});
    auto devDataHalf = toHalf<2>(resources_, stream, devData);

    rawData_.append((char*) devDataHalf.data(),
                    devDataHalf.getSizeInBytes(),
                    stream);
#endif
  } else {
    rawData_.append((char*) data,
                    (size_t) dim_ * numVecs * sizeof(float),
                    stream);
  }

  num_ += numVecs;
  if (useFloat16_) {
#ifdef FAISS_USE_FLOAT16
    // Rebuild the tensor view over the (possibly reallocated) raw storage
    DeviceTensor<half, 2, true> vectorsHalf(
      (half*) rawData_.data(), {(int) num_, dim_}, space_);
    vectorsHalf_ = std::move(vectorsHalf);
#endif
  } else {
    DeviceTensor<float, 2, true> vectors(
      (float*) rawData_.data(), {(int) num_, dim_}, space_);
    vectors_ = std::move(vectors);
  }

  if (storeTransposed_) {
    if (useFloat16_) {
#ifdef FAISS_USE_FLOAT16
      vectorsHalfTransposed_ =
        std::move(DeviceTensor<half, 2, true>({dim_, (int) num_}, space_));
      runTransposeAny(vectorsHalf_, 0, 1, vectorsHalfTransposed_, stream);
#endif
    } else {
      vectorsTransposed_ =
        std::move(DeviceTensor<float, 2, true>({dim_, (int) num_}, space_));
      runTransposeAny(vectors_, 0, 1, vectorsTransposed_, stream);
    }
  }

  if (l2Distance_) {
    // Precompute the L2 norms of our vectors for L2 distance computation
    if (useFloat16_) {
#ifdef FAISS_USE_FLOAT16
      DeviceTensor<half, 1, true> normsHalf({(int) num_}, space_);
      runL2Norm(vectorsHalf_, normsHalf, true, stream);
      normsHalf_ = std::move(normsHalf);
#endif
    } else {
      DeviceTensor<float, 1, true> norms({(int) num_}, space_);
      runL2Norm(vectors_, norms, true, stream);
      norms_ = std::move(norms);
    }
  }
}

} } // namespace
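// Usage sketch (illustrative only): roughly how a caller drives FlatIndex.
// Assumes an already-constructed GpuResources and device-resident Tensor
// views for the queries and outputs; the local names below are hypothetical.
//
//   FlatIndex index(resources, dim,
//                   true,   // l2Distance
//                   false,  // useFloat16
//                   false,  // useFloat16Accumulator
//                   false,  // storeTransposed
//                   MemorySpace::Device);
//   index.reserve(numVecs, stream);
//   index.add(vecs, numVecs, stream);   // vecs: numVecs x dim floats
//   index.query(queries, k, outDistances, outIndices, true /* exact */);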