9 #include "FlatIndex.cuh"
10 #include "Distance.cuh"
12 #include "../utils/CopyUtils.cuh"
13 #include "../utils/DeviceUtils.h"
14 #include "../utils/Transpose.cuh"
16 namespace faiss {
namespace gpu {
FlatIndex::FlatIndex(GpuResources* res,
                     int dim,
                     bool l2Distance,
                     bool useFloat16,
                     bool useFloat16Accumulator,
                     bool storeTransposed,
                     MemorySpace space) :
    resources_(res),
    dim_(dim),
    useFloat16_(useFloat16),
    useFloat16Accumulator_(useFloat16Accumulator),
    storeTransposed_(storeTransposed),
    l2Distance_(l2Distance),
    space_(space),
    num_(0),
    rawData_(space) {
#ifndef FAISS_USE_FLOAT16
  // Half-precision storage is only available when built with FAISS_USE_FLOAT16
  FAISS_ASSERT(!useFloat16_);
#endif
}
bool
FlatIndex::getUseFloat16() const {
  return useFloat16_;
}

int FlatIndex::getSize() const {
#ifdef FAISS_USE_FLOAT16
  if (useFloat16_) {
    return vectorsHalf_.getSize(0);
  }
#endif

  return vectors_.getSize(0);
}

int FlatIndex::getDim() const {
#ifdef FAISS_USE_FLOAT16
  if (useFloat16_) {
    return vectorsHalf_.getSize(1);
  }
#endif

  return vectors_.getSize(1);
}
void
FlatIndex::reserve(size_t numVecs, cudaStream_t stream) {
  if (useFloat16_) {
#ifdef FAISS_USE_FLOAT16
    rawData_.reserve(numVecs * dim_ * sizeof(half), stream);
#endif
  } else {
    rawData_.reserve(numVecs * dim_ * sizeof(float), stream);
  }
}
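// Note on sizing (editorial comment, not from the original source): rawData_
// is reserved in bytes, so half-precision storage halves the footprint of the
// stored vectors. As an illustrative figure, 1,000,000 vectors of dimension
// 128 need about 1e6 * 128 * 2 bytes ~= 256 MB as half versus ~= 512 MB as
// float32.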
#ifdef FAISS_USE_FLOAT16
Tensor<half, 2, true>&
FlatIndex::getVectorsFloat16Ref() {
  return vectorsHalf_;
}
#endif

DeviceTensor<float, 2, true>
FlatIndex::getVectorsFloat32Copy(cudaStream_t stream) {
  return getVectorsFloat32Copy(0, num_, stream);
}

DeviceTensor<float, 2, true>
FlatIndex::getVectorsFloat32Copy(int from, int num, cudaStream_t stream) {
  DeviceTensor<float, 2, true> vecFloat32({num, dim_}, space_);

  if (useFloat16_) {
#ifdef FAISS_USE_FLOAT16
    // Convert the stored half-precision vectors back to float32
    runConvertToFloat32(vecFloat32.data(),
                        vectorsHalf_[from].data(),
                        num * dim_, stream);
#endif
  } else {
    vectors_.copyTo(vecFloat32, stream);
  }

  return vecFloat32;
}
void
FlatIndex::query(Tensor<float, 2, true>& input,
                 int k,
                 Tensor<float, 2, true>& outDistances,
                 Tensor<int, 2, true>& outIndices,
                 bool exactDistance) {
  auto stream = resources_->getDefaultStreamCurrentDevice();
  auto& mem = resources_->getMemoryManagerCurrentDevice();

  if (useFloat16_) {
    // We need to convert the input to half precision
#ifdef FAISS_USE_FLOAT16
    auto inputHalf = toHalf<2>(resources_, stream, input);

    DeviceTensor<half, 2, true> outDistancesHalf(
      mem, {outDistances.getSize(0), outDistances.getSize(1)}, stream);

    query(inputHalf, k, outDistancesHalf, outIndices, exactDistance);

    if (exactDistance) {
      // Convert the half-precision distances back to float32
      fromHalf<2>(stream, outDistancesHalf, outDistances);
    }
#endif
  } else {
    if (l2Distance_) {
      runL2Distance(resources_,
                    storeTransposed_ ? vectorsTransposed_ : vectors_,
                    !storeTransposed_, // is vectors row major?
                    &norms_,
                    input,
                    true, // input is row major
                    k,
                    outDistances,
                    outIndices,
                    !exactDistance);
    } else {
      runIPDistance(resources_,
                    storeTransposed_ ? vectorsTransposed_ : vectors_,
                    !storeTransposed_, // is vectors row major?
                    input,
                    true, // input is row major
                    k,
                    outDistances,
                    outIndices);
    }
  }
}
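// Note on the two query() overloads (editorial comment, not from the original
// source): when useFloat16_ is set, the float32 overload above converts its
// input to half, re-dispatches to the half-precision overload below, and only
// converts the resulting distances back to float32 when exact distances were
// requested. The overload below additionally forwards useFloat16Accumulator_
// to the distance kernels.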
#ifdef FAISS_USE_FLOAT16
void
FlatIndex::query(Tensor<half, 2, true>& input,
                 int k,
                 Tensor<half, 2, true>& outDistances,
                 Tensor<int, 2, true>& outIndices,
                 bool exactDistance) {
  FAISS_ASSERT(useFloat16_);

  if (l2Distance_) {
    runL2Distance(resources_,
                  storeTransposed_ ? vectorsHalfTransposed_ : vectorsHalf_,
                  !storeTransposed_, // is vectors row major?
                  &normsHalf_,
                  input,
                  true, // input is row major
                  k,
                  outDistances,
                  outIndices,
                  useFloat16Accumulator_,
                  !exactDistance);
  } else {
    runIPDistance(resources_,
                  storeTransposed_ ? vectorsHalfTransposed_ : vectorsHalf_,
                  !storeTransposed_, // is vectors row major?
                  input,
                  true, // input is row major
                  k,
                  outDistances,
                  outIndices,
                  useFloat16Accumulator_);
  }
}
#endif
void
FlatIndex::add(const float* data, int numVecs, cudaStream_t stream) {
  if (numVecs == 0) {
    return;
  }

  if (useFloat16_) {
#ifdef FAISS_USE_FLOAT16
    // Make sure that `data` is resident on our device; we run the
    // float32 -> float16 conversion there
    auto devData = toDevice<float, 2>(resources_,
                                      getCurrentDevice(),
                                      (float*) data,
                                      stream,
                                      {numVecs, dim_});

    auto devDataHalf = toHalf<2>(resources_, stream, devData);

    rawData_.append((char*) devDataHalf.data(),
                    devDataHalf.getSizeInBytes(),
                    stream,
                    true /* reserve exactly */);
#endif
  } else {
    rawData_.append((char*) data,
                    (size_t) dim_ * numVecs * sizeof(float),
                    stream,
                    true /* reserve exactly */);
  }

  num_ += numVecs;

  if (useFloat16_) {
#ifdef FAISS_USE_FLOAT16
    DeviceTensor<half, 2, true> vectorsHalf(
      (half*) rawData_.data(), {(int) num_, dim_}, space_);
    vectorsHalf_ = std::move(vectorsHalf);
#endif
  } else {
    DeviceTensor<float, 2, true> vectors(
      (float*) rawData_.data(), {(int) num_, dim_}, space_);
    vectors_ = std::move(vectors);
  }

  if (storeTransposed_) {
    if (useFloat16_) {
#ifdef FAISS_USE_FLOAT16
      vectorsHalfTransposed_ =
        std::move(DeviceTensor<half, 2, true>({dim_, (int) num_}, space_));
      runTransposeAny(vectorsHalf_, 0, 1, vectorsHalfTransposed_, stream);
#endif
    } else {
      vectorsTransposed_ =
        std::move(DeviceTensor<float, 2, true>({dim_, (int) num_}, space_));
      runTransposeAny(vectors_, 0, 1, vectorsTransposed_, stream);
    }
  }

  if (l2Distance_) {
    // Precompute L2 norms of the stored vectors
    if (useFloat16_) {
#ifdef FAISS_USE_FLOAT16
      DeviceTensor<half, 1, true> normsHalf({(int) num_}, space_);
      runL2Norm(vectorsHalf_, true, normsHalf, true, stream);
      normsHalf_ = std::move(normsHalf);
#endif
    } else {
      DeviceTensor<float, 1, true> norms({(int) num_}, space_);
      runL2Norm(vectors_, true, norms, true, stream);
      norms_ = std::move(norms);
    }
  }
}
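// Illustrative usage sketch (editorial comment, not part of the original
// file). Assumes an already-initialized GpuResources* `res`, a CUDA stream,
// and a float32 L2 index; names other than FlatIndex's own API are
// placeholders.
//
//   FlatIndex index(res, dim,
//                   true,   // l2Distance
//                   false,  // useFloat16
//                   false,  // useFloat16Accumulator
//                   false,  // storeTransposed
//                   MemorySpace::Device);
//   index.reserve(numVecs, stream);
//   index.add(hostVectors, numVecs, stream);  // numVecs x dim row-major floats
//
//   DeviceTensor<float, 2, true> queries({numQueries, dim});
//   DeviceTensor<float, 2, true> outDistances({numQueries, k});
//   DeviceTensor<int, 2, true> outIndices({numQueries, k});
//   index.query(queries, k, outDistances, outIndices, true /* exactDistance */);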