#include "IVFFlat.cuh"
#include "../GpuResources.h"
#include "FlatIndex.cuh"
#include "InvertedListAppend.cuh"
#include "IVFFlatScan.cuh"
#include "RemapIndices.h"
#include "../utils/CopyUtils.cuh"
#include "../utils/DeviceDefs.cuh"
#include "../utils/DeviceUtils.h"
#include "../utils/Float16.cuh"
#include "../utils/HostTensor.cuh"
#include "../utils/Transpose.cuh"

#include <limits>   // std::numeric_limits, used below
#include <thrust/host_vector.h>
#include <unordered_map>
namespace faiss { namespace gpu {
IVFFlat::IVFFlat(GpuResources* resources,
                 FlatIndex* quantizer,
                 bool l2Distance,
                 bool useFloat16,
                 IndicesOptions indicesOptions) :
    IVFBase(resources, quantizer,
#ifdef FAISS_USE_FLOAT16
            // Per-vector byte size depends on the encoded storage format
            useFloat16 ? sizeof(half) * quantizer->getDim()
                       : sizeof(float) * quantizer->getDim(),
#else
            sizeof(float) * quantizer->getDim(),
#endif
            indicesOptions),
    l2Distance_(l2Distance),
    useFloat16_(useFloat16) {
#ifndef FAISS_USE_FLOAT16
  FAISS_ASSERT(!useFloat16 || !"float16 unsupported");
#endif
}
void
IVFFlat::addCodeVectorsFromCpu(int listId,
                               const float* vecs,
                               const long* indices,
                               size_t numVecs) {
  auto stream = resources_->getDefaultStreamCurrentDevice();
  // ...
  size_t lengthInBytes = numVecs * bytesPerVector_;
  auto& listData = deviceListData_[listId];
  auto prevData = listData->data();

  // Each list keeps only an int32 length on the GPU
  FAISS_ASSERT(listData->size() + lengthInBytes <=
               (size_t) std::numeric_limits<int>::max());

  if (useFloat16_) {
#ifdef FAISS_USE_FLOAT16
    // Convert the incoming float32 data to float16 on the device
    auto floatData = toDevice<float, 1>(resources_, getCurrentDevice(),
                                        (float*) vecs, stream,
                                        {(int) numVecs * dim_});
    auto halfData = toHalf<1>(resources_, stream, floatData);
    listData->append((unsigned char*) halfData.data(), lengthInBytes, stream);
#endif
  } else {
    listData->append((unsigned char*) vecs, lengthInBytes, stream);
  }

  addIndicesFromCpu_(listId, indices, numVecs);

  // The append may have reallocated the list storage; republish the
  // device pointer only if it moved
  if (prevData != listData->data()) {
    deviceListDataPointers_[listId] = listData->data();
  }
  // ...
  streamWait({stream}, {0});
}
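// A self-contained CPU analogue of the append/republish pattern above, to
// make the prevData check concrete. std::vector stands in for DeviceVector;
// all names here are illustrative, not Faiss API.
#include <cstddef>
#include <vector>

struct ListAnalogue {
  std::vector<unsigned char> data;       // encoded vectors, back to back
  const unsigned char* publishedPtr = nullptr;
  int publishedLength = 0;               // length in vectors, not bytes

  void append(const unsigned char* src, std::size_t bytes,
              std::size_t bytesPerVector) {
    const unsigned char* prev = data.data();
    data.insert(data.end(), src, src + bytes);
    if (prev != data.data()) {           // storage moved: republish pointer
      publishedPtr = data.data();
    }
    publishedLength = (int) (data.size() / bytesPerVector);
  }
};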
int
IVFFlat::classifyAndAddVectors(Tensor<float, 2, true>& vecs,
                               Tensor<long, 1, true>& indices) {
  auto& mem = resources_->getMemoryManagerCurrentDevice();
  auto stream = resources_->getDefaultStreamCurrentDevice();
  // ...
  auto listIds = listIds2d.view<1>({vecs.getSize(0)});

  // k = 1: assign each input vector to its single closest coarse centroid
  quantizer_->query(vecs, 1, listDistance, listIds2d, false);
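// What the k = 1 quantizer query computes, as a plain CPU sketch: the index
// of the closest centroid per vector (L2 case). Illustrative only; the real
// FlatIndex::query runs on the GPU and also returns the distance.
#include <cmath>
#include <vector>

int closestCentroid(const float* vec,
                    const std::vector<std::vector<float>>& centroids) {
  int best = -1;
  float bestDist = INFINITY;
  for (int c = 0; c < (int) centroids.size(); ++c) {
    float dist = 0.0f;
    for (std::size_t d = 0; d < centroids[c].size(); ++d) {
      float diff = vec[d] - centroids[c][d];
      dist += diff * diff;
    }
    if (dist < bestDist) { bestDist = dist; best = c; }
  }
  return best;
}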
  // list id -> number of vectors being appended to that list in this batch
  std::unordered_map<int, int> assignCounts;
  // ...
  for (int i = 0; i < listIds.getSize(0); ++i) {
    int listId = listIdsHost[i];

    // Invalid vectors (e.g. rejected by the quantizer) get no slot
    if (listId < 0) {
      listOffsetHost[i] = -1;
      continue;
    }
    // ... (`offset` starts at the list's current length, computed in
    // elided code)
    auto it = assignCounts.find(listId);
    if (it != assignCounts.end()) {
      offset += it->second;
      ++it->second;
    } else {
      assignCounts[listId] = 1;
    }

    listOffsetHost[i] = offset;
  }
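// The offset bookkeeping above, extracted as a self-contained function:
// given each vector's list id and the current length of every list, produce
// the slot each vector will occupy. currentListLength stands in for
// deviceListData_[listId]->size() / bytesPerVector_ and is an assumption.
#include <cstddef>
#include <unordered_map>
#include <vector>

std::vector<int> computeWriteOffsets(const std::vector<int>& listIds,
                                     const std::vector<int>& currentListLength) {
  std::unordered_map<int, int> assignCounts;  // list id -> # queued so far
  std::vector<int> offsets(listIds.size());

  for (std::size_t i = 0; i < listIds.size(); ++i) {
    int listId = listIds[i];
    if (listId < 0) {
      offsets[i] = -1;                        // invalid vector gets no slot
      continue;
    }
    int offset = currentListLength[listId];   // existing vectors in the list
    auto it = assignCounts.find(listId);
    if (it != assignCounts.end()) {
      offset += it->second;                   // plus ones queued this batch
      ++it->second;
    } else {
      assignCounts[listId] = 1;
    }
    offsets[i] = offset;
  }
  return offsets;
}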
  // Grow the storage of every list that receives vectors in this batch
  for (auto& counts : assignCounts) {
    // ... (resize the encoded-vector storage for list counts.first)
    auto& indices = deviceListIndices_[counts.first];
    // ... (indexSize depends on indicesOptions_)
    indices->resize(indices->size() + counts.second * indexSize, stream);
    // ...
    auto& userIndices = listOffsetToUserIndex_[counts.first];
    userIndices.resize(newNumVecs);
  }

  // Collect the unique lists that grew so device-side pointers and
  // lengths can be refreshed in one pass
  std::vector<int> listIds(assignCounts.size());
  int i = 0;
  for (auto& counts : assignCounts) {
    listIds[i++] = counts.first;
  }
  // ... (updateDeviceListInfo_ over listIds)

  // With INDICES_CPU, record each appended vector's user index in the
  // per-list offset -> user index map
  for (int i = 0; i < hostIndices.getSize(0); ++i) {
    int listId = listIdsHost[i];
    // ... (skip invalid vectors)
    int offset = listOffsetHost[i];
    auto& userIndices = listOffsetToUserIndex_[listId];
    FAISS_ASSERT(offset < userIndices.size());
    userIndices[offset] = hostIndices[i];
  }
  // ...

  // Finally, append the encoded vectors and indices on the GPU
  runIVFFlatInvertedListAppend(listIds, /* ... */);
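// A CPU analogue of the device-side append that runIVFFlatInvertedListAppend
// performs, for float32 lists: each vector is copied into its precomputed
// slot. All names are illustrative; the real kernel consumes device pointers
// and runs on a stream.
#include <cstddef>
#include <cstring>

void appendToLists(const float* vecs, int numVecs, int dim,
                   const int* listIds, const int* offsets,
                   float** listDataPointers) {
  for (int i = 0; i < numVecs; ++i) {
    if (listIds[i] < 0) {
      continue;                        // invalid vector, skipped earlier
    }
    float* dst = listDataPointers[listIds[i]] +
                 (std::size_t) offsets[i] * dim;
    std::memcpy(dst, vecs + (std::size_t) i * dim, dim * sizeof(float));
  }
}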
void
IVFFlat::query(Tensor<float, 2, true>& queries,
               int nprobe,
               int k,
               Tensor<float, 2, true>& outDistances,
               Tensor<long, 2, true>& outIndices) {
  auto& mem = resources_->getMemoryManagerCurrentDevice();
  auto stream = resources_->getDefaultStreamCurrentDevice();

  // These limits are validated at a higher level
  FAISS_ASSERT(nprobe <= 1024);
  FAISS_ASSERT(k <= 1024);

  // Temporary space for the coarse quantizer results
  DeviceTensor<float, 2, true>
    coarseDistances(mem, {queries.getSize(0), nprobe}, stream);
  DeviceTensor<int, 2, true>
    coarseIndices(mem, {queries.getSize(0), nprobe}, stream);
  // ... (coarse quantization via quantizer_->query)

  // Scan the nprobe closest inverted lists for each query
  runIVFFlatScan(queries, /* ... */);
  // ...

  // With INDICES_CPU, results come back as (list, offset) pairs that must
  // be remapped to user indices on the host, then copied back
  HostTensor<long, 2, true> hostOutIndices(outIndices, stream);
  ivfOffsetToUserIndex(hostOutIndices.data(), /* ... */);
  // ...
  outIndices.copyFrom(hostOutIndices, stream);
}
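// A CPU sketch of the ivfOffsetToUserIndex remapping above, assuming each
// hit is reported as (list id << 32) | offset-within-list; the authoritative
// encoding lives in RemapIndices.h. A value of -1 marks an empty slot.
#include <vector>

void remapToUserIndices(long* indices, int numResults,
                        const std::vector<std::vector<long>>& listOffsetToUserIndex) {
  for (int i = 0; i < numResults; ++i) {
    long packed = indices[i];
    if (packed < 0) {
      continue;                        // no result in this slot
    }
    int listId = (int) (packed >> 32);
    int offset = (int) (packed & 0xffffffff);
    indices[i] = listOffsetToUserIndex[listId][offset];
  }
}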
std::vector<float>
IVFFlat::getListVectors(int listId) const {
  auto& encVecs = *deviceListData_[listId];
  auto stream = resources_->getDefaultStreamCurrentDevice();

  if (useFloat16_) {
#ifdef FAISS_USE_FLOAT16
    size_t num = encVecs.size() / sizeof(half);
    Tensor<half, 1, true> devHalf((half*) encVecs.data(), {(int) num});
    // Convert the float16 list data back to float32 on the device
    auto devFloat = fromHalf(resources_, stream, devHalf);

    std::vector<float> out(num);
    HostTensor<float, 1, true> hostFloat(out.data(), {(int) num});
    hostFloat.copyFrom(devFloat, stream);
    return out;
#endif
  }

  size_t num = encVecs.size() / sizeof(float);
  Tensor<float, 1, true> devFloat((float*) encVecs.data(), {(int) num});

  std::vector<float> out(num);
  HostTensor<float, 1, true> hostFloat(out.data(), {(int) num});
  hostFloat.copyFrom(devFloat, stream);
  return out;
}

} } // namespace gpu, namespace faiss
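// Hypothetical usage of the accessor above: pull one list back to the host
// and regroup the flat storage into dim-sized vectors. `index` and `dim`
// are assumptions, not values provided by this file.
std::vector<float> flat = index.getListVectors(/*listId=*/5);
size_t numVecs = flat.size() / dim;          // vectors stored contiguously
const float* vec3 = flat.data() + 3 * dim;   // start of the 4th vector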
Referenced symbols (Doxygen briefs):

const int numLists_
Number of inverted lists we maintain.
int maxListLength_
Maximum list length seen.
int getSize() const
Returns the number of vectors we contain.
std::vector< std::vector< long > > listOffsetToUserIndex_
For each inverted list, maps an offset within the list to the user index stored there (used when indices are kept on the CPU).
FlatIndex
Holder of GPU resources for a particular flat index.
__host__ __device__ DataPtrType data()
Returns a raw pointer to the start of our data.
IVFBase
Base inverted list functionality for IVFFlat and IVFPQ.
thrust::device_vector< int > deviceListLengths_
Device representation of all inverted list lengths.
thrust::device_vector< void * > deviceListIndexPointers_
Device representation of all inverted list index storage pointers.
IVFFlat(GpuResources *resources, FlatIndex *quantizer, bool l2Distance, bool useFloat16, IndicesOptions indicesOptions)
Construct from a quantizer that has elements resident on the GPU.
int classifyAndAddVectors(Tensor< float, 2, true > &vecs, Tensor< long, 1, true > &indices)
Classify and encode/add vectors to our IVF lists; returns the number of vectors successfully added.
FlatIndex * quantizer_
Quantizer object.
thrust::device_vector< void * > deviceListDataPointers_
Device representation of all inverted list data storage pointers.
GpuResources * resources_
Collection of GPU resources that we use.
__host__ __device__ Tensor< T, SubDim, Contig, IndexT, PtrTraits > view(DataPtrType at)
Returns a SubDim-dimensional view of our tensor, beginning at the given data pointer.
void addCodeVectorsFromCpu(int listId, const float *vecs, const long *indices, size_t numVecs)
Add vectors to a specific list; the input data can be on the host or on our current device.
const int bytesPerVector_
Number of bytes per vector in the list.
void query(Tensor< float, 2, true > &queries, int nprobe, int k, Tensor< float, 2, true > &outDistances, Tensor< long, 2, true > &outIndices)
Find the approximate k nearest neighbors for the queries against our database.
std::vector< float > getListVectors(int listId) const
Return the vectors of a particular list back to the CPU.
void updateDeviceListInfo_(cudaStream_t stream)
Update all device-side list pointer and size information.
__host__ __device__ IndexT getSize(int i) const
Returns the size of a given dimension, [0, Dim - 1].
__host__ void copyFrom(Tensor< T, Dim, Contig, IndexT, PtrTraits > &t, cudaStream_t stream)
Copies a tensor into ourselves; sizes must match.
const IndicesOptions indicesOptions_
How are user indices stored on the GPU?
std::vector< std::unique_ptr< DeviceVector< unsigned char > > > deviceListData_
Encoded vector data for each inverted list, stored on the device.
const int dim_
Expected dimensionality of the vectors.
void addIndicesFromCpu_(int listId, const long *indices, size_t numVecs)
Shared function to copy indices from CPU to GPU.
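Taken together, the signatures above support a short usage sketch. Everything below is illustrative: construction of the GpuResources, the FlatIndex quantizer, and the device-resident Tensors is elided/assumed, and the variable names are not from this file.

// Illustrative only; see the assumptions stated above.
IVFFlat index(resources, quantizer,
              /*l2Distance=*/true, /*useFloat16=*/false, INDICES_CPU);

// Assign each vector to its closest inverted list and append it there
int numAdded = index.classifyAndAddVectors(vecs, userIds);

// Probe the 32 closest lists per query, returning the top 10 matches
index.query(queries, /*nprobe=*/32, /*k=*/10, outDistances, outIndices);

// Pull the raw encoded vectors of list 0 back to the host
std::vector<float> list0 = index.getListVectors(0);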