#include "IVFFlat.cuh"
#include "../GpuResources.h"
#include "FlatIndex.cuh"
#include "InvertedListAppend.cuh"
#include "IVFFlatScan.cuh"
#include "RemapIndices.h"
#include "../utils/CopyUtils.cuh"
#include "../utils/DeviceDefs.cuh"
#include "../utils/DeviceUtils.h"
#include "../utils/Float16.cuh"
#include "../utils/HostTensor.cuh"
#include "../utils/Transpose.cuh"

#include <thrust/host_vector.h>
#include <unordered_map>
namespace faiss { namespace gpu {
IVFFlat::IVFFlat(GpuResources* resources, FlatIndex* quantizer,
                 bool l2Distance, bool useFloat16,
                 IndicesOptions indicesOptions, MemorySpace space) :
    IVFBase(resources, quantizer,
#ifdef FAISS_USE_FLOAT16
            useFloat16 ?
            sizeof(half) * quantizer->getDim()
            : sizeof(float) * quantizer->getDim(),
#else
            sizeof(float) * quantizer->getDim(),
#endif
            indicesOptions, space),
    l2Distance_(l2Distance),
    useFloat16_(useFloat16) {
#ifndef FAISS_USE_FLOAT16
  FAISS_ASSERT_MSG(!useFloat16, "float16 unsupported");
#endif
}
// IVFFlat::addCodeVectorsFromCpu (excerpt)
  auto prevData = listData->data();
  // ...
  // List lengths are kept as int32 on the GPU, so the appended byte
  // size must remain within int range
  FAISS_ASSERT(listData->size() + lengthInBytes <=
               (size_t) std::numeric_limits<int>::max());

  if (useFloat16_) {
#ifdef FAISS_USE_FLOAT16
    // Convert the incoming float32 data to half before appending
    auto floatData = toDevice<float, 1>(resources_, getCurrentDevice(),
                                        (float*) vecs, stream,
                                        {(int) numVecs * dim_});
    auto halfData = toHalf<1>(resources_, stream, floatData);

    listData->append((unsigned char*) halfData.data(), lengthInBytes,
                     stream, true /* exact reserved size */);
#endif
  } else {
    listData->append((unsigned char*) vecs, lengthInBytes,
                     stream, true /* exact reserved size */);
  }
  // ...
  // The append may have reallocated the list storage; if so, update
  // the device-side pointer table
  if (prevData != listData->data()) {
    deviceListDataPointers_[listId] = listData->data();
  }
  // ...
  streamWait({stream}, {0});
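The append path guards the int32 length representation before growing the list. A CPU-only sketch of the same pattern, with illustrative names (listData as a plain byte vector rather than the FAISS DeviceVector API):

#include <cassert>
#include <cstddef>
#include <limits>
#include <vector>

// Append numVecs encoded vectors to a byte-addressed list, enforcing
// the int32 length limit used for GPU-side list lengths.
void appendToList(std::vector<unsigned char>& listData,
                  const float* vecs, size_t numVecs,
                  size_t bytesPerVector) {
  size_t lengthInBytes = numVecs * bytesPerVector;
  // The byte size must stay within int range after the append
  assert(listData.size() + lengthInBytes <=
         (size_t) std::numeric_limits<int>::max());
  const unsigned char* src = reinterpret_cast<const unsigned char*>(vecs);
  listData.insert(listData.end(), src, src + lengthInBytes);
}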
// IVFFlat::classifyAndAddVectors (excerpt)
  auto listIds = listIds2d.view<1>({vecs.getSize(0)});

  // Find the closest inverted list (centroid) for each new vector
  quantizer_->query(vecs, 1, listDistance, listIds2d, false);
  // ...
  // list id -> number of vectors being added to that list
  std::unordered_map<int, int> assignCounts;
  // ...
  for (int i = 0; i < listIds.getSize(0); ++i) {
    int listId = listIdsHost[i];

    // The vector may be invalid (e.g., contains NaNs); skip it
    if (listId < 0) {
      listOffsetHost[i] = -1;
      continue;
    }
    // ...
    auto it = assignCounts.find(listId);
    if (it != assignCounts.end()) {
      offset += it->second;
      ++it->second;
    } else {
      assignCounts[listId] = 1;
    }

    listOffsetHost[i] = offset;
  }
  // ...
  // Resize each appended-to list (data and, if device-resident, indices)
  for (auto& counts : assignCounts) {
    // ...
    auto& indices = deviceListIndices_[counts.first];
    // ...
    indices->resize(indices->size() + counts.second * indexSize, stream);
    // ...
    userIndices.resize(newNumVecs);
  }
  // ...
  // Collect the ids of the lists we appended to
  std::vector<int> listIds(assignCounts.size());
  int i = 0;
  for (auto& counts : assignCounts) {
    listIds[i++] = counts.first;
  }
  // ...
  // If user indices live on the CPU, record each one at (list, offset)
  for (int i = 0; i < hostIndices.getSize(0); ++i) {
    int listId = listIdsHost[i];
    // ...
    int offset = listOffsetHost[i];
    // ...
    FAISS_ASSERT(offset < userIndices.size());
    userIndices[offset] = hostIndices[i];
  }
  // ...
  runIVFFlatInvertedListAppend(listIds, /* ... */ stream);
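The heart of the batching logic is the offset computation: vector i lands at (current size of its list) plus (how many earlier vectors in the same batch chose that list), tracked by assignCounts. A host-only sketch of just that step, with hypothetical inputs (listIds from the quantizer, listSizes counted in vectors rather than bytes):

#include <unordered_map>
#include <vector>

// For each vector, compute the write offset within its assigned list.
std::vector<int> computeListOffsets(const std::vector<int>& listIds,
                                    const std::vector<int>& listSizes) {
  std::unordered_map<int, int> assignCounts;  // list id -> # added so far
  std::vector<int> offsets(listIds.size());

  for (size_t i = 0; i < listIds.size(); ++i) {
    int listId = listIds[i];
    if (listId < 0) {
      offsets[i] = -1;  // invalid vector; skipped downstream
      continue;
    }
    int offset = listSizes[listId];  // existing vectors in this list
    auto it = assignCounts.find(listId);
    if (it != assignCounts.end()) {
      offset += it->second;  // earlier batch vectors bound for this list
      ++it->second;
    } else {
      assignCounts[listId] = 1;
    }
    offsets[i] = offset;
  }
  return offsets;
}

Invalid assignments get offset -1 and are ignored later, matching the excerpt above; assignCounts afterwards tells each list how much to grow.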
// IVFFlat::query (excerpt)
  // The scan path supports at most 1024 probes and k <= 1024
  FAISS_ASSERT(nprobe <= 1024);
  FAISS_ASSERT(k <= 1024);
  // ...
  DeviceTensor<float, 2, true>
    coarseDistances(mem, {queries.getSize(0), nprobe}, stream);
  DeviceTensor<int, 2, true>
    coarseIndices(mem, {queries.getSize(0), nprobe}, stream);
  // ...
  // Scan the selected inverted lists for the k nearest neighbors
  runIVFFlatScan(queries, /* ... */);
  // ...
  // Indices stored on the CPU: translate (list, offset) results back
  // to user indices, then return them to the device
  ivfOffsetToUserIndex(hostOutIndices.data(), /* ... */);
  // ...
  outIndices.copyFrom(hostOutIndices, stream);
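The query is two-stage: the coarse quantizer ranks lists per query, then only the nprobe closest lists are scanned exhaustively for the top k. A sequential CPU sketch of that control flow, assuming L2 distance and plain std::vector storage (nothing here is the FAISS API):

#include <algorithm>
#include <cstddef>
#include <utility>
#include <vector>

struct Result { float dist; long id; };

std::vector<Result> ivfFlatQuery(
    const std::vector<float>& query,                   // dim floats
    const std::vector<std::vector<float>>& centroids,  // numLists x dim
    const std::vector<std::vector<float>>& listData,   // flat per-list data
    const std::vector<std::vector<long>>& listIds,     // per-list user ids
    int nprobe, int k) {
  size_t dim = query.size();
  auto l2 = [&](const float* v) {
    float d = 0.0f;
    for (size_t j = 0; j < dim; ++j) {
      float t = v[j] - query[j];
      d += t * t;
    }
    return d;
  };

  // Stage 1: rank coarse centroids, keep the nprobe closest lists
  std::vector<std::pair<float, int>> coarse;
  for (size_t c = 0; c < centroids.size(); ++c) {
    coarse.push_back({l2(centroids[c].data()), (int) c});
  }
  size_t probes = std::min((size_t) nprobe, coarse.size());
  std::partial_sort(coarse.begin(), coarse.begin() + probes, coarse.end());

  // Stage 2: exhaustively scan only the selected lists
  std::vector<Result> results;
  for (size_t p = 0; p < probes; ++p) {
    int list = coarse[p].second;
    for (size_t i = 0; i < listIds[list].size(); ++i) {
      results.push_back({l2(listData[list].data() + i * dim),
                         listIds[list][i]});
    }
  }

  // Keep the k best candidates overall
  size_t topk = std::min((size_t) k, results.size());
  std::partial_sort(results.begin(), results.begin() + topk, results.end(),
                    [](const Result& a, const Result& b) {
                      return a.dist < b.dist;
                    });
  results.resize(topk);
  return results;
}

The GPU version does both stages batched over all queries and selects the top k with on-device kernels, but the candidate set per query is the same.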
// IVFFlat::getListVectors (excerpt)
  if (useFloat16_) {
#ifdef FAISS_USE_FLOAT16
    // Vectors are encoded as half; convert back to float32 on the
    // device before copying to the host
    size_t num = encVecs.size() / sizeof(half);
    // ...
    auto devFloat = fromHalf(resources_, stream, devHalf);

    std::vector<float> out(num);
    // ...
    hostFloat.copyFrom(devFloat, stream);
    return out;
#endif
  }

  // Vectors are already float32; copy them straight back to the host
  size_t num = encVecs.size() / sizeof(float);
  // ...
  std::vector<float> out(num);
  // ...
  hostFloat.copyFrom(devFloat, stream);
  return out;
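For the float32 path, the decode is just a reinterpretation of the list's raw bytes followed by a device-to-host copy; the float16 path inserts a fromHalf conversion first, since callers always receive float32. The CPU analogue of the float32 branch, with illustrative names:

#include <cstring>
#include <vector>

// Reinterpret a list's raw byte storage as float32 vectors and copy
// them out; encVecs plays the role of the device-side list data.
std::vector<float> getListVectorsFloat32(
    const std::vector<unsigned char>& encVecs) {
  size_t num = encVecs.size() / sizeof(float);
  std::vector<float> out(num);
  std::memcpy(out.data(), encVecs.data(), num * sizeof(float));
  return out;
}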
Referenced declarations:

const int numLists_
    Number of inverted lists we maintain.

int maxListLength_
    Maximum list length seen.

cudaStream_t getDefaultStreamCurrentDevice()
    Calls getDefaultStream with the current device.

int getSize() const
    Returns the number of vectors we contain.

std::vector<std::vector<long>> listOffsetToUserIndex_
    CPU-side map from (inverted list id, offset in list) to the user
    index, used when indices are stored on the CPU.

FlatIndex
    Holder of GPU resources for a particular flat index.

__host__ __device__ DataPtrType data()
    Returns a raw pointer to the start of our data.

IVFBase
    Base inverted list functionality for IVFFlat and IVFPQ.

IVFFlat(GpuResources* resources, FlatIndex* quantizer, bool l2Distance,
        bool useFloat16, IndicesOptions indicesOptions, MemorySpace space)
    Construct from a quantizer that has elements resident on the GPU.

thrust::device_vector<int> deviceListLengths_
    Device-resident lengths of each inverted list.

thrust::device_vector<void*> deviceListIndexPointers_
    Device-resident pointers to each inverted list's indices.

int classifyAndAddVectors(Tensor<float, 2, true>& vecs,
                          Tensor<long, 1, true>& indices)
    Classify vectors into inverted lists and append them; returns the
    number of vectors successfully added.

DeviceMemory& getMemoryManagerCurrentDevice()
    Calls getMemoryManager for the current device.

FlatIndex* quantizer_
    Quantizer object.

thrust::device_vector<void*> deviceListDataPointers_
    Device-resident pointers to each inverted list's encoded data.

GpuResources* resources_
    Collection of GPU resources that we use.

__host__ __device__ Tensor<T, SubDim, Contig, IndexT, PtrTraits> view(DataPtrType at)
    Returns a SubDim-dimensional view of our data starting at the given
    pointer.

void addCodeVectorsFromCpu(int listId, const float* vecs,
                           const long* indices, size_t numVecs)
    Append CPU-resident vectors and their user indices to the given list.

const int bytesPerVector_
    Number of bytes per vector in the list.

void query(Tensor<float, 2, true>& queries, int nprobe, int k,
           Tensor<float, 2, true>& outDistances,
           Tensor<long, 2, true>& outIndices)
    Find the k nearest neighbors of the queries, probing nprobe inverted
    lists per query.

std::vector<float> getListVectors(int listId) const
    Return the vectors of a particular list back to the CPU.

void updateDeviceListInfo_(cudaStream_t stream)
    Update all device-side list pointer and size information.

__host__ __device__ IndexT getSize(int i) const
    Returns the size of dimension i.

__host__ void copyFrom(Tensor<T, Dim, Contig, IndexT, PtrTraits>& t,
                       cudaStream_t stream)
    Copies a tensor into ourselves; sizes must match.

const IndicesOptions indicesOptions_
    How are user indices stored on the GPU?

std::vector<std::unique_ptr<DeviceVector<unsigned char>>> deviceListData_
    Encoded vector storage for each inverted list.

const int dim_
    Expected dimensionality of the vectors.

void addIndicesFromCpu_(int listId, const long* indices, size_t numVecs)
    Shared function to copy indices from CPU to GPU.
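When indicesOptions_ is INDICES_CPU, the GPU scan reports each result as a packed (inverted list id, offset within list) value, and ivfOffsetToUserIndex substitutes the caller's original ids from listOffsetToUserIndex_. A host-side sketch of that substitution; the high-32/low-32 bit packing is an assumption for illustration, not taken from this listing:

#include <cstddef>
#include <vector>

// Translate packed (list id, offset) results into user indices.
// ASSUMPTION: list id in the high 32 bits, offset in the low 32 bits;
// `long` is assumed to be 64 bits wide, as in FAISS's GPU code.
void offsetToUserIndex(
    long* indices, size_t n,
    const std::vector<std::vector<long>>& listOffsetToUserIndex) {
  for (size_t i = 0; i < n; ++i) {
    long v = indices[i];
    if (v < 0) {
      continue;  // slot holds no result
    }
    int listId = (int) (v >> 32);
    int offset = (int) (v & 0xffffffff);
    indices[i] = listOffsetToUserIndex[listId][offset];
  }
}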