#include "IVFFlat.cuh"
#include "../GpuResources.h"
#include "FlatIndex.cuh"
#include "InvertedListAppend.cuh"
#include "IVFFlatScan.cuh"
#include "RemapIndices.h"
#include "../utils/CopyUtils.cuh"
#include "../utils/DeviceDefs.cuh"
#include "../utils/DeviceUtils.h"
#include "../utils/Float16.cuh"
#include "../utils/HostTensor.cuh"
#include "../utils/Transpose.cuh"

#include <thrust/host_vector.h>
#include <unordered_map>

namespace faiss { namespace gpu {
// IVFFlat constructor (excerpt; elided lines marked /* ... */). The
// bytes-per-vector value handed to the IVFBase base class depends on whether
// float16 storage is enabled.
IVFFlat::IVFFlat(GpuResources* resources,
                 FlatIndex* quantizer,
                 bool l2Distance,
                 bool useFloat16,
                 IndicesOptions indicesOptions,
                 MemorySpace space) :
    IVFBase(/* ... */
#ifdef FAISS_USE_FLOAT16
            useFloat16 ?
            sizeof(half) * quantizer->getDim()
            : sizeof(float) * quantizer->getDim(),
#else
            sizeof(float) * quantizer->getDim(),
#endif
            /* ... */),
    l2Distance_(l2Distance),
    useFloat16_(useFloat16) {
}
// addCodeVectorsFromCpu (excerpt): appends vectors and their user indices,
// held in host memory, to a single inverted list.
  auto prevData = listData->data();
  /* ... */
  // The resulting list must remain addressable with a 32-bit offset
  FAISS_ASSERT(listData->size() + lengthInBytes <=
               (size_t) std::numeric_limits<int>::max());

#ifdef FAISS_USE_FLOAT16
    /* ... */
                                    {(int) numVecs * dim_});
    auto halfData = toHalf<1>(resources_, stream, floatData);
    /* ... */
    listData->append((unsigned char*) halfData.data(), /* ... */
  /* ... */
    listData->append((unsigned char*) vecs, /* ... */
  /* ... */
  // If appending reallocated the list's storage, the device-side pointer
  // table has to be refreshed
  if (prevData != listData->data()) {
    /* ... */
  /* ... */
  streamWait({stream}, {0});
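// --- Illustrative sketch, not part of IVFFlat.cu: with float16 storage the
// host float data is first converted to half precision on the device, and the
// raw half bytes are appended; otherwise the float bytes are appended
// directly. With the trailing append arguments elided, the decision reduces
// to roughly:
//
//   if (useFloat16_) {
//     auto halfData = toHalf<1>(resources_, stream, floatData);
//     listData->append((unsigned char*) halfData.data(), /* ... */);
//   } else {
//     listData->append((unsigned char*) vecs, /* ... */);
//   }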
// classifyAndAddVectors (excerpt): assigns each input vector to its closest
// inverted list via the coarse quantizer, then appends the vectors and their
// user indices to those lists.
  auto listIds = listIds2d.view<1>({vecs.getSize(0)});

  quantizer_->query(vecs, 1, listDistance, listIds2d, false);
  /* ... */
  std::unordered_map<int, int> assignCounts;
  /* ... */
  // Determine, per vector, the destination list and the offset at which the
  // vector will be written inside that list
  for (int i = 0; i < listIds.getSize(0); ++i) {
    int listId = listIdsHost[i];
    /* ... */
      // vector could not be assigned to a list
      listOffsetHost[i] = -1;
    /* ... */
    auto it = assignCounts.find(listId);
    if (it != assignCounts.end()) {
      offset += it->second;
    /* ... */
      assignCounts[listId] = 1;
    /* ... */
    listOffsetHost[i] = offset;
  /* ... */

  // Resize the storage of every list that receives new vectors
  for (auto& counts : assignCounts) {
    /* ... */
    auto& indices = deviceListIndices_[counts.first];
    /* ... */
    indices->resize(indices->size() + counts.second * indexSize, stream);
    /* ... */
      userIndices.resize(newNumVecs);
  /* ... */

  // Collect the ids of the lists being appended to
  std::vector<int> listIds(assignCounts.size());
  /* ... */
  for (auto& counts : assignCounts) {
    listIds[i++] = counts.first;
  /* ... */

  // If user indices are kept on the CPU, record them in the host-side map
  for (int i = 0; i < hostIndices.getSize(0); ++i) {
    int listId = listIdsHost[i];
    /* ... */
    int offset = listOffsetHost[i];
    /* ... */
    FAISS_ASSERT(offset < userIndices.size());
    userIndices[offset] = hostIndices[i];
  /* ... */

  // Finally, append vector data and indices to the device lists
  runIVFFlatInvertedListAppend(listIds, /* ... */
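// --- Illustrative sketch, not part of IVFFlat.cu: the host-side bookkeeping
// above decides, for every new vector, which list it goes to and at which
// position inside that list it will be written. Reduced to its core (variable
// and helper names here are hypothetical), the logic is:
//
//   std::unordered_map<int, int> assignCounts;  // listId -> #vectors added now
//   for (int i = 0; i < n; ++i) {
//     int listId = listIdsHost[i];
//     if (listId < 0) {                          // not assigned to any list
//       listOffsetHost[i] = -1;
//       continue;
//     }
//     int offset = existingListLength(listId);   // entries already in the list
//     auto it = assignCounts.find(listId);
//     if (it != assignCounts.end()) {
//       offset += it->second;                    // entries appended earlier in this batch
//       ++it->second;
//     } else {
//       assignCounts[listId] = 1;
//     }
//     listOffsetHost[i] = offset;                // final write position inside the list
//   }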
// query (excerpt): a two-stage search. The coarse quantizer selects `nprobe`
// candidate lists per query, runIVFFlatScan scans them, and list offsets are
// mapped back to user indices before returning.
  // nprobe and k are limited by the GPU k-select kernels
  FAISS_ASSERT(nprobe <= 1024);
  FAISS_ASSERT(k <= 1024);
  /* ... */
  DeviceTensor<float, 2, true>
    coarseDistances(mem, {queries.getSize(0), nprobe}, stream);
  DeviceTensor<int, 2, true>
    coarseIndices(mem, {queries.getSize(0), nprobe}, stream);
  /* ... */
  runIVFFlatScan(queries, /* ... */
  /* ... */
  // Convert the (list id, offset) results produced by the scan into user indices
  ivfOffsetToUserIndex(hostOutIndices.data(), /* ... */
  /* ... */
  outIndices.copyFrom(hostOutIndices, stream);
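// --- Illustrative sketch, not part of IVFFlat.cu: the scan reports each hit
// as a (list id, offset within list) pair. When user indices are stored on
// the CPU (INDICES_CPU), ivfOffsetToUserIndex conceptually resolves each hit
// with a lookup like the following, using the listOffsetToUserIndex_ member
// documented below:
//
//   long userIndexFor(int listId, int offsetInList) {
//     return listOffsetToUserIndex_[listId][offsetInList];
//   }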
// getListVectors (excerpt): decodes the raw encoded bytes of one inverted
// list and returns them to the CPU as a std::vector<float>.
#ifdef FAISS_USE_FLOAT16
  if (useFloat16_) {
    size_t num = encVecs.size() / sizeof(half);
    /* ... */
    auto devFloat = fromHalf(resources_, stream, devHalf);

    std::vector<float> out(num);
    /* ... */
    hostFloat.copyFrom(devFloat, stream);
    /* ... */
  }
#endif

  // float32 storage path
  size_t num = encVecs.size() / sizeof(float);
  /* ... */
  std::vector<float> out(num);
  /* ... */
  hostFloat.copyFrom(devFloat, stream);
Declarations referenced in the listing above (a short usage sketch follows this list):

const int numLists_
    Number of inverted lists we maintain.
int maxListLength_
    Maximum list length seen.
cudaStream_t getDefaultStreamCurrentDevice()
    Calls getDefaultStream with the current device.
int getSize() const
    Returns the number of vectors we contain.
std::vector< std::vector< long > > listOffsetToUserIndex_
FlatIndex
    Holder of GPU resources for a particular flat index.
__host__ __device__ Tensor< T, SubDim, InnerContig, IndexT, PtrTraits > view(DataPtrType at)
IVFBase
    Base inverted list functionality for IVFFlat and IVFPQ.
IVFFlat(GpuResources *resources, FlatIndex *quantizer, bool l2Distance, bool useFloat16, IndicesOptions indicesOptions, MemorySpace space)
    Construct from a quantizer that has elemen.
thrust::device_vector< int > deviceListLengths_
thrust::device_vector< void * > deviceListIndexPointers_
int classifyAndAddVectors(Tensor< float, 2, true > &vecs, Tensor< long, 1, true > &indices)
DeviceMemory & getMemoryManagerCurrentDevice()
    Calls getMemoryManager for the current device.
__host__ void copyFrom(Tensor< T, Dim, InnerContig, IndexT, PtrTraits > &t, cudaStream_t stream)
    Copies a tensor into ourselves; sizes must match.
FlatIndex * quantizer_
    Quantizer object.
__host__ __device__ IndexT getSize(int i) const
thrust::device_vector< void * > deviceListDataPointers_
__host__ __device__ DataPtrType data()
    Returns a raw pointer to the start of our data.
GpuResources * resources_
    Collection of GPU resources that we use.
void addCodeVectorsFromCpu(int listId, const float *vecs, const long *indices, size_t numVecs)
const int bytesPerVector_
    Number of bytes per vector in the list.
void query(Tensor< float, 2, true > &queries, int nprobe, int k, Tensor< float, 2, true > &outDistances, Tensor< long, 2, true > &outIndices)
std::vector< float > getListVectors(int listId) const
    Return the vectors of a particular list back to the CPU.
void updateDeviceListInfo_(cudaStream_t stream)
    Update all device-side list pointer and size information.
const IndicesOptions indicesOptions_
    How are user indices stored on the GPU?
std::vector< std::unique_ptr< DeviceVector< unsigned char > > > deviceListData_
const int dim_
    Expected dimensionality of the vectors.
void addIndicesFromCpu_(int listId, const long *indices, size_t numVecs)
    Shared function to copy indices from CPU to GPU.
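Taken together, these declarations describe the public surface of the class. The snippet below is only an illustrative sketch of how a caller might drive it, not code from FAISS itself: the GpuResources instance `res`, the trained FlatIndex coarse quantizer `coarseQuantizer`, and the device-resident tensors (`vecs`, `indices`, `queries`, `outDistances`, `outIndices`) are assumed to have been created elsewhere, and the enum values shown are examples only.

// Illustrative usage sketch; setup of res, coarseQuantizer and the tensors is elided.
int nprobe = 32;
int k = 10;

faiss::gpu::IVFFlat index(res,              // GpuResources*
                          coarseQuantizer,  // FlatIndex* holding the trained centroids
                          true,             // l2Distance: L2 rather than inner product
                          false,            // useFloat16: store list data as float32
                          faiss::gpu::INDICES_64_BIT,
                          faiss::gpu::MemorySpace::Device);

// Assign each vector to its closest inverted list and append it there,
// together with its user-supplied index.
int numAdded = index.classifyAndAddVectors(vecs, indices);

// Probe `nprobe` lists per query and return the `k` best results.
index.query(queries, nprobe, k, outDistances, outIndices);

// Pull the stored vectors of list 0 back to the CPU for inspection.
std::vector<float> list0 = index.getListVectors(0);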