#include "../GpuResources.h"
#include "FlatIndex.cuh"
#include "InvertedListAppend.cuh"
#include "IVFFlatScan.cuh"
#include "RemapIndices.h"
#include "../utils/CopyUtils.cuh"
#include "../utils/DeviceDefs.cuh"
#include "../utils/DeviceUtils.h"
#include "../utils/Float16.cuh"
#include "../utils/HostTensor.cuh"
#include "../utils/Transpose.cuh"

#include <thrust/host_vector.h>
#include <unordered_map>

namespace faiss { namespace gpu {
IVFFlat::IVFFlat(GpuResources* resources, FlatIndex* quantizer,
                 bool l2Distance, bool useFloat16,
                 IndicesOptions indicesOptions, MemorySpace space) :
    IVFBase(resources,
            quantizer,
            // Per-vector byte size of the encoded list data
#ifdef FAISS_USE_FLOAT16
            useFloat16 ?
            sizeof(half) * quantizer->getDim()
            : sizeof(float) * quantizer->getDim(),
#else
            sizeof(float) * quantizer->getDim(),
#endif
            indicesOptions,
            space),
    l2Distance_(l2Distance),
    useFloat16_(useFloat16) {
}
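A minimal standalone sketch of the per-vector size choice made in the initializer list above; the helper name is hypothetical, and __half comes from CUDA's cuda_fp16.h rather than faiss's Float16.cuh:

#include <cstddef>
#include <cuda_fp16.h>  // __half, CUDA's half-precision type

// Hypothetical helper mirroring the bytesPerVector_ computation: float16
// storage halves the footprint of each encoded vector in an inverted list.
inline size_t bytesPerVectorFor(bool useFloat16, int dim) {
  return useFloat16 ? sizeof(__half) * dim : sizeof(float) * dim;
}

For dim = 128 this is 256 bytes per vector with float16 versus 512 with float32.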
// addCodeVectorsFromCpu (excerpt)
void IVFFlat::addCodeVectorsFromCpu(int listId,
                                    const float* vecs,
                                    const long* indices,
                                    size_t numVecs) {
  // ...
  auto prevData = listData->data();

  // List lengths are stored as int32 on the GPU, so a single list's byte
  // size must stay within int range
  FAISS_ASSERT(listData->size() + lengthInBytes <=
               (size_t) std::numeric_limits<int>::max());

  if (useFloat16_) {
#ifdef FAISS_USE_FLOAT16
    // Copy the data to the device and convert it to half before appending
    auto floatData = toDevice<float, 1>(
        resources_, getCurrentDevice(), (float*) vecs, stream,
        {(int) numVecs * dim_});
    auto halfData = toHalf<1>(resources_, stream, floatData);
    listData->append((unsigned char*) halfData.data(), lengthInBytes, stream);
#endif
  } else {
    listData->append((unsigned char*) vecs, lengthInBytes, stream);
  }
  // ...

  // If the append reallocated the list, refresh the cached device pointer
  if (prevData != listData->data()) {
    // ...
  }
  // ...
  streamWait({stream}, {0});
}
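The prevData comparison above exists because appending can reallocate the list's storage, which silently invalidates the raw pointer recorded in deviceListDataPointers_. A host-side analogue of the same pattern, using std::vector<unsigned char> as a stand-in for DeviceVector (the function and parameter names are illustrative, not faiss API):

#include <cstring>
#include <vector>

// 'list' plays the role of one deviceListData_ entry, 'pointerTableEntry'
// the matching slot of the pointer table the kernels read.
void appendBytes(std::vector<unsigned char>& list,
                 const void* src,
                 size_t lengthInBytes,
                 const void** pointerTableEntry) {
  const unsigned char* prevData = list.data();

  size_t oldSize = list.size();
  list.resize(oldSize + lengthInBytes);
  std::memcpy(list.data() + oldSize, src, lengthInBytes);

  // Growing the buffer may have moved it; if so, the cached raw pointer is
  // stale and the table entry must be refreshed (the old value is only
  // compared, never dereferenced).
  if (prevData != list.data()) {
    *pointerTableEntry = list.data();
  }
}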
// classifyAndAddVectors (excerpt)
int IVFFlat::classifyAndAddVectors(Tensor<float, 2, true>& vecs,
                                   Tensor<long, 1, true>& indices) {
  // ...
  // Assign each input vector to its closest coarse centroid (top-1 query)
  auto listIds = listIds2d.view<1>({vecs.getSize(0)});
  quantizer_->query(vecs, 1, listDistance, listIds2d, false);
  // ...

  // Count how many new vectors go to each list and compute, per vector,
  // its write offset within the destination list
  std::unordered_map<int, int> assignCounts;
  for (int i = 0; i < listIds.getSize(0); ++i) {
    int listId = listIdsHost[i];
    if (listId < 0) {
      // Invalid assignment (e.g. the vector contains NaNs); skip it
      listOffsetHost[i] = -1;
      continue;
    }

    // Number of vectors the destination list already holds
    int offset = deviceListData_[listId]->size() / bytesPerVector_;
    auto it = assignCounts.find(listId);
    if (it != assignCounts.end()) {
      offset += it->second;
      it->second++;
    } else {
      assignCounts[listId] = 1;
    }
    listOffsetHost[i] = offset;
  }

  // Grow device (and CPU-side) storage for every list that receives vectors
  for (auto& counts : assignCounts) {
    // ...
    auto& indices = deviceListIndices_[counts.first];
    indices->resize(indices->size() + counts.second * indexSize, stream);
    // ...
    auto& userIndices = listOffsetToUserIndex_[counts.first];
    userIndices.resize(newNumVecs);
  }

  // Collect the ids of all lists that grew, so their device-side pointers
  // and sizes can be refreshed
  {
    std::vector<int> listIds(assignCounts.size());
    int i = 0;
    for (auto& counts : assignCounts) {
      listIds[i++] = counts.first;
    }
    // ...
  }

  // Record the user index of each newly added vector on the CPU side
  for (int i = 0; i < hostIndices.getSize(0); ++i) {
    int listId = listIdsHost[i];
    // ...
    auto& userIndices = listOffsetToUserIndex_[listId];
    int offset = listOffsetHost[i];
    FAISS_ASSERT(offset < userIndices.size());
    userIndices[offset] = hostIndices[i];
  }
  // ...

  // Append the encoded vectors and the user indices on the GPU
  runIVFFlatInvertedListAppend(listIds, /* ... */);
  // ...
}
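The unordered_map bookkeeping above decides, entirely on the host, where every new vector will land: which inverted list, and at which slot inside that list. A minimal standalone sketch of the same computation, with hypothetical plain-vector inputs standing in for the host tensors used above:

#include <unordered_map>
#include <vector>

struct Placement {
  int listId;   // destination inverted list (-1 if the vector is skipped)
  int offset;   // slot within that list where the vector will be written
};

// 'currentListSizes[l]' stands in for deviceListData_[l]->size() /
// bytesPerVector_, i.e. how many vectors list l already holds.
std::vector<Placement> planAppends(const std::vector<int>& assignedLists,
                                   const std::vector<int>& currentListSizes) {
  std::unordered_map<int, int> assignCounts;  // new vectors per list, so far
  std::vector<Placement> plan(assignedLists.size());

  for (size_t i = 0; i < assignedLists.size(); ++i) {
    int listId = assignedLists[i];
    if (listId < 0) {
      plan[i] = {-1, -1};                      // invalid assignment; skipped
      continue;
    }

    int offset = currentListSizes[listId];     // existing vectors come first
    auto it = assignCounts.find(listId);
    if (it != assignCounts.end()) {
      offset += it->second;                    // then previously planned ones
      it->second++;
    } else {
      assignCounts[listId] = 1;
    }
    plan[i] = {listId, offset};
  }
  return plan;
}

For assignedLists = {3, 7, 3, 3} and initially empty lists, the planned offsets come out as 0, 0, 1, 2, so the three vectors bound for list 3 occupy consecutive slots.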
// query (excerpt)
void IVFFlat::query(Tensor<float, 2, true>& queries, int nprobe, int k,
                    Tensor<float, 2, true>& outDistances,
                    Tensor<long, 2, true>& outIndices) {
  auto& mem = resources_->getMemoryManagerCurrentDevice();
  auto stream = resources_->getDefaultStreamCurrentDevice();

  FAISS_ASSERT(nprobe <= GPU_MAX_SELECTION_K);
  FAISS_ASSERT(k <= GPU_MAX_SELECTION_K);
  // ...

  // Temporary space for the coarse quantizer results
  DeviceTensor<float, 2, true>
    coarseDistances(mem, {queries.getSize(0), nprobe}, stream);
  DeviceTensor<int, 2, true>
    coarseIndices(mem, {queries.getSize(0), nprobe}, stream);
  // ...

  // Scan the nprobe closest inverted lists and k-select the best results
  runIVFFlatScan(queries, /* ... */);
  // ...

  // If user indices are stored on the CPU, remap the (list, offset) results
  // reported by the scan into user indices before returning
  ivfOffsetToUserIndex(hostOutIndices.data(), /* ... */);
  // ...
  outIndices.copyFrom(hostOutIndices, stream);
}
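When user indices are stored on the CPU (INDICES_CPU), the scan can only report which list a result came from and its offset inside that list; ivfOffsetToUserIndex then looks the true ids up in listOffsetToUserIndex_ before the table is copied back to the GPU. A rough sketch of that lookup, assuming the list id and offset have already been split apart (in the actual code both are carried in a single long per result, which is not shown in this excerpt):

#include <vector>

// 'listOffsetToUserIndex' mirrors std::vector<std::vector<long>>
// listOffsetToUserIndex_: for each inverted list, the user id stored at
// each offset. The other parameter names are illustrative only.
void remapToUserIndices(
    const std::vector<std::vector<long>>& listOffsetToUserIndex,
    const std::vector<int>& resultListIds,   // one entry per (query, k) slot
    const std::vector<int>& resultOffsets,   // same length as resultListIds
    std::vector<long>& userIndicesOut) {
  userIndicesOut.resize(resultListIds.size());
  for (size_t i = 0; i < resultListIds.size(); ++i) {
    int listId = resultListIds[i];
    // Slots with no result are conventionally left as -1
    userIndicesOut[i] =
        (listId < 0) ? -1 : listOffsetToUserIndex[listId][resultOffsets[i]];
  }
}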
// getListVectors (excerpt): decode one inverted list back to host floats
std::vector<float> IVFFlat::getListVectors(int listId) const {
  auto& encVecs = *deviceListData_[listId];
  auto stream = resources_->getDefaultStreamCurrentDevice();

  if (useFloat16_) {
#ifdef FAISS_USE_FLOAT16
    // The list bytes hold half-precision values
    size_t num = encVecs.size() / sizeof(half);
    // ...
    auto devFloat = fromHalf(resources_, stream, devHalf);

    std::vector<float> out(num);
    // ...
    hostFloat.copyFrom(devFloat, stream);
    return out;
#endif
  }

  // The list bytes hold full-precision floats
  size_t num = encVecs.size() / sizeof(float);
  // ...
  std::vector<float> out(num);
  // ...
  hostFloat.copyFrom(devFloat, stream);
  return out;
}

} } // namespace faiss::gpu
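getListVectors recovers the element count by dividing the list's byte size by sizeof(half) or sizeof(float), converts half data back to float with fromHalf on the GPU, and copies the result to the host. Purely as an illustration of what that half-to-float decode amounts to, here is a standalone host-only IEEE 754 half decoder (hypothetical helper names; faiss itself performs the conversion on the device):

#include <cstdint>
#include <cstring>
#include <vector>

// Decode one IEEE 754 binary16 value from its raw bit pattern.
float halfBitsToFloat(uint16_t h) {
  uint32_t sign = (uint32_t)(h >> 15) << 31;
  uint32_t exp  = (h >> 10) & 0x1f;
  uint32_t mant = h & 0x3ff;
  uint32_t bits;

  if (exp == 0) {
    if (mant == 0) {
      bits = sign;                                  // +/- zero
    } else {
      // Subnormal half: renormalize into the float format
      int e = -1;
      do { mant <<= 1; ++e; } while (!(mant & 0x400));
      mant &= 0x3ff;
      bits = sign | ((uint32_t)(127 - 15 - e) << 23) | (mant << 13);
    }
  } else if (exp == 0x1f) {
    bits = sign | 0x7f800000u | (mant << 13);       // +/- inf or NaN
  } else {
    bits = sign | ((exp + (127 - 15)) << 23) | (mant << 13);
  }

  float f;
  std::memcpy(&f, &bits, sizeof(f));
  return f;
}

// Decode a float16-encoded inverted list (raw bytes) into host floats,
// mirroring the size bookkeeping in the excerpt above.
std::vector<float> decodeHalfList(const std::vector<unsigned char>& encVecs) {
  size_t num = encVecs.size() / sizeof(uint16_t);   // == size() / sizeof(half)
  std::vector<float> out(num);
  for (size_t i = 0; i < num; ++i) {
    uint16_t h;
    std::memcpy(&h, encVecs.data() + i * sizeof(uint16_t), sizeof(h));
    out[i] = halfBitsToFloat(h);
  }
  return out;
}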
Members and helpers referenced in the listing:

const int numLists_
    Number of inverted lists we maintain.
int maxListLength_
    Maximum list length seen.
cudaStream_t getDefaultStreamCurrentDevice()
    Calls getDefaultStream with the current device.
int getSize() const
    Returns the number of vectors we contain.
std::vector<std::vector<long>> listOffsetToUserIndex_
class FlatIndex
    Holder of GPU resources for a particular flat index.
__host__ __device__ Tensor<T, SubDim, InnerContig, IndexT, PtrTraits> view(DataPtrType at)
class IVFBase
    Base inverted list functionality for IVFFlat and IVFPQ.
IVFFlat(GpuResources* resources, FlatIndex* quantizer, bool l2Distance, bool useFloat16, IndicesOptions indicesOptions, MemorySpace space)
    Construct from a quantizer that has elements resident on the GPU.
thrust::device_vector<int> deviceListLengths_
thrust::device_vector<void*> deviceListIndexPointers_
int classifyAndAddVectors(Tensor<float, 2, true>& vecs, Tensor<long, 1, true>& indices)
DeviceMemory& getMemoryManagerCurrentDevice()
    Calls getMemoryManager for the current device.
__host__ void copyFrom(Tensor<T, Dim, InnerContig, IndexT, PtrTraits>& t, cudaStream_t stream)
    Copies a tensor into ourselves; sizes must match.
FlatIndex* quantizer_
    Quantizer object.
__host__ __device__ IndexT getSize(int i) const
thrust::device_vector<void*> deviceListDataPointers_
__host__ __device__ DataPtrType data()
    Returns a raw pointer to the start of our data.
GpuResources* resources_
    Collection of GPU resources that we use.
void addCodeVectorsFromCpu(int listId, const float* vecs, const long* indices, size_t numVecs)
const int bytesPerVector_
    Number of bytes per vector in the list.
void query(Tensor<float, 2, true>& queries, int nprobe, int k, Tensor<float, 2, true>& outDistances, Tensor<long, 2, true>& outIndices)
std::vector<float> getListVectors(int listId) const
    Return the vectors of a particular list back to the CPU.
void updateDeviceListInfo_(cudaStream_t stream)
    Update all device-side list pointer and size information.
const IndicesOptions indicesOptions_
    How are user indices stored on the GPU?
std::vector<std::unique_ptr<DeviceVector<unsigned char>>> deviceListData_
const int dim_
    Expected dimensionality of the vectors.
void addIndicesFromCpu_(int listId, const long* indices, size_t numVecs)
    Shared function to copy indices from CPU to GPU.
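The thrust::device_vector members listed above act as per-list lookup tables (data pointer, index pointer, length) that the scan kernels index by list id; updateDeviceListInfo_ refreshes them whenever lists grow or move. A rough standalone sketch of that refresh pattern with hypothetical names, ignoring faiss's explicit stream handling:

#include <thrust/device_vector.h>
#include <vector>

// Hypothetical mirror of the per-list device tables; kernels would receive
// raw pointers to these vectors' storage and index them by list id.
struct ListTables {
  thrust::device_vector<void*> dataPointers;   // start of each list's codes
  thrust::device_vector<void*> indexPointers;  // start of each list's indices
  thrust::device_vector<int>   lengths;        // vectors per list

  // Copy fresh host-side snapshots to the device after lists were resized.
  void update(const std::vector<void*>& hostData,
              const std::vector<void*>& hostIndices,
              const std::vector<int>& hostLengths) {
    dataPointers = hostData;
    indexPointers = hostIndices;
    lengths = hostLengths;
  }
};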