/**
|
|
* Copyright (c) 2015-present, Facebook, Inc.
|
|
* All rights reserved.
|
|
*
|
|
* This source code is licensed under the BSD+Patents license found in the
|
|
* LICENSE file in the root directory of this source tree.
|
|
*/
|
|
|
|
// Copyright 2004-present Facebook. All Rights Reserved.
|
|
|
|
#include "GpuIndexIVFFlat.h"
|
|
#include "../IndexFlat.h"
|
|
#include "../IndexIVF.h"
|
|
#include "GpuIndexFlat.h"
|
|
#include "GpuResources.h"
|
|
#include "impl/IVFFlat.cuh"
|
|
#include "utils/CopyUtils.cuh"
|
|
#include "utils/DeviceUtils.h"
|
|
#include "utils/Float16.cuh"
|
|
|
|
#include <limits>
|
|
|
|
namespace faiss { namespace gpu {
|
|
|
|
/// Construct a GPU IVF-flat index by copying state from an existing
/// CPU faiss::IndexIVFFlat. The base GpuIndexIVF is initialized from the
/// CPU index's dimension, metric, and list count; the inverted lists and
/// quantizer are then copied over via copyFrom().
GpuIndexIVFFlat::GpuIndexIVFFlat(GpuResources* resources,
                                 const faiss::IndexIVFFlat* index,
                                 GpuIndexIVFFlatConfig config) :
    GpuIndexIVF(resources,
                index->d,
                index->metric_type,
                index->nlist,
                config),
    ivfFlatConfig_(config),
    reserveMemoryVecs_(0),
    // index_ (the actual GPU IVF data) is created inside copyFrom()
    index_(nullptr) {
#ifndef FAISS_USE_FLOAT16
  // Without float16 support compiled in, the float16 storage option
  // cannot be honored; fail loudly rather than silently ignoring it
  FAISS_THROW_IF_NOT_MSG(!ivfFlatConfig_.useFloat16IVFStorage,
                         "float16 unsupported; need CUDA SDK >= 7.5");
#endif

  copyFrom(index);
}
|
|
|
|
/// Construct an empty, untrained GPU IVF-flat index with the given
/// dimensionality, number of inverted lists and metric. The underlying
/// IVFFlat GPU structure is deferred until train() succeeds.
GpuIndexIVFFlat::GpuIndexIVFFlat(GpuResources* resources,
                                 int dims,
                                 int nlist,
                                 faiss::MetricType metric,
                                 GpuIndexIVFFlatConfig config) :
    GpuIndexIVF(resources, dims, metric, nlist, config),
    ivfFlatConfig_(config),
    reserveMemoryVecs_(0),
    index_(nullptr) {

  // faiss::Index params
  this->is_trained = false;

#ifndef FAISS_USE_FLOAT16
  // Without float16 support compiled in, the float16 storage option
  // cannot be honored; fail loudly rather than silently ignoring it
  FAISS_THROW_IF_NOT_MSG(!ivfFlatConfig_.useFloat16IVFStorage,
                         "float16 unsupported; need CUDA SDK >= 7.5");
#endif

  // We haven't trained ourselves, so don't construct the IVFFlat
  // index yet
}
|
|
|
|
/// Releases the owned GPU IVF structure (index_ may be nullptr if the
/// index was never trained; delete on nullptr is a no-op).
GpuIndexIVFFlat::~GpuIndexIVFFlat() {
  delete index_;
}
|
|
|
|
void
|
|
GpuIndexIVFFlat::reserveMemory(size_t numVecs) {
|
|
reserveMemoryVecs_ = numVecs;
|
|
if (index_) {
|
|
index_->reserveMemory(numVecs);
|
|
}
|
|
}
|
|
|
|
void
|
|
GpuIndexIVFFlat::copyFrom(const faiss::IndexIVFFlat* index) {
|
|
DeviceScope scope(device_);
|
|
|
|
GpuIndexIVF::copyFrom(index);
|
|
|
|
// Clear out our old data
|
|
delete index_;
|
|
index_ = nullptr;
|
|
|
|
// The other index might not be trained
|
|
if (!index->is_trained) {
|
|
return;
|
|
}
|
|
|
|
// Otherwise, we can populate ourselves from the other index
|
|
this->is_trained = true;
|
|
|
|
// Copy our lists as well
|
|
index_ = new IVFFlat(resources_,
|
|
quantizer_->getGpuData(),
|
|
index->metric_type == faiss::METRIC_L2,
|
|
ivfFlatConfig_.useFloat16IVFStorage,
|
|
ivfFlatConfig_.indicesOptions,
|
|
memorySpace_);
|
|
|
|
FAISS_ASSERT(index->vecs.size() == index->ids.size());
|
|
for (size_t i = 0; i < index->vecs.size(); ++i) {
|
|
auto& vecs = index->vecs[i];
|
|
auto& ids = index->ids[i];
|
|
|
|
FAISS_ASSERT(vecs.size() % this->d == 0);
|
|
auto numVecs = vecs.size() / this->d;
|
|
FAISS_ASSERT(numVecs == ids.size());
|
|
|
|
// GPU index can only support max int entries per list
|
|
FAISS_THROW_IF_NOT_FMT(ids.size() <=
|
|
(size_t) std::numeric_limits<int>::max(),
|
|
"GPU inverted list can only support "
|
|
"%zu entries; %zu found",
|
|
(size_t) std::numeric_limits<int>::max(),
|
|
ids.size());
|
|
|
|
index_->addCodeVectorsFromCpu(i, vecs.data(), ids.data(), numVecs);
|
|
}
|
|
}
|
|
|
|
void
|
|
GpuIndexIVFFlat::copyTo(faiss::IndexIVFFlat* index) const {
|
|
DeviceScope scope(device_);
|
|
|
|
// We must have the indices in order to copy to ourselves
|
|
FAISS_THROW_IF_NOT_MSG(ivfFlatConfig_.indicesOptions != INDICES_IVF,
|
|
"Cannot copy to CPU as GPU index doesn't retain "
|
|
"indices (INDICES_IVF)");
|
|
|
|
GpuIndexIVF::copyTo(index);
|
|
|
|
// Clear out the old inverted lists
|
|
index->vecs.clear();
|
|
index->vecs.resize(nlist_);
|
|
|
|
// Copy the inverted lists
|
|
if (index_) {
|
|
for (int i = 0; i < nlist_; ++i) {
|
|
index->vecs[i] = index_->getListVectors(i);
|
|
index->ids[i] = index_->getListIndices(i);
|
|
}
|
|
}
|
|
}
|
|
|
|
size_t
|
|
GpuIndexIVFFlat::reclaimMemory() {
|
|
if (index_) {
|
|
DeviceScope scope(device_);
|
|
|
|
return index_->reclaimMemory();
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
void
|
|
GpuIndexIVFFlat::reset() {
|
|
if (index_) {
|
|
DeviceScope scope(device_);
|
|
|
|
index_->reset();
|
|
this->ntotal = 0;
|
|
} else {
|
|
FAISS_ASSERT(this->ntotal == 0);
|
|
}
|
|
}
|
|
|
|
/// Train the coarse quantizer on `n` vectors of dimension d at `x`,
/// then construct the GPU inverted-list structure.
/// A second call on an already-trained index is a no-op (after sanity
/// checks); it does NOT re-train.
void
GpuIndexIVFFlat::train(Index::idx_t n, const float* x) {
  DeviceScope scope(device_);

  if (this->is_trained) {
    // Already trained: the quantizer and GPU structure must both exist
    // and be consistent with nlist_
    FAISS_ASSERT(quantizer_->is_trained);
    FAISS_ASSERT(quantizer_->ntotal == nlist_);
    FAISS_ASSERT(index_);
    return;
  }

  // Untrained implies the GPU IVF structure was never built
  FAISS_ASSERT(!index_);

  trainQuantizer_(n, x);

  // The quantizer is now trained; construct the IVF index
  index_ = new IVFFlat(resources_,
                       quantizer_->getGpuData(),
                       this->metric_type == faiss::METRIC_L2,
                       ivfFlatConfig_.useFloat16IVFStorage,
                       ivfFlatConfig_.indicesOptions,
                       memorySpace_);

  // Honor any reserveMemory() request made before training
  if (reserveMemoryVecs_) {
    index_->reserveMemory(reserveMemoryVecs_);
  }

  this->is_trained = true;
}
|
|
|
|
void
|
|
GpuIndexIVFFlat::addImpl_(Index::idx_t n,
|
|
const float* x,
|
|
const Index::idx_t* xids) {
|
|
// Device is already set in GpuIndex::addInternal_
|
|
FAISS_ASSERT(index_);
|
|
FAISS_ASSERT(n > 0);
|
|
|
|
auto stream = resources_->getDefaultStreamCurrentDevice();
|
|
|
|
auto deviceVecs =
|
|
toDevice<float, 2>(resources_,
|
|
device_,
|
|
const_cast<float*>(x),
|
|
stream,
|
|
{(int) n, index_->getDim()});
|
|
|
|
static_assert(sizeof(long) == sizeof(Index::idx_t), "size mismatch");
|
|
auto deviceIds =
|
|
toDevice<long, 1>(resources_,
|
|
device_,
|
|
const_cast<long*>(xids),
|
|
stream,
|
|
{(int) n});
|
|
|
|
// Not all vectors may be able to be added (some may contain NaNs
|
|
// etc)
|
|
ntotal += index_->classifyAndAddVectors(deviceVecs, deviceIds);
|
|
}
|
|
|
|
void
|
|
GpuIndexIVFFlat::searchImpl_(faiss::Index::idx_t n,
|
|
const float* x,
|
|
faiss::Index::idx_t k,
|
|
float* distances,
|
|
faiss::Index::idx_t* labels) const {
|
|
// Device is already set in GpuIndex::search
|
|
FAISS_ASSERT(index_);
|
|
FAISS_ASSERT(n > 0);
|
|
|
|
auto stream = resources_->getDefaultStream(device_);
|
|
|
|
// Make sure arguments are on the device we desire; use temporary
|
|
// memory allocations to move it if necessary
|
|
auto devX =
|
|
toDevice<float, 2>(resources_,
|
|
device_,
|
|
const_cast<float*>(x),
|
|
stream,
|
|
{(int) n, this->d});
|
|
auto devDistances =
|
|
toDevice<float, 2>(resources_,
|
|
device_,
|
|
distances,
|
|
stream,
|
|
{(int) n, (int) k});
|
|
auto devLabels =
|
|
toDevice<faiss::Index::idx_t, 2>(resources_,
|
|
device_,
|
|
labels,
|
|
stream,
|
|
{(int) n, (int) k});
|
|
|
|
index_->query(devX, nprobe_, k, devDistances, devLabels);
|
|
|
|
// Copy back if necessary
|
|
fromDevice<float, 2>(devDistances, distances, stream);
|
|
fromDevice<faiss::Index::idx_t, 2>(devLabels, labels, stream);
|
|
}
|
|
|
|
|
|
} } // namespace
|