290 lines
9.7 KiB
Plaintext
290 lines
9.7 KiB
Plaintext
/**
|
|
* Copyright (c) Facebook, Inc. and its affiliates.
|
|
*
|
|
* This source code is licensed under the MIT license found in the
|
|
* LICENSE file in the root directory of this source tree.
|
|
*/
|
|
|
|
#include <faiss/gpu/GpuIndexBinaryFlat.h>
|
|
|
|
#include <faiss/gpu/GpuResources.h>
|
|
#include <faiss/gpu/impl/BinaryFlatIndex.cuh>
|
|
#include <faiss/gpu/utils/ConversionOperators.cuh>
|
|
#include <faiss/gpu/utils/CopyUtils.cuh>
|
|
#include <faiss/gpu/utils/DeviceUtils.h>
|
|
|
|
namespace faiss { namespace gpu {
|
|
|
|
/// Default CPU search size for which we use paged copies (256 MiB).
/// Host-resident query sets at least this large are copied to the GPU
/// in tiles (see searchFromCpuPaged_) instead of in a single transfer.
constexpr size_t kMinPageSize = (size_t) 256 * 1024 * 1024;
|
|
|
|
/// Constructs a GPU index as a copy of the given CPU binary flat index.
/// `resources` must outlive this index; `config` selects the device and
/// memory space. Throws if the bit dimension is not a multiple of 8.
GpuIndexBinaryFlat::GpuIndexBinaryFlat(GpuResources* resources,
                                       const faiss::IndexBinaryFlat* index,
                                       GpuIndexBinaryFlatConfig config)
    : IndexBinary(index->d),
      resources_(resources),
      config_(std::move(config)),
      data_(nullptr) {
  // Binary vectors are stored as packed bytes, so d must be byte-aligned
  FAISS_THROW_IF_NOT_FMT(this->d % 8 == 0,
                         "vector dimension (number of bits) "
                         "must be divisible by 8 (passed %d)",
                         this->d);

  // Flat index doesn't need training
  this->is_trained = true;

  // copyFrom allocates data_ and transfers the CPU vectors to the GPU
  copyFrom(index);
}
|
|
|
|
|
|
/// Constructs an empty GPU binary flat index of dimension `dims` bits.
/// `resources` must outlive this index; `config` selects the device and
/// memory space. Throws if `dims` is not a multiple of 8.
GpuIndexBinaryFlat::GpuIndexBinaryFlat(GpuResources* resources,
                                       int dims,
                                       GpuIndexBinaryFlatConfig config)
    : IndexBinary(dims),
      resources_(resources),
      config_(std::move(config)),
      data_(nullptr) {
  // Binary vectors are stored as packed bytes, so d must be byte-aligned
  FAISS_THROW_IF_NOT_FMT(this->d % 8 == 0,
                         "vector dimension (number of bits) "
                         "must be divisible by 8 (passed %d)",
                         this->d);

  // Flat index doesn't need training
  this->is_trained = true;

  // Construct index
  // Scope all GPU work to our configured device
  DeviceScope scope(config_.device);
  data_ = new BinaryFlatIndex(resources,
                              this->d,
                              config_.memorySpace);
}
|
|
|
|
/// Releases the device-resident storage; data_ may be null if a
/// constructor threw before allocation (delete of null is a no-op).
GpuIndexBinaryFlat::~GpuIndexBinaryFlat() {
  delete data_;
}
|
|
|
|
void
|
|
GpuIndexBinaryFlat::copyFrom(const faiss::IndexBinaryFlat* index) {
|
|
DeviceScope scope(config_.device);
|
|
|
|
this->d = index->d;
|
|
|
|
// GPU code has 32 bit indices
|
|
FAISS_THROW_IF_NOT_FMT(index->ntotal <=
|
|
(faiss::Index::idx_t) std::numeric_limits<int>::max(),
|
|
"GPU index only supports up to %zu indices; "
|
|
"attempting to copy CPU index with %zu parameters",
|
|
(size_t) std::numeric_limits<int>::max(),
|
|
(size_t) index->ntotal);
|
|
this->ntotal = index->ntotal;
|
|
|
|
delete data_;
|
|
data_ = new BinaryFlatIndex(resources_,
|
|
this->d,
|
|
config_.memorySpace);
|
|
|
|
// The index could be empty
|
|
if (index->ntotal > 0) {
|
|
data_->add(index->xb.data(),
|
|
index->ntotal,
|
|
resources_->getDefaultStream(config_.device));
|
|
}
|
|
}
|
|
|
|
void
|
|
GpuIndexBinaryFlat::copyTo(faiss::IndexBinaryFlat* index) const {
|
|
DeviceScope scope(config_.device);
|
|
|
|
index->d = this->d;
|
|
index->ntotal = this->ntotal;
|
|
|
|
FAISS_ASSERT(data_);
|
|
FAISS_ASSERT(data_->getSize() == this->ntotal);
|
|
index->xb.resize(this->ntotal * (this->d / 8));
|
|
|
|
if (this->ntotal > 0) {
|
|
fromDevice(data_->getVectorsRef(),
|
|
index->xb.data(),
|
|
resources_->getDefaultStream(config_.device));
|
|
}
|
|
}
|
|
|
|
void
|
|
GpuIndexBinaryFlat::add(faiss::IndexBinary::idx_t n,
|
|
const uint8_t* x) {
|
|
DeviceScope scope(config_.device);
|
|
|
|
// To avoid multiple re-allocations, ensure we have enough storage
|
|
// available
|
|
data_->reserve(n, resources_->getDefaultStream(config_.device));
|
|
|
|
// Due to GPU indexing in int32, we can't store more than this
|
|
// number of vectors on a GPU
|
|
FAISS_THROW_IF_NOT_FMT(this->ntotal + n <=
|
|
(faiss::Index::idx_t) std::numeric_limits<int>::max(),
|
|
"GPU index only supports up to %zu indices",
|
|
(size_t) std::numeric_limits<int>::max());
|
|
|
|
data_->add((const unsigned char*) x,
|
|
n,
|
|
resources_->getDefaultStream(config_.device));
|
|
this->ntotal += n;
|
|
}
|
|
|
|
/// Removes all vectors from the index, releasing the underlying device
/// memory and resetting the vector count to zero.
void
GpuIndexBinaryFlat::reset() {
  DeviceScope scope(config_.device);

  // Free the underlying memory
  data_->reset();
  this->ntotal = 0;
}
|
|
|
|
/// Performs k-nearest-neighbor search over the stored binary vectors
/// for `n` packed query vectors in `x`, writing Hamming distances to
/// `distances` and result ids to `labels` (both n x k, which may be
/// host- or device-resident). Host-resident queries larger than
/// kMinPageSize are processed in tiles to bound GPU memory use.
/// Throws if n or k exceed the GPU's int32 / k-selection limits.
void
GpuIndexBinaryFlat::search(faiss::IndexBinary::idx_t n,
                           const uint8_t* x,
                           faiss::IndexBinary::idx_t k,
                           int32_t* distances,
                           faiss::IndexBinary::idx_t* labels) const {
  if (n == 0) {
    return;
  }

  // For now, only support <= max int results
  FAISS_THROW_IF_NOT_FMT(n <= (Index::idx_t) std::numeric_limits<int>::max(),
                         "GPU index only supports up to %zu indices",
                         (size_t) std::numeric_limits<int>::max());
  FAISS_THROW_IF_NOT_FMT(k <= (Index::idx_t) getMaxKSelection(),
                         "GPU only supports k <= %d (requested %d)",
                         getMaxKSelection(),
                         (int) k); // select limitation

  DeviceScope scope(config_.device);
  auto stream = resources_->getDefaultStream(config_.device);

  // The input vectors may be too large for the GPU, but we still
  // assume that the output distances and labels are not.
  // Go ahead and make space for output distances and labels on the
  // GPU.
  // If we reach a point where all inputs are too big, we can add
  // another level of tiling.
  // toDevice is a no-op if `distances` is already device-resident.
  auto outDistances = toDevice<int32_t, 2>(resources_,
                                           config_.device,
                                           distances,
                                           stream,
                                           {(int) n, (int) k});

  // FlatIndex only supports an interface returning int indices
  // (converted to idx_t below, after the search)
  DeviceTensor<int, 2, true> outIntIndices(
    resources_->getMemoryManagerCurrentDevice(),
    {(int) n, (int) k}, stream);

  bool usePaged = false;

  // getDeviceForAddress returns -1 for host (CPU) pointers
  if (getDeviceForAddress(x) == -1) {
    // It is possible that the user is querying for a vector set size
    // `x` that won't fit on the GPU.
    // In this case, we will have to handle paging of the data from CPU
    // -> GPU.
    // Currently, we don't handle the case where the output data won't
    // fit on the GPU (e.g., n * k is too large for the GPU memory).
    size_t dataSize = (size_t) n * (this->d / 8) * sizeof(uint8_t);

    if (dataSize >= kMinPageSize) {
      searchFromCpuPaged_(n, x, k,
                          outDistances.data(),
                          outIntIndices.data());
      usePaged = true;
    }
  }

  // Either the queries are already on the device, or they are small
  // enough to copy over in one shot
  if (!usePaged) {
    searchNonPaged_(n, x, k,
                    outDistances.data(),
                    outIntIndices.data());
  }

  // Convert and copy int indices out
  // (no-op allocation-wise if `labels` is already device-resident)
  auto outIndices = toDevice<faiss::Index::idx_t, 2>(resources_,
                                                     config_.device,
                                                     labels,
                                                     stream,
                                                     {(int) n, (int) k});

  // Convert int to long
  convertTensor<int, faiss::Index::idx_t, 2>(stream,
                                             outIntIndices,
                                             outIndices);

  // Copy back if necessary
  fromDevice<int32_t, 2>(outDistances, distances, stream);
  fromDevice<faiss::Index::idx_t, 2>(outIndices, labels, stream);
}
|
|
|
|
void
|
|
GpuIndexBinaryFlat::searchNonPaged_(int n,
|
|
const uint8_t* x,
|
|
int k,
|
|
int32_t* outDistancesData,
|
|
int* outIndicesData) const {
|
|
Tensor<int32_t, 2, true> outDistances(outDistancesData, {n, k});
|
|
Tensor<int, 2, true> outIndices(outIndicesData, {n, k});
|
|
|
|
auto stream = resources_->getDefaultStream(config_.device);
|
|
|
|
// Make sure arguments are on the device we desire; use temporary
|
|
// memory allocations to move it if necessary
|
|
auto vecs = toDevice<uint8_t, 2>(resources_,
|
|
config_.device,
|
|
const_cast<uint8_t*>(x),
|
|
stream,
|
|
{n, (int) (this->d / 8)});
|
|
|
|
data_->query(vecs, k, outDistances, outIndices);
|
|
}
|
|
|
|
void
|
|
GpuIndexBinaryFlat::searchFromCpuPaged_(int n,
|
|
const uint8_t* x,
|
|
int k,
|
|
int32_t* outDistancesData,
|
|
int* outIndicesData) const {
|
|
Tensor<int32_t, 2, true> outDistances(outDistancesData, {n, k});
|
|
Tensor<int, 2, true> outIndices(outIndicesData, {n, k});
|
|
|
|
auto vectorSize = sizeof(uint8_t) * (this->d / 8);
|
|
|
|
// Just page without overlapping copy with compute (as GpuIndexFlat does)
|
|
int batchSize = utils::nextHighestPowerOf2(
|
|
(int) ((size_t) kMinPageSize / vectorSize));
|
|
|
|
for (int cur = 0; cur < n; cur += batchSize) {
|
|
int num = std::min(batchSize, n - cur);
|
|
|
|
auto outDistancesSlice = outDistances.narrowOutermost(cur, num);
|
|
auto outIndicesSlice = outIndices.narrowOutermost(cur, num);
|
|
|
|
searchNonPaged_(num,
|
|
x + (size_t) cur * (this->d / 8),
|
|
k,
|
|
outDistancesSlice.data(),
|
|
outIndicesSlice.data());
|
|
}
|
|
}
|
|
|
|
void
|
|
GpuIndexBinaryFlat::reconstruct(faiss::IndexBinary::idx_t key,
|
|
uint8_t* out) const {
|
|
DeviceScope scope(config_.device);
|
|
|
|
FAISS_THROW_IF_NOT_MSG(key < this->ntotal, "index out of bounds");
|
|
auto stream = resources_->getDefaultStream(config_.device);
|
|
|
|
auto& vecs = data_->getVectorsRef();
|
|
auto vec = vecs[key];
|
|
|
|
fromDevice(vec.data(), out, vecs.getSize(1), stream);
|
|
}
|
|
|
|
} } // namespace gpu
|