faiss/gpu/GpuIndexBinaryFlat.cu

/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <faiss/gpu/GpuIndexBinaryFlat.h>
#include <faiss/gpu/GpuResources.h>
#include <faiss/gpu/impl/BinaryFlatIndex.cuh>
#include <faiss/gpu/utils/ConversionOperators.cuh>
#include <faiss/gpu/utils/CopyUtils.cuh>
#include <faiss/gpu/utils/DeviceUtils.h>

namespace faiss { namespace gpu {

/// Default CPU search size for which we use paged copies
constexpr size_t kMinPageSize = (size_t) 256 * 1024 * 1024;

GpuIndexBinaryFlat::GpuIndexBinaryFlat(GpuResources* resources,
                                       const faiss::IndexBinaryFlat* index,
                                       GpuIndexBinaryFlatConfig config)
    : IndexBinary(index->d),
      resources_(resources),
      config_(std::move(config)),
      data_(nullptr) {
  FAISS_THROW_IF_NOT_FMT(this->d % 8 == 0,
                         "vector dimension (number of bits) "
                         "must be divisible by 8 (passed %d)",
                         this->d);

  // Flat index doesn't need training
  this->is_trained = true;

  copyFrom(index);
}

GpuIndexBinaryFlat::GpuIndexBinaryFlat(GpuResources* resources,
                                       int dims,
                                       GpuIndexBinaryFlatConfig config)
    : IndexBinary(dims),
      resources_(resources),
      config_(std::move(config)),
      data_(nullptr) {
  FAISS_THROW_IF_NOT_FMT(this->d % 8 == 0,
                         "vector dimension (number of bits) "
                         "must be divisible by 8 (passed %d)",
                         this->d);

  // Flat index doesn't need training
  this->is_trained = true;

  // Construct index
  DeviceScope scope(config_.device);
  data_ = new BinaryFlatIndex(resources,
                              this->d,
                              config_.memorySpace);
}

GpuIndexBinaryFlat::~GpuIndexBinaryFlat() {
  delete data_;
}

void
GpuIndexBinaryFlat::copyFrom(const faiss::IndexBinaryFlat* index) {
  DeviceScope scope(config_.device);

  this->d = index->d;

  // GPU code has 32 bit indices
  FAISS_THROW_IF_NOT_FMT(index->ntotal <=
                         (faiss::Index::idx_t) std::numeric_limits<int>::max(),
                         "GPU index only supports up to %zu indices; "
                         "attempting to copy CPU index with %zu parameters",
                         (size_t) std::numeric_limits<int>::max(),
                         (size_t) index->ntotal);
  this->ntotal = index->ntotal;

  delete data_;
  data_ = new BinaryFlatIndex(resources_,
                              this->d,
                              config_.memorySpace);

  // The index could be empty
  if (index->ntotal > 0) {
    data_->add(index->xb.data(),
               index->ntotal,
               resources_->getDefaultStream(config_.device));
  }
}

void
GpuIndexBinaryFlat::copyTo(faiss::IndexBinaryFlat* index) const {
  DeviceScope scope(config_.device);

  index->d = this->d;
  index->ntotal = this->ntotal;

  FAISS_ASSERT(data_);
  FAISS_ASSERT(data_->getSize() == this->ntotal);
  index->xb.resize(this->ntotal * (this->d / 8));

  if (this->ntotal > 0) {
    fromDevice(data_->getVectorsRef(),
               index->xb.data(),
               resources_->getDefaultStream(config_.device));
  }
}

void
GpuIndexBinaryFlat::add(faiss::IndexBinary::idx_t n,
                        const uint8_t* x) {
  DeviceScope scope(config_.device);

  // To avoid multiple re-allocations, ensure we have enough storage
  // available
  data_->reserve(n, resources_->getDefaultStream(config_.device));

  // Due to GPU indexing in int32, we can't store more than this
  // number of vectors on a GPU
  FAISS_THROW_IF_NOT_FMT(this->ntotal + n <=
                         (faiss::Index::idx_t) std::numeric_limits<int>::max(),
                         "GPU index only supports up to %zu indices",
                         (size_t) std::numeric_limits<int>::max());

  data_->add((const unsigned char*) x,
             n,
             resources_->getDefaultStream(config_.device));
  this->ntotal += n;
}

void
GpuIndexBinaryFlat::reset() {
  DeviceScope scope(config_.device);

  // Free the underlying memory
  data_->reset();
  this->ntotal = 0;
}

void
GpuIndexBinaryFlat::search(faiss::IndexBinary::idx_t n,
                           const uint8_t* x,
                           faiss::IndexBinary::idx_t k,
                           int32_t* distances,
                           faiss::IndexBinary::idx_t* labels) const {
  if (n == 0) {
    return;
  }

  // For now, only support <= max int results
  FAISS_THROW_IF_NOT_FMT(n <= (Index::idx_t) std::numeric_limits<int>::max(),
                         "GPU index only supports up to %zu indices",
                         (size_t) std::numeric_limits<int>::max());
  FAISS_THROW_IF_NOT_FMT(k <= (Index::idx_t) getMaxKSelection(),
                         "GPU only supports k <= %d (requested %d)",
                         getMaxKSelection(),
                         (int) k); // select limitation

  DeviceScope scope(config_.device);
  auto stream = resources_->getDefaultStream(config_.device);

  // The input vectors may be too large for the GPU, but we still
  // assume that the output distances and labels are not.
  // Go ahead and make space for output distances and labels on the
  // GPU.
  // If we reach a point where all inputs are too big, we can add
  // another level of tiling.
  auto outDistances = toDevice<int32_t, 2>(resources_,
                                           config_.device,
                                           distances,
                                           stream,
                                           {(int) n, (int) k});

  // FlatIndex only supports an interface returning int indices
  DeviceTensor<int, 2, true> outIntIndices(
    resources_->getMemoryManagerCurrentDevice(),
    {(int) n, (int) k}, stream);

  bool usePaged = false;

  if (getDeviceForAddress(x) == -1) {
    // It is possible that the user is querying for a vector set size
    // `x` that won't fit on the GPU.
    // In this case, we will have to handle paging of the data from CPU
    // -> GPU.
    // Currently, we don't handle the case where the output data won't
    // fit on the GPU (e.g., n * k is too large for the GPU memory).
    size_t dataSize = (size_t) n * (this->d / 8) * sizeof(uint8_t);

    if (dataSize >= kMinPageSize) {
      searchFromCpuPaged_(n, x, k,
                          outDistances.data(),
                          outIntIndices.data());
      usePaged = true;
    }
  }

  if (!usePaged) {
    searchNonPaged_(n, x, k,
                    outDistances.data(),
                    outIntIndices.data());
  }

  // Convert and copy int indices out
  auto outIndices = toDevice<faiss::Index::idx_t, 2>(resources_,
                                                     config_.device,
                                                     labels,
                                                     stream,
                                                     {(int) n, (int) k});

  // Convert int to long
  convertTensor<int, faiss::Index::idx_t, 2>(stream,
                                             outIntIndices,
                                             outIndices);

  // Copy back if necessary
  fromDevice<int32_t, 2>(outDistances, distances, stream);
  fromDevice<faiss::Index::idx_t, 2>(outIndices, labels, stream);
}

void
GpuIndexBinaryFlat::searchNonPaged_(int n,
                                    const uint8_t* x,
                                    int k,
                                    int32_t* outDistancesData,
                                    int* outIndicesData) const {
  Tensor<int32_t, 2, true> outDistances(outDistancesData, {n, k});
  Tensor<int, 2, true> outIndices(outIndicesData, {n, k});

  auto stream = resources_->getDefaultStream(config_.device);

  // Make sure arguments are on the device we desire; use temporary
  // memory allocations to move it if necessary
  auto vecs = toDevice<uint8_t, 2>(resources_,
                                   config_.device,
                                   const_cast<uint8_t*>(x),
                                   stream,
                                   {n, (int) (this->d / 8)});

  data_->query(vecs, k, outDistances, outIndices);
}

// Query path for CPU-resident query data that is too large to copy to the
// GPU in one shot; processes the queries in fixed-size batches.
void
GpuIndexBinaryFlat::searchFromCpuPaged_(int n,
                                        const uint8_t* x,
                                        int k,
                                        int32_t* outDistancesData,
                                        int* outIndicesData) const {
  Tensor<int32_t, 2, true> outDistances(outDistancesData, {n, k});
  Tensor<int, 2, true> outIndices(outIndicesData, {n, k});

  auto vectorSize = sizeof(uint8_t) * (this->d / 8);

  // Just page without overlapping copy with compute (as GpuIndexFlat does)
  int batchSize = utils::nextHighestPowerOf2(
    (int) ((size_t) kMinPageSize / vectorSize));

  for (int cur = 0; cur < n; cur += batchSize) {
    int num = std::min(batchSize, n - cur);

    auto outDistancesSlice = outDistances.narrowOutermost(cur, num);
    auto outIndicesSlice = outIndices.narrowOutermost(cur, num);

    searchNonPaged_(num,
                    x + (size_t) cur * (this->d / 8),
                    k,
                    outDistancesSlice.data(),
                    outIndicesSlice.data());
  }
}

void
GpuIndexBinaryFlat::reconstruct(faiss::IndexBinary::idx_t key,
                                uint8_t* out) const {
  DeviceScope scope(config_.device);

  FAISS_THROW_IF_NOT_MSG(key < this->ntotal, "index out of bounds");
  auto stream = resources_->getDefaultStream(config_.device);

  auto& vecs = data_->getVectorsRef();
  auto vec = vecs[key];

  // Copy the single d/8-byte vector at `key` back to the host buffer
  fromDevice(vec.data(), out, vecs.getSize(1), stream);
}

} } // namespace faiss::gpu
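
/*
 * Illustrative usage sketch (not part of the original file): how a caller
 * might copy a CPU faiss::IndexBinaryFlat onto the GPU with this class and
 * run a k-NN Hamming search. The resource object, dimensions, and variable
 * names below (numDatabaseVecs, databaseData, numQueries, queryData, k) are
 * hypothetical placeholders.
 *
 *   #include <faiss/IndexBinaryFlat.h>
 *   #include <faiss/gpu/GpuIndexBinaryFlat.h>
 *   #include <faiss/gpu/StandardGpuResources.h>
 *
 *   faiss::IndexBinaryFlat cpuIndex(256);          // 256 bits = 32 bytes/vector
 *   cpuIndex.add(numDatabaseVecs, databaseData);   // databaseData: const uint8_t*
 *
 *   faiss::gpu::StandardGpuResources res;
 *   faiss::gpu::GpuIndexBinaryFlatConfig config;
 *   config.device = 0;
 *
 *   // Copies the CPU index contents onto GPU device 0
 *   faiss::gpu::GpuIndexBinaryFlat gpuIndex(&res, &cpuIndex, config);
 *
 *   // Hamming distances come back as int32_t, labels as idx_t
 *   std::vector<int32_t> distances(numQueries * k);
 *   std::vector<faiss::IndexBinary::idx_t> labels(numQueries * k);
 *   gpuIndex.search(numQueries, queryData, k, distances.data(), labels.data());
 */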