Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
GpuIndexFlat.cu
1 /**
2  * Copyright (c) Facebook, Inc. and its affiliates.
3  *
4  * This source code is licensed under the MIT license found in the
5  * LICENSE file in the root directory of this source tree.
6  */
7 
8 
9 #include "GpuIndexFlat.h"
10 #include "../IndexFlat.h"
11 #include "GpuResources.h"
12 #include "impl/FlatIndex.cuh"
13 #include "utils/ConversionOperators.cuh"
14 #include "utils/CopyUtils.cuh"
15 #include "utils/DeviceUtils.h"
16 #include "utils/Float16.cuh"
17 #include "utils/StaticUtils.h"
18 
19 #include <thrust/execution_policy.h>
20 #include <thrust/transform.h>
21 #include <limits>
22 
23 namespace faiss { namespace gpu {
24 
// Construct a GPU flat index as a copy of an existing CPU IndexFlat.
// The data is copied to the GPU immediately via copyFrom().
GpuIndexFlat::GpuIndexFlat(GpuResources* resources,
                           const faiss::IndexFlat* index,
                           GpuIndexFlatConfig config) :
    GpuIndex(resources, index->d, index->metric_type, config),
    config_(std::move(config)),
    data_(nullptr) {
  verifySettings_();

  // Flat index doesn't need training
  this->is_trained = true;

  copyFrom(index);
}
38 
// Construct an empty GPU flat index of the given dimension and metric.
GpuIndexFlat::GpuIndexFlat(GpuResources* resources,
                           int dims,
                           faiss::MetricType metric,
                           GpuIndexFlatConfig config) :
    GpuIndex(resources, dims, metric, config),
    config_(std::move(config)),
    data_(nullptr) {
  verifySettings_();

  // Flat index doesn't need training
  this->is_trained = true;

  // Construct index
  DeviceScope scope(device_);
  data_ = new FlatIndex(resources,
                        dims,
                        metric == faiss::METRIC_L2,
                        config_.useFloat16,
                        config_.useFloat16Accumulator,
                        config_.storeTransposed,
                        memorySpace_);
}
61 
// Frees the device-side vector storage.
GpuIndexFlat::~GpuIndexFlat() {
  delete data_;
}
65 
// Copies all state (dimension, metric, vectors) from a CPU IndexFlat
// to the GPU, replacing any existing device-side storage.
// Throws if the CPU index holds more vectors than the GPU's 32-bit
// indexing can address.
void
GpuIndexFlat::copyFrom(const faiss::IndexFlat* index) {
  DeviceScope scope(device_);

  this->d = index->d;
  this->metric_type = index->metric_type;

  // GPU code has 32 bit indices
  FAISS_THROW_IF_NOT_FMT(index->ntotal <=
                         (faiss::Index::idx_t) std::numeric_limits<int>::max(),
                         "GPU index only supports up to %zu indices; "
                         "attempting to copy CPU index with %zu parameters",
                         (size_t) std::numeric_limits<int>::max(),
                         (size_t) index->ntotal);
  this->ntotal = index->ntotal;

  // Rebuild the device index from scratch with the (possibly new) settings
  delete data_;
  data_ = new FlatIndex(resources_,
                        this->d,
                        index->metric_type == faiss::METRIC_L2,
                        config_.useFloat16,
                        config_.useFloat16Accumulator,
                        config_.storeTransposed,
                        memorySpace_);

  // The index could be empty
  if (index->ntotal > 0) {
    data_->add(index->xb.data(),
               index->ntotal,
               resources_->getDefaultStream(device_));
  }
}
98 
// Copies all state (dimension, metric, vectors) from the GPU back to a
// CPU IndexFlat. If data is stored on-device as float16, it is converted
// back to float32 first.
void
GpuIndexFlat::copyTo(faiss::IndexFlat* index) const {
  DeviceScope scope(device_);

  index->d = this->d;
  index->ntotal = this->ntotal;
  index->metric_type = this->metric_type;

  FAISS_ASSERT(data_);
  FAISS_ASSERT(data_->getSize() == this->ntotal);
  index->xb.resize(this->ntotal * this->d);

  auto stream = resources_->getDefaultStream(device_);

  if (this->ntotal > 0) {
    if (config_.useFloat16) {
      // Data is stored as float16 on-device; convert to float32 first
      auto vecFloat32 = data_->getVectorsFloat32Copy(stream);
      fromDevice(vecFloat32, index->xb.data(), stream);
    } else {
      fromDevice(data_->getVectorsFloat32Ref(), index->xb.data(), stream);
    }
  }
}
122 
// Returns the number of vectors we contain.
size_t
GpuIndexFlat::getNumVecs() const {
  return this->ntotal;
}
127 
// Clears all vectors from this index, freeing the underlying GPU memory.
void
GpuIndexFlat::reset() {
  DeviceScope scope(device_);

  // Free the underlying memory
  data_->reset();
  this->ntotal = 0;
}
136 
// Flat indices require no training; this is a no-op.
void
GpuIndexFlat::train(Index::idx_t n, const float* x) {
  // nothing to do
}
141 
// Adds n vectors of dimension d to the index.
// Overrides GpuIndex::add to avoid excessive copies: in float32 mode the
// input need not be resident on our device, so we add directly; in float16
// mode we defer to the base class, which pages data through the device.
void
GpuIndexFlat::add(Index::idx_t n, const float* x) {
  FAISS_THROW_IF_NOT_MSG(this->is_trained, "Index not trained");

  // For now, only support <= max int results
  FAISS_THROW_IF_NOT_FMT(n <= (Index::idx_t) std::numeric_limits<int>::max(),
                         "GPU index only supports up to %d indices",
                         std::numeric_limits<int>::max());

  if (n == 0) {
    // nothing to add
    return;
  }

  DeviceScope scope(device_);

  // To avoid multiple re-allocations, ensure we have enough storage
  // available
  data_->reserve(this->ntotal + n, resources_->getDefaultStream(device_));

  // If we're not operating in float16 mode, we don't need the input
  // data to be resident on our device; we can add directly.
  if (!config_.useFloat16) {
    addImpl_(n, x, nullptr);
  } else {
    // Otherwise, perform the paging
    GpuIndex::add(n, x);
  }
}
171 
// Flat index does not require IDs as there is no storage available for them.
bool
GpuIndexFlat::addImplRequiresIDs_() const {
  return false;
}
176 
// Called from GpuIndex::add with device-resident (or directly addable) data.
// ids must be null: flat indices have no ID storage.
void
GpuIndexFlat::addImpl_(int n,
                       const float* x,
                       const Index::idx_t* ids) {
  FAISS_ASSERT(data_);
  FAISS_ASSERT(n > 0);

  // We do not support add_with_ids
  FAISS_THROW_IF_NOT_MSG(!ids, "add_with_ids not supported");

  // Due to GPU indexing in int32, we can't store more than this
  // number of vectors on a GPU
  FAISS_THROW_IF_NOT_FMT(this->ntotal + n <=
                         (faiss::Index::idx_t) std::numeric_limits<int>::max(),
                         "GPU index only supports up to %zu indices",
                         (size_t) std::numeric_limits<int>::max());

  data_->add(x, n, resources_->getDefaultStream(device_));
  this->ntotal += n;
}
197 
// Called from GpuIndex::search; all pointers are already device-resident.
// Runs the brute-force k-NN query, then widens FlatIndex's int32 result
// labels to idx_t on the stream.
void
GpuIndexFlat::searchImpl_(int n,
                          const float* x,
                          int k,
                          float* distances,
                          Index::idx_t* labels) const {
  auto stream = resources_->getDefaultStream(device_);

  // Input and output data are already resident on the GPU
  Tensor<float, 2, true> queries(const_cast<float*>(x), {n, (int) this->d});
  Tensor<float, 2, true> outDistances(distances, {n, k});
  Tensor<Index::idx_t, 2, true> outLabels(labels, {n, k});

  // FlatIndex only supports int indices
  DeviceTensor<int, 2, true> outIntLabels(
    resources_->getMemoryManagerCurrentDevice(), {n, k}, stream);

  data_->query(queries, k, outDistances, outIntLabels, true);

  // Convert int to idx_t
  thrust::transform(thrust::cuda::par.on(stream),
                    outIntLabels.data(),
                    outIntLabels.end(),
                    outLabels.data(),
                    IntToIdxType());
}
224 
// Copies the single stored vector at index `key` from the device into the
// host buffer `out` (which must hold d floats).
void
GpuIndexFlat::reconstruct(faiss::Index::idx_t key,
                          float* out) const {
  DeviceScope scope(device_);

  FAISS_THROW_IF_NOT_MSG(key < this->ntotal, "index out of bounds");
  auto stream = resources_->getDefaultStream(device_);

  if (config_.useFloat16) {
    // Stored as float16; convert a single vector back to float32
    auto vec = data_->getVectorsFloat32Copy(key, 1, stream);
    fromDevice(vec.data(), out, this->d, stream);
  } else {
    auto vec = data_->getVectorsFloat32Ref()[key];
    fromDevice(vec.data(), out, this->d, stream);
  }
}
241 
// Batch reconstruction: copies `num` consecutive vectors starting at `i0`
// from the device into the host buffer `out` (num * d floats).
void
GpuIndexFlat::reconstruct_n(faiss::Index::idx_t i0,
                            faiss::Index::idx_t num,
                            float* out) const {
  DeviceScope scope(device_);

  FAISS_THROW_IF_NOT_MSG(i0 < this->ntotal, "index out of bounds");
  FAISS_THROW_IF_NOT_MSG(i0 + num - 1 < this->ntotal, "num out of bounds");
  auto stream = resources_->getDefaultStream(device_);

  if (config_.useFloat16) {
    // Stored as float16; convert the range back to float32
    auto vec = data_->getVectorsFloat32Copy(i0, num, stream);
    fromDevice(vec.data(), out, num * this->d, stream);
  } else {
    auto vec = data_->getVectorsFloat32Ref()[i0];
    fromDevice(vec.data(), out, this->d * num, stream);
  }
}
260 
// Validates the config: useFloat16Accumulator (Hgemm) requires float16
// storage mode, a device with half-precision math support, and a build
// compiled with FAISS_USE_FLOAT16.
void
GpuIndexFlat::verifySettings_() const {
  // If we want Hgemm, ensure that it is supported on this device
  if (config_.useFloat16Accumulator) {
#ifdef FAISS_USE_FLOAT16
    FAISS_THROW_IF_NOT_MSG(config_.useFloat16,
                           "useFloat16Accumulator can only be enabled "
                           "with useFloat16");

    FAISS_THROW_IF_NOT_FMT(getDeviceSupportsFloat16Math(config_.device),
                           "Device %d does not support Hgemm "
                           "(useFloat16Accumulator)",
                           config_.device);
#else
    FAISS_THROW_IF_NOT_MSG(false, "not compiled with float16 support");
#endif
  }
}
279 
280 //
281 // GpuIndexFlatL2
282 //
283 
// Construct from a pre-existing CPU IndexFlatL2 (L2 metric is implied).
GpuIndexFlatL2::GpuIndexFlatL2(GpuResources* resources,
                               faiss::IndexFlatL2* index,
                               GpuIndexFlatConfig config) :
    GpuIndexFlat(resources, index, config) {
}
289 
// Construct an empty L2 flat index of the given dimension.
GpuIndexFlatL2::GpuIndexFlatL2(GpuResources* resources,
                               int dims,
                               GpuIndexFlatConfig config) :
    GpuIndexFlat(resources, dims, faiss::METRIC_L2, config) {
}
295 
// Copies state from a CPU IndexFlatL2; delegates to the base class.
void
GpuIndexFlatL2::copyFrom(faiss::IndexFlatL2* index) {
  GpuIndexFlat::copyFrom(index);
}
300 
// Copies state back to a CPU IndexFlatL2; delegates to the base class.
void
GpuIndexFlatL2::copyTo(faiss::IndexFlatL2* index) {
  GpuIndexFlat::copyTo(index);
}
305 
306 //
307 // GpuIndexFlatIP
308 //
309 
// Construct from a pre-existing CPU IndexFlatIP (inner-product metric).
GpuIndexFlatIP::GpuIndexFlatIP(GpuResources* resources,
                               faiss::IndexFlatIP* index,
                               GpuIndexFlatConfig config) :
    GpuIndexFlat(resources, index, config) {
}
315 
// Construct an empty inner-product flat index of the given dimension.
GpuIndexFlatIP::GpuIndexFlatIP(GpuResources* resources,
                               int dims,
                               GpuIndexFlatConfig config) :
    GpuIndexFlat(resources, dims, faiss::METRIC_INNER_PRODUCT, config) {
}
321 
// Copies state from a CPU IndexFlatIP; delegates to the base class.
void
GpuIndexFlatIP::copyFrom(faiss::IndexFlatIP* index) {
  GpuIndexFlat::copyFrom(index);
}
326 
// Copies state back to a CPU IndexFlatIP; delegates to the base class.
void
GpuIndexFlatIP::copyTo(faiss::IndexFlatIP* index) {
  GpuIndexFlat::copyTo(index);
}
331 
332 } } // namespace
DeviceTensor< float, 2, true > getVectorsFloat32Copy(cudaStream_t stream)
Definition: FlatIndex.cu:89
void copyFrom(faiss::IndexFlatL2 *index)
void copyTo(faiss::IndexFlat *index) const
void reconstruct_n(faiss::Index::idx_t i0, faiss::Index::idx_t num, float *out) const override
Batch reconstruction method.
int getSize() const
Returns the number of vectors we contain.
Definition: FlatIndex.cu:45
Holder of GPU resources for a particular flat index.
Definition: FlatIndex.cuh:21
void copyTo(faiss::IndexFlatL2 *index)
size_t getNumVecs() const
Returns the number of vectors we contain.
void searchImpl_(int n, const float *x, int k, float *distances, faiss::Index::idx_t *labels) const override
Called from GpuIndex for search.
GpuIndexFlat(GpuResources *resources, const faiss::IndexFlat *index, GpuIndexFlatConfig config=GpuIndexFlatConfig())
Definition: GpuIndexFlat.cu:25
bool useFloat16
Whether or not data is stored as float16.
Definition: GpuIndexFlat.h:33
int device
GPU device on which the index is resident.
Definition: GpuIndex.h:25
GpuIndexFlatL2(GpuResources *resources, faiss::IndexFlatL2 *index, GpuIndexFlatConfig config=GpuIndexFlatConfig())
virtual cudaStream_t getDefaultStream(int device)=0
int d
vector dimension
Definition: Index.h:66
long idx_t
all indices are this type
Definition: Index.h:62
void reconstruct(faiss::Index::idx_t key, float *out) const override
DeviceMemory & getMemoryManagerCurrentDevice()
Calls getMemoryManager for the current device.
const int device_
The GPU device we are resident on.
Definition: GpuIndex.h:126
void copyTo(faiss::IndexFlatIP *index)
GpuIndexFlatIP(GpuResources *resources, faiss::IndexFlatIP *index, GpuIndexFlatConfig config=GpuIndexFlatConfig())
GpuResources * resources_
Manages streams, cuBLAS handles and scratch memory for devices.
Definition: GpuIndex.h:123
void addImpl_(int n, const float *x, const Index::idx_t *ids) override
Called from GpuIndex for add.
void reserve(size_t numVecs, cudaStream_t stream)
Reserve storage that can contain at least this many vectors.
Definition: FlatIndex.cu:66
void add(const float *data, int numVecs, cudaStream_t stream)
Definition: FlatIndex.cu:196
idx_t ntotal
total nb of indexed vectors
Definition: Index.h:67
void add(faiss::Index::idx_t, const float *x) override
Definition: GpuIndex.cu:79
void copyFrom(const faiss::IndexFlat *index)
Definition: GpuIndexFlat.cu:67
Our tensor type.
Definition: Tensor.cuh:28
const MemorySpace memorySpace_
The memory space of our primary storage on the GPU.
Definition: GpuIndex.h:129
bool addImplRequiresIDs_() const override
Flat index does not require IDs as there is no storage available for them.
const GpuIndexFlatConfig config_
Our config object.
Definition: GpuIndexFlat.h:123
MetricType metric_type
type of metric this index uses for search
Definition: Index.h:74
void add(faiss::Index::idx_t, const float *x) override
Overrides to avoid excessive copies.
Tensor< float, 2, true > & getVectorsFloat32Ref()
Returns a reference to our vectors currently in use.
Definition: FlatIndex.cu:77
void reset() override
Clears all vectors from this index.
void copyFrom(faiss::IndexFlatIP *index)
bool is_trained
set if the Index does not require training, or if training is done already
Definition: Index.h:71
void reset()
Free all storage.
Definition: FlatIndex.cu:270
std::vector< float > xb
database vectors, size ntotal * d
Definition: IndexFlat.h:23
void train(Index::idx_t n, const float *x) override
This index is not trained, so this does nothing.
MetricType
Some algorithms support both an inner product version and a L2 search version.
Definition: Index.h:44