Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
TestGpuIndexFlat.cpp
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 // Copyright 2004-present Facebook. All Rights Reserved.
10 
11 #include "../../IndexFlat.h"
12 #include "../GpuIndexFlat.h"
13 #include "../StandardGpuResources.h"
14 #include "../utils/DeviceUtils.h"
15 #include "../test/TestUtils.h"
16 #include <gtest/gtest.h>
17 #include <sstream>
18 #include <vector>
19 
20 // FIXME: figure out a better way to test fp16
21 constexpr float kF16MaxRelErr = 0.07f;
22 constexpr float kF32MaxRelErr = 6e-3f;
23 
// Bundle of knobs controlling a single testFlat() run. The defaults produce
// a randomized L2, float32, non-transposed comparison; each field can be
// overridden per-test. (Struct header reconstructed; it was lost in the
// doxygen extraction of this file.)
struct TestFlatOptions {
  TestFlatOptions()
      : useL2(true),
        useFloat16(false),
        useTransposed(false),
        numVecsOverride(-1),
        numQueriesOverride(-1),
        kOverride(-1) {
  }

  bool useL2;             // true => L2 metric, false => inner product
  bool useFloat16;        // store data as float16 on the GPU
  bool useTransposed;     // use the transposed vector storage layout
  int numVecsOverride;    // > 0 forces the database size; <= 0 => random
  int numQueriesOverride; // > 0 forces the query count; <= 0 => random
  int kOverride;          // > 0 forces k; <= 0 => random
};
41 
42 void testFlat(const TestFlatOptions& opt) {
43  int numVecs = opt.numVecsOverride > 0 ?
44  opt.numVecsOverride : faiss::gpu::randVal(1000, 20000);
45  int dim = faiss::gpu::randVal(50, 800);
46  int numQuery = opt.numQueriesOverride > 0 ?
47  opt.numQueriesOverride : faiss::gpu::randVal(1, 512);
48 
49  // Due to loss of precision in a float16 accumulator, for large k,
50  // the number of differences is pretty huge. Restrict ourselves to a
51  // fairly small `k` for float16
52  int k = opt.useFloat16 ?
53  std::min(faiss::gpu::randVal(1, 50), numVecs) :
54  std::min(faiss::gpu::randVal(1, 1024), numVecs);
55  if (opt.kOverride > 0) {
56  k = opt.kOverride;
57  }
58 
59  faiss::IndexFlatIP cpuIndexIP(dim);
60  faiss::IndexFlatL2 cpuIndexL2(dim);
61 
62  faiss::IndexFlat* cpuIndex =
63  opt.useL2 ? (faiss::IndexFlat*) &cpuIndexL2 :
64  (faiss::IndexFlat*) &cpuIndexIP;
65 
66  // Construct on a random device to test multi-device, if we have
67  // multiple devices
68  int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);
69 
71  res.noTempMemory();
72 
73 
75  config.device = device;
76  config.useFloat16 = opt.useFloat16;
77  config.storeTransposed = opt.useTransposed;
78 
79  faiss::gpu::GpuIndexFlatIP gpuIndexIP(&res, dim, config);
80  faiss::gpu::GpuIndexFlatL2 gpuIndexL2(&res, dim, config);
81 
82  faiss::gpu::GpuIndexFlat* gpuIndex =
83  opt.useL2 ? (faiss::gpu::GpuIndexFlat*) &gpuIndexL2 :
84  (faiss::gpu::GpuIndexFlat*) &gpuIndexIP;
85 
86  std::vector<float> vecs = faiss::gpu::randVecs(numVecs, dim);
87  cpuIndex->add(numVecs, vecs.data());
88  gpuIndex->add(numVecs, vecs.data());
89 
90  std::stringstream str;
91  str << (opt.useL2 ? "L2" : "IP") << " numVecs " << numVecs
92  << " dim " << dim
93  << " useFloat16 " << opt.useFloat16
94  << " transposed " << opt.useTransposed
95  << " numQuery " << numQuery
96  << " k " << k;
97 
98  // To some extent, we depend upon the relative error for the test
99  // for float16
100  faiss::gpu::compareIndices(*cpuIndex, *gpuIndex, numQuery, dim, k, str.str(),
101  opt.useFloat16 ? kF16MaxRelErr : kF32MaxRelErr,
102  // FIXME: the fp16 bounds are
103  // useless when math (the accumulator) is
104  // in fp16. Figure out another way to test
105  opt.useFloat16 ? 0.99f : 0.1f,
106  opt.useFloat16 ? 0.65f : 0.015f);
107 }
108 
109 TEST(TestGpuIndexFlat, IP_Float32) {
110  for (int tries = 0; tries < 5; ++tries) {
111  faiss::gpu::newTestSeed();
112 
113  TestFlatOptions opt;
114  opt.useL2 = false;
115  opt.useFloat16 = false;
116  opt.useTransposed = false;
117 
118  testFlat(opt);
119 
120  opt.useTransposed = true;
121  testFlat(opt);
122  }
123 }
124 
125 TEST(TestGpuIndexFlat, L2_Float32) {
126  for (int tries = 0; tries < 5; ++tries) {
127  faiss::gpu::newTestSeed();
128 
129  TestFlatOptions opt;
130  opt.useL2 = true;
131  opt.useFloat16 = false;
132  opt.useTransposed = false;
133 
134  testFlat(opt);
135 
136  opt.useTransposed = true;
137  testFlat(opt);
138  }
139 }
140 
141 // test specialized k == 1 codepath
142 TEST(TestGpuIndexFlat, L2_Float32_K1) {
143  for (int tries = 0; tries < 5; ++tries) {
144  faiss::gpu::newTestSeed();
145 
146  TestFlatOptions opt;
147  opt.useL2 = true;
148  opt.useFloat16 = false;
149  opt.useTransposed = false;
150  opt.kOverride = 1;
151 
152  testFlat(opt);
153  }
154 }
155 
156 TEST(TestGpuIndexFlat, IP_Float16) {
157  for (int tries = 0; tries < 5; ++tries) {
158  faiss::gpu::newTestSeed();
159 
160  TestFlatOptions opt;
161  opt.useL2 = false;
162  opt.useFloat16 = true;
163  opt.useTransposed = false;
164 
165  testFlat(opt);
166 
167  opt.useTransposed = true;
168  testFlat(opt);
169  }
170 }
171 
172 TEST(TestGpuIndexFlat, L2_Float16) {
173  for (int tries = 0; tries < 5; ++tries) {
174  faiss::gpu::newTestSeed();
175 
176  TestFlatOptions opt;
177  opt.useL2 = true;
178  opt.useFloat16 = true;
179  opt.useTransposed = false;
180 
181  testFlat(opt);
182 
183  opt.useTransposed = true;
184  testFlat(opt);
185  }
186 }
187 
188 // test specialized k == 1 codepath
189 TEST(TestGpuIndexFlat, L2_Float16_K1) {
190  for (int tries = 0; tries < 5; ++tries) {
191  faiss::gpu::newTestSeed();
192 
193  TestFlatOptions opt;
194  opt.useL2 = true;
195  opt.useFloat16 = true;
196  opt.useTransposed = false;
197  opt.kOverride = 1;
198 
199  testFlat(opt);
200  }
201 }
202 
203 // test tiling along a huge vector set
204 TEST(TestGpuIndexFlat, L2_Tiling) {
205  for (int tries = 0; tries < 3; ++tries) {
206  faiss::gpu::newTestSeed();
207 
208  TestFlatOptions opt;
209  opt.useL2 = true;
210  opt.useFloat16 = false;
211  opt.useTransposed = false;
212  opt.numVecsOverride = 1000000;
213  opt.numQueriesOverride = 8;
214 
215  testFlat(opt);
216 
217  opt.useTransposed = true;
218  testFlat(opt);
219  }
220 }
221 
222 TEST(TestGpuIndexFlat, QueryEmpty) {
224  res.noTempMemory();
225 
227  config.device = 0;
228  config.useFloat16 = false;
229  config.storeTransposed = false;
230 
231  int dim = 128;
232  faiss::gpu::GpuIndexFlatL2 gpuIndex(&res, dim, config);
233 
234  // Querying an empty index should not blow up, and just return
235  // (FLT_MAX, -1)
236  int numQuery = 10;
237  int k = 50;
238  std::vector<float> queries(numQuery * dim, 1.0f);
239 
240  std::vector<float> dist(numQuery * k, 0);
241  std::vector<faiss::Index::idx_t> ind(numQuery * k);
242 
243  gpuIndex.search(numQuery, queries.data(), k, dist.data(), ind.data());
244 
245  for (auto d : dist) {
246  EXPECT_EQ(d, std::numeric_limits<float>::max());
247  }
248 
249  for (auto i : ind) {
250  EXPECT_EQ(i, -1);
251  }
252 }
253 
254 TEST(TestGpuIndexFlat, CopyFrom) {
255  faiss::gpu::newTestSeed();
256 
257  int numVecs = faiss::gpu::randVal(100, 200);
258  int dim = faiss::gpu::randVal(1, 1000);
259 
260  faiss::IndexFlatL2 cpuIndex(dim);
261 
262  std::vector<float> vecs = faiss::gpu::randVecs(numVecs, dim);
263  cpuIndex.add(numVecs, vecs.data());
264 
266  res.noTempMemory();
267 
268  // Fill with garbage values
269  int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);
270 
272  config.device = 0;
273  config.useFloat16 = false;
274  config.storeTransposed = false;
275 
276  faiss::gpu::GpuIndexFlatL2 gpuIndex(&res, 2000, config);
277  gpuIndex.copyFrom(&cpuIndex);
278 
279  EXPECT_EQ(cpuIndex.ntotal, gpuIndex.ntotal);
280  EXPECT_EQ(gpuIndex.ntotal, numVecs);
281 
282  EXPECT_EQ(cpuIndex.d, gpuIndex.d);
283  EXPECT_EQ(cpuIndex.d, dim);
284 
285  int idx = faiss::gpu::randVal(0, numVecs - 1);
286 
287  std::vector<float> gpuVals(dim);
288  gpuIndex.reconstruct(idx, gpuVals.data());
289 
290  std::vector<float> cpuVals(dim);
291  cpuIndex.reconstruct(idx, cpuVals.data());
292 
293  EXPECT_EQ(gpuVals, cpuVals);
294 }
295 
296 TEST(TestGpuIndexFlat, CopyTo) {
297  faiss::gpu::newTestSeed();
298 
300  res.noTempMemory();
301 
302  int numVecs = faiss::gpu::randVal(100, 200);
303  int dim = faiss::gpu::randVal(1, 1000);
304 
305  int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);
306 
308  config.device = device;
309  config.useFloat16 = false;
310  config.storeTransposed = false;
311 
312  faiss::gpu::GpuIndexFlatL2 gpuIndex(&res, dim, config);
313 
314  std::vector<float> vecs = faiss::gpu::randVecs(numVecs, dim);
315  gpuIndex.add(numVecs, vecs.data());
316 
317  // Fill with garbage values
318  faiss::IndexFlatL2 cpuIndex(2000);
319  gpuIndex.copyTo(&cpuIndex);
320 
321  EXPECT_EQ(cpuIndex.ntotal, gpuIndex.ntotal);
322  EXPECT_EQ(gpuIndex.ntotal, numVecs);
323 
324  EXPECT_EQ(cpuIndex.d, gpuIndex.d);
325  EXPECT_EQ(cpuIndex.d, dim);
326 
327  int idx = faiss::gpu::randVal(0, numVecs - 1);
328 
329  std::vector<float> gpuVals(dim);
330  gpuIndex.reconstruct(idx, gpuVals.data());
331 
332  std::vector<float> cpuVals(dim);
333  cpuIndex.reconstruct(idx, cpuVals.data());
334 
335  EXPECT_EQ(gpuVals, cpuVals);
336 }
337 
338 TEST(TestGpuIndexFlat, UnifiedMemory) {
339  // Construct on a random device to test multi-device, if we have
340  // multiple devices
341  int device = faiss::gpu::randVal(0, faiss::gpu::getNumDevices() - 1);
342 
343  if (!faiss::gpu::getFullUnifiedMemSupport(device)) {
344  return;
345  }
346 
347  int dim = 256;
348 
349  // FIXME: GpuIndexFlat doesn't support > 2^31 (vecs * dims) due to
350  // kernel indexing, so we can't test unified memory for memory
351  // oversubscription.
352  size_t numVecs = 50000;
353  int numQuery = 10;
354  int k = 10;
355 
356  faiss::IndexFlatL2 cpuIndexL2(dim);
357 
359  res.noTempMemory();
360 
362  config.device = device;
363  config.memorySpace = faiss::gpu::MemorySpace::Unified;
364 
365  faiss::gpu::GpuIndexFlatL2 gpuIndexL2(&res, dim, config);
366 
367  std::vector<float> vecs = faiss::gpu::randVecs(numVecs, dim);
368  cpuIndexL2.add(numVecs, vecs.data());
369  gpuIndexL2.add(numVecs, vecs.data());
370 
371  // To some extent, we depend upon the relative error for the test
372  // for float16
373  faiss::gpu::compareIndices(cpuIndexL2, gpuIndexL2,
374  numQuery, dim, k, "Unified Memory",
375  kF32MaxRelErr,
376  0.1f,
377  0.015f);
378 }
void copyTo(faiss::IndexFlat *index) const
void reconstruct(idx_t key, float *recons) const override
Definition: IndexFlat.cpp:118
bool useFloat16
Whether or not data is stored as float16.
Definition: GpuIndexFlat.h:35
int device
GPU device on which the index is resident.
Definition: GpuIndex.h:27
int d
vector dimension
Definition: Index.h:64
MemorySpace memorySpace
Definition: GpuIndex.h:32
void reconstruct(faiss::Index::idx_t key, float *out) const override
idx_t ntotal
total nb of indexed vectors
Definition: Index.h:65
void add(idx_t n, const float *x) override
Definition: IndexFlat.cpp:30
void copyFrom(const faiss::IndexFlat *index)
Definition: GpuIndexFlat.cu:87
void add(faiss::Index::idx_t, const float *x) override
Overrides to avoid excessive copies.
void search(faiss::Index::idx_t n, const float *x, faiss::Index::idx_t k, float *distances, faiss::Index::idx_t *labels) const override