CodePacker for non-contiguous code layouts (#2625)

Summary:
Pull Request resolved: https://github.com/facebookresearch/faiss/pull/2625

This diff introduces a new abstraction for the code layouts that are not simply flat one after another.

The packed codes are assumed to be packed together in fixed-size blocks. Hence, code `#i` is stored at offset `i % nvec` of block `floor(i / nvec)`. Each block has size `block_size`.

The `CodePacker` object takes care of the translation between packed and flat codes. The packing / unpacking functions are virtual functions now, but they could as well be inlined for performance.

The `CodePacker` object makes it possible to do manipulations on arrays of codes (including inverted lists) in a uniform way, for example merging / adding / updating / removing / converting to and from CPU.

In this diff, the only non-trivial CodePacker implemented is for the FastScan code. The new functionality supported is merging IVFFastScan indexes.

Reviewed By: alexanderguzhva

Differential Revision: D42072972

fbshipit-source-id: d1f8bdbcf7ab0f454b5d9c37ba2720fd191833d0
pull/2603/head
Matthijs Douze 2022-12-21 11:06:53 -08:00 committed by Facebook GitHub Bot
parent eaa67d8acf
commit 74ee67aefc
27 changed files with 421 additions and 54 deletions

View File

@ -50,6 +50,7 @@ set(FAISS_SRC
clone_index.cpp
index_factory.cpp
impl/AuxIndexStructures.cpp
impl/CodePacker.cpp
impl/IDSelector.cpp
impl/FaissException.cpp
impl/HNSW.cpp

View File

@ -98,18 +98,21 @@ void IndexFastScan::add(idx_t n, const float* x) {
ntotal += n;
}
// Returns a freshly allocated PQ4 packer built from this index's M and bbs.
// The object is created with new; the caller is responsible for deleting it.
CodePacker* IndexFastScan::get_CodePacker() const {
    CodePacker* pq4_packer = new CodePackerPQ4(M, bbs);
    return pq4_packer;
}
size_t IndexFastScan::remove_ids(const IDSelector& sel) {
idx_t j = 0;
std::vector<uint8_t> buffer(code_size);
CodePackerPQ4 packer(M, bbs);
for (idx_t i = 0; i < ntotal; i++) {
if (sel.is_member(i)) {
// should be removed
} else {
if (i > j) {
for (int sq = 0; sq < M; sq++) {
uint8_t code =
pq4_get_packed_element(codes.data(), bbs, M, i, sq);
pq4_set_packed_element(codes.data(), code, bbs, M, j, sq);
}
packer.unpack_1(codes.data(), i, buffer.data());
packer.pack_1(buffer.data(), j, codes.data());
}
j++;
}
@ -142,12 +145,12 @@ void IndexFastScan::merge_from(Index& otherIndex, idx_t add_id) {
IndexFastScan* other = static_cast<IndexFastScan*>(&otherIndex);
ntotal2 = roundup(ntotal + other->ntotal, bbs);
codes.resize(ntotal2 * M2 / 2);
std::vector<uint8_t> buffer(code_size);
CodePackerPQ4 packer(M, bbs);
for (int i = 0; i < other->ntotal; i++) {
for (int sq = 0; sq < M; sq++) {
uint8_t code =
pq4_get_packed_element(other->codes.data(), bbs, M, i, sq);
pq4_set_packed_element(codes.data(), code, bbs, M, ntotal + i, sq);
}
packer.unpack_1(other->codes.data(), i, buffer.data());
packer.pack_1(buffer.data(), ntotal + i, codes.data());
}
ntotal += other->ntotal;
other->reset();

View File

@ -12,6 +12,8 @@
namespace faiss {
struct CodePacker;
/** Fast scan version of IndexPQ and IndexAQ. Works for 4-bit PQ and AQ for now.
*
* The codes are not stored sequentially but grouped in blocks of size bbs.
@ -25,7 +27,6 @@ namespace faiss {
* 14: no qbs with heap accumulator
* 15: no qbs with reservoir accumulator
*/
struct IndexFastScan : Index {
// implementation to select
int implem = 0;
@ -126,6 +127,9 @@ struct IndexFastScan : Index {
void reconstruct(idx_t key, float* recons) const override;
size_t remove_ids(const IDSelector& sel) override;
CodePacker* get_CodePacker() const;
void merge_from(Index& otherIndex, idx_t add_id = 0) override;
void check_compatible_for_merge(const Index& otherIndex) const override;
};

View File

@ -223,7 +223,7 @@ void IndexFlat1D::search(
perm.size() == ntotal, "Call update_permutation before search");
const float* xb = get_xb();
#pragma omp parallel for
#pragma omp parallel for if (n > 10000)
for (idx_t i = 0; i < n; i++) {
float q = x[i]; // query
float* D = distances + i * k;
@ -233,6 +233,14 @@ void IndexFlat1D::search(
idx_t i0 = 0, i1 = ntotal;
idx_t wp = 0;
if (ntotal == 0) {
for (idx_t j = 0; j < k; j++) {
I[j] = -1;
D[j] = HUGE_VAL;
}
goto done;
}
if (xb[perm[i0]] > q) {
i1 = 0;
goto finish_right;

View File

@ -82,7 +82,7 @@ struct IndexFlatL2 : IndexFlat {
/// optimized version for 1D "vectors".
struct IndexFlat1D : IndexFlatL2 {
bool continuous_update; ///< is the permutation updated continuously?
bool continuous_update = true; ///< is the permutation updated continuously?
std::vector<idx_t> perm; ///< sorted database indices

View File

@ -8,6 +8,7 @@
#include <faiss/IndexFlatCodes.h>
#include <faiss/impl/AuxIndexStructures.h>
#include <faiss/impl/CodePacker.h>
#include <faiss/impl/DistanceComputer.h>
#include <faiss/impl/FaissAssert.h>
#include <faiss/impl/IDSelector.h>
@ -98,4 +99,8 @@ void IndexFlatCodes::merge_from(Index& otherIndex, idx_t add_id) {
other->reset();
}
// Returns a freshly allocated flat packer for codes of code_size bytes.
// The object is created with new; the caller is responsible for deleting it.
CodePacker* IndexFlatCodes::get_CodePacker() const {
    CodePacker* flat_packer = new CodePackerFlat(code_size);
    return flat_packer;
}
} // namespace faiss

View File

@ -15,6 +15,8 @@
namespace faiss {
struct CodePacker;
/** Index that encodes all vectors as fixed-size codes (size code_size). Storage
* is in the codes vector */
struct IndexFlatCodes : Index {
@ -39,8 +41,8 @@ struct IndexFlatCodes : Index {
size_t sa_code_size() const override;
/** remove some ids. NB that Because of the structure of the
* indexing structure, the semantics of this operation are
/** remove some ids. NB that because of the structure of the
* index, the semantics of this operation are
* different from the usual ones: the new ids are shifted */
size_t remove_ids(const IDSelector& sel) override;
@ -51,6 +53,9 @@ struct IndexFlatCodes : Index {
return get_FlatCodesDistanceComputer();
}
// returns a new instance of a CodePacker
CodePacker* get_CodePacker() const;
void check_compatible_for_merge(const Index& otherIndex) const override;
virtual void merge_from(Index& otherIndex, idx_t add_id = 0) override;

View File

@ -22,6 +22,7 @@
#include <faiss/IndexFlat.h>
#include <faiss/impl/AuxIndexStructures.h>
#include <faiss/impl/CodePacker.h>
#include <faiss/impl/FaissAssert.h>
#include <faiss/impl/IDSelector.h>
@ -1086,6 +1087,10 @@ void IndexIVF::merge_from(Index& otherIndex, idx_t add_id) {
other->ntotal = 0;
}
// Default packer for IVF indexes: a flat packer for codes of code_size bytes.
// The object is created with new; the caller is responsible for deleting it.
CodePacker* IndexIVF::get_CodePacker() const {
    CodePacker* flat_packer = new CodePackerFlat(code_size);
    return flat_packer;
}
void IndexIVF::replace_invlists(InvertedLists* il, bool own) {
if (own_invlists) {
delete invlists;

View File

@ -78,6 +78,7 @@ using IVFSearchParameters = SearchParametersIVF;
struct InvertedListScanner;
struct IndexIVFStats;
struct CodePacker;
/** Index based on a inverted file (IVF)
*
@ -317,6 +318,9 @@ struct IndexIVF : Index, Level1Quantizer {
virtual void merge_from(Index& otherIndex, idx_t add_id) override;
// returns a new instance of a CodePacker
virtual CodePacker* get_CodePacker() const;
/** copy a subset of the entries index to the other index
* see Invlists::copy_subset_to for the meaning of subset_type
*/
@ -349,7 +353,6 @@ struct IndexIVF : Index, Level1Quantizer {
/* The standalone codec interface (except sa_decode that is specific) */
size_t sa_code_size() const override;
void sa_encode(idx_t n, const float* x, uint8_t* bytes) const override;
IndexIVF();

View File

@ -69,7 +69,14 @@ void IndexIVFFastScan::init_fastscan(
code_size = M2 / 2;
is_trained = false;
replace_invlists(new BlockInvertedLists(nlist, bbs, bbs * M2 / 2), true);
replace_invlists(new BlockInvertedLists(nlist, get_CodePacker()), true);
}
void IndexIVFFastScan::init_code_packer() {
auto bil = dynamic_cast<BlockInvertedLists*>(invlists);
FAISS_THROW_IF_NOT(bil);
delete bil->packer; // in case there was one before
bil->packer = get_CodePacker();
}
IndexIVFFastScan::~IndexIVFFastScan() {}
@ -187,6 +194,10 @@ void IndexIVFFastScan::add_with_ids(
ntotal += n;
}
// Returns a freshly allocated PQ4 packer built from this index's M and bbs.
// The object is created with new; the caller is responsible for deleting it.
CodePacker* IndexIVFFastScan::get_CodePacker() const {
    CodePacker* pq4_packer = new CodePackerPQ4(M, bbs);
    return pq4_packer;
}
/*********************************************************
* search
*********************************************************/

View File

@ -67,6 +67,9 @@ struct IndexIVFFastScan : IndexIVF {
MetricType metric,
int bbs);
// initialize the CodePacker in the InvertedLists
void init_code_packer();
~IndexIVFFastScan() override;
/// orig's inverted lists (for debugging)
@ -181,6 +184,8 @@ struct IndexIVFFastScan : IndexIVF {
void reconstruct_from_offset(int64_t list_no, int64_t offset, float* recons)
const override;
CodePacker* get_CodePacker() const override;
// reconstruct orig invlists (for debugging)
void reconstruct_orig_invlists();
};

View File

@ -42,6 +42,7 @@
#include <faiss/impl/ProductQuantizer.h>
#include <faiss/impl/ResidualQuantizer.h>
#include <faiss/impl/ScalarQuantizer.h>
#include <faiss/impl/pq4_fast_scan.h>
#include <faiss/invlists/BlockInvertedLists.h>
@ -240,6 +241,24 @@ IndexHNSW* clone_HNSW(const IndexHNSW* ihnsw) {
return new IndexHNSW(*ihnsw);
}
// Deep-copies an inverted lists object.
//
// Supported types are ArrayInvertedLists and BlockInvertedLists. For block
// lists the attached packer (if any) must be a CodePackerPQ4; it is duplicated
// so that the clone does not share the packer with the source.
// Throws for any other inverted list type or packer type.
InvertedLists* clone_InvertedLists(const InvertedLists* invlists) {
    if (auto* ails = dynamic_cast<const ArrayInvertedLists*>(invlists)) {
        return new ArrayInvertedLists(*ails);
    }
    if (auto* bils = dynamic_cast<const BlockInvertedLists*>(invlists)) {
        // Validate the packer type before allocating the copy, so that an
        // unsupported packer does not leak the partially built clone.
        const CodePackerPQ4* packerPQ4 = nullptr;
        if (bils->packer) {
            packerPQ4 = dynamic_cast<const CodePackerPQ4*>(bils->packer);
            FAISS_THROW_IF_NOT(packerPQ4);
        }
        auto* bils2 = new BlockInvertedLists(*bils);
        if (packerPQ4) {
            // the copy constructor copied the pointer only: replace it with
            // a packer of its own
            bils2->packer = new CodePackerPQ4(*packerPQ4);
        }
        return bils2;
    }
    FAISS_THROW_FMT(
            "clone not supported for this type of inverted lists %s",
            typeid(*invlists).name());
}
} // anonymous namespace
Index* Cloner::clone_Index(const Index* index) {
@ -263,20 +282,11 @@ Index* Cloner::clone_Index(const Index* index) {
IndexIVF* res = clone_IndexIVF(ivf);
if (ivf->invlists == nullptr) {
res->invlists = nullptr;
} else if (
auto* ails = dynamic_cast<const ArrayInvertedLists*>(
ivf->invlists)) {
res->invlists = new ArrayInvertedLists(*ails);
res->own_invlists = true;
} else if (
auto* bils = dynamic_cast<const BlockInvertedLists*>(
ivf->invlists)) {
res->invlists = new BlockInvertedLists(*bils);
res->own_invlists = true;
} else {
FAISS_THROW_MSG(
"clone not supported for this type of inverted lists");
res->invlists = clone_InvertedLists(ivf->invlists);
res->own_invlists = true;
}
res->own_fields = true;
res->quantizer = clone_Index(ivf->quantizer);
@ -365,7 +375,7 @@ Index* Cloner::clone_Index(const Index* index) {
typeid(*index).name());
}
return nullptr;
}
} // namespace
Quantizer* clone_Quantizer(const Quantizer* quant) {
TRYCLONE(ResidualQuantizer, quant)

View File

@ -0,0 +1,67 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#include <faiss/impl/CodePacker.h>
#include <cassert>
#include <cstring>
namespace faiss {
/*********************************************
* CodePacker
* default of pack_all / unpack_all loops over the _1 versions
*/
// Generic implementation: packs the nvec consecutive flat codes one at a time
// through pack_1.
void CodePacker::pack_all(const uint8_t* flat_codes, uint8_t* block) const {
    const uint8_t* src = flat_codes;
    for (size_t slot = 0; slot != nvec; ++slot, src += code_size) {
        pack_1(src, slot, block);
    }
}
// Generic implementation: extracts the nvec codes of the block one at a time
// through unpack_1 and writes them consecutively.
void CodePacker::unpack_all(const uint8_t* block, uint8_t* flat_codes) const {
    uint8_t* dst = flat_codes;
    for (size_t slot = 0; slot != nvec; ++slot, dst += code_size) {
        unpack_1(block, slot, dst);
    }
}
/*********************************************
* CodePackerFlat
*/
// A flat packer stores exactly one code per block, so a block has the size of
// a code.
CodePackerFlat::CodePackerFlat(size_t code_size) {
    nvec = 1;
    block_size = code_size;
    this->code_size = code_size;
}
void CodePackerFlat::pack_all(const uint8_t* flat_codes, uint8_t* block) const {
memcpy(block, flat_codes, code_size);
}
void CodePackerFlat::unpack_all(const uint8_t* block, uint8_t* flat_codes)
const {
memcpy(flat_codes, block, code_size);
}
void CodePackerFlat::pack_1(
const uint8_t* flat_code,
size_t offset,
uint8_t* block) const {
assert(offset == 0);
pack_all(flat_code, block);
}
void CodePackerFlat::unpack_1(
const uint8_t* block,
size_t offset,
uint8_t* flat_code) const {
assert(offset == 0);
unpack_all(block, flat_code);
}
} // namespace faiss

View File

@ -0,0 +1,71 @@
/**
* Copyright (c) Facebook, Inc. and its affiliates.
*
* This source code is licensed under the MIT license found in the
* LICENSE file in the root directory of this source tree.
*/
#pragma once
#include <faiss/MetricType.h>
namespace faiss {
/**
 * Packing consists in combining a fixed number of codes of constant size
 * (code_size) into a block of data where they may (or may not) be interleaved
 * for efficient consumption by distance computation kernels. This exists for
 * the "fast_scan" indexes on CPU and for some GPU kernels.
 *
 * Subclasses are expected to set code_size, nvec and block_size in their
 * constructor; the fields default to 0 so that they never hold indeterminate
 * values.
 */
struct CodePacker {
    size_t code_size = 0;  // input code size in bytes
    size_t nvec = 0;       // number of vectors per block
    size_t block_size = 0; // size of one block in bytes (>= code_size * nvec)

    // pack a single code to a block
    virtual void pack_1(
            const uint8_t*
                    flat_code, // code to write to the block, size code_size
            size_t offset,     // offset in the block (0 <= offset < nvec)
            uint8_t* block     // block to write to (size block_size)
    ) const = 0;

    // unpack a single code from a block
    virtual void unpack_1(
            const uint8_t* block, // block to read from (size block_size)
            size_t offset,        // offset in the block (0 <= offset < nvec)
            uint8_t* flat_code    // where to write the resulting code, size
                                  // code_size
    ) const = 0;

    // pack all code in a block
    virtual void pack_all(
            const uint8_t* flat_codes, // codes to write to the block, size
                                       // (nvec * code_size)
            uint8_t* block             // block to write to (size block_size)
    ) const;

    // unpack all code in a block
    virtual void unpack_all(
            const uint8_t* block, // block to read from (size block_size)
            uint8_t* flat_codes // where to write the resulting codes size (nvec
                                // * code_size)
    ) const;

    virtual ~CodePacker() {}
};
/** Trivial code packer where codes are stored one by one.
 *
 * Every block contains exactly one code (nvec == 1, block_size == code_size).
 */
struct CodePackerFlat : CodePacker {
    /// @param code_size size of one code in bytes
    explicit CodePackerFlat(size_t code_size);

    // single-code accessors
    void pack_1(
            const uint8_t* flat_code,
            size_t offset,
            uint8_t* block) const final;
    void unpack_1(
            const uint8_t* block,
            size_t offset,
            uint8_t* flat_code) const final;

    // whole-block accessors
    void pack_all(const uint8_t* flat_codes, uint8_t* block) const final;
    void unpack_all(const uint8_t* block, uint8_t* flat_codes) const final;
};
} // namespace faiss

View File

@ -279,6 +279,8 @@ static void read_AdditiveQuantizer(AdditiveQuantizer* aq, IOReader* f) {
aq->search_type == AdditiveQuantizer::ST_norm_lsq2x4 ||
aq->search_type == AdditiveQuantizer::ST_norm_rq2x4) {
READXBVECTOR(aq->qnorm.codes);
aq->qnorm.ntotal = aq->qnorm.codes.size() / 4;
aq->qnorm.update_permutation();
}
if (aq->search_type == AdditiveQuantizer::ST_norm_lsq2x4 ||
@ -727,6 +729,7 @@ Index* read_index(IOReader* f, int io_flags) {
READ1(ivaqfs->max_train_points);
read_InvertedLists(ivaqfs, f, io_flags);
ivaqfs->init_code_packer();
idx = ivaqfs;
} else if (h == fourcc("IvFl") || h == fourcc("IvFL")) { // legacy
IndexIVFFlat* ivfl = new IndexIVFFlat();
@ -1001,6 +1004,7 @@ Index* read_index(IOReader* f, int io_flags) {
ivpq->nbits = pq.nbits;
ivpq->ksub = (1 << pq.nbits);
ivpq->code_size = pq.code_size;
ivpq->init_code_packer();
idx = ivpq;
} else if (h == fourcc("IRMf")) {

View File

@ -189,6 +189,50 @@ void pq4_set_packed_element(
}
}
/***************************************************************
* CodePackerPQ4 implementation
***************************************************************/
// Sets up the block geometry for nsq 4-bit elements per vector and blocks of
// bbs vectors.
CodePackerPQ4::CodePackerPQ4(size_t nsq, size_t bbs) {
    // flat code: 4 bits per element, rounded up to whole bytes
    const size_t flat_bytes = (nsq * 4 + 7) / 8;
    // packed block: (nsq rounded up to a pair) / 2 bytes for each of the bbs
    // vectors
    const size_t packed_bytes = ((nsq + 1) / 2) * bbs;

    this->nsq = nsq;
    nvec = bbs;
    code_size = flat_bytes;
    block_size = packed_bytes;
}
void CodePackerPQ4::pack_1(
const uint8_t* flat_code,
size_t offset,
uint8_t* block) const {
size_t bbs = nvec;
if (offset >= nvec) {
block += (offset / nvec) * block_size;
offset = offset % nvec;
}
for (size_t i = 0; i < code_size; i++) {
uint8_t code = flat_code[i];
pq4_set_packed_element(block, code & 15, bbs, nsq, offset, 2 * i);
pq4_set_packed_element(block, code >> 4, bbs, nsq, offset, 2 * i + 1);
}
}
void CodePackerPQ4::unpack_1(
const uint8_t* block,
size_t offset,
uint8_t* flat_code) const {
size_t bbs = nvec;
if (offset >= nvec) {
block += (offset / nvec) * block_size;
offset = offset % nvec;
}
for (size_t i = 0; i < code_size; i++) {
uint8_t code0, code1;
code0 = pq4_get_packed_element(block, bbs, nsq, offset, 2 * i);
code1 = pq4_get_packed_element(block, bbs, nsq, offset, 2 * i + 1);
flat_code[i] = code0 | (code1 << 4);
}
}
/***************************************************************
* Packing functions for Look-Up Tables (LUT)
***************************************************************/

View File

@ -10,6 +10,8 @@
#include <cstdint>
#include <cstdlib>
#include <faiss/impl/CodePacker.h>
/** PQ4 SIMD packing and accumulation functions
*
* The basic kernel accumulates nq query vectors with bbs = nb * 2 * 16 vectors
@ -84,6 +86,18 @@ void pq4_set_packed_element(
size_t vector_id,
size_t sq);
/** CodePacker API for the PQ4 fast-scan.
 *
 * Only the single-code accessors are overridden; pack_all / unpack_all are
 * inherited from CodePacker.
 */
struct CodePackerPQ4 : CodePacker {
    size_t nsq; // number of 4-bit elements per code, set by the constructor

    /// @param nsq  number of 4-bit elements per code
    /// @param bbs  number of vectors per block (stored in nvec)
    CodePackerPQ4(size_t nsq, size_t bbs);

    void pack_1(
            const uint8_t* flat_code,
            size_t offset,
            uint8_t* block) const final;

    void unpack_1(
            const uint8_t* block,
            size_t offset,
            uint8_t* flat_code) const final;
};
/** Pack Look-up table for consumption by the kernel.
*
* @param nq number of queries

View File

@ -7,6 +7,7 @@
#include <faiss/invlists/BlockInvertedLists.h>
#include <faiss/impl/CodePacker.h>
#include <faiss/impl/FaissAssert.h>
#include <faiss/impl/io.h>
@ -25,29 +26,43 @@ BlockInvertedLists::BlockInvertedLists(
codes.resize(nlist);
}
// Builds nlist empty lists whose block geometry (vectors per block, bytes per
// block) is read from the packer. The packer pointer is stored in the object;
// it must not be null since it is dereferenced here.
BlockInvertedLists::BlockInvertedLists(size_t nlist, const CodePacker* packer)
        : InvertedLists(nlist, InvertedLists::INVALID_CODE_SIZE),
          n_per_block(packer->nvec),
          block_size(packer->block_size),
          packer(packer) {
    // one (initially empty) code table and id vector per list
    codes.resize(nlist);
    ids.resize(nlist);
}
BlockInvertedLists::BlockInvertedLists()
: InvertedLists(0, InvertedLists::INVALID_CODE_SIZE),
n_per_block(0),
block_size(0) {}
: InvertedLists(0, InvertedLists::INVALID_CODE_SIZE) {}
size_t BlockInvertedLists::add_entries(
size_t list_no,
size_t n_entry,
const idx_t* ids_in,
const uint8_t* code) {
if (n_entry == 0)
if (n_entry == 0) {
return 0;
}
FAISS_THROW_IF_NOT(list_no < nlist);
size_t o = ids[list_no].size();
FAISS_THROW_IF_NOT(
o == 0); // not clear how we should handle subsequent adds
ids[list_no].resize(o + n_entry);
memcpy(&ids[list_no][o], ids_in, sizeof(ids_in[0]) * n_entry);
// copy whole blocks
size_t n_block = (n_entry + n_per_block - 1) / n_per_block;
size_t n_block = (o + n_entry + n_per_block - 1) / n_per_block;
codes[list_no].resize(n_block * block_size);
memcpy(&codes[list_no][o * code_size], code, n_block * block_size);
if (o % block_size == 0) {
// copy whole blocks
memcpy(&codes[list_no][o * code_size], code, n_block * block_size);
} else {
FAISS_THROW_IF_NOT_MSG(packer, "missing code packer");
std::vector<uint8_t> buffer(packer->code_size);
for (size_t i = 0; i < n_entry; i++) {
packer->unpack_1(code, i, buffer.data());
packer->pack_1(buffer.data(), i + o, codes[list_no].data());
}
}
return o;
}
@ -95,7 +110,9 @@ void BlockInvertedLists::update_entries(
*/
}
BlockInvertedLists::~BlockInvertedLists() {}
BlockInvertedLists::~BlockInvertedLists() {
delete packer;
}
/**************************************************
* IO hook implementation

View File

@ -14,6 +14,8 @@
namespace faiss {
struct CodePacker;
/** Inverted Lists that are organized by blocks.
*
* Different from the regular inverted lists, the codes are organized by blocks
@ -28,13 +30,17 @@ namespace faiss {
* data.
*/
struct BlockInvertedLists : InvertedLists {
size_t n_per_block; // nb of vectors stored per block
size_t block_size; // nb bytes per block
size_t n_per_block = 0; // nb of vectors stored per block
size_t block_size = 0; // nb bytes per block
// required to interpret the content of the blocks (owned by this)
const CodePacker* packer = nullptr;
std::vector<AlignedTable<uint8_t>> codes;
std::vector<std::vector<idx_t>> ids;
BlockInvertedLists(size_t nlist, size_t vec_per_block, size_t block_size);
BlockInvertedLists(size_t nlist, const CodePacker* packer);
BlockInvertedLists();

View File

@ -68,6 +68,9 @@ for symbol in dir(this_module):
if issubclass(the_class, SearchParameters):
class_wrappers.handle_SearchParameters(the_class)
if issubclass(the_class, CodePacker):
class_wrappers.handle_CodePacker(the_class)
##############################################################################
# For some classes (IndexIVF, IDSelector), the object holds a reference to
# a C++ object (eg. the quantizer object of IndexIVF). We don't transfer the

View File

@ -103,6 +103,8 @@ vector_name_map = {
def vector_to_array(v):
""" convert a C++ vector to a numpy array """
classname = v.__class__.__name__
if classname.startswith('AlignedTable'):
return AlignedTable_to_array(v)
assert classname.endswith('Vector')
dtype = np.dtype(vector_name_map[classname[:-6]])
a = np.empty(v.size(), dtype=dtype)

View File

@ -807,6 +807,26 @@ def handle_IndexRowwiseMinMax(the_class):
replace_method(the_class, 'train_inplace', replacement_train_inplace)
def handle_CodePacker(the_class):
    """Install numpy-friendly pack_1 / unpack_1 wrappers on a CodePacker class.

    In both wrappers, `block` is a 2D array of shape (nblock, block_size) and
    `offset` is the index of the code among the nblock * nvec slots of that
    array.
    """

    def replacement_pack_1(self, x, offset, block):
        """Write the flat code x (shape (code_size,)) at slot `offset`."""
        assert x.shape == (self.code_size,)
        nblock, block_size = block.shape
        assert block_size == self.block_size
        # each of the nblock blocks holds nvec codes: anything beyond
        # nblock * nvec would address memory outside of the array
        assert 0 <= offset < nblock * self.nvec
        self.pack_1_c(swig_ptr(x), offset, faiss.swig_ptr(block))

    def replacement_unpack_1(self, block, offset):
        """Return the flat code stored at slot `offset` as a uint8 array."""
        nblock, block_size = block.shape
        assert block_size == self.block_size
        # same bound as in pack_1: slots are counted in codes, not in bytes
        assert 0 <= offset < nblock * self.nvec
        x = np.zeros(self.code_size, dtype='uint8')
        self.unpack_1_c(faiss.swig_ptr(block), offset, swig_ptr(x))
        return x

    replace_method(the_class, 'pack_1', replacement_pack_1)
    replace_method(the_class, 'unpack_1', replacement_unpack_1)
######################################################
# MapLong2Long interface
######################################################
@ -827,7 +847,7 @@ def handle_MapLong2Long(the_class):
replace_method(the_class, 'add', replacement_map_add)
replace_method(the_class, 'search_multiple',
replacement_map_search_multiple)
replacement_map_search_multiple)
######################################################

View File

@ -136,6 +136,7 @@ typedef uint64_t size_t;
#include <faiss/impl/ResidualQuantizer.h>
#include <faiss/impl/LocalSearchQuantizer.h>
#include <faiss/impl/ProductAdditiveQuantizer.h>
#include <faiss/impl/CodePacker.h>
#include <faiss/invlists/BlockInvertedLists.h>
@ -386,6 +387,8 @@ void gpu_sync_all_devices()
%include <faiss/MetricType.h>
%newobject *::get_distance_computer() const;
%newobject *::get_CodePacker() const;
%include <faiss/Index.h>
%include <faiss/impl/DistanceComputer.h>
@ -405,6 +408,7 @@ void gpu_sync_all_devices()
%include <faiss/impl/ResidualQuantizer.h>
%include <faiss/impl/LocalSearchQuantizer.h>
%include <faiss/impl/ProductAdditiveQuantizer.h>
%include <faiss/impl/CodePacker.h>
%include <faiss/VectorTransform.h>
%include <faiss/IndexPreTransform.h>

View File

@ -98,7 +98,9 @@ struct AlignedTableTightAlloc {
AlignedTableTightAlloc<T, A>& operator=(
const AlignedTableTightAlloc<T, A>& other) {
resize(other.numel);
memcpy(ptr, other.ptr, sizeof(T) * numel);
if (numel > 0) {
memcpy(ptr, other.ptr, sizeof(T) * numel);
}
return *this;
}

View File

@ -284,6 +284,17 @@ class TestImplems(unittest.TestCase):
index2.implem = 4
Dref, Iref = index2.search(ds.get_queries(), 10)
# check CodePacker
codes_ref = faiss.vector_to_array(index.codes)
codes_ref = codes_ref.reshape(-1, index.code_size)
index2codes = faiss.vector_to_array(index2.codes)
code_packer = index2.get_CodePacker()
index2codes = index2codes.reshape(-1, code_packer.block_size)
for i in range(0, len(codes_ref), 13):
code_new = code_packer.unpack_1(index2codes, i)
np.testing.assert_array_equal(codes_ref[i], code_new)
self.cache[(d, metric)] = (ds, index, Dref, Iref)
return self.cache[(d, metric)]
@ -300,14 +311,12 @@ class TestImplems(unittest.TestCase):
verify_with_draws(self, Dref, Iref, Dnew, Inew)
def build_fast_scan_index(self, index, params):
    # wrap the reference index in a fast-scan index forced to implem 5
    # (params is not used by this variant)
    fast_scan = faiss.IndexPQFastScan(index)
    fast_scan.implem = 5
    return fast_scan
class TestImplem12(TestImplems):
def build_fast_scan_index(self, index, qbs):
@ -403,6 +412,7 @@ class TestImplem15(TestImplems):
def test_2_64(self):
self.do_with_params(32, (2, 64))
class TestAdd(unittest.TestCase):
def do_test_add(self, d, bbs):
@ -661,7 +671,7 @@ class TestPAQFastScan(unittest.TestCase):
def test_accuracy_PLSQ(self):
self.subtest_accuracy("PLSQ")
def test_accuracy_PRQ(self):
self.subtest_accuracy("PRQ")

View File

@ -588,6 +588,16 @@ class TestFlat1D(unittest.TestCase):
max_diff_D = np.abs(ref_D - new_D).max()
assert max_diff_D < 1e-5
def test_size_0(self):
    """Search an IndexFlat1D holding 0, 1 and 2 vectors.

    With k=10 and n_stored vectors in the index, the remaining
    10 - n_stored result slots must be filled with -1.
    """
    # just make sure it does not crash on small nb
    index = faiss.IndexFlat1D()
    rs = np.random.RandomState(123)
    n_stored = 0
    while n_stored < 3:
        x = np.array([[rs.rand()]])
        D, I = index.search(x, 10)
        n_missing = (I == -1).sum()
        self.assertEqual(n_missing, 10 - n_stored)
        index.add(x)
        n_stored += 1
class OPQRelativeAccuracy(unittest.TestCase):
# translated from test_opq.lua

View File

@ -174,18 +174,28 @@ class TestMerge2(unittest.TestCase):
def test_merge_PreTransform(self):
self.do_flat_codes_test("PCA16,SQ4")
def do_fast_scan_test(self, factory_key, size1):
def do_fast_scan_test(self, factory_key, size1, with_add_id=False):
ds = SyntheticDataset(110, 1000, 1000, 100)
index1 = faiss.index_factory(ds.d, factory_key)
index1.train(ds.get_train())
index_trained = faiss.index_factory(ds.d, factory_key)
index_trained.train(ds.get_train())
# test both clone and index_read/write
if True:
index1 = faiss.deserialize_index(
faiss.serialize_index(index_trained))
else:
index1 = faiss.clone_index(index_trained)
# assert index1.aq.qnorm.ntotal == index_trained.aq.qnorm.ntotal
index1.add(ds.get_database())
_, Iref = index1.search(ds.get_queries(), 5)
index1.reset()
index2 = faiss.index_factory(ds.d, factory_key)
index2.train(ds.get_train())
index2 = faiss.clone_index(index_trained)
index1.add(ds.get_database()[:size1])
index2.add(ds.get_database()[size1:])
index1.merge_from(index2)
if with_add_id:
index1.merge_from(index2, add_id=index1.ntotal)
else:
index1.merge_from(index2)
_, Inew = index1.search(ds.get_queries(), 5)
np.testing.assert_array_equal(Inew, Iref)
@ -201,6 +211,9 @@ class TestMerge2(unittest.TestCase):
def test_merge_IndexAdditiveQuantizerFastScan(self):
self.do_fast_scan_test("RQ10x4fs_32_Nrq2x4", 330)
def test_merge_IVFFastScan(self):
self.do_fast_scan_test("IVF20,PQ5x4fs", 123, with_add_id=True)
def do_test_with_ids(self, factory_key):
ds = SyntheticDataset(32, 300, 300, 100)
rs = np.random.RandomState(123)
@ -224,3 +237,23 @@ class TestMerge2(unittest.TestCase):
def test_merge_IDMap2(self):
self.do_test_with_ids("Flat,IDMap2")
class TestRemoveFastScan(unittest.TestCase):
    """remove_ids on a fast-scan index must give the same search results as
    an index to which only the kept vectors were added."""

    def do_fast_scan_test(self, factory_key, size1):
        ds = SyntheticDataset(110, 1000, 1000, 100)
        index = faiss.index_factory(ds.d, factory_key)
        index.train(ds.get_train())
        index.reset()

        # reference: only every third database vector is added
        keep_mask = [(no % 3 == 0) for no in range(ds.nb)]
        index.add(ds.get_database()[keep_mask])
        _, I_expected = index.search(ds.get_queries(), 5)

        # same content, obtained by adding everything and removing the rest
        index.reset()
        index.add(ds.get_database())
        ids_to_drop = np.where(np.logical_not(keep_mask))[0]
        index.remove_ids(ids_to_drop)
        _, I_actual = index.search(ds.get_queries(), 5)

        np.testing.assert_array_equal(I_actual, I_expected)

    def test_remove(self):
        self.do_fast_scan_test("PQ5x4fs", 320)