1575 lines
46 KiB
C++
1575 lines
46 KiB
C++
|
|
/**
|
|
* Copyright (c) 2015-present, Facebook, Inc.
|
|
* All rights reserved.
|
|
*
|
|
* This source code is licensed under the CC-by-NC license found in the
|
|
* LICENSE file in the root directory of this source tree.
|
|
*/
|
|
|
|
/* Copyright 2004-present Facebook. All Rights Reserved.
|
|
Inverted list structure.
|
|
*/
|
|
|
|
#include "IndexIVFPQ.h"
|
|
|
|
#include <cstdio>
|
|
#include <cassert>
|
|
|
|
#include <sys/mman.h>
|
|
|
|
#include <algorithm>
|
|
|
|
#include "Heap.h"
|
|
#include "utils.h"
|
|
|
|
#include "Clustering.h"
|
|
#include "IndexFlat.h"
|
|
|
|
#include "hamming.h"
|
|
|
|
#include "FaissAssert.h"
|
|
|
|
#include "AuxIndexStructures.h"
|
|
|
|
namespace faiss {
|
|
|
|
|
|
|
|
|
|
|
|
/*****************************************
|
|
* IndexIVFPQ implementation
|
|
******************************************/
|
|
|
|
/** Build an IVFPQ index (inverted file with product-quantized residuals).
 *
 * @param quantizer      coarse quantizer mapping vectors to inverted lists
 * @param d              dimensionality of the indexed vectors
 * @param nlist          number of inverted lists (coarse centroids)
 * @param M              number of PQ sub-quantizers
 * @param nbits_per_idx  bits per sub-quantizer code (<= 8, so ksub <= 256)
 */
IndexIVFPQ::IndexIVFPQ (Index * quantizer, size_t d, size_t nlist,
                        size_t M, size_t nbits_per_idx):
    IndexIVF (quantizer, d, nlist, METRIC_L2),
    pq (d, M, nbits_per_idx)
{
    // scanning code assumes 1-byte sub-quantizer indices
    FAISS_ASSERT (nbits_per_idx <= 8);
    code_size = pq.code_size;
    is_trained = false;
    // one code array per inverted list
    codes.resize (nlist);
    // by default, encode residuals w.r.t. the coarse centroid
    by_residual = true;
    use_precomputed_table = 0;
    scan_table_threshold = 0;
    max_codes = 0; // means unlimited

    polysemous_training = nullptr;
    do_polysemous_training = false;
    polysemous_ht = 0;

    set_typename();
}
|
|
|
|
|
|
|
|
void IndexIVFPQ::set_typename ()
|
|
{
|
|
std::stringstream s;
|
|
s << "IvfPQ_" << pq.M << "x" << pq.nbits
|
|
<< "[" << nlist << ":" << quantizer->index_typename << "]";
|
|
index_typename = s.str();
|
|
}
|
|
|
|
|
|
/// Train the product quantizer on (residual) vectors; delegates to
/// train_residual_o without collecting second-level residuals.
void IndexIVFPQ::train_residual (idx_t n, const float *x)
{
    train_residual_o (n, x, nullptr);
}
|
|
|
|
|
|
/** Train the product quantizer, optionally reporting 2nd-level residuals.
 *
 * @param n            nb of training vectors (internally capped to
 *                     pq.ksub * 64, i.e. 64 points per PQ centroid)
 * @param x            training vectors, size n * d
 * @param residuals_2  if non-null, filled with x - decode(encode(x)) for
 *                     the (possibly subsampled) training set; only the
 *                     first (capped n) * d floats are written
 */
void IndexIVFPQ::train_residual_o (idx_t n, const float *x, float *residuals_2)
{
    // subsample the training set: 64 points per centroid is enough
    idx_t ntrain = pq.ksub * 64;
    if(n > ntrain) n = ntrain;

    const float *trainset;
    if (by_residual) {
        // train on residuals w.r.t. the assigned coarse centroid
        if(verbose) printf("computing residuals\n");
        idx_t * assign = new idx_t [n]; // assignement to coarse centroids
        quantizer->assign (n, x, assign);
        float *residuals = new float [n * d];
        for (idx_t i = 0; i < n; i++)
            quantizer->compute_residual (x + i * d, residuals+i*d, assign[i]);
        delete [] assign;
        trainset = residuals;
    } else {
        trainset = x;
    }
    if (verbose)
        printf ("training %zdx%zd product quantizer on %ld vectors in %dD\n",
                pq.M, pq.ksub, n, d);
    pq.verbose = verbose;
    pq.train (n, trainset);

    if (do_polysemous_training) {
        // re-order PQ centroids so that Hamming distances on codes
        // approximate real distances (enables polysemous filtering)
        PolysemousTraining default_pt;
        PolysemousTraining *pt = polysemous_training;
        if (!pt) pt = &default_pt;
        pt->optimize_pq_for_hamming (pq, n, trainset);
    }

    // prepare second-level residuals for refine PQ
    if (residuals_2) {
        uint8_t *train_codes = new uint8_t [pq.code_size * n];
        pq.compute_codes (trainset, train_codes, n);

        for (idx_t i = 0; i < n; i++) {
            const float *xx = trainset + i * d;
            float * res = residuals_2 + i * d;
            pq.decode (train_codes + i * pq.code_size, res);
            // residual of the 1st-level reconstruction
            for (int j = 0; j < d; j++)
                res[j] = xx[j] - res[j];
        }

        delete [] train_codes;
    }

    if (by_residual) {
        // trainset aliases the heap buffer allocated above in this case
        delete [] trainset;
        precompute_table ();
    }

}
|
|
|
|
|
|
/* produce a binary signature based on the residual vector */
|
|
void IndexIVFPQ::encode (long key, const float * x, uint8_t * code) const
|
|
{
|
|
if (by_residual) {
|
|
float residual_vec[d];
|
|
quantizer->compute_residual (x, residual_vec, key);
|
|
pq.compute_code (residual_vec, code);
|
|
}
|
|
else pq.compute_code (x, code);
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/** Encode n vectors into PQ codes.
 *
 * @param keys    coarse assignment of each vector, size n
 * @param x       input vectors, size n * d
 * @param xcodes  output codes, size n * pq.code_size
 */
void IndexIVFPQ::encode_multiple (size_t n, const long *keys,
                                  const float * x, uint8_t * xcodes) const
{
    if (by_residual) {
        // RAII buffer: released even if compute_residual/compute_codes throws
        // (the original raw new[]/delete[] leaked on exception)
        std::vector<float> residuals (n * d);
        // TODO: parallelize?
        for (size_t i = 0; i < n; i++)
            quantizer->compute_residual (x + i * d, residuals.data() + i * d,
                                         keys[i]);
        pq.compute_codes (residuals.data(), xcodes, n);
    } else {
        pq.compute_codes (x, xcodes, n);
    }
}
|
|
|
|
|
|
|
|
/// Add n vectors with explicit ids (or sequential ids if xids == nullptr);
/// delegates to add_core_o without collecting 2nd-level residuals.
void IndexIVFPQ::add_with_ids (idx_t n, const float * x, const long *xids)
{
    add_core_o (n, x, xids, nullptr);
}
|
|
|
|
|
|
/** Add vectors, optionally reporting the 2nd-level residuals.
 *
 * @param x                input vectors, size n * d
 * @param xids             ids to store (nullptr => sequential from ntotal)
 * @param residuals_2      if non-null, filled with x - decode(code), n * d
 * @param precomputed_idx  coarse assignment if already computed (nullptr =>
 *                         computed here); entries < 0 are skipped
 */
void IndexIVFPQ::add_core_o (idx_t n, const float * x, const long *xids,
                             float *residuals_2, const long *precomputed_idx)
{
    FAISS_ASSERT (is_trained);
    double t0 = getmillisecs ();
    const long * idx;

    if (precomputed_idx) {
        idx = precomputed_idx;
    } else {
        long * idx0 = new long [n];
        quantizer->assign (n, x, idx0);
        idx = idx0;
    }

    double t1 = getmillisecs ();
    uint8_t * xcodes = new uint8_t [n * code_size];

    const float *to_encode = nullptr;

    if (by_residual) {
        float *residuals = new float [n * d];
        // TODO: parallelize?
        for (size_t i = 0; i < n; i++) {
            if (idx[i] < 0)
                // unassigned vector: encode the zero vector
                memset (residuals + i * d, 0, sizeof(*residuals) * d);
            else
                quantizer->compute_residual (
                    x + i * d, residuals + i * d, idx[i]);
        }
        to_encode = residuals;
    } else {
        to_encode = x;
    }
    pq.compute_codes (to_encode, xcodes, n);

    double t2 = getmillisecs ();
    // TODO: parallelize?
    size_t n_ignore = 0;
    for (size_t i = 0; i < n; i++) {
        idx_t key = idx[i];
        if (key < 0) {
            n_ignore ++;
            if (residuals_2)
                // BUGFIX: zero this vector's slot (row i), not the start
                // of the residuals_2 array as the original code did
                memset (residuals_2 + i * d, 0, sizeof(*residuals_2) * d);
            continue;
        }
        idx_t id = xids ? xids[i] : ntotal + i;
        ids[key].push_back (id);
        uint8_t *code = xcodes + i * code_size;
        for (size_t j = 0; j < code_size; j++)
            codes[key].push_back (code[j]);

        if (residuals_2) {
            // residual of the encoded vector w.r.t. its PQ reconstruction
            float *res2 = residuals_2 + i * d;
            const float *xi = to_encode + i * d;
            pq.decode (code, res2);
            for (int j = 0; j < d; j++)
                res2[j] = xi[j] - res2[j];
        }

        if (maintain_direct_map)
            // direct map packs (list_no, offset-in-list) in one 64-bit word
            direct_map.push_back (key << 32 | (ids[key].size() - 1));
    }

    if (by_residual)
        delete [] to_encode;

    delete [] xcodes;
    if (!precomputed_idx)
        delete [] idx;
    double t3 = getmillisecs ();
    if(verbose) {
        char comment[100] = {0};
        if (n_ignore > 0)
            snprintf (comment, 100, "(%ld vectors ignored)", n_ignore);
        printf(" add_core times: %.3f %.3f %.3f %s\n",
               t1 - t0, t2 - t1, t3 - t2, comment);
    }
    ntotal += n;
}
|
|
|
|
/** Reconstruct the approximate vectors with sequential ids in [i0, i0 + ni).
 *
 * @param recons  output, size ni * d; row (id - i0) receives vector `id`.
 *                Ids outside the inverted lists are left untouched.
 */
void IndexIVFPQ::reconstruct_n (idx_t i0, idx_t ni, float *recons) const
{
    FAISS_ASSERT (ni == 0 || (i0 >= 0 && i0 + ni <= ntotal));

    std::vector<float> centroid (d);

    // size_t avoids the signed/unsigned mismatch of the original `int key`
    for (size_t key = 0; key < nlist; key++) {
        const std::vector<long> & idlist = ids[key];
        const uint8_t * code_line = codes[key].data();
        // reconstruct the coarse centroid lazily, at most once per list
        // (the original code re-reconstructed it for every matching vector)
        bool centroid_valid = false;

        for (size_t ofs = 0; ofs < idlist.size(); ofs++) {
            long id = idlist[ofs];
            if (!(id >= i0 && id < i0 + ni)) continue;
            float *r = recons + d * (id - i0);
            pq.decode (code_line + ofs * pq.code_size, r);
            if (by_residual) {
                if (!centroid_valid) {
                    quantizer->reconstruct (key, centroid.data());
                    centroid_valid = true;
                }
                // add back the coarse centroid to the decoded residual
                for (int j = 0; j < d; j++) {
                    r[j] += centroid[j];
                }
            }
        }
    }
}
|
|
|
|
|
|
/** Reconstruct one vector by id; requires maintain_direct_map.
 *
 * @param key     vector id (index into direct_map)
 * @param recons  output, size d
 */
void IndexIVFPQ::reconstruct (idx_t key, float * recons) const
{
    FAISS_ASSERT (direct_map.size() == ntotal);
    // direct_map entry packs (list_no << 32) | offset_in_list
    int list_no = direct_map[key] >> 32;
    int ofs = direct_map[key] & 0xffffffff;

    // start from the coarse centroid...
    // NOTE(review): this assumes by_residual; for by_residual == false the
    // centroid should presumably not be added — confirm against callers
    quantizer->reconstruct (list_no, recons);
    const uint8_t * code = &(codes[list_no][ofs * pq.code_size]);

    // ...and add the PQ reconstruction, sub-vector by sub-vector
    for (size_t m = 0; m < pq.M; m++) {
        float * out = recons + m * pq.dsub;
        const float * cent = pq.get_centroids (m, code[m]);
        for (size_t i = 0; i < pq.dsub; i++) {
            out[i] += cent[i];
        }
    }
}
|
|
|
|
|
|
|
|
/// Append the codes of `other_in` (which must be an IndexIVFPQ — the
/// reference dynamic_cast throws std::bad_cast otherwise) to this index,
/// list by list, emptying the source lists.
void IndexIVFPQ::merge_from_residuals (IndexIVF &other_in)
{
    IndexIVFPQ &other = dynamic_cast<IndexIVFPQ &> (other_in);
    for (size_t list_no = 0; list_no < nlist; list_no++) {
        std::vector<uint8_t> & dst = codes[list_no];
        std::vector<uint8_t> & src = other.codes[list_no];
        dst.insert (dst.end(), src.begin(), src.end());
        src.clear();
    }
}
|
|
|
|
/** Copy a subset of the entries to another (compatible) IndexIVFPQ.
 *
 * @param other        destination index, same nlist, no direct map
 * @param subset_type  only 0 is implemented: copy ids in [a1, a2)
 *                     (other values silently copy nothing)
 * @param a1, a2       id range for subset_type == 0
 */
void IndexIVFPQ::copy_subset_to (IndexIVFPQ & other, int subset_type,
                                 long a1, long a2) const
{
    FAISS_ASSERT (nlist == other.nlist);
    FAISS_ASSERT (!other.maintain_direct_map);
    size_t code_size = pq.code_size;
    for (long list_no = 0; list_no < nlist; list_no++) {
        const std::vector<idx_t> & ids_in = ids[list_no];
        std::vector<idx_t> & ids_out = other.ids[list_no];
        const std::vector<uint8_t> & codes_in = codes[list_no];
        std::vector<uint8_t> & codes_out = other.codes[list_no];

        for (long i = 0; i < ids_in.size(); i++) {
            idx_t id = ids_in[i];
            if (subset_type == 0 && a1 <= id && id < a2) {
                // copy both the id and its code_size bytes of PQ code
                ids_out.push_back (id);
                codes_out.insert (codes_out.end(),
                                  codes_in.begin() + i * code_size,
                                  codes_in.begin() + (i + 1) * code_size);
                other.ntotal++;
            }
        }
    }
}
|
|
|
|
|
|
|
|
|
|
|
|
/** Precomputed tables for residuals
|
|
*
|
|
* During IVFPQ search with by_residual, we compute
|
|
*
|
|
* d = || x - y_C - y_R ||^2
|
|
*
|
|
* where x is the query vector, y_C the coarse centroid, y_R the
|
|
* refined PQ centroid. The expression can be decomposed as:
|
|
*
|
|
* d = || x - y_C ||^2 + || y_R ||^2 + 2 * (y_C|y_R) - 2 * (x|y_R)
|
|
* --------------- --------------------------- -------
|
|
* term 1 term 2 term 3
|
|
*
|
|
* When using multiprobe, we use the following decomposition:
|
|
* - term 1 is the distance to the coarse centroid, that is computed
|
|
* during the 1st stage search.
|
|
* - term 2 can be precomputed, as it does not involve x. However,
|
|
* because of the PQ, it needs nlist * M * ksub storage. This is why
|
|
* use_precomputed_table is off by default
|
|
* - term 3 is the classical non-residual distance table.
|
|
*
|
|
* Since y_R defined by a product quantizer, it is split across
|
|
* subvectors and stored separately for each subvector. If the coarse
|
|
* quantizer is a MultiIndexQuantizer then the table can be stored
|
|
* more compactly.
|
|
*
|
|
* At search time, the tables for term 2 and term 3 are added up. This
|
|
* is faster when the length of the lists is > ksub * M.
|
|
*/
|
|
|
|
/// Build the "term 2" tables (|| y_R ||^2 + 2 (y_C | y_R)) described in
/// the comment above. Chooses the table type if use_precomputed_table == 0:
/// type 2 (compact, per coarse-PQ centroid) when the coarse quantizer is a
/// compatible MultiIndexQuantizer, else type 1 (one table per list).
void IndexIVFPQ::precompute_table ()
{
    if (use_precomputed_table == 0) { // then choose the type of table
        if (quantizer->metric_type == METRIC_INNER_PRODUCT) {
            fprintf(stderr, "IndexIVFPQ::precompute_table: WARN precomputed "
                    "tables not supported for inner product quantizers\n");
            return;
        }
        const MultiIndexQuantizer *miq =
            dynamic_cast<const MultiIndexQuantizer *> (quantizer);
        if (miq && pq.M % miq->pq.M == 0)
            use_precomputed_table = 2;
        else
            use_precomputed_table = 1;
    } // otherwise assume user has set appropriate flag on input

    // squared norms of the PQ centroids (|| y_R ||^2 term),
    // initialized to NaN so uninitialized reads are detectable
    std::vector<float> r_norms (pq.M * pq.ksub, 0.0/0.0);
    for (int m = 0; m < pq.M; m++)
        for (int j = 0; j < pq.ksub; j++)
            r_norms [m * pq.ksub + j] =
                fvec_norm_L2sqr (pq.get_centroids (m, j), pq.dsub);

    if (use_precomputed_table == 1) {
        // one M*ksub table per inverted list:
        // tab = 2 * (y_C | y_R) + || y_R ||^2
        precomputed_table.resize (nlist * pq.M * pq.ksub);
        std::vector<float> centroid (d);

        for (size_t i = 0; i < nlist; i++) {
            quantizer->reconstruct (i, centroid.data());

            float *tab = &precomputed_table[i * pq.M * pq.ksub];
            pq.compute_inner_prod_table (centroid.data(), tab);
            fvec_madd (pq.M * pq.ksub, r_norms.data(), 2.0, tab, tab);
        }
    } else if (use_precomputed_table == 2) {
        const MultiIndexQuantizer *miq =
            dynamic_cast<const MultiIndexQuantizer *> (quantizer);
        FAISS_ASSERT (miq);
        const ProductQuantizer &cpq = miq->pq;
        // fine PQ sub-quantizers must align with the coarse PQ's
        FAISS_ASSERT (pq.M % cpq.M == 0);

        precomputed_table.resize(cpq.ksub * pq.M * pq.ksub);

        // reorder PQ centroid table: expand each coarse-PQ centroid into a
        // full d-dim vector (other sub-vectors stay NaN — never read,
        // because the inner product is taken sub-quantizer-wise)
        std::vector<float> centroids (d * cpq.ksub, 0.0/0.0);

        for (int m = 0; m < cpq.M; m++) {
            for (size_t i = 0; i < cpq.ksub; i++) {
                memcpy (centroids.data() + i * d + m * cpq.dsub,
                        cpq.get_centroids (m, i),
                        sizeof (*centroids.data()) * cpq.dsub);
            }
        }

        pq.compute_inner_prod_tables (cpq.ksub, centroids.data (),
                                      precomputed_table.data ());

        for (size_t i = 0; i < cpq.ksub; i++) {
            float *tab = &precomputed_table[i * pq.M * pq.ksub];
            fvec_madd (pq.M * pq.ksub, r_norms.data(), 2.0, tab, tab);
        }

    }
}
|
|
|
|
namespace {
|
|
|
|
/// Read the x86 time-stamp counter (rdtsc) for fine-grained cycle-level
/// timing statistics. x86-only: will not compile on other architectures.
static uint64_t get_cycles () {
    uint32_t high, low;
    asm volatile("rdtsc \n\t"
                 : "=a" (low),
                   "=d" (high));
    return ((uint64_t)high << 32) | (low);
}
|
|
|
|
#define TIC t0 = get_cycles()
|
|
#define TOC get_cycles () - t0
|
|
|
|
|
|
|
|
/** QueryTables manages the various ways of searching an
|
|
* IndexIVFPQ. The code contains a lot of branches, depending on:
|
|
* - metric_type: are we computing L2 or Inner product similarity?
|
|
* - by_residual: do we encode raw vectors or residuals?
|
|
* - use_precomputed_table: are x_R|x_C tables precomputed?
|
|
* - polysemous_ht: are we filtering with polysemous codes?
|
|
*/
|
|
struct QueryTables {

    /*****************************************************
     * General data from the IVFPQ
     *****************************************************/

    const IndexIVFPQ & ivfpq;

    // copied from IndexIVFPQ for easier access
    int d;
    const ProductQuantizer & pq;
    MetricType metric_type;
    bool by_residual;
    int use_precomputed_table;

    // pre-allocated data buffers (all point into `mem`, see constructor)
    float * sim_table, * sim_table_2;
    float * residual_vec, *decoded_vec;

    // single data buffer backing the 4 pointers above
    std::vector<float> mem;

    // for table pointers (one per sub-quantizer, used by the
    // "pointer" scan variant to avoid materializing sim_table)
    std::vector<const float *> sim_table_ptrs;

    explicit QueryTables (const IndexIVFPQ & ivfpq):
        ivfpq(ivfpq),
        d(ivfpq.d),
        pq (ivfpq.pq),
        metric_type (ivfpq.metric_type),
        by_residual (ivfpq.by_residual),
        use_precomputed_table (ivfpq.use_precomputed_table)
    {
        // slice one allocation into 2 tables of ksub*M floats
        // plus 2 scratch vectors of d floats
        mem.resize (pq.ksub * pq.M * 2 + d *2);
        sim_table = mem.data();
        sim_table_2 = sim_table + pq.ksub * pq.M;
        residual_vec = sim_table_2 + pq.ksub * pq.M;
        decoded_vec = residual_vec + d;

        // for polysemous
        if (ivfpq.polysemous_ht != 0)  {
            q_code.resize (pq.code_size);
        }
        init_list_cycles = 0;
        sim_table_ptrs.resize (pq.M);
    }

    /*****************************************************
     * What we do when query is known
     *****************************************************/

    // field specific to query
    const float * qi;

    // query-specific intialization
    void init_query (const float * qi) {
        this->qi = qi;
        if (metric_type == METRIC_INNER_PRODUCT)
            init_query_IP ();
        else
            init_query_L2 ();
        // without residuals the query's own polysemous code can be
        // computed once here, independently of the inverted list
        if (!by_residual && ivfpq.polysemous_ht != 0)
            pq.compute_code (qi, q_code.data());
    }

    void init_query_IP () {
        // precompute some tables specific to the query qi
        pq.compute_inner_prod_table (qi, sim_table);
        // we compute negated inner products for use with the maxheap
        for (int i = 0; i < pq.ksub * pq.M; i++) {
            sim_table[i] = - sim_table[i];
        }
    }

    void init_query_L2 () {
        if (!by_residual) {
            // direct case: distance table to the query itself
            pq.compute_distance_table (qi, sim_table);
        } else if (use_precomputed_table) {
            // residual case with tables: only the query-dependent
            // "term 3" inner products are computed here
            pq.compute_inner_prod_table (qi, sim_table_2);
        }
    }

    /*****************************************************
     * When inverted list is known: prepare computations
     *****************************************************/

    // fields specific to list
    Index::idx_t key;
    float coarse_dis;
    std::vector<uint8_t> q_code;

    uint64_t init_list_cycles;

    /// once we know the query and the centroid, we can prepare the
    /// sim_table that will be used for accumulation
    /// and dis0, the initial value
    float precompute_list_tables () {
        float dis0 = 0;
        uint64_t t0; TIC;
        if (by_residual) {
            if (metric_type == METRIC_INNER_PRODUCT)
                dis0 = precompute_list_tables_IP ();
            else
                dis0 = precompute_list_tables_L2 ();
        }
        init_list_cycles += TOC;
        return dis0;
    }

    /// same as precompute_list_tables, but only sets up sim_table_ptrs
    /// (pointers into the precomputed tables) instead of materializing
    float precompute_list_table_pointers () {
        float dis0 = 0;
        uint64_t t0; TIC;
        if (by_residual) {
            if (metric_type == METRIC_INNER_PRODUCT)
                FAISS_ASSERT (!"not implemented");
            else
                dis0 = precompute_list_table_pointers_L2 ();
        }
        init_list_cycles += TOC;
        return dis0;
    }

    /*****************************************************
     * compute tables for inner prod
     *****************************************************/

    float precompute_list_tables_IP ()
    {
        // prepare the sim_table that will be used for accumulation
        // and dis0, the initial value
        ivfpq.quantizer->reconstruct (key, decoded_vec);
        // decoded_vec = centroid
        // negated because the maxheap keeps negated inner products
        float dis0 = -fvec_inner_product (qi, decoded_vec, d);

        if (ivfpq.polysemous_ht) {
            // polysemous filtering needs the code of the query residual
            for (int i = 0; i < d; i++) {
                residual_vec [i] = qi[i] - decoded_vec[i];
            }
            pq.compute_code (residual_vec, q_code.data());
        }
        return dis0;
    }

    /*****************************************************
     * compute tables for L2 distance
     *****************************************************/

    float precompute_list_tables_L2 ()
    {
        float dis0 = 0;

        if (use_precomputed_table == 0) {
            // no tables: compute the distance table to the residual
            ivfpq.quantizer->compute_residual (qi, residual_vec, key);
            pq.compute_distance_table (residual_vec, sim_table);
        } else if (use_precomputed_table == 1) {
            // term 1 (query-to-centroid distance)
            dis0 = coarse_dis;

            // sim_table = term2 (precomputed) - 2 * term3 (sim_table_2)
            fvec_madd (pq.M * pq.ksub,
                       &ivfpq.precomputed_table [key * pq.ksub * pq.M],
                       -2.0, sim_table_2,
                       sim_table);
        } else if (use_precomputed_table == 2) {
            dis0 = coarse_dis;

            const MultiIndexQuantizer *miq =
                dynamic_cast<const MultiIndexQuantizer *> (ivfpq.quantizer);
            FAISS_ASSERT (miq);
            const ProductQuantizer &cpq = miq->pq;
            // nb of fine sub-quantizers per coarse sub-quantizer
            int Mf = pq.M / cpq.M;

            const float *qtab = sim_table_2; // query-specific table
            float *ltab = sim_table; // (output) list-specific table

            long k = key;
            for (int cm = 0; cm < cpq.M; cm++) {
                // compute PQ index: the list id packs the coarse PQ codes
                int ki = k & ((uint64_t(1) << cpq.nbits) - 1);
                k >>= cpq.nbits;

                // get corresponding table
                const float *pc = &ivfpq.precomputed_table
                    [(ki * pq.M + cm * Mf) * pq.ksub];

                if (ivfpq.polysemous_ht == 0) {

                    // sum up with query-specific table
                    fvec_madd (Mf * pq.ksub,
                               pc,
                               -2.0, qtab,
                               ltab);
                    ltab += Mf * pq.ksub;
                    qtab += Mf * pq.ksub;
                } else {
                    // also derive the residual's polysemous code: the
                    // argmin of each sub-table is the nearest centroid
                    for (int m = cm * Mf; m < (cm + 1) * Mf; m++) {
                        q_code[m] = fvec_madd_and_argmin
                            (pq.ksub, pc, -2, qtab, ltab);
                        pc += pq.ksub;
                        ltab += pq.ksub;
                        qtab += pq.ksub;
                    }
                }

            }
        }

        return dis0;
    }

    float precompute_list_table_pointers_L2 ()
    {
        float dis0 = 0;

        if (use_precomputed_table == 1) {
            dis0 = coarse_dis;

            // point each sub-quantizer at its slice of the list's table
            const float * s = &ivfpq.precomputed_table [key * pq.ksub * pq.M];
            for (int m = 0; m < pq.M; m++) {
                sim_table_ptrs [m] = s;
                s += pq.ksub;
            }
        } else if (use_precomputed_table == 2) {
            dis0 = coarse_dis;

            const MultiIndexQuantizer *miq =
                dynamic_cast<const MultiIndexQuantizer *> (ivfpq.quantizer);
            FAISS_ASSERT (miq);
            const ProductQuantizer &cpq = miq->pq;
            int Mf = pq.M / cpq.M;

            long k = key;
            int m0 = 0;
            for (int cm = 0; cm < cpq.M; cm++) {
                // unpack the coarse PQ code for this sub-quantizer
                int ki = k & ((uint64_t(1) << cpq.nbits) - 1);
                k >>= cpq.nbits;

                const float *pc = &ivfpq.precomputed_table
                    [(ki * pq.M + cm * Mf) * pq.ksub];

                for (int m = m0; m < m0 + Mf; m++) {
                    sim_table_ptrs [m] = pc;
                    pc += pq.ksub;
                }
                m0 += Mf;
            }
        } else FAISS_ASSERT (!"need precomputed tables");

        if (ivfpq.polysemous_ht) {
            FAISS_ASSERT (!"not implemented");
            // Not clear that it makes sense to implemente this,
            // because it costs M * ksub, which is what we wanted to
            // avoid with the tables pointers.
        }

        return dis0;
    }

};
|
|
|
|
|
|
/*****************************************************
|
|
* Scaning the codes.
|
|
* The scanning functions call their favorite precompute_*
|
|
* function to precompute the tables they need.
|
|
*****************************************************/
|
|
/// Scans one inverted list at a time and maintains a k-best maxheap.
/// IDType is the storage type of the stored ids (long here).
template <typename IDType>
struct InvertedListScanner: QueryTables {

    const uint8_t * __restrict list_codes;
    const IDType * list_ids;
    size_t list_size;

    explicit InvertedListScanner (const IndexIVFPQ & ivfpq):
        QueryTables (ivfpq)
    {
        // the scan loops index tables with one byte per sub-quantizer
        FAISS_ASSERT(pq.byte_per_idx == 1);
        n_hamming_pass = 0;
    }

    /// list_specific intialization
    void init_list (Index::idx_t key, float coarse_dis,
                    size_t list_size_in, const IDType *list_ids_in,
                    const uint8_t *list_codes_in) {
        this->key = key;
        this->coarse_dis = coarse_dis;
        list_size = list_size_in;
        list_codes = list_codes_in;
        list_ids = list_ids_in;
    }

    /*****************************************************
     * Scaning the codes: simple PQ scan.
     *****************************************************/

    /// version of the scan where we use precomputed tables
    void scan_list_with_table (
             size_t k, float * heap_sim, long * heap_ids, bool store_pairs)
    {
        float dis0 = precompute_list_tables ();

        for (size_t j = 0; j < list_size; j++) {

            // distance = dis0 + sum over sub-quantizers of table lookups
            float dis = dis0;
            const float *tab = sim_table;

            for (size_t m = 0; m < pq.M; m++) {
                dis += tab[*list_codes++];
                tab += pq.ksub;
            }

            if (dis < heap_sim[0]) {
                maxheap_pop (k, heap_sim, heap_ids);
                // store_pairs packs (list_no, offset) instead of the id
                long id = store_pairs ? (key << 32 | j) : list_ids[j];
                maxheap_push (k, heap_sim, heap_ids, dis, id);
            }
        }
    }

    /// tables are not precomputed, but pointers are provided to the
    /// relevant X_c|x_r tables
    void scan_list_with_pointer (
             size_t k, float * heap_sim, long * heap_ids, bool store_pairs)
    {

        float dis0 = precompute_list_table_pointers ();

        for (size_t j = 0; j < list_size; j++) {

            float dis = dis0;
            const float *tab = sim_table_2;

            for (size_t m = 0; m < pq.M; m++) {
                int ci = *list_codes++;
                // term2 (via pointer) - 2 * term3 (query table)
                dis += sim_table_ptrs [m][ci] - 2 * tab [ci];
                tab += pq.ksub;
            }

            if (dis < heap_sim[0]) {
                maxheap_pop (k, heap_sim, heap_ids);
                long id = store_pairs ? (key << 32 | j) : list_ids[j];
                maxheap_push (k, heap_sim, heap_ids, dis, id);
            }
        }

    }

    /// nothing is precomputed: access residuals on-the-fly
    void scan_on_the_fly_dist (
             size_t k, float * heap_sim, long * heap_ids, bool store_pairs)
    {

        // with precomputed tables the pointer variant is cheaper
        if (by_residual && use_precomputed_table) {
            scan_list_with_pointer (k, heap_sim, heap_ids, store_pairs);
            return;
        }

        const float *dvec;
        float dis0 = 0;

        if (by_residual) {
            if (metric_type == METRIC_INNER_PRODUCT) {
                // IP decomposes as (q|centroid) + (q|residual)
                ivfpq.quantizer->reconstruct (key, residual_vec);
                dis0 = fvec_inner_product (residual_vec, qi, d);
            } else {
                ivfpq.quantizer->compute_residual (qi, residual_vec, key);
            }
            dvec = residual_vec;
        } else {
            dvec = qi;
            dis0 = 0;
        }

        for (size_t j = 0; j < list_size; j++) {

            // decode the stored vector and compute the distance directly
            pq.decode (list_codes, decoded_vec);
            list_codes += pq.code_size;

            float dis;
            if (metric_type == METRIC_INNER_PRODUCT) {
                // negated so the maxheap keeps the largest inner products
                dis = -dis0 - fvec_inner_product (decoded_vec, qi, d);
            } else {
                dis = fvec_L2sqr (decoded_vec, dvec, d);
            }

            if (dis < heap_sim[0]) {
                maxheap_pop (k, heap_sim, heap_ids);
                long id = store_pairs ? (key << 32 | j) : list_ids[j];
                maxheap_push (k, heap_sim, heap_ids, dis, id);
            }
        }
    }

    /*****************************************************
     * Scanning codes with polysemous filtering
     *****************************************************/

    // code for the query
    // nb of codes that passed the Hamming filter (for statistics)
    size_t n_hamming_pass;

    template <class HammingComputer>
    void scan_list_polysemous_hc (
             size_t k, float * heap_sim, long * heap_ids, bool store_pairs)
    {
        float dis0 = precompute_list_tables ();
        int ht = ivfpq.polysemous_ht;

        int code_size = pq.code_size;

        HammingComputer hc (q_code.data(), code_size);

        for (size_t j = 0; j < list_size; j++) {
            const uint8_t *b_code = list_codes;
            // cheap Hamming test first; full distance only if it passes
            int hd = hc.hamming (b_code);
            if (hd < ht) {
                n_hamming_pass ++;

                float dis = dis0;
                const float *tab = sim_table;

                for (size_t m = 0; m < pq.M; m++) {
                    dis += tab[*b_code++];
                    tab += pq.ksub;
                }

                if (dis < heap_sim[0]) {
                    maxheap_pop (k, heap_sim, heap_ids);
                    long id = store_pairs ? (key << 32 | j) : list_ids[j];
                    maxheap_push (k, heap_sim, heap_ids, dis, id);
                }
            }
            list_codes += code_size;
        }
    }

    /// dispatch to the Hamming computer specialized for this code size
    void scan_list_polysemous (
             size_t k, float * heap_sim, long * heap_ids, bool store_pairs)
    {
        switch (pq.code_size) {
#define HANDLE_CODE_SIZE(cs)  \
        case cs:  \
            scan_list_polysemous_hc <HammingComputer ## cs>  \
                (k, heap_sim, heap_ids, store_pairs); \
            break
        HANDLE_CODE_SIZE(4);
        HANDLE_CODE_SIZE(8);
        HANDLE_CODE_SIZE(16);
        HANDLE_CODE_SIZE(20);
        HANDLE_CODE_SIZE(32);
        HANDLE_CODE_SIZE(64);
#undef HANDLE_CODE_SIZE
        default:
            // generic computers for other multiples of 8 / 4 bytes
            if (pq.code_size % 8 == 0)
                scan_list_polysemous_hc <HammingComputerM8>
                    (k, heap_sim, heap_ids, store_pairs);
            else
                scan_list_polysemous_hc <HammingComputerM4>
                    (k, heap_sim, heap_ids, store_pairs);
            break;
        }
    }

};
|
|
|
|
|
|
|
|
|
|
} // anonymous namespace
|
|
|
|
|
|
IndexIVFPQStats indexIVFPQ_stats;
|
|
|
|
/// Zero all counters. The memset is valid only as long as IndexIVFPQStats
/// remains a plain struct of integral counters (no virtuals / non-PODs).
void IndexIVFPQStats::reset () {
    memset (this, 0, sizeof (*this));
}
|
|
|
|
|
|
/** Second-stage search: scan the pre-selected inverted lists.
 *
 * @param nx          nb of queries
 * @param qx          query vectors, size nx * d
 * @param keys        nprobe list ids per query (entries < 0 are skipped)
 * @param coarse_dis  corresponding query-to-centroid distances
 * @param res         output maxheaps (res->k results per query)
 * @param store_pairs if true, report (list_no << 32 | offset) instead of ids
 */
void IndexIVFPQ::search_knn_with_key (
    size_t nx,
    const float * qx,
    const long * keys,
    const float * coarse_dis,
    float_maxheap_array_t * res,
    bool store_pairs) const
{
    const size_t k = res->k;

#pragma omp parallel
    {
        // per-thread scanner and statistics, merged at the end
        InvertedListScanner<long> qt (*this);
        size_t stats_nlist = 0;
        size_t stats_ncode = 0;
        uint64_t init_query_cycles = 0;
        uint64_t scan_cycles = 0;
        uint64_t heap_cycles = 0;

#pragma omp for
        for (size_t i = 0; i < nx; i++) {
            const float *qi = qx + i * d;
            const long * keysi = keys + i * nprobe;
            const float *coarse_dis_i = coarse_dis + i * nprobe;
            float * heap_sim = res->get_val (i);
            long * heap_ids = res->get_ids (i);

            uint64_t t0;
            TIC;
            maxheap_heapify (k, heap_sim, heap_ids);
            heap_cycles += TOC;

            TIC;
            qt.init_query (qi);
            init_query_cycles += TOC;

            size_t nscan = 0;

            for (size_t ik = 0; ik < nprobe; ik++) {
                long key = keysi[ik];  /* select the list  */
                if (key < 0) {
                    // not enough centroids for multiprobe
                    continue;
                }
                if (key >= (long) nlist) {
                    // NOTE(review): bare `throw;` with no active exception
                    // calls std::terminate — presumably intended as a
                    // hard abort on corrupted input
                    fprintf (stderr, "Invalid key=%ld nlist=%ld\n", key, nlist);
                    throw;
                }
                size_t list_size = ids[key].size();
                stats_nlist ++;
                nscan += list_size;

                if (list_size == 0) continue;

                qt.init_list (key, coarse_dis_i[ik],
                              list_size, ids[key].data(),
                              codes[key].data());

                TIC;
                // pick the scan strategy: polysemous filter, table scan
                // for long lists, on-the-fly distances for short ones
                if (polysemous_ht > 0) {
                    qt.scan_list_polysemous
                        (k, heap_sim, heap_ids, store_pairs);
                } else if (list_size > scan_table_threshold) {
                    qt.scan_list_with_table (k, heap_sim, heap_ids, store_pairs);
                } else {
                    qt.scan_on_the_fly_dist (k, heap_sim, heap_ids, store_pairs);
                }
                scan_cycles += TOC;

                // stop early once enough codes were scanned
                if (max_codes && nscan >= max_codes) break;
            }
            stats_ncode += nscan;
            TIC;
            maxheap_reorder (k, heap_sim, heap_ids);

            if (metric_type == METRIC_INNER_PRODUCT) {
                // distances were negated to reuse the maxheap; undo that
                for (size_t j = 0; j < k; j++)
                    heap_sim[j] = -heap_sim[j];
            }
            heap_cycles += TOC;
        }

#pragma omp critical
        {
            indexIVFPQ_stats.n_hamming_pass += qt.n_hamming_pass;
            indexIVFPQ_stats.nlist += stats_nlist;
            indexIVFPQ_stats.ncode += stats_ncode;

            indexIVFPQ_stats.init_query_cycles += init_query_cycles;
            indexIVFPQ_stats.init_list_cycles += qt.init_list_cycles;
            indexIVFPQ_stats.scan_cycles += scan_cycles - qt.init_list_cycles;
            indexIVFPQ_stats.heap_cycles += heap_cycles;
        }

    }
    indexIVFPQ_stats.nq += nx;
}
|
|
|
|
|
|
/** Full search: coarse quantization (nprobe lists per query) followed by
 *  the inverted-list scan.
 *
 * @param distances  output, size n * k
 * @param labels     output ids, size n * k
 */
void IndexIVFPQ::search (idx_t n, const float *x, idx_t k,
                         float *distances, idx_t *labels) const
{
    // RAII buffers: the original raw new[]/delete[] leaked if
    // quantizer->search or search_knn_with_key threw
    std::vector<long> idx (n * nprobe);
    std::vector<float> coarse_dis (n * nprobe);
    uint64_t t0;
    TIC;
    quantizer->search (n, x, nprobe, coarse_dis.data(), idx.data());
    indexIVFPQ_stats.assign_cycles += TOC;

    TIC;
    // the result heaps write directly into the caller's output arrays
    float_maxheap_array_t res = { size_t(n), size_t(k), labels, distances};

    search_knn_with_key (n, x, idx.data(), coarse_dis.data(), &res);
    indexIVFPQ_stats.search_cycles += TOC;
}
|
|
|
|
|
|
void IndexIVFPQ::reset()
|
|
{
|
|
IndexIVF::reset();
|
|
for (size_t key = 0; key < nlist; key++) {
|
|
codes[key].clear();
|
|
}
|
|
}
|
|
|
|
/** Remove all vectors whose id is selected by `sel`.
 *
 * Uses unordered swap-remove within each list, so the order of the
 * remaining entries changes. Not supported with a direct map.
 *
 * @return number of vectors removed
 */
long IndexIVFPQ::remove_ids (const IDSelector & sel)
{
    FAISS_ASSERT (!maintain_direct_map ||
                  !"direct map remove not implemented");
    long nremove = 0;
#pragma omp parallel for reduction(+: nremove)
    for (long i = 0; i < nlist; i++) {
        std::vector<idx_t> & idsi = ids[i];
        uint8_t * codesi = codes[i].data();

        // compact in place: overwrite removed slot j with the last live
        // entry (at l-1 after the decrement), then shrink
        long l = idsi.size(), j = 0;
        while (j < l) {
            if (sel.is_member (idsi[j])) {
                l--;
                idsi [j] = idsi [l];
                memmove (codesi + j * code_size,
                         codesi + l * code_size, code_size);
            } else {
                j++;
            }
        }
        if (l < idsi.size()) {
            nremove += idsi.size() - l;
            idsi.resize (l);
            codes[i].resize (l * code_size);
        }
    }
    ntotal -= nremove;
    return nremove;
}
|
|
|
|
|
|
/// Default constructor (used e.g. when deserializing): only sets the
/// runtime parameters; the structural fields come from the I/O code.
IndexIVFPQ::IndexIVFPQ ()
{
    // initialize some runtime values
    use_precomputed_table = 0;
    scan_table_threshold = 0;
    do_polysemous_training = false;
    polysemous_ht = 0;
    max_codes = 0;
    polysemous_training = nullptr;
}
|
|
|
|
|
|
/// Orders entries by the raw bytes of their fixed-size codes (memcmp).
/// operator() returns true when a's code compares greater than b's, so
/// std::sort with this comparator sorts codes in descending byte order,
/// bringing identical codes together.
struct CodeCmp {
    const uint8_t *tab;   // base of the packed code array
    size_t code_size;     // bytes per code
    bool operator () (int a, int b) const {
        return cmp (a, b) > 0;
    }
    int cmp (int a, int b) const {
        const uint8_t *code_a = tab + a * code_size;
        const uint8_t *code_b = tab + b * code_size;
        return memcmp (code_a, code_b, code_size);
    }
};
|
|
|
|
|
|
/** Find vectors that have identical PQ codes within the same inverted list.
 *
 * @param dup_ids  output: ids of duplicated vectors, grouped
 * @param lims     output: group g occupies dup_ids[lims[g] : lims[g+1]]
 * @return number of groups found
 */
size_t IndexIVFPQ::find_duplicates (idx_t *dup_ids, size_t *lims) const
{
    size_t ngroup = 0;
    lims[0] = 0;
    for (size_t list_no = 0; list_no < nlist; list_no++) {
        size_t n = ids[list_no].size();
        // sort list entries by code bytes so equal codes are adjacent
        std::vector<int> ord (n);
        for (int i = 0; i < n; i++) ord[i] = i;
        CodeCmp cs = { codes[list_no].data(), code_size };
        std::sort (ord.begin(), ord.end(), cs);

        const idx_t *list_ids = ids[list_no].data();
        int prev = -1; // all elements from prev to i-1 are equal
        for (int i = 0; i < n; i++) {
            if (prev >= 0 && cs.cmp (ord [prev], ord [i]) == 0) {
                // same as previous => remember
                if (prev + 1 == i) { // start new group
                    ngroup++;
                    lims[ngroup] = lims[ngroup - 1];
                    // the group leader is emitted once, when the group opens
                    dup_ids [lims [ngroup]++] = list_ids [ord [prev]];
                }
                dup_ids [lims [ngroup]++] = list_ids [ord [i]];
            } else { // not same as previous.
                prev = i;
            }
        }
    }
    return ngroup;
}
|
|
|
|
|
|
|
|
|
|
/*****************************************
|
|
* IndexIVFPQR implementation
|
|
******************************************/
|
|
|
|
/** IVFPQ with an additional refinement PQ re-encoding the residual of the
 *  first PQ.
 *
 * @param M_refine              nb of sub-quantizers of the refinement PQ
 * @param nbits_per_idx_refine  bits per refinement sub-quantizer code
 */
IndexIVFPQR::IndexIVFPQR (
            Index * quantizer, size_t d, size_t nlist,
            size_t M, size_t nbits_per_idx,
            size_t M_refine, size_t nbits_per_idx_refine):
    IndexIVFPQ (quantizer, d, nlist, M, nbits_per_idx),
    refine_pq (d, M_refine, nbits_per_idx_refine),
    k_factor (4)  // default: retrieve 4*k coarse candidates before refining
{
    by_residual = true;
    set_typename();
}
|
|
|
|
/// Default constructor (deserialization path); note k_factor defaults
/// to 1 here vs 4 in the parameterized constructor.
IndexIVFPQR::IndexIVFPQR ():
    k_factor (1)
{
    by_residual = true;
}
|
|
|
|
|
|
void IndexIVFPQR::set_typename()
|
|
{
|
|
std::stringstream s;
|
|
s << "IvfPQR_" << pq.M << "x" << pq.nbits
|
|
<< "+" << refine_pq.M << "x" << refine_pq.nbits
|
|
<< "[" << nlist << ":" << quantizer->index_typename << "]";
|
|
index_typename = s.str();
|
|
|
|
}
|
|
|
|
|
|
void IndexIVFPQR::reset()
|
|
{
|
|
IndexIVFPQ::reset();
|
|
refine_codes.clear();
|
|
}
|
|
|
|
|
|
|
|
|
|
/** Trains the 2nd-level PQ on the coarse residuals and the 3rd-level
 *  (refinement) PQ on the residuals left over by the 2nd level.
 *
 *  @param n  number of training vectors
 *  @param x  training vectors, size n * d
 */
void IndexIVFPQR::train_residual (idx_t n, const float *x)
{
    // Buffer for the 2nd-level residuals. A vector (instead of raw
    // new[]/delete[]) guarantees the buffer is released even if one of
    // the training calls throws. It is also zero-initialized, so any
    // entries not written by train_residual_o are at least defined.
    std::vector<float> residual_2 (n * d);

    // trains the 2nd-level PQ and outputs its residuals.
    // NOTE(review): train_residual_o may internally cap the number of
    // training points; residual_2 is sized for the full n as before —
    // behavior unchanged, but worth confirming upstream.
    train_residual_o (n, x, residual_2.data());

    if (verbose)
        printf ("training %zdx%zd 2nd level PQ quantizer on %ld %dD-vectors\n",
                refine_pq.M, refine_pq.ksub, n, d);

    // limit the k-means training set size for the refinement codebooks
    refine_pq.cp.max_points_per_centroid = 1000;
    refine_pq.cp.verbose = verbose;

    refine_pq.train (n, residual_2.data());
}
|
|
|
|
|
|
/** Adds n vectors with explicit ids; the coarse assignment is computed
 *  internally (no precomputed list numbers). Forwards to add_core. */
void IndexIVFPQR::add_with_ids (idx_t n, const float *x, const long *xids) {
    add_core (n, x, xids, nullptr);
}
|
|
|
|
/** Adds n vectors, encoding them with the 2-level IVFPQ and then
 *  storing the refinement codes of the remaining residuals.
 *
 *  @param n                number of vectors to add
 *  @param x                the vectors, size n * d
 *  @param xids             ids to associate (may be nullptr)
 *  @param precomputed_idx  optional precomputed coarse assignment
 */
void IndexIVFPQR::add_core (idx_t n, const float *x, const long *xids,
                            const long *precomputed_idx) {

    // 2nd-level residuals produced by the base-class add. A vector
    // (instead of raw new[]/delete[]) releases the buffer even if an
    // exception is thrown below.
    std::vector<float> residual_2 (n * d);

    idx_t n0 = ntotal;  // first slot of the newly added vectors

    add_core_o (n, x, xids, residual_2.data(), precomputed_idx);

    refine_codes.resize (ntotal * refine_pq.code_size);

    // encode the leftover residuals with the refinement PQ,
    // appending after the codes of the pre-existing vectors
    refine_pq.compute_codes (
        residual_2.data(), &refine_codes[n0 * refine_pq.code_size], n);
}
|
|
|
|
|
|
/** Three-stage search: coarse quantizer + IVFPQ retrieve a shortlist of
 *  k * k_factor candidates per query, then the refinement PQ re-ranks
 *  them by a more accurate reconstructed distance.
 *
 *  @param n          number of queries
 *  @param x          query vectors, size n * d
 *  @param k          number of results per query
 *  @param distances  output distances, size n * k
 *  @param labels     output ids, size n * k
 */
void IndexIVFPQR::search (
    idx_t n, const float *x, idx_t k,
    float *distances, idx_t *labels) const
{
    FAISS_ASSERT (is_trained);
    // stage 1: coarse quantization (nprobe nearest lists per query)
    long * idx = new long [n * nprobe];
    float * L1_dis = new float [n * nprobe];
    uint64_t t0;
    TIC;
    quantizer->search (n, x, nprobe, L1_dis, idx);
    indexIVFPQ_stats.assign_cycles += TOC;

    TIC;
    // stage 2: IVFPQ search for an enlarged shortlist of k * k_factor
    size_t k_coarse = long(k * k_factor);
    idx_t *coarse_labels = new idx_t [k_coarse * n];
    { // query with quantizer levels 1 and 2.
        float *coarse_distances = new float [k_coarse * n];

        faiss::float_maxheap_array_t res_coarse = {
            size_t(n), k_coarse, coarse_labels, coarse_distances};
        // store_pairs=true: labels are packed (list_no << 32 | offset)
        // so the codes can be located again below
        search_knn_with_key (n, x, idx, L1_dis, &res_coarse, true);
        delete [] coarse_distances;
    }
    delete [] L1_dis;
    indexIVFPQ_stats.search_cycles += TOC;

    TIC;

    // 3rd level refinement
    size_t n_refine = 0;
#pragma omp parallel reduction(+ : n_refine)
    {
        // tmp buffers (one allocation per thread, two d-sized halves)
        float *residual_1 = new float [2 * d];
        float *residual_2 = residual_1 + d;
#pragma omp for
        for (idx_t i = 0; i < n; i++) {
            const float *xq = x + i * d;
            const long * shortlist = coarse_labels + k_coarse * i;
            // the output arrays double as the result max-heap
            float * heap_sim = distances + k * i;
            long * heap_ids = labels + k * i;
            maxheap_heapify (k, heap_sim, heap_ids);

            for (int j = 0; j < k_coarse; j++) {
                long sl = shortlist[j];

                // -1 marks a shortlist slot that was not filled
                if (sl == -1) continue;

                // unpack the (list_no, offset) pair stored by stage 2
                int list_no = sl >> 32;
                int ofs = sl & 0xffffffff;

                assert (list_no >= 0 && list_no < nlist);
                assert (ofs >= 0 && ofs < ids[list_no].size());

                // 1st level residual
                quantizer->compute_residual (xq, residual_1, list_no);

                // 2nd level residual
                const uint8_t * l2code = &codes[list_no][ofs * pq.code_size];
                pq.decode (l2code, residual_2);
                for (int l = 0; l < d; l++)
                    residual_2[l] = residual_1[l] - residual_2[l];

                // 3rd level residual's approximation
                // (residual_1 is reused to hold the decoded refinement)
                idx_t id = ids[list_no][ofs];
                assert (0 <= id && id < ntotal);
                refine_pq.decode (&refine_codes [id * refine_pq.code_size],
                                  residual_1);

                // refined distance: gap between the true 2nd-level
                // residual and its refinement-PQ approximation
                float dis = fvec_L2sqr (residual_1, residual_2, d);

                if (dis < heap_sim[0]) {
                    maxheap_pop (k, heap_sim, heap_ids);
                    maxheap_push (k, heap_sim, heap_ids, dis, id);
                }
                n_refine ++;
            }
            // sort the heap contents into ascending-distance order
            maxheap_reorder (k, heap_sim, heap_ids);
        }
        delete [] residual_1;
    }
    delete [] coarse_labels;
    delete [] idx;
    indexIVFPQ_stats.nrefine += n_refine;
    indexIVFPQ_stats.refine_cycles += TOC;
}
|
|
|
|
/** Reconstructs vectors i0..i0+ni-1: the base-class IVFPQ
 *  reconstruction plus the decoded refinement-PQ correction.
 *
 *  @param i0      first index to reconstruct
 *  @param ni      number of vectors to reconstruct
 *  @param recons  output buffer (see indexing note below)
 */
void IndexIVFPQR::reconstruct_n (idx_t i0, idx_t ni, float *recons) const
{
    // scratch buffer for one decoded refinement vector; std::vector
    // instead of raw new[]/delete[] so it is freed even on exception
    std::vector<float> r3 (d);

    IndexIVFPQ::reconstruct_n (i0, ni, recons);

    for (idx_t i = i0; i < i0 + ni; i++) {
        // NOTE(review): recons is addressed by the absolute index i,
        // not i - i0, matching the base-class convention used here —
        // confirm callers allocate the buffer accordingly.
        float *r = recons + i * d;
        refine_pq.decode (&refine_codes [i * refine_pq.code_size], r3.data());

        // add the refinement correction to the 2-level reconstruction
        for (int j = 0; j < d; j++)
            r[j] += r3[j];

    }
}
|
|
|
|
/** Merges the entries of another IndexIVFPQR into this one.
 *
 *  The base class moves the inverted lists; here the refinement codes
 *  are appended and cleared from the source. The other index must be
 *  an IndexIVFPQR (dynamic_cast throws std::bad_cast otherwise).
 *  Note: ids are assumed to line up, since refine_codes is indexed by
 *  the global vector id.
 */
void IndexIVFPQR::merge_from_residuals (IndexIVF &other_in)
{
    IndexIVFPQR &other = dynamic_cast<IndexIVFPQR &> (other_in);
    IndexIVFPQ::merge_from_residuals (other);
    refine_codes.insert (refine_codes.end(),
                         other.refine_codes.begin(), other.refine_codes.end());
    other.refine_codes.clear();
}
|
|
|
|
long IndexIVFPQR::remove_ids (const IDSelector & sel)
|
|
{
|
|
FAISS_ASSERT(!"not implemented");
|
|
}
|
|
|
|
/*****************************************
|
|
* IndexIVFPQCompact implementation
|
|
******************************************/
|
|
|
|
/** Default constructor, for deserialization: no storage is owned until
 *  the compact arrays are filled in (alloc_type records how they were
 *  allocated so the destructor knows how to release them). */
IndexIVFPQCompact::IndexIVFPQCompact ()
{
    alloc_type = Alloc_type_none;
    limits = nullptr;
    compact_ids = nullptr;
    compact_codes = nullptr;
}
|
|
|
|
|
|
/** Builds a compact (read-only) version of an IndexIVFPQ: the per-list
 *  vectors of ids and codes are flattened into three arrays
 *  (limits / compact_ids / compact_codes), with list i occupying
 *  [limits[i], limits[i+1]).
 *
 *  Ids and offsets are stored as uint32_t, hence the < 2G constraints.
 *  The coarse quantizer is shared with `other` (own_fields = false),
 *  so `other`'s quantizer must outlive this index.
 */
IndexIVFPQCompact::IndexIVFPQCompact (const IndexIVFPQ &other)
{
    FAISS_ASSERT (other.ntotal < (1UL << 31) ||
                  !"IndexIVFPQCompact cannot store more than 2G images");

    // here it would be more convenient to just use the
    // copy-constructor, but it would copy the lists as well: too much
    // overhead...

    // copy fields from Index
    d = other.d;
    ntotal = other.ntotal;
    verbose = other.verbose;
    is_trained = other.is_trained;
    metric_type = other.metric_type;

    // copy fields from IndexIVF (except ids)
    nlist = other.nlist;
    nprobe = other.nprobe;
    quantizer = other.quantizer;   // shared, not owned (see own_fields)
    quantizer_trains_alone = other.quantizer_trains_alone;
    own_fields = false;
    direct_map = other.direct_map;

    // copy fields from IndexIVFPQ (except codes)
    by_residual = other.by_residual;
    use_precomputed_table = other.use_precomputed_table;
    precomputed_table = other.precomputed_table;
    code_size = other.code_size;
    pq = other.pq;
    do_polysemous_training = other.do_polysemous_training;
    // the training object is not copied: a compact index cannot be
    // (re-)trained anyway
    polysemous_training = nullptr;

    scan_table_threshold = other.scan_table_threshold;
    max_codes = other.max_codes;
    polysemous_ht = other.polysemous_ht;

    //allocate (destructor frees these because alloc_type is _new)
    alloc_type = Alloc_type_new;
    limits = new uint32_t [nlist + 1];
    compact_ids = new uint32_t [ntotal];
    compact_codes = new uint8_t [ntotal * code_size];


    // copy content from other
    size_t ofs = 0;   // running offset into the flattened arrays
    for (size_t i = 0; i < nlist; i++) {
        limits [i] = ofs;
        const std::vector<long> &other_ids = other.ids[i];
        for (size_t j = 0; j < other_ids.size(); j++) {
            long id = other_ids[j];
            FAISS_ASSERT (id < (1UL << 31) ||
                          !"IndexIVFPQCompact cannot store ids > 2G");
            compact_ids[ofs + j] = id;
        }
        memcpy (compact_codes + ofs * code_size,
                other.codes[i].data(),
                other.codes[i].size());
        ofs += other_ids.size();
    }
    // every stored vector must have been accounted for exactly once
    FAISS_ASSERT (ofs == ntotal);
    limits [nlist] = ofs;

}
|
|
|
|
/// Disabled: the compact storage layout is immutable, so vectors
/// cannot be added after construction.
void IndexIVFPQCompact::add (idx_t, const float *) {
    FAISS_ASSERT (!"cannot add to an IndexIVFPQCompact");
}
|
|
|
|
/// Disabled: a compact index is read-only and cannot be emptied.
void IndexIVFPQCompact::reset () {
    FAISS_ASSERT (!"cannot reset an IndexIVFPQCompact");
}
|
|
|
|
/// Disabled: a compact index is built from an already-trained
/// IndexIVFPQ and cannot be trained itself.
void IndexIVFPQCompact::train (idx_t, const float *) {
    FAISS_ASSERT (!"cannot train an IndexIVFPQCompact");
}
|
|
|
|
|
|
|
|
|
|
/** Releases the compact arrays according to how they were obtained:
 *  - Alloc_type_new:  arrays were new[]-allocated by the copying
 *    constructor, delete[] them individually;
 *  - Alloc_type_mmap: all three arrays live inside one mapped region,
 *    unmap it in a single call;
 *  - Alloc_type_none: nothing is owned, nothing to free.
 */
IndexIVFPQCompact::~IndexIVFPQCompact ()
{
    if (alloc_type == Alloc_type_new) {
        delete [] limits;
        delete [] compact_codes;
        delete [] compact_ids;
    } else if (alloc_type == Alloc_type_mmap) {
        // return value deliberately ignored: nothing useful can be
        // done about an munmap failure in a destructor
        munmap (mmap_buffer, mmap_length);

    }

}
|
|
|
|
void IndexIVFPQCompact::search_knn_with_key (
|
|
size_t nx,
|
|
const float * qx,
|
|
const long * keys,
|
|
const float * coarse_dis,
|
|
float_maxheap_array_t * res,
|
|
bool store_pairs) const
|
|
{
|
|
const size_t k = res->k;
|
|
|
|
#pragma omp parallel
|
|
{
|
|
InvertedListScanner<uint32_t> qt (*this);
|
|
size_t stats_nlist = 0;
|
|
size_t stats_ncode = 0;
|
|
uint64_t init_query_cycles = 0;
|
|
uint64_t scan_cycles = 0;
|
|
uint64_t heap_cycles = 0;
|
|
|
|
#pragma omp for
|
|
for (size_t i = 0; i < nx; i++) {
|
|
const float *qi = qx + i * d;
|
|
const long * keysi = keys + i * nprobe;
|
|
const float *coarse_dis_i = coarse_dis + i * nprobe;
|
|
float * heap_sim = res->get_val (i);
|
|
long * heap_ids = res->get_ids (i);
|
|
|
|
uint64_t t0;
|
|
TIC;
|
|
maxheap_heapify (k, heap_sim, heap_ids);
|
|
heap_cycles += TOC;
|
|
|
|
TIC;
|
|
qt.init_query (qi);
|
|
init_query_cycles += TOC;
|
|
|
|
size_t nscan = 0;
|
|
|
|
for (size_t ik = 0; ik < nprobe; ik++) {
|
|
long key = keysi[ik]; /* select the list */
|
|
if (key < 0) {
|
|
// not enough centroids for multiprobe
|
|
continue;
|
|
}
|
|
if (key >= (long) nlist) {
|
|
fprintf (stderr, "Invalid key=%ld nlist=%ld\n", key, nlist);
|
|
throw;
|
|
}
|
|
size_t list_size = limits[key + 1] - limits[key];
|
|
stats_nlist ++;
|
|
nscan += list_size;
|
|
|
|
if (list_size == 0) continue;
|
|
|
|
qt.init_list (key, coarse_dis_i[ik],
|
|
list_size, compact_ids + limits[key],
|
|
compact_codes + limits[key] * code_size);
|
|
|
|
TIC;
|
|
if (polysemous_ht > 0) {
|
|
qt.scan_list_polysemous
|
|
(k, heap_sim, heap_ids, store_pairs);
|
|
} else if (list_size > scan_table_threshold) {
|
|
qt.scan_list_with_table (k, heap_sim, heap_ids, store_pairs);
|
|
} else {
|
|
qt.scan_on_the_fly_dist (k, heap_sim, heap_ids, store_pairs);
|
|
}
|
|
scan_cycles += TOC;
|
|
|
|
if (max_codes && nscan >= max_codes) break;
|
|
}
|
|
stats_ncode += nscan;
|
|
TIC;
|
|
maxheap_reorder (k, heap_sim, heap_ids);
|
|
|
|
if (metric_type == METRIC_INNER_PRODUCT) {
|
|
for (size_t j = 0; j < k; j++) {
|
|
heap_sim[i] = -heap_sim[i];
|
|
}
|
|
}
|
|
heap_cycles += TOC;
|
|
}
|
|
|
|
#pragma omp critical
|
|
{
|
|
indexIVFPQ_stats.n_hamming_pass += qt.n_hamming_pass;
|
|
indexIVFPQ_stats.nlist += stats_nlist;
|
|
indexIVFPQ_stats.ncode += stats_ncode;
|
|
|
|
indexIVFPQ_stats.init_query_cycles += init_query_cycles;
|
|
indexIVFPQ_stats.init_list_cycles += qt.init_list_cycles;
|
|
indexIVFPQ_stats.scan_cycles += scan_cycles - qt.init_list_cycles;
|
|
indexIVFPQ_stats.heap_cycles += heap_cycles;
|
|
}
|
|
|
|
}
|
|
indexIVFPQ_stats.nq += nx;
|
|
}
|
|
|
|
|
|
|
|
} // namespace faiss
|