expose thread-safe stats (#1438)

Summary:
Pull Request resolved: https://github.com/facebookresearch/faiss/pull/1438

This diff changes Faiss and the `combined_index.py` to propagate thread-safe stats to handler.py

Reviewed By: MDSilber

Differential Revision: D24082543

fbshipit-source-id: 944e6b7630daeede5eb9501b81557a6fe5afec44
pull/1443/head
Matthijs Douze 2020-10-03 23:24:26 -07:00 committed by Facebook GitHub Bot
parent 6918f4ee48
commit 5ad630635c
6 changed files with 255 additions and 21 deletions

View File

@ -14,6 +14,7 @@
#include <faiss/IndexPreTransform.h>
#include <faiss/impl/FaissAssert.h>
#include <faiss/MetaIndexes.h>
#include <faiss/utils/utils.h>
@ -310,17 +311,34 @@ void set_invlist_range (Index *index, long i0, long i1,
ivf->ntotal = index->ntotal = ntotal;
}
static size_t count_ndis(const IndexIVF * index_ivf, size_t n_list_scan,
const idx_t *Iq)
{
size_t nb_dis = 0;
const InvertedLists *il = index_ivf->invlists;
for (idx_t i = 0; i < n_list_scan; i++) {
if (Iq[i] >= 0) {
nb_dis += il->list_size(Iq[i]);
}
}
return nb_dis;
}
void search_with_parameters (const Index *index,
idx_t n, const float *x, idx_t k,
float *distances, idx_t *labels,
IVFSearchParameters *params,
size_t *nb_dis_ptr)
const IVFSearchParameters *params,
size_t *nb_dis_ptr,
double *ms_per_stage)
{
FAISS_THROW_IF_NOT (params);
const float *prev_x = x;
ScopeDeleter<float> del;
double t0 = getmillisecs();
if (auto ip = dynamic_cast<const IndexPreTransform *> (index)) {
x = ip->apply_chain (n, x);
if (x != prev_x) {
@ -329,6 +347,8 @@ void search_with_parameters (const Index *index,
index = ip->index;
}
double t1 = getmillisecs();
std::vector<idx_t> Iq(params->nprobe * n);
std::vector<float> Dq(params->nprobe * n);
@ -339,21 +359,76 @@ void search_with_parameters (const Index *index,
Dq.data(), Iq.data());
if (nb_dis_ptr) {
size_t nb_dis = 0;
const InvertedLists *il = index_ivf->invlists;
for (idx_t i = 0; i < n * params->nprobe; i++) {
if (Iq[i] >= 0) {
nb_dis += il->list_size(Iq[i]);
}
}
*nb_dis_ptr = nb_dis;
*nb_dis_ptr = count_ndis (index_ivf, n * params->nprobe, Iq.data());
}
double t2 = getmillisecs();
index_ivf->search_preassigned(n, x, k, Iq.data(), Dq.data(),
distances, labels,
false, params);
double t3 = getmillisecs();
if (ms_per_stage) {
ms_per_stage[0] = t1 - t0;
ms_per_stage[1] = t2 - t1;
ms_per_stage[2] = t3 - t2;
}
}
void range_search_with_parameters (const Index *index,
idx_t n, const float *x, float radius,
RangeSearchResult *result,
const IVFSearchParameters *params,
size_t *nb_dis_ptr,
double *ms_per_stage)
{
FAISS_THROW_IF_NOT (params);
const float *prev_x = x;
ScopeDeleter<float> del;
double t0 = getmillisecs();
if (auto ip = dynamic_cast<const IndexPreTransform *> (index)) {
x = ip->apply_chain (n, x);
if (x != prev_x) {
del.set(x);
}
index = ip->index;
}
double t1 = getmillisecs();
std::vector<idx_t> Iq(params->nprobe * n);
std::vector<float> Dq(params->nprobe * n);
const IndexIVF *index_ivf = dynamic_cast<const IndexIVF *>(index);
FAISS_THROW_IF_NOT (index_ivf);
index_ivf->quantizer->search(n, x, params->nprobe,
Dq.data(), Iq.data());
if (nb_dis_ptr) {
*nb_dis_ptr = count_ndis (index_ivf, n * params->nprobe, Iq.data());
}
double t2 = getmillisecs();
index_ivf->range_search_preassigned(
n, x, radius, Iq.data(), Dq.data(),
result, false, params
);
double t3 = getmillisecs();
if (ms_per_stage) {
ms_per_stage[0] = t1 - t0;
ms_per_stage[1] = t2 - t1;
ms_per_stage[2] = t3 - t2;
}
}
} } // namespace faiss::ivflib

View File

@ -120,14 +120,32 @@ ArrayInvertedLists * get_invlist_range (const Index *index,
void set_invlist_range (Index *index, long i0, long i1,
ArrayInvertedLists * src);
// search an IndexIVF, possibly embedded in an IndexPreTransform with
// given parameters. Optionally returns the number of distances
// computed
void search_with_parameters (const Index *index,
/** search an IndexIVF, possibly embedded in an IndexPreTransform with
* given parameters. This is a way to set the nprobe and get
* statistics in a thread-safe way.
*
* Optionally returns (if non-nullptr):
* - nb_dis: number of distances computed
* - ms_per_stage: [0]: preprocessing time
* [1]: coarse quantization,
* [2]: list scanning
*/
void search_with_parameters (
const Index *index,
idx_t n, const float *x, idx_t k,
float *distances, idx_t *labels,
IVFSearchParameters *params,
size_t *nb_dis = nullptr);
const IVFSearchParameters *params,
size_t *nb_dis = nullptr,
double *ms_per_stage = nullptr);
/** Same as search_with_parameters but for range search: runs a range
 * search with the given radius on an IndexIVF, possibly embedded in an
 * IndexPreTransform, using the given search parameters.
 *
 * params must not be null. The matches are written to result.
 *
 * Optionally returns (if non-nullptr):
 * - nb_dis: sum of the sizes of the inverted lists selected by the
 *           coarse quantizer
 * - ms_per_stage: [0]: preprocessing time
 *                 [1]: coarse quantization,
 *                 [2]: list scanning
 */
void range_search_with_parameters (
const Index *index,
idx_t n, const float *x, float radius,
RangeSearchResult *result,
const IVFSearchParameters *params,
size_t *nb_dis = nullptr,
double *ms_per_stage = nullptr);

View File

@ -559,11 +559,14 @@ void IndexIVF::range_search (idx_t nx, const float *x, float radius,
void IndexIVF::range_search_preassigned (
idx_t nx, const float *x, float radius,
const idx_t *keys, const float *coarse_dis,
RangeSearchResult *result) const
RangeSearchResult *result,
bool store_pairs,
const IVFSearchParameters *params) const
{
long nprobe = params ? params->nprobe : this->nprobe;
long max_codes = params ? params->max_codes : this->max_codes;
size_t nlistv = 0, ndis = 0;
bool store_pairs = false;
bool interrupt = false;
std::mutex exception_mutex;

View File

@ -196,7 +196,9 @@ struct IndexIVF: Index, Level1Quantizer {
void range_search_preassigned(idx_t nx, const float *x, float radius,
const idx_t *keys, const float *coarse_dis,
RangeSearchResult *result) const;
RangeSearchResult *result,
bool store_pairs=false,
const IVFSearchParameters *params=nullptr) const;
/// get a scanner for this index (store_pairs means ignore labels)
virtual InvertedListScanner *get_InvertedListScanner (

View File

@ -662,6 +662,72 @@ def replacement_map_search_multiple(self, keys):
replace_method(MapLong2Long, 'add', replacement_map_add)
replace_method(MapLong2Long, 'search_multiple', replacement_map_search_multiple)
search_with_parameters_c = search_with_parameters
def search_with_parameters(index, x, k, params=None, output_stats=False):
    """k-nearest-neighbor search with explicitly supplied IVF parameters.

    Parameters
    ----------
    index
        Index to search; its `d` must equal the second dimension of `x`.
    x : array of shape (n, d)
        Query vectors, passed to the C++ function through `swig_ptr`.
    k : int
        Number of results per query.
    params : IVFSearchParameters, optional
        Search parameters. When None, a fresh IVFSearchParameters is filled
        with the `nprobe` and `max_codes` of the IndexIVF extracted from
        `index`.
    output_stats : bool
        When True, also return a dict of per-call statistics.

    Returns
    -------
    distances : float32 array of shape (n, k)
    labels : int64 array of shape (n, k)
    stats : dict
        Only when `output_stats` is True. Keys: 'ndis', 'pre_transform_ms',
        'coarse_quantizer_ms', 'invlist_scan_ms'.
    """
    n, d = x.shape
    assert d == index.d

    # Test identity with None rather than truthiness, so that only a missing
    # argument triggers the fallback to the index's own settings.
    if params is None:
        # if not provided use the ones set in the IVF object
        params = IVFSearchParameters()
        index_ivf = extract_index_ivf(index)
        params.nprobe = index_ivf.nprobe
        params.max_codes = index_ivf.max_codes

    # output buffers filled in by the C++ function
    nb_dis = np.empty(1, 'uint64')
    ms_per_stage = np.empty(3, 'float64')
    distances = np.empty((n, k), dtype=np.float32)
    labels = np.empty((n, k), dtype=np.int64)

    search_with_parameters_c(
        index, n, swig_ptr(x),
        k, swig_ptr(distances),
        swig_ptr(labels),
        params, swig_ptr(nb_dis), swig_ptr(ms_per_stage)
    )

    if not output_stats:
        return distances, labels

    stats = {
        'ndis': nb_dis[0],
        'pre_transform_ms': ms_per_stage[0],
        'coarse_quantizer_ms': ms_per_stage[1],
        'invlist_scan_ms': ms_per_stage[2],
    }
    return distances, labels, stats
range_search_with_parameters_c = range_search_with_parameters
def range_search_with_parameters(index, x, radius, params=None, output_stats=False):
    """Range search with explicitly supplied IVF parameters.

    Parameters
    ----------
    index
        Index to search; its `d` must equal the second dimension of `x`.
    x : array of shape (n, d)
        Query vectors, passed to the C++ function through `swig_ptr`.
    radius : float
        Search radius forwarded to the C++ function.
    params : IVFSearchParameters, optional
        Search parameters. When None, a fresh IVFSearchParameters is filled
        with the `nprobe` and `max_codes` of the IndexIVF extracted from
        `index`.
    output_stats : bool
        When True, also return a dict of per-call statistics.

    Returns
    -------
    lims : array of n + 1 entries
        Copy of the result's `lims`; its last entry is the total number of
        results.
    Dout : array of lims[-1] entries
        Copy of the result distances.
    Iout : array of lims[-1] entries
        Copy of the result labels.
    stats : dict
        Only when `output_stats` is True. Keys: 'ndis', 'pre_transform_ms',
        'coarse_quantizer_ms', 'invlist_scan_ms'.
    """
    n, d = x.shape
    assert d == index.d

    # Test identity with None rather than truthiness, so that only a missing
    # argument triggers the fallback to the index's own settings.
    if params is None:
        # if not provided use the ones set in the IVF object
        params = IVFSearchParameters()
        index_ivf = extract_index_ivf(index)
        params.nprobe = index_ivf.nprobe
        params.max_codes = index_ivf.max_codes

    # output buffers filled in by the C++ function
    nb_dis = np.empty(1, 'uint64')
    ms_per_stage = np.empty(3, 'float64')
    res = RangeSearchResult(n)

    range_search_with_parameters_c(
        index, n, swig_ptr(x),
        radius, res,
        params, swig_ptr(nb_dis), swig_ptr(ms_per_stage)
    )

    # copy the results out of `res` into standalone numpy arrays
    lims = rev_swig_ptr(res.lims, n + 1).copy()
    nd = int(lims[-1])
    Dout = rev_swig_ptr(res.distances, nd).copy()
    Iout = rev_swig_ptr(res.labels, nd).copy()

    if not output_stats:
        return lims, Dout, Iout

    stats = {
        'ndis': nb_dis[0],
        'pre_transform_ms': ms_per_stage[0],
        'coarse_quantizer_ms': ms_per_stage[1],
        'invlist_scan_ms': ms_per_stage[2],
    }
    return lims, Dout, Iout, stats
###########################################
# Kmeans object

View File

@ -76,3 +76,73 @@ class TestSequentialScan(unittest.TestCase):
assert np.all(D == ref_D)
assert np.all(I == ref_I)
class TestSearchWithParameters(unittest.TestCase):
    """Check that the *_with_parameters functions honour the nprobe given in
    the parameters object and report the same `ndis` as the global IVF stats
    of an equivalent plain search."""

    def _make_index_and_queries(self):
        """Build the fixture shared by both tests.

        Returns a trained and populated 'IVF100,SQ8' index with nprobe = 3
        together with 200 query vectors. The random draws are made in the
        order train set, database, queries from one seeded generator.
        """
        d = 20
        rs = np.random.RandomState(123)
        xt = rs.rand(5000, d).astype('float32')
        xb = rs.rand(10000, d).astype('float32')
        xq = rs.rand(200, d).astype('float32')

        index = faiss.index_factory(d, 'IVF100,SQ8')
        index.train(xt)
        index.nprobe = 3
        index.add(xb)
        return index, xq

    def test_search_with_parameters(self):
        index, xq = self._make_index_and_queries()
        k = 15

        # reference run: plain search with the nprobe set on the index
        stats = faiss.cvar.indexIVF_stats
        stats.reset()
        Dref, Iref = index.search(xq, k)
        ref_ndis = stats.ndis

        # make sure the nprobe used is the one from params not the one
        # set in the index
        index.nprobe = 1
        params = faiss.IVFSearchParameters()
        params.nprobe = 3

        Dnew, Inew, stats2 = faiss.search_with_parameters(
            index, xq, k, params, output_stats=True)

        np.testing.assert_array_equal(Inew, Iref)
        np.testing.assert_array_equal(Dnew, Dref)
        self.assertEqual(stats2["ndis"], ref_ndis)

    def test_range_search_with_parameters(self):
        index, xq = self._make_index_and_queries()

        # use the median distance of the 15th neighbor as the radius
        Dpre, _ = index.search(xq, 15)
        radius = float(np.median(Dpre[:, -1]))

        # reference run: plain range search with the nprobe set on the index
        stats = faiss.cvar.indexIVF_stats
        stats.reset()
        Lref, Dref, Iref = index.range_search(xq, radius)
        ref_ndis = stats.ndis

        # make sure the nprobe used is the one from params not the one
        # set in the index
        index.nprobe = 1
        params = faiss.IVFSearchParameters()
        params.nprobe = 3

        Lnew, Dnew, Inew, stats2 = faiss.range_search_with_parameters(
            index, xq, radius, params, output_stats=True)

        np.testing.assert_array_equal(Lnew, Lref)
        np.testing.assert_array_equal(Inew, Iref)
        np.testing.assert_array_equal(Dnew, Dref)
        self.assertEqual(stats2["ndis"], ref_ndis)