expose thread-safe stats (#1438)
Summary: Pull Request resolved: https://github.com/facebookresearch/faiss/pull/1438 — This diff changes Faiss and `combined_index.py` to propagate thread-safe stats to `handler.py`. Reviewed By: MDSilber. Differential Revision: D24082543. fbshipit-source-id: 944e6b7630daeede5eb9501b81557a6fe5afec44 · pull/1443/head
parent
6918f4ee48
commit
5ad630635c
|
@ -14,6 +14,7 @@
|
||||||
#include <faiss/IndexPreTransform.h>
|
#include <faiss/IndexPreTransform.h>
|
||||||
#include <faiss/impl/FaissAssert.h>
|
#include <faiss/impl/FaissAssert.h>
|
||||||
#include <faiss/MetaIndexes.h>
|
#include <faiss/MetaIndexes.h>
|
||||||
|
#include <faiss/utils/utils.h>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -310,17 +311,34 @@ void set_invlist_range (Index *index, long i0, long i1,
|
||||||
ivf->ntotal = index->ntotal = ntotal;
|
ivf->ntotal = index->ntotal = ntotal;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static size_t count_ndis(const IndexIVF * index_ivf, size_t n_list_scan,
|
||||||
|
const idx_t *Iq)
|
||||||
|
{
|
||||||
|
size_t nb_dis = 0;
|
||||||
|
const InvertedLists *il = index_ivf->invlists;
|
||||||
|
for (idx_t i = 0; i < n_list_scan; i++) {
|
||||||
|
if (Iq[i] >= 0) {
|
||||||
|
nb_dis += il->list_size(Iq[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nb_dis;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
void search_with_parameters (const Index *index,
|
void search_with_parameters (const Index *index,
|
||||||
idx_t n, const float *x, idx_t k,
|
idx_t n, const float *x, idx_t k,
|
||||||
float *distances, idx_t *labels,
|
float *distances, idx_t *labels,
|
||||||
IVFSearchParameters *params,
|
const IVFSearchParameters *params,
|
||||||
size_t *nb_dis_ptr)
|
size_t *nb_dis_ptr,
|
||||||
|
double *ms_per_stage)
|
||||||
{
|
{
|
||||||
FAISS_THROW_IF_NOT (params);
|
FAISS_THROW_IF_NOT (params);
|
||||||
const float *prev_x = x;
|
const float *prev_x = x;
|
||||||
ScopeDeleter<float> del;
|
ScopeDeleter<float> del;
|
||||||
|
|
||||||
|
double t0 = getmillisecs();
|
||||||
|
|
||||||
if (auto ip = dynamic_cast<const IndexPreTransform *> (index)) {
|
if (auto ip = dynamic_cast<const IndexPreTransform *> (index)) {
|
||||||
x = ip->apply_chain (n, x);
|
x = ip->apply_chain (n, x);
|
||||||
if (x != prev_x) {
|
if (x != prev_x) {
|
||||||
|
@ -329,6 +347,8 @@ void search_with_parameters (const Index *index,
|
||||||
index = ip->index;
|
index = ip->index;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
double t1 = getmillisecs();
|
||||||
|
|
||||||
std::vector<idx_t> Iq(params->nprobe * n);
|
std::vector<idx_t> Iq(params->nprobe * n);
|
||||||
std::vector<float> Dq(params->nprobe * n);
|
std::vector<float> Dq(params->nprobe * n);
|
||||||
|
|
||||||
|
@ -339,21 +359,76 @@ void search_with_parameters (const Index *index,
|
||||||
Dq.data(), Iq.data());
|
Dq.data(), Iq.data());
|
||||||
|
|
||||||
if (nb_dis_ptr) {
|
if (nb_dis_ptr) {
|
||||||
size_t nb_dis = 0;
|
*nb_dis_ptr = count_ndis (index_ivf, n * params->nprobe, Iq.data());
|
||||||
const InvertedLists *il = index_ivf->invlists;
|
|
||||||
for (idx_t i = 0; i < n * params->nprobe; i++) {
|
|
||||||
if (Iq[i] >= 0) {
|
|
||||||
nb_dis += il->list_size(Iq[i]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
*nb_dis_ptr = nb_dis;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
double t2 = getmillisecs();
|
||||||
|
|
||||||
index_ivf->search_preassigned(n, x, k, Iq.data(), Dq.data(),
|
index_ivf->search_preassigned(n, x, k, Iq.data(), Dq.data(),
|
||||||
distances, labels,
|
distances, labels,
|
||||||
false, params);
|
false, params);
|
||||||
|
double t3 = getmillisecs();
|
||||||
|
if (ms_per_stage) {
|
||||||
|
ms_per_stage[0] = t1 - t0;
|
||||||
|
ms_per_stage[1] = t2 - t1;
|
||||||
|
ms_per_stage[2] = t3 - t2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void range_search_with_parameters (const Index *index,
|
||||||
|
idx_t n, const float *x, float radius,
|
||||||
|
RangeSearchResult *result,
|
||||||
|
const IVFSearchParameters *params,
|
||||||
|
size_t *nb_dis_ptr,
|
||||||
|
double *ms_per_stage)
|
||||||
|
{
|
||||||
|
FAISS_THROW_IF_NOT (params);
|
||||||
|
const float *prev_x = x;
|
||||||
|
ScopeDeleter<float> del;
|
||||||
|
|
||||||
|
double t0 = getmillisecs();
|
||||||
|
|
||||||
|
if (auto ip = dynamic_cast<const IndexPreTransform *> (index)) {
|
||||||
|
x = ip->apply_chain (n, x);
|
||||||
|
if (x != prev_x) {
|
||||||
|
del.set(x);
|
||||||
|
}
|
||||||
|
index = ip->index;
|
||||||
|
}
|
||||||
|
|
||||||
|
double t1 = getmillisecs();
|
||||||
|
|
||||||
|
std::vector<idx_t> Iq(params->nprobe * n);
|
||||||
|
std::vector<float> Dq(params->nprobe * n);
|
||||||
|
|
||||||
|
const IndexIVF *index_ivf = dynamic_cast<const IndexIVF *>(index);
|
||||||
|
FAISS_THROW_IF_NOT (index_ivf);
|
||||||
|
|
||||||
|
index_ivf->quantizer->search(n, x, params->nprobe,
|
||||||
|
Dq.data(), Iq.data());
|
||||||
|
|
||||||
|
if (nb_dis_ptr) {
|
||||||
|
*nb_dis_ptr = count_ndis (index_ivf, n * params->nprobe, Iq.data());
|
||||||
|
}
|
||||||
|
|
||||||
|
double t2 = getmillisecs();
|
||||||
|
|
||||||
|
index_ivf->range_search_preassigned(
|
||||||
|
n, x, radius, Iq.data(), Dq.data(),
|
||||||
|
result, false, params
|
||||||
|
);
|
||||||
|
|
||||||
|
double t3 = getmillisecs();
|
||||||
|
if (ms_per_stage) {
|
||||||
|
ms_per_stage[0] = t1 - t0;
|
||||||
|
ms_per_stage[1] = t2 - t1;
|
||||||
|
ms_per_stage[2] = t3 - t2;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
} } // namespace faiss::ivflib
|
} } // namespace faiss::ivflib
|
||||||
|
|
|
@ -120,14 +120,32 @@ ArrayInvertedLists * get_invlist_range (const Index *index,
|
||||||
void set_invlist_range (Index *index, long i0, long i1,
|
void set_invlist_range (Index *index, long i0, long i1,
|
||||||
ArrayInvertedLists * src);
|
ArrayInvertedLists * src);
|
||||||
|
|
||||||
// search an IndexIVF, possibly embedded in an IndexPreTransform with
|
/** search an IndexIVF, possibly embedded in an IndexPreTransform with
|
||||||
// given parameters. Optionally returns the number of distances
|
* given parameters. This is a way to set the nprobe and get
|
||||||
// computed
|
* statistics in a thread-safe way.
|
||||||
void search_with_parameters (const Index *index,
|
*
|
||||||
idx_t n, const float *x, idx_t k,
|
* Optionally returns (if non-nullptr):
|
||||||
float *distances, idx_t *labels,
|
* - nb_dis: number of distances computed
|
||||||
IVFSearchParameters *params,
|
* - ms_per_stage: [0]: preprocessing time
|
||||||
size_t *nb_dis = nullptr);
|
* [1]: coarse quantization,
|
||||||
|
* [2]: list scanning
|
||||||
|
*/
|
||||||
|
void search_with_parameters (
|
||||||
|
const Index *index,
|
||||||
|
idx_t n, const float *x, idx_t k,
|
||||||
|
float *distances, idx_t *labels,
|
||||||
|
const IVFSearchParameters *params,
|
||||||
|
size_t *nb_dis = nullptr,
|
||||||
|
double *ms_per_stage = nullptr);
|
||||||
|
|
||||||
|
/** same as search_with_parameters but for range search */
|
||||||
|
void range_search_with_parameters (
|
||||||
|
const Index *index,
|
||||||
|
idx_t n, const float *x, float radius,
|
||||||
|
RangeSearchResult *result,
|
||||||
|
const IVFSearchParameters *params,
|
||||||
|
size_t *nb_dis = nullptr,
|
||||||
|
double *ms_per_stage = nullptr);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -559,11 +559,14 @@ void IndexIVF::range_search (idx_t nx, const float *x, float radius,
|
||||||
void IndexIVF::range_search_preassigned (
|
void IndexIVF::range_search_preassigned (
|
||||||
idx_t nx, const float *x, float radius,
|
idx_t nx, const float *x, float radius,
|
||||||
const idx_t *keys, const float *coarse_dis,
|
const idx_t *keys, const float *coarse_dis,
|
||||||
RangeSearchResult *result) const
|
RangeSearchResult *result,
|
||||||
|
bool store_pairs,
|
||||||
|
const IVFSearchParameters *params) const
|
||||||
{
|
{
|
||||||
|
long nprobe = params ? params->nprobe : this->nprobe;
|
||||||
|
long max_codes = params ? params->max_codes : this->max_codes;
|
||||||
|
|
||||||
size_t nlistv = 0, ndis = 0;
|
size_t nlistv = 0, ndis = 0;
|
||||||
bool store_pairs = false;
|
|
||||||
|
|
||||||
bool interrupt = false;
|
bool interrupt = false;
|
||||||
std::mutex exception_mutex;
|
std::mutex exception_mutex;
|
||||||
|
|
|
@ -196,7 +196,9 @@ struct IndexIVF: Index, Level1Quantizer {
|
||||||
|
|
||||||
void range_search_preassigned(idx_t nx, const float *x, float radius,
|
void range_search_preassigned(idx_t nx, const float *x, float radius,
|
||||||
const idx_t *keys, const float *coarse_dis,
|
const idx_t *keys, const float *coarse_dis,
|
||||||
RangeSearchResult *result) const;
|
RangeSearchResult *result,
|
||||||
|
bool store_pairs=false,
|
||||||
|
const IVFSearchParameters *params=nullptr) const;
|
||||||
|
|
||||||
/// get a scanner for this index (store_pairs means ignore labels)
|
/// get a scanner for this index (store_pairs means ignore labels)
|
||||||
virtual InvertedListScanner *get_InvertedListScanner (
|
virtual InvertedListScanner *get_InvertedListScanner (
|
||||||
|
|
|
@ -662,6 +662,72 @@ def replacement_map_search_multiple(self, keys):
|
||||||
replace_method(MapLong2Long, 'add', replacement_map_add)
|
replace_method(MapLong2Long, 'add', replacement_map_add)
|
||||||
replace_method(MapLong2Long, 'search_multiple', replacement_map_search_multiple)
|
replace_method(MapLong2Long, 'search_multiple', replacement_map_search_multiple)
|
||||||
|
|
||||||
|
search_with_parameters_c = search_with_parameters
|
||||||
|
|
||||||
|
def search_with_parameters(index, x, k, params=None, output_stats=False):
    """Run a k-NN search through the C++ `search_with_parameters` function.

    Parameters
    ----------
    index : index passed to the C++ function; `index.d` must match the
        query dimension
    x : array_like, shape (n, d)
        Query vectors. Converted to a C-contiguous float32 array before the
        raw pointer is handed to the C++ side.
    k : int
        Number of results per query.
    params : IVFSearchParameters, optional
        If omitted, a fresh IVFSearchParameters is filled with the `nprobe`
        and `max_codes` of the IVF index returned by `extract_index_ivf`.
    output_stats : bool
        If true, additionally return a stats dict.

    Returns
    -------
    distances : float32 array, shape (n, k)
    labels : int64 array, shape (n, k)
    stats : dict, only if `output_stats` is true, with keys 'ndis',
        'pre_transform_ms', 'coarse_quantizer_ms' and 'invlist_scan_ms'
    """
    # swig_ptr hands the raw buffer to C++, so make sure the layout and
    # dtype are the ones the C++ signature expects (no copy if already so)
    x = np.ascontiguousarray(x, dtype='float32')
    n, d = x.shape
    assert d == index.d
    if not params:
        # if not provided use the ones set in the IVF object
        params = IVFSearchParameters()
        index_ivf = extract_index_ivf(index)
        params.nprobe = index_ivf.nprobe
        params.max_codes = index_ivf.max_codes
    # output buffers filled in by the C++ call
    nb_dis = np.empty(1, 'uint64')
    ms_per_stage = np.empty(3, 'float64')
    distances = np.empty((n, k), dtype=np.float32)
    labels = np.empty((n, k), dtype=np.int64)
    search_with_parameters_c(
        index, n, swig_ptr(x),
        k, swig_ptr(distances),
        swig_ptr(labels),
        params, swig_ptr(nb_dis), swig_ptr(ms_per_stage)
    )
    if not output_stats:
        return distances, labels
    else:
        stats = {
            'ndis': nb_dis[0],
            'pre_transform_ms': ms_per_stage[0],
            'coarse_quantizer_ms': ms_per_stage[1],
            'invlist_scan_ms': ms_per_stage[2],
        }
        return distances, labels, stats
|
||||||
|
|
||||||
|
range_search_with_parameters_c = range_search_with_parameters
|
||||||
|
|
||||||
|
def range_search_with_parameters(index, x, radius, params=None, output_stats=False):
    """Run a range search through the C++ `range_search_with_parameters`
    function.

    Parameters
    ----------
    index : index passed to the C++ function; `index.d` must match the
        query dimension
    x : array_like, shape (n, d)
        Query vectors. Converted to a C-contiguous float32 array before the
        raw pointer is handed to the C++ side.
    radius : float
        Search radius, forwarded to the C++ function.
    params : IVFSearchParameters, optional
        If omitted, a fresh IVFSearchParameters is filled with the `nprobe`
        and `max_codes` of the IVF index returned by `extract_index_ivf`.
    output_stats : bool
        If true, additionally return a stats dict.

    Returns
    -------
    lims : array of size n + 1, copied from the RangeSearchResult; its last
        entry is the total number of results
    Dout : array of size lims[-1], copied result distances
    Iout : array of size lims[-1], copied result labels
    stats : dict, only if `output_stats` is true, with keys 'ndis',
        'pre_transform_ms', 'coarse_quantizer_ms' and 'invlist_scan_ms'
    """
    # swig_ptr hands the raw buffer to C++, so make sure the layout and
    # dtype are the ones the C++ signature expects (no copy if already so)
    x = np.ascontiguousarray(x, dtype='float32')
    n, d = x.shape
    assert d == index.d
    if not params:
        # if not provided use the ones set in the IVF object
        params = IVFSearchParameters()
        index_ivf = extract_index_ivf(index)
        params.nprobe = index_ivf.nprobe
        params.max_codes = index_ivf.max_codes
    # output buffers filled in by the C++ call
    nb_dis = np.empty(1, 'uint64')
    ms_per_stage = np.empty(3, 'float64')
    res = RangeSearchResult(n)
    range_search_with_parameters_c(
        index, n, swig_ptr(x),
        radius, res,
        params, swig_ptr(nb_dis), swig_ptr(ms_per_stage)
    )
    # copy the results out of the buffers held by `res`
    lims = rev_swig_ptr(res.lims, n + 1).copy()
    nd = int(lims[-1])
    Dout = rev_swig_ptr(res.distances, nd).copy()
    Iout = rev_swig_ptr(res.labels, nd).copy()
    if not output_stats:
        return lims, Dout, Iout
    else:
        stats = {
            'ndis': nb_dis[0],
            'pre_transform_ms': ms_per_stage[0],
            'coarse_quantizer_ms': ms_per_stage[1],
            'invlist_scan_ms': ms_per_stage[2],
        }
        return lims, Dout, Iout, stats
|
||||||
|
|
||||||
|
|
||||||
###########################################
|
###########################################
|
||||||
# Kmeans object
|
# Kmeans object
|
||||||
|
|
|
@ -76,3 +76,73 @@ class TestSequentialScan(unittest.TestCase):
|
||||||
|
|
||||||
assert np.all(D == ref_D)
|
assert np.all(D == ref_D)
|
||||||
assert np.all(I == ref_I)
|
assert np.all(I == ref_I)
|
||||||
|
|
||||||
|
|
||||||
|
class TestSearchWithParameters(unittest.TestCase):
    """The *_with_parameters wrappers must return the same results as the
    plain index methods and report the same ndis as the global IVF stats."""

    def _populated_index(self):
        """Build an IVF100,SQ8 index (nprobe=3) on random data.

        Returns the index and a batch of 200 random queries. The random
        draws happen in the order train set, database, queries.
        """
        dim = 20
        ivf = faiss.index_factory(dim, 'IVF100,SQ8')

        rng = np.random.RandomState(123)
        train_set = rng.rand(5000, dim).astype('float32')
        database = rng.rand(10000, dim).astype('float32')
        ivf.train(train_set)
        ivf.nprobe = 3
        ivf.add(database)
        queries = rng.rand(200, dim).astype('float32')
        return ivf, queries

    def test_search_with_parameters(self):
        index, xq = self._populated_index()
        k = 15

        # reference: regular search, ndis read from the global stats object
        global_stats = faiss.cvar.indexIVF_stats
        global_stats.reset()
        D_expected, I_expected = index.search(xq, k)
        ndis_expected = global_stats.ndis

        # make sure the nprobe used is the one from params not the one
        # set in the index
        index.nprobe = 1
        search_params = faiss.IVFSearchParameters()
        search_params.nprobe = 3

        D_got, I_got, call_stats = faiss.search_with_parameters(
            index, xq, k, search_params, output_stats=True)

        np.testing.assert_array_equal(I_got, I_expected)
        np.testing.assert_array_equal(D_got, D_expected)

        self.assertEqual(call_stats["ndis"], ndis_expected)

    def test_range_search_with_parameters(self):
        index, xq = self._populated_index()

        # radius = median distance to the 15th neighbor
        D_knn, _ = index.search(xq, 15)
        radius = float(np.median(D_knn[:, -1]))
        print("Radius=", radius)

        # reference: regular range search, ndis read from the global stats
        global_stats = faiss.cvar.indexIVF_stats
        global_stats.reset()
        L_expected, D_expected, I_expected = index.range_search(xq, radius)
        ndis_expected = global_stats.ndis

        # make sure the nprobe used is the one from params not the one
        # set in the index
        index.nprobe = 1
        search_params = faiss.IVFSearchParameters()
        search_params.nprobe = 3

        L_got, D_got, I_got, call_stats = faiss.range_search_with_parameters(
            index, xq, radius, search_params, output_stats=True)

        np.testing.assert_array_equal(L_got, L_expected)
        np.testing.assert_array_equal(I_got, I_expected)
        np.testing.assert_array_equal(D_got, D_expected)

        self.assertEqual(call_stats["ndis"], ndis_expected)
|
||||||
|
|
Loading…
Reference in New Issue