expose thread-safe stats (#1438)
Summary: Pull Request resolved: https://github.com/facebookresearch/faiss/pull/1438 This diff changes Faiss and `combined_index.py` to propagate thread-safe stats to `handler.py`. Reviewed By: MDSilber Differential Revision: D24082543 fbshipit-source-id: 944e6b7630daeede5eb9501b81557a6fe5afec44 (pull/1443/head)
parent
6918f4ee48
commit
5ad630635c
|
@ -14,6 +14,7 @@
|
|||
#include <faiss/IndexPreTransform.h>
|
||||
#include <faiss/impl/FaissAssert.h>
|
||||
#include <faiss/MetaIndexes.h>
|
||||
#include <faiss/utils/utils.h>
|
||||
|
||||
|
||||
|
||||
|
@ -310,17 +311,34 @@ void set_invlist_range (Index *index, long i0, long i1,
|
|||
ivf->ntotal = index->ntotal = ntotal;
|
||||
}
|
||||
|
||||
static size_t count_ndis(const IndexIVF * index_ivf, size_t n_list_scan,
|
||||
const idx_t *Iq)
|
||||
{
|
||||
size_t nb_dis = 0;
|
||||
const InvertedLists *il = index_ivf->invlists;
|
||||
for (idx_t i = 0; i < n_list_scan; i++) {
|
||||
if (Iq[i] >= 0) {
|
||||
nb_dis += il->list_size(Iq[i]);
|
||||
}
|
||||
}
|
||||
return nb_dis;
|
||||
}
|
||||
|
||||
|
||||
|
||||
void search_with_parameters (const Index *index,
|
||||
idx_t n, const float *x, idx_t k,
|
||||
float *distances, idx_t *labels,
|
||||
IVFSearchParameters *params,
|
||||
size_t *nb_dis_ptr)
|
||||
const IVFSearchParameters *params,
|
||||
size_t *nb_dis_ptr,
|
||||
double *ms_per_stage)
|
||||
{
|
||||
FAISS_THROW_IF_NOT (params);
|
||||
const float *prev_x = x;
|
||||
ScopeDeleter<float> del;
|
||||
|
||||
double t0 = getmillisecs();
|
||||
|
||||
if (auto ip = dynamic_cast<const IndexPreTransform *> (index)) {
|
||||
x = ip->apply_chain (n, x);
|
||||
if (x != prev_x) {
|
||||
|
@ -329,6 +347,8 @@ void search_with_parameters (const Index *index,
|
|||
index = ip->index;
|
||||
}
|
||||
|
||||
double t1 = getmillisecs();
|
||||
|
||||
std::vector<idx_t> Iq(params->nprobe * n);
|
||||
std::vector<float> Dq(params->nprobe * n);
|
||||
|
||||
|
@ -339,21 +359,76 @@ void search_with_parameters (const Index *index,
|
|||
Dq.data(), Iq.data());
|
||||
|
||||
if (nb_dis_ptr) {
|
||||
size_t nb_dis = 0;
|
||||
const InvertedLists *il = index_ivf->invlists;
|
||||
for (idx_t i = 0; i < n * params->nprobe; i++) {
|
||||
if (Iq[i] >= 0) {
|
||||
nb_dis += il->list_size(Iq[i]);
|
||||
}
|
||||
}
|
||||
*nb_dis_ptr = nb_dis;
|
||||
*nb_dis_ptr = count_ndis (index_ivf, n * params->nprobe, Iq.data());
|
||||
}
|
||||
|
||||
double t2 = getmillisecs();
|
||||
|
||||
index_ivf->search_preassigned(n, x, k, Iq.data(), Dq.data(),
|
||||
distances, labels,
|
||||
false, params);
|
||||
double t3 = getmillisecs();
|
||||
if (ms_per_stage) {
|
||||
ms_per_stage[0] = t1 - t0;
|
||||
ms_per_stage[1] = t2 - t1;
|
||||
ms_per_stage[2] = t3 - t2;
|
||||
}
|
||||
}
|
||||
|
||||
void range_search_with_parameters (const Index *index,
|
||||
idx_t n, const float *x, float radius,
|
||||
RangeSearchResult *result,
|
||||
const IVFSearchParameters *params,
|
||||
size_t *nb_dis_ptr,
|
||||
double *ms_per_stage)
|
||||
{
|
||||
FAISS_THROW_IF_NOT (params);
|
||||
const float *prev_x = x;
|
||||
ScopeDeleter<float> del;
|
||||
|
||||
double t0 = getmillisecs();
|
||||
|
||||
if (auto ip = dynamic_cast<const IndexPreTransform *> (index)) {
|
||||
x = ip->apply_chain (n, x);
|
||||
if (x != prev_x) {
|
||||
del.set(x);
|
||||
}
|
||||
index = ip->index;
|
||||
}
|
||||
|
||||
double t1 = getmillisecs();
|
||||
|
||||
std::vector<idx_t> Iq(params->nprobe * n);
|
||||
std::vector<float> Dq(params->nprobe * n);
|
||||
|
||||
const IndexIVF *index_ivf = dynamic_cast<const IndexIVF *>(index);
|
||||
FAISS_THROW_IF_NOT (index_ivf);
|
||||
|
||||
index_ivf->quantizer->search(n, x, params->nprobe,
|
||||
Dq.data(), Iq.data());
|
||||
|
||||
if (nb_dis_ptr) {
|
||||
*nb_dis_ptr = count_ndis (index_ivf, n * params->nprobe, Iq.data());
|
||||
}
|
||||
|
||||
double t2 = getmillisecs();
|
||||
|
||||
index_ivf->range_search_preassigned(
|
||||
n, x, radius, Iq.data(), Dq.data(),
|
||||
result, false, params
|
||||
);
|
||||
|
||||
double t3 = getmillisecs();
|
||||
if (ms_per_stage) {
|
||||
ms_per_stage[0] = t1 - t0;
|
||||
ms_per_stage[1] = t2 - t1;
|
||||
ms_per_stage[2] = t3 - t2;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
} } // namespace faiss::ivflib
|
||||
|
|
|
@ -120,14 +120,32 @@ ArrayInvertedLists * get_invlist_range (const Index *index,
|
|||
void set_invlist_range (Index *index, long i0, long i1,
|
||||
ArrayInvertedLists * src);
|
||||
|
||||
// search an IndexIVF, possibly embedded in an IndexPreTransform with
|
||||
// given parameters. Optionally returns the number of distances
|
||||
// computed
|
||||
void search_with_parameters (const Index *index,
|
||||
/** search an IndexIVF, possibly embedded in an IndexPreTransform with
|
||||
* given parameters. This is a way to set the nprobe and get
|
||||
* statistics in a thread-safe way.
|
||||
*
|
||||
* Optionally returns (if non-nullptr):
|
||||
* - nb_dis: number of distances computed
|
||||
* - ms_per_stage: [0]: preprocessing time
|
||||
* [1]: coarse quantization,
|
||||
* [2]: list scanning
|
||||
*/
|
||||
void search_with_parameters (
|
||||
const Index *index,
|
||||
idx_t n, const float *x, idx_t k,
|
||||
float *distances, idx_t *labels,
|
||||
IVFSearchParameters *params,
|
||||
size_t *nb_dis = nullptr);
|
||||
const IVFSearchParameters *params,
|
||||
size_t *nb_dis = nullptr,
|
||||
double *ms_per_stage = nullptr);
|
||||
|
||||
/** same as search_with_parameters but for range search */
|
||||
void range_search_with_parameters (
|
||||
const Index *index,
|
||||
idx_t n, const float *x, float radius,
|
||||
RangeSearchResult *result,
|
||||
const IVFSearchParameters *params,
|
||||
size_t *nb_dis = nullptr,
|
||||
double *ms_per_stage = nullptr);
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -559,11 +559,14 @@ void IndexIVF::range_search (idx_t nx, const float *x, float radius,
|
|||
void IndexIVF::range_search_preassigned (
|
||||
idx_t nx, const float *x, float radius,
|
||||
const idx_t *keys, const float *coarse_dis,
|
||||
RangeSearchResult *result) const
|
||||
RangeSearchResult *result,
|
||||
bool store_pairs,
|
||||
const IVFSearchParameters *params) const
|
||||
{
|
||||
long nprobe = params ? params->nprobe : this->nprobe;
|
||||
long max_codes = params ? params->max_codes : this->max_codes;
|
||||
|
||||
size_t nlistv = 0, ndis = 0;
|
||||
bool store_pairs = false;
|
||||
|
||||
bool interrupt = false;
|
||||
std::mutex exception_mutex;
|
||||
|
|
|
@ -196,7 +196,9 @@ struct IndexIVF: Index, Level1Quantizer {
|
|||
|
||||
void range_search_preassigned(idx_t nx, const float *x, float radius,
|
||||
const idx_t *keys, const float *coarse_dis,
|
||||
RangeSearchResult *result) const;
|
||||
RangeSearchResult *result,
|
||||
bool store_pairs=false,
|
||||
const IVFSearchParameters *params=nullptr) const;
|
||||
|
||||
/// get a scanner for this index (store_pairs means ignore labels)
|
||||
virtual InvertedListScanner *get_InvertedListScanner (
|
||||
|
|
|
@ -662,6 +662,72 @@ def replacement_map_search_multiple(self, keys):
|
|||
replace_method(MapLong2Long, 'add', replacement_map_add)
|
||||
replace_method(MapLong2Long, 'search_multiple', replacement_map_search_multiple)
|
||||
|
||||
search_with_parameters_c = search_with_parameters
|
||||
|
||||
def search_with_parameters(index, x, k, params=None, output_stats=False):
    """Search `index` for the k nearest neighbors of the rows of `x`,
    passing the search parameters explicitly to the C++ function.

    If `params` is falsy, an IVFSearchParameters is created and filled
    with the nprobe and max_codes of the IndexIVF extracted from `index`.

    Returns (distances, labels), two arrays of shape (n, k) with dtypes
    float32 and int64. If `output_stats` is true, a third element is
    returned: a dict with the keys 'ndis', 'pre_transform_ms',
    'coarse_quantizer_ms' and 'invlist_scan_ms'.
    """
    nq, dim = x.shape
    assert dim == index.d
    if not params:
        # nothing provided: fall back on the values set in the IVF object
        params = IVFSearchParameters()
        ivf = extract_index_ivf(index)
        params.nprobe = ivf.nprobe
        params.max_codes = ivf.max_codes

    # buffers passed by pointer to the C++ function
    D = np.empty((nq, k), dtype=np.float32)
    I = np.empty((nq, k), dtype=np.int64)
    ndis_buf = np.empty(1, 'uint64')
    stage_ms = np.empty(3, 'float64')

    search_with_parameters_c(
        index, nq, swig_ptr(x),
        k, swig_ptr(D), swig_ptr(I),
        params, swig_ptr(ndis_buf), swig_ptr(stage_ms)
    )

    if output_stats:
        stats = {
            'ndis': ndis_buf[0],
            'pre_transform_ms': stage_ms[0],
            'coarse_quantizer_ms': stage_ms[1],
            'invlist_scan_ms': stage_ms[2],
        }
        return D, I, stats
    return D, I
|
||||
|
||||
range_search_with_parameters_c = range_search_with_parameters
|
||||
|
||||
def range_search_with_parameters(index, x, radius, params=None, output_stats=False):
    """Range search on `index` for the rows of `x`, passing the search
    parameters explicitly to the C++ function.

    If `params` is falsy, an IVFSearchParameters is created and filled
    with the nprobe and max_codes of the IndexIVF extracted from `index`.

    Returns (lims, distances, labels): `lims` has n + 1 entries and its
    last entry gives the length of the two other arrays. If `output_stats`
    is true, a fourth element is returned: a dict with the keys 'ndis',
    'pre_transform_ms', 'coarse_quantizer_ms' and 'invlist_scan_ms'.
    """
    nq, dim = x.shape
    assert dim == index.d
    if not params:
        # nothing provided: fall back on the values set in the IVF object
        params = IVFSearchParameters()
        ivf = extract_index_ivf(index)
        params.nprobe = ivf.nprobe
        params.max_codes = ivf.max_codes

    # buffers passed by pointer to the C++ function
    ndis_buf = np.empty(1, 'uint64')
    stage_ms = np.empty(3, 'float64')
    result = RangeSearchResult(nq)

    range_search_with_parameters_c(
        index, nq, swig_ptr(x),
        radius, result,
        params, swig_ptr(ndis_buf), swig_ptr(stage_ms)
    )

    # copy the output out of the RangeSearchResult object
    lims = rev_swig_ptr(result.lims, nq + 1).copy()
    n_found = int(lims[-1])
    D = rev_swig_ptr(result.distances, n_found).copy()
    I = rev_swig_ptr(result.labels, n_found).copy()

    if output_stats:
        stats = {
            'ndis': ndis_buf[0],
            'pre_transform_ms': stage_ms[0],
            'coarse_quantizer_ms': stage_ms[1],
            'invlist_scan_ms': stage_ms[2],
        }
        return lims, D, I, stats
    return lims, D, I
|
||||
|
||||
|
||||
###########################################
|
||||
# Kmeans object
|
||||
|
|
|
@ -76,3 +76,73 @@ class TestSequentialScan(unittest.TestCase):
|
|||
|
||||
assert np.all(D == ref_D)
|
||||
assert np.all(I == ref_I)
|
||||
|
||||
|
||||
class TestSearchWithParameters(unittest.TestCase):
    """Compare the *_with_parameters functions with the regular index
    search methods, on results and on the number of distances reported."""

    @staticmethod
    def _make_index_and_queries():
        """Build a trained and populated 'IVF100,SQ8' index with nprobe = 3
        and return it together with 200 random query vectors."""
        d = 20
        index = faiss.index_factory(d, 'IVF100,SQ8')

        rs = np.random.RandomState(123)
        xt = rs.rand(5000, d).astype('float32')
        xb = rs.rand(10000, d).astype('float32')
        index.train(xt)
        index.nprobe = 3
        index.add(xb)
        xq = rs.rand(200, d).astype('float32')
        return index, xq

    @staticmethod
    def _params_overriding_index(index):
        """Set the index nprobe to 1 and return parameters with nprobe 3,
        so that a matching result shows that the nprobe from the
        parameters is used rather than the one set in the index."""
        index.nprobe = 1
        params = faiss.IVFSearchParameters()
        params.nprobe = 3
        return params

    def test_search_with_parameters(self):
        index, xq = self._make_index_and_queries()
        k = 15

        # reference: regular search, ndis read from the global stats object
        ivf_stats = faiss.cvar.indexIVF_stats
        ivf_stats.reset()
        Dref, Iref = index.search(xq, k)
        ref_ndis = ivf_stats.ndis

        params = self._params_overriding_index(index)
        Dnew, Inew, stats2 = faiss.search_with_parameters(
            index, xq, k, params, output_stats=True)

        np.testing.assert_array_equal(Inew, Iref)
        np.testing.assert_array_equal(Dnew, Dref)

        self.assertEqual(stats2["ndis"], ref_ndis)

    def test_range_search_with_parameters(self):
        index, xq = self._make_index_and_queries()

        # radius: median over the queries of the distance to the 15th result
        Dpre, _ = index.search(xq, 15)
        radius = float(np.median(Dpre[:, -1]))
        print("Radius=", radius)

        # reference: regular range search, ndis read from the global stats
        ivf_stats = faiss.cvar.indexIVF_stats
        ivf_stats.reset()
        Lref, Dref, Iref = index.range_search(xq, radius)
        ref_ndis = ivf_stats.ndis

        params = self._params_overriding_index(index)
        Lnew, Dnew, Inew, stats2 = faiss.range_search_with_parameters(
            index, xq, radius, params, output_stats=True)

        np.testing.assert_array_equal(Lnew, Lref)
        np.testing.assert_array_equal(Inew, Iref)
        np.testing.assert_array_equal(Dnew, Dref)

        self.assertEqual(stats2["ndis"], ref_ndis)
|
||||
|
|
Loading…
Reference in New Issue