Proposal IDSelectorCombination (#2742)

Summary:
Adds support for an IDSelector that takes in two IDSelectors and can perform a boolean operation on their is_member outcomes.

Current implementation is pretty naive and doesn't try to do any optimizations on the types of IDSelectors combined.

Also test cases are definitely lacking but can add more once approach is agreed upon.

Pull Request resolved: https://github.com/facebookresearch/faiss/pull/2742

Reviewed By: algoriddle

Differential Revision: D43904855

Pulled By: mdouze

fbshipit-source-id: bbe687800a19b418ca30c9257fb0334c64ab5f52
pull/2750/head
Kaelen Haag 2023-03-08 08:48:54 -08:00 committed by Facebook GitHub Bot
parent 6d0294ba6c
commit 1ee15ef3c3
3 changed files with 79 additions and 1 deletions

View File

@ -131,4 +131,43 @@ struct IDSelectorAll : IDSelector {
virtual ~IDSelectorAll() {}
};
/// does an AND operation on the the two given IDSelector's is_membership
/// results.
struct IDSelectorAnd : IDSelector {
const IDSelector* lhs;
const IDSelector* rhs;
IDSelectorAnd(const IDSelector* lhs, const IDSelector* rhs)
: lhs(lhs), rhs(rhs) {}
bool is_member(idx_t id) const final {
return lhs->is_member(id) && rhs->is_member(id);
};
virtual ~IDSelectorAnd() {}
};
/// does an OR operation on the the two given IDSelector's is_membership
/// results.
struct IDSelectorOr : IDSelector {
const IDSelector* lhs;
const IDSelector* rhs;
IDSelectorOr(const IDSelector* lhs, const IDSelector* rhs)
: lhs(lhs), rhs(rhs) {}
bool is_member(idx_t id) const final {
return lhs->is_member(id) || rhs->is_member(id);
};
virtual ~IDSelectorOr() {}
};
/// does an XOR operation on the the two given IDSelector's is_membership
/// results.
struct IDSelectorXOr : IDSelector {
const IDSelector* lhs;
const IDSelector* rhs;
IDSelectorXOr(const IDSelector* lhs, const IDSelector* rhs)
: lhs(lhs), rhs(rhs) {}
bool is_member(idx_t id) const final {
return lhs->is_member(id) ^ rhs->is_member(id);
};
virtual ~IDSelectorXOr() {}
};
} // namespace faiss

View File

@ -189,6 +189,9 @@ add_ref_in_constructor(BufferedIOWriter, 0)
add_ref_in_constructor(BufferedIOReader, 0)
add_ref_in_constructor(IDSelectorNot, 0)
add_ref_in_constructor(IDSelectorAnd, slice(2))
add_ref_in_constructor(IDSelectorOr, slice(2))
add_ref_in_constructor(IDSelectorXOr, slice(2))
# seems really marginal...
# remove_ref_from_method(IndexReplicas, 'removeIndex', 0)

View File

@ -23,7 +23,7 @@ class TestSelector(unittest.TestCase):
def do_test_id_selector(self, index_key, id_selector_type="batch", mt=faiss.METRIC_L2):
""" Verify that the id selector returns the subset of results that are
members according to the IDSelector.
Supports id_selector_type="batch", "bitmap", "range", "range_sorted", "not"
Supports id_selector_type="batch", "bitmap", "range", "range_sorted", "and", "or", "xor"
"""
ds = datasets.SyntheticDataset(32, 1000, 100, 20)
index = faiss.index_factory(ds.d, index_key, mt)
@ -33,6 +33,24 @@ class TestSelector(unittest.TestCase):
# reference result
if "range" in id_selector_type:
subset = np.arange(30, 80).astype('int64')
elif id_selector_type == "or":
lhs_rs = np.random.RandomState(123)
lhs_subset = lhs_rs.choice(ds.nb, 50, replace=False).astype("int64")
rhs_rs = np.random.RandomState(456)
rhs_subset = rhs_rs.choice(ds.nb, 20, replace=False).astype("int64")
subset = np.union1d(lhs_subset, rhs_subset)
elif id_selector_type == "and":
lhs_rs = np.random.RandomState(123)
lhs_subset = lhs_rs.choice(ds.nb, 50, replace=False).astype("int64")
rhs_rs = np.random.RandomState(456)
rhs_subset = rhs_rs.choice(ds.nb, 10, replace=False).astype("int64")
subset = np.intersect1d(lhs_subset, rhs_subset)
elif id_selector_type == "xor":
lhs_rs = np.random.RandomState(123)
lhs_subset = lhs_rs.choice(ds.nb, 50, replace=False).astype("int64")
rhs_rs = np.random.RandomState(456)
rhs_subset = rhs_rs.choice(ds.nb, 40, replace=False).astype("int64")
subset = np.setxor1d(lhs_subset, rhs_subset)
else:
rs = np.random.RandomState(123)
subset = rs.choice(ds.nb, 50, replace=False).astype("int64")
@ -81,6 +99,21 @@ class TestSelector(unittest.TestCase):
if i not in ssubset
]).astype('int64')
sel = faiss.IDSelectorNot(faiss.IDSelectorBatch(inverse_subset))
elif id_selector_type == "or":
sel = faiss.IDSelectorOr(
faiss.IDSelectorBatch(lhs_subset),
faiss.IDSelectorBatch(rhs_subset)
)
elif id_selector_type == "and":
sel = faiss.IDSelectorAnd(
faiss.IDSelectorBatch(lhs_subset),
faiss.IDSelectorBatch(rhs_subset)
)
elif id_selector_type == "xor":
sel = faiss.IDSelectorXOr(
faiss.IDSelectorBatch(lhs_subset),
faiss.IDSelectorBatch(rhs_subset)
)
else:
sel = faiss.IDSelectorBatch(subset)
@ -148,6 +181,9 @@ class TestSelector(unittest.TestCase):
def test_Flat_id_not(self):
self.do_test_id_selector("Flat", id_selector_type="not")
def test_Flat_id_or(self):
self.do_test_id_selector("Flat", id_selector_type="or")
# not implemented