use dispatcher function to call HammingComputer (#2918)

Summary:
Pull Request resolved: https://github.com/facebookresearch/faiss/pull/2918

The HammingComputer class is optimized for several vector sizes. So far it has been the caller's responsibility to instantiate the relevant optimized version.

This diff introduces a `dispatch_HammingComputer` function that can be called with a template class that is instantiated for all existing optimized HammingComputers.

Reviewed By: algoriddle

Differential Revision: D46858553

fbshipit-source-id: 32c31689bba7c0b406b309fc8574c95fa24022ba
pull/2922/head
Matthijs Douze 2023-06-26 14:06:10 -07:00 committed by Facebook GitHub Bot
parent a27036aa72
commit a91a2887fe
13 changed files with 367 additions and 688 deletions

View File

@ -18,6 +18,66 @@
using namespace faiss; using namespace faiss;
// These implementations are currently slower than HammingComputerDefault so
// they are not in the main faiss anymore.
/// Hamming distance computer for codes whose size is a multiple of 8 bytes.
///
/// The query code is not copied: only a pointer to it is kept, viewed as an
/// array of 64-bit words, and compared word by word against database codes.
/// NOTE(review): the byte pointers are reinterpreted as uint64_t*, which
/// presumably requires the callers' buffers to be suitably aligned — confirm.
struct HammingComputerM8 {
    const uint64_t* a = nullptr; ///< query code as 64-bit words (not owned)
    int n = 0;                   ///< number of 64-bit words per code

    /// Builds an empty computer (a == nullptr, n == 0); call set() before
    /// hamming().
    HammingComputerM8() {}

    /// Equivalent to default construction followed by set(a8, code_size).
    HammingComputerM8(const uint8_t* a8, int code_size) {
        set(a8, code_size);
    }

    /// Registers the query code.
    /// @param a8         query code bytes
    /// @param code_size  size of a code in bytes, must be a multiple of 8
    void set(const uint8_t* a8, int code_size) {
        assert(code_size % 8 == 0);
        a = reinterpret_cast<const uint64_t*>(a8);
        n = code_size / 8;
    }

    /// @return number of differing bits between the query and code b8
    int hamming(const uint8_t* b8) const {
        const uint64_t* b = reinterpret_cast<const uint64_t*>(b8);
        int accu = 0;
        for (int i = 0; i < n; i++) {
            accu += popcount64(a[i] ^ b[i]);
        }
        return accu;
    }

    /// @return code size in bytes, as derived from the word count
    inline int get_code_size() const {
        return n * 8;
    }
};
/// Hamming distance computer for codes whose size is a multiple of 4 bytes.
///
/// The query code is not copied: only a pointer to it is kept, viewed as an
/// array of 32-bit words, and compared word by word against database codes.
/// NOTE(review): the byte pointers are reinterpreted as uint32_t*, which
/// presumably requires the callers' buffers to be suitably aligned — confirm.
struct HammingComputerM4 {
    const uint32_t* a = nullptr; ///< query code as 32-bit words (not owned)
    int n = 0;                   ///< number of 32-bit words per code

    /// Builds an empty computer (a == nullptr, n == 0); call set() before
    /// hamming().
    HammingComputerM4() {}

    /// Equivalent to default construction followed by set(a4, code_size).
    HammingComputerM4(const uint8_t* a4, int code_size) {
        set(a4, code_size);
    }

    /// Registers the query code.
    /// @param a4         query code bytes
    /// @param code_size  size of a code in bytes, must be a multiple of 4
    void set(const uint8_t* a4, int code_size) {
        assert(code_size % 4 == 0);
        a = reinterpret_cast<const uint32_t*>(a4);
        n = code_size / 4;
    }

    /// @return number of differing bits between the query and code b8
    int hamming(const uint8_t* b8) const {
        const uint32_t* b = reinterpret_cast<const uint32_t*>(b8);
        int accu = 0;
        for (int i = 0; i < n; i++) {
            // the 32-bit XOR is widened before being handed to popcount64
            accu += popcount64(a[i] ^ b[i]);
        }
        return accu;
    }

    /// @return code size in bytes, as derived from the word count
    inline int get_code_size() const {
        return n * 4;
    }
};
template <class T> template <class T>
void hamming_cpt_test( void hamming_cpt_test(
int code_size, int code_size,

View File

@ -281,31 +281,21 @@ struct FlatHammingDis : DistanceComputer {
} }
}; };
struct BuildDistanceComputer {
using T = DistanceComputer*;
template <class HammingComputer>
DistanceComputer* f(IndexBinaryFlat* flat_storage) {
return new FlatHammingDis<HammingComputer>(*flat_storage);
}
};
} // namespace } // namespace
DistanceComputer* IndexBinaryHNSW::get_distance_computer() const { DistanceComputer* IndexBinaryHNSW::get_distance_computer() const {
IndexBinaryFlat* flat_storage = dynamic_cast<IndexBinaryFlat*>(storage); IndexBinaryFlat* flat_storage = dynamic_cast<IndexBinaryFlat*>(storage);
FAISS_ASSERT(flat_storage != nullptr); FAISS_ASSERT(flat_storage != nullptr);
BuildDistanceComputer bd;
switch (code_size) { return dispatch_HammingComputer(code_size, bd, flat_storage);
case 4:
return new FlatHammingDis<HammingComputer4>(*flat_storage);
case 8:
return new FlatHammingDis<HammingComputer8>(*flat_storage);
case 16:
return new FlatHammingDis<HammingComputer16>(*flat_storage);
case 20:
return new FlatHammingDis<HammingComputer20>(*flat_storage);
case 32:
return new FlatHammingDis<HammingComputer32>(*flat_storage);
case 64:
return new FlatHammingDis<HammingComputer64>(*flat_storage);
default:
break;
}
return new FlatHammingDis<HammingComputerDefault>(*flat_storage);
} }
} // namespace faiss } // namespace faiss

View File

@ -176,6 +176,14 @@ void search_single_query_template(
} while (fe.next()); } while (fe.next());
} }
/// Consumer handed to dispatch_HammingComputer by search_single_query():
/// f<HC>() forwards its arguments to search_single_query_template<HC>.
///
/// NOTE(review): the argument pack is received by value, so any reference
/// parameter of search_single_query_template binds to a copy local to f().
/// Confirm that by-reference outputs (e.g. counters) still reach the caller.
struct Run_search_single_query {
    typedef void T; // result type of f()

    template <class HC, class... Args>
    T f(Args... params) {
        search_single_query_template<HC>(params...);
    }
};
template <class SearchResults> template <class SearchResults>
void search_single_query( void search_single_query(
const IndexBinaryHash& index, const IndexBinaryHash& index,
@ -184,29 +192,9 @@ void search_single_query(
size_t& n0, size_t& n0,
size_t& nlist, size_t& nlist,
size_t& ndis) { size_t& ndis) {
#define HC(name) \ Run_search_single_query r;
search_single_query_template<name>(index, q, res, n0, nlist, ndis); dispatch_HammingComputer(
switch (index.code_size) { index.code_size, r, index, q, res, n0, nlist, ndis);
case 4:
HC(HammingComputer4);
break;
case 8:
HC(HammingComputer8);
break;
case 16:
HC(HammingComputer16);
break;
case 20:
HC(HammingComputer20);
break;
case 32:
HC(HammingComputer32);
break;
default:
HC(HammingComputerDefault);
break;
}
#undef HC
} }
} // anonymous namespace } // anonymous namespace
@ -349,15 +337,15 @@ namespace {
template <class HammingComputer, class SearchResults> template <class HammingComputer, class SearchResults>
static void verify_shortlist( static void verify_shortlist(
const IndexBinaryFlat& index, const IndexBinaryFlat* index,
const uint8_t* q, const uint8_t* q,
const std::unordered_set<idx_t>& shortlist, const std::unordered_set<idx_t>& shortlist,
SearchResults& res) { SearchResults& res) {
size_t code_size = index.code_size; size_t code_size = index->code_size;
size_t nlist = 0, ndis = 0, n0 = 0; size_t nlist = 0, ndis = 0, n0 = 0;
HammingComputer hc(q, code_size); HammingComputer hc(q, code_size);
const uint8_t* codes = index.xb.data(); const uint8_t* codes = index->xb.data();
for (auto i : shortlist) { for (auto i : shortlist) {
int dis = hc.hamming(codes + i * code_size); int dis = hc.hamming(codes + i * code_size);
@ -365,6 +353,14 @@ static void verify_shortlist(
} }
} }
/// Consumer handed to dispatch_HammingComputer by search_1_query_multihash():
/// f<HC>() forwards its arguments to verify_shortlist<HC>.
///
/// NOTE(review): the argument pack is received by value, so the shortlist and
/// the result object are copied before verify_shortlist sees them — confirm
/// this is intended.
struct Run_verify_shortlist {
    typedef void T; // result type of f()

    template <class HC, class... Args>
    void f(Args... params) {
        verify_shortlist<HC>(params...);
    }
};
template <class SearchResults> template <class SearchResults>
void search_1_query_multihash( void search_1_query_multihash(
const IndexBinaryMultiHash& index, const IndexBinaryMultiHash& index,
@ -405,29 +401,9 @@ void search_1_query_multihash(
ndis += shortlist.size(); ndis += shortlist.size();
// verify shortlist // verify shortlist
Run_verify_shortlist r;
#define HC(name) verify_shortlist<name>(*index.storage, xi, shortlist, res) dispatch_HammingComputer(
switch (index.code_size) { index.code_size, r, index.storage, xi, shortlist, res);
case 4:
HC(HammingComputer4);
break;
case 8:
HC(HammingComputer8);
break;
case 16:
HC(HammingComputer16);
break;
case 20:
HC(HammingComputer20);
break;
case 32:
HC(HammingComputer32);
break;
default:
HC(HammingComputerDefault);
break;
}
#undef HC
} }
} // anonymous namespace } // anonymous namespace

View File

@ -370,7 +370,7 @@ struct IVFBinaryScannerL2 : BinaryInvertedListScanner {
}; };
void search_knn_hamming_heap( void search_knn_hamming_heap(
const IndexBinaryIVF& ivf, const IndexBinaryIVF* ivf,
size_t n, size_t n,
const uint8_t* __restrict x, const uint8_t* __restrict x,
idx_t k, idx_t k,
@ -380,10 +380,10 @@ void search_knn_hamming_heap(
idx_t* __restrict labels, idx_t* __restrict labels,
bool store_pairs, bool store_pairs,
const IVFSearchParameters* params) { const IVFSearchParameters* params) {
idx_t nprobe = params ? params->nprobe : ivf.nprobe; idx_t nprobe = params ? params->nprobe : ivf->nprobe;
nprobe = std::min((idx_t)ivf.nlist, nprobe); nprobe = std::min((idx_t)ivf->nlist, nprobe);
idx_t max_codes = params ? params->max_codes : ivf.max_codes; idx_t max_codes = params ? params->max_codes : ivf->max_codes;
MetricType metric_type = ivf.metric_type; MetricType metric_type = ivf->metric_type;
// almost verbatim copy from IndexIVF::search_preassigned // almost verbatim copy from IndexIVF::search_preassigned
@ -394,11 +394,11 @@ void search_knn_hamming_heap(
#pragma omp parallel if (n > 1) reduction(+ : nlistv, ndis, nheap) #pragma omp parallel if (n > 1) reduction(+ : nlistv, ndis, nheap)
{ {
std::unique_ptr<BinaryInvertedListScanner> scanner( std::unique_ptr<BinaryInvertedListScanner> scanner(
ivf.get_InvertedListScanner(store_pairs)); ivf->get_InvertedListScanner(store_pairs));
#pragma omp for #pragma omp for
for (idx_t i = 0; i < n; i++) { for (idx_t i = 0; i < n; i++) {
const uint8_t* xi = x + i * ivf.code_size; const uint8_t* xi = x + i * ivf->code_size;
scanner->set_query(xi); scanner->set_query(xi);
const idx_t* keysi = keys + i * nprobe; const idx_t* keysi = keys + i * nprobe;
@ -420,23 +420,24 @@ void search_knn_hamming_heap(
continue; continue;
} }
FAISS_THROW_IF_NOT_FMT( FAISS_THROW_IF_NOT_FMT(
key < (idx_t)ivf.nlist, key < (idx_t)ivf->nlist,
"Invalid key=%" PRId64 " at ik=%zd nlist=%zd\n", "Invalid key=%" PRId64 " at ik=%zd nlist=%zd\n",
key, key,
ik, ik,
ivf.nlist); ivf->nlist);
scanner->set_list(key, coarse_dis[i * nprobe + ik]); scanner->set_list(key, coarse_dis[i * nprobe + ik]);
nlistv++; nlistv++;
size_t list_size = ivf.invlists->list_size(key); size_t list_size = ivf->invlists->list_size(key);
InvertedLists::ScopedCodes scodes(ivf.invlists, key); InvertedLists::ScopedCodes scodes(ivf->invlists, key);
std::unique_ptr<InvertedLists::ScopedIds> sids; std::unique_ptr<InvertedLists::ScopedIds> sids;
const idx_t* ids = nullptr; const idx_t* ids = nullptr;
if (!store_pairs) { if (!store_pairs) {
sids.reset(new InvertedLists::ScopedIds(ivf.invlists, key)); sids.reset(
new InvertedLists::ScopedIds(ivf->invlists, key));
ids = sids->get(); ids = sids->get();
} }
@ -466,7 +467,7 @@ void search_knn_hamming_heap(
template <class HammingComputer, bool store_pairs> template <class HammingComputer, bool store_pairs>
void search_knn_hamming_count( void search_knn_hamming_count(
const IndexBinaryIVF& ivf, const IndexBinaryIVF* ivf,
size_t nx, size_t nx,
const uint8_t* __restrict x, const uint8_t* __restrict x,
const idx_t* __restrict keys, const idx_t* __restrict keys,
@ -474,21 +475,21 @@ void search_knn_hamming_count(
int32_t* __restrict distances, int32_t* __restrict distances,
idx_t* __restrict labels, idx_t* __restrict labels,
const IVFSearchParameters* params) { const IVFSearchParameters* params) {
const int nBuckets = ivf.d + 1; const int nBuckets = ivf->d + 1;
std::vector<int> all_counters(nx * nBuckets, 0); std::vector<int> all_counters(nx * nBuckets, 0);
std::unique_ptr<idx_t[]> all_ids_per_dis(new idx_t[nx * nBuckets * k]); std::unique_ptr<idx_t[]> all_ids_per_dis(new idx_t[nx * nBuckets * k]);
idx_t nprobe = params ? params->nprobe : ivf.nprobe; idx_t nprobe = params ? params->nprobe : ivf->nprobe;
nprobe = std::min((idx_t)ivf.nlist, nprobe); nprobe = std::min((idx_t)ivf->nlist, nprobe);
idx_t max_codes = params ? params->max_codes : ivf.max_codes; idx_t max_codes = params ? params->max_codes : ivf->max_codes;
std::vector<HCounterState<HammingComputer>> cs; std::vector<HCounterState<HammingComputer>> cs;
for (size_t i = 0; i < nx; ++i) { for (size_t i = 0; i < nx; ++i) {
cs.push_back(HCounterState<HammingComputer>( cs.push_back(HCounterState<HammingComputer>(
all_counters.data() + i * nBuckets, all_counters.data() + i * nBuckets,
all_ids_per_dis.get() + i * nBuckets * k, all_ids_per_dis.get() + i * nBuckets * k,
x + i * ivf.code_size, x + i * ivf->code_size,
ivf.d, ivf->d,
k)); k));
} }
@ -508,27 +509,28 @@ void search_knn_hamming_count(
continue; continue;
} }
FAISS_THROW_IF_NOT_FMT( FAISS_THROW_IF_NOT_FMT(
key < (idx_t)ivf.nlist, key < (idx_t)ivf->nlist,
"Invalid key=%" PRId64 " at ik=%zd nlist=%zd\n", "Invalid key=%" PRId64 " at ik=%zd nlist=%zd\n",
key, key,
ik, ik,
ivf.nlist); ivf->nlist);
nlistv++; nlistv++;
size_t list_size = ivf.invlists->list_size(key); size_t list_size = ivf->invlists->list_size(key);
InvertedLists::ScopedCodes scodes(ivf.invlists, key); InvertedLists::ScopedCodes scodes(ivf->invlists, key);
const uint8_t* list_vecs = scodes.get(); const uint8_t* list_vecs = scodes.get();
const idx_t* ids = const idx_t* ids =
store_pairs ? nullptr : ivf.invlists->get_ids(key); store_pairs ? nullptr : ivf->invlists->get_ids(key);
for (size_t j = 0; j < list_size; j++) { for (size_t j = 0; j < list_size; j++) {
const uint8_t* yj = list_vecs + ivf.code_size * j; const uint8_t* yj = list_vecs + ivf->code_size * j;
idx_t id = store_pairs ? (key << 32 | j) : ids[j]; idx_t id = store_pairs ? (key << 32 | j) : ids[j];
csi.update_counter(yj, id); csi.update_counter(yj, id);
} }
if (ids) if (ids) {
ivf.invlists->release_ids(key, ids); ivf->invlists->release_ids(key, ids);
}
nscan += list_size; nscan += list_size;
if (max_codes && nscan >= max_codes) if (max_codes && nscan >= max_codes)
@ -634,7 +636,7 @@ struct BlockSearchVariableK {
template <class HammingComputer> template <class HammingComputer>
void search_knn_hamming_per_invlist( void search_knn_hamming_per_invlist(
const IndexBinaryIVF& ivf, const IndexBinaryIVF* ivf,
size_t n, size_t n,
const uint8_t* __restrict x, const uint8_t* __restrict x,
idx_t k, idx_t k,
@ -644,12 +646,12 @@ void search_knn_hamming_per_invlist(
idx_t* __restrict labels, idx_t* __restrict labels,
bool store_pairs, bool store_pairs,
const IVFSearchParameters* params) { const IVFSearchParameters* params) {
idx_t nprobe = params ? params->nprobe : ivf.nprobe; idx_t nprobe = params ? params->nprobe : ivf->nprobe;
nprobe = std::min((idx_t)ivf.nlist, nprobe); nprobe = std::min((idx_t)ivf->nlist, nprobe);
idx_t max_codes = params ? params->max_codes : ivf.max_codes; idx_t max_codes = params ? params->max_codes : ivf->max_codes;
FAISS_THROW_IF_NOT(max_codes == 0); FAISS_THROW_IF_NOT(max_codes == 0);
FAISS_THROW_IF_NOT(!store_pairs); FAISS_THROW_IF_NOT(!store_pairs);
MetricType metric_type = ivf.metric_type; MetricType metric_type = ivf->metric_type;
// reorder buckets // reorder buckets
std::vector<int64_t> lims(n + 1); std::vector<int64_t> lims(n + 1);
@ -658,18 +660,18 @@ void search_knn_hamming_per_invlist(
for (idx_t i = 0; i < n * nprobe; i++) { for (idx_t i = 0; i < n * nprobe; i++) {
keys[i] = keys_in[i]; keys[i] = keys_in[i];
} }
matrix_bucket_sort_inplace(n, nprobe, keys, ivf.nlist, lims.data(), 0); matrix_bucket_sort_inplace(n, nprobe, keys, ivf->nlist, lims.data(), 0);
using C = CMax<int32_t, idx_t>; using C = CMax<int32_t, idx_t>;
heap_heapify<C>(n * k, distances, labels); heap_heapify<C>(n * k, distances, labels);
const size_t code_size = ivf.code_size; const size_t code_size = ivf->code_size;
for (idx_t l = 0; l < ivf.nlist; l++) { for (idx_t l = 0; l < ivf->nlist; l++) {
idx_t l0 = lims[l], nq = lims[l + 1] - l0; idx_t l0 = lims[l], nq = lims[l + 1] - l0;
InvertedLists::ScopedCodes scodes(ivf.invlists, l); InvertedLists::ScopedCodes scodes(ivf->invlists, l);
InvertedLists::ScopedIds sidx(ivf.invlists, l); InvertedLists::ScopedIds sidx(ivf->invlists, l);
idx_t nb = ivf.invlists->list_size(l); idx_t nb = ivf->invlists->list_size(l);
const uint8_t* bcodes = scodes.get(); const uint8_t* bcodes = scodes.get();
const idx_t* ids = sidx.get(); const idx_t* ids = sidx.get();
@ -735,151 +737,70 @@ void search_knn_hamming_per_invlist(
} }
} }
template <bool store_pairs> struct Run_search_knn_hamming_per_invlist {
void search_knn_hamming_count_1( using T = void;
const IndexBinaryIVF& ivf,
size_t nx,
const uint8_t* x,
const idx_t* keys,
int k,
int32_t* distances,
idx_t* labels,
const IVFSearchParameters* params) {
switch (ivf.code_size) {
#define HANDLE_CS(cs) \
case cs: \
search_knn_hamming_count<HammingComputer##cs, store_pairs>( \
ivf, nx, x, keys, k, distances, labels, params); \
break;
HANDLE_CS(4);
HANDLE_CS(8);
HANDLE_CS(16);
HANDLE_CS(20);
HANDLE_CS(32);
HANDLE_CS(64);
#undef HANDLE_CS
default:
search_knn_hamming_count<HammingComputerDefault, store_pairs>(
ivf, nx, x, keys, k, distances, labels, params);
break;
}
}
void search_knn_hamming_per_invlist_1( template <class HammingComputer, class... Types>
const IndexBinaryIVF& ivf, void f(Types... args) {
size_t n, search_knn_hamming_per_invlist<HammingComputer>(args...);
const uint8_t* x,
idx_t k,
const idx_t* keys,
const int32_t* coarse_dis,
int32_t* distances,
idx_t* labels,
bool store_pairs,
const IVFSearchParameters* params) {
switch (ivf.code_size) {
#define HANDLE_CS(cs) \
case cs: \
search_knn_hamming_per_invlist<HammingComputer##cs>( \
ivf, \
n, \
x, \
k, \
keys, \
coarse_dis, \
distances, \
labels, \
store_pairs, \
params); \
break;
HANDLE_CS(4);
HANDLE_CS(8);
HANDLE_CS(16);
HANDLE_CS(20);
HANDLE_CS(32);
HANDLE_CS(64);
#undef HANDLE_CS
default:
search_knn_hamming_per_invlist<HammingComputerDefault>(
ivf,
n,
x,
k,
keys,
coarse_dis,
distances,
labels,
store_pairs,
params);
break;
} }
} };
/// Consumer handed to dispatch_HammingComputer by
/// IndexBinaryIVF::search_preassigned(): f<HC>() forwards its arguments to
/// search_knn_hamming_count<HC, store_pairs>. The store_pairs flag is fixed
/// at compile time by the struct's template argument.
template <bool store_pairs>
struct Run_search_knn_hamming_count {
    typedef void T; // result type of f()

    template <class HC, class... Args>
    void f(Args... params) {
        search_knn_hamming_count<HC, store_pairs>(params...);
    }
};
/// Consumer handed to dispatch_HammingComputer by
/// IndexBinaryIVF::get_InvertedListScanner(): f<HC>() allocates an
/// IVFBinaryScannerL2<HC> and returns the raw pointer.
struct BuildScanner {
    typedef BinaryInvertedListScanner* T; // result type of f()

    template <class HC>
    T f(size_t code_size, bool store_pairs) {
        T scanner = new IVFBinaryScannerL2<HC>(code_size, store_pairs);
        return scanner;
    }
};
} // anonymous namespace } // anonymous namespace
BinaryInvertedListScanner* IndexBinaryIVF::get_InvertedListScanner( BinaryInvertedListScanner* IndexBinaryIVF::get_InvertedListScanner(
bool store_pairs) const { bool store_pairs) const {
#define HC(name) return new IVFBinaryScannerL2<name>(code_size, store_pairs) BuildScanner bs;
switch (code_size) { return dispatch_HammingComputer(code_size, bs, code_size, store_pairs);
case 4:
HC(HammingComputer4);
case 8:
HC(HammingComputer8);
case 16:
HC(HammingComputer16);
case 20:
HC(HammingComputer20);
case 32:
HC(HammingComputer32);
case 64:
HC(HammingComputer64);
default:
HC(HammingComputerDefault);
}
#undef HC
} }
void IndexBinaryIVF::search_preassigned( void IndexBinaryIVF::search_preassigned(
idx_t n, idx_t n,
const uint8_t* x, const uint8_t* x,
idx_t k, idx_t k,
const idx_t* idx, const idx_t* cidx,
const int32_t* coarse_dis, const int32_t* cdis,
int32_t* distances, int32_t* dis,
idx_t* labels, idx_t* idx,
bool store_pairs, bool store_pairs,
const IVFSearchParameters* params) const { const IVFSearchParameters* params) const {
if (per_invlist_search) { if (per_invlist_search) {
search_knn_hamming_per_invlist_1( Run_search_knn_hamming_per_invlist r;
*this, // clang-format off
n, dispatch_HammingComputer(
x, code_size, r, this, n, x, k,
k, cidx, cdis, dis, idx, store_pairs, params);
idx, // clang-format on
coarse_dis,
distances,
labels,
store_pairs,
params);
} else if (use_heap) { } else if (use_heap) {
search_knn_hamming_heap( search_knn_hamming_heap(
*this, this, n, x, k, cidx, cdis, dis, idx, store_pairs, params);
n, } else if (store_pairs) { // !use_heap && store_pairs
x, Run_search_knn_hamming_count<true> r;
k, dispatch_HammingComputer(
idx, code_size, r, this, n, x, cidx, k, dis, idx, params);
coarse_dis, } else { // !use_heap && !store_pairs
distances, Run_search_knn_hamming_count<false> r;
labels, dispatch_HammingComputer(
store_pairs, code_size, r, this, n, x, cidx, k, dis, idx, params);
params);
} else {
if (store_pairs) {
search_knn_hamming_count_1<true>(
*this, n, x, idx, k, distances, labels, params);
} else {
search_knn_hamming_count_1<false>(
*this, n, x, idx, k, distances, labels, params);
}
} }
} }

View File

@ -1154,30 +1154,23 @@ struct IVFPQScannerT : QueryTables {
{ indexIVFPQ_stats.n_hamming_pass += n_hamming_pass; } { indexIVFPQ_stats.n_hamming_pass += n_hamming_pass; }
} }
/// Consumer handed to dispatch_HammingComputer by scan_list_polysemous():
/// f<HC>() calls scan_list_polysemous_hc<HC, SearchResultType> on the scanner
/// given as first argument, forwarding the remaining arguments.
///
/// NOTE(review): the trailing arguments are received by value, so the callee
/// works on copies of them — confirm this is fine for the result object.
template <class SearchResultType>
struct Run_scan_list_polysemous_hc {
    typedef void T; // result type of f()

    template <class HC, class... Args>
    void f(const IVFPQScannerT* scanner, Args... params) {
        scanner->scan_list_polysemous_hc<HC, SearchResultType>(params...);
    }
};
template <class SearchResultType> template <class SearchResultType>
void scan_list_polysemous( void scan_list_polysemous(
size_t ncode, size_t ncode,
const uint8_t* codes, const uint8_t* codes,
SearchResultType& res) const { SearchResultType& res) const {
switch (pq.code_size) { Run_scan_list_polysemous_hc<SearchResultType> r;
#define HANDLE_CODE_SIZE(cs) \ dispatch_HammingComputer(pq.code_size, r, this, ncode, codes, res);
case cs: \
scan_list_polysemous_hc<HammingComputer##cs, SearchResultType>( \
ncode, codes, res); \
break
HANDLE_CODE_SIZE(4);
HANDLE_CODE_SIZE(8);
HANDLE_CODE_SIZE(16);
HANDLE_CODE_SIZE(20);
HANDLE_CODE_SIZE(32);
HANDLE_CODE_SIZE(64);
#undef HANDLE_CODE_SIZE
default:
scan_list_polysemous_hc<
HammingComputerDefault,
SearchResultType>(ncode, codes, res);
break;
}
} }
}; };

View File

@ -288,26 +288,23 @@ struct IVFScanner : InvertedListScanner {
} }
}; };
/// Consumer handed to dispatch_HammingComputer by
/// IndexIVFSpectralHash::get_InvertedListScanner(): f<HC>() allocates an
/// IVFScanner<HC> for the index and returns the raw pointer.
struct BuildScanner {
    typedef InvertedListScanner* T; // result type of f()

    template <class HC>
    static T f(const IndexIVFSpectralHash* index, bool store_pairs) {
        T scanner = new IVFScanner<HC>(index, store_pairs);
        return scanner;
    }
};
} // anonymous namespace } // anonymous namespace
InvertedListScanner* IndexIVFSpectralHash::get_InvertedListScanner( InvertedListScanner* IndexIVFSpectralHash::get_InvertedListScanner(
bool store_pairs, bool store_pairs,
const IDSelector* sel) const { const IDSelector* sel) const {
FAISS_THROW_IF_NOT(!sel); FAISS_THROW_IF_NOT(!sel);
switch (code_size) { BuildScanner bs;
#define HANDLE_CODE_SIZE(cs) \ return dispatch_HammingComputer(code_size, bs, this, store_pairs);
case cs: \
return new IVFScanner<HammingComputer##cs>(this, store_pairs)
HANDLE_CODE_SIZE(4);
HANDLE_CODE_SIZE(8);
HANDLE_CODE_SIZE(16);
HANDLE_CODE_SIZE(20);
HANDLE_CODE_SIZE(32);
HANDLE_CODE_SIZE(64);
#undef HANDLE_CODE_SIZE
default:
return new IVFScanner<HammingComputerDefault>(this, store_pairs);
}
} }
void IndexIVFSpectralHash::replace_vt(VectorTransform* vt_in, bool own) { void IndexIVFSpectralHash::replace_vt(VectorTransform* vt_in, bool own) {

View File

@ -263,21 +263,23 @@ void IndexPQStats::reset() {
IndexPQStats indexPQ_stats; IndexPQStats indexPQ_stats;
namespace {
template <class HammingComputer> template <class HammingComputer>
static size_t polysemous_inner_loop( size_t polysemous_inner_loop(
const IndexPQ& index, const IndexPQ* index,
const float* dis_table_qi, const float* dis_table_qi,
const uint8_t* q_code, const uint8_t* q_code,
size_t k, size_t k,
float* heap_dis, float* heap_dis,
int64_t* heap_ids, int64_t* heap_ids,
int ht) { int ht) {
int M = index.pq.M; int M = index->pq.M;
int code_size = index.pq.code_size; int code_size = index->pq.code_size;
int ksub = index.pq.ksub; int ksub = index->pq.ksub;
size_t ntotal = index.ntotal; size_t ntotal = index->ntotal;
const uint8_t* b_code = index.codes.data(); const uint8_t* b_code = index->codes.data();
size_t n_pass_i = 0; size_t n_pass_i = 0;
@ -305,6 +307,16 @@ static size_t polysemous_inner_loop(
return n_pass_i; return n_pass_i;
} }
/// Consumer handed to dispatch_HammingComputer by
/// IndexPQ::search_core_polysemous(): f<HC>() forwards its arguments to
/// polysemous_inner_loop<HC> and hands back the count it returns.
struct Run_polysemous_inner_loop {
    typedef size_t T; // result type of f()

    template <class HC, class... Args>
    size_t f(Args... params) {
        const size_t n_pass = polysemous_inner_loop<HC>(params...);
        return n_pass;
    }
};
} // anonymous namespace
void IndexPQ::search_core_polysemous( void IndexPQ::search_core_polysemous(
idx_t n, idx_t n,
const float* x, const float* x,
@ -355,45 +367,24 @@ void IndexPQ::search_core_polysemous(
maxheap_heapify(k, heap_dis, heap_ids); maxheap_heapify(k, heap_dis, heap_ids);
if (!generalized_hamming) { if (!generalized_hamming) {
switch (pq.code_size) { Run_polysemous_inner_loop r;
#define DISPATCH(cs) \ n_pass += dispatch_HammingComputer(
case cs: \ pq.code_size,
n_pass += polysemous_inner_loop<HammingComputer##cs>( \ r,
*this, \ this,
dis_table_qi, \ dis_table_qi,
q_code, \ q_code,
k, \ k,
heap_dis, \ heap_dis,
heap_ids, \ heap_ids,
polysemous_ht); \ polysemous_ht);
break;
DISPATCH(4)
DISPATCH(8)
DISPATCH(16)
DISPATCH(32)
DISPATCH(20)
default:
if (pq.code_size % 4 == 0) {
n_pass += polysemous_inner_loop<HammingComputerDefault>(
*this,
dis_table_qi,
q_code,
k,
heap_dis,
heap_ids,
polysemous_ht);
} else {
bad_code_size++;
}
break;
}
#undef DISPATCH
} else { // generalized hamming } else { // generalized hamming
switch (pq.code_size) { switch (pq.code_size) {
#define DISPATCH(cs) \ #define DISPATCH(cs) \
case cs: \ case cs: \
n_pass += polysemous_inner_loop<GenHammingComputer##cs>( \ n_pass += polysemous_inner_loop<GenHammingComputer##cs>( \
*this, \ this, \
dis_table_qi, \ dis_table_qi, \
q_code, \ q_code, \
k, \ k, \
@ -407,7 +398,7 @@ void IndexPQ::search_core_polysemous(
default: default:
if (pq.code_size % 8 == 0) { if (pq.code_size % 8 == 0) {
n_pass += polysemous_inner_loop<GenHammingComputerM8>( n_pass += polysemous_inner_loop<GenHammingComputerM8>(
*this, this,
dis_table_qi, dis_table_qi,
q_code, q_code,
k, k,

View File

@ -5,14 +5,13 @@
* LICENSE file in the root directory of this source tree. * LICENSE file in the root directory of this source tree.
*/ */
// -*- c++ -*-
/* /*
* Implementation of Hamming related functions (distances, smallest distance * Implementation of Hamming related functions (distances, smallest distance
* selection with regular heap|radix and probabilistic heap|radix. * selection with regular heap|radix and probabilistic heap|radix.
* *
* IMPLEMENTATION NOTES * IMPLEMENTATION NOTES
* Bitvectors are generally assumed to be multiples of 64 bits. * Optimal speed is typically obtained for vector sizes of multiples of 64
* bits.
* *
* hamdis_t is used for distances because at this time * hamdis_t is used for distances because at this time
* it is not clear how we will need to balance * it is not clear how we will need to balance
@ -20,8 +19,6 @@
* - memory usage * - memory usage
* - cache-misses when dealing with large volumes of data (lower bits is better) * - cache-misses when dealing with large volumes of data (lower bits is better)
* *
* The hamdis_t should optimally be compatible with one of the Torch Storage
* (Byte,Short,Long) and therefore should be signed for 2-bytes and 4-bytes
*/ */
#include <faiss/utils/hamming.h> #include <faiss/utils/hamming.h>
@ -165,9 +162,11 @@ size_t match_hamming_thres(
return posm; return posm;
} }
namespace {
/* Return closest neighbors w.r.t Hamming distance, using a heap. */ /* Return closest neighbors w.r.t Hamming distance, using a heap. */
template <class HammingComputer> template <class HammingComputer>
static void hammings_knn_hc( void hammings_knn_hc(
int bytes_per_code, int bytes_per_code,
int_maxheap_array_t* __restrict ha, int_maxheap_array_t* __restrict ha,
const uint8_t* __restrict bs1, const uint8_t* __restrict bs1,
@ -234,7 +233,7 @@ static void hammings_knn_hc(
/* Return closest neighbors w.r.t Hamming distance, using max count. */ /* Return closest neighbors w.r.t Hamming distance, using max count. */
template <class HammingComputer> template <class HammingComputer>
static void hammings_knn_mc( void hammings_knn_mc(
int bytes_per_code, int bytes_per_code,
const uint8_t* __restrict a, const uint8_t* __restrict a,
const uint8_t* __restrict b, const uint8_t* __restrict b,
@ -287,6 +286,63 @@ static void hammings_knn_mc(
} }
} }
/// Range search in Hamming space: for each of the na query codes in a, scan
/// the nb database codes in b and record every database code whose Hamming
/// distance to the query is strictly smaller than radius.
///
/// @param a          query codes, na * code_size bytes
/// @param b          database codes, nb * code_size bytes
/// @param na         number of query codes
/// @param nb         number of database codes
/// @param radius     exclusive upper bound on the reported distances
/// @param code_size  size of one code in bytes
/// @param res        receives the (distance, database index) matches
template <class HammingComputer>
void hamming_range_search(
        const uint8_t* a,
        const uint8_t* b,
        size_t na,
        size_t nb,
        int radius,
        size_t code_size,
        RangeSearchResult* res) {
    // The OpenMP loop index is signed; convert the bound once so that the
    // loop condition compares values of the same signedness.
    const int64_t nq = static_cast<int64_t>(na);

#pragma omp parallel
    {
        // per-thread partial result, merged into res by finalize()
        RangeSearchPartialResult pres(res);

#pragma omp for
        for (int64_t i = 0; i < nq; i++) {
            HammingComputer hc(a + i * code_size, code_size);
            const uint8_t* yi = b;
            RangeQueryResult& qres = pres.new_result(i);

            for (size_t j = 0; j < nb; j++) {
                int dis = hc.hamming(yi);
                if (dis < radius) {
                    qres.add(dis, j);
                }
                yi += code_size;
            }
        }
        pres.finalize();
    }
}
/// Consumer for dispatch_HammingComputer: f<HC>() forwards its arguments to
/// the hammings_knn_hc<HC> template.
struct Run_hammings_knn_hc {
    typedef void T; // result type of f()

    template <class HC, class... Args>
    void f(Args... params) {
        hammings_knn_hc<HC>(params...);
    }
};
/// Consumer for dispatch_HammingComputer: f<HC>() forwards its arguments to
/// the hammings_knn_mc<HC> template.
struct Run_hammings_knn_mc {
    typedef void T; // result type of f()

    template <class HC, class... Args>
    void f(Args... params) {
        hammings_knn_mc<HC>(params...);
    }
};
/// Consumer for dispatch_HammingComputer: f<HC>() forwards its arguments to
/// the hamming_range_search<HC> template.
struct Run_hamming_range_search {
    typedef void T; // result type of f()

    template <class HC, class... Args>
    void f(Args... params) {
        hamming_range_search<HC>(params...);
    }
};
} // namespace
/* Functions to maps vectors to bits. Assume proper allocation done beforehand, /* Functions to maps vectors to bits. Assume proper allocation done beforehand,
meaning that b should be be able to receive as many bits as x may produce. */ meaning that b should be be able to receive as many bits as x may produce. */
@ -437,28 +493,9 @@ void hammings_knn_hc(
size_t ncodes, size_t ncodes,
int order, int order,
ApproxTopK_mode_t approx_topk_mode) { ApproxTopK_mode_t approx_topk_mode) {
switch (ncodes) { Run_hammings_knn_hc r;
case 4: dispatch_HammingComputer(
hammings_knn_hc<faiss::HammingComputer4>( ncodes, r, ncodes, ha, a, b, nb, order, true, approx_topk_mode);
4, ha, a, b, nb, order, true, approx_topk_mode);
break;
case 8:
hammings_knn_hc<faiss::HammingComputer8>(
8, ha, a, b, nb, order, true, approx_topk_mode);
break;
case 16:
hammings_knn_hc<faiss::HammingComputer16>(
16, ha, a, b, nb, order, true, approx_topk_mode);
break;
case 32:
hammings_knn_hc<faiss::HammingComputer32>(
32, ha, a, b, nb, order, true, approx_topk_mode);
break;
default:
hammings_knn_hc<faiss::HammingComputerDefault>(
ncodes, ha, a, b, nb, order, true, approx_topk_mode);
break;
}
} }
void hammings_knn_mc( void hammings_knn_mc(
@ -470,58 +507,9 @@ void hammings_knn_mc(
size_t ncodes, size_t ncodes,
int32_t* __restrict distances, int32_t* __restrict distances,
int64_t* __restrict labels) { int64_t* __restrict labels) {
switch (ncodes) { Run_hammings_knn_mc r;
case 4: dispatch_HammingComputer(
hammings_knn_mc<faiss::HammingComputer4>( ncodes, r, ncodes, a, b, na, nb, k, distances, labels);
4, a, b, na, nb, k, distances, labels);
break;
case 8:
hammings_knn_mc<faiss::HammingComputer8>(
8, a, b, na, nb, k, distances, labels);
break;
case 16:
hammings_knn_mc<faiss::HammingComputer16>(
16, a, b, na, nb, k, distances, labels);
break;
case 32:
hammings_knn_mc<faiss::HammingComputer32>(
32, a, b, na, nb, k, distances, labels);
break;
default:
hammings_knn_mc<faiss::HammingComputerDefault>(
ncodes, a, b, na, nb, k, distances, labels);
break;
}
}
/// Range search in Hamming space: for each of the na query codes in a, scan
/// the nb database codes in b and record every database code whose Hamming
/// distance to the query is strictly smaller than radius.
///
/// @param a          query codes, na * code_size bytes
/// @param b          database codes, nb * code_size bytes
/// @param na         number of query codes
/// @param nb         number of database codes
/// @param radius     exclusive upper bound on the reported distances
/// @param code_size  size of one code in bytes
/// @param res        receives the (distance, database index) matches
template <class HammingComputer>
static void hamming_range_search_template(
        const uint8_t* a,
        const uint8_t* b,
        size_t na,
        size_t nb,
        int radius,
        size_t code_size,
        RangeSearchResult* res) {
    // The OpenMP loop index is signed; convert the bound once so that the
    // loop condition compares values of the same signedness.
    const int64_t nq = static_cast<int64_t>(na);

#pragma omp parallel
    {
        // per-thread partial result, merged into res by finalize()
        RangeSearchPartialResult pres(res);

#pragma omp for
        for (int64_t i = 0; i < nq; i++) {
            HammingComputer hc(a + i * code_size, code_size);
            const uint8_t* yi = b;
            RangeQueryResult& qres = pres.new_result(i);

            for (size_t j = 0; j < nb; j++) {
                int dis = hc.hamming(yi);
                if (dis < radius) {
                    qres.add(dis, j);
                }
                yi += code_size;
            }
        }
        pres.finalize();
    }
}
void hamming_range_search( void hamming_range_search(
@ -532,27 +520,9 @@ void hamming_range_search(
int radius, int radius,
size_t code_size, size_t code_size,
RangeSearchResult* result) { RangeSearchResult* result) {
#define HC(name) \ Run_hamming_range_search r;
hamming_range_search_template<name>(a, b, na, nb, radius, code_size, result) dispatch_HammingComputer(
code_size, r, a, b, na, nb, radius, code_size, result);
switch (code_size) {
case 4:
HC(HammingComputer4);
break;
case 8:
HC(HammingComputer8);
break;
case 16:
HC(HammingComputer16);
break;
case 32:
HC(HammingComputer32);
break;
default:
HC(HammingComputerDefault);
break;
}
#undef HC
} }
/* Count number of matches given a max threshold */ /* Count number of matches given a max threshold */

View File

@ -345,93 +345,6 @@ struct HammingComputerDefault {
} }
}; };
// more inefficient than HammingComputerDefault (obsolete)
/// Hamming distance computer for codes whose size is a multiple of 8 bytes.
///
/// The query code is not copied: only a pointer to it is kept, viewed as an
/// array of 64-bit words, and compared word by word against database codes.
/// NOTE(review): the byte pointers are reinterpreted as uint64_t*, which
/// presumably requires the callers' buffers to be suitably aligned — confirm.
struct HammingComputerM8 {
    const uint64_t* a = nullptr; ///< query code as 64-bit words (not owned)
    int n = 0;                   ///< number of 64-bit words per code

    /// Builds an empty computer (a == nullptr, n == 0); call set() before
    /// hamming().
    HammingComputerM8() {}

    /// Equivalent to default construction followed by set(a8, code_size).
    HammingComputerM8(const uint8_t* a8, int code_size) {
        set(a8, code_size);
    }

    /// Registers the query code.
    /// @param a8         query code bytes
    /// @param code_size  size of a code in bytes, must be a multiple of 8
    void set(const uint8_t* a8, int code_size) {
        assert(code_size % 8 == 0);
        a = reinterpret_cast<const uint64_t*>(a8);
        n = code_size / 8;
    }

    /// @return number of differing bits between the query and code b8
    int hamming(const uint8_t* b8) const {
        const uint64_t* b = reinterpret_cast<const uint64_t*>(b8);
        int accu = 0;
        for (int i = 0; i < n; i++) {
            accu += popcount64(a[i] ^ b[i]);
        }
        return accu;
    }

    /// @return code size in bytes, as derived from the word count
    inline int get_code_size() const {
        return n * 8;
    }
};
// more inefficient than HammingComputerDefault (obsolete)
/// Hamming distance computer for codes whose size is a multiple of 4 bytes.
///
/// The query code is not copied: only a pointer to it is kept, viewed as an
/// array of 32-bit words, and compared word by word against database codes.
/// NOTE(review): the byte pointers are reinterpreted as uint32_t*, which
/// presumably requires the callers' buffers to be suitably aligned — confirm.
struct HammingComputerM4 {
    const uint32_t* a = nullptr; ///< query code as 32-bit words (not owned)
    int n = 0;                   ///< number of 32-bit words per code

    /// Builds an empty computer (a == nullptr, n == 0); call set() before
    /// hamming().
    HammingComputerM4() {}

    /// Equivalent to default construction followed by set(a4, code_size).
    HammingComputerM4(const uint8_t* a4, int code_size) {
        set(a4, code_size);
    }

    /// Registers the query code.
    /// @param a4         query code bytes
    /// @param code_size  size of a code in bytes, must be a multiple of 4
    void set(const uint8_t* a4, int code_size) {
        assert(code_size % 4 == 0);
        a = reinterpret_cast<const uint32_t*>(a4);
        n = code_size / 4;
    }

    /// @return number of differing bits between the query and code b8
    int hamming(const uint8_t* b8) const {
        const uint32_t* b = reinterpret_cast<const uint32_t*>(b8);
        int accu = 0;
        for (int i = 0; i < n; i++) {
            // the 32-bit XOR is widened before being handed to popcount64
            accu += popcount64(a[i] ^ b[i]);
        }
        return accu;
    }

    /// @return code size in bytes, as derived from the word count
    inline int get_code_size() const {
        return n * 4;
    }
};
/***************************************************************************
* Equivalence with a template class when code size is known at compile time
**************************************************************************/
// default template
// Primary template: any CODE_SIZE without a specialization below derives from
// HammingComputerDefault and forwards the code pointer and the runtime
// code_size to its constructor.
template <int CODE_SIZE>
struct HammingComputer : HammingComputerDefault {
HammingComputer(const uint8_t* a, int code_size)
: HammingComputerDefault(a, code_size) {}
};
// Generates the explicit specialization HammingComputer<CODE_SIZE>, derived
// from the fixed-size class HammingComputer##CODE_SIZE (e.g. HammingComputer16).
// Unlike the primary template, the specialized constructor takes only the code
// pointer; the size handed to the base class is the compile-time CODE_SIZE.
#define SPECIALIZED_HC(CODE_SIZE) \
template <> \
struct HammingComputer<CODE_SIZE> : HammingComputer##CODE_SIZE { \
HammingComputer(const uint8_t* a) \
: HammingComputer##CODE_SIZE(a, CODE_SIZE) {} \
}
// Code sizes that map to a dedicated HammingComputer<N> class.
SPECIALIZED_HC(4);
SPECIALIZED_HC(8);
SPECIALIZED_HC(16);
SPECIALIZED_HC(20);
SPECIALIZED_HC(32);
SPECIALIZED_HC(64);
// The helper macro is only needed for the instantiations above.
#undef SPECIALIZED_HC
/*************************************************************************** /***************************************************************************
* generalized Hamming = number of bytes that are different between * generalized Hamming = number of bytes that are different between
* two codes. * two codes.

View File

@ -17,6 +17,7 @@ using hamdis_t = int32_t;
namespace faiss { namespace faiss {
// trust the compiler to provide efficient popcount implementations
inline int popcount32(uint32_t x) { inline int popcount32(uint32_t x) {
return __builtin_popcount(x); return __builtin_popcount(x);
} }

View File

@ -329,93 +329,6 @@ struct HammingComputerDefault {
} }
}; };
// Obsolete: less efficient than HammingComputerDefault.
// Hamming distance computer for codes whose size is a multiple of 8 bytes;
// the codes are processed one 64-bit word at a time.
struct HammingComputerM8 {
    const uint64_t* a; // reference code, viewed as 64-bit words
    int n;             // number of 64-bit words per code

    // Leaves a and n unset; set() must be called before hamming().
    HammingComputerM8() {}

    HammingComputerM8(const uint8_t* a8, int code_size) {
        set(a8, code_size);
    }

    // Register the reference code a8, which is code_size bytes long.
    void set(const uint8_t* a8, int code_size) {
        assert(code_size % 8 == 0);
        n = code_size / 8;
        a = reinterpret_cast<const uint64_t*>(a8);
    }

    // Sum of popcount64 over the XOR of the reference code and b8, word by
    // word. b8 must hold at least get_code_size() bytes.
    int hamming(const uint8_t* b8) const {
        const uint64_t* other = reinterpret_cast<const uint64_t*>(b8);
        int total = 0;
        int w = 0;
        while (w < n) {
            total += popcount64(a[w] ^ other[w]);
            ++w;
        }
        return total;
    }

    // Code size in bytes (8 bytes per stored word).
    inline int get_code_size() const {
        return 8 * n;
    }
};
// Obsolete: less efficient than HammingComputerDefault.
// Hamming distance computer for codes whose size is a multiple of 4 bytes;
// the codes are processed one 32-bit word at a time.
struct HammingComputerM4 {
    const uint32_t* a; // reference code, viewed as 32-bit words
    int n;             // number of 32-bit words per code

    // Leaves a and n unset; set() must be called before hamming().
    HammingComputerM4() {}

    HammingComputerM4(const uint8_t* a4, int code_size) {
        set(a4, code_size);
    }

    // Register the reference code a4, which is code_size bytes long.
    void set(const uint8_t* a4, int code_size) {
        assert(code_size % 4 == 0);
        n = code_size / 4;
        a = reinterpret_cast<const uint32_t*>(a4);
    }

    // Sum of the popcounts of the XOR of the reference code and b8, word by
    // word. NOTE(review): the 32-bit XOR is handed to popcount64, presumably
    // widened to 64 bits -- confirm against the popcount64 declaration.
    int hamming(const uint8_t* b8) const {
        const uint32_t* other = reinterpret_cast<const uint32_t*>(b8);
        int total = 0;
        int w = 0;
        while (w < n) {
            total += popcount64(a[w] ^ other[w]);
            ++w;
        }
        return total;
    }

    // Code size in bytes (4 bytes per stored word).
    inline int get_code_size() const {
        return 4 * n;
    }
};
/***************************************************************************
* Equivalence with a template class when code size is known at compile time
**************************************************************************/
// default template
// Primary template: any CODE_SIZE without a specialization below derives from
// HammingComputerDefault and forwards the code pointer and the runtime
// code_size to its constructor.
template <int CODE_SIZE>
struct HammingComputer : HammingComputerDefault {
HammingComputer(const uint8_t* a, int code_size)
: HammingComputerDefault(a, code_size) {}
};
// Generates the explicit specialization HammingComputer<CODE_SIZE>, derived
// from the fixed-size class HammingComputer##CODE_SIZE (e.g. HammingComputer16).
// Unlike the primary template, the specialized constructor takes only the code
// pointer; the size handed to the base class is the compile-time CODE_SIZE.
#define SPECIALIZED_HC(CODE_SIZE) \
template <> \
struct HammingComputer<CODE_SIZE> : HammingComputer##CODE_SIZE { \
HammingComputer(const uint8_t* a) \
: HammingComputer##CODE_SIZE(a, CODE_SIZE) {} \
}
// Code sizes that map to a dedicated HammingComputer<N> class.
SPECIALIZED_HC(4);
SPECIALIZED_HC(8);
SPECIALIZED_HC(16);
SPECIALIZED_HC(20);
SPECIALIZED_HC(32);
SPECIALIZED_HC(64);
// The helper macro is only needed for the instantiations above.
#undef SPECIALIZED_HC
/*************************************************************************** /***************************************************************************
* generalized Hamming = number of bytes that are different between * generalized Hamming = number of bytes that are different between
* two codes. * two codes.

View File

@ -23,4 +23,61 @@
#include <faiss/utils/hamming_distance/generic-inl.h> #include <faiss/utils/hamming_distance/generic-inl.h>
#endif #endif
namespace faiss {
/***************************************************************************
* Equivalence with a template class when code size is known at compile time
**************************************************************************/
// default template
// Primary template: any CODE_SIZE without a specialization below derives from
// HammingComputerDefault and forwards the code pointer and the runtime
// code_size to its constructor.
template <int CODE_SIZE>
struct HammingComputer : HammingComputerDefault {
HammingComputer(const uint8_t* a, int code_size)
: HammingComputerDefault(a, code_size) {}
};
// Generates the explicit specialization HammingComputer<CODE_SIZE>, derived
// from the fixed-size class HammingComputer##CODE_SIZE (e.g. HammingComputer16).
// Unlike the primary template, the specialized constructor takes only the code
// pointer; the size handed to the base class is the compile-time CODE_SIZE.
#define SPECIALIZED_HC(CODE_SIZE) \
template <> \
struct HammingComputer<CODE_SIZE> : HammingComputer##CODE_SIZE { \
HammingComputer(const uint8_t* a) \
: HammingComputer##CODE_SIZE(a, CODE_SIZE) {} \
}
// Code sizes that map to a dedicated HammingComputer<N> class.
SPECIALIZED_HC(4);
SPECIALIZED_HC(8);
SPECIALIZED_HC(16);
SPECIALIZED_HC(20);
SPECIALIZED_HC(32);
SPECIALIZED_HC(64);
// The helper macro is only needed for the instantiations above.
#undef SPECIALIZED_HC
/***************************************************************************
 * Dispatching function that takes a code size and a consumer object.
 * The consumer object should contain a return type T and an operation
 * template function f() that is called to perform the operation.
 **************************************************************************/

/** Call consumer.f<HC>(args...) with the HammingComputer type HC selected
 * from the runtime code size.
 *
 * @param code_size  code size used to pick the computer type: 4, 8, 16, 20,
 *                   32 and 64 select HammingComputer4 ... HammingComputer64,
 *                   any other value selects HammingComputerDefault
 * @param consumer   object exposing a nested type T (the return type) and a
 *                   member template `template <class HC> T f(...)`
 * @param args       arguments passed on to consumer.f; they are taken by
 *                   value, so each one is copied once on the way in
 * @return           whatever consumer.f returns
 */
template <class Consumer, class... Types>
typename Consumer::T dispatch_HammingComputer(
        int code_size,
        Consumer& consumer,
        Types... args) {
    switch (code_size) {
#define DISPATCH_HC(CODE_SIZE) \
    case CODE_SIZE:            \
        return consumer.template f<HammingComputer##CODE_SIZE>(args...);
        DISPATCH_HC(4);
        DISPATCH_HC(8);
        DISPATCH_HC(16);
        DISPATCH_HC(20);
        DISPATCH_HC(32);
        DISPATCH_HC(64);
        default:
            return consumer.template f<HammingComputerDefault>(args...);
    }
// This is a header: do not leak the helper macro into including files.
#undef DISPATCH_HC
}
} // namespace faiss
#endif #endif

View File

@ -392,109 +392,6 @@ struct HammingComputerDefault {
} }
}; };
// Obsolete: less efficient than HammingComputerDefault.
// Hamming distance computer for codes whose size is a multiple of 8 bytes.
// Codes are handled as 64-bit words: groups of four words go through
// ::faiss::hamming<256>, leftover words through popcount64.
struct HammingComputerM8 {
    const uint64_t* a; // reference code, viewed as 64-bit words
    int n;             // number of 64-bit words per code

    // Leaves a and n unset; set() must be called before hamming().
    HammingComputerM8() {}

    HammingComputerM8(const uint8_t* a8, int code_size) {
        set(a8, code_size);
    }

    // Register the reference code a8, which is code_size bytes long.
    void set(const uint8_t* a8, int code_size) {
        assert(code_size % 8 == 0);
        n = code_size / 8;
        a = reinterpret_cast<const uint64_t*>(a8);
    }

    // Accumulated distance between the reference code and b8.
    // b8 must hold at least get_code_size() bytes.
    int hamming(const uint8_t* b8) const {
        const uint64_t* other = reinterpret_cast<const uint64_t*>(b8);
        // Largest multiple of 4 words that fits in the code.
        const int blocked_end = n - n % 4;
        int total = 0;
        int w = 0;
        // Main part: four 64-bit words (256 bits) per call.
        while (w < blocked_end) {
            total += ::faiss::hamming<256>(a + w, other + w);
            w += 4;
        }
        // Tail: remaining words one at a time.
        while (w < n) {
            total += popcount64(a[w] ^ other[w]);
            ++w;
        }
        return total;
    }

    // Code size in bytes (8 bytes per stored word).
    inline int get_code_size() const {
        return 8 * n;
    }
};
// Obsolete: less efficient than HammingComputerDefault.
// Hamming distance computer for codes whose size is a multiple of 4 bytes.
// Codes are handled as 32-bit words: groups of eight words go through
// ::faiss::hamming<256>, leftover words through popcount64.
struct HammingComputerM4 {
    const uint32_t* a; // reference code, viewed as 32-bit words
    int n;             // number of 32-bit words per code

    // Leaves a and n unset; set() must be called before hamming().
    HammingComputerM4() {}

    HammingComputerM4(const uint8_t* a4, int code_size) {
        set(a4, code_size);
    }

    // Register the reference code a4, which is code_size bytes long.
    void set(const uint8_t* a4, int code_size) {
        assert(code_size % 4 == 0);
        n = code_size / 4;
        a = reinterpret_cast<const uint32_t*>(a4);
    }

    // Accumulated distance between the reference code and b8.
    // b8 must hold at least get_code_size() bytes.
    int hamming(const uint8_t* b8) const {
        const uint32_t* other = reinterpret_cast<const uint32_t*>(b8);
        // Largest multiple of 8 words that fits in the code.
        const int blocked_end = n - n % 8;
        int total = 0;
        int w = 0;
        // Main part: eight 32-bit words (256 bits) per call, reinterpreted
        // as 64-bit words for ::faiss::hamming<256>.
        while (w < blocked_end) {
            const uint64_t* lhs64 = (const uint64_t*)(a + w);
            const uint64_t* rhs64 = (const uint64_t*)(other + w);
            total += ::faiss::hamming<256>(lhs64, rhs64);
            w += 8;
        }
        // Tail: remaining words one at a time.
        // NOTE(review): the 32-bit XOR is handed to popcount64, presumably
        // widened to 64 bits -- confirm against the popcount64 declaration.
        while (w < n) {
            total += popcount64(a[w] ^ other[w]);
            ++w;
        }
        return total;
    }

    // Code size in bytes (4 bytes per stored word).
    inline int get_code_size() const {
        return 4 * n;
    }
};
/***************************************************************************
* Equivalence with a template class when code size is known at compile time
**************************************************************************/
// default template
// Primary template: any CODE_SIZE without a specialization below derives from
// HammingComputerDefault and forwards the code pointer and the runtime
// code_size to its constructor.
template <int CODE_SIZE>
struct HammingComputer : HammingComputerDefault {
HammingComputer(const uint8_t* a, int code_size)
: HammingComputerDefault(a, code_size) {}
};
// Generates the explicit specialization HammingComputer<CODE_SIZE>, derived
// from the fixed-size class HammingComputer##CODE_SIZE (e.g. HammingComputer16).
// Unlike the primary template, the specialized constructor takes only the code
// pointer; the size handed to the base class is the compile-time CODE_SIZE.
#define SPECIALIZED_HC(CODE_SIZE) \
template <> \
struct HammingComputer<CODE_SIZE> : HammingComputer##CODE_SIZE { \
HammingComputer(const uint8_t* a) \
: HammingComputer##CODE_SIZE(a, CODE_SIZE) {} \
}
// Code sizes that map to a dedicated HammingComputer<N> class.
SPECIALIZED_HC(4);
SPECIALIZED_HC(8);
SPECIALIZED_HC(16);
SPECIALIZED_HC(20);
SPECIALIZED_HC(32);
SPECIALIZED_HC(64);
// The helper macro is only needed for the instantiations above.
#undef SPECIALIZED_HC
/*************************************************************************** /***************************************************************************
* generalized Hamming = number of bytes that are different between * generalized Hamming = number of bytes that are different between
* two codes. * two codes.