20 #include "FaissAssert.h"
21 #include "AuxIndexStructures.h"
31 IndexPQ::IndexPQ (
int d,
size_t M,
size_t nbits,
MetricType metric):
32 Index(d, metric), pq(d, M, nbits)
59 if (ntrain_perm > n / 4)
62 printf (
"PQ training on %ld points, remains %ld points: "
63 "training polysemous on %s\n",
64 n - ntrain_perm, ntrain_perm,
65 ntrain_perm == 0 ?
"centroids" :
"these");
67 pq.train(n - ntrain_perm, x);
70 pq, ntrain_perm, x + (n - ntrain_perm) *
d);
89 if (sel.is_member (i)) {
98 long nremove = ntotal - j;
115 FAISS_THROW_IF_NOT (ni == 0 || (i0 >= 0 && i0 + ni <=
ntotal));
116 for (
idx_t i = 0; i < ni; i++) {
125 FAISS_THROW_IF_NOT (key >= 0 && key <
ntotal);
144 float *distances,
idx_t *labels)
const
147 if (search_type ==
ST_PQ) {
151 size_t(n), size_t(k), labels, distances };
155 size_t(n), size_t(k), labels, distances };
158 indexPQ_stats.nq += n;
159 indexPQ_stats.ncode += n *
ntotal;
166 search_core_polysemous (n, x, k, distances, labels);
170 uint8_t * q_codes =
new uint8_t [n *
pq.
code_size];
179 for (
size_t i = 0; i < n; i++) {
180 const float *xi = x + i *
d;
182 for (
int j = 0; j <
d; j++)
183 if (xi[j] > 0) code [j>>3] |= 1 << (j & 7);
187 if (search_type ==
ST_SDC) {
190 size_t(n), size_t(k), labels, distances};
192 pq.search_sdc (q_codes, n,
codes.data(),
ntotal, &res,
true);
195 int * idistances =
new int [n * k];
199 size_t (n), size_t (k), labels, idistances};
201 if (search_type ==
ST_HE) {
213 for (
int i = 0; i < k * n; i++)
214 distances[i] = idistances[i];
219 indexPQ_stats.nq += n;
220 indexPQ_stats.ncode += n *
ntotal;
228 void IndexPQStats::reset()
230 nq = ncode = n_hamming_pass = 0;
233 IndexPQStats indexPQ_stats;
236 template <
class HammingComputer>
237 static size_t polysemous_inner_loop (
238 const IndexPQ & index,
239 const float *dis_table_qi,
const uint8_t *q_code,
240 size_t k,
float *heap_dis,
long *heap_ids)
244 int code_size = index.pq.code_size;
245 int ksub = index.pq.ksub;
246 size_t ntotal = index.ntotal;
247 int ht = index.polysemous_ht;
249 const uint8_t *b_code = index.codes.data();
253 HammingComputer hc (q_code, code_size);
255 for (
long bi = 0; bi < ntotal; bi++) {
256 int hd = hc.hamming (b_code);
262 const float * dis_table = dis_table_qi;
263 for (
int m = 0; m < M; m++) {
264 dis += dis_table [b_code[m]];
268 if (dis < heap_dis[0]) {
269 maxheap_pop (k, heap_dis, heap_ids);
270 maxheap_push (k, heap_dis, heap_ids, dis, bi);
279 void IndexPQ::search_core_polysemous (idx_t n,
const float *x, idx_t k,
280 float *distances, idx_t *labels)
const
282 FAISS_THROW_IF_NOT (
pq.
nbits == 8);
285 float * dis_tables =
new float [n *
pq.
ksub *
pq.
M];
286 ScopeDeleter<float> del (dis_tables);
290 uint8_t * q_codes =
new uint8_t [n *
pq.
code_size];
291 ScopeDeleter<uint8_t> del2 (q_codes);
296 #pragma omp parallel for
297 for (
idx_t qi = 0; qi < n; qi++) {
306 #pragma omp parallel for reduction (+: n_pass)
307 for (
idx_t qi = 0; qi < n; qi++) {
308 const uint8_t * q_code = q_codes + qi *
pq.
code_size;
310 const float * dis_table_qi = dis_tables + qi *
pq.
M *
pq.
ksub;
312 long * heap_ids = labels + qi * k;
313 float *heap_dis = distances + qi * k;
314 maxheap_heapify (k, heap_dis, heap_ids);
320 n_pass += polysemous_inner_loop<HammingComputer4>
321 (*
this, dis_table_qi, q_code, k, heap_dis, heap_ids);
324 n_pass += polysemous_inner_loop<HammingComputer8>
325 (*
this, dis_table_qi, q_code, k, heap_dis, heap_ids);
328 n_pass += polysemous_inner_loop<HammingComputer16>
329 (*
this, dis_table_qi, q_code, k, heap_dis, heap_ids);
332 n_pass += polysemous_inner_loop<HammingComputer32>
333 (*
this, dis_table_qi, q_code, k, heap_dis, heap_ids);
336 n_pass += polysemous_inner_loop<HammingComputer20>
337 (*
this, dis_table_qi, q_code, k, heap_dis, heap_ids);
341 n_pass += polysemous_inner_loop<HammingComputerM8>
342 (*
this, dis_table_qi, q_code, k, heap_dis, heap_ids);
344 n_pass += polysemous_inner_loop<HammingComputerM4>
345 (*
this, dis_table_qi, q_code, k, heap_dis, heap_ids);
348 "code size %zd not supported for polysemous",
356 n_pass += polysemous_inner_loop<GenHammingComputer8>
357 (*
this, dis_table_qi, q_code, k, heap_dis, heap_ids);
360 n_pass += polysemous_inner_loop<GenHammingComputer16>
361 (*
this, dis_table_qi, q_code, k, heap_dis, heap_ids);
364 n_pass += polysemous_inner_loop<GenHammingComputer32>
365 (*
this, dis_table_qi, q_code, k, heap_dis, heap_ids);
369 n_pass += polysemous_inner_loop<GenHammingComputerM8>
370 (*
this, dis_table_qi, q_code, k, heap_dis, heap_ids);
373 "code size %zd not supported for polysemous",
379 maxheap_reorder (k, heap_dis, heap_ids);
382 indexPQ_stats.nq += n;
383 indexPQ_stats.ncode += n *
ntotal;
384 indexPQ_stats.n_hamming_pass += n_pass;
402 uint8_t * q_codes =
new uint8_t [n *
pq.
code_size];
412 idx_t nb,
const float *xb,
417 FAISS_THROW_IF_NOT (
pq.
nbits == 8);
420 uint8_t * q_codes =
new uint8_t [n *
pq.
code_size];
429 del_b_codes.set (b_codes);
433 b_codes =
codes.data();
436 memset (hist, 0,
sizeof(*hist) * (nbits + 1));
441 std::vector<long> histi (nbits + 1);
442 hamdis_t *distances =
new hamdis_t [nb * bs];
445 for (
size_t q0 = 0; q0 < n; q0 += bs) {
454 for (
size_t i = 0; i < nb * (q1 - q0); i++)
455 histi [distances [i]]++;
459 for (
int i = 0; i <= nbits; i++)
491 template <
typename T>
492 struct PreSortedArray {
497 explicit PreSortedArray (
int N): N(N) {
499 void init (
const T*x) {
509 return x[n] - x[n - 1];
513 int get_ord (
int n) {
519 template <
typename T>
522 bool operator() (
size_t i,
size_t j) {
531 template <
typename T>
535 std::vector<int> perm;
537 explicit SortedArray (
int N) {
542 void init (
const T*x) {
544 for (
int n = 0; n < N; n++)
546 ArgSort<T> cmp = {x };
547 std::sort (perm.begin(), perm.end(), cmp);
557 return x[perm[n]] - x[perm[n - 1]];
561 int get_ord (
int n) {
572 void partial_sort (
int k,
int n,
573 const typename C::T * vals,
typename C::TI * perm) {
575 for (
int i = 1; i < k; i++) {
576 indirect_heap_push<C> (i + 1, vals, perm, perm[i]);
580 for (
int i = k; i < n; i++) {
581 typename C::TI
id = perm[i];
582 typename C::TI top = perm[0];
584 if (C::cmp(vals[top], vals[
id])) {
585 indirect_heap_pop<C> (k, vals, perm);
586 indirect_heap_push<C> (k, vals, perm, id);
594 for (
int i = k - 1; i > 0; i--) {
595 typename C::TI top = perm[0];
596 indirect_heap_pop<C> (i + 1, vals, perm);
602 template <
typename T>
603 struct SemiSortedArray {
608 typedef CMax<T, int> HC;
609 std::vector<int> perm;
613 int initial_k, k_factor;
615 explicit SemiSortedArray (
int N) {
623 void init (
const T*x) {
625 for (
int n = 0; n < N; n++)
632 void grow (
int next_k) {
634 partial_sort<HC> (next_k - k, N - k, x, &perm[k]);
637 ArgSort<T> cmp = {x };
638 std::sort (perm.begin() + k, perm.end(), cmp);
652 int next_k = (k + 1) * k_factor - 1;
655 return x[perm[n]] - x[perm[n - 1]];
659 int get_ord (
int n) {
693 template <
typename T,
class SSA,
bool use_seen>
704 typedef CMin<T, long> HC;
705 size_t heap_capacity, heap_size;
709 std::vector <SSA> ssx;
715 std::vector <uint8_t> seen;
717 MinSumK (
int K,
int M,
int nbit,
int N):
718 K(K), M(M), nbit(nbit), N(N) {
719 heap_capacity = K * M;
720 assert (N <= (1 << nbit));
723 bh_val =
new T[heap_capacity];
724 bh_ids =
new long[heap_capacity];
727 long n_ids = weight(M);
728 seen.resize ((n_ids + 7) / 8);
731 for (
int m = 0; m < M; m++)
732 ssx.push_back (SSA(N));
736 long weight (
int i) {
737 return 1 << (i * nbit);
740 bool is_seen (
long i) {
741 return (seen[i >> 3] >> (i & 7)) & 1;
744 void mark_seen (
long i) {
746 seen [i >> 3] |= 1 << (i & 7);
749 void run (
const T *x,
long ldx,
750 T * sums,
long * terms) {
753 for (
int m = 0; m < M; m++) {
762 for (
int m = 0; m < M; m++) {
763 sum += ssx[m].get_0();
766 for (
int m = 0; m < M; m++) {
767 heap_push<HC> (++heap_size, bh_val, bh_ids,
768 sum + ssx[m].get_diff(1),
773 for (
int k = 1; k < K; k++) {
776 while (is_seen (bh_ids[0])) {
777 assert (heap_size > 0);
778 heap_pop<HC> (heap_size--, bh_val, bh_ids);
781 assert (heap_size > 0);
783 T sum = sums[k] = bh_val[0];
784 long ti = terms[k] = bh_ids[0];
788 heap_pop<HC> (heap_size--, bh_val, bh_ids);
791 heap_pop<HC> (heap_size--, bh_val, bh_ids);
792 }
while (heap_size > 0 && bh_ids[0] == ti);
797 for (
int m = 0; m < M; m++) {
798 long n = ii & ((1L << nbit) - 1);
800 if (n + 1 >= N)
continue;
802 enqueue_follower (ti, m, n, sum);
813 for (
int k = 0; k < K; k++) {
820 for (
int m = 0; m < M; m++) {
821 long n = ii & ((1L << nbit) - 1);
822 ti += long(ssx[m].get_ord(n)) << (nbit * m);
830 void enqueue_follower (
long ti,
int m,
int n, T sum) {
831 T next_sum = sum + ssx[m].get_diff(n + 1);
832 long next_ti = ti + weight(m);
833 heap_push<HC> (++heap_size, bh_val, bh_ids, next_sum, next_ti);
845 MultiIndexQuantizer::MultiIndexQuantizer (
int d,
848 Index(d, METRIC_L2), pq(d, M, nbits)
863 for (
int m = 0; m < pq.
M; m++)
869 float *distances,
idx_t *labels)
const {
875 for (
idx_t i0 = 0; i0 < n; i0 += bs) {
876 idx_t i1 = std::min(i0 + bs, n);
878 printf(
"MultiIndexQuantizer::search: %ld:%ld / %ld\n",
881 search (i1 - i0, x + i0 *
d, k,
888 float * dis_tables =
new float [n * pq.
ksub * pq.
M];
896 #pragma omp parallel for
897 for (
int i = 0; i < n; i++) {
898 const float * dis_table = dis_tables + i * pq.
ksub * pq.
M;
902 for (
int s = 0; s < pq.
M; s++) {
903 float vmin = HUGE_VALF;
907 if (dis_table[j] < vmin) {
913 label |= lmin << (s * pq.
nbits);
914 dis_table += pq.
ksub;
924 #pragma omp parallel if(n > 1)
926 MinSumK <float, SemiSortedArray<float>,
false>
929 for (
int i = 0; i < n; i++) {
930 msk.run (dis_tables + i * pq.
ksub * pq.
M, pq.
ksub,
931 distances + i * k, labels + i * k);
944 for (
int m = 0; m < pq.
M; m++) {
945 long n = jj & ((1L << pq.
nbits) - 1);
954 "This index has virtual elements, "
955 "it does not support add");
960 FAISS_THROW_MSG (
"This index has virtual elements, "
961 "it does not support reset");
979 MultiIndexQuantizer2::MultiIndexQuantizer2 (
980 int d,
size_t M,
size_t nbits,
984 assign_indexes.resize (M);
985 for (
int i = 0; i < M; i++) {
986 FAISS_THROW_IF_NOT_MSG(
987 indexes[i]->d == pq.
dsub,
988 "Provided sub-index has incorrect size");
989 assign_indexes[i] = indexes[i];
994 MultiIndexQuantizer2::MultiIndexQuantizer2 (
996 Index *assign_index_0,
997 Index *assign_index_1):
998 MultiIndexQuantizer (d, 2, nbits)
1000 FAISS_THROW_IF_NOT_MSG(
1001 assign_index_0->d == pq.
dsub &&
1002 assign_index_1->d == pq.
dsub,
1003 "Provided sub-index has incorrect size");
1004 assign_indexes.resize (2);
1005 assign_indexes [0] = assign_index_0;
1006 assign_indexes [1] = assign_index_1;
1014 for (
int i = 0; i < pq.
M; i++) {
1022 float* distances,
idx_t* labels)
const
1027 int k2 = std::min(K,
long(pq.
ksub));
1030 long dsub = pq.
dsub, ksub = pq.
ksub;
1033 std::vector<idx_t> sub_ids(n * M * k2);
1034 std::vector<float> sub_dis(n * M * k2);
1035 std::vector<float> xsub(n * dsub);
1037 for (
int m = 0; m < M; m++) {
1038 float *xdest = xsub.data();
1039 const float *xsrc = x + m * dsub;
1040 for (
int j = 0; j < n; j++) {
1041 memcpy(xdest, xsrc, dsub *
sizeof(xdest[0]));
1048 &sub_dis[k2 * n * m],
1049 &sub_ids[k2 * n * m]);
1056 for (
int i = 0; i < n; i++) {
1060 for (
int m = 0; m < M; m++) {
1061 float vmin = sub_dis[i + m * n];
1062 idx_t lmin = sub_ids[i + m * n];
1064 label |= lmin << (m * pq.
nbits);
1066 distances [i] = dis;
1072 #pragma omp parallel if(n > 1)
1074 MinSumK <float, PreSortedArray<float>,
false>
1075 msk(K, pq.
M, pq.
nbits, k2);
1077 for (
int i = 0; i < n; i++) {
1078 idx_t *li = labels + i * K;
1079 msk.run (&sub_dis[i * k2], k2 * n,
1080 distances + i * K, li);
1084 const idx_t *idmap0 = sub_ids.data() + i * k2;
1085 long ld_idmap = k2 * n;
1086 long mask1 = ksub - 1L;
1088 for (
int k = 0; k < K; k++) {
1089 const idx_t *idmap = idmap0;
1093 for (
int m = 0; m < M; m++) {
1094 long s = vin & mask1;
1096 vout |= idmap[s] << bs;
void hammings_knn_hc(int_maxheap_array_t *ha, const uint8_t *a, const uint8_t *b, size_t nb, size_t ncodes, int order)
std::vector< uint8_t > codes
Codes. Size ntotal * pq.code_size.
size_t nbits
number of bits per quantization index
void decode(const uint8_t *code, float *x) const
decode a vector from a given code (or n vectors if third argument)
Hamming distance on codes.
bool do_polysemous_training
false = standard PQ
void train(idx_t n, const float *x) override
void reset() override
removes all elements from the database.
void train(idx_t n, const float *x) override
size_t dsub
dimensionality of each subvector
void compute_distance_tables(size_t nx, const float *x, float *dis_tables) const
void compute_code_from_distance_table(const float *tab, uint8_t *code) const
void compute_codes(const float *x, uint8_t *codes, size_t n) const
same as compute_code for several vectors
long idx_t
all indices are this type
void hamming_distance_histogram(idx_t n, const float *x, idx_t nb, const float *xb, long *dist_histogram)
void search(const float *x, size_t nx, const uint8_t *codes, const size_t ncodes, float_maxheap_array_t *res, bool init_finalize_heap=true) const
void train(idx_t n, const float *x) override
size_t code_size
byte per indexed vector
long remove_ids(const IDSelector &sel) override
Filter on generalized Hamming.
size_t ksub
number of centroids for each subquantizer
void search_ip(const float *x, size_t nx, const uint8_t *codes, const size_t ncodes, float_minheap_array_t *res, bool init_finalize_heap=true) const
ProductQuantizer pq
The product quantizer used to encode the vectors.
idx_t ntotal
total nb of indexed vectors
bool verbose
verbosity level
void add(idx_t n, const float *x) override
void hamming_distance_table(idx_t n, const float *x, int32_t *dis) const
void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override
void reconstruct(idx_t key, float *recons) const override
MetricType metric_type
type of metric this index uses for search
size_t M
number of subquantizers
void reconstruct_n(idx_t i0, idx_t ni, float *recons) const override
asymmetric product quantizer (default)
void reconstruct(idx_t key, float *recons) const override
HE filter (using ht) + PQ combination.
void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override
void add(idx_t n, const float *x) override
add and reset will crash at runtime
bool is_trained
set if the Index does not require training, or if training is done already
void reset() override
removes all elements from the database.
float * get_centroids(size_t m, size_t i)
return the centroids associated with subvector m
void optimize_pq_for_hamming(ProductQuantizer &pq, size_t n, const float *x) const
bool verbose
verbose during training?
void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override
symmetric product quantizer (SDC)
int polysemous_ht
Hamming threshold used for polysemy.
PolysemousTraining polysemous_training
parameters used for the polysemous training
std::vector< Index * > assign_indexes
M Indexes on d / M dimensions.
MetricType
Some algorithms support both an inner product version and a L2 search version.
void generalized_hammings_knn_hc(int_maxheap_array_t *ha, const uint8_t *a, const uint8_t *b, size_t nb, size_t code_size, int ordered)