11 #include "ProductQuantizer.h"
20 #include "FaissAssert.h"
21 #include "VectorTransform.h"
22 #include "IndexFlat.h"
30 int sgemm_ (
const char *transa,
const char *transb, FINTEGER *m, FINTEGER *
31 n, FINTEGER *k,
const float *alpha,
const float *a,
32 FINTEGER *lda,
const float *b, FINTEGER *
33 ldb,
float *beta,
float *c, FINTEGER *ldc);
42 template <
typename CT,
class C>
43 void pq_estimators_from_tables_Mmul4 (
int M,
const CT * codes,
45 const float * __restrict dis_table,
52 for (
size_t j = 0; j < ncodes; j++) {
54 const float *dt = dis_table;
56 for (
size_t m = 0; m < M; m+=4) {
58 dism = dt[*codes++]; dt += ksub;
59 dism += dt[*codes++]; dt += ksub;
60 dism += dt[*codes++]; dt += ksub;
61 dism += dt[*codes++]; dt += ksub;
65 if (C::cmp (heap_dis[0], dis)) {
66 heap_pop<C> (k, heap_dis, heap_ids);
67 heap_push<C> (k, heap_dis, heap_ids, dis, j);
73 template <
typename CT,
class C>
74 void pq_estimators_from_tables_M4 (
const CT * codes,
76 const float * __restrict dis_table,
83 for (
size_t j = 0; j < ncodes; j++) {
85 const float *dt = dis_table;
86 dis = dt[*codes++]; dt += ksub;
87 dis += dt[*codes++]; dt += ksub;
88 dis += dt[*codes++]; dt += ksub;
91 if (C::cmp (heap_dis[0], dis)) {
92 heap_pop<C> (k, heap_dis, heap_ids);
93 heap_push<C> (k, heap_dis, heap_ids, dis, j);
99 template <
typename CT,
class C>
100 static inline void pq_estimators_from_tables (
const ProductQuantizer * pq,
103 const float * dis_table,
111 pq_estimators_from_tables_M4<CT, C> (codes, ncodes,
112 dis_table, pq->ksub, k,
117 if (pq->M % 4 == 0) {
118 pq_estimators_from_tables_Mmul4<CT, C> (pq->M, codes, ncodes,
119 dis_table, pq->ksub, k,
125 const size_t M = pq->M;
126 const size_t ksub = pq->ksub;
127 for (
size_t j = 0; j < ncodes; j++) {
129 const float * __restrict dt = dis_table;
130 for (
int m = 0; m < M; m++) {
134 if (C::cmp (heap_dis[0], dis)) {
135 heap_pop<C> (k, heap_dis, heap_ids);
136 heap_push<C> (k, heap_dis, heap_ids, dis, j);
148 ProductQuantizer::ProductQuantizer (
size_t d,
size_t M,
size_t nbits):
149 d(d), M(M), nbits(nbits), assign_index(nullptr)
151 set_derived_values ();
154 ProductQuantizer::ProductQuantizer ():
155 d(0), M(1), nbits(0), assign_index(nullptr)
157 set_derived_values ();
164 FAISS_THROW_IF_NOT (
d % M == 0);
171 train_type = Train_default;
178 ksub *
dsub *
sizeof (centroids_[0]));
182 static void init_hypercube (
int d,
int nbits,
183 int n,
const float * x,
187 std::vector<float> mean (d);
188 for (
int i = 0; i < n; i++)
189 for (
int j = 0; j < d; j++)
190 mean [j] += x[i * d + j];
193 for (
int j = 0; j < d; j++) {
195 if (fabs(mean[j]) > maxm) maxm = fabs(mean[j]);
198 for (
int i = 0; i < (1 << nbits); i++) {
199 float * cent = centroids + i * d;
200 for (
int j = 0; j < nbits; j++)
201 cent[j] = mean [j] + (((i >> j) & 1) ? 1 : -1) * maxm;
202 for (
int j = nbits; j < d; j++)
209 static void init_hypercube_pca (
int d,
int nbits,
210 int n,
const float * x,
213 PCAMatrix pca (d, nbits);
217 for (
int i = 0; i < (1 << nbits); i++) {
218 float * cent = centroids + i * d;
219 for (
int j = 0; j < d; j++) {
220 cent[j] = pca.mean[j];
222 for (
int k = 0; k < nbits; k++)
224 sqrt (pca.eigenvalues [k]) *
225 (((i >> k) & 1) ? 1 : -1) *
226 pca.PCAMat [j + k * d];
232 void ProductQuantizer::train (
int n,
const float * x)
236 final_train_type = train_type;
240 final_train_type = Train_default;
241 printf (
"cannot train hypercube: nbits=%ld > log2(d=%ld)\n",
246 float * xslice =
new float[n *
dsub];
247 ScopeDeleter<float> del (xslice);
248 for (
int m = 0; m <
M; m++) {
249 for (
int j = 0; j < n; j++)
250 memcpy (xslice + j *
dsub,
251 x + j * d + m * dsub,
252 dsub *
sizeof(
float));
254 Clustering clus (dsub, ksub,
cp);
257 if (final_train_type != Train_default) {
258 clus.centroids.resize (dsub * ksub);
261 switch (final_train_type) {
263 init_hypercube (dsub, nbits, n, xslice,
264 clus.centroids.data ());
267 init_hypercube_pca (dsub, nbits, n, xslice,
268 clus.centroids.data ());
271 memcpy (clus.centroids.data(),
273 dsub * ksub *
sizeof (float));
280 printf (
"Training PQ slice %d/%zd\n", m, M);
282 IndexFlatL2 index (dsub);
290 Clustering clus (dsub, ksub,
cp);
294 printf (
"Training all PQ slices at once\n");
297 IndexFlatL2 index (dsub);
300 for (
int m = 0; m <
M; m++) {
310 float distances [
ksub];
311 for (
size_t m = 0; m <
M; m++) {
314 const float * xsub = x + m *
dsub;
316 fvec_L2sqr_ny (distances, xsub,
get_centroids(m, 0), dsub, ksub);
320 for (i = 0; i <
ksub; i++) {
321 float dis = distances [i];
328 case 1: code[m] = (uint8_t) idxm;
break;
329 case 2: ((uint16_t *) code)[m] = (uint16_t) idxm;
break;
338 for (
size_t m = 0; m <
M; m++) {
340 sizeof(
float) * dsub);
343 const uint16_t *c = (
const uint16_t*) code;
344 for (
size_t m = 0; m <
M; m++) {
346 sizeof(
float) * dsub);
354 for (
size_t i = 0; i < n; i++) {
363 for (
size_t m = 0; m <
M; m++) {
368 for (
size_t j = 0; j <
ksub; j++) {
376 case 1: code[m] = (uint8_t) idxm;
break;
377 case 2: ((uint16_t *) code)[m] = (uint16_t) idxm;
break;
388 #pragma omp parallel for
389 for (
size_t i = 0; i < n; i++)
393 float *dis_tables =
new float [n * ksub *
M];
397 #pragma omp parallel for
398 for (
size_t i = 0; i < n; i++) {
400 const float * tab = dis_tables + i * ksub *
M;
408 float * dis_table)
const
412 for (m = 0; m <
M; m++) {
413 fvec_L2sqr_ny (dis_table + m * ksub,
421 void ProductQuantizer::compute_inner_prod_table (
const float * x,
422 float * dis_table)
const
426 for (m = 0; m <
M; m++) {
427 fvec_inner_products_ny (dis_table + m * ksub,
439 float * dis_tables)
const
444 #pragma omp parallel for
445 for (
size_t i = 0; i < nx; i++) {
451 for (
int m = 0; m <
M; m++) {
454 ksub, centroids.data() + m * dsub *
ksub,
455 dis_tables + ksub * m,
461 void ProductQuantizer::compute_inner_prod_tables (
464 float * dis_tables)
const
469 #pragma omp parallel for
470 for (
size_t i = 0; i < nx; i++) {
471 compute_inner_prod_table (x + i * d, dis_tables + i * ksub * M);
477 for (
int m = 0; m <
M; m++) {
478 FINTEGER ldc = ksub *
M, nxi = nx, ksubi =
ksub,
479 dsubi =
dsub, di =
d;
480 float one = 1.0, zero = 0;
482 sgemm_ (
"Transposed",
"Not transposed",
483 &ksubi, &nxi, &dsubi,
484 &one, &centroids [m * dsub * ksub], &dsubi,
486 &zero, dis_tables + ksub * m, &ldc);
492 template <
typename CT,
class C>
493 static void pq_knn_search_with_tables (
494 const ProductQuantizer * pq,
495 const float *dis_tables,
496 const uint8_t * codes,
499 bool init_finalize_heap)
501 size_t k = res->k, nx = res->nh;
502 size_t ksub = pq->ksub, M = pq->M;
505 #pragma omp parallel for
506 for (
size_t i = 0; i < nx; i++) {
508 const float* dis_table = dis_tables + i * ksub * M;
511 long * __restrict heap_ids = res->ids + i * k;
512 float * __restrict heap_dis = res->val + i * k;
514 if (init_finalize_heap) {
515 heap_heapify<C> (k, heap_dis, heap_ids);
518 pq_estimators_from_tables<CT, C> (pq,
521 k, heap_dis, heap_ids);
522 if (init_finalize_heap) {
523 heap_reorder<C> (k, heap_dis, heap_ids);
539 const uint8_t * codes,
542 bool init_finalize_heap)
const
544 FAISS_THROW_IF_NOT (nx == res->
nh);
545 float * dis_tables =
new float [nx * ksub *
M];
551 pq_knn_search_with_tables<uint8_t, CMax<float, long> > (
552 this, dis_tables, codes, ncodes, res, init_finalize_heap);
555 pq_knn_search_with_tables<uint16_t, CMax<float, long> > (
556 this, dis_tables, codes, ncodes, res, init_finalize_heap);
564 const uint8_t * codes,
567 bool init_finalize_heap)
const
569 FAISS_THROW_IF_NOT (nx == res->
nh);
570 float * dis_tables =
new float [nx * ksub *
M];
572 compute_inner_prod_tables (nx, x, dis_tables);
576 pq_knn_search_with_tables<uint8_t, CMin<float, long> > (
577 this, dis_tables, codes, ncodes, res, init_finalize_heap);
580 pq_knn_search_with_tables<uint16_t, CMin<float, long> > (
581 this, dis_tables, codes, ncodes, res, init_finalize_heap);
588 static float sqr (
float x) {
592 void ProductQuantizer::compute_sdc_table ()
596 for (
int m = 0; m <
M; m++) {
598 const float *cents = centroids.data() + m * ksub *
dsub;
602 for (
int i = 0; i <
ksub; i++) {
603 const float *centi = cents + i *
dsub;
604 for (
int j = 0; j <
ksub; j++) {
606 const float *centj = cents + j *
dsub;
607 for (
int k = 0; k <
dsub; k++)
608 accu += sqr (centi[k] - centj[k]);
609 dis_tab [i + j *
ksub] = accu;
615 void ProductQuantizer::search_sdc (
const uint8_t * qcodes,
617 const uint8_t * bcodes,
619 float_maxheap_array_t * res,
620 bool init_finalize_heap)
const
627 #pragma omp parallel for
628 for (
size_t i = 0; i < nq; i++) {
631 long * heap_ids = res->ids + i * k;
632 float * heap_dis = res->val + i * k;
633 const uint8_t * qcode = qcodes + i *
code_size;
635 if (init_finalize_heap)
636 maxheap_heapify (k, heap_dis, heap_ids);
638 const uint8_t * bcode = bcodes;
639 for (
size_t j = 0; j < nb; j++) {
642 for (
int m = 0; m <
M; m++) {
643 dis += tab[bcode[m] + qcode[m] *
ksub];
646 if (dis < heap_dis[0]) {
647 maxheap_pop (k, heap_dis, heap_ids);
648 maxheap_push (k, heap_dis, heap_ids, dis, j);
653 if (init_finalize_heap)
654 maxheap_reorder (k, heap_dis, heap_ids);
void set_params(const float *centroids, int m)
Define the centroids for subquantizer m.
initialize centroids with nbits-D hypercube
size_t nbits
number of bits per quantization index
void decode(const uint8_t *code, float *x) const
decode a vector from a given code (or n vectors if third argument)
size_t byte_per_idx
nb bytes per code component (1 or 2)
initialize centroids with nbits-D hypercube
void set_derived_values()
compute derived values when d, M and nbits have been set
std::vector< float > sdc_table
Symmetric Distance Table.
share dictionary across PQ segments
size_t dsub
dimensionality of each subvector
void compute_distance_tables(size_t nx, const float *x, float *dis_tables) const
void compute_code_from_distance_table(const float *tab, uint8_t *code) const
void compute_codes(const float *x, uint8_t *codes, size_t n) const
same as compute_code for several vectors
void compute_distance_table(const float *x, float *dis_table) const
void search(const float *x, size_t nx, const uint8_t *codes, const size_t ncodes, float_maxheap_array_t *res, bool init_finalize_heap=true) const
size_t code_size
bytes per indexed vector
size_t ksub
number of centroids for each subquantizer
void search_ip(const float *x, size_t nx, const uint8_t *codes, const size_t ncodes, float_minheap_array_t *res, bool init_finalize_heap=true) const
void pairwise_L2sqr(long d, long nq, const float *xq, long nb, const float *xb, float *dis, long ldq, long ldb, long ldd)
void compute_code(const float *x, uint8_t *code) const
Quantize one vector with the product quantizer.
the centroids are already initialized
ClusteringParameters cp
parameters used during clustering
size_t M
number of subquantizers
float * get_centroids(size_t m, size_t i)
return the centroids associated with subvector m
size_t d
size of the input vectors
bool verbose
verbose during training?
std::vector< float > centroids
Centroid table, size M * ksub * dsub.
train_type_t
initialization