#include "PolysemousTraining.h"

#include <cstdlib>
#include <cmath>
#include <cstring>
#include <cstdio>
#include <algorithm>
#include <vector>

#include "utils.h"
#include "FaissAssert.h"
SimulatedAnnealingParameters::SimulatedAnnealingParameters ()
{
    // set some reasonable defaults for the optimization
    init_temperature = 0.7;
    temperature_decay = pow (0.9, 1/500.); // reduce by 0.9 every 500 iterations
    n_iter = 500000;
    n_redo = 2;
    seed = 123;
    verbose = 0;
    only_bit_flips = false;
}
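// For reference: with temperature_decay = pow (0.9, 1/500.), the temperature
// is multiplied by 0.9 every 500 iterations, so starting from
// init_temperature = 0.7 it is about 0.63 after 500 iterations and about
// 0.7 * 0.9^20 = 0.085 after 10000 iterations.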
// default implementation of the update: recompute the full cost from scratch
double PermutationObjective::cost_update (
        const int *perm, int iw, int jw) const
{
    double orig_cost = compute_cost (perm);

    std::vector<int> perm2 (n);
    for (int i = 0; i < n; i++)
        perm2[i] = perm[i];
    perm2[iw] = perm[jw];
    perm2[jw] = perm[iw];

    double new_cost = compute_cost (perm2.data());
    return new_cost - orig_cost;
}
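// A minimal sketch (not part of the library) of how a custom objective can
// rely on the default cost_update() above: only compute_cost() needs to be
// provided. The struct and member names below are hypothetical.
#if 0
struct ExampleSlotObjective: PermutationObjective {
    std::vector<double> target; // wanted value for each slot, size n

    explicit ExampleSlotObjective (const std::vector<double> & t): target (t) {
        n = t.size();
    }

    // quadratic mismatch between the permuted index and the wanted value
    double compute_cost (const int *perm) const override {
        double cost = 0;
        for (int i = 0; i < n; i++) {
            double d = target[i] - perm[i];
            cost += d * d;
        }
        return cost;
    }
};
#endif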
SimulatedAnnealingOptimizer::SimulatedAnnealingOptimizer (
        PermutationObjective *obj,
        const SimulatedAnnealingParameters &p):
    SimulatedAnnealingParameters (p),
    obj (obj), n (obj->n),
    logfile (nullptr)
{
    rnd = new RandomGenerator (p.seed);
    FAISS_THROW_IF_NOT (n < 100000 && n >= 0);
}

SimulatedAnnealingOptimizer::~SimulatedAnnealingOptimizer ()
{
    delete rnd;
}
// run the optimization and return the best result in best_perm
double SimulatedAnnealingOptimizer::run_optimization (int * best_perm)
{
    double min_cost = 1e30;

    // just do a few runs of the annealing and keep the lowest output cost
    for (int it = 0; it < n_redo; it++) {
        std::vector<int> perm(n);
        for (int i = 0; i < n; i++)
            perm[i] = i;
        // start from a random permutation
        for (int i = 0; i < n; i++) {
            int j = i + rnd->rand_int (n - i);
            std::swap (perm[i], perm[j]);
        }
        float cost = optimize (perm.data());
        if (logfile) fprintf (logfile, "\n");

        printf ("    optimization run %d: cost=%g %s\n",
                it, cost, cost < min_cost ? "keep" : "");

        if (cost < min_cost) {
            memcpy (best_perm, perm.data(), sizeof(perm[0]) * n);
            min_cost = cost;
        }
    }
    return min_cost;
}
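// Illustrative usage sketch (not part of the library): driving the optimizer
// on an existing objective. The function and variable names are hypothetical;
// only the SimulatedAnnealing* classes come from PolysemousTraining.h.
#if 0
static double example_run_annealing (PermutationObjective *obj, int *best_perm)
{
    SimulatedAnnealingParameters params;   // defaults set in the constructor above
    params.verbose = 1;
    SimulatedAnnealingOptimizer optim (obj, params);
    return optim.run_optimization (best_perm); // best_perm must hold obj->n ints
}
#endif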
// perform the simulated annealing loop, modifying the permutation in-place
double SimulatedAnnealingOptimizer::optimize (int *perm)
{
    double cost = init_cost = obj->compute_cost (perm);
    int log2n = 0;
    while (!(n <= (1 << log2n))) log2n++;
    double temperature = init_temperature;
    int n_swap = 0, n_hot = 0;
    for (int it = 0; it < n_iter; it++) {
        temperature = temperature * temperature_decay;
        int iw, jw;
        if (only_bit_flips) {
            iw = rnd->rand_int (n);
            jw = iw ^ (1 << rnd->rand_int (log2n));
        } else {
            iw = rnd->rand_int (n);
            jw = rnd->rand_int (n - 1);
            if (jw == iw) jw++;
        }
        double delta_cost = obj->cost_update (perm, iw, jw);
        // accept the swap if it improves the cost, or with a probability
        // that decreases with the temperature
        if (delta_cost < 0 || rnd->rand_float () < temperature) {
            std::swap (perm[iw], perm[jw]);
            cost += delta_cost;
            n_swap++;
            if (delta_cost >= 0) n_hot++;
        }
        if (verbose > 2 || (verbose > 1 && it % 10000 == 0)) {
            printf ("      iteration %d cost %g temp %g n_swap %d "
                    "(%d hot)     \r",
                    it, cost, temperature, n_swap, n_hot);
            fflush (stdout);
        }
        if (logfile) {
            fprintf (logfile, "%d %g %g %d %d\n",
                     it, cost, temperature, n_swap, n_hot);
        }
    }
    if (verbose > 1) printf("\n");
    return cost;
}
static inline int hamming_dis (uint64_t a, uint64_t b)
{
    return __builtin_popcountl (a ^ b);
}
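// Example: hamming_dis (0x0b /* 1011 */, 0x0e /* 1110 */) xors to 0b0101,
// so the function returns 2.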
/// optimize the permutation so that Hamming distances between codes
/// reproduce a given distance table
struct ReproduceWithHammingObjective : PermutationObjective {
    int nbits;
    double dis_weight_factor;

    static double sqr (double x) { return x * x; }

    // weighting of distances: it is more important to reproduce small
    // distances well
    double dis_weight (double x) const
    {
        return exp (-dis_weight_factor * x);
    }

    std::vector<double> target_dis; // wanted distances (size n^2)
    std::vector<double> weights;    // weights for each distance (size n^2)
    // cost = weighted quadratic difference between the wanted distance and
    // the actual Hamming distance
    double compute_cost (const int* perm) const override {
        double cost = 0;
        for (int i = 0; i < n; i++) {
            for (int j = 0; j < n; j++) {
                double wanted = target_dis[i * n + j];
                double w = weights[i * n + j];
                double actual = hamming_dis(perm[i], perm[j]);
                cost += w * sqr(wanted - actual);
            }
        }
        return cost;
    }
    // what would the cost update be if iw and jw were swapped?
    // computed in O(n) instead of recomputing the full O(n^2) cost
    double cost_update (const int* perm, int iw, int jw) const override {
        double delta_cost = 0;

        for (int i = 0; i < n; i++) {
            if (i == iw) {
                for (int j = 0; j < n; j++) {
                    double wanted = target_dis[i * n + j],
                           w = weights[i * n + j];
                    double actual = hamming_dis(perm[i], perm[j]);
                    delta_cost -= w * sqr(wanted - actual);
                    double new_actual = hamming_dis(
                            perm[jw], perm[j == iw ? jw : j == jw ? iw : j]);
                    delta_cost += w * sqr(wanted - new_actual);
                }
            } else if (i == jw) {
                for (int j = 0; j < n; j++) {
                    double wanted = target_dis[i * n + j],
                           w = weights[i * n + j];
                    double actual = hamming_dis(perm[i], perm[j]);
                    delta_cost -= w * sqr(wanted - actual);
                    double new_actual = hamming_dis(
                            perm[iw], perm[j == iw ? jw : j == jw ? iw : j]);
                    delta_cost += w * sqr(wanted - new_actual);
                }
            } else {
                int j = iw;
                {
                    double wanted = target_dis[i * n + j],
                           w = weights[i * n + j];
                    double actual = hamming_dis(perm[i], perm[j]);
                    delta_cost -= w * sqr(wanted - actual);
                    double new_actual = hamming_dis(perm[i], perm[jw]);
                    delta_cost += w * sqr(wanted - new_actual);
                }
                j = jw;
                {
                    double wanted = target_dis[i * n + j],
                           w = weights[i * n + j];
                    double actual = hamming_dis(perm[i], perm[j]);
                    delta_cost -= w * sqr(wanted - actual);
                    double new_actual = hamming_dis(perm[i], perm[iw]);
                    delta_cost += w * sqr(wanted - new_actual);
                }
            }
        }
        return delta_cost;
    }
    ReproduceWithHammingObjective (
            int nbits,
            const std::vector<double> & dis_table,
            double dis_weight_factor):
        nbits (nbits), dis_weight_factor (dis_weight_factor)
    {
        n = 1 << nbits;
        FAISS_THROW_IF_NOT (dis_table.size() == n * n);
        set_affine_target_dis (dis_table);
    }
    // map the input distances to target Hamming distances with an affine
    // transform, and compute the corresponding weights
    void set_affine_target_dis (const std::vector<double> & dis_table)
    {
        double sum = 0, sum2 = 0;
        int n2 = n * n;
        for (int i = 0; i < n2; i++) {
            sum += dis_table [i];
            sum2 += dis_table [i] * dis_table [i];
        }
        double mean = sum / n2;
        double stddev = sqrt(sum2 / n2 - (sum / n2) * (sum / n2));

        target_dis.resize (n2);

        for (int i = 0; i < n2; i++) {
            // the mapping function
            double td = (dis_table [i] - mean) / stddev * sqrt(nbits / 4) +
                nbits / 2;
            target_dis[i] = td;
            // compute a weight
            weights.push_back (dis_weight (td));
        }
    }
    ~ReproduceWithHammingObjective() override {}
};
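// Illustrative sketch (not part of the library): building this objective from
// the n x n table of squared L2 distances between the 2^nbits centroids of
// one sub-quantizer, then optimizing the index permutation. The function and
// variable names are hypothetical.
#if 0
static void example_reproduce_with_hamming (
        int nbits, const std::vector<double> & dis_table,
        const SimulatedAnnealingParameters & params, int *perm_out)
{
    // dis_table must have (1 << nbits) * (1 << nbits) entries
    ReproduceWithHammingObjective obj (nbits, dis_table, log(2));
    SimulatedAnnealingOptimizer optim (&obj, params);
    optim.run_optimization (perm_out);  // perm_out must hold (1 << nbits) ints
}
#endif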
double ReproduceDistancesObjective::dis_weight (double x) const
{
    return exp (-dis_weight_factor * x);
}
double ReproduceDistancesObjective::get_source_dis (int i, int j) const
{
    return source_dis [i * n + j];
}
double ReproduceDistancesObjective::compute_cost (const int *perm) const
{
    double cost = 0;
    for (int i = 0; i < n; i++) {
        for (int j = 0; j < n; j++) {
            double wanted = target_dis [i * n + j];
            double w = weights [i * n + j];
            double actual = get_source_dis (perm[i], perm[j]);
            cost += w * sqr (wanted - actual);
        }
    }
    return cost;
}
double ReproduceDistancesObjective::cost_update (
        const int *perm, int iw, int jw) const
{
    double delta_cost = 0;
    for (int i = 0; i < n; i++) {
        if (i == iw) {
            for (int j = 0; j < n; j++) {
                double wanted = target_dis [i * n + j],
                       w = weights [i * n + j];
                double actual = get_source_dis (perm[i], perm[j]);
                delta_cost -= w * sqr (wanted - actual);
                double new_actual = get_source_dis (
                        perm[jw],
                        perm[j == iw ? jw : j == jw ? iw : j]);
                delta_cost += w * sqr (wanted - new_actual);
            }
        } else if (i == jw) {
            for (int j = 0; j < n; j++) {
                double wanted = target_dis [i * n + j],
                       w = weights [i * n + j];
                double actual = get_source_dis (perm[i], perm[j]);
                delta_cost -= w * sqr (wanted - actual);
                double new_actual = get_source_dis (
                        perm[iw],
                        perm[j == iw ? jw : j == jw ? iw : j]);
                delta_cost += w * sqr (wanted - new_actual);
            }
        } else {
            int j = iw;
            {
                double wanted = target_dis [i * n + j],
                       w = weights [i * n + j];
                double actual = get_source_dis (perm[i], perm[j]);
                delta_cost -= w * sqr (wanted - actual);
                double new_actual = get_source_dis (perm[i], perm[jw]);
                delta_cost += w * sqr (wanted - new_actual);
            }
            j = jw;
            {
                double wanted = target_dis [i * n + j],
                       w = weights [i * n + j];
                double actual = get_source_dis (perm[i], perm[j]);
                delta_cost -= w * sqr (wanted - actual);
                double new_actual = get_source_dis (perm[i], perm[iw]);
                delta_cost += w * sqr (wanted - new_actual);
            }
        }
    }
    return delta_cost;
}
ReproduceDistancesObjective::ReproduceDistancesObjective (
        int n,
        const double *source_dis_in,
        const double *target_dis_in,
        double dis_weight_factor):
    dis_weight_factor (dis_weight_factor),
    target_dis (target_dis_in)
{
    this->n = n;
    set_affine_target_dis (source_dis_in);
}
void ReproduceDistancesObjective::compute_mean_stdev (
        const double *tab, size_t n2,
        double *mean_out, double *stddev_out)
{
    double sum = 0, sum2 = 0;
    for (int i = 0; i < n2; i++) {
        sum += tab [i];
        sum2 += tab [i] * tab [i];
    }
    double mean = sum / n2;
    double stddev = sqrt(sum2 / n2 - (sum / n2) * (sum / n2));
    *mean_out = mean;
    *stddev_out = stddev;
}
void ReproduceDistancesObjective::set_affine_target_dis (
        const double *source_dis_in)
{
    int n2 = n * n;

    double mean_src, stddev_src;
    compute_mean_stdev (source_dis_in, n2, &mean_src, &stddev_src);

    double mean_target, stddev_target;
    compute_mean_stdev (target_dis, n2, &mean_target, &stddev_target);

    printf ("map mean %g std %g -> mean %g std %g\n",
            mean_src, stddev_src, mean_target, stddev_target);

    source_dis.resize (n2);
    weights.resize (n2);

    for (int i = 0; i < n2; i++) {
        // the affine mapping function
        source_dis[i] = (source_dis_in[i] - mean_src) / stddev_src
            * stddev_target + mean_target;
        // compute a weight
        weights [i] = dis_weight (target_dis[i]);
    }
}
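// Worked example: if the source distances have mean 10 and stddev 4 while the
// target distances have mean 3 and stddev 1, a source distance of 14 maps to
// (14 - 10) / 4 * 1 + 3 = 4, i.e. one target standard deviation above the
// target mean.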
/// Maintains a 3D table of elementary costs and accumulates entries
/// selected by Hamming-distance comparisons between permuted codes
template <typename Ttab, typename Taccu>
struct Score3Computer: PermutationObjective {

    int nc;

    // cost table of size nc * nc * nc
    std::vector<Ttab> n_gt;

    /// the cost is a triple loop on the nc * nc * nc table of entries
    Taccu compute (const int * perm) const
    {
        Taccu accu = 0;
        const Ttab *p = n_gt.data();
        for (int i = 0; i < nc; i++) {
            int ip = perm [i];
            for (int j = 0; j < nc; j++) {
                int jp = perm [j];
                for (int k = 0; k < nc; k++) {
                    int kp = perm [k];
                    if (hamming_dis (ip, jp) <
                        hamming_dis (ip, kp)) {
                        accu += *p;
                    }
                    p++;
                }
            }
        }
        return accu;
    }
    /// cost update if entries iw and jw of the permutation were swapped;
    /// only the cells of the nc*nc*nc cube that can change are visited
    Taccu compute_update (const int *perm, int iw, int jw) const
    {
        if (iw > jw) std::swap (iw, jw);

        Taccu accu = 0;
        const Ttab * n_gt_i = n_gt.data();
        for (int i = 0; i < nc; i++) {
            int ip0 = perm [i];
            int ip = perm [i == iw ? jw : i == jw ? iw : i];

            accu += update_i_cross (perm, iw, jw, ip0, ip, n_gt_i);

            if (ip != ip0)
                accu += update_i_plane (perm, iw, jw,
                                        ip0, ip, n_gt_i);

            n_gt_i += nc * nc;
        }
        return accu;
    }
    /// full update of the plane for a fixed i (all j and k)
    Taccu update_i (const int *perm, int iw, int jw,
                    int ip0, int ip, const Ttab * n_gt_i) const
    {
        Taccu accu = 0;
        const Ttab *n_gt_ij = n_gt_i;
        for (int j = 0; j < nc; j++) {
            int jp0 = perm [j];
            int jp = perm [j == iw ? jw : j == jw ? iw : j];
            for (int k = 0; k < nc; k++) {
                int kp0 = perm [k];
                int kp = perm [k == iw ? jw : k == jw ? iw : k];
                int ng = n_gt_ij [k];
                if (hamming_dis (ip, jp) < hamming_dis (ip, kp)) {
                    accu += ng;
                }
                if (hamming_dis (ip0, jp0) < hamming_dis (ip0, kp0)) {
                    accu -= ng;
                }
            }
            n_gt_ij += nc;
        }
        return accu;
    }
    /// updates cells where only index i's image changes
    /// (j and k are neither iw nor jw)
    Taccu update_i_plane (const int *perm, int iw, int jw,
                          int ip0, int ip, const Ttab * n_gt_i) const
    {
        Taccu accu = 0;
        const Ttab *n_gt_ij = n_gt_i;

        for (int j = 0; j < nc; j++) {
            if (j != iw && j != jw) {
                int jp = perm [j];
                for (int k = 0; k < nc; k++) {
                    if (k != iw && k != jw) {
                        int kp = perm [k];
                        Ttab ng = n_gt_ij [k];
                        if (hamming_dis (ip, jp) < hamming_dis (ip, kp)) {
                            accu += ng;
                        }
                        if (hamming_dis (ip0, jp) < hamming_dis (ip0, kp)) {
                            accu -= ng;
                        }
                    }
                }
            }
            n_gt_ij += nc;
        }
        return accu;
    }
    /// used for the 8 cells where the 3 indices are swapped
    inline Taccu update_k (const int *perm, int iw, int jw,
                           int ip0, int ip, int jp0, int jp,
                           int k,
                           const Ttab * n_gt_ij) const
    {
        Taccu accu = 0;
        int kp0 = perm [k];
        int kp = perm [k == iw ? jw : k == jw ? iw : k];
        Ttab ng = n_gt_ij [k];
        if (hamming_dis (ip, jp) < hamming_dis (ip, kp)) {
            accu += ng;
        }
        if (hamming_dis (ip0, jp0) < hamming_dis (ip0, kp0)) {
            accu -= ng;
        }
        return accu;
    }
    /// compute update on a line of k's, where i and j are swapped
    Taccu update_j_line (const int *perm, int iw, int jw,
                         int ip0, int ip, int jp0, int jp,
                         const Ttab * n_gt_ij) const
    {
        Taccu accu = 0;
        for (int k = 0; k < nc; k++) {
            if (k == iw || k == jw) continue;
            int kp = perm [k];
            Ttab ng = n_gt_ij [k];
            if (hamming_dis (ip, jp) < hamming_dis (ip, kp)) {
                accu += ng;
            }
            if (hamming_dis (ip0, jp0) < hamming_dis (ip0, kp)) {
                accu -= ng;
            }
        }
        return accu;
    }
    /// considers the 2 pairs of crossing lines j = iw or jw and k = iw or jw
    Taccu update_i_cross (const int *perm, int iw, int jw,
                          int ip0, int ip, const Ttab * n_gt_i) const
    {
        Taccu accu = 0;
        const Ttab *n_gt_ij = n_gt_i;

        for (int j = 0; j < nc; j++) {
            int jp0 = perm [j];
            int jp = perm [j == iw ? jw : j == jw ? iw : j];

            accu += update_k (perm, iw, jw, ip0, ip, jp0, jp, iw, n_gt_ij);
            accu += update_k (perm, iw, jw, ip0, ip, jp0, jp, jw, n_gt_ij);

            if (jp != jp0)
                accu += update_j_line (perm, iw, jw, ip0, ip, jp0, jp,
                                       n_gt_ij);

            n_gt_ij += nc;
        }
        return accu;
    }
    /// PermutationObjective implementation (negates the score, since the
    /// score is maximized while the cost is minimized)
    double compute_cost (const int* perm) const override {
        return -compute (perm);
    }

    double cost_update (const int* perm, int iw, int jw) const override {
        return -compute_update (perm, iw, jw);
    }

    ~Score3Computer() override {}
};
/// helper to sort an array of indices by the values in tab
struct IndirectSort {
    const float *tab;
    bool operator () (int a, int b) { return tab[a] < tab[b]; }
};
/// ranking objective for one sub-quantizer, built on Score3Computer
struct RankingScore2: Score3Computer<float, double> {
    int nbits;
    int nq, nb;
    const uint32_t *qcodes, *bcodes;
    const float *gt_distances;

    RankingScore2 (int nbits, int nq, int nb,
                   const uint32_t *qcodes, const uint32_t *bcodes,
                   const float *gt_distances):
        nbits(nbits), nq(nq), nb(nb), qcodes(qcodes),
        bcodes(bcodes), gt_distances(gt_distances)
    {
        n = nc = 1 << nbits;
        n_gt.resize (nc * nc * nc);
        init_n_gt ();
    }
    /// weight of a rank in the objective: decreases with the rank
    double rank_weight (int r)
    {
        return 1.0 / (r + 1);
    }

    /** accumulates rank_weight(ai) * rank_weight(bk - ai) over all pairs
     * (ai, bk) with bk > ai; a and b must be sorted lists of ranks */
    double accum_gt_weight_diff (const std::vector<int> & a,
                                 const std::vector<int> & b)
    {
        int nb = b.size(), na = a.size();

        double accu = 0;
        int j = 0;
        for (int i = 0; i < na; i++) {
            int ai = a[i];
            while (j < nb && ai >= b[j]) j++;

            double accu_i = 0;
            for (int k = j; k < b.size(); k++)
                accu_i += rank_weight (b[k] - ai);

            accu += rank_weight (ai) * accu_i;
        }
        return accu;
    }
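    // Worked example: with a = {0, 3} and b = {1, 2, 5} (sorted ranks), the
    // pairs (ai, bk) with bk > ai are (0,1), (0,2), (0,5) and (3,5); the
    // result is 1*(1/2 + 1/3 + 1/6) + (1/4)*(1/3) = 1 + 1/12, about 1.083.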
    /// initialize the nc*nc*nc cost table from the ground-truth distances
    void init_n_gt ()
    {
        for (int q = 0; q < nq; q++) {
            const float *gtd = gt_distances + q * nb;
            const uint32_t *cb = bcodes;
            float * n_gt_q = & n_gt [qcodes[q] * nc * nc];

            printf("init gt for q=%d/%d    \r", q, nq); fflush(stdout);

            std::vector<int> rankv (nb);
            int * ranks = rankv.data();

            // elements in each code bin, ordered by rank
            std::vector<std::vector<int> > tab (nc);

            { // build the rank table
                IndirectSort s = {gtd};
                for (int j = 0; j < nb; j++) ranks[j] = j;
                std::sort (ranks, ranks + nb, s);
            }

            for (int rank = 0; rank < nb; rank++) {
                int i = ranks [rank];
                tab [cb[i]].push_back (rank);
            }

            for (int i = 0; i < nc; i++) {
                std::vector<int> & di = tab[i];
                for (int j = 0; j < nc; j++) {
                    std::vector<int> & dj = tab[j];
                    n_gt_q [i * nc + j] += accum_gt_weight_diff (di, dj);
                }
            }
        }
    }

};
PolysemousTraining::PolysemousTraining ()
{
    optimization_type = OT_ReproduceDistances_affine;
    ntrain_permutation = 0;
    dis_weight_factor = log(2);
}
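// Illustrative usage sketch (not part of the library): PolysemousTraining is
// applied to a trained ProductQuantizer before encoding, typically via
// optimize_pq_for_hamming (this is what IndexPQ does when polysemous training
// is enabled). The function name below is hypothetical.
#if 0
static void example_polysemous_training (
        ProductQuantizer & pq, size_t n, const float *x)
{
    PolysemousTraining pt;
    pt.optimization_type = PolysemousTraining::OT_ReproduceDistances_affine;
    pt.optimize_pq_for_hamming (pq, n, x); // reorders the centroids of each sub-quantizer
}
#endif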
void PolysemousTraining::optimize_reproduce_distances (
        ProductQuantizer &pq) const
{
    int dsub = pq.dsub;
    int n = pq.ksub;
    int nbits = pq.nbits;

#pragma omp parallel for
    for (int m = 0; m < pq.M; m++) {
        std::vector<double> dis_table;

        float * centroids = pq.get_centroids (m, 0);

        for (int i = 0; i < n; i++) {
            for (int j = 0; j < n; j++) {
                dis_table.push_back (fvec_L2sqr (centroids + i * dsub,
                                                 centroids + j * dsub,
                                                 dsub));
            }
        }

        std::vector<int> perm (n);
        ReproduceWithHammingObjective obj (
                nbits, dis_table,
                dis_weight_factor);

        SimulatedAnnealingOptimizer optim (&obj, *this);

        if (log_pattern.size()) {
            char fname[256];
            snprintf (fname, 256, log_pattern.c_str(), m);
            printf ("opening log file %s\n", fname);
            optim.logfile = fopen (fname, "w");
            FAISS_THROW_IF_NOT_MSG (optim.logfile, "could not open logfile");
        }
        double final_cost = optim.run_optimization (perm.data());

        printf ("SimulatedAnnealingOptimizer for m=%d: %g -> %g\n",
                m, optim.init_cost, final_cost);

        if (log_pattern.size()) fclose (optim.logfile);

        std::vector<float> centroids_copy;
        for (int i = 0; i < dsub * n; i++)
            centroids_copy.push_back (centroids[i]);

        for (int i = 0; i < n; i++)
            memcpy (centroids + perm[i] * dsub,
                    centroids_copy.data() + i * dsub,
                    dsub * sizeof(centroids[0]));
    }
}
void PolysemousTraining::optimize_ranking (
        ProductQuantizer &pq, size_t n, const float *x) const
{
    int dsub = pq.dsub;
    int nbits = pq.nbits;

    std::vector<uint8_t> all_codes (pq.code_size * n);

    pq.compute_codes (x, all_codes.data(), n);

    if (n == 0)
        pq.compute_sdc_table ();

#pragma omp parallel for
    for (int m = 0; m < pq.M; m++) {
        size_t nq, nb;
        std::vector <uint32_t> codes;     // query codes, then db codes
        std::vector <float> gt_distances; // nq * nb matrix of distances

        if (n > 0) {
            // use the training vectors of this sub-quantizer
            std::vector<float> xtrain (n * dsub);
            for (int i = 0; i < n; i++)
                memcpy (xtrain.data() + i * dsub,
                        x + i * pq.d + m * dsub,
                        sizeof(float) * dsub);

            codes.resize (n);
            for (int i = 0; i < n; i++)
                codes [i] = all_codes [i * pq.code_size + m];

            nq = n / 4; nb = n - nq;
            const float *xq = xtrain.data();
            const float *xb = xq + nq * dsub;

            gt_distances.resize (nq * nb);

            pairwise_L2sqr (dsub,
                            nq, xq,
                            nb, xb,
                            gt_distances.data());
        } else {
            // no training data: use the centroid-to-centroid SDC table
            nq = nb = pq.ksub;
            codes.resize (2 * nq);
            for (int i = 0; i < nq; i++)
                codes[i] = codes [i + nq] = i;

            gt_distances.resize (nq * nb);

            memcpy (gt_distances.data (),
                    pq.sdc_table.data () + m * nq * nb,
                    sizeof (float) * nq * nb);
        }

        double t0 = getmillisecs ();

        RankingScore2 obj (nbits, nq, nb,
                           codes.data(), codes.data() + nq,
                           gt_distances.data ());

        printf("   m=%d, nq=%ld, nb=%ld, initialize RankingScore "
               "in %.3f ms\n",
               m, nq, nb, (getmillisecs () - t0) / 1000.0);

        SimulatedAnnealingOptimizer optim (&obj, *this);

        if (log_pattern.size()) {
            char fname[256];
            snprintf (fname, 256, log_pattern.c_str(), m);
            printf ("opening log file %s\n", fname);
            optim.logfile = fopen (fname, "w");
            FAISS_THROW_IF_NOT_FMT (optim.logfile,
                                    "could not open logfile %s", fname);
        }

        std::vector<int> perm (pq.ksub);

        double final_cost = optim.run_optimization (perm.data());
        printf ("SimulatedAnnealingOptimizer for m=%d: %g -> %g\n",
                m, optim.init_cost, final_cost);

        if (log_pattern.size()) fclose (optim.logfile);

        float * centroids = pq.get_centroids (m, 0);

        std::vector<float> centroids_copy;
        for (int i = 0; i < dsub * pq.ksub; i++)
            centroids_copy.push_back (centroids[i]);

        for (int i = 0; i < pq.ksub; i++)
            memcpy (centroids + perm[i] * dsub,
                    centroids_copy.data() + i * dsub,
                    dsub * sizeof(centroids[0]));
    }
}
void PolysemousTraining::optimize_pq_for_hamming (ProductQuantizer &pq,
                                                  size_t n, const float *x) const
{
    if (optimization_type == OT_None) {
        // nothing to do
    } else if (optimization_type == OT_ReproduceDistances_affine) {
        optimize_reproduce_distances (pq);
    } else {
        optimize_ranking (pq, n, x);
    }

    pq.compute_sdc_table ();
}