10 #include "PolysemousTraining.h"
21 #include "FaissAssert.h"
34 SimulatedAnnealingParameters::SimulatedAnnealingParameters ()
37 init_temperature = 0.7;
38 temperature_decay = pow (0.9, 1/500.);
44 only_bit_flips =
false;
50 double PermutationObjective::cost_update (
51 const int *perm,
int iw,
int jw)
const
53 double orig_cost = compute_cost (perm);
55 std::vector<int> perm2 (n);
56 for (
int i = 0; i < n; i++)
61 double new_cost = compute_cost (perm2.data());
62 return new_cost - orig_cost;
77 FAISS_ASSERT (n < 100000 && n >=0 );
80 SimulatedAnnealingOptimizer::~SimulatedAnnealingOptimizer ()
86 double SimulatedAnnealingOptimizer::run_optimization (
int * best_perm)
88 double min_cost = 1e30;
91 for (
int it = 0; it < n_redo; it++) {
92 std::vector<int> perm(n);
93 for (
int i = 0; i <
n; i++)
96 for (
int i = 0; i <
n; i++) {
98 std::swap (perm[i], perm[j]);
101 float cost = optimize (perm.data());
102 if (logfile) fprintf (logfile,
"\n");
104 printf (
" optimization run %d: cost=%g %s\n",
105 it, cost, cost < min_cost ?
"keep" :
"");
107 if (cost < min_cost) {
108 memcpy (best_perm, perm.data(),
sizeof(perm[0]) * n);
117 double SimulatedAnnealingOptimizer::optimize (
int *perm)
119 double cost =
init_cost = obj->compute_cost (perm);
121 while (!(n <= (1 << log2n))) log2n++;
122 double temperature = init_temperature;
123 int n_swap = 0, n_hot = 0;
124 for (
int it = 0; it < n_iter; it++) {
125 temperature = temperature * temperature_decay;
127 if (only_bit_flips) {
129 jw = iw ^ (1 << rnd->
rand_int (log2n));
135 double delta_cost = obj->cost_update (perm, iw, jw);
136 if (delta_cost < 0 || rnd->rand_float () < temperature) {
137 std::swap (perm[iw], perm[jw]);
140 if (delta_cost >= 0) n_hot++;
142 if (verbose > 2 || (verbose > 1 && it % 10000 == 0)) {
143 printf (
" iteration %d cost %g temp %g n_swap %d "
145 it, cost, temperature, n_swap, n_hot);
149 fprintf (logfile,
"%d %g %g %d %d\n",
150 it, cost, temperature, n_swap, n_hot);
153 if (verbose > 1) printf(
"\n");
170 static inline int hamming_dis (uint64_t a, uint64_t b)
172 return __builtin_popcountl (a ^ b);
178 struct ReproduceWithHammingObjective : PermutationObjective {
180 double dis_weight_factor;
182 static double sqr (
double x) {
return x * x; }
187 double dis_weight (
double x)
const
189 return exp (-dis_weight_factor * x);
192 std::vector<double> target_dis;
193 std::vector<double> weights;
196 virtual double compute_cost (
const int *perm)
const
199 for (
int i = 0; i < n; i++) {
200 for (
int j = 0; j < n; j++) {
201 double wanted = target_dis [i * n + j];
202 double w = weights [i * n + j];
203 double actual = hamming_dis (perm[i], perm[j]);
204 cost += w * sqr (wanted - actual);
213 double cost_update (
const int *perm,
int iw,
int jw)
const
215 double delta_cost = 0;
217 for (
int i = 0; i < n; i++) {
219 for (
int j = 0; j < n; j++) {
220 double wanted = target_dis [i * n + j],
221 w = weights [i * n + j];
222 double actual = hamming_dis (perm[i], perm[j]);
223 delta_cost -= w * sqr (wanted - actual);
224 double new_actual = hamming_dis (
226 perm[j == iw ? jw : j == jw ? iw : j]);
227 delta_cost += w * sqr (wanted - new_actual);
229 }
else if (i == jw) {
230 for (
int j = 0; j < n; j++) {
231 double wanted = target_dis [i * n + j],
232 w = weights [i * n + j];
233 double actual = hamming_dis (perm[i], perm[j]);
234 delta_cost -= w * sqr (wanted - actual);
235 double new_actual = hamming_dis (
237 perm[j == iw ? jw : j == jw ? iw : j]);
238 delta_cost += w * sqr (wanted - new_actual);
243 double wanted = target_dis [i * n + j],
244 w = weights [i * n + j];
245 double actual = hamming_dis (perm[i], perm[j]);
246 delta_cost -= w * sqr (wanted - actual);
247 double new_actual = hamming_dis (perm[i], perm[jw]);
248 delta_cost += w * sqr (wanted - new_actual);
252 double wanted = target_dis [i * n + j],
253 w = weights [i * n + j];
254 double actual = hamming_dis (perm[i], perm[j]);
255 delta_cost -= w * sqr (wanted - actual);
256 double new_actual = hamming_dis (perm[i], perm[iw]);
257 delta_cost += w * sqr (wanted - new_actual);
267 ReproduceWithHammingObjective (
269 const std::vector<double> & dis_table,
270 double dis_weight_factor):
271 nbits (nbits), dis_weight_factor (dis_weight_factor)
274 FAISS_ASSERT (dis_table.size() == n * n);
275 set_affine_target_dis (dis_table);
278 void set_affine_target_dis (
const std::vector<double> & dis_table)
280 double sum = 0, sum2 = 0;
282 for (
int i = 0; i < n2; i++) {
283 sum += dis_table [i];
284 sum2 += dis_table [i] * dis_table [i];
286 double mean = sum / n2;
287 double stddev = sqrt(sum2 / n2 - (sum / n2) * (sum / n2));
289 target_dis.resize (n2);
291 for (
int i = 0; i < n2; i++) {
293 double td = (dis_table [i] - mean) / stddev * sqrt(nbits / 4) +
297 weights.push_back (dis_weight (td));
302 virtual ~ReproduceWithHammingObjective () {}
309 double ReproduceDistancesObjective::dis_weight (
double x)
const
311 return exp (-dis_weight_factor * x);
315 double ReproduceDistancesObjective::get_source_dis (
int i,
int j)
const
321 double ReproduceDistancesObjective::compute_cost (
const int *perm)
const
324 for (
int i = 0; i < n; i++) {
325 for (
int j = 0; j < n; j++) {
327 double w =
weights [i * n + j];
328 double actual = get_source_dis (perm[i], perm[j]);
329 cost += w * sqr (wanted - actual);
337 double ReproduceDistancesObjective::cost_update(
338 const int *perm,
int iw,
int jw)
const
340 double delta_cost = 0;
341 for (
int i = 0; i < n; i++) {
343 for (
int j = 0; j < n; j++) {
346 double actual = get_source_dis (perm[i], perm[j]);
347 delta_cost -= w * sqr (wanted - actual);
348 double new_actual = get_source_dis (
350 perm[j == iw ? jw : j == jw ? iw : j]);
351 delta_cost += w * sqr (wanted - new_actual);
353 }
else if (i == jw) {
354 for (
int j = 0; j < n; j++) {
357 double actual = get_source_dis (perm[i], perm[j]);
358 delta_cost -= w * sqr (wanted - actual);
359 double new_actual = get_source_dis (
361 perm[j == iw ? jw : j == jw ? iw : j]);
362 delta_cost += w * sqr (wanted - new_actual);
369 double actual = get_source_dis (perm[i], perm[j]);
370 delta_cost -= w * sqr (wanted - actual);
371 double new_actual = get_source_dis (perm[i], perm[jw]);
372 delta_cost += w * sqr (wanted - new_actual);
378 double actual = get_source_dis (perm[i], perm[j]);
379 delta_cost -= w * sqr (wanted - actual);
380 double new_actual = get_source_dis (perm[i], perm[iw]);
381 delta_cost += w * sqr (wanted - new_actual);
390 ReproduceDistancesObjective::ReproduceDistancesObjective (
392 const double *source_dis_in,
393 const double *target_dis_in,
394 double dis_weight_factor):
395 dis_weight_factor (dis_weight_factor),
396 target_dis (target_dis_in)
399 set_affine_target_dis (source_dis_in);
402 void ReproduceDistancesObjective::compute_mean_stdev (
403 const double *tab,
size_t n2,
404 double *mean_out,
double *stddev_out)
406 double sum = 0, sum2 = 0;
407 for (
int i = 0; i < n2; i++) {
409 sum2 += tab [i] * tab [i];
411 double mean = sum / n2;
412 double stddev = sqrt(sum2 / n2 - (sum / n2) * (sum / n2));
414 *stddev_out = stddev;
417 void ReproduceDistancesObjective::set_affine_target_dis (
418 const double *source_dis_in)
422 double mean_src, stddev_src;
423 compute_mean_stdev (source_dis_in, n2, &mean_src, &stddev_src);
425 double mean_target, stddev_target;
426 compute_mean_stdev (
target_dis, n2, &mean_target, &stddev_target);
428 printf (
"map mean %g std %g -> mean %g std %g\n",
429 mean_src, stddev_src, mean_target, stddev_target);
434 for (
int i = 0; i < n2; i++) {
436 source_dis[i] = (source_dis_in[i] - mean_src) / stddev_src
437 * stddev_target + mean_target;
451 template <
typename Ttab,
typename Taccu>
459 std::vector<Ttab> n_gt;
467 const Ttab *p = n_gt.data();
468 for (
int i = 0; i < nc; i++) {
470 for (
int j = 0; j < nc; j++) {
472 for (
int k = 0; k < nc; k++) {
474 if (hamming_dis (ip, jp) <
475 hamming_dis (ip, kp)) {
498 if (iw > jw) std::swap (iw, jw);
501 const Ttab * n_gt_i = n_gt.data();
502 for (
int i = 0; i < nc; i++) {
504 int ip = perm [i == iw ? jw : i == jw ? iw : i];
512 accu += update_i_plane (perm, iw, jw,
522 Taccu update_i (
const int *perm,
int iw,
int jw,
523 int ip0,
int ip,
const Ttab * n_gt_i)
const
526 const Ttab *n_gt_ij = n_gt_i;
527 for (
int j = 0; j < nc; j++) {
529 int jp = perm [j == iw ? jw : j == jw ? iw : j];
530 for (
int k = 0; k < nc; k++) {
532 int kp = perm [k == iw ? jw : k == jw ? iw : k];
533 int ng = n_gt_ij [k];
534 if (hamming_dis (ip, jp) < hamming_dis (ip, kp)) {
537 if (hamming_dis (ip0, jp0) < hamming_dis (ip0, kp0)) {
547 Taccu update_i_plane (
const int *perm,
int iw,
int jw,
548 int ip0,
int ip,
const Ttab * n_gt_i)
const
551 const Ttab *n_gt_ij = n_gt_i;
553 for (
int j = 0; j < nc; j++) {
554 if (j != iw && j != jw) {
556 for (
int k = 0; k < nc; k++) {
557 if (k != iw && k != jw) {
559 Ttab ng = n_gt_ij [k];
560 if (hamming_dis (ip, jp) < hamming_dis (ip, kp)) {
563 if (hamming_dis (ip0, jp) < hamming_dis (ip0, kp)) {
575 inline Taccu
update_k (
const int *perm,
int iw,
int jw,
576 int ip0,
int ip,
int jp0,
int jp,
578 const Ttab * n_gt_ij)
const
582 int kp = perm [k == iw ? jw : k == jw ? iw : k];
583 Ttab ng = n_gt_ij [k];
584 if (hamming_dis (ip, jp) < hamming_dis (ip, kp)) {
587 if (hamming_dis (ip0, jp0) < hamming_dis (ip0, kp0)) {
595 int ip0,
int ip,
int jp0,
int jp,
596 const Ttab * n_gt_ij)
const
599 for (
int k = 0; k < nc; k++) {
600 if (k == iw || k == jw)
continue;
602 Ttab ng = n_gt_ij [k];
603 if (hamming_dis (ip, jp) < hamming_dis (ip, kp)) {
606 if (hamming_dis (ip0, jp0) < hamming_dis (ip0, kp)) {
616 int ip0,
int ip,
const Ttab * n_gt_i)
const
619 const Ttab *n_gt_ij = n_gt_i;
621 for (
int j = 0; j < nc; j++) {
623 int jp = perm [j == iw ? jw : j == jw ? iw : j];
625 accu +=
update_k (perm, iw, jw, ip0, ip, jp0, jp, iw, n_gt_ij);
626 accu +=
update_k (perm, iw, jw, ip0, ip, jp0, jp, jw, n_gt_ij);
629 accu +=
update_j_line (perm, iw, jw, ip0, ip, jp0, jp, n_gt_ij);
646 virtual double cost_update (
const int *perm,
int iw,
int jw)
const
652 virtual ~Score3Computer () {}
662 bool operator () (
int a,
int b) {
return tab[a] < tab[b]; }
670 const uint32_t *qcodes, *bcodes;
671 const float *gt_distances;
674 const uint32_t *qcodes,
const uint32_t *bcodes,
675 const float *gt_distances):
676 nbits(nbits), nq(nq), nb(nb), qcodes(qcodes),
677 bcodes(bcodes), gt_distances(gt_distances)
680 n_gt.resize (nc * nc * nc);
685 double rank_weight (
int r)
687 return 1.0 / (r + 1);
696 const std::vector<int> & b)
698 int nb = b.size(), na = a.size();
702 for (
int i = 0; i < na; i++) {
704 while (j < nb && ai >= b[j]) j++;
707 for (
int k = j; k < b.size(); k++)
708 accu_i += rank_weight (b[k] - ai);
710 accu += rank_weight (ai) * accu_i;
718 for (
int q = 0; q < nq; q++) {
719 const float *gtd = gt_distances + q * nb;
720 const uint32_t *cb = bcodes;
721 float * n_gt_q = & n_gt [qcodes[q] * nc * nc];
723 printf(
"init gt for q=%d/%d \r", q, nq); fflush(stdout);
725 std::vector<int> rankv (nb);
726 int * ranks = rankv.data();
729 std::vector<std::vector<int> > tab (nc);
733 for (
int j = 0; j < nb; j++) ranks[j] = j;
734 std::sort (ranks, ranks + nb, s);
737 for (
int rank = 0; rank < nb; rank++) {
738 int i = ranks [rank];
739 tab [cb[i]].push_back (rank);
745 for (
int i = 0; i < nc; i++) {
746 std::vector<int> & di = tab[i];
747 for (
int j = 0; j < nc; j++) {
748 std::vector<int> & dj = tab[j];
767 PolysemousTraining::PolysemousTraining ()
770 ntrain_permutation = 0;
771 dis_weight_factor = log(2);
783 int nbits = pq.
nbits;
785 #pragma omp parallel for
786 for (
int m = 0; m < pq.
M; m++) {
787 std::vector<double> dis_table;
793 for (
int i = 0; i < n; i++) {
794 for (
int j = 0; j < n; j++) {
795 dis_table.push_back (
fvec_L2sqr (centroids + i * dsub,
796 centroids + j * dsub,
801 std::vector<int> perm (n);
802 ReproduceWithHammingObjective obj (
809 if (log_pattern.size()) {
811 snprintf (fname, 256, log_pattern.c_str(), m);
812 printf (
"opening log file %s\n", fname);
813 optim.logfile = fopen (fname,
"w");
814 FAISS_ASSERT (optim.logfile || !
"could not open logfile");
816 double final_cost = optim.run_optimization (perm.data());
819 printf (
"SimulatedAnnealingOptimizer for m=%d: %g -> %g\n",
823 if (log_pattern.size()) fclose (optim.logfile);
825 std::vector<float> centroids_copy;
826 for (
int i = 0; i < dsub * n; i++)
827 centroids_copy.push_back (centroids[i]);
829 for (
int i = 0; i < n; i++)
830 memcpy (centroids + perm[i] * dsub,
831 centroids_copy.data() + i * dsub,
832 dsub *
sizeof(centroids[0]));
845 int nbits = pq.
nbits;
847 std::vector<uint8_t> all_codes (pq.
code_size * n);
854 pq.compute_sdc_table ();
856 #pragma omp parallel for
857 for (
int m = 0; m < pq.
M; m++) {
859 std::vector <uint32_t> codes;
860 std::vector <float> gt_distances;
863 std::vector<float> xtrain (n * dsub);
864 for (
int i = 0; i < n; i++)
865 memcpy (xtrain.data() + i * dsub,
866 x + i * pq.
d + m * dsub,
867 sizeof(float) * dsub);
870 for (
int i = 0; i < n; i++)
871 codes [i] = all_codes [i * pq.
code_size + m];
873 nq = n / 4; nb = n - nq;
874 const float *xq = xtrain.data();
875 const float *xb = xq + nq * dsub;
877 gt_distances.resize (nq * nb);
882 gt_distances.data());
885 codes.resize (2 * nq);
886 for (
int i = 0; i < nq; i++)
887 codes[i] = codes [i + nq] = i;
889 gt_distances.resize (nq * nb);
891 memcpy (gt_distances.data (),
893 sizeof (float) * nq * nb);
900 codes.data(), codes.data() + nq,
901 gt_distances.data ());
903 printf(
" m=%d, nq=%ld, nb=%ld, intialize RankingScore "
910 if (log_pattern.size()) {
912 snprintf (fname, 256, log_pattern.c_str(), m);
913 printf (
"opening log file %s\n", fname);
914 optim.logfile = fopen (fname,
"w");
915 FAISS_ASSERT (optim.logfile || !
"could not open logfile");
918 std::vector<int> perm (pq.
ksub);
920 double final_cost = optim.run_optimization (perm.data());
921 printf (
"SimulatedAnnealingOptimizer for m=%d: %g -> %g\n",
924 if (log_pattern.size()) fclose (optim.logfile);
930 std::vector<float> centroids_copy;
931 for (
int i = 0; i < dsub * pq.
ksub; i++)
932 centroids_copy.push_back (centroids[i]);
934 for (
int i = 0; i < pq.
ksub; i++)
935 memcpy (centroids + perm[i] * dsub,
936 centroids_copy.data() + i * dsub,
937 dsub *
sizeof(centroids[0]));
946 size_t n,
const float *x)
const
948 if (optimization_type == OT_None) {
956 pq.compute_sdc_table ();
random generator that can be used in multithreaded contexts
size_t nbits
number of bits per quantization index
float fvec_L2sqr(const float *x, const float *y, size_t d)
Squared L2 distance between two vectors.
size_t byte_per_idx
nb bytes per code component (1 or 2)
Taccu compute_update(const int *perm, int iw, int jw) const
std::vector< float > sdc_table
Symmetric Distance Table.
SimulatedAnnealingOptimizer(PermutationObjective *obj, const SimulatedAnnealingParameters &p)
logs values of the cost function
int n
size of the permutation
Taccu compute(const int *perm) const
size_t dsub
dimensionality of each subvector
void compute_codes(const float *x, uint8_t *codes, size_t n) const
same as compute_code for several vectors
Taccu update_j_line(const int *perm, int iw, int jw, int ip0, int ip, int jp0, int jp, const Ttab *n_gt_ij) const
compute update on a line of k's, where i and j are swapped
const double * target_dis
wanted distances (size n^2)
size_t code_size
byte per indexed vector
double init_cost
remember intial cost of optimization
int rand_int()
random 31-bit positive integer
size_t ksub
number of centroids for each subquantizer
void optimize_ranking(ProductQuantizer &pq, size_t n, const float *x) const
called by optimize_pq_for_hamming
void pairwise_L2sqr(long d, long nq, const float *xq, long nb, const float *xb, float *dis, long ldq, long ldb, long ldd)
double getmillisecs()
ms elapsed since some arbitrary epoch
std::vector< double > weights
weights for each distance (size n^2)
double accum_gt_weight_diff(const std::vector< int > &a, const std::vector< int > &b)
parameters used for the simulated annealing method
Taccu update_i_cross(const int *perm, int iw, int jw, int ip0, int ip, const Ttab *n_gt_i) const
considers the 2 pairs of crossing lines j=iw or jw and k = iw or kw
size_t M
number of subquantizers
abstract class for the loss function
Taccu update_k(const int *perm, int iw, int jw, int ip0, int ip, int jp0, int jp, int k, const Ttab *n_gt_ij) const
used for the 8 cells were the 3 indices are swapped
std::vector< double > source_dis
"real" corrected distances (size n^2)
float * get_centroids(size_t m, size_t i)
return the centroids associated with subvector m
void optimize_reproduce_distances(ProductQuantizer &pq) const
called by optimize_pq_for_hamming
void optimize_pq_for_hamming(ProductQuantizer &pq, size_t n, const float *x) const
size_t d
size of the input vectors
Simulated annealing optimization algorithm for permutations.
virtual double compute_cost(const int *perm) const