Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
/data/users/matthijs/github_faiss/faiss/index_io.cpp
1 
2 /**
3  * Copyright (c) 2015-present, Facebook, Inc.
4  * All rights reserved.
5  *
6  * This source code is licensed under the CC-by-NC license found in the
7  * LICENSE file in the root directory of this source tree.
8  */
9 
10 // Copyright 2004-present Facebook. All Rights Reserved.
11 
12 #include "index_io.h"
13 
14 #include <cstdio>
15 #include <cstdlib>
16 
17 #include <sys/mman.h>
18 
19 #include "FaissAssert.h"
20 
21 #include "IndexFlat.h"
22 #include "VectorTransform.h"
23 #include "IndexLSH.h"
24 #include "IndexPQ.h"
25 #include "IndexIVF.h"
26 #include "IndexIVFPQ.h"
27 
28 /*************************************************************
29  * The I/O format is the content of the class. For objects that are
30  * inherited, like Index, a 4-character-code indicates which child
31  * class this is an instance of.
32  *
33  * In this case, the fields of the parent class are written first,
34  * then the ones for the child classes. Note that this requires
35  * classes to be serialized to have a constructor without parameters,
36  * so that the fields can be filled in later.
37  **************************************************************/
38 
39 
40 
41 namespace faiss {
42 
/// Pack a 4-character code into a 32-bit integer, with the first character
/// in the least-significant byte.
///
/// @param sx  pointer to at least 4 readable characters (must not be null)
/// @return    sx[0] | sx[1] << 8 | sx[2] << 16 | sx[3] << 24
static uint32_t fourcc (const char sx[4]) {
    const unsigned char *x = reinterpret_cast<const unsigned char *> (sx);
    // Widen every byte before shifting: an unsigned char promotes to int, and
    // shifting a value >= 0x80 left by 24 would overflow that signed int.
    return  static_cast<uint32_t> (x[0])        |
           (static_cast<uint32_t> (x[1]) << 8)  |
           (static_cast<uint32_t> (x[2]) << 16) |
           (static_cast<uint32_t> (x[3]) << 24);
}
47 
48 /*************************************************************
49  * I/O macros
50  *
51  * we use macros so that we have a line number to report in
52  * abort (). This makes debugging a lot easier.
53  **************************************************************/
54 
55 
// Write n elements starting at ptr to the FILE* named f in the enclosing
// scope. The element size comes from the pointee type of ptr. A short write
// trips FAISS_ASSERT.
#define WRITEANDCHECK(ptr, n) {                                         \
        size_t n_written_ = fwrite (ptr, sizeof (* (ptr)), n, f);       \
        FAISS_ASSERT (n_written_ == (n) || !"write error");             \
    }
60 
// Read n elements into ptr from the FILE* named f in the enclosing scope.
// The element size comes from the pointee type of ptr. A short read trips
// FAISS_ASSERT with a message that names the read (not a write) as the
// failing operation.
#define READANDCHECK(ptr, n) {                                          \
        size_t ret = fread (ptr, sizeof (* (ptr)), n, f);               \
        FAISS_ASSERT (ret == (n) || !"read error");                     \
    }
65 
// Single-object shorthands: write / read exactly one value of x's type.
// x must be an lvalue because its address is taken.
#define WRITE1(x) WRITEANDCHECK (&(x), 1)
#define READ1(x)  READANDCHECK (&(x), 1)
68 
// Serialize a vector: its element count as a size_t, then the raw elements
// taken from (vec).data ().
#define WRITEVECTOR(vec) {                                              \
        size_t n_elems_ = (vec).size ();                                \
        WRITEANDCHECK (&n_elems_, 1);                                   \
        WRITEANDCHECK ((vec).data (), n_elems_);                        \
    }
74 
// Deserialize a vector written by WRITEVECTOR: a size_t element count, then
// the raw elements. The count is read with the same type it was written with
// (size_t), so both sides agree even where long and size_t differ in width.
// Counts of 2^40 or more are rejected as corrupt; this also catches values
// that would have been negative when read as a signed integer.
#define READVECTOR(vec) {                                               \
        size_t size;                                                    \
        READANDCHECK (&size, 1);                                        \
        FAISS_ASSERT (size < (uint64_t (1) << 40));                     \
        (vec).resize (size);                                            \
        READANDCHECK ((vec).data (), size);                             \
    }
82 
83 
84 // Macros for read/write arrays aligned to 16 bytes in the
85 // file. Useful when mmapped.
86 
// Write an array so that its payload starts on a 16-byte file offset.
// Layout: element count (size_t), one byte holding the number of pad bytes
// that follow it, the zero pad bytes, then size_in elements from tab.
// The file position is checked before it is used as an index into the
// padding buffer: ftell reports failure with a negative value.
#define WRITETABPAD16(tab, size_in) {                                   \
        size_t size = (size_in);                                        \
        WRITEANDCHECK (&size, 1);                                       \
        uint8_t padding[16] = {0};                                      \
        long pos_ = ftell (f);                                          \
        FAISS_ASSERT (pos_ >= 0 || !"ftell failed");                    \
        int idx = pos_ % 16;                                            \
        padding [idx] = 15 - idx;                                       \
        WRITEANDCHECK (padding + idx, 16 - idx);                        \
        WRITEANDCHECK ((tab), size);                                    \
    }
96 
// Read an array written by WRITETABPAD16 into a freshly new[]-allocated
// buffer assigned to tab. The stored element count must equal expected_size,
// and the pad-length byte must be below 16.
#define READTABPAD16(tab, basetype, expected_size) {                    \
        size_t n_stored_;                                               \
        READANDCHECK (&n_stored_, 1);                                   \
        FAISS_ASSERT ((expected_size) == n_stored_);                    \
        uint8_t n_pad_;                                                 \
        uint8_t pad_bytes_[16];                                         \
        READ1 (n_pad_);                                                 \
        FAISS_ASSERT (n_pad_ < 16);                                     \
        READANDCHECK (pad_bytes_, n_pad_);                              \
        (tab) = new basetype [n_stored_];                               \
        READANDCHECK ((tab), n_stored_);                                \
    }
108 
// Read only the header of an array written by WRITETABPAD16, store the file
// offset of its payload in taboffset, and skip over the payload.
// The stored element count must equal expected_size. Failures of ftell and
// fseek are detected instead of silently yielding a bogus offset.
#define TABOFFSETPAD16(taboffset, basetype, expected_size) {            \
        size_t size;                                                    \
        READANDCHECK (&size, 1);                                        \
        FAISS_ASSERT ((expected_size) == size);                         \
        uint8_t padding[16], npad;                                      \
        READ1(npad);                                                    \
        FAISS_ASSERT (npad < 16);                                       \
        READANDCHECK (padding, npad);                                   \
        taboffset = ftell(f);                                           \
        FAISS_ASSERT ((taboffset) >= 0 || !"ftell failed");             \
        int seek_rc_ = fseek (f, sizeof(basetype) * size, SEEK_CUR);    \
        FAISS_ASSERT (seek_rc_ == 0 || !"fseek failed");                \
    }
121 
122 
123 
124 
125 /*************************************************************
126  * Write
127  **************************************************************/
128 
129 static void write_index_header (const Index *idx, FILE *f) {
130  WRITE1 (idx->d);
131  WRITE1 (idx->ntotal);
132  Index::idx_t dummy = 1 << 20;
133  WRITE1 (dummy);
134  WRITE1 (dummy);
135  WRITE1 (idx->is_trained);
136  WRITE1 (idx->metric_type);
137 }
138 
139 
140 
141 void write_VectorTransform (const VectorTransform *vt, FILE *f) {
142  if (const LinearTransform * lt =
143  dynamic_cast < const LinearTransform *> (vt)) {
144  if (dynamic_cast<const RandomRotationMatrix *>(lt)) {
145  uint32_t h = fourcc ("rrot");
146  WRITE1 (h);
147  } else if (const PCAMatrix * pca =
148  dynamic_cast<const PCAMatrix *>(lt)) {
149  uint32_t h = fourcc ("PcAm");
150  WRITE1 (h);
151  WRITE1 (pca->eigen_power);
152  WRITE1 (pca->random_rotation);
153  WRITE1 (pca->balanced_bins);
154  WRITEVECTOR (pca->mean);
155  WRITEVECTOR (pca->eigenvalues);
156  WRITEVECTOR (pca->PCAMat);
157  } else {
158  // generic LinearTransform (includes OPQ)
159  uint32_t h = fourcc ("LTra");
160  WRITE1 (h);
161  }
162  WRITE1 (lt->have_bias);
163  WRITEVECTOR (lt->A);
164  WRITEVECTOR (lt->b);
165  } else if (const RemapDimensionsTransform *rdt =
166  dynamic_cast<const RemapDimensionsTransform *>(vt)) {
167  uint32_t h = fourcc ("RmDT");
168  WRITE1 (h);
169  WRITEVECTOR (rdt->map);
170  } else FAISS_ASSERT (!"cannot serialize this");
171  // common fields
172  WRITE1 (vt->d_in);
173  WRITE1 (vt->d_out);
174  WRITE1 (vt->is_trained);
175 }
176 
177 static void write_ProductQuantizer (const ProductQuantizer *pq, FILE *f) {
178  WRITE1 (pq->d);
179  WRITE1 (pq->M);
180  WRITE1 (pq->nbits);
181  WRITEVECTOR (pq->centroids);
182 }
183 
184 void write_ProductQuantizer (const ProductQuantizer*pq, const char *fname) {
185  FILE *f = fopen (fname, "w");
186  if (!f) {
187  fprintf (stderr, "cannot open %s for writing:", fname);
188  perror ("");
189  abort ();
190  }
191  write_ProductQuantizer (pq, f);
192  fclose (f);
193 }
194 
195 
196 
197 static void write_ivf_header (const IndexIVF * ivf, FILE *f,
198  bool include_ids = true) {
199  write_index_header (ivf, f);
200  WRITE1 (ivf->nlist);
201  WRITE1 (ivf->nprobe);
202  write_index (ivf->quantizer, f);
203  if (include_ids) {
204  for (size_t i = 0; i < ivf->nlist; i++)
205  WRITEVECTOR (ivf->ids[i]);
206  }
207  WRITE1 (ivf->maintain_direct_map);
208  WRITEVECTOR (ivf->direct_map);
209 }
210 
211 void write_index (const Index *idx, FILE *f) {
212  if (const IndexFlat * idxf = dynamic_cast<const IndexFlat *> (idx)) {
213  uint32_t h = fourcc (
214  idxf->metric_type == METRIC_INNER_PRODUCT ? "IxFI" :
215  idxf->metric_type == METRIC_L2 ? "IxF2" : nullptr);
216  WRITE1 (h);
217  write_index_header (idx, f);
218  WRITEVECTOR (idxf->xb);
219  } else if(const IndexLSH * idxl = dynamic_cast<const IndexLSH *> (idx)) {
220  uint32_t h = fourcc ("IxHe");
221  WRITE1 (h);
222  write_index_header (idx, f);
223  WRITE1 (idxl->nbits);
224  WRITE1 (idxl->rotate_data);
225  WRITE1 (idxl->train_thresholds);
226  WRITEVECTOR (idxl->thresholds);
227  WRITE1 (idxl->bytes_per_vec);
228  write_VectorTransform (&idxl->rrot, f);
229  WRITEVECTOR (idxl->codes);
230  } else if(const IndexPQ * idxp = dynamic_cast<const IndexPQ *> (idx)) {
231  uint32_t h = fourcc ("IxPq");
232  WRITE1 (h);
233  write_index_header (idx, f);
234  write_ProductQuantizer (&idxp->pq, f);
235  WRITEVECTOR (idxp->codes);
236  // search params -- maybe not useful to store?
237  WRITE1 (idxp->search_type);
238  WRITE1 (idxp->encode_signs);
239  WRITE1 (idxp->polysemous_ht);
240  } else if(const IndexIVFFlat * ivfl =
241  dynamic_cast<const IndexIVFFlat *> (idx)) {
242  uint32_t h = fourcc ("IvFl");
243  WRITE1 (h);
244  write_ivf_header (ivfl, f);
245  for(int i = 0; i < ivfl->nlist; i++)
246  WRITEVECTOR (ivfl->vecs[i]);
247  } else if(const IndexIVFPQ * ivpq =
248  dynamic_cast<const IndexIVFPQ *> (idx)) {
249  const IndexIVFPQR * ivfpqr = dynamic_cast<const IndexIVFPQR *> (idx);
250  const IndexIVFPQCompact * ivfpqc =
251  dynamic_cast<const IndexIVFPQCompact *> (idx);
252  uint32_t h = fourcc (ivfpqr ? "IvQR" : ivfpqc ? "IvPC" : "IvPQ");
253  WRITE1 (h);
254  write_ivf_header (ivpq, f, !ivfpqc);
255  WRITE1 (ivpq->by_residual);
256  WRITE1 (ivpq->code_size);
257  write_ProductQuantizer (&ivpq->pq, f);
258  if (!ivfpqc) {
259  for(int i = 0; i < ivpq->codes.size(); i++)
260  WRITEVECTOR (ivpq->codes[i]);
261  }
262  if (ivfpqr) {
263  write_ProductQuantizer (&ivfpqr->refine_pq, f);
264  WRITEVECTOR (ivfpqr->refine_codes);
265  WRITE1 (ivfpqr->k_factor);
266  }
267  if (ivfpqc) {
268  WRITETABPAD16 (ivfpqc->limits, ivfpqc->nlist + 1);
269  WRITETABPAD16 (ivfpqc->compact_ids, ivfpqc->ntotal);
270  WRITETABPAD16 (ivfpqc->compact_codes,
271  ivfpqc->ntotal * ivfpqc->code_size);
272  }
273  } else if(const IndexPreTransform * ixpt =
274  dynamic_cast<const IndexPreTransform *> (idx)) {
275  uint32_t h = fourcc ("IxPT");
276  WRITE1 (h);
277  write_index_header (ixpt, f);
278  int nt = ixpt->chain.size();
279  WRITE1 (nt);
280  for (int i = 0; i < nt; i++)
281  write_VectorTransform (ixpt->chain[i], f);
282  write_index (ixpt->index, f);
283  } else if(const MultiIndexQuantizer * imiq =
284  dynamic_cast<const MultiIndexQuantizer *> (idx)) {
285  uint32_t h = fourcc ("Imiq");
286  WRITE1 (h);
287  write_index_header (imiq, f);
288  write_ProductQuantizer (&imiq->pq, f);
289  } else if(const IndexRefineFlat * idxrf =
290  dynamic_cast<const IndexRefineFlat *> (idx)) {
291  uint32_t h = fourcc ("IxRF");
292  WRITE1 (h);
293  write_index_header (idxrf, f);
294  write_index (idxrf->base_index, f);
295  write_index (&idxrf->refine_index, f);
296  WRITE1 (idxrf->k_factor);
297  } else {
298  FAISS_ASSERT (!"don't know how to serialize this type of index");
299  }
300 }
301 
302 void write_index (const Index *idx, const char *fname) {
303  FILE *f = fopen (fname, "w");
304  if (!f) {
305  fprintf (stderr, "cannot open %s for writing:", fname);
306  perror ("");
307  abort ();
308  }
309  write_index (idx, f);
310  fclose (f);
311 }
312 
313 void write_VectorTransform (const VectorTransform *vt, const char *fname) {
314  FILE *f = fopen (fname, "w");
315  if (!f) {
316  fprintf (stderr, "cannot open %s for writing:", fname);
317  perror ("");
318  abort ();
319  }
320  write_VectorTransform (vt, f);
321  fclose (f);
322 }
323 
324 /*************************************************************
325  * Read
326  **************************************************************/
327 
328 static void read_index_header (Index *idx, FILE *f) {
329  READ1 (idx->d);
330  READ1 (idx->ntotal);
331  Index::idx_t dummy;
332  READ1 (dummy);
333  READ1 (dummy);
334  READ1 (idx->is_trained);
335  READ1 (idx->metric_type);
336  idx->verbose = false;
337 }
338 
339 VectorTransform* read_VectorTransform (FILE *f) {
340  uint32_t h;
341  READ1 (h);
342  VectorTransform *vt = nullptr;
343 
344  if (h == fourcc ("rrot") || h == fourcc ("PCAm") ||
345  h == fourcc ("LTra") || h == fourcc ("PcAm")) {
346  LinearTransform *lt = nullptr;
347  if (h == fourcc ("rrot")) {
348  lt = new RandomRotationMatrix ();
349  } else if (h == fourcc ("PCAm") ||
350  h == fourcc ("PcAm")) {
351  PCAMatrix * pca = new PCAMatrix ();
352  READ1 (pca->eigen_power);
353  READ1 (pca->random_rotation);
354  if (h == fourcc ("PcAm"))
355  READ1 (pca->balanced_bins);
356  READVECTOR (pca->mean);
357  READVECTOR (pca->eigenvalues);
358  READVECTOR (pca->PCAMat);
359  lt = pca;
360  } else if (h == fourcc ("LTra")) {
361  lt = new LinearTransform ();
362  }
363  READ1 (lt->have_bias);
364  READVECTOR (lt->A);
365  READVECTOR (lt->b);
366  vt = lt;
367  } else if (h == fourcc ("RmDT")) {
368  RemapDimensionsTransform *rdt = new RemapDimensionsTransform ();
369  READVECTOR (rdt->map);
370  vt = rdt;
371  } else FAISS_ASSERT(!"fourcc not recognized");
372  READ1 (vt->d_in);
373  READ1 (vt->d_out);
374  READ1 (vt->is_trained);
375  return vt;
376 }
377 
378 static void read_ProductQuantizer (ProductQuantizer *pq, FILE *f) {
379  READ1 (pq->d);
380  READ1 (pq->M);
381  READ1 (pq->nbits);
382  pq->set_derived_values ();
383  READVECTOR (pq->centroids);
384 }
385 
386 ProductQuantizer * read_ProductQuantizer (const char*fname) {
387  FILE *f = fopen (fname, "r");
388  if (!f) {
389  fprintf (stderr, "cannot open %s for reading:", fname);
390  perror ("");
391  abort ();
392  }
393  ProductQuantizer *pq = new ProductQuantizer();
394  read_ProductQuantizer(pq, f);
395  fclose(f);
396  return pq;
397 }
398 
399 static void read_ivf_header (IndexIVF * ivf, FILE *f,
400  bool include_ids = true) {
401  read_index_header (ivf, f);
402  READ1 (ivf->nlist);
403  READ1 (ivf->nprobe);
404  ivf->quantizer = read_index (f);
405  ivf->own_fields = true;
406  if (include_ids) {
407  ivf->ids.resize (ivf->nlist);
408  for (size_t i = 0; i < ivf->nlist; i++)
409  READVECTOR (ivf->ids[i]);
410  }
411  READ1 (ivf->maintain_direct_map);
412  READVECTOR (ivf->direct_map);
413 }
414 
415 static IndexIVFPQ *read_ivfpq (FILE *f, uint32_t h, bool try_mmap)
416 {
417 
418  IndexIVFPQR *ivfpqr =
419  h == fourcc ("IvQR") ? new IndexIVFPQR () : nullptr;
420  IndexIVFPQCompact *ivfpqc =
421  h == fourcc ("IvPC") ? new IndexIVFPQCompact () : nullptr;
422  IndexIVFPQ * ivpq = ivfpqr ? ivfpqr : ivfpqc ? ivfpqc : new IndexIVFPQ ();
423  read_ivf_header (ivpq, f, !ivfpqc);
424  READ1 (ivpq->by_residual);
425  READ1 (ivpq->code_size);
426  read_ProductQuantizer (&ivpq->pq, f);
427  if (!ivfpqc) {
428  ivpq->codes.resize (ivpq->nlist);
429  for (size_t i = 0; i < ivpq->nlist; i++)
430  READVECTOR (ivpq->codes[i]);
431  }
432  // precomputed table not stored. It is cheaper to recompute it
433  ivpq->use_precomputed_table = 0;
434  if (ivpq->by_residual)
435  ivpq->precompute_table ();
436  if (ivfpqr) {
437  read_ProductQuantizer (&ivfpqr->refine_pq, f);
438  READVECTOR (ivfpqr->refine_codes);
439  READ1 (ivfpqr->k_factor);
440  }
441  if (ivfpqc) {
442  if (!try_mmap) {
443  READTABPAD16 (ivfpqc->limits, uint32_t, ivfpqc->nlist + 1);
444  READTABPAD16 (ivfpqc->compact_ids, uint32_t, ivfpqc->ntotal);
445  READTABPAD16 (ivfpqc->compact_codes, uint8_t,
446  ivfpqc->ntotal * ivfpqc->code_size);
447  } else {
448  long offset_limits, offset_compact_ids, offset_compact_codes;
449  TABOFFSETPAD16 (offset_limits, uint32_t, ivfpqc->nlist + 1);
450  TABOFFSETPAD16 (offset_compact_ids, uint32_t, ivfpqc->ntotal);
451  TABOFFSETPAD16 (offset_compact_codes, uint8_t,
452  ivfpqc->ntotal * ivfpqc->code_size);
453  ivfpqc->mmap_length = ftell (f);
454  // mmap the whole file
455  ivfpqc->mmap_buffer = (char*)mmap (
456  nullptr, ivfpqc->mmap_length,
457  PROT_READ, MAP_SHARED, fileno (f), 0);
458  if (!ivfpqc->mmap_buffer) {
459  perror ("mmap failed");
460  abort ();
461  }
462  // at this point the file can be closed, it does not
463  // invalidate the mapping
464  ivfpqc->limits = (uint32_t*)(ivfpqc->mmap_buffer + offset_limits);
465  ivfpqc->compact_ids = (uint32_t*)(ivfpqc->mmap_buffer +
466  offset_compact_ids);
467  ivfpqc->compact_codes = (uint8_t*)(ivfpqc->mmap_buffer +
468  offset_compact_codes);
469  }
470  }
471  return ivpq;
472 }
473 
474 int read_old_fmt_hack = 0;
475 
476 Index *read_index (FILE * f, bool try_mmap) {
477  Index * idx = nullptr;
478  uint32_t h;
479  READ1 (h);
480  if (h == fourcc ("IxFI") || h == fourcc ("IxF2")) {
481  IndexFlat *idxf;
482  if (h == fourcc ("IxFI")) idxf = new IndexFlatIP ();
483  else idxf = new IndexFlatL2 ();
484  read_index_header (idxf, f);
485  READVECTOR (idxf->xb);
486  FAISS_ASSERT (idxf->xb.size() == idxf->ntotal * idxf->d);
487  idx = idxf;
488  } else if (h == fourcc("IxHE") || h == fourcc("IxHe")) {
489  IndexLSH * idxl = new IndexLSH ();
490  read_index_header (idxl, f);
491  READ1 (idxl->nbits);
492  READ1 (idxl->rotate_data);
493  READ1 (idxl->train_thresholds);
494  READVECTOR (idxl->thresholds);
495  READ1 (idxl->bytes_per_vec);
496  if (h == fourcc("IxHE")) {
497  FAISS_ASSERT (idxl->nbits % 64 == 0 ||
498  !"can only read old format IndexLSH with "
499  "nbits multiple of 64");
500  idxl->bytes_per_vec *= 8;
501  }
502  {
503  RandomRotationMatrix *rrot = dynamic_cast<RandomRotationMatrix *>
504  (read_VectorTransform (f));
505  FAISS_ASSERT(rrot || !"expected a random rotation");
506  idxl->rrot = *rrot;
507  delete rrot;
508  }
509  READVECTOR (idxl->codes);
510  FAISS_ASSERT (idxl->rrot.d_in == idxl->d &&
511  idxl->rrot.d_out == idxl->nbits);
512  FAISS_ASSERT (idxl->codes.size() == idxl->ntotal * idxl->bytes_per_vec);
513  idx = idxl;
514  } else if (h == fourcc ("IxPQ") || h == fourcc ("IxPo") ||
515  h == fourcc ("IxPq")) {
516  // IxPQ and IxPo were merged into the same IndexPQ object
517  IndexPQ * idxp =new IndexPQ ();
518  read_index_header (idxp, f);
519  read_ProductQuantizer (&idxp->pq, f);
520  READVECTOR (idxp->codes);
521  if (h == fourcc ("IxPo") || h == fourcc ("IxPq")) {
522  READ1 (idxp->search_type);
523  READ1 (idxp->encode_signs);
524  READ1 (idxp->polysemous_ht);
525  }
526  // Old versoins of PQ all had metric_type set to INNER_PRODUCT
527  // when they were in fact using L2. Therefore, we force metric type
528  // to L2 when the old format is detected
529  if (h == fourcc ("IxPQ") || h == fourcc ("IxPo")) {
530  idxp->metric_type = METRIC_L2;
531  }
532  idx = idxp;
533  } else if(h == fourcc ("IvFl")) {
534  IndexIVFFlat * ivfl = new IndexIVFFlat ();
535  read_ivf_header (ivfl, f);
536  ivfl->vecs.resize (ivfl->nlist);
537  for (size_t i = 0; i < ivfl->nlist; i++)
538  READVECTOR (ivfl->vecs[i]);
539  idx = ivfl;
540 
541  } else if(h == fourcc ("IvPQ") || h == fourcc ("IvQR") ||
542  h == fourcc ("IvPC")) {
543 
544  idx = read_ivfpq (f, h, try_mmap);
545 
546  } else if(h == fourcc ("IxPT")) {
547  IndexPreTransform * ixpt = new IndexPreTransform();
548  ixpt->own_fields = true;
549  read_index_header (ixpt, f);
550  int nt;
551  if (read_old_fmt_hack == 2) {
552  nt = 1;
553  } else {
554  READ1 (nt);
555  }
556  for (int i = 0; i < nt; i++)
557  ixpt->chain.push_back (read_VectorTransform (f));
558  ixpt->index = read_index (f);
559  idx = ixpt;
560  } else if(h == fourcc ("Imiq")) {
562  read_index_header (imiq, f);
563  read_ProductQuantizer (&imiq->pq, f);
564  idx = imiq;
565  } else if(h == fourcc ("IxRF")) {
566  IndexRefineFlat *idxrf = new IndexRefineFlat ();
567  read_index_header (idxrf, f);
568  idxrf->base_index = read_index(f);
569  idxrf->own_fields = true;
570  IndexFlat *rf = dynamic_cast<IndexFlat*> (read_index (f));
571  std::swap (*rf, idxrf->refine_index);
572  delete rf;
573  READ1 (idxrf->k_factor);
574  idx = idxrf;
575  } else {
576  fprintf (stderr, "Index type 0x%08x not supported\n", h);
577  abort ();
578  }
579  idx->set_typename();
580  return idx;
581 }
582 
583 Index *read_index (const char *fname, bool try_mmap) {
584  FILE *f = fopen (fname, "r");
585  if (!f) {
586  fprintf (stderr, "cannot open %s for reading:", fname);
587  perror ("");
588  abort ();
589  }
590  Index *idx = read_index (f, try_mmap);
591  fclose (f);
592  return idx;
593 }
594 
595 VectorTransform *read_VectorTransform (const char *fname) {
596  FILE *f = fopen (fname, "r");
597  if (!f) {
598  fprintf (stderr, "cannot open %s for reading:", fname);
599  perror ("");
600  abort ();
601  }
602  VectorTransform *vt = read_VectorTransform (f);
603  fclose (f);
604  return vt;
605 }
606 
607 /*************************************************************
608  * cloning functions
609  **************************************************************/
610 
611 
612 
613 Index * clone_index (const Index *index)
614 {
615  Cloner cl;
616  return cl.clone_Index (index);
617 }
618 
// Copy-construct and return a new classname when obj is dynamically of that
// type; relies on classname having a copy constructor. The macro ends in a
// dangling `else`, so uses chain together: list them from the most specific
// class to the most general one and finish with a fallback statement.
#define TRYCLONE(classname, obj)                                            \
    if (const classname *typed_src_ =                                       \
            dynamic_cast<const classname *>(obj)) {                         \
        return new classname (*typed_src_);                                 \
    } else
625 
626 VectorTransform *Cloner::clone_VectorTransform (const VectorTransform *vt)
627 {
628  TRYCLONE (RemapDimensionsTransform, vt)
629  TRYCLONE (OPQMatrix, vt)
630  TRYCLONE (PCAMatrix, vt)
631  TRYCLONE (RandomRotationMatrix, vt)
632  TRYCLONE (LinearTransform, vt)
633  {
634  FAISS_ASSERT(!"clone not supported for this type of VectorTransform");
635  }
636  return nullptr;
637 }
638 
639 IndexIVF * Cloner::clone_IndexIVF (const IndexIVF *ivf)
640 {
641  TRYCLONE (IndexIVFPQR, ivf)
642  TRYCLONE (IndexIVFPQ, ivf)
643  TRYCLONE (IndexIVFFlat, ivf)
644  {
645  FAISS_ASSERT(!"clone not supported for this type of IndexIVF");
646  }
647  return nullptr;
648 }
649 
650 Index *Cloner::clone_Index (const Index *index)
651 {
652  TRYCLONE (IndexPQ, index)
653  TRYCLONE (IndexLSH, index)
654  TRYCLONE (IndexFlatL2, index)
655  TRYCLONE (IndexFlatIP, index)
656  TRYCLONE (IndexFlat, index)
657  TRYCLONE (MultiIndexQuantizer, index)
658  if (const IndexIVF * ivf = dynamic_cast<const IndexIVF*>(index)) {
659  IndexIVF *res = clone_IndexIVF (ivf);
660  res->own_fields = true;
661  res->quantizer = clone_Index (ivf->quantizer);
662  return res;
663  } else if (const IndexPreTransform * ipt =
664  dynamic_cast<const IndexPreTransform*> (index)) {
665  IndexPreTransform *res = new IndexPreTransform ();
666  res->d = ipt->d;
667  res->index = clone_Index (ipt->index);
668  for (int i = 0; i < ipt->chain.size(); i++)
669  res->chain.push_back (clone_VectorTransform (ipt->chain[i]));
670  res->own_fields = true;
671  return res;
672  } else {
673  FAISS_ASSERT(!"clone not supported for this type of Index");
674  }
675  return nullptr;
676 }
677 
678 
679 } // namespace faiss
std::vector< uint8_t > codes
Codes. Size ntotal * pq.code_size.
Definition: IndexPQ.h:35
Index * index
! chain of transforms
Randomly rotate a set of vectors.
Index * read_index(FILE *f, bool try_mmap)
Definition: index_io.cpp:476
int bytes_per_vec
nb of 8-bits per encoded vector
Definition: IndexLSH.h:29
std::vector< float > thresholds
thresholds to compare with
Definition: IndexLSH.h:35
bool train_thresholds
whether we train thresholds or use 0
Definition: IndexLSH.h:31
Index * base_index
faster index to pre-select the vectors that should be filtered
Definition: IndexFlat.h:111
IndexFlat refine_index
storage for full vectors
Definition: IndexFlat.h:108
bool own_fields
should the base index be deallocated?
Definition: IndexFlat.h:112
int d
vector dimension
Definition: Index.h:66
RandomRotationMatrix rrot
optional random rotation
Definition: IndexLSH.h:33
long idx_t
all indices are this type
Definition: Index.h:64
ProductQuantizer pq
The product quantizer used to encode the vectors.
Definition: IndexPQ.h:32
idx_t ntotal
total nb of indexed vectors
Definition: Index.h:67
bool own_fields
! the sub-index
MetricType metric_type
type of metric this index uses for search
Definition: Index.h:74
size_t nlist
number of possible key values
Definition: IndexIVF.h:47
int d_out
! input dimension
int nbits
nb of bits per vector
Definition: IndexLSH.h:28
std::vector< float > xb
database vectors, size ntotal * d
Definition: IndexFlat.h:26
int polysemous_ht
Hamming threshold used for polysemy.
Definition: IndexPQ.h:93
bool rotate_data
whether to apply a random rotation to input
Definition: IndexLSH.h:30
std::vector< uint8_t > codes
encoded dataset
Definition: IndexLSH.h:38
std::vector< std::vector< float > > vecs
Definition: IndexIVF.h:116