Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
/data/users/matthijs/github_faiss/faiss/index_io.cpp
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 // Copyright 2004-present Facebook. All Rights Reserved.
10 
11 #include "index_io.h"
12 
13 #include <cstdio>
14 #include <cstdlib>
15 
16 #include <sys/mman.h>
17 
18 #include "FaissAssert.h"
19 
20 #include "IndexFlat.h"
21 #include "VectorTransform.h"
22 #include "IndexLSH.h"
23 #include "IndexPQ.h"
24 #include "IndexIVF.h"
25 #include "IndexIVFPQ.h"
26 #include "MetaIndexes.h"
27 #include "IndexScalarQuantizer.h"
28 
29 /*************************************************************
30  * The I/O format is the content of the class. For objects that are
31  * inherited, like Index, a 4-character-code (fourcc) indicates which
32  * child class this is an instance of.
33  *
34  * In this case, the fields of the parent class are written first,
35  * then the ones for the child classes. Note that this requires
36  * classes to be serialized to have a constructor without parameters,
37  * so that the fields can be filled in later. The default constructor
38  * should set reasonable defaults for all fields.
39  *
40  * The fourccs are assigned arbitrarily. When the class changed (added
41  * or deprecated fields), the fourcc can be replaced. New code should
42  * be able to read the old fourcc and fill in new classes.
43  *
44  * TODO: serialization to strings for use in Python pickle or Torch
45  * serialization.
46  *
47  * TODO: in this file, the read functions that encounter errors may
48  * leak memory.
49  **************************************************************/
50 
51 
52 
53 namespace faiss {
54 
/// Pack a 4-character code into a 32-bit tag.
///
/// sx[0] lands in the least-significant byte of the result and sx[3]
/// in the most-significant byte. Exactly four bytes of sx are read; a
/// terminating NUL, if any, is ignored.
static uint32_t fourcc (const char sx[4]) {
    const unsigned char *x = reinterpret_cast<const unsigned char *> (sx);
    // widen every byte to uint32_t before shifting, so a byte >= 0x80 in
    // the top position is never shifted into the sign bit of an int
    return  static_cast<uint32_t> (x[0])        |
           (static_cast<uint32_t> (x[1]) <<  8) |
           (static_cast<uint32_t> (x[2]) << 16) |
           (static_cast<uint32_t> (x[3]) << 24);
}
59 
60 /*************************************************************
61  * I/O macros
62  *
63  * we use macros so that we have a line number to report in
64  * abort (). This makes debugging a lot easier.
65  **************************************************************/
66 
67 
// Write n items, each of size sizeof(*ptr), from ptr to the FILE* that
// must be called `f` in the scope where the macro is expanded. The item
// count returned by fwrite is compared with n and a mismatch is reported
// through FAISS_THROW_IF_NOT_MSG with the message "write error".
// The macro introduces a local named `ret` inside its own braces.
68 #define WRITEANDCHECK(ptr, n) { \
69  size_t ret = fwrite (ptr, sizeof (* (ptr)), n, f); \
70  FAISS_THROW_IF_NOT_MSG (ret == (n), "write error"); \
71  }
72 
// Read n items, each of size sizeof(*ptr), into ptr from the FILE* that
// must be called `f` in the scope where the macro is expanded. The item
// count returned by fread is compared with n and a mismatch (short read
// or error) is reported through FAISS_THROW_IF_NOT_MSG with the message
// "read error".
73 #define READANDCHECK(ptr, n) { \
74  size_t ret = fread (ptr, sizeof (* (ptr)), n, f); \
75  FAISS_THROW_IF_NOT_MSG (ret == (n), "read error"); \
76  }
77 
// Write / read exactly one object in its in-memory representation.
// x must be an lvalue because its address is taken.
78 #define WRITE1(x) WRITEANDCHECK(&(x), 1)
79 #define READ1(x) READANDCHECK(&(x), 1)
80 
// Write a contiguous container: first its element count as a size_t,
// then the elements themselves taken from (vec).data ().
81 #define WRITEVECTOR(vec) { \
82  size_t size = (vec).size (); \
83  WRITEANDCHECK (&size, 1); \
84  WRITEANDCHECK ((vec).data (), size); \
85  }
86 
// Read a container written by WRITEVECTOR: the element count is read
// into a long, rejected unless 0 <= count < 2^40, then vec is resized to
// that count and the elements are read into (vec).data ().
// NOTE(review): the count is written as a size_t and read back as a
// long, so this presumably assumes both have the same width -- confirm
// for the target platforms.
87 #define READVECTOR(vec) { \
88  long size; \
89  READANDCHECK (&size, 1); \
90  FAISS_THROW_IF_NOT (size >= 0 && size < (1L << 40)); \
91  (vec).resize (size); \
92  READANDCHECK ((vec).data (), size); \
93  }
94 
96  FILE *f;
97  ScopeFileCloser (FILE *f): f (f) {}
98  ~ScopeFileCloser () {fclose (f); }
99 };
100 
101 // Macros for read/write arrays aligned to 16 bytes in the
102 // file. Useful when mmapped.
103 
// Write an array so that its data starts on a 16-byte boundary of the
// file: first the element count (size_t), then 1 to 16 padding bytes,
// then size_in elements read from tab. The first padding byte stores how
// many further padding bytes follow it; those are all zero.
//
// The locals carry a pad16_ prefix so that they cannot shadow names used
// in the tab / size_in expressions at the expansion site (write_index,
// for instance, has a parameter called idx).
#define WRITETABPAD16(tab, size_in) { \
    size_t pad16_size = (size_in); \
    WRITEANDCHECK (&pad16_size, 1); \
    uint8_t padding[16] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; \
    long pad16_pos = ftell (f); \
    /* ftell returns -1 on failure, which would index before padding[] */ \
    FAISS_THROW_IF_NOT_MSG (pad16_pos >= 0, "ftell error"); \
    int pad16_idx = pad16_pos % 16; \
    padding [pad16_idx] = 15 - pad16_idx; \
    WRITEANDCHECK (padding + pad16_idx, 16 - pad16_idx); \
    WRITEANDCHECK ((tab), pad16_size); \
}
113 
// Read an array written by WRITETABPAD16 into freshly allocated memory.
// The stored element count must equal expected_size. One byte npad
// (required to be < 16) is read next, followed by npad padding bytes
// that are discarded. tab is then set to `new basetype [size]` and the
// elements are read into it.
// The macro itself never frees the allocation: if the final read fails,
// releasing tab is left to whatever owns it at the expansion site.
114 #define READTABPAD16(tab, basetype, expected_size) { \
115  size_t size; \
116  READANDCHECK (&size, 1); \
117  FAISS_THROW_IF_NOT ((expected_size) == size); \
118  uint8_t padding[16], npad; \
119  READ1(npad); \
120  FAISS_THROW_IF_NOT (npad < 16); \
121  READANDCHECK (padding, npad); \
122  (tab) = new basetype [size]; \
123  READANDCHECK ((tab), size); \
124 }
125 
// Read only the header of an array written by WRITETABPAD16 (element
// count, which must equal expected_size, then the padding), store the
// file offset of the first array element in taboffset and move the
// stream position past the array data without reading it.
// Failures of ftell and fseek are reported instead of being ignored, so
// that a bogus offset is never handed back to the caller.
#define TABOFFSETPAD16(taboffset, basetype, expected_size) { \
    size_t size; \
    READANDCHECK (&size, 1); \
    FAISS_THROW_IF_NOT ((expected_size) == size); \
    uint8_t padding[16], npad; \
    READ1(npad); \
    FAISS_THROW_IF_NOT (npad < 16); \
    READANDCHECK (padding, npad); \
    taboffset = ftell(f); \
    FAISS_THROW_IF_NOT_MSG ((taboffset) >= 0, "ftell error"); \
    FAISS_THROW_IF_NOT_MSG ( \
        fseek (f, sizeof(basetype) * size, SEEK_CUR) == 0, "fseek error"); \
}
138 
139 
140 
141 
142 /*************************************************************
143  * Write
144  **************************************************************/
145 
146 static void write_index_header (const Index *idx, FILE *f) {
147  WRITE1 (idx->d);
148  WRITE1 (idx->ntotal);
149  Index::idx_t dummy = 1 << 20;
150  WRITE1 (dummy);
151  WRITE1 (dummy);
152  WRITE1 (idx->is_trained);
153  WRITE1 (idx->metric_type);
154 }
155 
156 
157 
158 void write_VectorTransform (const VectorTransform *vt, FILE *f) {
159  if (const LinearTransform * lt =
160  dynamic_cast < const LinearTransform *> (vt)) {
161  if (dynamic_cast<const RandomRotationMatrix *>(lt)) {
162  uint32_t h = fourcc ("rrot");
163  WRITE1 (h);
164  } else if (const PCAMatrix * pca =
165  dynamic_cast<const PCAMatrix *>(lt)) {
166  uint32_t h = fourcc ("PcAm");
167  WRITE1 (h);
168  WRITE1 (pca->eigen_power);
169  WRITE1 (pca->random_rotation);
170  WRITE1 (pca->balanced_bins);
171  WRITEVECTOR (pca->mean);
172  WRITEVECTOR (pca->eigenvalues);
173  WRITEVECTOR (pca->PCAMat);
174  } else {
175  // generic LinearTransform (includes OPQ)
176  uint32_t h = fourcc ("LTra");
177  WRITE1 (h);
178  }
179  WRITE1 (lt->have_bias);
180  WRITEVECTOR (lt->A);
181  WRITEVECTOR (lt->b);
182  } else if (const RemapDimensionsTransform *rdt =
183  dynamic_cast<const RemapDimensionsTransform *>(vt)) {
184  uint32_t h = fourcc ("RmDT");
185  WRITE1 (h);
186  WRITEVECTOR (rdt->map);
187  } else if (const NormalizationTransform *nt =
188  dynamic_cast<const NormalizationTransform *>(vt)) {
189  uint32_t h = fourcc ("VNrm");
190  WRITE1 (h);
191  WRITE1 (nt->norm);
192  } else {
193  FAISS_THROW_MSG ("cannot serialize this");
194  }
195  // common fields
196  WRITE1 (vt->d_in);
197  WRITE1 (vt->d_out);
198  WRITE1 (vt->is_trained);
199 }
200 
// Serialize a ProductQuantizer to an open stream: the scalar fields d, M
// and nbits, then the centroids container. The order of the statements
// is the file layout and must match read_ProductQuantizer.
201 static void write_ProductQuantizer (const ProductQuantizer *pq, FILE *f) {
202  WRITE1 (pq->d);
203  WRITE1 (pq->M);
204  WRITE1 (pq->nbits);
205  WRITEVECTOR (pq->centroids);
206 }
207 
// Serialize a ScalarQuantizer to an open stream: qtype, rangestat,
// rangestat_arg, d and code_size, then the `trained` container. The
// order of the statements is the file layout and must match
// read_ScalarQuantizer.
208 static void write_ScalarQuantizer (const ScalarQuantizer *ivsc, FILE *f) {
209  WRITE1 (ivsc->qtype);
210  WRITE1 (ivsc->rangestat);
211  WRITE1 (ivsc->rangestat_arg);
212  WRITE1 (ivsc->d);
213  WRITE1 (ivsc->code_size);
214  WRITEVECTOR (ivsc->trained);
215 }
216 
217 void write_ProductQuantizer (const ProductQuantizer*pq, const char *fname) {
218  FILE *f = fopen (fname, "w");
219  FAISS_THROW_IF_NOT_FMT (f, "cannot open %s for writing", fname);
220  ScopeFileCloser closer(f);
221  write_ProductQuantizer (pq, f);
222 }
223 
224 
225 
226 static void write_ivf_header (const IndexIVF * ivf, FILE *f,
227  bool include_ids = true) {
228  write_index_header (ivf, f);
229  WRITE1 (ivf->nlist);
230  WRITE1 (ivf->nprobe);
231  write_index (ivf->quantizer, f);
232  if (include_ids) {
233  for (size_t i = 0; i < ivf->nlist; i++)
234  WRITEVECTOR (ivf->ids[i]);
235  }
236  WRITE1 (ivf->maintain_direct_map);
237  WRITEVECTOR (ivf->direct_map);
238 }
239 
240 void write_index (const Index *idx, FILE *f) {
241  if (const IndexFlat * idxf = dynamic_cast<const IndexFlat *> (idx)) {
242  uint32_t h = fourcc (
243  idxf->metric_type == METRIC_INNER_PRODUCT ? "IxFI" :
244  idxf->metric_type == METRIC_L2 ? "IxF2" : nullptr);
245  WRITE1 (h);
246  write_index_header (idx, f);
247  WRITEVECTOR (idxf->xb);
248  } else if(const IndexLSH * idxl = dynamic_cast<const IndexLSH *> (idx)) {
249  uint32_t h = fourcc ("IxHe");
250  WRITE1 (h);
251  write_index_header (idx, f);
252  WRITE1 (idxl->nbits);
253  WRITE1 (idxl->rotate_data);
254  WRITE1 (idxl->train_thresholds);
255  WRITEVECTOR (idxl->thresholds);
256  WRITE1 (idxl->bytes_per_vec);
257  write_VectorTransform (&idxl->rrot, f);
258  WRITEVECTOR (idxl->codes);
259  } else if(const IndexPQ * idxp = dynamic_cast<const IndexPQ *> (idx)) {
260  uint32_t h = fourcc ("IxPq");
261  WRITE1 (h);
262  write_index_header (idx, f);
263  write_ProductQuantizer (&idxp->pq, f);
264  WRITEVECTOR (idxp->codes);
265  // search params -- maybe not useful to store?
266  WRITE1 (idxp->search_type);
267  WRITE1 (idxp->encode_signs);
268  WRITE1 (idxp->polysemous_ht);
269  } else if(const IndexScalarQuantizer * idxs =
270  dynamic_cast<const IndexScalarQuantizer *> (idx)) {
271  uint32_t h = fourcc ("IxSQ");
272  WRITE1 (h);
273  write_index_header (idx, f);
274  write_ScalarQuantizer (&idxs->sq, f);
275  WRITEVECTOR (idxs->codes);
276  } else if(const IndexIVFFlat * ivfl =
277  dynamic_cast<const IndexIVFFlat *> (idx)) {
278  uint32_t h = fourcc ("IvFl");
279  WRITE1 (h);
280  write_ivf_header (ivfl, f);
281  for(int i = 0; i < ivfl->nlist; i++)
282  WRITEVECTOR (ivfl->vecs[i]);
283  } else if(const IndexIVFScalarQuantizer * ivsc =
284  dynamic_cast<const IndexIVFScalarQuantizer *> (idx)) {
285  uint32_t h = fourcc ("IvSQ");
286  WRITE1 (h);
287  write_ivf_header (ivsc, f);
288  write_ScalarQuantizer (&ivsc->sq, f);
289  WRITE1 (ivsc->code_size);
290  for(int i = 0; i < ivsc->nlist; i++)
291  WRITEVECTOR (ivsc->codes[i]);
292  } else if(const IndexIVFPQ * ivpq =
293  dynamic_cast<const IndexIVFPQ *> (idx)) {
294  const IndexIVFPQR * ivfpqr = dynamic_cast<const IndexIVFPQR *> (idx);
295  const IndexIVFPQCompact * ivfpqc =
296  dynamic_cast<const IndexIVFPQCompact *> (idx);
297  uint32_t h = fourcc (ivfpqr ? "IvQR" : ivfpqc ? "IvPC" : "IvPQ");
298  WRITE1 (h);
299  write_ivf_header (ivpq, f, !ivfpqc);
300  WRITE1 (ivpq->by_residual);
301  WRITE1 (ivpq->code_size);
302  write_ProductQuantizer (&ivpq->pq, f);
303  if (!ivfpqc) {
304  for(int i = 0; i < ivpq->codes.size(); i++)
305  WRITEVECTOR (ivpq->codes[i]);
306  }
307  if (ivfpqr) {
308  write_ProductQuantizer (&ivfpqr->refine_pq, f);
309  WRITEVECTOR (ivfpqr->refine_codes);
310  WRITE1 (ivfpqr->k_factor);
311  }
312  if (ivfpqc) {
313  WRITETABPAD16 (ivfpqc->limits, ivfpqc->nlist + 1);
314  WRITETABPAD16 (ivfpqc->compact_ids, ivfpqc->ntotal);
315  WRITETABPAD16 (ivfpqc->compact_codes,
316  ivfpqc->ntotal * ivfpqc->code_size);
317  }
318  } else if(const IndexPreTransform * ixpt =
319  dynamic_cast<const IndexPreTransform *> (idx)) {
320  uint32_t h = fourcc ("IxPT");
321  WRITE1 (h);
322  write_index_header (ixpt, f);
323  int nt = ixpt->chain.size();
324  WRITE1 (nt);
325  for (int i = 0; i < nt; i++)
326  write_VectorTransform (ixpt->chain[i], f);
327  write_index (ixpt->index, f);
328  } else if(const MultiIndexQuantizer * imiq =
329  dynamic_cast<const MultiIndexQuantizer *> (idx)) {
330  uint32_t h = fourcc ("Imiq");
331  WRITE1 (h);
332  write_index_header (imiq, f);
333  write_ProductQuantizer (&imiq->pq, f);
334  } else if(const IndexRefineFlat * idxrf =
335  dynamic_cast<const IndexRefineFlat *> (idx)) {
336  uint32_t h = fourcc ("IxRF");
337  WRITE1 (h);
338  write_index_header (idxrf, f);
339  write_index (idxrf->base_index, f);
340  write_index (&idxrf->refine_index, f);
341  WRITE1 (idxrf->k_factor);
342  } else if(const IndexIDMap * idxmap =
343  dynamic_cast<const IndexIDMap *> (idx)) {
344  uint32_t h =
345  dynamic_cast<const IndexIDMap2 *> (idx) ? fourcc ("IxM2") :
346  fourcc ("IxMp");
347  // no need to store additional info for IndexIDMap2
348  WRITE1 (h);
349  write_index_header (idxmap, f);
350  write_index (idxmap->index, f);
351  WRITEVECTOR (idxmap->id_map);
352  } else {
353  FAISS_THROW_MSG ("don't know how to serialize this type of index");
354  }
355 }
356 
357 void write_index (const Index *idx, const char *fname) {
358  FILE *f = fopen (fname, "w");
359  FAISS_THROW_IF_NOT_FMT (f, "cannot open %s for writing", fname);
360  ScopeFileCloser closer(f);
361  write_index (idx, f);
362 }
363 
364 void write_VectorTransform (const VectorTransform *vt, const char *fname) {
365  FILE *f = fopen (fname, "w");
366  FAISS_THROW_IF_NOT_FMT (f, "cannot open %s for writing", fname);
367  ScopeFileCloser closer(f);
368  write_VectorTransform (vt, f);
369 }
370 
371 /*************************************************************
372  * Read
373  **************************************************************/
374 
375 static void read_index_header (Index *idx, FILE *f) {
376  READ1 (idx->d);
377  READ1 (idx->ntotal);
378  Index::idx_t dummy;
379  READ1 (dummy);
380  READ1 (dummy);
381  READ1 (idx->is_trained);
382  READ1 (idx->metric_type);
383  idx->verbose = false;
384 }
385 
386 VectorTransform* read_VectorTransform (FILE *f) {
387  uint32_t h;
388  READ1 (h);
389  VectorTransform *vt = nullptr;
390 
391  if (h == fourcc ("rrot") || h == fourcc ("PCAm") ||
392  h == fourcc ("LTra") || h == fourcc ("PcAm")) {
393  LinearTransform *lt = nullptr;
394  if (h == fourcc ("rrot")) {
395  lt = new RandomRotationMatrix ();
396  } else if (h == fourcc ("PCAm") ||
397  h == fourcc ("PcAm")) {
398  PCAMatrix * pca = new PCAMatrix ();
399  READ1 (pca->eigen_power);
400  READ1 (pca->random_rotation);
401  if (h == fourcc ("PcAm"))
402  READ1 (pca->balanced_bins);
403  READVECTOR (pca->mean);
404  READVECTOR (pca->eigenvalues);
405  READVECTOR (pca->PCAMat);
406  lt = pca;
407  } else if (h == fourcc ("LTra")) {
408  lt = new LinearTransform ();
409  }
410  READ1 (lt->have_bias);
411  READVECTOR (lt->A);
412  READVECTOR (lt->b);
413  vt = lt;
414  } else if (h == fourcc ("RmDT")) {
415  RemapDimensionsTransform *rdt = new RemapDimensionsTransform ();
416  READVECTOR (rdt->map);
417  vt = rdt;
418  } else if (h == fourcc ("VNrm")) {
419  NormalizationTransform *nt = new NormalizationTransform ();
420  READ1 (nt->norm);
421  vt = nt;
422  } else {
423  FAISS_THROW_MSG("fourcc not recognized");
424  }
425  READ1 (vt->d_in);
426  READ1 (vt->d_out);
427  READ1 (vt->is_trained);
428  return vt;
429 }
430 
// Fill in a ProductQuantizer from an open stream, mirroring
// write_ProductQuantizer: d, M and nbits, then the centroids container.
431 static void read_ProductQuantizer (ProductQuantizer *pq, FILE *f) {
432  READ1 (pq->d);
433  READ1 (pq->M);
434  READ1 (pq->nbits);
// called once d, M and nbits are known and before the centroids are
// loaded (presumably recomputes fields that depend on them -- see
// ProductQuantizer for what exactly it sets)
435  pq->set_derived_values ();
436  READVECTOR (pq->centroids);
437 }
438 
// Fill in a ScalarQuantizer from an open stream, mirroring
// write_ScalarQuantizer: qtype, rangestat, rangestat_arg, d and
// code_size, then the `trained` container.
439 static void read_ScalarQuantizer (ScalarQuantizer *ivsc, FILE *f) {
440  READ1 (ivsc->qtype);
441  READ1 (ivsc->rangestat);
442  READ1 (ivsc->rangestat_arg);
443  READ1 (ivsc->d);
444  READ1 (ivsc->code_size);
445  READVECTOR (ivsc->trained);
446 }
447 
448 ProductQuantizer * read_ProductQuantizer (const char*fname) {
449  FILE *f = fopen (fname, "r");
450  FAISS_THROW_IF_NOT_FMT (f, "cannot open %s for writing", fname);
451  ScopeFileCloser closer(f);
452  ProductQuantizer *pq = new ProductQuantizer();
453  ScopeDeleter1<ProductQuantizer> del (pq);
454  read_ProductQuantizer(pq, f);
455  del.release ();
456  return pq;
457 }
458 
459 static void read_ivf_header (IndexIVF * ivf, FILE *f,
460  bool include_ids = true) {
461  read_index_header (ivf, f);
462  READ1 (ivf->nlist);
463  READ1 (ivf->nprobe);
464  ivf->quantizer = read_index (f);
465  ivf->own_fields = true;
466  if (include_ids) {
467  ivf->ids.resize (ivf->nlist);
468  for (size_t i = 0; i < ivf->nlist; i++)
469  READVECTOR (ivf->ids[i]);
470  }
471  READ1 (ivf->maintain_direct_map);
472  READVECTOR (ivf->direct_map);
473 }
474 
475 static IndexIVFPQ *read_ivfpq (FILE *f, uint32_t h, bool try_mmap)
476 {
477 
478  IndexIVFPQR *ivfpqr =
479  h == fourcc ("IvQR") ? new IndexIVFPQR () : nullptr;
480  IndexIVFPQCompact *ivfpqc =
481  h == fourcc ("IvPC") ? new IndexIVFPQCompact () : nullptr;
482  IndexIVFPQ * ivpq = ivfpqr ? ivfpqr : ivfpqc ? ivfpqc : new IndexIVFPQ ();
483  read_ivf_header (ivpq, f, !ivfpqc);
484  READ1 (ivpq->by_residual);
485  READ1 (ivpq->code_size);
486  read_ProductQuantizer (&ivpq->pq, f);
487  if (!ivfpqc) {
488  ivpq->codes.resize (ivpq->nlist);
489  for (size_t i = 0; i < ivpq->nlist; i++)
490  READVECTOR (ivpq->codes[i]);
491  }
492  // precomputed table not stored. It is cheaper to recompute it
493  ivpq->use_precomputed_table = 0;
494  if (ivpq->by_residual)
495  ivpq->precompute_table ();
496  if (ivfpqr) {
497  read_ProductQuantizer (&ivfpqr->refine_pq, f);
498  READVECTOR (ivfpqr->refine_codes);
499  READ1 (ivfpqr->k_factor);
500  }
501  if (ivfpqc) {
502  if (!try_mmap) {
503  READTABPAD16 (ivfpqc->limits, uint32_t, ivfpqc->nlist + 1);
504  READTABPAD16 (ivfpqc->compact_ids, uint32_t, ivfpqc->ntotal);
505  READTABPAD16 (ivfpqc->compact_codes, uint8_t,
506  ivfpqc->ntotal * ivfpqc->code_size);
507  } else {
508  long offset_limits, offset_compact_ids, offset_compact_codes;
509  TABOFFSETPAD16 (offset_limits, uint32_t, ivfpqc->nlist + 1);
510  TABOFFSETPAD16 (offset_compact_ids, uint32_t, ivfpqc->ntotal);
511  TABOFFSETPAD16 (offset_compact_codes, uint8_t,
512  ivfpqc->ntotal * ivfpqc->code_size);
513  ivfpqc->mmap_length = ftell (f);
514  // mmap the whole file
515  ivfpqc->mmap_buffer = (char*)mmap (
516  nullptr, ivfpqc->mmap_length,
517  PROT_READ, MAP_SHARED, fileno (f), 0);
518  if (!ivfpqc->mmap_buffer) {
519  perror ("mmap failed");
520  abort ();
521  }
522  // at this point the file can be closed, it does not
523  // invalidate the mapping
524  ivfpqc->limits = (uint32_t*)(ivfpqc->mmap_buffer + offset_limits);
525  ivfpqc->compact_ids = (uint32_t*)(ivfpqc->mmap_buffer +
526  offset_compact_ids);
527  ivfpqc->compact_codes = (uint8_t*)(ivfpqc->mmap_buffer +
528  offset_compact_codes);
529  }
530  }
531  return ivpq;
532 }
533 
// Global switch for reading an older file layout. read_index tests it
// for the value 2 when it reads an "IxPT" index: in that case the number
// of transforms is not read from the file and is taken to be 1.
534 int read_old_fmt_hack = 0;
535 
536 Index *read_index (FILE * f, bool try_mmap) {
537  Index * idx = nullptr;
538  uint32_t h;
539  READ1 (h);
540  if (h == fourcc ("IxFI") || h == fourcc ("IxF2")) {
541  IndexFlat *idxf;
542  if (h == fourcc ("IxFI")) idxf = new IndexFlatIP ();
543  else idxf = new IndexFlatL2 ();
544  read_index_header (idxf, f);
545  READVECTOR (idxf->xb);
546  FAISS_THROW_IF_NOT (idxf->xb.size() == idxf->ntotal * idxf->d);
547  // leak!
548  idx = idxf;
549  } else if (h == fourcc("IxHE") || h == fourcc("IxHe")) {
550  IndexLSH * idxl = new IndexLSH ();
551  read_index_header (idxl, f);
552  READ1 (idxl->nbits);
553  READ1 (idxl->rotate_data);
554  READ1 (idxl->train_thresholds);
555  READVECTOR (idxl->thresholds);
556  READ1 (idxl->bytes_per_vec);
557  if (h == fourcc("IxHE")) {
558  FAISS_THROW_IF_NOT_FMT (idxl->nbits % 64 == 0,
559  "can only read old format IndexLSH with "
560  "nbits multiple of 64 (got %d)",
561  (int) idxl->nbits);
562  // leak
563  idxl->bytes_per_vec *= 8;
564  }
565  {
566  RandomRotationMatrix *rrot = dynamic_cast<RandomRotationMatrix *>
567  (read_VectorTransform (f));
568  FAISS_THROW_IF_NOT_MSG(rrot, "expected a random rotation");
569  idxl->rrot = *rrot;
570  delete rrot;
571  }
572  READVECTOR (idxl->codes);
573  FAISS_THROW_IF_NOT (idxl->rrot.d_in == idxl->d &&
574  idxl->rrot.d_out == idxl->nbits);
575  FAISS_THROW_IF_NOT (
576  idxl->codes.size() == idxl->ntotal * idxl->bytes_per_vec);
577  idx = idxl;
578  } else if (h == fourcc ("IxPQ") || h == fourcc ("IxPo") ||
579  h == fourcc ("IxPq")) {
580  // IxPQ and IxPo were merged into the same IndexPQ object
581  IndexPQ * idxp =new IndexPQ ();
582  read_index_header (idxp, f);
583  read_ProductQuantizer (&idxp->pq, f);
584  READVECTOR (idxp->codes);
585  if (h == fourcc ("IxPo") || h == fourcc ("IxPq")) {
586  READ1 (idxp->search_type);
587  READ1 (idxp->encode_signs);
588  READ1 (idxp->polysemous_ht);
589  }
590  // Old versions of PQ all had metric_type set to INNER_PRODUCT
591  // when they were in fact using L2. Therefore, we force metric type
592  // to L2 when the old format is detected
593  if (h == fourcc ("IxPQ") || h == fourcc ("IxPo")) {
594  idxp->metric_type = METRIC_L2;
595  }
596  idx = idxp;
597  } else if(h == fourcc ("IvFl")) {
598  IndexIVFFlat * ivfl = new IndexIVFFlat ();
599  read_ivf_header (ivfl, f);
600  ivfl->vecs.resize (ivfl->nlist);
601  for (size_t i = 0; i < ivfl->nlist; i++)
602  READVECTOR (ivfl->vecs[i]);
603  idx = ivfl;
604  } else if (h == fourcc ("IxSQ")) {
606  read_index_header (idxs, f);
607  read_ScalarQuantizer (&idxs->sq, f);
608  READVECTOR (idxs->codes);
609  idxs->code_size = idxs->sq.code_size;
610  idx = idxs;
611  } else if(h == fourcc ("IvSQ")) {
613  read_ivf_header (ivsc, f);
614  ivsc->codes.resize(ivsc->nlist);
615  read_ScalarQuantizer (&ivsc->sq, f);
616  READ1 (ivsc->code_size);
617  for(int i = 0; i < ivsc->nlist; i++)
618  READVECTOR (ivsc->codes[i]);
619  idx = ivsc;
620  } else if(h == fourcc ("IvPQ") || h == fourcc ("IvQR") ||
621  h == fourcc ("IvPC")) {
622 
623  idx = read_ivfpq (f, h, try_mmap);
624 
625  } else if(h == fourcc ("IxPT")) {
626  IndexPreTransform * ixpt = new IndexPreTransform();
627  ixpt->own_fields = true;
628  read_index_header (ixpt, f);
629  int nt;
630  if (read_old_fmt_hack == 2) {
631  nt = 1;
632  } else {
633  READ1 (nt);
634  }
635  for (int i = 0; i < nt; i++) {
636  ixpt->chain.push_back (read_VectorTransform (f));
637  }
638  ixpt->index = read_index (f);
639  idx = ixpt;
640  } else if(h == fourcc ("Imiq")) {
642  read_index_header (imiq, f);
643  read_ProductQuantizer (&imiq->pq, f);
644  idx = imiq;
645  } else if(h == fourcc ("IxRF")) {
646  IndexRefineFlat *idxrf = new IndexRefineFlat ();
647  read_index_header (idxrf, f);
648  idxrf->base_index = read_index(f);
649  idxrf->own_fields = true;
650  IndexFlat *rf = dynamic_cast<IndexFlat*> (read_index (f));
651  std::swap (*rf, idxrf->refine_index);
652  delete rf;
653  READ1 (idxrf->k_factor);
654  idx = idxrf;
655  } else if(h == fourcc ("IxMp") || h == fourcc ("IxM2")) {
656  bool is_map2 = h == fourcc ("IxM2");
657  IndexIDMap * idxmap = is_map2 ? new IndexIDMap2 () : new IndexIDMap ();
658  read_index_header (idxmap, f);
659  idxmap->index = read_index (f);
660  idxmap->own_fields = true;
661  READVECTOR (idxmap->id_map);
662  if (is_map2) {
663  static_cast<IndexIDMap2*>(idxmap)->construct_rev_map ();
664  }
665  idx = idxmap;
666  } else {
667  fprintf (stderr, "Index type 0x%08x not supported\n", h);
668  abort ();
669  }
670  return idx;
671 }
672 
673 
674 
675 Index *read_index (const char *fname, bool try_mmap) {
676  FILE *f = fopen (fname, "r");
677  FAISS_THROW_IF_NOT_FMT (f, "cannot open %s for reading:", fname);
678  Index *idx = read_index (f, try_mmap);
679  fclose (f);
680  return idx;
681 }
682 
683 VectorTransform *read_VectorTransform (const char *fname) {
684  FILE *f = fopen (fname, "r");
685  if (!f) {
686  fprintf (stderr, "cannot open %s for reading:", fname);
687  perror ("");
688  abort ();
689  }
690  VectorTransform *vt = read_VectorTransform (f);
691  fclose (f);
692  return vt;
693 }
694 
695 /*************************************************************
696  * cloning functions
697  **************************************************************/
698 
699 
700 
701 Index * clone_index (const Index *index)
702 {
703  Cloner cl;
704  return cl.clone_Index (index);
705 }
706 
707 // assumes there is a copy constructor ready. Always try from most
708 // specific to most general
// Expands to `if (obj is a classname) { return a heap-allocated copy
// made with classname's copy constructor; } else` -- note the trailing
// `else`: the macro must be followed by another TRYCLONE or by a
// statement/block that handles the "no match" case.
709 #define TRYCLONE(classname, obj) \
710  if (const classname *clo = dynamic_cast<const classname *>(obj)) { \
711  return new classname(*clo); \
712  } else
713 
714 VectorTransform *Cloner::clone_VectorTransform (const VectorTransform *vt)
715 {
716  TRYCLONE (RemapDimensionsTransform, vt)
717  TRYCLONE (OPQMatrix, vt)
718  TRYCLONE (PCAMatrix, vt)
719  TRYCLONE (RandomRotationMatrix, vt)
720  TRYCLONE (LinearTransform, vt)
721  {
722  FAISS_THROW_MSG("clone not supported for this type of VectorTransform");
723  }
724  return nullptr;
725 }
726 
727 IndexIVF * Cloner::clone_IndexIVF (const IndexIVF *ivf)
728 {
729  TRYCLONE (IndexIVFPQR, ivf)
730  TRYCLONE (IndexIVFPQ, ivf)
731  TRYCLONE (IndexIVFFlat, ivf)
732  TRYCLONE (IndexIVFScalarQuantizer, ivf)
733  {
734  FAISS_THROW_MSG("clone not supported for this type of IndexIVF");
735  }
736  return nullptr;
737 }
738 
739 Index *Cloner::clone_Index (const Index *index)
740 {
741  TRYCLONE (IndexPQ, index)
742  TRYCLONE (IndexLSH, index)
743  TRYCLONE (IndexFlatL2, index)
744  TRYCLONE (IndexFlatIP, index)
745  TRYCLONE (IndexFlat, index)
746  TRYCLONE (IndexScalarQuantizer, index)
747  TRYCLONE (MultiIndexQuantizer, index)
748  if (const IndexIVF * ivf = dynamic_cast<const IndexIVF*>(index)) {
749  IndexIVF *res = clone_IndexIVF (ivf);
750  res->own_fields = true;
751  res->quantizer = clone_Index (ivf->quantizer);
752  return res;
753  } else if (const IndexPreTransform * ipt =
754  dynamic_cast<const IndexPreTransform*> (index)) {
755  IndexPreTransform *res = new IndexPreTransform ();
756  res->d = ipt->d;
757  res->index = clone_Index (ipt->index);
758  for (int i = 0; i < ipt->chain.size(); i++)
759  res->chain.push_back (clone_VectorTransform (ipt->chain[i]));
760  res->own_fields = true;
761  return res;
762  } else {
763  FAISS_THROW_MSG( "clone not supported for this type of Index");
764  }
765  return nullptr;
766 }
767 
768 
769 } // namespace faiss
std::vector< uint8_t > codes
Codes. Size ntotal * pq.code_size.
Definition: IndexPQ.h:34
size_t code_size
bytes per vector
Index * index
! chain of transforms
Randomly rotate a set of vectors.
Index * read_index(FILE *f, bool try_mmap)
Definition: index_io.cpp:536
int bytes_per_vec
nb of 8-bits per encoded vector
Definition: IndexLSH.h:28
std::vector< float > thresholds
thresholds to compare with
Definition: IndexLSH.h:34
bool train_thresholds
whether we train thresholds or use 0
Definition: IndexLSH.h:30
Index * base_index
faster index to pre-select the vectors that should be filtered
Definition: IndexFlat.h:109
std::vector< std::vector< uint8_t > > codes
inverted list codes.
IndexFlat refine_index
storage for full vectors
Definition: IndexFlat.h:106
bool own_fields
should the base index be deallocated?
Definition: IndexFlat.h:110
int d
vector dimension
Definition: Index.h:64
std::vector< long > id_map
! whether pointers are deleted in destructor
Definition: MetaIndexes.h:29
std::vector< uint8_t > codes
Codes. Size ntotal * pq.code_size.
RandomRotationMatrix rrot
optional random rotation
Definition: IndexLSH.h:32
ScalarQuantizer sq
Used to encode the vectors.
long idx_t
all indices are this type
Definition: Index.h:62
ProductQuantizer pq
The product quantizer used to encode the vectors.
Definition: IndexPQ.h:31
idx_t ntotal
total nb of indexed vectors
Definition: Index.h:65
bool own_fields
! the sub-index
MetricType metric_type
type of metric this index uses for search
Definition: Index.h:72
size_t nlist
number of possible key values
Definition: IndexIVF.h:46
int d_out
! input dimension
int nbits
nb of bits per vector
Definition: IndexLSH.h:27
bool is_trained
set if the Index does not require training, or if training is done already
Definition: Index.h:69
std::vector< float > xb
database vectors, size ntotal * d
Definition: IndexFlat.h:25
int polysemous_ht
Hamming threshold used for polysemy.
Definition: IndexPQ.h:91
bool rotate_data
whether to apply a random rotation to input
Definition: IndexLSH.h:29
std::vector< uint8_t > codes
encoded dataset
Definition: IndexLSH.h:37
std::vector< std::vector< float > > vecs
Definition: IndexIVF.h:135
bool own_fields
! the sub-index
Definition: MetaIndexes.h:28