Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
/data/users/matthijs/github_faiss/faiss/index_io.cpp
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 // Copyright 2004-present Facebook. All Rights Reserved.
10 
#include "index_io.h"

#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <memory>
#include <utility>
#include <vector>

#include <sys/mman.h>

#include "FaissAssert.h"

#include "IndexFlat.h"
#include "VectorTransform.h"
#include "IndexLSH.h"
#include "IndexPQ.h"
#include "IndexIVF.h"
#include "IndexIVFPQ.h"
#include "MetaIndexes.h"
#include "IndexScalarQuantizer.h"
28 
29 /*************************************************************
30  * The I/O format is the content of the class. For objects that are
31  * inherited, like Index, a 4-character-code (fourcc) indicates which
32  * child class this is an instance of.
33  *
34  * In this case, the fields of the parent class are written first,
35  * then the ones for the child classes. Note that this requires
36  * classes to be serialized to have a constructor without parameters,
37  * so that the fields can be filled in later. The default constructor
38  * should set reasonable defaults for all fields.
39  *
40  * The fourccs are assigned arbitrarily. When the class changed (added
41  * or deprecated fields), the fourcc can be replaced. New code should
42  * be able to read the old fourcc and fill in new classes.
43  *
44  * TODO: serialization to strings for use in Python pickle or Torch
45  * serialization.
46  *
47  * TODO: in this file, the read functions that encounter errors may
48  * leak memory.
49  **************************************************************/
50 
51 
52 
53 namespace faiss {
54 
/**
 * Pack a 4-character code into a 32-bit tag, first character in the
 * least significant byte.
 *
 * @param sx  pointer to at least 4 readable characters (must not be null)
 * @return    x[0] | x[1] << 8 | x[2] << 16 | x[3] << 24
 */
static uint32_t fourcc (const char sx[4]) {
    const unsigned char *x = reinterpret_cast<const unsigned char *> (sx);
    // Widen each byte to uint32_t before shifting: shifting the promoted
    // (signed) int left by 24 overflows when the byte is >= 0x80.
    return  static_cast<uint32_t> (x[0])        |
           (static_cast<uint32_t> (x[1]) << 8)  |
           (static_cast<uint32_t> (x[2]) << 16) |
           (static_cast<uint32_t> (x[3]) << 24);
}
59 
60 /*************************************************************
61  * I/O macros
62  *
63  * we use macros so that we have a line number to report in
64  * abort (). This makes debugging a lot easier.
65  **************************************************************/
66 
67 
// All macros below expect a `FILE *f` to be in scope at the expansion site.

// Write n elements starting at ptr; throws if fewer than n were written.
#define WRITEANDCHECK(ptr, n) {                                 \
        size_t ret = fwrite (ptr, sizeof (* (ptr)), n, f);      \
        FAISS_THROW_IF_NOT_MSG (ret == (n), "write error");     \
    }

// Read n elements into ptr; throws if fewer than n were read.
#define READANDCHECK(ptr, n) {                                  \
        size_t ret = fread (ptr, sizeof (* (ptr)), n, f);       \
        FAISS_THROW_IF_NOT_MSG (ret == (n), "read error");      \
    }

// Write / read a single object in its in-memory representation.
#define WRITE1(x) WRITEANDCHECK(&(x), 1)
#define READ1(x)  READANDCHECK(&(x), 1)

// Write a vector as a size_t element count followed by the raw elements.
#define WRITEVECTOR(vec) {                                      \
        size_t size = (vec).size ();                            \
        WRITEANDCHECK (&size, 1);                               \
        WRITEANDCHECK ((vec).data (), size);                    \
    }

// Read a vector written by WRITEVECTOR. The count is read as size_t, the
// same type WRITEVECTOR writes, and counts of 2^40 elements or more are
// rejected as corrupt. The bound is computed in 64 bits so the shift is
// well defined whatever the width of long / size_t.
#define READVECTOR(vec) {                                       \
        size_t size;                                            \
        READANDCHECK (&size, 1);                                \
        FAISS_THROW_IF_NOT ((uint64_t) size < ((uint64_t) 1 << 40)); \
        (vec).resize (size);                                    \
        READANDCHECK ((vec).data (), size);                     \
    }
94 
/**
 * Closes the wrapped FILE* when the object goes out of scope, so that a
 * stream is released on every exit path, including exceptions.
 */
struct ScopeFileCloser {
    FILE *f;
    ScopeFileCloser (FILE *f): f (f) {}
    // copying would close the same stream twice
    ScopeFileCloser (const ScopeFileCloser &) = delete;
    ScopeFileCloser & operator= (const ScopeFileCloser &) = delete;
    // fclose on a null stream is undefined, so skip it
    ~ScopeFileCloser () { if (f) fclose (f); }
};
100 
101 // Macros for read/write arrays aligned to 16 bytes in the
102 // file. Useful when mmapped.
103 
// Write a raw array: size_t element count, then a padding run whose first
// byte holds the number of padding bytes that follow it, then the data.
// The padding length is chosen so that the data starts at a file offset
// that is a multiple of 16.
#define WRITETABPAD16(tab, size_in) {                           \
        size_t size = (size_in);                                \
        WRITEANDCHECK (&size, 1);                               \
        uint8_t padding[16] = {0, 0, 0, 0, 0, 0, 0, 0,          \
                               0, 0, 0, 0, 0, 0, 0, 0};         \
        long tabpad_pos = ftell (f);                            \
        FAISS_THROW_IF_NOT_MSG (tabpad_pos >= 0, "ftell error"); \
        int idx = tabpad_pos % 16;                              \
        padding [idx] = 15 - idx;                               \
        WRITEANDCHECK (padding + idx, 16 - idx);                \
        WRITEANDCHECK ((tab), size);                            \
    }

// Read an array written by WRITETABPAD16 into a freshly allocated
// `new basetype[size]` assigned to tab. The stored element count must
// equal expected_size.
#define READTABPAD16(tab, basetype, expected_size) {            \
        size_t size;                                            \
        READANDCHECK (&size, 1);                                \
        FAISS_THROW_IF_NOT ((expected_size) == size);           \
        uint8_t padding[16], npad;                              \
        READ1(npad);                                            \
        FAISS_THROW_IF_NOT (npad < 16);                         \
        READANDCHECK (padding, npad);                           \
        (tab) = new basetype [size];                            \
        READANDCHECK ((tab), size);                             \
    }

// read only the array header, return its offset and skip over it
#define TABOFFSETPAD16(taboffset, basetype, expected_size) {    \
        size_t size;                                            \
        READANDCHECK (&size, 1);                                \
        FAISS_THROW_IF_NOT ((expected_size) == size);           \
        uint8_t padding[16], npad;                              \
        READ1(npad);                                            \
        FAISS_THROW_IF_NOT (npad < 16);                         \
        READANDCHECK (padding, npad);                           \
        taboffset = ftell(f);                                   \
        FAISS_THROW_IF_NOT_MSG ((taboffset) >= 0, "ftell error"); \
        int tabpad_rc = fseek (f, sizeof(basetype) * size, SEEK_CUR); \
        FAISS_THROW_IF_NOT_MSG (tabpad_rc == 0, "seek error");  \
    }
138 
139 
140 
141 
142 /*************************************************************
143  * Write
144  **************************************************************/
145 
146 static void write_index_header (const Index *idx, FILE *f) {
147  WRITE1 (idx->d);
148  WRITE1 (idx->ntotal);
149  Index::idx_t dummy = 1 << 20;
150  WRITE1 (dummy);
151  WRITE1 (dummy);
152  WRITE1 (idx->is_trained);
153  WRITE1 (idx->metric_type);
154 }
155 
156 
157 
158 void write_VectorTransform (const VectorTransform *vt, FILE *f) {
159  if (const LinearTransform * lt =
160  dynamic_cast < const LinearTransform *> (vt)) {
161  if (dynamic_cast<const RandomRotationMatrix *>(lt)) {
162  uint32_t h = fourcc ("rrot");
163  WRITE1 (h);
164  } else if (const PCAMatrix * pca =
165  dynamic_cast<const PCAMatrix *>(lt)) {
166  uint32_t h = fourcc ("PcAm");
167  WRITE1 (h);
168  WRITE1 (pca->eigen_power);
169  WRITE1 (pca->random_rotation);
170  WRITE1 (pca->balanced_bins);
171  WRITEVECTOR (pca->mean);
172  WRITEVECTOR (pca->eigenvalues);
173  WRITEVECTOR (pca->PCAMat);
174  } else {
175  // generic LinearTransform (includes OPQ)
176  uint32_t h = fourcc ("LTra");
177  WRITE1 (h);
178  }
179  WRITE1 (lt->have_bias);
180  WRITEVECTOR (lt->A);
181  WRITEVECTOR (lt->b);
182  } else if (const RemapDimensionsTransform *rdt =
183  dynamic_cast<const RemapDimensionsTransform *>(vt)) {
184  uint32_t h = fourcc ("RmDT");
185  WRITE1 (h);
186  WRITEVECTOR (rdt->map);
187  } else if (const NormalizationTransform *nt =
188  dynamic_cast<const NormalizationTransform *>(vt)) {
189  uint32_t h = fourcc ("VNrm");
190  WRITE1 (h);
191  WRITE1 (nt->norm);
192  } else {
193  FAISS_THROW_MSG ("cannot serialize this");
194  }
195  // common fields
196  WRITE1 (vt->d_in);
197  WRITE1 (vt->d_out);
198  WRITE1 (vt->is_trained);
199 }
200 
201 static void write_ProductQuantizer (const ProductQuantizer *pq, FILE *f) {
202  WRITE1 (pq->d);
203  WRITE1 (pq->M);
204  WRITE1 (pq->nbits);
205  WRITEVECTOR (pq->centroids);
206 }
207 
208 static void write_ScalarQuantizer (const ScalarQuantizer *ivsc, FILE *f) {
209  WRITE1 (ivsc->qtype);
210  WRITE1 (ivsc->rangestat);
211  WRITE1 (ivsc->rangestat_arg);
212  WRITE1 (ivsc->d);
213  WRITE1 (ivsc->code_size);
214  WRITEVECTOR (ivsc->trained);
215 }
216 
217 void write_ProductQuantizer (const ProductQuantizer*pq, const char *fname) {
218  FILE *f = fopen (fname, "w");
219  FAISS_THROW_IF_NOT_FMT (f, "cannot open %s for writing", fname);
220  ScopeFileCloser closer(f);
221  write_ProductQuantizer (pq, f);
222 }
223 
224 
225 
226 static void write_ivf_header (const IndexIVF * ivf, FILE *f,
227  bool include_ids = true) {
228  write_index_header (ivf, f);
229  WRITE1 (ivf->nlist);
230  WRITE1 (ivf->nprobe);
231  write_index (ivf->quantizer, f);
232  if (include_ids) {
233  for (size_t i = 0; i < ivf->nlist; i++)
234  WRITEVECTOR (ivf->ids[i]);
235  }
236  WRITE1 (ivf->maintain_direct_map);
237  WRITEVECTOR (ivf->direct_map);
238 }
239 
240 void write_index (const Index *idx, FILE *f) {
241  if (const IndexFlat * idxf = dynamic_cast<const IndexFlat *> (idx)) {
242  uint32_t h = fourcc (
243  idxf->metric_type == METRIC_INNER_PRODUCT ? "IxFI" :
244  idxf->metric_type == METRIC_L2 ? "IxF2" : nullptr);
245  WRITE1 (h);
246  write_index_header (idx, f);
247  WRITEVECTOR (idxf->xb);
248  } else if(const IndexLSH * idxl = dynamic_cast<const IndexLSH *> (idx)) {
249  uint32_t h = fourcc ("IxHe");
250  WRITE1 (h);
251  write_index_header (idx, f);
252  WRITE1 (idxl->nbits);
253  WRITE1 (idxl->rotate_data);
254  WRITE1 (idxl->train_thresholds);
255  WRITEVECTOR (idxl->thresholds);
256  WRITE1 (idxl->bytes_per_vec);
257  write_VectorTransform (&idxl->rrot, f);
258  WRITEVECTOR (idxl->codes);
259  } else if(const IndexPQ * idxp = dynamic_cast<const IndexPQ *> (idx)) {
260  uint32_t h = fourcc ("IxPq");
261  WRITE1 (h);
262  write_index_header (idx, f);
263  write_ProductQuantizer (&idxp->pq, f);
264  WRITEVECTOR (idxp->codes);
265  // search params -- maybe not useful to store?
266  WRITE1 (idxp->search_type);
267  WRITE1 (idxp->encode_signs);
268  WRITE1 (idxp->polysemous_ht);
269  } else if(const IndexScalarQuantizer * idxs =
270  dynamic_cast<const IndexScalarQuantizer *> (idx)) {
271  uint32_t h = fourcc ("IxSQ");
272  WRITE1 (h);
273  write_index_header (idx, f);
274  write_ScalarQuantizer (&idxs->sq, f);
275  WRITEVECTOR (idxs->codes);
276  } else if(const IndexIVFFlat * ivfl =
277  dynamic_cast<const IndexIVFFlat *> (idx)) {
278  uint32_t h = fourcc ("IvFL");
279  WRITE1 (h);
280  write_ivf_header (ivfl, f);
281  for(int i = 0; i < ivfl->nlist; i++)
282  WRITEVECTOR (ivfl->codes[i]);
283  } else if(const IndexIVFScalarQuantizer * ivsc =
284  dynamic_cast<const IndexIVFScalarQuantizer *> (idx)) {
285  uint32_t h = fourcc ("IvSQ");
286  WRITE1 (h);
287  write_ivf_header (ivsc, f);
288  write_ScalarQuantizer (&ivsc->sq, f);
289  WRITE1 (ivsc->code_size);
290  for(int i = 0; i < ivsc->nlist; i++)
291  WRITEVECTOR (ivsc->codes[i]);
292  } else if(const IndexIVFPQ * ivpq =
293  dynamic_cast<const IndexIVFPQ *> (idx)) {
294  const IndexIVFPQR * ivfpqr = dynamic_cast<const IndexIVFPQR *> (idx);
295  const IndexIVFPQCompact * ivfpqc =
296  dynamic_cast<const IndexIVFPQCompact *> (idx);
297  uint32_t h = fourcc (ivfpqr ? "IvQR" : ivfpqc ? "IvPC" : "IvPQ");
298  WRITE1 (h);
299  write_ivf_header (ivpq, f, !ivfpqc);
300  WRITE1 (ivpq->by_residual);
301  WRITE1 (ivpq->code_size);
302  write_ProductQuantizer (&ivpq->pq, f);
303  if (!ivfpqc) {
304  for(int i = 0; i < ivpq->codes.size(); i++)
305  WRITEVECTOR (ivpq->codes[i]);
306  }
307  if (ivfpqr) {
308  write_ProductQuantizer (&ivfpqr->refine_pq, f);
309  WRITEVECTOR (ivfpqr->refine_codes);
310  WRITE1 (ivfpqr->k_factor);
311  }
312  if (ivfpqc) {
313  WRITETABPAD16 (ivfpqc->limits, ivfpqc->nlist + 1);
314  WRITETABPAD16 (ivfpqc->compact_ids, ivfpqc->ntotal);
315  WRITETABPAD16 (ivfpqc->compact_codes,
316  ivfpqc->ntotal * ivfpqc->code_size);
317  }
318  } else if(const IndexPreTransform * ixpt =
319  dynamic_cast<const IndexPreTransform *> (idx)) {
320  uint32_t h = fourcc ("IxPT");
321  WRITE1 (h);
322  write_index_header (ixpt, f);
323  int nt = ixpt->chain.size();
324  WRITE1 (nt);
325  for (int i = 0; i < nt; i++)
326  write_VectorTransform (ixpt->chain[i], f);
327  write_index (ixpt->index, f);
328  } else if(const MultiIndexQuantizer * imiq =
329  dynamic_cast<const MultiIndexQuantizer *> (idx)) {
330  uint32_t h = fourcc ("Imiq");
331  WRITE1 (h);
332  write_index_header (imiq, f);
333  write_ProductQuantizer (&imiq->pq, f);
334  } else if(const IndexRefineFlat * idxrf =
335  dynamic_cast<const IndexRefineFlat *> (idx)) {
336  uint32_t h = fourcc ("IxRF");
337  WRITE1 (h);
338  write_index_header (idxrf, f);
339  write_index (idxrf->base_index, f);
340  write_index (&idxrf->refine_index, f);
341  WRITE1 (idxrf->k_factor);
342  } else if(const IndexIDMap * idxmap =
343  dynamic_cast<const IndexIDMap *> (idx)) {
344  uint32_t h =
345  dynamic_cast<const IndexIDMap2 *> (idx) ? fourcc ("IxM2") :
346  fourcc ("IxMp");
347  // no need to store additional info for IndexIDMap2
348  WRITE1 (h);
349  write_index_header (idxmap, f);
350  write_index (idxmap->index, f);
351  WRITEVECTOR (idxmap->id_map);
352  } else {
353  FAISS_THROW_MSG ("don't know how to serialize this type of index");
354  }
355 }
356 
357 void write_index (const Index *idx, const char *fname) {
358  FILE *f = fopen (fname, "w");
359  FAISS_THROW_IF_NOT_FMT (f, "cannot open %s for writing", fname);
360  ScopeFileCloser closer(f);
361  write_index (idx, f);
362 }
363 
364 void write_VectorTransform (const VectorTransform *vt, const char *fname) {
365  FILE *f = fopen (fname, "w");
366  FAISS_THROW_IF_NOT_FMT (f, "cannot open %s for writing", fname);
367  ScopeFileCloser closer(f);
368  write_VectorTransform (vt, f);
369 }
370 
371 /*************************************************************
372  * Read
373  **************************************************************/
374 
375 static void read_index_header (Index *idx, FILE *f) {
376  READ1 (idx->d);
377  READ1 (idx->ntotal);
378  Index::idx_t dummy;
379  READ1 (dummy);
380  READ1 (dummy);
381  READ1 (idx->is_trained);
382  READ1 (idx->metric_type);
383  idx->verbose = false;
384 }
385 
386 VectorTransform* read_VectorTransform (FILE *f) {
387  uint32_t h;
388  READ1 (h);
389  VectorTransform *vt = nullptr;
390 
391  if (h == fourcc ("rrot") || h == fourcc ("PCAm") ||
392  h == fourcc ("LTra") || h == fourcc ("PcAm")) {
393  LinearTransform *lt = nullptr;
394  if (h == fourcc ("rrot")) {
395  lt = new RandomRotationMatrix ();
396  } else if (h == fourcc ("PCAm") ||
397  h == fourcc ("PcAm")) {
398  PCAMatrix * pca = new PCAMatrix ();
399  READ1 (pca->eigen_power);
400  READ1 (pca->random_rotation);
401  if (h == fourcc ("PcAm"))
402  READ1 (pca->balanced_bins);
403  READVECTOR (pca->mean);
404  READVECTOR (pca->eigenvalues);
405  READVECTOR (pca->PCAMat);
406  lt = pca;
407  } else if (h == fourcc ("LTra")) {
408  lt = new LinearTransform ();
409  }
410  READ1 (lt->have_bias);
411  READVECTOR (lt->A);
412  READVECTOR (lt->b);
413  vt = lt;
414  } else if (h == fourcc ("RmDT")) {
415  RemapDimensionsTransform *rdt = new RemapDimensionsTransform ();
416  READVECTOR (rdt->map);
417  vt = rdt;
418  } else if (h == fourcc ("VNrm")) {
419  NormalizationTransform *nt = new NormalizationTransform ();
420  READ1 (nt->norm);
421  vt = nt;
422  } else {
423  FAISS_THROW_MSG("fourcc not recognized");
424  }
425  READ1 (vt->d_in);
426  READ1 (vt->d_out);
427  READ1 (vt->is_trained);
428  return vt;
429 }
430 
431 static void read_ProductQuantizer (ProductQuantizer *pq, FILE *f) {
432  READ1 (pq->d);
433  READ1 (pq->M);
434  READ1 (pq->nbits);
435  pq->set_derived_values ();
436  READVECTOR (pq->centroids);
437 }
438 
439 static void read_ScalarQuantizer (ScalarQuantizer *ivsc, FILE *f) {
440  READ1 (ivsc->qtype);
441  READ1 (ivsc->rangestat);
442  READ1 (ivsc->rangestat_arg);
443  READ1 (ivsc->d);
444  READ1 (ivsc->code_size);
445  READVECTOR (ivsc->trained);
446 }
447 
448 ProductQuantizer * read_ProductQuantizer (const char*fname) {
449  FILE *f = fopen (fname, "r");
450  FAISS_THROW_IF_NOT_FMT (f, "cannot open %s for writing", fname);
451  ScopeFileCloser closer(f);
452  ProductQuantizer *pq = new ProductQuantizer();
453  ScopeDeleter1<ProductQuantizer> del (pq);
454  read_ProductQuantizer(pq, f);
455  del.release ();
456  return pq;
457 }
458 
459 static void read_ivf_header (IndexIVF * ivf, FILE *f,
460  bool include_ids = true) {
461  read_index_header (ivf, f);
462  READ1 (ivf->nlist);
463  READ1 (ivf->nprobe);
464  ivf->quantizer = read_index (f);
465  ivf->own_fields = true;
466  if (include_ids) {
467  ivf->ids.resize (ivf->nlist);
468  for (size_t i = 0; i < ivf->nlist; i++)
469  READVECTOR (ivf->ids[i]);
470  }
471  READ1 (ivf->maintain_direct_map);
472  READVECTOR (ivf->direct_map);
473 }
474 
475 static IndexIVFPQ *read_ivfpq (FILE *f, uint32_t h, bool try_mmap)
476 {
477 
478  IndexIVFPQR *ivfpqr =
479  h == fourcc ("IvQR") ? new IndexIVFPQR () : nullptr;
480  IndexIVFPQCompact *ivfpqc =
481  h == fourcc ("IvPC") ? new IndexIVFPQCompact () : nullptr;
482  IndexIVFPQ * ivpq = ivfpqr ? ivfpqr : ivfpqc ? ivfpqc : new IndexIVFPQ ();
483  read_ivf_header (ivpq, f, !ivfpqc);
484  READ1 (ivpq->by_residual);
485  READ1 (ivpq->code_size);
486  read_ProductQuantizer (&ivpq->pq, f);
487  if (!ivfpqc) {
488  ivpq->codes.resize (ivpq->nlist);
489  for (size_t i = 0; i < ivpq->nlist; i++)
490  READVECTOR (ivpq->codes[i]);
491  }
492  // precomputed table not stored. It is cheaper to recompute it
493  ivpq->use_precomputed_table = 0;
494  if (ivpq->by_residual)
495  ivpq->precompute_table ();
496  if (ivfpqr) {
497  read_ProductQuantizer (&ivfpqr->refine_pq, f);
498  READVECTOR (ivfpqr->refine_codes);
499  READ1 (ivfpqr->k_factor);
500  }
501  if (ivfpqc) {
502  if (!try_mmap) {
503  READTABPAD16 (ivfpqc->limits, uint32_t, ivfpqc->nlist + 1);
504  READTABPAD16 (ivfpqc->compact_ids, uint32_t, ivfpqc->ntotal);
505  READTABPAD16 (ivfpqc->compact_codes, uint8_t,
506  ivfpqc->ntotal * ivfpqc->code_size);
507  } else {
508  long offset_limits, offset_compact_ids, offset_compact_codes;
509  TABOFFSETPAD16 (offset_limits, uint32_t, ivfpqc->nlist + 1);
510  TABOFFSETPAD16 (offset_compact_ids, uint32_t, ivfpqc->ntotal);
511  TABOFFSETPAD16 (offset_compact_codes, uint8_t,
512  ivfpqc->ntotal * ivfpqc->code_size);
513  ivfpqc->mmap_length = ftell (f);
514  // mmap the whole file
515  ivfpqc->mmap_buffer = (char*)mmap (
516  nullptr, ivfpqc->mmap_length,
517  PROT_READ, MAP_SHARED, fileno (f), 0);
518  if (!ivfpqc->mmap_buffer) {
519  perror ("mmap failed");
520  abort ();
521  }
522  // at this point the file can be closed, it does not
523  // invalidate the mapping
524  ivfpqc->limits = (uint32_t*)(ivfpqc->mmap_buffer + offset_limits);
525  ivfpqc->compact_ids = (uint32_t*)(ivfpqc->mmap_buffer +
526  offset_compact_ids);
527  ivfpqc->compact_codes = (uint8_t*)(ivfpqc->mmap_buffer +
528  offset_compact_codes);
529  }
530  }
531  return ivpq;
532 }
533 
534 int read_old_fmt_hack = 0;
535 
536 Index *read_index (FILE * f, bool try_mmap) {
537  Index * idx = nullptr;
538  uint32_t h;
539  READ1 (h);
540  if (h == fourcc ("IxFI") || h == fourcc ("IxF2")) {
541  IndexFlat *idxf;
542  if (h == fourcc ("IxFI")) idxf = new IndexFlatIP ();
543  else idxf = new IndexFlatL2 ();
544  read_index_header (idxf, f);
545  READVECTOR (idxf->xb);
546  FAISS_THROW_IF_NOT (idxf->xb.size() == idxf->ntotal * idxf->d);
547  // leak!
548  idx = idxf;
549  } else if (h == fourcc("IxHE") || h == fourcc("IxHe")) {
550  IndexLSH * idxl = new IndexLSH ();
551  read_index_header (idxl, f);
552  READ1 (idxl->nbits);
553  READ1 (idxl->rotate_data);
554  READ1 (idxl->train_thresholds);
555  READVECTOR (idxl->thresholds);
556  READ1 (idxl->bytes_per_vec);
557  if (h == fourcc("IxHE")) {
558  FAISS_THROW_IF_NOT_FMT (idxl->nbits % 64 == 0,
559  "can only read old format IndexLSH with "
560  "nbits multiple of 64 (got %d)",
561  (int) idxl->nbits);
562  // leak
563  idxl->bytes_per_vec *= 8;
564  }
565  {
566  RandomRotationMatrix *rrot = dynamic_cast<RandomRotationMatrix *>
567  (read_VectorTransform (f));
568  FAISS_THROW_IF_NOT_MSG(rrot, "expected a random rotation");
569  idxl->rrot = *rrot;
570  delete rrot;
571  }
572  READVECTOR (idxl->codes);
573  FAISS_THROW_IF_NOT (idxl->rrot.d_in == idxl->d &&
574  idxl->rrot.d_out == idxl->nbits);
575  FAISS_THROW_IF_NOT (
576  idxl->codes.size() == idxl->ntotal * idxl->bytes_per_vec);
577  idx = idxl;
578  } else if (h == fourcc ("IxPQ") || h == fourcc ("IxPo") ||
579  h == fourcc ("IxPq")) {
580  // IxPQ and IxPo were merged into the same IndexPQ object
581  IndexPQ * idxp =new IndexPQ ();
582  read_index_header (idxp, f);
583  read_ProductQuantizer (&idxp->pq, f);
584  READVECTOR (idxp->codes);
585  if (h == fourcc ("IxPo") || h == fourcc ("IxPq")) {
586  READ1 (idxp->search_type);
587  READ1 (idxp->encode_signs);
588  READ1 (idxp->polysemous_ht);
589  }
590  // Old versoins of PQ all had metric_type set to INNER_PRODUCT
591  // when they were in fact using L2. Therefore, we force metric type
592  // to L2 when the old format is detected
593  if (h == fourcc ("IxPQ") || h == fourcc ("IxPo")) {
594  idxp->metric_type = METRIC_L2;
595  }
596  idx = idxp;
597  } else if (h == fourcc ("IvFl") || h == fourcc("IvFL")) {
598  IndexIVFFlat * ivfl = new IndexIVFFlat ();
599  read_ivf_header (ivfl, f);
600  ivfl->code_size = ivfl->d * sizeof(float);
601  ivfl->codes.resize (ivfl->nlist);
602  if (h == fourcc ("IvFL")) {
603  for (size_t i = 0; i < ivfl->nlist; i++) {
604  READVECTOR (ivfl->codes[i]);
605  }
606  } else { // old format
607  for (size_t i = 0; i < ivfl->nlist; i++) {
608  std::vector<float> vec;
609  READVECTOR (vec);
610  ivfl->codes[i].resize(vec.size() * sizeof(float));
611  memcpy(ivfl->codes[i].data(), vec.data(),
612  ivfl->codes[i].size());
613  }
614  }
615  idx = ivfl;
616  } else if (h == fourcc ("IxSQ")) {
618  read_index_header (idxs, f);
619  read_ScalarQuantizer (&idxs->sq, f);
620  READVECTOR (idxs->codes);
621  idxs->code_size = idxs->sq.code_size;
622  idx = idxs;
623  } else if(h == fourcc ("IvSQ")) {
625  read_ivf_header (ivsc, f);
626  ivsc->codes.resize(ivsc->nlist);
627  read_ScalarQuantizer (&ivsc->sq, f);
628  READ1 (ivsc->code_size);
629  for(int i = 0; i < ivsc->nlist; i++)
630  READVECTOR (ivsc->codes[i]);
631  idx = ivsc;
632  } else if(h == fourcc ("IvPQ") || h == fourcc ("IvQR") ||
633  h == fourcc ("IvPC")) {
634 
635  idx = read_ivfpq (f, h, try_mmap);
636 
637  } else if(h == fourcc ("IxPT")) {
638  IndexPreTransform * ixpt = new IndexPreTransform();
639  ixpt->own_fields = true;
640  read_index_header (ixpt, f);
641  int nt;
642  if (read_old_fmt_hack == 2) {
643  nt = 1;
644  } else {
645  READ1 (nt);
646  }
647  for (int i = 0; i < nt; i++) {
648  ixpt->chain.push_back (read_VectorTransform (f));
649  }
650  ixpt->index = read_index (f);
651  idx = ixpt;
652  } else if(h == fourcc ("Imiq")) {
654  read_index_header (imiq, f);
655  read_ProductQuantizer (&imiq->pq, f);
656  idx = imiq;
657  } else if(h == fourcc ("IxRF")) {
658  IndexRefineFlat *idxrf = new IndexRefineFlat ();
659  read_index_header (idxrf, f);
660  idxrf->base_index = read_index(f);
661  idxrf->own_fields = true;
662  IndexFlat *rf = dynamic_cast<IndexFlat*> (read_index (f));
663  std::swap (*rf, idxrf->refine_index);
664  delete rf;
665  READ1 (idxrf->k_factor);
666  idx = idxrf;
667  } else if(h == fourcc ("IxMp") || h == fourcc ("IxM2")) {
668  bool is_map2 = h == fourcc ("IxM2");
669  IndexIDMap * idxmap = is_map2 ? new IndexIDMap2 () : new IndexIDMap ();
670  read_index_header (idxmap, f);
671  idxmap->index = read_index (f);
672  idxmap->own_fields = true;
673  READVECTOR (idxmap->id_map);
674  if (is_map2) {
675  static_cast<IndexIDMap2*>(idxmap)->construct_rev_map ();
676  }
677  idx = idxmap;
678  } else {
679  fprintf (stderr, "Index type 0x%08x not supported\n", h);
680  abort ();
681  }
682  return idx;
683 }
684 
685 
686 
687 Index *read_index (const char *fname, bool try_mmap) {
688  FILE *f = fopen (fname, "r");
689  FAISS_THROW_IF_NOT_FMT (f, "cannot open %s for reading:", fname);
690  Index *idx = read_index (f, try_mmap);
691  fclose (f);
692  return idx;
693 }
694 
695 VectorTransform *read_VectorTransform (const char *fname) {
696  FILE *f = fopen (fname, "r");
697  if (!f) {
698  fprintf (stderr, "cannot open %s for reading:", fname);
699  perror ("");
700  abort ();
701  }
702  VectorTransform *vt = read_VectorTransform (f);
703  fclose (f);
704  return vt;
705 }
706 
707 /*************************************************************
708  * cloning functions
709  **************************************************************/
710 
711 
712 
713 Index * clone_index (const Index *index)
714 {
715  Cloner cl;
716  return cl.clone_Index (index);
717 }
718 
// Return a copy-constructed `classname` when obj's dynamic type is (or
// derives from) classname. Relies on classname having a copy constructor.
// The expansion ends in a dangling `else`, so uses can be chained; list
// the most specific classes first and finish the chain with a statement.
#define TRYCLONE(classname, obj)                                        \
    if (const classname *typed_obj =                                    \
            dynamic_cast<const classname *>(obj)) {                     \
        return new classname(*typed_obj);                               \
    } else
725 
726 VectorTransform *Cloner::clone_VectorTransform (const VectorTransform *vt)
727 {
728  TRYCLONE (RemapDimensionsTransform, vt)
729  TRYCLONE (OPQMatrix, vt)
730  TRYCLONE (PCAMatrix, vt)
731  TRYCLONE (RandomRotationMatrix, vt)
732  TRYCLONE (LinearTransform, vt)
733  {
734  FAISS_THROW_MSG("clone not supported for this type of VectorTransform");
735  }
736  return nullptr;
737 }
738 
739 IndexIVF * Cloner::clone_IndexIVF (const IndexIVF *ivf)
740 {
741  TRYCLONE (IndexIVFPQR, ivf)
742  TRYCLONE (IndexIVFPQ, ivf)
743  TRYCLONE (IndexIVFFlat, ivf)
744  TRYCLONE (IndexIVFScalarQuantizer, ivf)
745  {
746  FAISS_THROW_MSG("clone not supported for this type of IndexIVF");
747  }
748  return nullptr;
749 }
750 
751 Index *Cloner::clone_Index (const Index *index)
752 {
753  TRYCLONE (IndexPQ, index)
754  TRYCLONE (IndexLSH, index)
755  TRYCLONE (IndexFlatL2, index)
756  TRYCLONE (IndexFlatIP, index)
757  TRYCLONE (IndexFlat, index)
758  TRYCLONE (IndexScalarQuantizer, index)
759  TRYCLONE (MultiIndexQuantizer, index)
760  if (const IndexIVF * ivf = dynamic_cast<const IndexIVF*>(index)) {
761  IndexIVF *res = clone_IndexIVF (ivf);
762  res->own_fields = true;
763  res->quantizer = clone_Index (ivf->quantizer);
764  return res;
765  } else if (const IndexPreTransform * ipt =
766  dynamic_cast<const IndexPreTransform*> (index)) {
767  IndexPreTransform *res = new IndexPreTransform ();
768  res->d = ipt->d;
769  res->index = clone_Index (ipt->index);
770  for (int i = 0; i < ipt->chain.size(); i++)
771  res->chain.push_back (clone_VectorTransform (ipt->chain[i]));
772  res->own_fields = true;
773  return res;
774  } else {
775  FAISS_THROW_MSG( "clone not supported for this type of Index");
776  }
777  return nullptr;
778 }
779 
780 
781 } // namespace faiss
std::vector< uint8_t > codes
Codes. Size ntotal * pq.code_size.
Definition: IndexPQ.h:34
size_t code_size
bytes per vector
Index * index
! chain of transforms
Randomly rotate a set of vectors.
Index * read_index(FILE *f, bool try_mmap)
Definition: index_io.cpp:536
int bytes_per_vec
nb of 8-bits per encoded vector
Definition: IndexLSH.h:28
std::vector< float > thresholds
thresholds to compare with
Definition: IndexLSH.h:34
bool train_thresholds
whether we train thresholds or use 0
Definition: IndexLSH.h:30
Index * base_index
faster index to pre-select the vectors that should be filtered
Definition: IndexFlat.h:109
IndexFlat refine_index
storage for full vectors
Definition: IndexFlat.h:106
bool own_fields
should the base index be deallocated?
Definition: IndexFlat.h:110
int d
vector dimension
Definition: Index.h:64
std::vector< long > id_map
! whether pointers are deleted in destructor
Definition: MetaIndexes.h:29
std::vector< uint8_t > codes
Codes. Size ntotal * pq.code_size.
RandomRotationMatrix rrot
optional random rotation
Definition: IndexLSH.h:32
ScalarQuantizer sq
Used to encode the vectors.
long idx_t
all indices are this type
Definition: Index.h:62
ProductQuantizer pq
The product quantizer used to encode the vectors.
Definition: IndexPQ.h:31
idx_t ntotal
total nb of indexed vectors
Definition: Index.h:65
bool own_fields
! the sub-index
MetricType metric_type
type of metric this index uses for search
Definition: Index.h:72
size_t nlist
number of possible key values
Definition: IndexIVF.h:46
int d_out
! input dimension
int nbits
nb of bits per vector
Definition: IndexLSH.h:27
bool is_trained
set if the Index does not require training, or if training is done already
Definition: Index.h:69
std::vector< float > xb
database vectors, size ntotal * d
Definition: IndexFlat.h:25
int polysemous_ht
Hamming threshold used for polysemy.
Definition: IndexPQ.h:91
size_t code_size
code size per vector in bytes
Definition: IndexIVF.h:57
bool rotate_data
whether to apply a random rotation to input
Definition: IndexLSH.h:29
std::vector< uint8_t > codes
encoded dataset
Definition: IndexLSH.h:37
bool own_fields
! the sub-index
Definition: MetaIndexes.h:28