Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
/data/users/matthijs/github_faiss/faiss/index_io.cpp
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the CC-by-NC license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 // Copyright 2004-present Facebook. All Rights Reserved.
10 
11 #include "index_io.h"
12 
13 #include <cstdio>
14 #include <cstdlib>
15 
16 #include <sys/mman.h>
17 
18 #include "FaissAssert.h"
19 
20 #include "IndexFlat.h"
21 #include "VectorTransform.h"
22 #include "IndexLSH.h"
23 #include "IndexPQ.h"
24 #include "IndexIVF.h"
25 #include "IndexIVFPQ.h"
26 #include "MetaIndexes.h"
27 #include "IndexIVFScalarQuantizer.h"
28 
29 /*************************************************************
30  * The I/O format is the content of the class. For objects that are
31  * inherited, like Index, a 4-character-code (fourcc) indicates which
32  * child class this is an instance of.
33  *
34  * In this case, the fields of the parent class are written first,
35  * then the ones for the child classes. Note that this requires
36  * classes to be serialized to have a constructor without parameters,
37  * so that the fields can be filled in later. The default constructor
38  * should set reasonable defaults for all fields.
39  *
40  * The fourccs are assigned arbitrarily. When the class changed (added
41  * or deprecated fields), the fourcc can be replaced. New code should
42  * be able to read the old fourcc and fill in new classes.
43  *
44  * TODO: serialization to strings for use in Python pickle or Torch
45  * serialization.
46  *
47  * TODO: in this file, the read functions that encounter errors may
48  * leak memory.
49  **************************************************************/
50 
51 
52 
53 namespace faiss {
54 
/// Pack a 4-character code into a 32-bit tag, sx[0] landing in the least
/// significant byte and sx[3] in the most significant one.
/// Exactly 4 bytes are read; no terminating NUL is required.
static uint32_t fourcc (const char sx[4]) {
    const unsigned char *x = reinterpret_cast<const unsigned char *> (sx);
    // widen each byte before shifting so that a value >= 0x80 in the top
    // position is never shifted into the sign bit of a promoted int
    return  static_cast<uint32_t> (x[0])        |
           (static_cast<uint32_t> (x[1]) << 8)  |
           (static_cast<uint32_t> (x[2]) << 16) |
           (static_cast<uint32_t> (x[3]) << 24);
}
59 
60 /*************************************************************
61  * I/O macros
62  *
63  * we use macros so that we have a line number to report in
64  * abort (). This makes debugging a lot easier.
65  **************************************************************/
66 
67 
// Write n elements starting at ptr to the FILE* named `f`, which must be in
// scope where the macro is expanded. A short write is reported through
// FAISS_THROW_IF_NOT_MSG.
#define WRITEANDCHECK(ptr, n) {                                         \
        size_t ret = fwrite ((ptr), sizeof (*(ptr)), (n), f);           \
        FAISS_THROW_IF_NOT_MSG (ret == (n), "write error");             \
    }
72 
// Read n elements into ptr from the FILE* named `f`, which must be in scope
// where the macro is expanded. A short read is reported through
// FAISS_THROW_IF_NOT_MSG.
#define READANDCHECK(ptr, n) {                                          \
        size_t ret = fread ((ptr), sizeof (*(ptr)), (n), f);            \
        FAISS_THROW_IF_NOT_MSG (ret == (n), "read error");              \
    }
77 
// Single-object shorthands: write / read the raw bytes of the lvalue x.
#define WRITE1(x)   WRITEANDCHECK (&(x), 1)
#define READ1(x)    READANDCHECK (&(x), 1)
80 
// Write a contiguous container: first its element count (as a size_t),
// then the raw elements themselves.
#define WRITEVECTOR(vec) {                                      \
        size_t size = (vec).size ();                            \
        WRITEANDCHECK (&size, 1);                               \
        WRITEANDCHECK ((vec).data (), size);                    \
    }
86 
// Read a container written by WRITEVECTOR: the element count comes first and
// is sanity-checked (non-negative, below 2^40) before the container is
// resized and filled.
#define READVECTOR(vec) {                                       \
        long size;                                              \
        READANDCHECK (&size, 1);                                \
        FAISS_THROW_IF_NOT (size >= 0 && size < (1L << 40));    \
        (vec).resize (size);                                    \
        READANDCHECK ((vec).data (), size);                     \
    }

94 
/// Scope guard that closes a C stream when it goes out of scope, so that
/// the file is released even if the enclosing function throws.
struct ScopeFileCloser {
    FILE *f;   ///< stream to close; may be null, in which case nothing is done

    ScopeFileCloser (FILE *f): f (f) {}

    // a copy would close the same stream twice
    ScopeFileCloser (const ScopeFileCloser &) = delete;
    ScopeFileCloser & operator= (const ScopeFileCloser &) = delete;

    ~ScopeFileCloser () {
        if (f) {
            fclose (f);
        }
    }
};
100 
101 // Macros for read/write arrays aligned to 16 bytes in the
102 // file. Useful when mmapped.
103 
// Write an array of size_in elements: the element count, then padding, then
// the data. The padding is sized from the current file offset so that the
// data begins on a multiple of 16 bytes; its first byte records how many
// further padding bytes follow.
#define WRITETABPAD16(tab, size_in) {                                   \
        size_t size = (size_in);                                        \
        WRITEANDCHECK (&size, 1);                                       \
        uint8_t padding[16] = {0};                                      \
        int idx = ftell(f) % 16;                                        \
        padding [idx] = 15 - idx;                                       \
        WRITEANDCHECK (padding + idx, 16 - idx);                        \
        WRITEANDCHECK ((tab), size);                                    \
    }
113 
// Read an array written by WRITETABPAD16 into a freshly new[]-allocated
// buffer assigned to tab. The stored element count must equal expected_size.
// The padding bytes are consumed and discarded.
#define READTABPAD16(tab, basetype, expected_size) {                    \
        size_t size;                                                    \
        READANDCHECK (&size, 1);                                        \
        FAISS_THROW_IF_NOT ((expected_size) == size);                   \
        uint8_t padding[16], npad;                                      \
        READ1(npad);                                                    \
        FAISS_THROW_IF_NOT (npad < 16);                                 \
        READANDCHECK (padding, npad);                                   \
        (tab) = new basetype [size];                                    \
        READANDCHECK ((tab), size);                                     \
    }
125 
// Parse only the header (element count + padding) of an array written by
// WRITETABPAD16, store the file offset of its first data byte in taboffset,
// then seek past the data without reading it.
#define TABOFFSETPAD16(taboffset, basetype, expected_size) {            \
        size_t size;                                                    \
        READANDCHECK (&size, 1);                                        \
        FAISS_THROW_IF_NOT ((expected_size) == size);                   \
        uint8_t padding[16], npad;                                      \
        READ1(npad);                                                    \
        FAISS_THROW_IF_NOT (npad < 16);                                 \
        READANDCHECK (padding, npad);                                   \
        taboffset = ftell (f);                                          \
        fseek (f, sizeof (basetype) * size, SEEK_CUR);                  \
    }
138 
139 
140 
141 
142 /*************************************************************
143  * Write
144  **************************************************************/
145 
146 static void write_index_header (const Index *idx, FILE *f) {
147  WRITE1 (idx->d);
148  WRITE1 (idx->ntotal);
149  Index::idx_t dummy = 1 << 20;
150  WRITE1 (dummy);
151  WRITE1 (dummy);
152  WRITE1 (idx->is_trained);
153  WRITE1 (idx->metric_type);
154 }
155 
156 
157 
158 void write_VectorTransform (const VectorTransform *vt, FILE *f) {
159  if (const LinearTransform * lt =
160  dynamic_cast < const LinearTransform *> (vt)) {
161  if (dynamic_cast<const RandomRotationMatrix *>(lt)) {
162  uint32_t h = fourcc ("rrot");
163  WRITE1 (h);
164  } else if (const PCAMatrix * pca =
165  dynamic_cast<const PCAMatrix *>(lt)) {
166  uint32_t h = fourcc ("PcAm");
167  WRITE1 (h);
168  WRITE1 (pca->eigen_power);
169  WRITE1 (pca->random_rotation);
170  WRITE1 (pca->balanced_bins);
171  WRITEVECTOR (pca->mean);
172  WRITEVECTOR (pca->eigenvalues);
173  WRITEVECTOR (pca->PCAMat);
174  } else {
175  // generic LinearTransform (includes OPQ)
176  uint32_t h = fourcc ("LTra");
177  WRITE1 (h);
178  }
179  WRITE1 (lt->have_bias);
180  WRITEVECTOR (lt->A);
181  WRITEVECTOR (lt->b);
182  } else if (const RemapDimensionsTransform *rdt =
183  dynamic_cast<const RemapDimensionsTransform *>(vt)) {
184  uint32_t h = fourcc ("RmDT");
185  WRITE1 (h);
186  WRITEVECTOR (rdt->map);
187  } else {
188  FAISS_THROW_MSG ("cannot serialize this");
189  }
190  // common fields
191  WRITE1 (vt->d_in);
192  WRITE1 (vt->d_out);
193  WRITE1 (vt->is_trained);
194 }
195 
196 static void write_ProductQuantizer (const ProductQuantizer *pq, FILE *f) {
197  WRITE1 (pq->d);
198  WRITE1 (pq->M);
199  WRITE1 (pq->nbits);
200  WRITEVECTOR (pq->centroids);
201 }
202 
203 static void write_ScalarQuantizer (const ScalarQuantizer *ivsc, FILE *f) {
204  WRITE1 (ivsc->qtype);
205  WRITE1 (ivsc->rangestat);
206  WRITE1 (ivsc->rangestat_arg);
207  WRITE1 (ivsc->d);
208  WRITE1 (ivsc->code_size);
209  WRITEVECTOR (ivsc->trained);
210 }
211 
212 void write_ProductQuantizer (const ProductQuantizer*pq, const char *fname) {
213  FILE *f = fopen (fname, "w");
214  FAISS_THROW_IF_NOT_FMT (f, "cannot open %s for writing", fname);
215  ScopeFileCloser closer(f);
216  write_ProductQuantizer (pq, f);
217 }
218 
219 
220 
221 static void write_ivf_header (const IndexIVF * ivf, FILE *f,
222  bool include_ids = true) {
223  write_index_header (ivf, f);
224  WRITE1 (ivf->nlist);
225  WRITE1 (ivf->nprobe);
226  write_index (ivf->quantizer, f);
227  if (include_ids) {
228  for (size_t i = 0; i < ivf->nlist; i++)
229  WRITEVECTOR (ivf->ids[i]);
230  }
231  WRITE1 (ivf->maintain_direct_map);
232  WRITEVECTOR (ivf->direct_map);
233 }
234 
235 void write_index (const Index *idx, FILE *f) {
236  if (const IndexFlat * idxf = dynamic_cast<const IndexFlat *> (idx)) {
237  uint32_t h = fourcc (
238  idxf->metric_type == METRIC_INNER_PRODUCT ? "IxFI" :
239  idxf->metric_type == METRIC_L2 ? "IxF2" : nullptr);
240  WRITE1 (h);
241  write_index_header (idx, f);
242  WRITEVECTOR (idxf->xb);
243  } else if(const IndexLSH * idxl = dynamic_cast<const IndexLSH *> (idx)) {
244  uint32_t h = fourcc ("IxHe");
245  WRITE1 (h);
246  write_index_header (idx, f);
247  WRITE1 (idxl->nbits);
248  WRITE1 (idxl->rotate_data);
249  WRITE1 (idxl->train_thresholds);
250  WRITEVECTOR (idxl->thresholds);
251  WRITE1 (idxl->bytes_per_vec);
252  write_VectorTransform (&idxl->rrot, f);
253  WRITEVECTOR (idxl->codes);
254  } else if(const IndexPQ * idxp = dynamic_cast<const IndexPQ *> (idx)) {
255  uint32_t h = fourcc ("IxPq");
256  WRITE1 (h);
257  write_index_header (idx, f);
258  write_ProductQuantizer (&idxp->pq, f);
259  WRITEVECTOR (idxp->codes);
260  // search params -- maybe not useful to store?
261  WRITE1 (idxp->search_type);
262  WRITE1 (idxp->encode_signs);
263  WRITE1 (idxp->polysemous_ht);
264  } else if(const IndexIVFFlat * ivfl =
265  dynamic_cast<const IndexIVFFlat *> (idx)) {
266  uint32_t h = fourcc ("IvFl");
267  WRITE1 (h);
268  write_ivf_header (ivfl, f);
269  for(int i = 0; i < ivfl->nlist; i++)
270  WRITEVECTOR (ivfl->vecs[i]);
271  } else if(const IndexIVFScalarQuantizer * ivsc =
272  dynamic_cast<const IndexIVFScalarQuantizer *> (idx)) {
273  uint32_t h = fourcc ("IvSQ");
274  WRITE1 (h);
275  write_ivf_header (ivsc, f);
276  write_ScalarQuantizer (&ivsc->sq, f);
277  WRITE1 (ivsc->code_size);
278  for(int i = 0; i < ivsc->nlist; i++)
279  WRITEVECTOR (ivsc->codes[i]);
280  } else if(const IndexIVFPQ * ivpq =
281  dynamic_cast<const IndexIVFPQ *> (idx)) {
282  const IndexIVFPQR * ivfpqr = dynamic_cast<const IndexIVFPQR *> (idx);
283  const IndexIVFPQCompact * ivfpqc =
284  dynamic_cast<const IndexIVFPQCompact *> (idx);
285  uint32_t h = fourcc (ivfpqr ? "IvQR" : ivfpqc ? "IvPC" : "IvPQ");
286  WRITE1 (h);
287  write_ivf_header (ivpq, f, !ivfpqc);
288  WRITE1 (ivpq->by_residual);
289  WRITE1 (ivpq->code_size);
290  write_ProductQuantizer (&ivpq->pq, f);
291  if (!ivfpqc) {
292  for(int i = 0; i < ivpq->codes.size(); i++)
293  WRITEVECTOR (ivpq->codes[i]);
294  }
295  if (ivfpqr) {
296  write_ProductQuantizer (&ivfpqr->refine_pq, f);
297  WRITEVECTOR (ivfpqr->refine_codes);
298  WRITE1 (ivfpqr->k_factor);
299  }
300  if (ivfpqc) {
301  WRITETABPAD16 (ivfpqc->limits, ivfpqc->nlist + 1);
302  WRITETABPAD16 (ivfpqc->compact_ids, ivfpqc->ntotal);
303  WRITETABPAD16 (ivfpqc->compact_codes,
304  ivfpqc->ntotal * ivfpqc->code_size);
305  }
306  } else if(const IndexPreTransform * ixpt =
307  dynamic_cast<const IndexPreTransform *> (idx)) {
308  uint32_t h = fourcc ("IxPT");
309  WRITE1 (h);
310  write_index_header (ixpt, f);
311  int nt = ixpt->chain.size();
312  WRITE1 (nt);
313  for (int i = 0; i < nt; i++)
314  write_VectorTransform (ixpt->chain[i], f);
315  write_index (ixpt->index, f);
316  } else if(const MultiIndexQuantizer * imiq =
317  dynamic_cast<const MultiIndexQuantizer *> (idx)) {
318  uint32_t h = fourcc ("Imiq");
319  WRITE1 (h);
320  write_index_header (imiq, f);
321  write_ProductQuantizer (&imiq->pq, f);
322  } else if(const IndexRefineFlat * idxrf =
323  dynamic_cast<const IndexRefineFlat *> (idx)) {
324  uint32_t h = fourcc ("IxRF");
325  WRITE1 (h);
326  write_index_header (idxrf, f);
327  write_index (idxrf->base_index, f);
328  write_index (&idxrf->refine_index, f);
329  WRITE1 (idxrf->k_factor);
330  } else if(const IndexIDMap * idxmap =
331  dynamic_cast<const IndexIDMap *> (idx)) {
332  uint32_t h = fourcc ("IxMp");
333  WRITE1 (h);
334  write_index_header (idxmap, f);
335  write_index (idxmap->index, f);
336  WRITEVECTOR (idxmap->id_map);
337  } else {
338  FAISS_THROW_MSG ("don't know how to serialize this type of index");
339  }
340 }
341 
342 void write_index (const Index *idx, const char *fname) {
343  FILE *f = fopen (fname, "w");
344  FAISS_THROW_IF_NOT_FMT (f, "cannot open %s for writing", fname);
345  ScopeFileCloser closer(f);
346  write_index (idx, f);
347 }
348 
349 void write_VectorTransform (const VectorTransform *vt, const char *fname) {
350  FILE *f = fopen (fname, "w");
351  FAISS_THROW_IF_NOT_FMT (f, "cannot open %s for writing", fname);
352  ScopeFileCloser closer(f);
353  write_VectorTransform (vt, f);
354 }
355 
356 /*************************************************************
357  * Read
358  **************************************************************/
359 
360 static void read_index_header (Index *idx, FILE *f) {
361  READ1 (idx->d);
362  READ1 (idx->ntotal);
363  Index::idx_t dummy;
364  READ1 (dummy);
365  READ1 (dummy);
366  READ1 (idx->is_trained);
367  READ1 (idx->metric_type);
368  idx->verbose = false;
369 }
370 
371 VectorTransform* read_VectorTransform (FILE *f) {
372  uint32_t h;
373  READ1 (h);
374  VectorTransform *vt = nullptr;
375 
376  if (h == fourcc ("rrot") || h == fourcc ("PCAm") ||
377  h == fourcc ("LTra") || h == fourcc ("PcAm")) {
378  LinearTransform *lt = nullptr;
379  if (h == fourcc ("rrot")) {
380  lt = new RandomRotationMatrix ();
381  } else if (h == fourcc ("PCAm") ||
382  h == fourcc ("PcAm")) {
383  PCAMatrix * pca = new PCAMatrix ();
384  READ1 (pca->eigen_power);
385  READ1 (pca->random_rotation);
386  if (h == fourcc ("PcAm"))
387  READ1 (pca->balanced_bins);
388  READVECTOR (pca->mean);
389  READVECTOR (pca->eigenvalues);
390  READVECTOR (pca->PCAMat);
391  lt = pca;
392  } else if (h == fourcc ("LTra")) {
393  lt = new LinearTransform ();
394  }
395  READ1 (lt->have_bias);
396  READVECTOR (lt->A);
397  READVECTOR (lt->b);
398  vt = lt;
399  } else if (h == fourcc ("RmDT")) {
400  RemapDimensionsTransform *rdt = new RemapDimensionsTransform ();
401  READVECTOR (rdt->map);
402  vt = rdt;
403  } else {
404  FAISS_THROW_MSG("fourcc not recognized");
405  }
406  READ1 (vt->d_in);
407  READ1 (vt->d_out);
408  READ1 (vt->is_trained);
409  return vt;
410 }
411 
412 static void read_ProductQuantizer (ProductQuantizer *pq, FILE *f) {
413  READ1 (pq->d);
414  READ1 (pq->M);
415  READ1 (pq->nbits);
416  pq->set_derived_values ();
417  READVECTOR (pq->centroids);
418 }
419 
420 static void read_ScalarQuantizer (ScalarQuantizer *ivsc, FILE *f) {
421  READ1 (ivsc->qtype);
422  READ1 (ivsc->rangestat);
423  READ1 (ivsc->rangestat_arg);
424  READ1 (ivsc->d);
425  READ1 (ivsc->code_size);
426  READVECTOR (ivsc->trained);
427 }
428 
429 ProductQuantizer * read_ProductQuantizer (const char*fname) {
430  FILE *f = fopen (fname, "r");
431  FAISS_THROW_IF_NOT_FMT (f, "cannot open %s for writing", fname);
432  ScopeFileCloser closer(f);
433  ProductQuantizer *pq = new ProductQuantizer();
434  ScopeDeleter1<ProductQuantizer> del (pq);
435  read_ProductQuantizer(pq, f);
436  del.release ();
437  return pq;
438 }
439 
440 static void read_ivf_header (IndexIVF * ivf, FILE *f,
441  bool include_ids = true) {
442  read_index_header (ivf, f);
443  READ1 (ivf->nlist);
444  READ1 (ivf->nprobe);
445  ivf->quantizer = read_index (f);
446  ivf->own_fields = true;
447  if (include_ids) {
448  ivf->ids.resize (ivf->nlist);
449  for (size_t i = 0; i < ivf->nlist; i++)
450  READVECTOR (ivf->ids[i]);
451  }
452  READ1 (ivf->maintain_direct_map);
453  READVECTOR (ivf->direct_map);
454 }
455 
456 static IndexIVFPQ *read_ivfpq (FILE *f, uint32_t h, bool try_mmap)
457 {
458 
459  IndexIVFPQR *ivfpqr =
460  h == fourcc ("IvQR") ? new IndexIVFPQR () : nullptr;
461  IndexIVFPQCompact *ivfpqc =
462  h == fourcc ("IvPC") ? new IndexIVFPQCompact () : nullptr;
463  IndexIVFPQ * ivpq = ivfpqr ? ivfpqr : ivfpqc ? ivfpqc : new IndexIVFPQ ();
464  read_ivf_header (ivpq, f, !ivfpqc);
465  READ1 (ivpq->by_residual);
466  READ1 (ivpq->code_size);
467  read_ProductQuantizer (&ivpq->pq, f);
468  if (!ivfpqc) {
469  ivpq->codes.resize (ivpq->nlist);
470  for (size_t i = 0; i < ivpq->nlist; i++)
471  READVECTOR (ivpq->codes[i]);
472  }
473  // precomputed table not stored. It is cheaper to recompute it
474  ivpq->use_precomputed_table = 0;
475  if (ivpq->by_residual)
476  ivpq->precompute_table ();
477  if (ivfpqr) {
478  read_ProductQuantizer (&ivfpqr->refine_pq, f);
479  READVECTOR (ivfpqr->refine_codes);
480  READ1 (ivfpqr->k_factor);
481  }
482  if (ivfpqc) {
483  if (!try_mmap) {
484  READTABPAD16 (ivfpqc->limits, uint32_t, ivfpqc->nlist + 1);
485  READTABPAD16 (ivfpqc->compact_ids, uint32_t, ivfpqc->ntotal);
486  READTABPAD16 (ivfpqc->compact_codes, uint8_t,
487  ivfpqc->ntotal * ivfpqc->code_size);
488  } else {
489  long offset_limits, offset_compact_ids, offset_compact_codes;
490  TABOFFSETPAD16 (offset_limits, uint32_t, ivfpqc->nlist + 1);
491  TABOFFSETPAD16 (offset_compact_ids, uint32_t, ivfpqc->ntotal);
492  TABOFFSETPAD16 (offset_compact_codes, uint8_t,
493  ivfpqc->ntotal * ivfpqc->code_size);
494  ivfpqc->mmap_length = ftell (f);
495  // mmap the whole file
496  ivfpqc->mmap_buffer = (char*)mmap (
497  nullptr, ivfpqc->mmap_length,
498  PROT_READ, MAP_SHARED, fileno (f), 0);
499  if (!ivfpqc->mmap_buffer) {
500  perror ("mmap failed");
501  abort ();
502  }
503  // at this point the file can be closed, it does not
504  // invalidate the mapping
505  ivfpqc->limits = (uint32_t*)(ivfpqc->mmap_buffer + offset_limits);
506  ivfpqc->compact_ids = (uint32_t*)(ivfpqc->mmap_buffer +
507  offset_compact_ids);
508  ivfpqc->compact_codes = (uint8_t*)(ivfpqc->mmap_buffer +
509  offset_compact_codes);
510  }
511  }
512  return ivpq;
513 }
514 
515 int read_old_fmt_hack = 0;
516 
517 Index *read_index (FILE * f, bool try_mmap) {
518  Index * idx = nullptr;
519  uint32_t h;
520  READ1 (h);
521  if (h == fourcc ("IxFI") || h == fourcc ("IxF2")) {
522  IndexFlat *idxf;
523  if (h == fourcc ("IxFI")) idxf = new IndexFlatIP ();
524  else idxf = new IndexFlatL2 ();
525  read_index_header (idxf, f);
526  READVECTOR (idxf->xb);
527  FAISS_THROW_IF_NOT (idxf->xb.size() == idxf->ntotal * idxf->d);
528  // leak!
529  idx = idxf;
530  } else if (h == fourcc("IxHE") || h == fourcc("IxHe")) {
531  IndexLSH * idxl = new IndexLSH ();
532  read_index_header (idxl, f);
533  READ1 (idxl->nbits);
534  READ1 (idxl->rotate_data);
535  READ1 (idxl->train_thresholds);
536  READVECTOR (idxl->thresholds);
537  READ1 (idxl->bytes_per_vec);
538  if (h == fourcc("IxHE")) {
539  FAISS_THROW_IF_NOT_FMT (idxl->nbits % 64 == 0,
540  "can only read old format IndexLSH with "
541  "nbits multiple of 64 (got %d)",
542  (int) idxl->nbits);
543  // leak
544  idxl->bytes_per_vec *= 8;
545  }
546  {
547  RandomRotationMatrix *rrot = dynamic_cast<RandomRotationMatrix *>
548  (read_VectorTransform (f));
549  FAISS_THROW_IF_NOT_MSG(rrot, "expected a random rotation");
550  idxl->rrot = *rrot;
551  delete rrot;
552  }
553  READVECTOR (idxl->codes);
554  FAISS_THROW_IF_NOT (idxl->rrot.d_in == idxl->d &&
555  idxl->rrot.d_out == idxl->nbits);
556  FAISS_THROW_IF_NOT (
557  idxl->codes.size() == idxl->ntotal * idxl->bytes_per_vec);
558  idx = idxl;
559  } else if (h == fourcc ("IxPQ") || h == fourcc ("IxPo") ||
560  h == fourcc ("IxPq")) {
561  // IxPQ and IxPo were merged into the same IndexPQ object
562  IndexPQ * idxp =new IndexPQ ();
563  read_index_header (idxp, f);
564  read_ProductQuantizer (&idxp->pq, f);
565  READVECTOR (idxp->codes);
566  if (h == fourcc ("IxPo") || h == fourcc ("IxPq")) {
567  READ1 (idxp->search_type);
568  READ1 (idxp->encode_signs);
569  READ1 (idxp->polysemous_ht);
570  }
571  // Old versoins of PQ all had metric_type set to INNER_PRODUCT
572  // when they were in fact using L2. Therefore, we force metric type
573  // to L2 when the old format is detected
574  if (h == fourcc ("IxPQ") || h == fourcc ("IxPo")) {
575  idxp->metric_type = METRIC_L2;
576  }
577  idx = idxp;
578  } else if(h == fourcc ("IvFl")) {
579  IndexIVFFlat * ivfl = new IndexIVFFlat ();
580  read_ivf_header (ivfl, f);
581  ivfl->vecs.resize (ivfl->nlist);
582  for (size_t i = 0; i < ivfl->nlist; i++)
583  READVECTOR (ivfl->vecs[i]);
584  idx = ivfl;
585  } else if(h == fourcc ("IvSQ")) {
587  read_ivf_header (ivsc, f);
588  ivsc->codes.resize(ivsc->nlist);
589  read_ScalarQuantizer (&ivsc->sq, f);
590  READ1 (ivsc->code_size);
591  for(int i = 0; i < ivsc->nlist; i++)
592  READVECTOR (ivsc->codes[i]);
593  idx = ivsc;
594  } else if(h == fourcc ("IvPQ") || h == fourcc ("IvQR") ||
595  h == fourcc ("IvPC")) {
596 
597  idx = read_ivfpq (f, h, try_mmap);
598 
599  } else if(h == fourcc ("IxPT")) {
600  IndexPreTransform * ixpt = new IndexPreTransform();
601  ixpt->own_fields = true;
602  read_index_header (ixpt, f);
603  int nt;
604  if (read_old_fmt_hack == 2) {
605  nt = 1;
606  } else {
607  READ1 (nt);
608  }
609  for (int i = 0; i < nt; i++)
610  ixpt->chain.push_back (read_VectorTransform (f));
611  ixpt->index = read_index (f);
612  idx = ixpt;
613  } else if(h == fourcc ("Imiq")) {
615  read_index_header (imiq, f);
616  read_ProductQuantizer (&imiq->pq, f);
617  idx = imiq;
618  } else if(h == fourcc ("IxRF")) {
619  IndexRefineFlat *idxrf = new IndexRefineFlat ();
620  read_index_header (idxrf, f);
621  idxrf->base_index = read_index(f);
622  idxrf->own_fields = true;
623  IndexFlat *rf = dynamic_cast<IndexFlat*> (read_index (f));
624  std::swap (*rf, idxrf->refine_index);
625  delete rf;
626  READ1 (idxrf->k_factor);
627  idx = idxrf;
628  } else if(h == fourcc ("IxMp")) {
629  IndexIDMap * idxmap = new IndexIDMap ();
630  read_index_header (idxmap, f);
631  idxmap->index = read_index (f);
632  idxmap->own_fields = true;
633  READVECTOR (idxmap->id_map);
634  idx = idxmap;
635  } else {
636  fprintf (stderr, "Index type 0x%08x not supported\n", h);
637  abort ();
638  }
639  return idx;
640 }
641 
642 
643 
644 Index *read_index (const char *fname, bool try_mmap) {
645  FILE *f = fopen (fname, "r");
646  FAISS_THROW_IF_NOT_FMT (f, "cannot open %s for reading:", fname);
647  Index *idx = read_index (f, try_mmap);
648  fclose (f);
649  return idx;
650 }
651 
652 VectorTransform *read_VectorTransform (const char *fname) {
653  FILE *f = fopen (fname, "r");
654  if (!f) {
655  fprintf (stderr, "cannot open %s for reading:", fname);
656  perror ("");
657  abort ();
658  }
659  VectorTransform *vt = read_VectorTransform (f);
660  fclose (f);
661  return vt;
662 }
663 
664 /*************************************************************
665  * cloning functions
666  **************************************************************/
667 
668 
669 
670 Index * clone_index (const Index *index)
671 {
672  Cloner cl;
673  return cl.clone_Index (index);
674 }
675 
// If obj is (or derives from) classname, return a copy made with that
// class's copy constructor. The macro ends with a dangling `else`, so
// invocations chain; list them from the most specific class to the most
// general one.
#define TRYCLONE(classname, obj)                                        \
    if (const classname *clo = dynamic_cast<const classname *> (obj)) { \
        return new classname (*clo);                                    \
    } else
682 
683 VectorTransform *Cloner::clone_VectorTransform (const VectorTransform *vt)
684 {
685  TRYCLONE (RemapDimensionsTransform, vt)
686  TRYCLONE (OPQMatrix, vt)
687  TRYCLONE (PCAMatrix, vt)
688  TRYCLONE (RandomRotationMatrix, vt)
689  TRYCLONE (LinearTransform, vt)
690  {
691  FAISS_THROW_MSG("clone not supported for this type of VectorTransform");
692  }
693  return nullptr;
694 }
695 
696 IndexIVF * Cloner::clone_IndexIVF (const IndexIVF *ivf)
697 {
698  TRYCLONE (IndexIVFPQR, ivf)
699  TRYCLONE (IndexIVFPQ, ivf)
700  TRYCLONE (IndexIVFFlat, ivf)
701  {
702  FAISS_THROW_MSG("clone not supported for this type of IndexIVF");
703  }
704  return nullptr;
705 }
706 
707 Index *Cloner::clone_Index (const Index *index)
708 {
709  TRYCLONE (IndexPQ, index)
710  TRYCLONE (IndexLSH, index)
711  TRYCLONE (IndexFlatL2, index)
712  TRYCLONE (IndexFlatIP, index)
713  TRYCLONE (IndexFlat, index)
714  TRYCLONE (MultiIndexQuantizer, index)
715  if (const IndexIVF * ivf = dynamic_cast<const IndexIVF*>(index)) {
716  IndexIVF *res = clone_IndexIVF (ivf);
717  res->own_fields = true;
718  res->quantizer = clone_Index (ivf->quantizer);
719  return res;
720  } else if (const IndexPreTransform * ipt =
721  dynamic_cast<const IndexPreTransform*> (index)) {
722  IndexPreTransform *res = new IndexPreTransform ();
723  res->d = ipt->d;
724  res->index = clone_Index (ipt->index);
725  for (int i = 0; i < ipt->chain.size(); i++)
726  res->chain.push_back (clone_VectorTransform (ipt->chain[i]));
727  res->own_fields = true;
728  return res;
729  } else {
730  FAISS_THROW_MSG( "clone not supported for this type of Index");
731  }
732  return nullptr;
733 }
734 
735 
736 } // namespace faiss
std::vector< uint8_t > codes
Codes. Size ntotal * pq.code_size.
Definition: IndexPQ.h:34
Index * index
! chain of transforms
Randomly rotate a set of vectors.
Index * read_index(FILE *f, bool try_mmap)
Definition: index_io.cpp:517
int bytes_per_vec
nb of 8-bits per encoded vector
Definition: IndexLSH.h:28
std::vector< float > thresholds
thresholds to compare with
Definition: IndexLSH.h:34
bool train_thresholds
whether we train thresholds or use 0
Definition: IndexLSH.h:30
Index * base_index
faster index to pre-select the vectors that should be filtered
Definition: IndexFlat.h:109
std::vector< std::vector< uint8_t > > codes
inverted list codes.
IndexFlat refine_index
storage for full vectors
Definition: IndexFlat.h:106
bool own_fields
should the base index be deallocated?
Definition: IndexFlat.h:110
int d
vector dimension
Definition: Index.h:64
std::vector< long > id_map
! whether pointers are deleted in destructor
Definition: MetaIndexes.h:28
RandomRotationMatrix rrot
optional random rotation
Definition: IndexLSH.h:32
long idx_t
all indices are this type
Definition: Index.h:62
ProductQuantizer pq
The product quantizer used to encode the vectors.
Definition: IndexPQ.h:31
idx_t ntotal
total nb of indexed vectors
Definition: Index.h:65
bool own_fields
! the sub-index
MetricType metric_type
type of metric this index uses for search
Definition: Index.h:72
size_t nlist
number of possible key values
Definition: IndexIVF.h:46
int d_out
! input dimension
int nbits
nb of bits per vector
Definition: IndexLSH.h:27
bool is_trained
set if the Index does not require training, or if training is done already
Definition: Index.h:69
std::vector< float > xb
database vectors, size ntotal * d
Definition: IndexFlat.h:25
int polysemous_ht
Hamming threshold used for polysemy.
Definition: IndexPQ.h:91
bool rotate_data
whether to apply a random rotation to input
Definition: IndexLSH.h:29
std::vector< uint8_t > codes
encoded dataset
Definition: IndexLSH.h:37
std::vector< std::vector< float > > vecs
Definition: IndexIVF.h:132
bool own_fields
! the sub-index
Definition: MetaIndexes.h:27