Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
/data/users/matthijs/github_faiss/faiss/index_io.cpp
1 
2 /**
3  * Copyright (c) 2015-present, Facebook, Inc.
4  * All rights reserved.
5  *
6  * This source code is licensed under the CC-by-NC license found in the
7  * LICENSE file in the root directory of this source tree.
8  */
9 
10 // Copyright 2004-present Facebook. All Rights Reserved.
11 
12 #include "index_io.h"
13 
14 #include <cstdio>
15 #include <cstdlib>
16 
17 #include <sys/mman.h>
18 
19 #include "FaissAssert.h"
20 
21 #include "IndexFlat.h"
22 #include "VectorTransform.h"
23 #include "IndexLSH.h"
24 #include "IndexPQ.h"
25 #include "IndexIVF.h"
26 #include "IndexIVFPQ.h"
27 #include "MetaIndexes.h"
28 
29 /*************************************************************
30  * The I/O format is the content of the class. For objects that are
31  * inherited, like Index, a 4-character-code (fourcc) indicates which
32  * child class this is an instance of.
33  *
34  * In this case, the fields of the parent class are written first,
35  * then the ones for the child classes. Note that this requires
36  * classes to be serialized to have a constructor without parameters,
37  * so that the fields can be filled in later. The default constructor
38  * should set reasonable defaults for all fields.
39  *
40  * The fourccs are assigned arbitrarily. When the class changed (added
41  * or deprecated fields), the fourcc can be replaced. New code should
42  * be able to read the old fourcc and fill in new classes.
43  *
44  * TODO: serialization to strings for use in Python pickle or Torch
45  * serialization.
46  **************************************************************/
47 
48 
49 
50 namespace faiss {
51 
/// Pack a 4-character code into a 32-bit tag, first character in the
/// least significant byte.
///
/// @param sx  pointer to at least 4 readable characters (no terminator
///            is required); must not be null
/// @return    x[0] | x[1] << 8 | x[2] << 16 | x[3] << 24
static uint32_t fourcc (const char sx[4]) {
    const unsigned char *x = reinterpret_cast<const unsigned char *> (sx);
    // Widen to uint32_t before shifting: an unsigned char promotes to
    // (signed) int, and shifting a value >= 0x80 left by 24 would
    // overflow it.
    return  static_cast<uint32_t> (x[0])        |
           (static_cast<uint32_t> (x[1]) << 8)  |
           (static_cast<uint32_t> (x[2]) << 16) |
           (static_cast<uint32_t> (x[3]) << 24);
}
56 
57 /*************************************************************
58  * I/O macros
59  *
60  * we use macros so that we have a line number to report in
61  * abort (). This makes debugging a lot easier.
62  **************************************************************/
63 
64 
// Write n items starting at ptr to the FILE* named f that must be in
// scope at the expansion site. The item size is taken from the pointee
// type of ptr. A short write trips FAISS_ASSERT.
#define WRITEANDCHECK(ptr, n) {                                         \
        size_t n_written = fwrite (ptr, sizeof (* (ptr)), n, f);        \
        FAISS_ASSERT (n_written == (n) || !"write error");              \
    }
69 
// Read n items into ptr from the FILE* named f that must be in scope at
// the expansion site. The item size is taken from the pointee type of
// ptr. A short read (truncated file or I/O error) trips FAISS_ASSERT.
#define READANDCHECK(ptr, n) {                                          \
        size_t ret = fread (ptr, sizeof (* (ptr)), n, f);               \
        FAISS_ASSERT (ret == (n) || !"read error");                     \
    }

// Single-object shortcuts: x must be an lvalue, its address is taken.
#define WRITE1(x) WRITEANDCHECK(&(x), 1)
#define READ1(x)  READANDCHECK(&(x), 1)
77 
// Serialize a std::vector-like container: first its element count as a
// size_t, then the elements themselves as one contiguous run.
#define WRITEVECTOR(vec) {                                              \
        size_t n_items = (vec).size ();                                 \
        WRITEANDCHECK (&n_items, 1);                                    \
        WRITEANDCHECK ((vec).data (), n_items);                         \
    }
83 
// Counterpart of WRITEVECTOR: read the element count, sanity-check it
// (non-negative and below 2^40) so that a corrupt header does not
// trigger a huge allocation, resize the container and read the elements.
#define READVECTOR(vec) {                                               \
        long n_items;                                                   \
        READANDCHECK (&n_items, 1);                                     \
        FAISS_ASSERT (n_items >= 0 && n_items < (1L << 40));            \
        (vec).resize (n_items);                                         \
        READANDCHECK ((vec).data (), n_items);                          \
    }
91 
92 
93 // Macros for read/write arrays aligned to 16 bytes in the
94 // file. Useful when mmapped.
95 
// Write a raw array so that its first element lands on a 16-byte
// boundary of the file:
//   1. the element count (size_t),
//   2. one byte holding the number of padding bytes that follow it,
//   3. that many zero bytes,
//   4. the array data.
// The pad length is derived from the file position right after the
// element count has been written.
#define WRITETABPAD16(tab, size_in) {                                   \
        size_t n_items = (size_in);                                     \
        WRITEANDCHECK (&n_items, 1);                                    \
        uint8_t pad[16] = {0};                                          \
        int misalign = ftell(f) % 16;                                   \
        pad [misalign] = 15 - misalign;                                 \
        WRITEANDCHECK (pad + misalign, 16 - misalign);                  \
        WRITEANDCHECK ((tab), n_items);                                 \
    }
105 
// Read an array written by WRITETABPAD16 into a freshly allocated
// buffer (new basetype[]), which is stored in tab. The element count
// found in the file must equal expected_size; the padding bytes are
// read and discarded.
#define READTABPAD16(tab, basetype, expected_size) {                    \
        size_t n_items;                                                 \
        READANDCHECK (&n_items, 1);                                     \
        FAISS_ASSERT ((expected_size) == n_items);                      \
        uint8_t n_pad;                                                  \
        uint8_t pad_bytes[16];                                          \
        READ1(n_pad);                                                   \
        FAISS_ASSERT (n_pad < 16);                                      \
        READANDCHECK (pad_bytes, n_pad);                                \
        (tab) = new basetype [n_items];                                 \
        READANDCHECK ((tab), n_items);                                  \
    }
117 
// Read only the header of an array written by WRITETABPAD16 (element
// count, pad length, pad bytes), store the file offset of the first
// element in taboffset, then skip over the array data without reading
// it. The element count must equal expected_size. Failures of ftell /
// fseek are reported through FAISS_ASSERT like every other I/O error
// in this file.
#define TABOFFSETPAD16(taboffset, basetype, expected_size) {            \
        size_t size;                                                    \
        READANDCHECK (&size, 1);                                        \
        FAISS_ASSERT ((expected_size) == size);                         \
        uint8_t padding[16], npad;                                      \
        READ1(npad);                                                    \
        FAISS_ASSERT (npad < 16);                                       \
        READANDCHECK (padding, npad);                                   \
        taboffset = ftell(f);                                           \
        FAISS_ASSERT ((taboffset) >= 0 || !"ftell error");              \
        int seek_ret = fseek (f, sizeof(basetype) * size, SEEK_CUR);    \
        FAISS_ASSERT (seek_ret == 0 || !"seek error");                  \
    }
130 
131 
132 
133 
134 /*************************************************************
135  * Write
136  **************************************************************/
137 
138 static void write_index_header (const Index *idx, FILE *f) {
139  WRITE1 (idx->d);
140  WRITE1 (idx->ntotal);
141  Index::idx_t dummy = 1 << 20;
142  WRITE1 (dummy);
143  WRITE1 (dummy);
144  WRITE1 (idx->is_trained);
145  WRITE1 (idx->metric_type);
146 }
147 
148 
149 
150 void write_VectorTransform (const VectorTransform *vt, FILE *f) {
151  if (const LinearTransform * lt =
152  dynamic_cast < const LinearTransform *> (vt)) {
153  if (dynamic_cast<const RandomRotationMatrix *>(lt)) {
154  uint32_t h = fourcc ("rrot");
155  WRITE1 (h);
156  } else if (const PCAMatrix * pca =
157  dynamic_cast<const PCAMatrix *>(lt)) {
158  uint32_t h = fourcc ("PcAm");
159  WRITE1 (h);
160  WRITE1 (pca->eigen_power);
161  WRITE1 (pca->random_rotation);
162  WRITE1 (pca->balanced_bins);
163  WRITEVECTOR (pca->mean);
164  WRITEVECTOR (pca->eigenvalues);
165  WRITEVECTOR (pca->PCAMat);
166  } else {
167  // generic LinearTransform (includes OPQ)
168  uint32_t h = fourcc ("LTra");
169  WRITE1 (h);
170  }
171  WRITE1 (lt->have_bias);
172  WRITEVECTOR (lt->A);
173  WRITEVECTOR (lt->b);
174  } else if (const RemapDimensionsTransform *rdt =
175  dynamic_cast<const RemapDimensionsTransform *>(vt)) {
176  uint32_t h = fourcc ("RmDT");
177  WRITE1 (h);
178  WRITEVECTOR (rdt->map);
179  } else FAISS_ASSERT (!"cannot serialize this");
180  // common fields
181  WRITE1 (vt->d_in);
182  WRITE1 (vt->d_out);
183  WRITE1 (vt->is_trained);
184 }
185 
186 static void write_ProductQuantizer (const ProductQuantizer *pq, FILE *f) {
187  WRITE1 (pq->d);
188  WRITE1 (pq->M);
189  WRITE1 (pq->nbits);
190  WRITEVECTOR (pq->centroids);
191 }
192 
193 void write_ProductQuantizer (const ProductQuantizer*pq, const char *fname) {
194  FILE *f = fopen (fname, "w");
195  if (!f) {
196  fprintf (stderr, "cannot open %s for writing:", fname);
197  perror ("");
198  abort ();
199  }
200  write_ProductQuantizer (pq, f);
201  fclose (f);
202 }
203 
204 
205 
206 static void write_ivf_header (const IndexIVF * ivf, FILE *f,
207  bool include_ids = true) {
208  write_index_header (ivf, f);
209  WRITE1 (ivf->nlist);
210  WRITE1 (ivf->nprobe);
211  write_index (ivf->quantizer, f);
212  if (include_ids) {
213  for (size_t i = 0; i < ivf->nlist; i++)
214  WRITEVECTOR (ivf->ids[i]);
215  }
216  WRITE1 (ivf->maintain_direct_map);
217  WRITEVECTOR (ivf->direct_map);
218 }
219 
220 void write_index (const Index *idx, FILE *f) {
221  if (const IndexFlat * idxf = dynamic_cast<const IndexFlat *> (idx)) {
222  uint32_t h = fourcc (
223  idxf->metric_type == METRIC_INNER_PRODUCT ? "IxFI" :
224  idxf->metric_type == METRIC_L2 ? "IxF2" : nullptr);
225  WRITE1 (h);
226  write_index_header (idx, f);
227  WRITEVECTOR (idxf->xb);
228  } else if(const IndexLSH * idxl = dynamic_cast<const IndexLSH *> (idx)) {
229  uint32_t h = fourcc ("IxHe");
230  WRITE1 (h);
231  write_index_header (idx, f);
232  WRITE1 (idxl->nbits);
233  WRITE1 (idxl->rotate_data);
234  WRITE1 (idxl->train_thresholds);
235  WRITEVECTOR (idxl->thresholds);
236  WRITE1 (idxl->bytes_per_vec);
237  write_VectorTransform (&idxl->rrot, f);
238  WRITEVECTOR (idxl->codes);
239  } else if(const IndexPQ * idxp = dynamic_cast<const IndexPQ *> (idx)) {
240  uint32_t h = fourcc ("IxPq");
241  WRITE1 (h);
242  write_index_header (idx, f);
243  write_ProductQuantizer (&idxp->pq, f);
244  WRITEVECTOR (idxp->codes);
245  // search params -- maybe not useful to store?
246  WRITE1 (idxp->search_type);
247  WRITE1 (idxp->encode_signs);
248  WRITE1 (idxp->polysemous_ht);
249  } else if(const IndexIVFFlat * ivfl =
250  dynamic_cast<const IndexIVFFlat *> (idx)) {
251  uint32_t h = fourcc ("IvFl");
252  WRITE1 (h);
253  write_ivf_header (ivfl, f);
254  for(int i = 0; i < ivfl->nlist; i++)
255  WRITEVECTOR (ivfl->vecs[i]);
256  } else if(const IndexIVFPQ * ivpq =
257  dynamic_cast<const IndexIVFPQ *> (idx)) {
258  const IndexIVFPQR * ivfpqr = dynamic_cast<const IndexIVFPQR *> (idx);
259  const IndexIVFPQCompact * ivfpqc =
260  dynamic_cast<const IndexIVFPQCompact *> (idx);
261  uint32_t h = fourcc (ivfpqr ? "IvQR" : ivfpqc ? "IvPC" : "IvPQ");
262  WRITE1 (h);
263  write_ivf_header (ivpq, f, !ivfpqc);
264  WRITE1 (ivpq->by_residual);
265  WRITE1 (ivpq->code_size);
266  write_ProductQuantizer (&ivpq->pq, f);
267  if (!ivfpqc) {
268  for(int i = 0; i < ivpq->codes.size(); i++)
269  WRITEVECTOR (ivpq->codes[i]);
270  }
271  if (ivfpqr) {
272  write_ProductQuantizer (&ivfpqr->refine_pq, f);
273  WRITEVECTOR (ivfpqr->refine_codes);
274  WRITE1 (ivfpqr->k_factor);
275  }
276  if (ivfpqc) {
277  WRITETABPAD16 (ivfpqc->limits, ivfpqc->nlist + 1);
278  WRITETABPAD16 (ivfpqc->compact_ids, ivfpqc->ntotal);
279  WRITETABPAD16 (ivfpqc->compact_codes,
280  ivfpqc->ntotal * ivfpqc->code_size);
281  }
282  } else if(const IndexPreTransform * ixpt =
283  dynamic_cast<const IndexPreTransform *> (idx)) {
284  uint32_t h = fourcc ("IxPT");
285  WRITE1 (h);
286  write_index_header (ixpt, f);
287  int nt = ixpt->chain.size();
288  WRITE1 (nt);
289  for (int i = 0; i < nt; i++)
290  write_VectorTransform (ixpt->chain[i], f);
291  write_index (ixpt->index, f);
292  } else if(const MultiIndexQuantizer * imiq =
293  dynamic_cast<const MultiIndexQuantizer *> (idx)) {
294  uint32_t h = fourcc ("Imiq");
295  WRITE1 (h);
296  write_index_header (imiq, f);
297  write_ProductQuantizer (&imiq->pq, f);
298  } else if(const IndexRefineFlat * idxrf =
299  dynamic_cast<const IndexRefineFlat *> (idx)) {
300  uint32_t h = fourcc ("IxRF");
301  WRITE1 (h);
302  write_index_header (idxrf, f);
303  write_index (idxrf->base_index, f);
304  write_index (&idxrf->refine_index, f);
305  WRITE1 (idxrf->k_factor);
306  } else if(const IndexIDMap * idxmap =
307  dynamic_cast<const IndexIDMap *> (idx)) {
308  uint32_t h = fourcc ("IxMp");
309  WRITE1 (h);
310  write_index_header (idxmap, f);
311  write_index (idxmap->index, f);
312  WRITEVECTOR (idxmap->id_map);
313  } else {
314  FAISS_ASSERT (!"don't know how to serialize this type of index");
315  }
316 }
317 
318 void write_index (const Index *idx, const char *fname) {
319  FILE *f = fopen (fname, "w");
320  if (!f) {
321  fprintf (stderr, "cannot open %s for writing:", fname);
322  perror ("");
323  abort ();
324  }
325  write_index (idx, f);
326  fclose (f);
327 }
328 
329 void write_VectorTransform (const VectorTransform *vt, const char *fname) {
330  FILE *f = fopen (fname, "w");
331  if (!f) {
332  fprintf (stderr, "cannot open %s for writing:", fname);
333  perror ("");
334  abort ();
335  }
336  write_VectorTransform (vt, f);
337  fclose (f);
338 }
339 
340 /*************************************************************
341  * Read
342  **************************************************************/
343 
344 static void read_index_header (Index *idx, FILE *f) {
345  READ1 (idx->d);
346  READ1 (idx->ntotal);
347  Index::idx_t dummy;
348  READ1 (dummy);
349  READ1 (dummy);
350  READ1 (idx->is_trained);
351  READ1 (idx->metric_type);
352  idx->verbose = false;
353 }
354 
355 VectorTransform* read_VectorTransform (FILE *f) {
356  uint32_t h;
357  READ1 (h);
358  VectorTransform *vt = nullptr;
359 
360  if (h == fourcc ("rrot") || h == fourcc ("PCAm") ||
361  h == fourcc ("LTra") || h == fourcc ("PcAm")) {
362  LinearTransform *lt = nullptr;
363  if (h == fourcc ("rrot")) {
364  lt = new RandomRotationMatrix ();
365  } else if (h == fourcc ("PCAm") ||
366  h == fourcc ("PcAm")) {
367  PCAMatrix * pca = new PCAMatrix ();
368  READ1 (pca->eigen_power);
369  READ1 (pca->random_rotation);
370  if (h == fourcc ("PcAm"))
371  READ1 (pca->balanced_bins);
372  READVECTOR (pca->mean);
373  READVECTOR (pca->eigenvalues);
374  READVECTOR (pca->PCAMat);
375  lt = pca;
376  } else if (h == fourcc ("LTra")) {
377  lt = new LinearTransform ();
378  }
379  READ1 (lt->have_bias);
380  READVECTOR (lt->A);
381  READVECTOR (lt->b);
382  vt = lt;
383  } else if (h == fourcc ("RmDT")) {
384  RemapDimensionsTransform *rdt = new RemapDimensionsTransform ();
385  READVECTOR (rdt->map);
386  vt = rdt;
387  } else FAISS_ASSERT(!"fourcc not recognized");
388  READ1 (vt->d_in);
389  READ1 (vt->d_out);
390  READ1 (vt->is_trained);
391  return vt;
392 }
393 
394 static void read_ProductQuantizer (ProductQuantizer *pq, FILE *f) {
395  READ1 (pq->d);
396  READ1 (pq->M);
397  READ1 (pq->nbits);
398  pq->set_derived_values ();
399  READVECTOR (pq->centroids);
400 }
401 
402 ProductQuantizer * read_ProductQuantizer (const char*fname) {
403  FILE *f = fopen (fname, "r");
404  if (!f) {
405  fprintf (stderr, "cannot open %s for reading:", fname);
406  perror ("");
407  abort ();
408  }
409  ProductQuantizer *pq = new ProductQuantizer();
410  read_ProductQuantizer(pq, f);
411  fclose(f);
412  return pq;
413 }
414 
415 static void read_ivf_header (IndexIVF * ivf, FILE *f,
416  bool include_ids = true) {
417  read_index_header (ivf, f);
418  READ1 (ivf->nlist);
419  READ1 (ivf->nprobe);
420  ivf->quantizer = read_index (f);
421  ivf->own_fields = true;
422  if (include_ids) {
423  ivf->ids.resize (ivf->nlist);
424  for (size_t i = 0; i < ivf->nlist; i++)
425  READVECTOR (ivf->ids[i]);
426  }
427  READ1 (ivf->maintain_direct_map);
428  READVECTOR (ivf->direct_map);
429 }
430 
431 static IndexIVFPQ *read_ivfpq (FILE *f, uint32_t h, bool try_mmap)
432 {
433 
434  IndexIVFPQR *ivfpqr =
435  h == fourcc ("IvQR") ? new IndexIVFPQR () : nullptr;
436  IndexIVFPQCompact *ivfpqc =
437  h == fourcc ("IvPC") ? new IndexIVFPQCompact () : nullptr;
438  IndexIVFPQ * ivpq = ivfpqr ? ivfpqr : ivfpqc ? ivfpqc : new IndexIVFPQ ();
439  read_ivf_header (ivpq, f, !ivfpqc);
440  READ1 (ivpq->by_residual);
441  READ1 (ivpq->code_size);
442  read_ProductQuantizer (&ivpq->pq, f);
443  if (!ivfpqc) {
444  ivpq->codes.resize (ivpq->nlist);
445  for (size_t i = 0; i < ivpq->nlist; i++)
446  READVECTOR (ivpq->codes[i]);
447  }
448  // precomputed table not stored. It is cheaper to recompute it
449  ivpq->use_precomputed_table = 0;
450  if (ivpq->by_residual)
451  ivpq->precompute_table ();
452  if (ivfpqr) {
453  read_ProductQuantizer (&ivfpqr->refine_pq, f);
454  READVECTOR (ivfpqr->refine_codes);
455  READ1 (ivfpqr->k_factor);
456  }
457  if (ivfpqc) {
458  if (!try_mmap) {
459  READTABPAD16 (ivfpqc->limits, uint32_t, ivfpqc->nlist + 1);
460  READTABPAD16 (ivfpqc->compact_ids, uint32_t, ivfpqc->ntotal);
461  READTABPAD16 (ivfpqc->compact_codes, uint8_t,
462  ivfpqc->ntotal * ivfpqc->code_size);
463  } else {
464  long offset_limits, offset_compact_ids, offset_compact_codes;
465  TABOFFSETPAD16 (offset_limits, uint32_t, ivfpqc->nlist + 1);
466  TABOFFSETPAD16 (offset_compact_ids, uint32_t, ivfpqc->ntotal);
467  TABOFFSETPAD16 (offset_compact_codes, uint8_t,
468  ivfpqc->ntotal * ivfpqc->code_size);
469  ivfpqc->mmap_length = ftell (f);
470  // mmap the whole file
471  ivfpqc->mmap_buffer = (char*)mmap (
472  nullptr, ivfpqc->mmap_length,
473  PROT_READ, MAP_SHARED, fileno (f), 0);
474  if (!ivfpqc->mmap_buffer) {
475  perror ("mmap failed");
476  abort ();
477  }
478  // at this point the file can be closed, it does not
479  // invalidate the mapping
480  ivfpqc->limits = (uint32_t*)(ivfpqc->mmap_buffer + offset_limits);
481  ivfpqc->compact_ids = (uint32_t*)(ivfpqc->mmap_buffer +
482  offset_compact_ids);
483  ivfpqc->compact_codes = (uint8_t*)(ivfpqc->mmap_buffer +
484  offset_compact_codes);
485  }
486  }
487  return ivpq;
488 }
489 
490 int read_old_fmt_hack = 0;
491 
492 Index *read_index (FILE * f, bool try_mmap) {
493  Index * idx = nullptr;
494  uint32_t h;
495  READ1 (h);
496  if (h == fourcc ("IxFI") || h == fourcc ("IxF2")) {
497  IndexFlat *idxf;
498  if (h == fourcc ("IxFI")) idxf = new IndexFlatIP ();
499  else idxf = new IndexFlatL2 ();
500  read_index_header (idxf, f);
501  READVECTOR (idxf->xb);
502  FAISS_ASSERT (idxf->xb.size() == idxf->ntotal * idxf->d);
503  idx = idxf;
504  } else if (h == fourcc("IxHE") || h == fourcc("IxHe")) {
505  IndexLSH * idxl = new IndexLSH ();
506  read_index_header (idxl, f);
507  READ1 (idxl->nbits);
508  READ1 (idxl->rotate_data);
509  READ1 (idxl->train_thresholds);
510  READVECTOR (idxl->thresholds);
511  READ1 (idxl->bytes_per_vec);
512  if (h == fourcc("IxHE")) {
513  FAISS_ASSERT (idxl->nbits % 64 == 0 ||
514  !"can only read old format IndexLSH with "
515  "nbits multiple of 64");
516  idxl->bytes_per_vec *= 8;
517  }
518  {
519  RandomRotationMatrix *rrot = dynamic_cast<RandomRotationMatrix *>
520  (read_VectorTransform (f));
521  FAISS_ASSERT(rrot || !"expected a random rotation");
522  idxl->rrot = *rrot;
523  delete rrot;
524  }
525  READVECTOR (idxl->codes);
526  FAISS_ASSERT (idxl->rrot.d_in == idxl->d &&
527  idxl->rrot.d_out == idxl->nbits);
528  FAISS_ASSERT (idxl->codes.size() == idxl->ntotal * idxl->bytes_per_vec);
529  idx = idxl;
530  } else if (h == fourcc ("IxPQ") || h == fourcc ("IxPo") ||
531  h == fourcc ("IxPq")) {
532  // IxPQ and IxPo were merged into the same IndexPQ object
533  IndexPQ * idxp =new IndexPQ ();
534  read_index_header (idxp, f);
535  read_ProductQuantizer (&idxp->pq, f);
536  READVECTOR (idxp->codes);
537  if (h == fourcc ("IxPo") || h == fourcc ("IxPq")) {
538  READ1 (idxp->search_type);
539  READ1 (idxp->encode_signs);
540  READ1 (idxp->polysemous_ht);
541  }
542  // Old versoins of PQ all had metric_type set to INNER_PRODUCT
543  // when they were in fact using L2. Therefore, we force metric type
544  // to L2 when the old format is detected
545  if (h == fourcc ("IxPQ") || h == fourcc ("IxPo")) {
546  idxp->metric_type = METRIC_L2;
547  }
548  idx = idxp;
549  } else if(h == fourcc ("IvFl")) {
550  IndexIVFFlat * ivfl = new IndexIVFFlat ();
551  read_ivf_header (ivfl, f);
552  ivfl->vecs.resize (ivfl->nlist);
553  for (size_t i = 0; i < ivfl->nlist; i++)
554  READVECTOR (ivfl->vecs[i]);
555  idx = ivfl;
556 
557  } else if(h == fourcc ("IvPQ") || h == fourcc ("IvQR") ||
558  h == fourcc ("IvPC")) {
559 
560  idx = read_ivfpq (f, h, try_mmap);
561 
562  } else if(h == fourcc ("IxPT")) {
563  IndexPreTransform * ixpt = new IndexPreTransform();
564  ixpt->own_fields = true;
565  read_index_header (ixpt, f);
566  int nt;
567  if (read_old_fmt_hack == 2) {
568  nt = 1;
569  } else {
570  READ1 (nt);
571  }
572  for (int i = 0; i < nt; i++)
573  ixpt->chain.push_back (read_VectorTransform (f));
574  ixpt->index = read_index (f);
575  idx = ixpt;
576  } else if(h == fourcc ("Imiq")) {
578  read_index_header (imiq, f);
579  read_ProductQuantizer (&imiq->pq, f);
580  idx = imiq;
581  } else if(h == fourcc ("IxRF")) {
582  IndexRefineFlat *idxrf = new IndexRefineFlat ();
583  read_index_header (idxrf, f);
584  idxrf->base_index = read_index(f);
585  idxrf->own_fields = true;
586  IndexFlat *rf = dynamic_cast<IndexFlat*> (read_index (f));
587  std::swap (*rf, idxrf->refine_index);
588  delete rf;
589  READ1 (idxrf->k_factor);
590  idx = idxrf;
591  } else if(h == fourcc ("IxMp")) {
592  IndexIDMap * idxmap = new IndexIDMap ();
593  read_index_header (idxmap, f);
594  idxmap->index = read_index (f);
595  idxmap->own_fields = true;
596  READVECTOR (idxmap->id_map);
597  idx = idxmap;
598  } else {
599  fprintf (stderr, "Index type 0x%08x not supported\n", h);
600  abort ();
601  }
602  idx->set_typename();
603  return idx;
604 }
605 
606 Index *read_index (const char *fname, bool try_mmap) {
607  FILE *f = fopen (fname, "r");
608  if (!f) {
609  fprintf (stderr, "cannot open %s for reading:", fname);
610  perror ("");
611  abort ();
612  }
613  Index *idx = read_index (f, try_mmap);
614  fclose (f);
615  return idx;
616 }
617 
618 VectorTransform *read_VectorTransform (const char *fname) {
619  FILE *f = fopen (fname, "r");
620  if (!f) {
621  fprintf (stderr, "cannot open %s for reading:", fname);
622  perror ("");
623  abort ();
624  }
625  VectorTransform *vt = read_VectorTransform (f);
626  fclose (f);
627  return vt;
628 }
629 
630 /*************************************************************
631  * cloning functions
632  **************************************************************/
633 
634 
635 
636 Index * clone_index (const Index *index)
637 {
638  Cloner cl;
639  return cl.clone_Index (index);
640 }
641 
// If obj is an instance of classname, return a copy made with the
// class's copy constructor. The macro ends with a dangling "else", so
// invocations chain and must be followed by a final statement or
// block. List the candidates from most specific to most general.
#define TRYCLONE(classname, obj)                                        \
    if (const classname *matched =                                      \
            dynamic_cast<const classname *>(obj)) {                     \
        return new classname(*matched);                                 \
    } else
648 
649 VectorTransform *Cloner::clone_VectorTransform (const VectorTransform *vt)
650 {
651  TRYCLONE (RemapDimensionsTransform, vt)
652  TRYCLONE (OPQMatrix, vt)
653  TRYCLONE (PCAMatrix, vt)
654  TRYCLONE (RandomRotationMatrix, vt)
655  TRYCLONE (LinearTransform, vt)
656  {
657  FAISS_ASSERT(!"clone not supported for this type of VectorTransform");
658  }
659  return nullptr;
660 }
661 
662 IndexIVF * Cloner::clone_IndexIVF (const IndexIVF *ivf)
663 {
664  TRYCLONE (IndexIVFPQR, ivf)
665  TRYCLONE (IndexIVFPQ, ivf)
666  TRYCLONE (IndexIVFFlat, ivf)
667  {
668  FAISS_ASSERT(!"clone not supported for this type of IndexIVF");
669  }
670  return nullptr;
671 }
672 
673 Index *Cloner::clone_Index (const Index *index)
674 {
675  TRYCLONE (IndexPQ, index)
676  TRYCLONE (IndexLSH, index)
677  TRYCLONE (IndexFlatL2, index)
678  TRYCLONE (IndexFlatIP, index)
679  TRYCLONE (IndexFlat, index)
680  TRYCLONE (MultiIndexQuantizer, index)
681  if (const IndexIVF * ivf = dynamic_cast<const IndexIVF*>(index)) {
682  IndexIVF *res = clone_IndexIVF (ivf);
683  res->own_fields = true;
684  res->quantizer = clone_Index (ivf->quantizer);
685  return res;
686  } else if (const IndexPreTransform * ipt =
687  dynamic_cast<const IndexPreTransform*> (index)) {
688  IndexPreTransform *res = new IndexPreTransform ();
689  res->d = ipt->d;
690  res->index = clone_Index (ipt->index);
691  for (int i = 0; i < ipt->chain.size(); i++)
692  res->chain.push_back (clone_VectorTransform (ipt->chain[i]));
693  res->own_fields = true;
694  return res;
695  } else {
696  FAISS_ASSERT(!"clone not supported for this type of Index");
697  }
698  return nullptr;
699 }
700 
701 
702 } // namespace faiss
std::vector< uint8_t > codes
Codes. Size ntotal * pq.code_size.
Definition: IndexPQ.h:35
Index * index
! chain of transforms
Randomly rotate a set of vectors.
Index * read_index(FILE *f, bool try_mmap)
Definition: index_io.cpp:492
int bytes_per_vec
nb of 8-bits per encoded vector
Definition: IndexLSH.h:29
std::vector< float > thresholds
thresholds to compare with
Definition: IndexLSH.h:35
bool train_thresholds
whether we train thresholds or use 0
Definition: IndexLSH.h:31
Index * base_index
faster index to pre-select the vectors that should be filtered
Definition: IndexFlat.h:111
IndexFlat refine_index
storage for full vectors
Definition: IndexFlat.h:108
bool own_fields
should the base index be deallocated?
Definition: IndexFlat.h:112
int d
vector dimension
Definition: Index.h:66
std::vector< long > id_map
! whether pointers are deleted in destructor
Definition: MetaIndexes.h:29
RandomRotationMatrix rrot
optional random rotation
Definition: IndexLSH.h:33
long idx_t
all indices are this type
Definition: Index.h:64
ProductQuantizer pq
The product quantizer used to encode the vectors.
Definition: IndexPQ.h:32
idx_t ntotal
total nb of indexed vectors
Definition: Index.h:67
bool own_fields
! the sub-index
MetricType metric_type
type of metric this index uses for search
Definition: Index.h:74
size_t nlist
number of possible key values
Definition: IndexIVF.h:47
int d_out
! input dimension
int nbits
nb of bits per vector
Definition: IndexLSH.h:28
std::vector< float > xb
database vectors, size ntotal * d
Definition: IndexFlat.h:26
int polysemous_ht
Hamming threshold used for polysemy.
Definition: IndexPQ.h:93
bool rotate_data
whether to apply a random rotation to input
Definition: IndexLSH.h:30
std::vector< uint8_t > codes
encoded dataset
Definition: IndexLSH.h:38
std::vector< std::vector< float > > vecs
Definition: IndexIVF.h:131
bool own_fields
! the sub-index
Definition: MetaIndexes.h:28