Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
/data/users/hoss/faiss/index_io.cpp
1 /**
2  * Copyright (c) Facebook, Inc. and its affiliates.
3  *
4  * This source code is licensed under the MIT license found in the
5  * LICENSE file in the root directory of this source tree.
6  */
7 
8 // -*- c++ -*-
9 
10 #include "index_io.h"
11 
12 #include <cstdio>
13 #include <cstdlib>
14 
15 #include <sys/mman.h>
16 #include <sys/types.h>
17 #include <sys/stat.h>
18 #include <unistd.h>
19 
20 #include "FaissAssert.h"
21 #include "AuxIndexStructures.h"
22 
23 #include "IndexFlat.h"
24 #include "VectorTransform.h"
25 #include "IndexLSH.h"
26 #include "IndexPQ.h"
27 #include "IndexIVF.h"
28 #include "IndexIVFPQ.h"
29 #include "IndexIVFFlat.h"
30 #include "IndexIVFSpectralHash.h"
31 #include "MetaIndexes.h"
32 #include "IndexScalarQuantizer.h"
33 #include "IndexHNSW.h"
34 #include "OnDiskInvertedLists.h"
35 #include "IndexBinaryFlat.h"
36 #include "IndexBinaryFromFloat.h"
37 #include "IndexBinaryHNSW.h"
38 #include "IndexBinaryIVF.h"
39 
40 
41 
42 /*************************************************************
43  * The I/O format is the content of the class. For objects that are
44  * inherited, like Index, a 4-character-code (fourcc) indicates which
45  * child class this is an instance of.
46  *
47  * In this case, the fields of the parent class are written first,
48  * then the ones for the child classes. Note that this requires
49  * classes to be serialized to have a constructor without parameters,
50  * so that the fields can be filled in later. The default constructor
51  * should set reasonable defaults for all fields.
52  *
53  * The fourccs are assigned arbitrarily. When the class changed (added
54  * or deprecated fields), the fourcc can be replaced. New code should
55  * be able to read the old fourcc and fill in new classes.
56  *
57  * TODO: serialization to strings for use in Python pickle or Torch
58  * serialization.
59  *
60  * TODO: in this file, the read functions that encounter errors may
61  * leak memory.
62  **************************************************************/
63 
64 
65 
66 namespace faiss {
67 
/// Pack a 4-character code into a 32-bit tag.
///
/// @param sx  NUL-terminated string of exactly 4 characters
/// @return    sx[0] in the lowest byte up to sx[3] in the highest byte
static uint32_t fourcc (const char sx[4]) {
    assert(4 == strlen(sx));
    const unsigned char *x = reinterpret_cast<const unsigned char*>(sx);
    // Widen each byte before shifting: an unsigned char promotes to a signed
    // int, and shifting a byte >= 0x80 left by 24 would overflow that int.
    return uint32_t(x[0]) |
           uint32_t(x[1]) << 8 |
           uint32_t(x[2]) << 16 |
           uint32_t(x[3]) << 24;
}
73 
74 /*************************************************************
75  * I/O macros
76  *
77  * we use macros so that we have a line number to report in abort
78  * (). This makes debugging a lot easier. The IOReader or IOWriter is
79  * always called f and thus is not passed in as a macro parameter.
80  **************************************************************/
81 
82 
// Write n items of type *ptr through the IOWriter named f and throw if fewer
// than n items were written. The counts are size_t, hence the %zu conversions.
#define WRITEANDCHECK(ptr, n) {                                 \
        size_t ret = (*f)(ptr, sizeof(*(ptr)), n);              \
        FAISS_THROW_IF_NOT_FMT(ret == (n),                      \
            "write error in %s: %zu != %zu (%s)",               \
            f->name.c_str(), ret, size_t(n), strerror(errno));  \
    }

// Read n items of type *ptr through the IOReader named f and throw if fewer
// than n items were read.
#define READANDCHECK(ptr, n) {                                  \
        size_t ret = (*f)(ptr, sizeof(*(ptr)), n);              \
        FAISS_THROW_IF_NOT_FMT(ret == (n),                      \
            "read error in %s: %zu != %zu (%s)",                \
            f->name.c_str(), ret, size_t(n), strerror(errno));  \
    }

// Single-object shortcuts; x must be an lvalue because its address is taken.
#define WRITE1(x) WRITEANDCHECK(&(x), 1)
#define READ1(x)  READANDCHECK(&(x), 1)
99 
// Serialize a vector: its element count as a size_t, then the raw elements.
#define WRITEVECTOR(vec) {                      \
        size_t size = (vec).size ();            \
        WRITEANDCHECK (&size, 1);               \
        WRITEANDCHECK ((vec).data (), size);    \
    }

// Deserialize a vector written by WRITEVECTOR. The count is read into a
// size_t so that its width matches what WRITEVECTOR wrote, and the sanity
// bound is evaluated in 64 bits so the shift cannot overflow a 32-bit type.
// Counts of 2^40 elements or more are rejected before resizing.
#define READVECTOR(vec) {                                           \
        size_t size;                                                \
        READANDCHECK (&size, 1);                                    \
        FAISS_THROW_IF_NOT (uint64_t(size) < (uint64_t(1) << 40));  \
        (vec).resize (size);                                        \
        READANDCHECK ((vec).data (), size);                         \
    }
114 
/// Closes the wrapped FILE* when the object goes out of scope.
struct ScopeFileCloser {
    FILE *f;
    ScopeFileCloser (FILE *f): f (f) {}
    ~ScopeFileCloser () {
        // fclose must not be handed a null stream, so skip empty handles
        if (f) {
            fclose (f);
        }
    }
};
120 
121 
122 namespace {
123 
124 struct FileIOReader: IOReader {
125  FILE *f = nullptr;
126  bool need_close = false;
127 
128  FileIOReader(FILE *rf): f(rf) {}
129 
130  FileIOReader(const char * fname)
131  {
132  name = fname;
133  f = fopen(fname, "rb");
134  FAISS_THROW_IF_NOT_FMT (
135  f, "could not open %s for reading: %s",
136  fname, strerror(errno));
137  need_close = true;
138  }
139 
140  ~FileIOReader() override {
141  if (need_close) {
142  int ret = fclose(f);
143  if (ret != 0) {// we cannot raise and exception in the destructor
144  fprintf(stderr, "file %s close error: %s",
145  name.c_str(), strerror(errno));
146  }
147  }
148  }
149 
150  size_t operator()(
151  void *ptr, size_t size, size_t nitems) override {
152  return fread(ptr, size, nitems, f);
153  }
154 
155  int fileno() override {
156  return ::fileno (f);
157  }
158 
159 };
160 
161 struct FileIOWriter: IOWriter {
162  FILE *f = nullptr;
163  bool need_close = false;
164 
165  FileIOWriter(FILE *wf): f(wf) {}
166 
167  FileIOWriter(const char * fname)
168  {
169  name = fname;
170  f = fopen(fname, "wb");
171  FAISS_THROW_IF_NOT_FMT (
172  f, "could not open %s for writing: %s",
173  fname, strerror(errno));
174  need_close = true;
175  }
176 
177  ~FileIOWriter() override {
178  if (need_close) {
179  int ret = fclose(f);
180  if (ret != 0) {
181  // we cannot raise and exception in the destructor
182  fprintf(stderr, "file %s close error: %s",
183  name.c_str(), strerror(errno));
184  }
185  }
186  }
187 
188  size_t operator()(
189  const void *ptr, size_t size, size_t nitems) override {
190  return fwrite(ptr, size, nitems, f);
191  }
192  int fileno() override {
193  return ::fileno (f);
194  }
195 
196 };
197 
198 
199 } // namespace
200 
201 
202 /*************************************************************
203  * Write
204  **************************************************************/
205 static void write_index_header (const Index *idx, IOWriter *f) {
206  WRITE1 (idx->d);
207  WRITE1 (idx->ntotal);
208  Index::idx_t dummy = 1 << 20;
209  WRITE1 (dummy);
210  WRITE1 (dummy);
211  WRITE1 (idx->is_trained);
212  WRITE1 (idx->metric_type);
213 }
214 
215 void write_VectorTransform (const VectorTransform *vt, IOWriter *f) {
216  if (const LinearTransform * lt =
217  dynamic_cast < const LinearTransform *> (vt)) {
218  if (dynamic_cast<const RandomRotationMatrix *>(lt)) {
219  uint32_t h = fourcc ("rrot");
220  WRITE1 (h);
221  } else if (const PCAMatrix * pca =
222  dynamic_cast<const PCAMatrix *>(lt)) {
223  uint32_t h = fourcc ("PcAm");
224  WRITE1 (h);
225  WRITE1 (pca->eigen_power);
226  WRITE1 (pca->random_rotation);
227  WRITE1 (pca->balanced_bins);
228  WRITEVECTOR (pca->mean);
229  WRITEVECTOR (pca->eigenvalues);
230  WRITEVECTOR (pca->PCAMat);
231  } else {
232  // generic LinearTransform (includes OPQ)
233  uint32_t h = fourcc ("LTra");
234  WRITE1 (h);
235  }
236  WRITE1 (lt->have_bias);
237  WRITEVECTOR (lt->A);
238  WRITEVECTOR (lt->b);
239  } else if (const RemapDimensionsTransform *rdt =
240  dynamic_cast<const RemapDimensionsTransform *>(vt)) {
241  uint32_t h = fourcc ("RmDT");
242  WRITE1 (h);
243  WRITEVECTOR (rdt->map);
244  } else if (const NormalizationTransform *nt =
245  dynamic_cast<const NormalizationTransform *>(vt)) {
246  uint32_t h = fourcc ("VNrm");
247  WRITE1 (h);
248  WRITE1 (nt->norm);
249  } else if (const CenteringTransform *ct =
250  dynamic_cast<const CenteringTransform *>(vt)) {
251  uint32_t h = fourcc ("VCnt");
252  WRITE1 (h);
253  WRITEVECTOR (ct->mean);
254  } else {
255  FAISS_THROW_MSG ("cannot serialize this");
256  }
257  // common fields
258  WRITE1 (vt->d_in);
259  WRITE1 (vt->d_out);
260  WRITE1 (vt->is_trained);
261 }
262 
263 void write_ProductQuantizer (const ProductQuantizer *pq, IOWriter *f) {
264  WRITE1 (pq->d);
265  WRITE1 (pq->M);
266  WRITE1 (pq->nbits);
267  WRITEVECTOR (pq->centroids);
268 }
269 
270 static void write_ScalarQuantizer (
271  const ScalarQuantizer *ivsc, IOWriter *f) {
272  WRITE1 (ivsc->qtype);
273  WRITE1 (ivsc->rangestat);
274  WRITE1 (ivsc->rangestat_arg);
275  WRITE1 (ivsc->d);
276  WRITE1 (ivsc->code_size);
277  WRITEVECTOR (ivsc->trained);
278 }
279 
280 void write_InvertedLists (const InvertedLists *ils, IOWriter *f) {
281  if (ils == nullptr) {
282  uint32_t h = fourcc ("il00");
283  WRITE1 (h);
284  } else if (const auto & ails =
285  dynamic_cast<const ArrayInvertedLists *>(ils)) {
286  uint32_t h = fourcc ("ilar");
287  WRITE1 (h);
288  WRITE1 (ails->nlist);
289  WRITE1 (ails->code_size);
290  // here we store either as a full or a sparse data buffer
291  size_t n_non0 = 0;
292  for (size_t i = 0; i < ails->nlist; i++) {
293  if (ails->ids[i].size() > 0)
294  n_non0++;
295  }
296  if (n_non0 > ails->nlist / 2) {
297  uint32_t list_type = fourcc("full");
298  WRITE1 (list_type);
299  std::vector<size_t> sizes;
300  for (size_t i = 0; i < ails->nlist; i++) {
301  sizes.push_back (ails->ids[i].size());
302  }
303  WRITEVECTOR (sizes);
304  } else {
305  int list_type = fourcc("sprs"); // sparse
306  WRITE1 (list_type);
307  std::vector<size_t> sizes;
308  for (size_t i = 0; i < ails->nlist; i++) {
309  size_t n = ails->ids[i].size();
310  if (n > 0) {
311  sizes.push_back (i);
312  sizes.push_back (n);
313  }
314  }
315  WRITEVECTOR (sizes);
316  }
317  // make a single contiguous data buffer (useful for mmapping)
318  for (size_t i = 0; i < ails->nlist; i++) {
319  size_t n = ails->ids[i].size();
320  if (n > 0) {
321  WRITEANDCHECK (ails->codes[i].data(), n * ails->code_size);
322  WRITEANDCHECK (ails->ids[i].data(), n);
323  }
324  }
325  } else if (const auto & od =
326  dynamic_cast<const OnDiskInvertedLists *>(ils)) {
327  uint32_t h = fourcc ("ilod");
328  WRITE1 (h);
329  WRITE1 (ils->nlist);
330  WRITE1 (ils->code_size);
331  // this is a POD object
332  WRITEVECTOR (od->lists);
333 
334  {
335  std::vector<OnDiskInvertedLists::Slot> v(
336  od->slots.begin(), od->slots.end());
337  WRITEVECTOR(v);
338  }
339  {
340  std::vector<char> x(od->filename.begin(), od->filename.end());
341  WRITEVECTOR(x);
342  }
343  WRITE1(od->totsize);
344 
345  } else {
346  fprintf(stderr, "WARN! write_InvertedLists: unsupported invlist type, "
347  "saving null invlist\n");
348  uint32_t h = fourcc ("il00");
349  WRITE1 (h);
350  }
351 }
352 
353 
354 void write_ProductQuantizer (const ProductQuantizer*pq, const char *fname) {
355  FileIOWriter writer(fname);
356  write_ProductQuantizer (pq, &writer);
357 }
358 
359 static void write_HNSW (const HNSW *hnsw, IOWriter *f) {
360 
361  WRITEVECTOR (hnsw->assign_probas);
362  WRITEVECTOR (hnsw->cum_nneighbor_per_level);
363  WRITEVECTOR (hnsw->levels);
364  WRITEVECTOR (hnsw->offsets);
365  WRITEVECTOR (hnsw->neighbors);
366 
367  WRITE1 (hnsw->entry_point);
368  WRITE1 (hnsw->max_level);
369  WRITE1 (hnsw->efConstruction);
370  WRITE1 (hnsw->efSearch);
371  WRITE1 (hnsw->upper_beam);
372 }
373 
374 static void write_ivf_header (const IndexIVF *ivf, IOWriter *f) {
375  write_index_header (ivf, f);
376  WRITE1 (ivf->nlist);
377  WRITE1 (ivf->nprobe);
378  write_index (ivf->quantizer, f);
379  WRITE1 (ivf->maintain_direct_map);
380  WRITEVECTOR (ivf->direct_map);
381 }
382 
383 void write_index (const Index *idx, IOWriter *f) {
384  if (const IndexFlat * idxf = dynamic_cast<const IndexFlat *> (idx)) {
385  uint32_t h = fourcc (
386  idxf->metric_type == METRIC_INNER_PRODUCT ? "IxFI" :
387  idxf->metric_type == METRIC_L2 ? "IxF2" : nullptr);
388  WRITE1 (h);
389  write_index_header (idx, f);
390  WRITEVECTOR (idxf->xb);
391  } else if(const IndexLSH * idxl = dynamic_cast<const IndexLSH *> (idx)) {
392  uint32_t h = fourcc ("IxHe");
393  WRITE1 (h);
394  write_index_header (idx, f);
395  WRITE1 (idxl->nbits);
396  WRITE1 (idxl->rotate_data);
397  WRITE1 (idxl->train_thresholds);
398  WRITEVECTOR (idxl->thresholds);
399  WRITE1 (idxl->bytes_per_vec);
400  write_VectorTransform (&idxl->rrot, f);
401  WRITEVECTOR (idxl->codes);
402  } else if(const IndexPQ * idxp = dynamic_cast<const IndexPQ *> (idx)) {
403  uint32_t h = fourcc ("IxPq");
404  WRITE1 (h);
405  write_index_header (idx, f);
406  write_ProductQuantizer (&idxp->pq, f);
407  WRITEVECTOR (idxp->codes);
408  // search params -- maybe not useful to store?
409  WRITE1 (idxp->search_type);
410  WRITE1 (idxp->encode_signs);
411  WRITE1 (idxp->polysemous_ht);
412  } else if(const Index2Layer * idxp =
413  dynamic_cast<const Index2Layer *> (idx)) {
414  uint32_t h = fourcc ("Ix2L");
415  WRITE1 (h);
416  write_index_header (idx, f);
417  write_index (idxp->q1.quantizer, f);
418  WRITE1 (idxp->q1.nlist);
419  WRITE1 (idxp->q1.quantizer_trains_alone);
420  write_ProductQuantizer (&idxp->pq, f);
421  WRITE1 (idxp->code_size_1);
422  WRITE1 (idxp->code_size_2);
423  WRITE1 (idxp->code_size);
424  WRITEVECTOR (idxp->codes);
425  } else if(const IndexScalarQuantizer * idxs =
426  dynamic_cast<const IndexScalarQuantizer *> (idx)) {
427  uint32_t h = fourcc ("IxSQ");
428  WRITE1 (h);
429  write_index_header (idx, f);
430  write_ScalarQuantizer (&idxs->sq, f);
431  WRITEVECTOR (idxs->codes);
432  } else if(const IndexIVFFlatDedup * ivfl =
433  dynamic_cast<const IndexIVFFlatDedup *> (idx)) {
434  uint32_t h = fourcc ("IwFd");
435  WRITE1 (h);
436  write_ivf_header (ivfl, f);
437  {
438  std::vector<Index::idx_t> tab (2 * ivfl->instances.size());
439  long i = 0;
440  for (auto it = ivfl->instances.begin();
441  it != ivfl->instances.end(); ++it) {
442  tab[i++] = it->first;
443  tab[i++] = it->second;
444  }
445  WRITEVECTOR (tab);
446  }
447  write_InvertedLists (ivfl->invlists, f);
448  } else if(const IndexIVFFlat * ivfl =
449  dynamic_cast<const IndexIVFFlat *> (idx)) {
450  uint32_t h = fourcc ("IwFl");
451  WRITE1 (h);
452  write_ivf_header (ivfl, f);
453  write_InvertedLists (ivfl->invlists, f);
454  } else if(const IndexIVFScalarQuantizer * ivsc =
455  dynamic_cast<const IndexIVFScalarQuantizer *> (idx)) {
456  uint32_t h = fourcc ("IwSq");
457  WRITE1 (h);
458  write_ivf_header (ivsc, f);
459  write_ScalarQuantizer (&ivsc->sq, f);
460  WRITE1 (ivsc->code_size);
461  WRITE1 (ivsc->by_residual);
462  write_InvertedLists (ivsc->invlists, f);
463  } else if(const IndexIVFSpectralHash *ivsp =
464  dynamic_cast<const IndexIVFSpectralHash *>(idx)) {
465  uint32_t h = fourcc ("IwSh");
466  WRITE1 (h);
467  write_ivf_header (ivsp, f);
468  write_VectorTransform (ivsp->vt, f);
469  WRITE1 (ivsp->nbit);
470  WRITE1 (ivsp->period);
471  WRITE1 (ivsp->threshold_type);
472  WRITEVECTOR (ivsp->trained);
473  write_InvertedLists (ivsp->invlists, f);
474  } else if(const IndexIVFPQ * ivpq =
475  dynamic_cast<const IndexIVFPQ *> (idx)) {
476  const IndexIVFPQR * ivfpqr = dynamic_cast<const IndexIVFPQR *> (idx);
477 
478  uint32_t h = fourcc (ivfpqr ? "IwQR" : "IwPQ");
479  WRITE1 (h);
480  write_ivf_header (ivpq, f);
481  WRITE1 (ivpq->by_residual);
482  WRITE1 (ivpq->code_size);
483  write_ProductQuantizer (&ivpq->pq, f);
484  write_InvertedLists (ivpq->invlists, f);
485  if (ivfpqr) {
486  write_ProductQuantizer (&ivfpqr->refine_pq, f);
487  WRITEVECTOR (ivfpqr->refine_codes);
488  WRITE1 (ivfpqr->k_factor);
489  }
490 
491  } else if(const IndexPreTransform * ixpt =
492  dynamic_cast<const IndexPreTransform *> (idx)) {
493  uint32_t h = fourcc ("IxPT");
494  WRITE1 (h);
495  write_index_header (ixpt, f);
496  int nt = ixpt->chain.size();
497  WRITE1 (nt);
498  for (int i = 0; i < nt; i++)
499  write_VectorTransform (ixpt->chain[i], f);
500  write_index (ixpt->index, f);
501  } else if(const MultiIndexQuantizer * imiq =
502  dynamic_cast<const MultiIndexQuantizer *> (idx)) {
503  uint32_t h = fourcc ("Imiq");
504  WRITE1 (h);
505  write_index_header (imiq, f);
506  write_ProductQuantizer (&imiq->pq, f);
507  } else if(const IndexRefineFlat * idxrf =
508  dynamic_cast<const IndexRefineFlat *> (idx)) {
509  uint32_t h = fourcc ("IxRF");
510  WRITE1 (h);
511  write_index_header (idxrf, f);
512  write_index (idxrf->base_index, f);
513  write_index (&idxrf->refine_index, f);
514  WRITE1 (idxrf->k_factor);
515  } else if(const IndexIDMap * idxmap =
516  dynamic_cast<const IndexIDMap *> (idx)) {
517  uint32_t h =
518  dynamic_cast<const IndexIDMap2 *> (idx) ? fourcc ("IxM2") :
519  fourcc ("IxMp");
520  // no need to store additional info for IndexIDMap2
521  WRITE1 (h);
522  write_index_header (idxmap, f);
523  write_index (idxmap->index, f);
524  WRITEVECTOR (idxmap->id_map);
525  } else if(const IndexHNSW * idxhnsw =
526  dynamic_cast<const IndexHNSW *> (idx)) {
527  uint32_t h =
528  dynamic_cast<const IndexHNSWFlat*>(idx) ? fourcc("IHNf") :
529  dynamic_cast<const IndexHNSWPQ*>(idx) ? fourcc("IHNp") :
530  dynamic_cast<const IndexHNSWSQ*>(idx) ? fourcc("IHNs") :
531  dynamic_cast<const IndexHNSW2Level*>(idx) ? fourcc("IHN2") :
532  0;
533  FAISS_THROW_IF_NOT (h != 0);
534  WRITE1 (h);
535  write_index_header (idxhnsw, f);
536  write_HNSW (&idxhnsw->hnsw, f);
537  write_index (idxhnsw->storage, f);
538  } else {
539  FAISS_THROW_MSG ("don't know how to serialize this type of index");
540  }
541 }
542 
543 void write_index (const Index *idx, FILE *f) {
544  FileIOWriter writer(f);
545  write_index (idx, &writer);
546 }
547 
548 void write_index (const Index *idx, const char *fname) {
549  FileIOWriter writer(fname);
550  write_index (idx, &writer);
551 }
552 
553 void write_VectorTransform (const VectorTransform *vt, const char *fname) {
554  FileIOWriter writer(fname);
555  write_VectorTransform (vt, &writer);
556 }
557 
558 /*************************************************************
559  * Read
560  **************************************************************/
561 
562 static void read_index_header (Index *idx, IOReader *f) {
563  READ1 (idx->d);
564  READ1 (idx->ntotal);
565  Index::idx_t dummy;
566  READ1 (dummy);
567  READ1 (dummy);
568  READ1 (idx->is_trained);
569  READ1 (idx->metric_type);
570  idx->verbose = false;
571 }
572 
573 VectorTransform* read_VectorTransform (IOReader *f) {
574  uint32_t h;
575  READ1 (h);
576  VectorTransform *vt = nullptr;
577 
578  if (h == fourcc ("rrot") || h == fourcc ("PCAm") ||
579  h == fourcc ("LTra") || h == fourcc ("PcAm")) {
580  LinearTransform *lt = nullptr;
581  if (h == fourcc ("rrot")) {
582  lt = new RandomRotationMatrix ();
583  } else if (h == fourcc ("PCAm") ||
584  h == fourcc ("PcAm")) {
585  PCAMatrix * pca = new PCAMatrix ();
586  READ1 (pca->eigen_power);
587  READ1 (pca->random_rotation);
588  if (h == fourcc ("PcAm"))
589  READ1 (pca->balanced_bins);
590  READVECTOR (pca->mean);
591  READVECTOR (pca->eigenvalues);
592  READVECTOR (pca->PCAMat);
593  lt = pca;
594  } else if (h == fourcc ("LTra")) {
595  lt = new LinearTransform ();
596  }
597  READ1 (lt->have_bias);
598  READVECTOR (lt->A);
599  READVECTOR (lt->b);
600  FAISS_THROW_IF_NOT (lt->A.size() >= lt->d_in * lt->d_out);
601  FAISS_THROW_IF_NOT (!lt->have_bias || lt->b.size() >= lt->d_out);
602  lt->set_is_orthonormal();
603  vt = lt;
604  } else if (h == fourcc ("RmDT")) {
605  RemapDimensionsTransform *rdt = new RemapDimensionsTransform ();
606  READVECTOR (rdt->map);
607  vt = rdt;
608  } else if (h == fourcc ("VNrm")) {
609  NormalizationTransform *nt = new NormalizationTransform ();
610  READ1 (nt->norm);
611  vt = nt;
612  } else if (h == fourcc ("VCnt")) {
613  CenteringTransform *ct = new CenteringTransform ();
614  READVECTOR (ct->mean);
615  vt = ct;
616  } else {
617  FAISS_THROW_MSG("fourcc not recognized");
618  }
619  READ1 (vt->d_in);
620  READ1 (vt->d_out);
621  READ1 (vt->is_trained);
622  return vt;
623 }
624 
625 
626 static void read_ArrayInvertedLists_sizes (
627  IOReader *f, std::vector<size_t> & sizes)
628 {
629  uint32_t list_type;
630  READ1(list_type);
631  if (list_type == fourcc("full")) {
632  size_t os = sizes.size();
633  READVECTOR (sizes);
634  FAISS_THROW_IF_NOT (os == sizes.size());
635  } else if (list_type == fourcc("sprs")) {
636  std::vector<size_t> idsizes;
637  READVECTOR (idsizes);
638  for (size_t j = 0; j < idsizes.size(); j += 2) {
639  FAISS_THROW_IF_NOT (idsizes[j] < sizes.size());
640  sizes[idsizes[j]] = idsizes[j + 1];
641  }
642  } else {
643  FAISS_THROW_MSG ("invalid list_type");
644  }
645 }
646 
647 InvertedLists *read_InvertedLists (IOReader *f, int io_flags) {
648  uint32_t h;
649  READ1 (h);
650  if (h == fourcc ("il00")) {
651  fprintf(stderr, "read_InvertedLists:"
652  " WARN! inverted lists not stored with IVF object\n");
653  return nullptr;
654  } else if (h == fourcc ("ilar") && !(io_flags & IO_FLAG_MMAP)) {
655  auto ails = new ArrayInvertedLists (0, 0);
656  READ1 (ails->nlist);
657  READ1 (ails->code_size);
658  ails->ids.resize (ails->nlist);
659  ails->codes.resize (ails->nlist);
660  std::vector<size_t> sizes (ails->nlist);
661  read_ArrayInvertedLists_sizes (f, sizes);
662  for (size_t i = 0; i < ails->nlist; i++) {
663  ails->ids[i].resize (sizes[i]);
664  ails->codes[i].resize (sizes[i] * ails->code_size);
665  }
666  for (size_t i = 0; i < ails->nlist; i++) {
667  size_t n = ails->ids[i].size();
668  if (n > 0) {
669  READANDCHECK (ails->codes[i].data(), n * ails->code_size);
670  READANDCHECK (ails->ids[i].data(), n);
671  }
672  }
673  return ails;
674  } else if (h == fourcc ("ilar") && (io_flags & IO_FLAG_MMAP)) {
675  // then we load it as an OnDiskInvertedLists
676 
677  FileIOReader *reader = dynamic_cast<FileIOReader*>(f);
678  FAISS_THROW_IF_NOT_MSG(reader, "mmap only supported for File objects");
679  FILE *fdesc = reader->f;
680 
681  auto ails = new OnDiskInvertedLists ();
682  READ1 (ails->nlist);
683  READ1 (ails->code_size);
684  ails->read_only = true;
685  ails->lists.resize (ails->nlist);
686  std::vector<size_t> sizes (ails->nlist);
687  read_ArrayInvertedLists_sizes (f, sizes);
688  size_t o0 = ftell(fdesc), o = o0;
689  { // do the mmap
690  struct stat buf;
691  int ret = fstat (fileno(fdesc), &buf);
692  FAISS_THROW_IF_NOT_FMT (ret == 0,
693  "fstat failed: %s", strerror(errno));
694  ails->totsize = buf.st_size;
695  ails->ptr = (uint8_t*)mmap (nullptr, ails->totsize,
696  PROT_READ, MAP_SHARED,
697  fileno(fdesc), 0);
698  FAISS_THROW_IF_NOT_FMT (ails->ptr != MAP_FAILED,
699  "could not mmap: %s",
700  strerror(errno));
701  }
702 
703  for (size_t i = 0; i < ails->nlist; i++) {
704  OnDiskInvertedLists::List & l = ails->lists[i];
705  l.size = l.capacity = sizes[i];
706  l.offset = o;
707  o += l.size * (sizeof(OnDiskInvertedLists::idx_t) +
708  ails->code_size);
709  }
710  FAISS_THROW_IF_NOT(o <= ails->totsize);
711  // resume normal reading of file
712  fseek (fdesc, o, SEEK_SET);
713  return ails;
714  } else if (h == fourcc ("ilod")) {
715  OnDiskInvertedLists *od = new OnDiskInvertedLists();
716  od->read_only = io_flags & IO_FLAG_READ_ONLY;
717  READ1 (od->nlist);
718  READ1 (od->code_size);
719  // this is a POD object
720  READVECTOR (od->lists);
721  {
722  std::vector<OnDiskInvertedLists::Slot> v;
723  READVECTOR(v);
724  od->slots.assign(v.begin(), v.end());
725  }
726  {
727  std::vector<char> x;
728  READVECTOR(x);
729  od->filename.assign(x.begin(), x.end());
730 
731  if (io_flags & IO_FLAG_ONDISK_SAME_DIR) {
732  FileIOReader *reader = dynamic_cast<FileIOReader*>(f);
733  FAISS_THROW_IF_NOT_MSG (
734  reader, "IO_FLAG_ONDISK_SAME_DIR only supported "
735  "when reading from file");
736  std::string indexname = reader->name;
737  std::string dirname = "./";
738  size_t slash = indexname.find_last_of('/');
739  if (slash != std::string::npos) {
740  dirname = indexname.substr(0, slash + 1);
741  }
742  std::string filename = od->filename;
743  slash = filename.find_last_of('/');
744  if (slash != std::string::npos) {
745  filename = filename.substr(slash + 1);
746  }
747  filename = dirname + filename;
748  printf("IO_FLAG_ONDISK_SAME_DIR: "
749  "updating ondisk filename from %s to %s\n",
750  od->filename.c_str(), filename.c_str());
751  od->filename = filename;
752  }
753 
754  }
755  READ1(od->totsize);
756  od->do_mmap();
757  return od;
758  } else {
759  FAISS_THROW_MSG ("read_InvertedLists: unsupported invlist type");
760  }
761 }
762 
763 static void read_InvertedLists (
764  IndexIVF *ivf, IOReader *f, int io_flags) {
765  InvertedLists *ils = read_InvertedLists (f, io_flags);
766  FAISS_THROW_IF_NOT (!ils || (ils->nlist == ivf->nlist &&
767  ils->code_size == ivf->code_size));
768  ivf->invlists = ils;
769  ivf->own_invlists = true;
770 }
771 
772 static void read_InvertedLists (
773  IndexBinaryIVF *ivf, IOReader *f, int io_flags) {
774  InvertedLists *ils = read_InvertedLists (f, io_flags);
775  FAISS_THROW_IF_NOT (!ils || (ils->nlist == ivf->nlist &&
776  ils->code_size == ivf->code_size));
777  ivf->invlists = ils;
778  ivf->own_invlists = true;
779 }
780 
781 static void read_ProductQuantizer (ProductQuantizer *pq, IOReader *f) {
782  READ1 (pq->d);
783  READ1 (pq->M);
784  READ1 (pq->nbits);
785  pq->set_derived_values ();
786  READVECTOR (pq->centroids);
787 }
788 
789 static void read_ScalarQuantizer (ScalarQuantizer *ivsc, IOReader *f) {
790  READ1 (ivsc->qtype);
791  READ1 (ivsc->rangestat);
792  READ1 (ivsc->rangestat_arg);
793  READ1 (ivsc->d);
794  READ1 (ivsc->code_size);
795  READVECTOR (ivsc->trained);
796 }
797 
798 
799 static void read_HNSW (HNSW *hnsw, IOReader *f) {
800  READVECTOR (hnsw->assign_probas);
801  READVECTOR (hnsw->cum_nneighbor_per_level);
802  READVECTOR (hnsw->levels);
803  READVECTOR (hnsw->offsets);
804  READVECTOR (hnsw->neighbors);
805 
806  READ1 (hnsw->entry_point);
807  READ1 (hnsw->max_level);
808  READ1 (hnsw->efConstruction);
809  READ1 (hnsw->efSearch);
810  READ1 (hnsw->upper_beam);
811 }
812 
813 ProductQuantizer * read_ProductQuantizer (const char*fname) {
814  FileIOReader reader(fname);
815  return read_ProductQuantizer(&reader);
816 }
817 
818 ProductQuantizer * read_ProductQuantizer (IOReader *reader) {
819  ProductQuantizer *pq = new ProductQuantizer();
820  ScopeDeleter1<ProductQuantizer> del (pq);
821 
822  read_ProductQuantizer(pq, reader);
823  del.release ();
824  return pq;
825 }
826 
827 static void read_ivf_header (
828  IndexIVF *ivf, IOReader *f,
829  std::vector<std::vector<Index::idx_t> > *ids = nullptr)
830 {
831  read_index_header (ivf, f);
832  READ1 (ivf->nlist);
833  READ1 (ivf->nprobe);
834  ivf->quantizer = read_index (f);
835  ivf->own_fields = true;
836  if (ids) { // used in legacy "Iv" formats
837  ids->resize (ivf->nlist);
838  for (size_t i = 0; i < ivf->nlist; i++)
839  READVECTOR ((*ids)[i]);
840  }
841  READ1 (ivf->maintain_direct_map);
842  READVECTOR (ivf->direct_map);
843 }
844 
845 // used for legacy formats
846 static ArrayInvertedLists *set_array_invlist(
847  IndexIVF *ivf, std::vector<std::vector<Index::idx_t> > &ids)
848 {
849  ArrayInvertedLists *ail = new ArrayInvertedLists (
850  ivf->nlist, ivf->code_size);
851  std::swap (ail->ids, ids);
852  ivf->invlists = ail;
853  ivf->own_invlists = true;
854  return ail;
855 }
856 
857 static IndexIVFPQ *read_ivfpq (IOReader *f, uint32_t h, int io_flags)
858 {
859  bool legacy = h == fourcc ("IvQR") || h == fourcc ("IvPQ");
860 
861  IndexIVFPQR *ivfpqr =
862  h == fourcc ("IvQR") || h == fourcc ("IwQR") ?
863  new IndexIVFPQR () : nullptr;
864  IndexIVFPQ * ivpq = ivfpqr ? ivfpqr : new IndexIVFPQ ();
865 
866  std::vector<std::vector<Index::idx_t> > ids;
867  read_ivf_header (ivpq, f, legacy ? &ids : nullptr);
868  READ1 (ivpq->by_residual);
869  READ1 (ivpq->code_size);
870  read_ProductQuantizer (&ivpq->pq, f);
871 
872  if (legacy) {
873  ArrayInvertedLists *ail = set_array_invlist (ivpq, ids);
874  for (size_t i = 0; i < ail->nlist; i++)
875  READVECTOR (ail->codes[i]);
876  } else {
877  read_InvertedLists (ivpq, f, io_flags);
878  }
879 
880  if (ivpq->is_trained) {
881  // precomputed table not stored. It is cheaper to recompute it
882  ivpq->use_precomputed_table = 0;
883  if (ivpq->by_residual)
884  ivpq->precompute_table ();
885  if (ivfpqr) {
886  read_ProductQuantizer (&ivfpqr->refine_pq, f);
887  READVECTOR (ivfpqr->refine_codes);
888  READ1 (ivfpqr->k_factor);
889  }
890  }
891  return ivpq;
892 }
893 
// Global switch for reading older file layouts. In read_index, the "IxPT"
// branch checks it: when it equals 2 the transform count is not read from
// the file and a single transform is assumed.
894 int read_old_fmt_hack = 0;
895 
896 Index *read_index (IOReader *f, int io_flags) {
897  Index * idx = nullptr;
898  uint32_t h;
899  READ1 (h);
900  if (h == fourcc ("IxFI") || h == fourcc ("IxF2")) {
901  IndexFlat *idxf;
902  if (h == fourcc ("IxFI")) idxf = new IndexFlatIP ();
903  else idxf = new IndexFlatL2 ();
904  read_index_header (idxf, f);
905  READVECTOR (idxf->xb);
906  FAISS_THROW_IF_NOT (idxf->xb.size() == idxf->ntotal * idxf->d);
907  // leak!
908  idx = idxf;
909  } else if (h == fourcc("IxHE") || h == fourcc("IxHe")) {
910  IndexLSH * idxl = new IndexLSH ();
911  read_index_header (idxl, f);
912  READ1 (idxl->nbits);
913  READ1 (idxl->rotate_data);
914  READ1 (idxl->train_thresholds);
915  READVECTOR (idxl->thresholds);
916  READ1 (idxl->bytes_per_vec);
917  if (h == fourcc("IxHE")) {
918  FAISS_THROW_IF_NOT_FMT (idxl->nbits % 64 == 0,
919  "can only read old format IndexLSH with "
920  "nbits multiple of 64 (got %d)",
921  (int) idxl->nbits);
922  // leak
923  idxl->bytes_per_vec *= 8;
924  }
925  {
926  RandomRotationMatrix *rrot = dynamic_cast<RandomRotationMatrix *>
927  (read_VectorTransform (f));
928  FAISS_THROW_IF_NOT_MSG(rrot, "expected a random rotation");
929  idxl->rrot = *rrot;
930  delete rrot;
931  }
932  READVECTOR (idxl->codes);
933  FAISS_THROW_IF_NOT (idxl->rrot.d_in == idxl->d &&
934  idxl->rrot.d_out == idxl->nbits);
935  FAISS_THROW_IF_NOT (
936  idxl->codes.size() == idxl->ntotal * idxl->bytes_per_vec);
937  idx = idxl;
938  } else if (h == fourcc ("IxPQ") || h == fourcc ("IxPo") ||
939  h == fourcc ("IxPq")) {
940  // IxPQ and IxPo were merged into the same IndexPQ object
941  IndexPQ * idxp =new IndexPQ ();
942  read_index_header (idxp, f);
943  read_ProductQuantizer (&idxp->pq, f);
944  READVECTOR (idxp->codes);
945  if (h == fourcc ("IxPo") || h == fourcc ("IxPq")) {
946  READ1 (idxp->search_type);
947  READ1 (idxp->encode_signs);
948  READ1 (idxp->polysemous_ht);
949  }
950  // Old versions of PQ all had metric_type set to INNER_PRODUCT
951  // when they were in fact using L2. Therefore, we force metric type
952  // to L2 when the old format is detected
953  if (h == fourcc ("IxPQ") || h == fourcc ("IxPo")) {
954  idxp->metric_type = METRIC_L2;
955  }
956  idx = idxp;
957  } else if (h == fourcc ("IvFl") || h == fourcc("IvFL")) { // legacy
958  IndexIVFFlat * ivfl = new IndexIVFFlat ();
959  std::vector<std::vector<Index::idx_t> > ids;
960  read_ivf_header (ivfl, f, &ids);
961  ivfl->code_size = ivfl->d * sizeof(float);
962  ArrayInvertedLists *ail = set_array_invlist (ivfl, ids);
963 
964  if (h == fourcc ("IvFL")) {
965  for (size_t i = 0; i < ivfl->nlist; i++) {
966  READVECTOR (ail->codes[i]);
967  }
968  } else { // old format
969  for (size_t i = 0; i < ivfl->nlist; i++) {
970  std::vector<float> vec;
971  READVECTOR (vec);
972  ail->codes[i].resize(vec.size() * sizeof(float));
973  memcpy(ail->codes[i].data(), vec.data(),
974  ail->codes[i].size());
975  }
976  }
977  idx = ivfl;
978  } else if (h == fourcc ("IwFd")) {
979  IndexIVFFlatDedup * ivfl = new IndexIVFFlatDedup ();
980  read_ivf_header (ivfl, f);
981  ivfl->code_size = ivfl->d * sizeof(float);
982  {
983  std::vector<Index::idx_t> tab;
984  READVECTOR (tab);
985  for (long i = 0; i < tab.size(); i += 2) {
986  std::pair<Index::idx_t, Index::idx_t>
987  pair (tab[i], tab[i + 1]);
988  ivfl->instances.insert (pair);
989  }
990  }
991  read_InvertedLists (ivfl, f, io_flags);
992  idx = ivfl;
993  } else if (h == fourcc ("IwFl")) {
994  IndexIVFFlat * ivfl = new IndexIVFFlat ();
995  read_ivf_header (ivfl, f);
996  ivfl->code_size = ivfl->d * sizeof(float);
997  read_InvertedLists (ivfl, f, io_flags);
998  idx = ivfl;
999  } else if (h == fourcc ("IxSQ")) {
1000  IndexScalarQuantizer * idxs = new IndexScalarQuantizer ();
1001  read_index_header (idxs, f);
1002  read_ScalarQuantizer (&idxs->sq, f);
1003  READVECTOR (idxs->codes);
1004  idxs->code_size = idxs->sq.code_size;
1005  idx = idxs;
1006  } else if(h == fourcc ("IvSQ")) { // legacy
1007  IndexIVFScalarQuantizer * ivsc = new IndexIVFScalarQuantizer();
1008  std::vector<std::vector<Index::idx_t> > ids;
1009  read_ivf_header (ivsc, f, &ids);
1010  read_ScalarQuantizer (&ivsc->sq, f);
1011  READ1 (ivsc->code_size);
1012  ArrayInvertedLists *ail = set_array_invlist (ivsc, ids);
1013  for(int i = 0; i < ivsc->nlist; i++)
1014  READVECTOR (ail->codes[i]);
1015  idx = ivsc;
1016  } else if(h == fourcc ("IwSQ") || h == fourcc ("IwSq")) {
1017  IndexIVFScalarQuantizer * ivsc = new IndexIVFScalarQuantizer();
1018  read_ivf_header (ivsc, f);
1019  read_ScalarQuantizer (&ivsc->sq, f);
1020  READ1 (ivsc->code_size);
1021  if (h == fourcc ("IwSQ")) {
1022  ivsc->by_residual = true;
1023  } else {
1024  READ1 (ivsc->by_residual);
1025  }
1026  read_InvertedLists (ivsc, f, io_flags);
1027  idx = ivsc;
1028  } else if(h == fourcc ("IwSh")) {
1029  IndexIVFSpectralHash *ivsp = new IndexIVFSpectralHash ();
1030  read_ivf_header (ivsp, f);
1031  ivsp->vt = read_VectorTransform (f);
1032  ivsp->own_fields = true;
1033  READ1 (ivsp->nbit);
1034  // not stored by write_ivf_header
1035  ivsp->code_size = (ivsp->nbit + 7) / 8;
1036  READ1 (ivsp->period);
1037  READ1 (ivsp->threshold_type);
1038  READVECTOR (ivsp->trained);
1039  read_InvertedLists (ivsp, f, io_flags);
1040  idx = ivsp;
1041  } else if(h == fourcc ("IvPQ") || h == fourcc ("IvQR") ||
1042  h == fourcc ("IwPQ") || h == fourcc ("IwQR")) {
1043 
1044  idx = read_ivfpq (f, h, io_flags);
1045 
1046  } else if(h == fourcc ("IxPT")) {
1047  IndexPreTransform * ixpt = new IndexPreTransform();
1048  ixpt->own_fields = true;
1049  read_index_header (ixpt, f);
1050  int nt;
1051  if (read_old_fmt_hack == 2) {
1052  nt = 1;
1053  } else {
1054  READ1 (nt);
1055  }
1056  for (int i = 0; i < nt; i++) {
1057  ixpt->chain.push_back (read_VectorTransform (f));
1058  }
1059  ixpt->index = read_index (f, io_flags);
1060  idx = ixpt;
1061  } else if(h == fourcc ("Imiq")) {
1062  MultiIndexQuantizer * imiq = new MultiIndexQuantizer ();
1063  read_index_header (imiq, f);
1064  read_ProductQuantizer (&imiq->pq, f);
1065  idx = imiq;
1066  } else if(h == fourcc ("IxRF")) {
1067  IndexRefineFlat *idxrf = new IndexRefineFlat ();
1068  read_index_header (idxrf, f);
1069  idxrf->base_index = read_index(f, io_flags);
1070  idxrf->own_fields = true;
1071  IndexFlat *rf = dynamic_cast<IndexFlat*> (read_index (f, io_flags));
1072  std::swap (*rf, idxrf->refine_index);
1073  delete rf;
1074  READ1 (idxrf->k_factor);
1075  idx = idxrf;
1076  } else if(h == fourcc ("IxMp") || h == fourcc ("IxM2")) {
1077  bool is_map2 = h == fourcc ("IxM2");
1078  IndexIDMap * idxmap = is_map2 ? new IndexIDMap2 () : new IndexIDMap ();
1079  read_index_header (idxmap, f);
1080  idxmap->index = read_index (f, io_flags);
1081  idxmap->own_fields = true;
1082  READVECTOR (idxmap->id_map);
1083  if (is_map2) {
1084  static_cast<IndexIDMap2*>(idxmap)->construct_rev_map ();
1085  }
1086  idx = idxmap;
1087  } else if (h == fourcc ("Ix2L")) {
1088  Index2Layer * idxp = new Index2Layer ();
1089  read_index_header (idxp, f);
1090  idxp->q1.quantizer = read_index (f, io_flags);
1091  READ1 (idxp->q1.nlist);
1092  READ1 (idxp->q1.quantizer_trains_alone);
1093  read_ProductQuantizer (&idxp->pq, f);
1094  READ1 (idxp->code_size_1);
1095  READ1 (idxp->code_size_2);
1096  READ1 (idxp->code_size);
1097  READVECTOR (idxp->codes);
1098  idx = idxp;
1099  } else if(h == fourcc("IHNf") || h == fourcc("IHNp") ||
1100  h == fourcc("IHNs") || h == fourcc("IHN2")) {
1101  IndexHNSW *idxhnsw = nullptr;
1102  if (h == fourcc("IHNf")) idxhnsw = new IndexHNSWFlat ();
1103  if (h == fourcc("IHNp")) idxhnsw = new IndexHNSWPQ ();
1104  if (h == fourcc("IHNs")) idxhnsw = new IndexHNSWSQ ();
1105  if (h == fourcc("IHN2")) idxhnsw = new IndexHNSW2Level ();
1106  read_index_header (idxhnsw, f);
1107  read_HNSW (&idxhnsw->hnsw, f);
1108  idxhnsw->storage = read_index (f, io_flags);
1109  idxhnsw->own_fields = true;
1110  if (h == fourcc("IHNp")) {
1111  dynamic_cast<IndexPQ*>(idxhnsw->storage)->pq.compute_sdc_table ();
1112  }
1113  idx = idxhnsw;
1114  } else {
1115  FAISS_THROW_FMT("Index type 0x%08x not supported\n", h);
1116  idx = nullptr;
1117  }
1118  return idx;
1119 }
1120 
1121 
1122 Index *read_index (FILE * f, int io_flags) {
1123  FileIOReader reader(f);
1124  return read_index(&reader, io_flags);
1125 }
1126 
1127 Index *read_index (const char *fname, int io_flags) {
1128  FileIOReader reader(fname);
1129  Index *idx = read_index (&reader, io_flags);
1130  return idx;
1131 }
1132 
1133 VectorTransform *read_VectorTransform (const char *fname) {
1134  FileIOReader reader(fname);
1135  VectorTransform *vt = read_VectorTransform (&reader);
1136  return vt;
1137 }
1138 
1139 /*************************************************************
1140  * cloning functions
1141  **************************************************************/
1142 
1143 
1144 
1145 Index * clone_index (const Index *index)
1146 {
1147  Cloner cl;
1148  return cl.clone_Index (index);
1149 }
1150 
// Clone helper. If obj can be dynamic_cast to `const classname *`, return
// a heap-allocated copy built with classname's copy constructor (which
// must therefore exist). The expansion ends with a dangling `else`, so
// invocations can be chained and the last one must be followed by a
// statement or block handling the "no match" case.
// Always list classes from most specific to most general: a base class
// placed first would also match its subclasses.
#define TRYCLONE(classname, obj)                                        \
    if (const classname *clo = dynamic_cast<const classname *>(obj)) {  \
        return new classname(*clo);                                     \
    } else
1157 
1158 VectorTransform *Cloner::clone_VectorTransform (const VectorTransform *vt)
1159 {
1160  TRYCLONE (RemapDimensionsTransform, vt)
1161  TRYCLONE (OPQMatrix, vt)
1162  TRYCLONE (PCAMatrix, vt)
1163  TRYCLONE (RandomRotationMatrix, vt)
1164  TRYCLONE (LinearTransform, vt)
1165  {
1166  FAISS_THROW_MSG("clone not supported for this type of VectorTransform");
1167  }
1168  return nullptr;
1169 }
1170 
1171 IndexIVF * Cloner::clone_IndexIVF (const IndexIVF *ivf)
1172 {
1173  TRYCLONE (IndexIVFPQR, ivf)
1174  TRYCLONE (IndexIVFPQ, ivf)
1175  TRYCLONE (IndexIVFFlat, ivf)
1176  TRYCLONE (IndexIVFScalarQuantizer, ivf)
1177  {
1178  FAISS_THROW_MSG("clone not supported for this type of IndexIVF");
1179  }
1180  return nullptr;
1181 }
1182 
1183 Index *Cloner::clone_Index (const Index *index)
1184 {
1185  TRYCLONE (IndexPQ, index)
1186  TRYCLONE (IndexLSH, index)
1187  TRYCLONE (IndexFlatL2, index)
1188  TRYCLONE (IndexFlatIP, index)
1189  TRYCLONE (IndexFlat, index)
1190  TRYCLONE (IndexScalarQuantizer, index)
1191  TRYCLONE (MultiIndexQuantizer, index)
1192  if (const IndexIVF * ivf = dynamic_cast<const IndexIVF*>(index)) {
1193  IndexIVF *res = clone_IndexIVF (ivf);
1194  if (ivf->invlists == nullptr) {
1195  res->invlists = nullptr;
1196  } else if (auto *ails = dynamic_cast<const ArrayInvertedLists*>
1197  (ivf->invlists)) {
1198  res->invlists = new ArrayInvertedLists(*ails);
1199  res->own_invlists = true;
1200  } else {
1201  FAISS_THROW_MSG( "clone not supported for this type of inverted lists");
1202  }
1203  res->own_fields = true;
1204  res->quantizer = clone_Index (ivf->quantizer);
1205  return res;
1206  } else if (const IndexPreTransform * ipt =
1207  dynamic_cast<const IndexPreTransform*> (index)) {
1208  IndexPreTransform *res = new IndexPreTransform ();
1209  res->d = ipt->d;
1210  res->index = clone_Index (ipt->index);
1211  for (int i = 0; i < ipt->chain.size(); i++)
1212  res->chain.push_back (clone_VectorTransform (ipt->chain[i]));
1213  res->own_fields = true;
1214  return res;
1215  } else if (const IndexIDMap *idmap =
1216  dynamic_cast<const IndexIDMap*> (index)) {
1217  IndexIDMap *res = new IndexIDMap (*idmap);
1218  res->own_fields = true;
1219  res->index = clone_Index (idmap->index);
1220  return res;
1221  } else {
1222  FAISS_THROW_MSG( "clone not supported for this type of Index");
1223  }
1224  return nullptr;
1225 }
1226 
1227 
1228 static void write_index_binary_header (const IndexBinary *idx, IOWriter *f) {
1229  WRITE1 (idx->d);
1230  WRITE1 (idx->code_size);
1231  WRITE1 (idx->ntotal);
1232  WRITE1 (idx->is_trained);
1233  WRITE1 (idx->metric_type);
1234 }
1235 
1236 static void write_binary_ivf_header (const IndexBinaryIVF *ivf, IOWriter *f) {
1237  write_index_binary_header (ivf, f);
1238  WRITE1 (ivf->nlist);
1239  WRITE1 (ivf->nprobe);
1240  write_index_binary (ivf->quantizer, f);
1241  WRITE1 (ivf->maintain_direct_map);
1242  WRITEVECTOR (ivf->direct_map);
1243 }
1244 
1245 void write_index_binary (const IndexBinary *idx, IOWriter *f) {
1246  if (const IndexBinaryFlat *idxf =
1247  dynamic_cast<const IndexBinaryFlat *> (idx)) {
1248  uint32_t h = fourcc ("IBxF");
1249  WRITE1 (h);
1250  write_index_binary_header (idx, f);
1251  WRITEVECTOR (idxf->xb);
1252  } else if (const IndexBinaryIVF *ivf =
1253  dynamic_cast<const IndexBinaryIVF *> (idx)) {
1254  uint32_t h = fourcc ("IBwF");
1255  WRITE1 (h);
1256  write_binary_ivf_header (ivf, f);
1257  write_InvertedLists (ivf->invlists, f);
1258  } else if(const IndexBinaryFromFloat * idxff =
1259  dynamic_cast<const IndexBinaryFromFloat *> (idx)) {
1260  uint32_t h = fourcc ("IBFf");
1261  WRITE1 (h);
1262  write_index_binary_header (idxff, f);
1263  write_index (idxff->index, f);
1264  } else if (const IndexBinaryHNSW *idxhnsw =
1265  dynamic_cast<const IndexBinaryHNSW *> (idx)) {
1266  uint32_t h = fourcc ("IBHf");
1267  WRITE1 (h);
1268  write_index_binary_header (idxhnsw, f);
1269  write_HNSW (&idxhnsw->hnsw, f);
1270  write_index_binary (idxhnsw->storage, f);
1271  } else {
1272  FAISS_THROW_MSG ("don't know how to serialize this type of index");
1273  }
1274 }
1275 
1276 void write_index_binary (const IndexBinary *idx, FILE *f) {
1277  FileIOWriter writer(f);
1278  write_index_binary(idx, &writer);
1279 }
1280 
1281 void write_index_binary (const IndexBinary *idx, const char *fname) {
1282  FileIOWriter writer(fname);
1283  write_index_binary (idx, &writer);
1284 }
1285 
1286 static void read_index_binary_header (IndexBinary *idx, IOReader *f) {
1287  READ1 (idx->d);
1288  READ1 (idx->code_size);
1289  READ1 (idx->ntotal);
1290  READ1 (idx->is_trained);
1291  READ1 (idx->metric_type);
1292  idx->verbose = false;
1293 }
1294 
1295 static void read_binary_ivf_header (
1296  IndexBinaryIVF *ivf, IOReader *f,
1297  std::vector<std::vector<Index::idx_t> > *ids = nullptr)
1298 {
1299  read_index_binary_header (ivf, f);
1300  READ1 (ivf->nlist);
1301  READ1 (ivf->nprobe);
1302  ivf->quantizer = read_index_binary (f);
1303  ivf->own_fields = true;
1304  if (ids) { // used in legacy "Iv" formats
1305  ids->resize (ivf->nlist);
1306  for (size_t i = 0; i < ivf->nlist; i++)
1307  READVECTOR ((*ids)[i]);
1308  }
1309  READ1 (ivf->maintain_direct_map);
1310  READVECTOR (ivf->direct_map);
1311 }
1312 
1313 IndexBinary *read_index_binary (IOReader *f, int io_flags) {
1314  IndexBinary * idx = nullptr;
1315  uint32_t h;
1316  READ1 (h);
1317  if (h == fourcc ("IBxF")) {
1318  IndexBinaryFlat *idxf = new IndexBinaryFlat ();
1319  read_index_binary_header (idxf, f);
1320  READVECTOR (idxf->xb);
1321  FAISS_THROW_IF_NOT (idxf->xb.size() == idxf->ntotal * idxf->code_size);
1322  // leak!
1323  idx = idxf;
1324  } else if (h == fourcc ("IBwF")) {
1325  IndexBinaryIVF *ivf = new IndexBinaryIVF ();
1326  read_binary_ivf_header (ivf, f);
1327  read_InvertedLists (ivf, f, io_flags);
1328  idx = ivf;
1329  } else if (h == fourcc ("IBFf")) {
1330  IndexBinaryFromFloat *idxff = new IndexBinaryFromFloat ();
1331  read_index_binary_header (idxff, f);
1332  idxff->own_fields = true;
1333  idxff->index = read_index (f, io_flags);
1334  idx = idxff;
1335  } else if (h == fourcc ("IBHf")) {
1336  IndexBinaryHNSW *idxhnsw = new IndexBinaryHNSW ();
1337  read_index_binary_header (idxhnsw, f);
1338  read_HNSW (&idxhnsw->hnsw, f);
1339  idxhnsw->storage = read_index_binary (f, io_flags);
1340  idxhnsw->own_fields = true;
1341  idx = idxhnsw;
1342  } else {
1343  FAISS_THROW_FMT("Index type 0x%08x not supported\n", h);
1344  idx = nullptr;
1345  }
1346  return idx;
1347 }
1348 
1349 IndexBinary *read_index_binary (FILE * f, int io_flags) {
1350  FileIOReader reader(f);
1351  return read_index_binary(&reader, io_flags);
1352 }
1353 
1354 IndexBinary *read_index_binary (const char *fname, int io_flags) {
1355  FileIOReader reader(fname);
1356  IndexBinary *idx = read_index_binary (&reader, io_flags);
1357  return idx;
1358 }
1359 
1360 
1361 } // namespace faiss
long idx_t
all indices are this type
Definition: Index.h:62