Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
/tmp/faiss/index_io.cpp
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 // -*- c++ -*-
10 
11 #include "index_io.h"
12 
13 #include <cstdio>
14 #include <cstdlib>
15 
16 #include <sys/mman.h>
17 #include <sys/types.h>
18 #include <sys/stat.h>
19 #include <unistd.h>
20 
21 #include "FaissAssert.h"
22 #include "AuxIndexStructures.h"
23 
24 #include "IndexFlat.h"
25 #include "VectorTransform.h"
26 #include "IndexLSH.h"
27 #include "IndexPQ.h"
28 #include "IndexIVF.h"
29 #include "IndexIVFPQ.h"
30 #include "IndexIVFFlat.h"
31 #include "MetaIndexes.h"
32 #include "IndexScalarQuantizer.h"
33 #include "IndexHNSW.h"
34 #include "OnDiskInvertedLists.h"
35 #include "IndexBinaryFlat.h"
36 #include "IndexBinaryFromFloat.h"
37 #include "IndexBinaryHNSW.h"
38 #include "IndexBinaryIVF.h"
39 
40 
41 
42 /*************************************************************
43  * The I/O format is the content of the class. For objects that are
44  * inherited, like Index, a 4-character-code (fourcc) indicates which
45  * child class this is an instance of.
46  *
47  * In this case, the fields of the parent class are written first,
48  * then the ones for the child classes. Note that this requires
49  * classes to be serialized to have a constructor without parameters,
50  * so that the fields can be filled in later. The default constructor
51  * should set reasonable defaults for all fields.
52  *
53  * The fourccs are assigned arbitrarily. When the class changed (added
54  * or deprecated fields), the fourcc can be replaced. New code should
55  * be able to read the old fourcc and fill in new classes.
56  *
57  * TODO: serialization to strings for use in Python pickle or Torch
58  * serialization.
59  *
60  * TODO: in this file, the read functions that encounter errors may
61  * leak memory.
62  **************************************************************/
63 
64 
65 
66 namespace faiss {
67 
/**
 * Pack a four-character code into a 32-bit tag.
 *
 * The first character ends up in the least significant byte, so the tag
 * reads back as the same four characters when stored little-endian.
 *
 * @param sx  NUL-terminated string of exactly 4 characters (asserted)
 * @return    sx[0] | sx[1] << 8 | sx[2] << 16 | sx[3] << 24
 */
static uint32_t fourcc (const char sx[4]) {
    // strlen() on a null pointer is undefined; fail loudly in debug builds
    assert (sx != nullptr);
    assert (4 == strlen (sx));
    const unsigned char *x = reinterpret_cast<const unsigned char *> (sx);
    // Widen each byte to uint32_t before shifting: shifting the promoted
    // int left by 24 overflows a signed int when the byte is >= 0x80.
    return  uint32_t (x[0])        |
           (uint32_t (x[1]) << 8)  |
           (uint32_t (x[2]) << 16) |
           (uint32_t (x[3]) << 24);
}
73 
74 /*************************************************************
75  * I/O macros
76  *
77  * we use macros so that we have a line number to report in abort
78  * (). This makes debugging a lot easier. The IOReader or IOWriter is
79  * always called f and thus is not passed in as a macro parameter.
80  **************************************************************/
81 
82 
// Write n items of type *ptr through the IOWriter named f and throw if the
// writer reports a different item count. The counts are size_t, so they are
// formatted with %zu (the previous %ld was a type mismatch).
#define WRITEANDCHECK(ptr, n) {                                 \
        size_t ret = (*f)(ptr, sizeof(*(ptr)), n);              \
        FAISS_THROW_IF_NOT_FMT(ret == (n),                      \
            "write error in %s: %zu != %zu (%s)",               \
            f->name.c_str(), ret, size_t(n), strerror(errno));  \
    }

// Read n items of type *ptr through the IOReader named f and throw if the
// reader reports a different item count.
#define READANDCHECK(ptr, n) {                                  \
        size_t ret = (*f)(ptr, sizeof(*(ptr)), n);              \
        FAISS_THROW_IF_NOT_FMT(ret == (n),                      \
            "read error in %s: %zu != %zu (%s)",                \
            f->name.c_str(), ret, size_t(n), strerror(errno));  \
    }

// Single-object shorthands; x must be an lvalue because its address is taken.
#define WRITE1(x) WRITEANDCHECK(&(x), 1)
#define READ1(x)  READANDCHECK(&(x), 1)
99 
// Serialize a contiguous container: element count as a size_t, followed by
// the raw elements.
#define WRITEVECTOR(vec) {                      \
        size_t size = (vec).size ();            \
        WRITEANDCHECK (&size, 1);               \
        WRITEANDCHECK ((vec).data (), size);    \
    }

// Counterpart of WRITEVECTOR. The count is read into a size_t, the same type
// the writer emits (a long is narrower than size_t on LLP64 platforms).
// Counts of 2^40 elements or more are rejected as corrupt before resizing.
#define READVECTOR(vec) {                                               \
        size_t size;                                                    \
        READANDCHECK (&size, 1);                                        \
        FAISS_THROW_IF_NOT (uint64_t (size) < (uint64_t (1) << 40));    \
        (vec).resize (size);                                            \
        READANDCHECK ((vec).data (), size);                             \
    }

114 
/// Closes the wrapped stdio stream when the object goes out of scope.
/// (The `struct` header line was missing from this listing and is restored.)
struct ScopeFileCloser {
    FILE *f;
    ScopeFileCloser (FILE *f): f (f) {}
    ~ScopeFileCloser () {
        // fclose() on a null stream is undefined behaviour, so skip it
        if (f) {
            fclose (f);
        }
    }
};
120 
121 
122 namespace {
123 
124 struct FileIOReader: IOReader {
125  FILE *f = nullptr;
126  bool need_close = false;
127 
128  FileIOReader(FILE *rf): f(rf) {}
129 
130  FileIOReader(const char * fname)
131  {
132  name = fname;
133  f = fopen(fname, "rb");
134  FAISS_THROW_IF_NOT_FMT (
135  f, "could not open %s for reading: %s",
136  fname, strerror(errno));
137  need_close = true;
138  }
139 
140  ~FileIOReader() {
141  if (need_close) {
142  int ret = fclose(f);
143  FAISS_THROW_IF_NOT_FMT (
144  ret == 0, "file %s close error: %s",
145  name.c_str(), strerror(errno));
146  }
147  }
148 
149  size_t operator()(
150  void *ptr, size_t size, size_t nitems) override {
151  return fread(ptr, size, nitems, f);
152  }
153 
154  int fileno() override {
155  return ::fileno (f);
156  }
157 
158 };
159 
160 struct FileIOWriter: IOWriter {
161  FILE *f = nullptr;
162  bool need_close = false;
163 
164  FileIOWriter(FILE *wf): f(wf) {}
165 
166  FileIOWriter(const char * fname)
167  {
168  name = fname;
169  f = fopen(fname, "wb");
170  FAISS_THROW_IF_NOT_FMT (
171  f, "could not open %s for writing: %s",
172  fname, strerror(errno));
173  need_close = true;
174  }
175 
176  ~FileIOWriter() {
177  if (need_close) {
178  int ret = fclose(f);
179  FAISS_THROW_IF_NOT_FMT (
180  ret == 0, "file %s close error: %s",
181  name.c_str(), strerror(errno));
182  }
183  }
184 
185  size_t operator()(
186  const void *ptr, size_t size, size_t nitems) override {
187  return fwrite(ptr, size, nitems, f);
188  }
189  int fileno() override {
190  return ::fileno (f);
191  }
192 
193 };
194 
195 
196 } // namespace
197 
198 
199 /*************************************************************
200  * Write
201  **************************************************************/
202 static void write_index_header (const Index *idx, IOWriter *f) {
203  WRITE1 (idx->d);
204  WRITE1 (idx->ntotal);
205  Index::idx_t dummy = 1 << 20;
206  WRITE1 (dummy);
207  WRITE1 (dummy);
208  WRITE1 (idx->is_trained);
209  WRITE1 (idx->metric_type);
210 }
211 
212 void write_VectorTransform (const VectorTransform *vt, IOWriter *f) {
213  if (const LinearTransform * lt =
214  dynamic_cast < const LinearTransform *> (vt)) {
215  if (dynamic_cast<const RandomRotationMatrix *>(lt)) {
216  uint32_t h = fourcc ("rrot");
217  WRITE1 (h);
218  } else if (const PCAMatrix * pca =
219  dynamic_cast<const PCAMatrix *>(lt)) {
220  uint32_t h = fourcc ("PcAm");
221  WRITE1 (h);
222  WRITE1 (pca->eigen_power);
223  WRITE1 (pca->random_rotation);
224  WRITE1 (pca->balanced_bins);
225  WRITEVECTOR (pca->mean);
226  WRITEVECTOR (pca->eigenvalues);
227  WRITEVECTOR (pca->PCAMat);
228  } else {
229  // generic LinearTransform (includes OPQ)
230  uint32_t h = fourcc ("LTra");
231  WRITE1 (h);
232  }
233  WRITE1 (lt->have_bias);
234  WRITEVECTOR (lt->A);
235  WRITEVECTOR (lt->b);
236  } else if (const RemapDimensionsTransform *rdt =
237  dynamic_cast<const RemapDimensionsTransform *>(vt)) {
238  uint32_t h = fourcc ("RmDT");
239  WRITE1 (h);
240  WRITEVECTOR (rdt->map);
241  } else if (const NormalizationTransform *nt =
242  dynamic_cast<const NormalizationTransform *>(vt)) {
243  uint32_t h = fourcc ("VNrm");
244  WRITE1 (h);
245  WRITE1 (nt->norm);
246  } else {
247  FAISS_THROW_MSG ("cannot serialize this");
248  }
249  // common fields
250  WRITE1 (vt->d_in);
251  WRITE1 (vt->d_out);
252  WRITE1 (vt->is_trained);
253 }
254 
255 void write_ProductQuantizer (const ProductQuantizer *pq, IOWriter *f) {
256  WRITE1 (pq->d);
257  WRITE1 (pq->M);
258  WRITE1 (pq->nbits);
259  WRITEVECTOR (pq->centroids);
260 }
261 
262 static void write_ScalarQuantizer (
263  const ScalarQuantizer *ivsc, IOWriter *f) {
264  WRITE1 (ivsc->qtype);
265  WRITE1 (ivsc->rangestat);
266  WRITE1 (ivsc->rangestat_arg);
267  WRITE1 (ivsc->d);
268  WRITE1 (ivsc->code_size);
269  WRITEVECTOR (ivsc->trained);
270 }
271 
272 void write_InvertedLists (const InvertedLists *ils, IOWriter *f) {
273  if (ils == nullptr) {
274  uint32_t h = fourcc ("il00");
275  WRITE1 (h);
276  } else if (const auto & ails =
277  dynamic_cast<const ArrayInvertedLists *>(ils)) {
278  uint32_t h = fourcc ("ilar");
279  WRITE1 (h);
280  WRITE1 (ails->nlist);
281  WRITE1 (ails->code_size);
282  // here we store either as a full or a sparse data buffer
283  size_t n_non0 = 0;
284  for (size_t i = 0; i < ails->nlist; i++) {
285  if (ails->ids[i].size() > 0)
286  n_non0++;
287  }
288  if (n_non0 > ails->nlist / 2) {
289  uint32_t list_type = fourcc("full");
290  WRITE1 (list_type);
291  std::vector<size_t> sizes;
292  for (size_t i = 0; i < ails->nlist; i++) {
293  sizes.push_back (ails->ids[i].size());
294  }
295  WRITEVECTOR (sizes);
296  } else {
297  int list_type = fourcc("sprs"); // sparse
298  WRITE1 (list_type);
299  std::vector<size_t> sizes;
300  for (size_t i = 0; i < ails->nlist; i++) {
301  size_t n = ails->ids[i].size();
302  if (n > 0) {
303  sizes.push_back (i);
304  sizes.push_back (n);
305  }
306  }
307  WRITEVECTOR (sizes);
308  }
309  // make a single contiguous data buffer (useful for mmapping)
310  for (size_t i = 0; i < ails->nlist; i++) {
311  size_t n = ails->ids[i].size();
312  if (n > 0) {
313  WRITEANDCHECK (ails->codes[i].data(), n * ails->code_size);
314  WRITEANDCHECK (ails->ids[i].data(), n);
315  }
316  }
317  } else if (const auto & od =
318  dynamic_cast<const OnDiskInvertedLists *>(ils)) {
319  uint32_t h = fourcc ("ilod");
320  WRITE1 (h);
321  WRITE1 (ils->nlist);
322  WRITE1 (ils->code_size);
323  // this is a POD object
324  WRITEVECTOR (od->lists);
325 
326  {
327  std::vector<OnDiskInvertedLists::Slot> v(
328  od->slots.begin(), od->slots.end());
329  WRITEVECTOR(v);
330  }
331  {
332  std::vector<char> x(od->filename.begin(), od->filename.end());
333  WRITEVECTOR(x);
334  }
335  WRITE1(od->totsize);
336 
337  } else {
338  fprintf(stderr, "WARN! write_InvertedLists: unsupported invlist type, "
339  "saving null invlist\n");
340  uint32_t h = fourcc ("il00");
341  WRITE1 (h);
342  }
343 }
344 
345 
346 void write_ProductQuantizer (const ProductQuantizer*pq, const char *fname) {
347  FileIOWriter writer(fname);
348  write_ProductQuantizer (pq, &writer);
349 }
350 
351 static void write_HNSW (const HNSW *hnsw, IOWriter *f) {
352 
353  WRITEVECTOR (hnsw->assign_probas);
354  WRITEVECTOR (hnsw->cum_nneighbor_per_level);
355  WRITEVECTOR (hnsw->levels);
356  WRITEVECTOR (hnsw->offsets);
357  WRITEVECTOR (hnsw->neighbors);
358 
359  WRITE1 (hnsw->entry_point);
360  WRITE1 (hnsw->max_level);
361  WRITE1 (hnsw->efConstruction);
362  WRITE1 (hnsw->efSearch);
363  WRITE1 (hnsw->upper_beam);
364 }
365 
366 static void write_ivf_header (const IndexIVF *ivf, IOWriter *f) {
367  write_index_header (ivf, f);
368  WRITE1 (ivf->nlist);
369  WRITE1 (ivf->nprobe);
370  write_index (ivf->quantizer, f);
371  WRITE1 (ivf->maintain_direct_map);
372  WRITEVECTOR (ivf->direct_map);
373 }
374 
375 void write_index (const Index *idx, IOWriter *f) {
376  if (const IndexFlat * idxf = dynamic_cast<const IndexFlat *> (idx)) {
377  uint32_t h = fourcc (
378  idxf->metric_type == METRIC_INNER_PRODUCT ? "IxFI" :
379  idxf->metric_type == METRIC_L2 ? "IxF2" : nullptr);
380  WRITE1 (h);
381  write_index_header (idx, f);
382  WRITEVECTOR (idxf->xb);
383  } else if(const IndexLSH * idxl = dynamic_cast<const IndexLSH *> (idx)) {
384  uint32_t h = fourcc ("IxHe");
385  WRITE1 (h);
386  write_index_header (idx, f);
387  WRITE1 (idxl->nbits);
388  WRITE1 (idxl->rotate_data);
389  WRITE1 (idxl->train_thresholds);
390  WRITEVECTOR (idxl->thresholds);
391  WRITE1 (idxl->bytes_per_vec);
392  write_VectorTransform (&idxl->rrot, f);
393  WRITEVECTOR (idxl->codes);
394  } else if(const IndexPQ * idxp = dynamic_cast<const IndexPQ *> (idx)) {
395  uint32_t h = fourcc ("IxPq");
396  WRITE1 (h);
397  write_index_header (idx, f);
398  write_ProductQuantizer (&idxp->pq, f);
399  WRITEVECTOR (idxp->codes);
400  // search params -- maybe not useful to store?
401  WRITE1 (idxp->search_type);
402  WRITE1 (idxp->encode_signs);
403  WRITE1 (idxp->polysemous_ht);
404  } else if(const Index2Layer * idxp =
405  dynamic_cast<const Index2Layer *> (idx)) {
406  uint32_t h = fourcc ("Ix2L");
407  WRITE1 (h);
408  write_index_header (idx, f);
409  write_index (idxp->q1.quantizer, f);
410  WRITE1 (idxp->q1.nlist);
411  WRITE1 (idxp->q1.quantizer_trains_alone);
412  write_ProductQuantizer (&idxp->pq, f);
413  WRITE1 (idxp->code_size_1);
414  WRITE1 (idxp->code_size_2);
415  WRITE1 (idxp->code_size);
416  WRITEVECTOR (idxp->codes);
417  } else if(const IndexScalarQuantizer * idxs =
418  dynamic_cast<const IndexScalarQuantizer *> (idx)) {
419  uint32_t h = fourcc ("IxSQ");
420  WRITE1 (h);
421  write_index_header (idx, f);
422  write_ScalarQuantizer (&idxs->sq, f);
423  WRITEVECTOR (idxs->codes);
424  } else if(const IndexIVFFlatDedup * ivfl =
425  dynamic_cast<const IndexIVFFlatDedup *> (idx)) {
426  uint32_t h = fourcc ("IwFd");
427  WRITE1 (h);
428  write_ivf_header (ivfl, f);
429  {
430  std::vector<Index::idx_t> tab (2 * ivfl->instances.size());
431  long i = 0;
432  for (auto it = ivfl->instances.begin();
433  it != ivfl->instances.end(); ++it) {
434  tab[i++] = it->first;
435  tab[i++] = it->second;
436  }
437  WRITEVECTOR (tab);
438  }
439  write_InvertedLists (ivfl->invlists, f);
440  } else if(const IndexIVFFlat * ivfl =
441  dynamic_cast<const IndexIVFFlat *> (idx)) {
442  uint32_t h = fourcc ("IwFl");
443  WRITE1 (h);
444  write_ivf_header (ivfl, f);
445  write_InvertedLists (ivfl->invlists, f);
446  } else if(const IndexIVFScalarQuantizer * ivsc =
447  dynamic_cast<const IndexIVFScalarQuantizer *> (idx)) {
448  uint32_t h = fourcc ("IwSQ");
449  WRITE1 (h);
450  write_ivf_header (ivsc, f);
451  write_ScalarQuantizer (&ivsc->sq, f);
452  WRITE1 (ivsc->code_size);
453  write_InvertedLists (ivsc->invlists, f);
454  } else if(const IndexIVFPQ * ivpq =
455  dynamic_cast<const IndexIVFPQ *> (idx)) {
456  const IndexIVFPQR * ivfpqr = dynamic_cast<const IndexIVFPQR *> (idx);
457 
458  uint32_t h = fourcc (ivfpqr ? "IwQR" : "IwPQ");
459  WRITE1 (h);
460  write_ivf_header (ivpq, f);
461  WRITE1 (ivpq->by_residual);
462  WRITE1 (ivpq->code_size);
463  write_ProductQuantizer (&ivpq->pq, f);
464  write_InvertedLists (ivpq->invlists, f);
465  if (ivfpqr) {
466  write_ProductQuantizer (&ivfpqr->refine_pq, f);
467  WRITEVECTOR (ivfpqr->refine_codes);
468  WRITE1 (ivfpqr->k_factor);
469  }
470 
471  } else if(const IndexPreTransform * ixpt =
472  dynamic_cast<const IndexPreTransform *> (idx)) {
473  uint32_t h = fourcc ("IxPT");
474  WRITE1 (h);
475  write_index_header (ixpt, f);
476  int nt = ixpt->chain.size();
477  WRITE1 (nt);
478  for (int i = 0; i < nt; i++)
479  write_VectorTransform (ixpt->chain[i], f);
480  write_index (ixpt->index, f);
481  } else if(const MultiIndexQuantizer * imiq =
482  dynamic_cast<const MultiIndexQuantizer *> (idx)) {
483  uint32_t h = fourcc ("Imiq");
484  WRITE1 (h);
485  write_index_header (imiq, f);
486  write_ProductQuantizer (&imiq->pq, f);
487  } else if(const IndexRefineFlat * idxrf =
488  dynamic_cast<const IndexRefineFlat *> (idx)) {
489  uint32_t h = fourcc ("IxRF");
490  WRITE1 (h);
491  write_index_header (idxrf, f);
492  write_index (idxrf->base_index, f);
493  write_index (&idxrf->refine_index, f);
494  WRITE1 (idxrf->k_factor);
495  } else if(const IndexIDMap * idxmap =
496  dynamic_cast<const IndexIDMap *> (idx)) {
497  uint32_t h =
498  dynamic_cast<const IndexIDMap2 *> (idx) ? fourcc ("IxM2") :
499  fourcc ("IxMp");
500  // no need to store additional info for IndexIDMap2
501  WRITE1 (h);
502  write_index_header (idxmap, f);
503  write_index (idxmap->index, f);
504  WRITEVECTOR (idxmap->id_map);
505  } else if(const IndexHNSW * idxhnsw =
506  dynamic_cast<const IndexHNSW *> (idx)) {
507  uint32_t h =
508  dynamic_cast<const IndexHNSWFlat*>(idx) ? fourcc("IHNf") :
509  dynamic_cast<const IndexHNSWPQ*>(idx) ? fourcc("IHNp") :
510  dynamic_cast<const IndexHNSWSQ*>(idx) ? fourcc("IHNs") :
511  dynamic_cast<const IndexHNSW2Level*>(idx) ? fourcc("IHN2") :
512  0;
513  FAISS_THROW_IF_NOT (h != 0);
514  WRITE1 (h);
515  write_index_header (idxhnsw, f);
516  write_HNSW (&idxhnsw->hnsw, f);
517  write_index (idxhnsw->storage, f);
518  } else {
519  FAISS_THROW_MSG ("don't know how to serialize this type of index");
520  }
521 }
522 
523 void write_index (const Index *idx, FILE *f) {
524  FileIOWriter writer(f);
525  write_index (idx, &writer);
526 }
527 
528 void write_index (const Index *idx, const char *fname) {
529  FileIOWriter writer(fname);
530  write_index (idx, &writer);
531 }
532 
533 void write_VectorTransform (const VectorTransform *vt, const char *fname) {
534  FileIOWriter writer(fname);
535  write_VectorTransform (vt, &writer);
536 }
537 
538 /*************************************************************
539  * Read
540  **************************************************************/
541 
542 static void read_index_header (Index *idx, IOReader *f) {
543  READ1 (idx->d);
544  READ1 (idx->ntotal);
545  Index::idx_t dummy;
546  READ1 (dummy);
547  READ1 (dummy);
548  READ1 (idx->is_trained);
549  READ1 (idx->metric_type);
550  idx->verbose = false;
551 }
552 
553 VectorTransform* read_VectorTransform (IOReader *f) {
554  uint32_t h;
555  READ1 (h);
556  VectorTransform *vt = nullptr;
557 
558  if (h == fourcc ("rrot") || h == fourcc ("PCAm") ||
559  h == fourcc ("LTra") || h == fourcc ("PcAm")) {
560  LinearTransform *lt = nullptr;
561  if (h == fourcc ("rrot")) {
562  lt = new RandomRotationMatrix ();
563  } else if (h == fourcc ("PCAm") ||
564  h == fourcc ("PcAm")) {
565  PCAMatrix * pca = new PCAMatrix ();
566  READ1 (pca->eigen_power);
567  READ1 (pca->random_rotation);
568  if (h == fourcc ("PcAm"))
569  READ1 (pca->balanced_bins);
570  READVECTOR (pca->mean);
571  READVECTOR (pca->eigenvalues);
572  READVECTOR (pca->PCAMat);
573  lt = pca;
574  } else if (h == fourcc ("LTra")) {
575  lt = new LinearTransform ();
576  }
577  READ1 (lt->have_bias);
578  READVECTOR (lt->A);
579  READVECTOR (lt->b);
580  FAISS_THROW_IF_NOT (lt->A.size() >= lt->d_in * lt->d_out);
581  FAISS_THROW_IF_NOT (!lt->have_bias || lt->b.size() >= lt->d_out);
582  lt->set_is_orthonormal();
583  vt = lt;
584  } else if (h == fourcc ("RmDT")) {
585  RemapDimensionsTransform *rdt = new RemapDimensionsTransform ();
586  READVECTOR (rdt->map);
587  vt = rdt;
588  } else if (h == fourcc ("VNrm")) {
589  NormalizationTransform *nt = new NormalizationTransform ();
590  READ1 (nt->norm);
591  vt = nt;
592  } else {
593  FAISS_THROW_MSG("fourcc not recognized");
594  }
595  READ1 (vt->d_in);
596  READ1 (vt->d_out);
597  READ1 (vt->is_trained);
598  return vt;
599 }
600 
601 
602 static void read_ArrayInvertedLists_sizes (
603  IOReader *f, std::vector<size_t> & sizes)
604 {
605  uint32_t list_type;
606  READ1(list_type);
607  if (list_type == fourcc("full")) {
608  size_t os = sizes.size();
609  READVECTOR (sizes);
610  FAISS_THROW_IF_NOT (os == sizes.size());
611  } else if (list_type == fourcc("sprs")) {
612  std::vector<size_t> idsizes;
613  READVECTOR (idsizes);
614  for (size_t j = 0; j < idsizes.size(); j += 2) {
615  FAISS_THROW_IF_NOT (idsizes[j] < sizes.size());
616  sizes[idsizes[j]] = idsizes[j + 1];
617  }
618  } else {
619  FAISS_THROW_MSG ("invalid list_type");
620  }
621 }
622 
623 InvertedLists *read_InvertedLists (IOReader *f, int io_flags) {
624  uint32_t h;
625  READ1 (h);
626  if (h == fourcc ("il00")) {
627  fprintf(stderr, "read_InvertedLists:"
628  " WARN! inverted lists not stored with IVF object\n");
629  return nullptr;
630  } else if (h == fourcc ("ilar") && !(io_flags & IO_FLAG_MMAP)) {
631  auto ails = new ArrayInvertedLists (0, 0);
632  READ1 (ails->nlist);
633  READ1 (ails->code_size);
634  ails->ids.resize (ails->nlist);
635  ails->codes.resize (ails->nlist);
636  std::vector<size_t> sizes (ails->nlist);
637  read_ArrayInvertedLists_sizes (f, sizes);
638  for (size_t i = 0; i < ails->nlist; i++) {
639  ails->ids[i].resize (sizes[i]);
640  ails->codes[i].resize (sizes[i] * ails->code_size);
641  }
642  for (size_t i = 0; i < ails->nlist; i++) {
643  size_t n = ails->ids[i].size();
644  if (n > 0) {
645  READANDCHECK (ails->codes[i].data(), n * ails->code_size);
646  READANDCHECK (ails->ids[i].data(), n);
647  }
648  }
649  return ails;
650  } else if (h == fourcc ("ilar") && (io_flags & IO_FLAG_MMAP)) {
651  // then we load it as an OnDiskInvertedLists
652 
653  FileIOReader *reader = dynamic_cast<FileIOReader*>(f);
654  FAISS_THROW_IF_NOT_MSG(reader, "mmap only supported for File objects");
655  FILE *fdesc = reader->f;
656 
657  auto ails = new OnDiskInvertedLists ();
658  READ1 (ails->nlist);
659  READ1 (ails->code_size);
660  ails->read_only = true;
661  ails->lists.resize (ails->nlist);
662  std::vector<size_t> sizes (ails->nlist);
663  read_ArrayInvertedLists_sizes (f, sizes);
664  size_t o0 = ftell(fdesc), o = o0;
665  { // do the mmap
666  struct stat buf;
667  int ret = fstat (fileno(fdesc), &buf);
668  FAISS_THROW_IF_NOT_FMT (ret == 0,
669  "fstat failed: %s", strerror(errno));
670  ails->totsize = buf.st_size;
671  ails->ptr = (uint8_t*)mmap (nullptr, ails->totsize,
672  PROT_READ, MAP_SHARED,
673  fileno(fdesc), 0);
674  FAISS_THROW_IF_NOT_FMT (ails->ptr != MAP_FAILED,
675  "could not mmap: %s",
676  strerror(errno));
677  }
678 
679  for (size_t i = 0; i < ails->nlist; i++) {
680  OnDiskInvertedLists::List & l = ails->lists[i];
681  l.size = l.capacity = sizes[i];
682  l.offset = o;
683  o += l.size * (sizeof(OnDiskInvertedLists::idx_t) +
684  ails->code_size);
685  }
686  FAISS_THROW_IF_NOT(o <= ails->totsize);
687  // resume normal reading of file
688  fseek (fdesc, o, SEEK_SET);
689  return ails;
690  } else if (h == fourcc ("ilod")) {
691  OnDiskInvertedLists *od = new OnDiskInvertedLists();
692  od->read_only = io_flags & IO_FLAG_READ_ONLY;
693  READ1 (od->nlist);
694  READ1 (od->code_size);
695  // this is a POD object
696  READVECTOR (od->lists);
697  {
698  std::vector<OnDiskInvertedLists::Slot> v;
699  READVECTOR(v);
700  od->slots.assign(v.begin(), v.end());
701  }
702  {
703  std::vector<char> x;
704  READVECTOR(x);
705  od->filename.assign(x.begin(), x.end());
706  }
707  READ1(od->totsize);
708  od->do_mmap();
709  return od;
710  } else {
711  FAISS_THROW_MSG ("read_InvertedLists: unsupported invlist type");
712  }
713 }
714 
715 static void read_InvertedLists (
716  IndexIVF *ivf, IOReader *f, int io_flags) {
717  InvertedLists *ils = read_InvertedLists (f, io_flags);
718  FAISS_THROW_IF_NOT (!ils || (ils->nlist == ivf->nlist &&
719  ils->code_size == ivf->code_size));
720  ivf->invlists = ils;
721  ivf->own_invlists = true;
722 }
723 
724 static void read_InvertedLists (
725  IndexBinaryIVF *ivf, IOReader *f, int io_flags) {
726  InvertedLists *ils = read_InvertedLists (f, io_flags);
727  FAISS_THROW_IF_NOT (!ils || (ils->nlist == ivf->nlist &&
728  ils->code_size == ivf->code_size));
729  ivf->invlists = ils;
730  ivf->own_invlists = true;
731 }
732 
733 static void read_ProductQuantizer (ProductQuantizer *pq, IOReader *f) {
734  READ1 (pq->d);
735  READ1 (pq->M);
736  READ1 (pq->nbits);
737  pq->set_derived_values ();
738  READVECTOR (pq->centroids);
739 }
740 
741 static void read_ScalarQuantizer (ScalarQuantizer *ivsc, IOReader *f) {
742  READ1 (ivsc->qtype);
743  READ1 (ivsc->rangestat);
744  READ1 (ivsc->rangestat_arg);
745  READ1 (ivsc->d);
746  READ1 (ivsc->code_size);
747  READVECTOR (ivsc->trained);
748 }
749 
750 
751 static void read_HNSW (HNSW *hnsw, IOReader *f) {
752  READVECTOR (hnsw->assign_probas);
753  READVECTOR (hnsw->cum_nneighbor_per_level);
754  READVECTOR (hnsw->levels);
755  READVECTOR (hnsw->offsets);
756  READVECTOR (hnsw->neighbors);
757 
758  READ1 (hnsw->entry_point);
759  READ1 (hnsw->max_level);
760  READ1 (hnsw->efConstruction);
761  READ1 (hnsw->efSearch);
762  READ1 (hnsw->upper_beam);
763 }
764 
765 ProductQuantizer * read_ProductQuantizer (const char*fname) {
766  FileIOReader reader(fname);
767  return read_ProductQuantizer(&reader);
768 }
769 
770 ProductQuantizer * read_ProductQuantizer (IOReader *reader) {
771  ProductQuantizer *pq = new ProductQuantizer();
772  ScopeDeleter1<ProductQuantizer> del (pq);
773 
774  read_ProductQuantizer(pq, reader);
775  del.release ();
776  return pq;
777 }
778 
779 static void read_ivf_header (
780  IndexIVF *ivf, IOReader *f,
781  std::vector<std::vector<Index::idx_t> > *ids = nullptr)
782 {
783  read_index_header (ivf, f);
784  READ1 (ivf->nlist);
785  READ1 (ivf->nprobe);
786  ivf->quantizer = read_index (f);
787  ivf->own_fields = true;
788  if (ids) { // used in legacy "Iv" formats
789  ids->resize (ivf->nlist);
790  for (size_t i = 0; i < ivf->nlist; i++)
791  READVECTOR ((*ids)[i]);
792  }
793  READ1 (ivf->maintain_direct_map);
794  READVECTOR (ivf->direct_map);
795 }
796 
797 // used for legacy formats
798 static ArrayInvertedLists *set_array_invlist(
799  IndexIVF *ivf, std::vector<std::vector<Index::idx_t> > &ids)
800 {
801  ArrayInvertedLists *ail = new ArrayInvertedLists (
802  ivf->nlist, ivf->code_size);
803  std::swap (ail->ids, ids);
804  ivf->invlists = ail;
805  ivf->own_invlists = true;
806  return ail;
807 }
808 
809 static IndexIVFPQ *read_ivfpq (IOReader *f, uint32_t h, int io_flags)
810 {
811  bool legacy = h == fourcc ("IvQR") || h == fourcc ("IvPQ");
812 
813  IndexIVFPQR *ivfpqr =
814  h == fourcc ("IvQR") || h == fourcc ("IwQR") ?
815  new IndexIVFPQR () : nullptr;
816  IndexIVFPQ * ivpq = ivfpqr ? ivfpqr : new IndexIVFPQ ();
817 
818  std::vector<std::vector<Index::idx_t> > ids;
819  read_ivf_header (ivpq, f, legacy ? &ids : nullptr);
820  READ1 (ivpq->by_residual);
821  READ1 (ivpq->code_size);
822  read_ProductQuantizer (&ivpq->pq, f);
823 
824  if (legacy) {
825  ArrayInvertedLists *ail = set_array_invlist (ivpq, ids);
826  for (size_t i = 0; i < ail->nlist; i++)
827  READVECTOR (ail->codes[i]);
828  } else {
829  read_InvertedLists (ivpq, f, io_flags);
830  }
831 
832  // precomputed table not stored. It is cheaper to recompute it
833  ivpq->use_precomputed_table = 0;
834  if (ivpq->by_residual)
835  ivpq->precompute_table ();
836  if (ivfpqr) {
837  read_ProductQuantizer (&ivfpqr->refine_pq, f);
838  READVECTOR (ivfpqr->refine_codes);
839  READ1 (ivfpqr->k_factor);
840  }
841  return ivpq;
842 }
843 
844 int read_old_fmt_hack = 0;
845 
846 Index *read_index (IOReader *f, int io_flags) {
847  Index * idx = nullptr;
848  uint32_t h;
849  READ1 (h);
850  if (h == fourcc ("IxFI") || h == fourcc ("IxF2")) {
851  IndexFlat *idxf;
852  if (h == fourcc ("IxFI")) idxf = new IndexFlatIP ();
853  else idxf = new IndexFlatL2 ();
854  read_index_header (idxf, f);
855  READVECTOR (idxf->xb);
856  FAISS_THROW_IF_NOT (idxf->xb.size() == idxf->ntotal * idxf->d);
857  // leak!
858  idx = idxf;
859  } else if (h == fourcc("IxHE") || h == fourcc("IxHe")) {
860  IndexLSH * idxl = new IndexLSH ();
861  read_index_header (idxl, f);
862  READ1 (idxl->nbits);
863  READ1 (idxl->rotate_data);
864  READ1 (idxl->train_thresholds);
865  READVECTOR (idxl->thresholds);
866  READ1 (idxl->bytes_per_vec);
867  if (h == fourcc("IxHE")) {
868  FAISS_THROW_IF_NOT_FMT (idxl->nbits % 64 == 0,
869  "can only read old format IndexLSH with "
870  "nbits multiple of 64 (got %d)",
871  (int) idxl->nbits);
872  // leak
873  idxl->bytes_per_vec *= 8;
874  }
875  {
876  RandomRotationMatrix *rrot = dynamic_cast<RandomRotationMatrix *>
877  (read_VectorTransform (f));
878  FAISS_THROW_IF_NOT_MSG(rrot, "expected a random rotation");
879  idxl->rrot = *rrot;
880  delete rrot;
881  }
882  READVECTOR (idxl->codes);
883  FAISS_THROW_IF_NOT (idxl->rrot.d_in == idxl->d &&
884  idxl->rrot.d_out == idxl->nbits);
885  FAISS_THROW_IF_NOT (
886  idxl->codes.size() == idxl->ntotal * idxl->bytes_per_vec);
887  idx = idxl;
888  } else if (h == fourcc ("IxPQ") || h == fourcc ("IxPo") ||
889  h == fourcc ("IxPq")) {
890  // IxPQ and IxPo were merged into the same IndexPQ object
891  IndexPQ * idxp =new IndexPQ ();
892  read_index_header (idxp, f);
893  read_ProductQuantizer (&idxp->pq, f);
894  READVECTOR (idxp->codes);
895  if (h == fourcc ("IxPo") || h == fourcc ("IxPq")) {
896  READ1 (idxp->search_type);
897  READ1 (idxp->encode_signs);
898  READ1 (idxp->polysemous_ht);
899  }
900  // Old versoins of PQ all had metric_type set to INNER_PRODUCT
901  // when they were in fact using L2. Therefore, we force metric type
902  // to L2 when the old format is detected
903  if (h == fourcc ("IxPQ") || h == fourcc ("IxPo")) {
904  idxp->metric_type = METRIC_L2;
905  }
906  idx = idxp;
907  } else if (h == fourcc ("IvFl") || h == fourcc("IvFL")) { // legacy
908  IndexIVFFlat * ivfl = new IndexIVFFlat ();
909  std::vector<std::vector<Index::idx_t> > ids;
910  read_ivf_header (ivfl, f, &ids);
911  ivfl->code_size = ivfl->d * sizeof(float);
912  ArrayInvertedLists *ail = set_array_invlist (ivfl, ids);
913 
914  if (h == fourcc ("IvFL")) {
915  for (size_t i = 0; i < ivfl->nlist; i++) {
916  READVECTOR (ail->codes[i]);
917  }
918  } else { // old format
919  for (size_t i = 0; i < ivfl->nlist; i++) {
920  std::vector<float> vec;
921  READVECTOR (vec);
922  ail->codes[i].resize(vec.size() * sizeof(float));
923  memcpy(ail->codes[i].data(), vec.data(),
924  ail->codes[i].size());
925  }
926  }
927  idx = ivfl;
928  } else if (h == fourcc ("IwFd")) {
929  IndexIVFFlatDedup * ivfl = new IndexIVFFlatDedup ();
930  read_ivf_header (ivfl, f);
931  ivfl->code_size = ivfl->d * sizeof(float);
932  {
933  std::vector<Index::idx_t> tab;
934  READVECTOR (tab);
935  for (long i = 0; i < tab.size(); i += 2) {
936  std::pair<Index::idx_t, Index::idx_t>
937  pair (tab[i], tab[i + 1]);
938  ivfl->instances.insert (pair);
939  }
940  }
941  read_InvertedLists (ivfl, f, io_flags);
942  idx = ivfl;
943  } else if (h == fourcc ("IwFl")) {
944  IndexIVFFlat * ivfl = new IndexIVFFlat ();
945  read_ivf_header (ivfl, f);
946  ivfl->code_size = ivfl->d * sizeof(float);
947  read_InvertedLists (ivfl, f, io_flags);
948  idx = ivfl;
949  } else if (h == fourcc ("IxSQ")) {
950  IndexScalarQuantizer * idxs = new IndexScalarQuantizer ();
951  read_index_header (idxs, f);
952  read_ScalarQuantizer (&idxs->sq, f);
953  READVECTOR (idxs->codes);
954  idxs->code_size = idxs->sq.code_size;
955  idx = idxs;
956  } else if(h == fourcc ("IvSQ")) { // legacy
957  IndexIVFScalarQuantizer * ivsc = new IndexIVFScalarQuantizer();
958  std::vector<std::vector<Index::idx_t> > ids;
959  read_ivf_header (ivsc, f, &ids);
960  read_ScalarQuantizer (&ivsc->sq, f);
961  READ1 (ivsc->code_size);
962  ArrayInvertedLists *ail = set_array_invlist (ivsc, ids);
963  for(int i = 0; i < ivsc->nlist; i++)
964  READVECTOR (ail->codes[i]);
965  idx = ivsc;
966  } else if(h == fourcc ("IwSQ")) {
967  IndexIVFScalarQuantizer * ivsc = new IndexIVFScalarQuantizer();
968  read_ivf_header (ivsc, f);
969  read_ScalarQuantizer (&ivsc->sq, f);
970  READ1 (ivsc->code_size);
971  read_InvertedLists (ivsc, f, io_flags);
972  idx = ivsc;
973  } else if(h == fourcc ("IvPQ") || h == fourcc ("IvQR") ||
974  h == fourcc ("IwPQ") || h == fourcc ("IwQR")) {
975 
976  idx = read_ivfpq (f, h, io_flags);
977 
978  } else if(h == fourcc ("IxPT")) {
979  IndexPreTransform * ixpt = new IndexPreTransform();
980  ixpt->own_fields = true;
981  read_index_header (ixpt, f);
982  int nt;
983  if (read_old_fmt_hack == 2) {
984  nt = 1;
985  } else {
986  READ1 (nt);
987  }
988  for (int i = 0; i < nt; i++) {
989  ixpt->chain.push_back (read_VectorTransform (f));
990  }
991  ixpt->index = read_index (f, io_flags);
992  idx = ixpt;
993  } else if(h == fourcc ("Imiq")) {
994  MultiIndexQuantizer * imiq = new MultiIndexQuantizer ();
995  read_index_header (imiq, f);
996  read_ProductQuantizer (&imiq->pq, f);
997  idx = imiq;
998  } else if(h == fourcc ("IxRF")) {
999  IndexRefineFlat *idxrf = new IndexRefineFlat ();
1000  read_index_header (idxrf, f);
1001  idxrf->base_index = read_index(f, io_flags);
1002  idxrf->own_fields = true;
1003  IndexFlat *rf = dynamic_cast<IndexFlat*> (read_index (f, io_flags));
1004  std::swap (*rf, idxrf->refine_index);
1005  delete rf;
1006  READ1 (idxrf->k_factor);
1007  idx = idxrf;
1008  } else if(h == fourcc ("IxMp") || h == fourcc ("IxM2")) {
1009  bool is_map2 = h == fourcc ("IxM2");
1010  IndexIDMap * idxmap = is_map2 ? new IndexIDMap2 () : new IndexIDMap ();
1011  read_index_header (idxmap, f);
1012  idxmap->index = read_index (f, io_flags);
1013  idxmap->own_fields = true;
1014  READVECTOR (idxmap->id_map);
1015  if (is_map2) {
1016  static_cast<IndexIDMap2*>(idxmap)->construct_rev_map ();
1017  }
1018  idx = idxmap;
1019  } else if (h == fourcc ("Ix2L")) {
1020  Index2Layer * idxp = new Index2Layer ();
1021  read_index_header (idxp, f);
1022  idxp->q1.quantizer = read_index (f, io_flags);
1023  READ1 (idxp->q1.nlist);
1024  READ1 (idxp->q1.quantizer_trains_alone);
1025  read_ProductQuantizer (&idxp->pq, f);
1026  READ1 (idxp->code_size_1);
1027  READ1 (idxp->code_size_2);
1028  READ1 (idxp->code_size);
1029  READVECTOR (idxp->codes);
1030  idx = idxp;
1031  } else if(h == fourcc("IHNf") || h == fourcc("IHNp") ||
1032  h == fourcc("IHNs") || h == fourcc("IHN2")) {
1033  IndexHNSW *idxhnsw = nullptr;
1034  if (h == fourcc("IHNf")) idxhnsw = new IndexHNSWFlat ();
1035  if (h == fourcc("IHNp")) idxhnsw = new IndexHNSWPQ ();
1036  if (h == fourcc("IHNs")) idxhnsw = new IndexHNSWSQ ();
1037  if (h == fourcc("IHN2")) idxhnsw = new IndexHNSW2Level ();
1038  read_index_header (idxhnsw, f);
1039  read_HNSW (&idxhnsw->hnsw, f);
1040  idxhnsw->storage = read_index (f, io_flags);
1041  idxhnsw->own_fields = true;
1042  if (h == fourcc("IHNp")) {
1043  dynamic_cast<IndexPQ*>(idxhnsw->storage)->pq.compute_sdc_table ();
1044  }
1045  idx = idxhnsw;
1046  } else {
1047  FAISS_THROW_FMT("Index type 0x%08x not supported\n", h);
1048  idx = nullptr;
1049  }
1050  return idx;
1051 }
1052 
1053 
1054 Index *read_index (FILE * f, int io_flags) {
1055  FileIOReader reader(f);
1056  return read_index(&reader, io_flags);
1057 }
1058 
1059 Index *read_index (const char *fname, int io_flags) {
1060  FileIOReader reader(fname);
1061  Index *idx = read_index (&reader, io_flags);
1062  return idx;
1063 }
1064 
1065 VectorTransform *read_VectorTransform (const char *fname) {
1066  FileIOReader reader(fname);
1067  VectorTransform *vt = read_VectorTransform (&reader);
1068  return vt;
1069 }
1070 
1071 /*************************************************************
1072  * cloning functions
1073  **************************************************************/
1074 
1075 
1076 
1077 Index * clone_index (const Index *index)
1078 {
1079  Cloner cl;
1080  return cl.clone_Index (index);
1081 }
1082 
// Building block of the clone functions. If obj can be dynamic_cast to
// classname, the enclosing function returns a heap-allocated copy made
// with classname's copy constructor (which must therefore exist).
// The expansion ends in a dangling `else`, so invocations chain and the
// last one must be followed by a statement. Always list candidates from
// most specific to most general.
#define TRYCLONE(classname, obj)                                        \
    if (const classname *typed_src =                                    \
            dynamic_cast<const classname *>(obj)) {                     \
        return new classname(*typed_src);                               \
    } else
1089 
1090 VectorTransform *Cloner::clone_VectorTransform (const VectorTransform *vt)
1091 {
1092  TRYCLONE (RemapDimensionsTransform, vt)
1093  TRYCLONE (OPQMatrix, vt)
1094  TRYCLONE (PCAMatrix, vt)
1095  TRYCLONE (RandomRotationMatrix, vt)
1096  TRYCLONE (LinearTransform, vt)
1097  {
1098  FAISS_THROW_MSG("clone not supported for this type of VectorTransform");
1099  }
1100  return nullptr;
1101 }
1102 
1103 IndexIVF * Cloner::clone_IndexIVF (const IndexIVF *ivf)
1104 {
1105  TRYCLONE (IndexIVFPQR, ivf)
1106  TRYCLONE (IndexIVFPQ, ivf)
1107  TRYCLONE (IndexIVFFlat, ivf)
1108  TRYCLONE (IndexIVFScalarQuantizer, ivf)
1109  {
1110  FAISS_THROW_MSG("clone not supported for this type of IndexIVF");
1111  }
1112  return nullptr;
1113 }
1114 
1115 Index *Cloner::clone_Index (const Index *index)
1116 {
1117  TRYCLONE (IndexPQ, index)
1118  TRYCLONE (IndexLSH, index)
1119  TRYCLONE (IndexFlatL2, index)
1120  TRYCLONE (IndexFlatIP, index)
1121  TRYCLONE (IndexFlat, index)
1122  TRYCLONE (IndexScalarQuantizer, index)
1123  TRYCLONE (MultiIndexQuantizer, index)
1124  if (const IndexIVF * ivf = dynamic_cast<const IndexIVF*>(index)) {
1125  IndexIVF *res = clone_IndexIVF (ivf);
1126  if (ivf->invlists == nullptr) {
1127  res->invlists = nullptr;
1128  } else if (auto *ails = dynamic_cast<const ArrayInvertedLists*>
1129  (ivf->invlists)) {
1130  res->invlists = new ArrayInvertedLists(*ails);
1131  res->own_invlists = true;
1132  } else {
1133  FAISS_THROW_MSG( "clone not supported for this type of inverted lists");
1134  }
1135  res->own_fields = true;
1136  res->quantizer = clone_Index (ivf->quantizer);
1137  return res;
1138  } else if (const IndexPreTransform * ipt =
1139  dynamic_cast<const IndexPreTransform*> (index)) {
1140  IndexPreTransform *res = new IndexPreTransform ();
1141  res->d = ipt->d;
1142  res->index = clone_Index (ipt->index);
1143  for (int i = 0; i < ipt->chain.size(); i++)
1144  res->chain.push_back (clone_VectorTransform (ipt->chain[i]));
1145  res->own_fields = true;
1146  return res;
1147  } else if (const IndexIDMap *idmap =
1148  dynamic_cast<const IndexIDMap*> (index)) {
1149  IndexIDMap *res = new IndexIDMap (*idmap);
1150  res->own_fields = true;
1151  res->index = clone_Index (idmap->index);
1152  return res;
1153  } else {
1154  FAISS_THROW_MSG( "clone not supported for this type of Index");
1155  }
1156  return nullptr;
1157 }
1158 
1159 
1160 static void write_index_binary_header (const IndexBinary *idx, IOWriter *f) {
1161  WRITE1 (idx->d);
1162  WRITE1 (idx->code_size);
1163  WRITE1 (idx->ntotal);
1164  WRITE1 (idx->is_trained);
1165  WRITE1 (idx->metric_type);
1166 }
1167 
1168 static void write_binary_ivf_header (const IndexBinaryIVF *ivf, IOWriter *f) {
1169  write_index_binary_header (ivf, f);
1170  WRITE1 (ivf->nlist);
1171  WRITE1 (ivf->nprobe);
1172  write_index_binary (ivf->quantizer, f);
1173  WRITE1 (ivf->maintain_direct_map);
1174  WRITEVECTOR (ivf->direct_map);
1175 }
1176 
1177 void write_index_binary (const IndexBinary *idx, IOWriter *f) {
1178  if (const IndexBinaryFlat *idxf =
1179  dynamic_cast<const IndexBinaryFlat *> (idx)) {
1180  uint32_t h = fourcc ("IBxF");
1181  WRITE1 (h);
1182  write_index_binary_header (idx, f);
1183  WRITEVECTOR (idxf->xb);
1184  } else if (const IndexBinaryIVF *ivf =
1185  dynamic_cast<const IndexBinaryIVF *> (idx)) {
1186  uint32_t h = fourcc ("IBwF");
1187  WRITE1 (h);
1188  write_binary_ivf_header (ivf, f);
1189  write_InvertedLists (ivf->invlists, f);
1190  } else if(const IndexBinaryFromFloat * idxff =
1191  dynamic_cast<const IndexBinaryFromFloat *> (idx)) {
1192  uint32_t h = fourcc ("IBFf");
1193  WRITE1 (h);
1194  write_index_binary_header (idxff, f);
1195  write_index (idxff->index, f);
1196  } else if (const IndexBinaryHNSW *idxhnsw =
1197  dynamic_cast<const IndexBinaryHNSW *> (idx)) {
1198  uint32_t h = fourcc ("IBHf");
1199  WRITE1 (h);
1200  write_index_binary_header (idxhnsw, f);
1201  write_HNSW (&idxhnsw->hnsw, f);
1202  write_index_binary (idxhnsw->storage, f);
1203  } else {
1204  FAISS_THROW_MSG ("don't know how to serialize this type of index");
1205  }
1206 }
1207 
1208 void write_index_binary (const IndexBinary *idx, FILE *f) {
1209  FileIOWriter writer(f);
1210  write_index_binary(idx, &writer);
1211 }
1212 
1213 void write_index_binary (const IndexBinary *idx, const char *fname) {
1214  FileIOWriter writer(fname);
1215  write_index_binary (idx, &writer);
1216 }
1217 
1218 static void read_index_binary_header (IndexBinary *idx, IOReader *f) {
1219  READ1 (idx->d);
1220  READ1 (idx->code_size);
1221  READ1 (idx->ntotal);
1222  READ1 (idx->is_trained);
1223  READ1 (idx->metric_type);
1224  idx->verbose = false;
1225 }
1226 
1227 static void read_binary_ivf_header (
1228  IndexBinaryIVF *ivf, IOReader *f,
1229  std::vector<std::vector<Index::idx_t> > *ids = nullptr)
1230 {
1231  read_index_binary_header (ivf, f);
1232  READ1 (ivf->nlist);
1233  READ1 (ivf->nprobe);
1234  ivf->quantizer = read_index_binary (f);
1235  ivf->own_fields = true;
1236  if (ids) { // used in legacy "Iv" formats
1237  ids->resize (ivf->nlist);
1238  for (size_t i = 0; i < ivf->nlist; i++)
1239  READVECTOR ((*ids)[i]);
1240  }
1241  READ1 (ivf->maintain_direct_map);
1242  READVECTOR (ivf->direct_map);
1243 }
1244 
1245 IndexBinary *read_index_binary (IOReader *f, int io_flags) {
1246  IndexBinary * idx = nullptr;
1247  uint32_t h;
1248  READ1 (h);
1249  if (h == fourcc ("IBxF")) {
1250  IndexBinaryFlat *idxf = new IndexBinaryFlat ();
1251  read_index_binary_header (idxf, f);
1252  READVECTOR (idxf->xb);
1253  FAISS_THROW_IF_NOT (idxf->xb.size() == idxf->ntotal * idxf->code_size);
1254  // leak!
1255  idx = idxf;
1256  } else if (h == fourcc ("IBwF")) {
1257  IndexBinaryIVF *ivf = new IndexBinaryIVF ();
1258  read_binary_ivf_header (ivf, f);
1259  read_InvertedLists (ivf, f, io_flags);
1260  idx = ivf;
1261  } else if (h == fourcc ("IBFf")) {
1262  IndexBinaryFromFloat *idxff = new IndexBinaryFromFloat ();
1263  read_index_binary_header (idxff, f);
1264  idxff->own_fields = true;
1265  idxff->index = read_index (f, io_flags);
1266  idx = idxff;
1267  } else if (h == fourcc ("IBHf")) {
1268  IndexBinaryHNSW *idxhnsw = new IndexBinaryHNSW ();
1269  read_index_binary_header (idxhnsw, f);
1270  read_HNSW (&idxhnsw->hnsw, f);
1271  idxhnsw->storage = read_index_binary (f, io_flags);
1272  idxhnsw->own_fields = true;
1273  idx = idxhnsw;
1274  } else {
1275  FAISS_THROW_FMT("Index type 0x%08x not supported\n", h);
1276  idx = nullptr;
1277  }
1278  return idx;
1279 }
1280 
1281 IndexBinary *read_index_binary (FILE * f, int io_flags) {
1282  FileIOReader reader(f);
1283  return read_index_binary(&reader, io_flags);
1284 }
1285 
1286 IndexBinary *read_index_binary (const char *fname, int io_flags) {
1287  FileIOReader reader(fname);
1288  IndexBinary *idx = read_index_binary (&reader, io_flags);
1289  return idx;
1290 }
1291 
1292 
1293 } // namespace faiss
long idx_t
all indices are this type
Definition: Index.h:64