17 #include <sys/types.h>
21 #include "FaissAssert.h"
23 #include "IndexFlat.h"
24 #include "VectorTransform.h"
28 #include "IndexIVFPQ.h"
29 #include "IndexIVFFlat.h"
30 #include "MetaIndexes.h"
31 #include "IndexScalarQuantizer.h"
32 #include "IndexHNSW.h"
33 #include "OnDiskInvertedLists.h"
/// Pack a four-character tag into a 32-bit code, first character in the
/// least-significant byte.
///
/// @param sx  four bytes, typically a string literal such as "IxFI".
/// @return    sx[0] | sx[1] << 8 | sx[2] << 16 | sx[3] << 24, computed on
///            unsigned 32-bit values.
static uint32_t fourcc (const char sx[4]) {
    // Read the bytes as unsigned char so values >= 0x80 are not sign-extended.
    const unsigned char *x = reinterpret_cast<const unsigned char *> (sx);
    // Widen before shifting: an unsigned char promotes to (signed) int, and
    // shifting such a value left by 24 overflows when the byte is >= 0x80.
    return  static_cast<uint32_t> (x[0])
         | (static_cast<uint32_t> (x[1]) << 8)
         | (static_cast<uint32_t> (x[2]) << 16)
         | (static_cast<uint32_t> (x[3]) << 24);
}
// WRITEANDCHECK(ptr, n): fwrite n items of sizeof(*ptr) bytes each from ptr
// to the FILE* that must be named `f` at the expansion site, and fail with
// "write error" (through FAISS_THROW_IF_NOT_MSG) unless all n were written.
76 #define WRITEANDCHECK(ptr, n) { \
77 size_t ret = fwrite (ptr, sizeof (* (ptr)), n, f); \
78 FAISS_THROW_IF_NOT_MSG (ret == (n), "write error"); \
// READANDCHECK(ptr, n): fread n items of sizeof(*ptr) bytes each into ptr
// from the FILE* that must be named `f` at the expansion site, and fail with
// "read error" (through FAISS_THROW_IF_NOT_MSG) unless all n were read.
81 #define READANDCHECK(ptr, n) { \
82 size_t ret = fread (ptr, sizeof (* (ptr)), n, f); \
83 FAISS_THROW_IF_NOT_MSG (ret == (n), "read error"); \
// Single-object shorthands: write / read exactly one item at the address of x.
86 #define WRITE1(x) WRITEANDCHECK(&(x), 1)
87 #define READ1(x) READANDCHECK(&(x), 1)
// WRITEVECTOR(vec): length-prefixed dump of a contiguous container: first the
// element count as a size_t, then `size` elements starting at vec.data().
89 #define WRITEVECTOR(vec) { \
90 size_t size = (vec).size (); \
91 WRITEANDCHECK (&size, 1); \
92 WRITEANDCHECK ((vec).data (), size); \
// READVECTOR(vec): reads the layout produced by WRITEVECTOR: the element
// count, a sanity bound on it (must be below 2^40), then resize vec and read
// the elements straight into vec.data().
// NOTE(review): the declaration of `size` is not visible in this excerpt; if
// it is an unsigned type (WRITEVECTOR uses size_t) the `size >= 0` half of the
// check can never fail. `1L << 40` also assumes `long` is wider than 40 bits.
95 #define READVECTOR(vec) { \
97 READANDCHECK (&size, 1); \
98 FAISS_THROW_IF_NOT (size >= 0 && size < (1L << 40)); \
99 (vec).resize (size); \
100 READANDCHECK ((vec).data (), size); \
// Writes the header part of a serialized Index to f. It is called from the
// write_index branches and from write_ivf_header before the type-specific
// payload. Only the signature is visible in this excerpt; presumably it
// mirrors what read_index_header loads (verify against the full source).
117 static void write_index_header (
const Index *idx, FILE *f) {
// Serialize a VectorTransform to the open stream f.
// The dynamic type of vt selects a four-character tag:
//   "rrot"  RandomRotationMatrix
//   "PcAm"  PCAMatrix
//   "LTra"  LinearTransform that matched neither of the two above
//   "RmDT"  RemapDimensionsTransform
//   "VNrm"  NormalizationTransform
// A type matching none of these is rejected with "cannot serialize this".
127 void write_VectorTransform (
const VectorTransform *vt, FILE *f) {
128 if (
const LinearTransform * lt =
129 dynamic_cast < const LinearTransform *> (vt)) {
// The subclass tests come first; the generic "LTra" tag appears after them.
130 if (dynamic_cast<const RandomRotationMatrix *>(lt)) {
131 uint32_t h = fourcc (
"rrot");
133 }
else if (
const PCAMatrix * pca =
134 dynamic_cast<const PCAMatrix *>(lt)) {
135 uint32_t h = fourcc (
"PcAm");
// PCA-specific fields, in the same order read_VectorTransform reads them
// back for the "PcAm" tag.
137 WRITE1 (pca->eigen_power);
138 WRITE1 (pca->random_rotation);
139 WRITE1 (pca->balanced_bins);
140 WRITEVECTOR (pca->mean);
141 WRITEVECTOR (pca->eigenvalues);
142 WRITEVECTOR (pca->PCAMat);
145 uint32_t h = fourcc (
"LTra");
148 WRITE1 (lt->have_bias);
151 }
else if (
const RemapDimensionsTransform *rdt =
152 dynamic_cast<const RemapDimensionsTransform *>(vt)) {
153 uint32_t h = fourcc (
"RmDT");
155 WRITEVECTOR (rdt->map);
156 }
else if (
const NormalizationTransform *nt =
157 dynamic_cast<const NormalizationTransform *>(vt)) {
158 uint32_t h = fourcc (
"VNrm");
162 FAISS_THROW_MSG (
"cannot serialize this");
// The trained flag comes after the type-specific payload.
167 WRITE1 (vt->is_trained);
// Stream overload: writes a ProductQuantizer to f. The only statement visible
// in this excerpt dumps the centroid table as a length-prefixed vector; the
// preceding lines of the body are not shown here.
170 static void write_ProductQuantizer (
const ProductQuantizer *pq, FILE *f) {
174 WRITEVECTOR (pq->centroids);
// Writes a ScalarQuantizer to f: quantizer type, range statistic and its
// argument, code size, then the `trained` vector (length-prefixed).
// read_ScalarQuantizer reads rangestat, rangestat_arg, code_size and trained
// in the same relative order.
177 static void write_ScalarQuantizer (
const ScalarQuantizer *ivsc, FILE *f) {
178 WRITE1 (ivsc->qtype);
179 WRITE1 (ivsc->rangestat);
180 WRITE1 (ivsc->rangestat_arg);
182 WRITE1 (ivsc->code_size);
183 WRITEVECTOR (ivsc->trained);
// Serialize a set of inverted lists to f. The tag depends on the input:
//   "il00"  ils is a null pointer
//   "ilar"  ArrayInvertedLists
//   "ilod"  OnDiskInvertedLists
// Anything else fails with "write_InvertedLists: unsupported invlist type".
186 static void write_InvertedLists (
const InvertedLists *ils, FILE *f) {
187 if (ils ==
nullptr) {
188 uint32_t h = fourcc (
"il00");
190 }
else if (
const auto & ails =
191 dynamic_cast<const ArrayInvertedLists *>(ils)) {
192 uint32_t h = fourcc (
"ilar");
194 WRITE1 (ails->nlist);
195 WRITE1 (ails->code_size);
// Scan the lists for non-empty ones; the resulting n_non0 chooses between
// the two list-size encodings below.
198 for (
size_t i = 0; i < ails->nlist; i++) {
199 if (ails->ids[i].size() > 0)
// More than half of the lists are non-empty: "full" encoding, one size per
// list in list order.
202 if (n_non0 > ails->nlist / 2) {
203 uint32_t list_type = fourcc(
"full");
205 std::vector<size_t> sizes;
206 for (
size_t i = 0; i < ails->nlist; i++) {
207 sizes.push_back (ails->ids[i].size());
// Otherwise: "sprs" encoding. read_ArrayInvertedLists_sizes decodes it as
// (list index, size) pairs.
// NOTE(review): list_type is declared int here but uint32_t in the "full"
// branch; both are written with WRITE1, so the on-disk width follows the
// declared type.
211 int list_type = fourcc(
"sprs");
213 std::vector<size_t> sizes;
214 for (
size_t i = 0; i < ails->nlist; i++) {
215 size_t n = ails->ids[i].size();
// Payload: for each list, the codes (n * code_size items) then the n ids.
224 for (
size_t i = 0; i < ails->nlist; i++) {
225 size_t n = ails->ids[i].size();
227 WRITEANDCHECK (ails->codes[i].data(), n * ails->code_size);
228 WRITEANDCHECK (ails->ids[i].data(), n);
231 }
else if (
const auto & od =
232 dynamic_cast<const OnDiskInvertedLists *>(ils)) {
233 uint32_t h = fourcc (
"ilod");
236 WRITE1 (ils->code_size);
238 WRITEVECTOR (od->lists);
// The slot container and the filename are copied into std::vector
// temporaries, presumably so they can go through WRITEVECTOR (the write
// calls themselves are not visible in this excerpt).
241 std::vector<OnDiskInvertedLists::Slot> v(
242 od->slots.begin(), od->slots.end());
246 std::vector<char> x(od->filename.begin(), od->filename.end());
252 FAISS_THROW_MSG (
"write_InvertedLists: unsupported invlist type");
257 void write_ProductQuantizer (
const ProductQuantizer*pq,
const char *fname) {
258 FILE *f = fopen (fname,
"w");
259 FAISS_THROW_IF_NOT_FMT (f,
"cannot open %s for writing", fname);
260 ScopeFileCloser closer(f);
261 write_ProductQuantizer (pq, f);
// Writes the HNSW graph structure to f: five length-prefixed vectors
// (assign_probas, cum_nneighbor_per_level, levels, offsets, neighbors)
// followed by five scalars (entry_point, max_level, efConstruction, efSearch,
// upper_beam). read_HNSW consumes the fields in the same order.
264 static void write_HNSW (
const HNSW *hnsw, FILE *f) {
266 WRITEVECTOR (hnsw->assign_probas);
267 WRITEVECTOR (hnsw->cum_nneighbor_per_level);
268 WRITEVECTOR (hnsw->levels);
269 WRITEVECTOR (hnsw->offsets);
270 WRITEVECTOR (hnsw->neighbors);
272 WRITE1 (hnsw->entry_point);
273 WRITE1 (hnsw->max_level);
274 WRITE1 (hnsw->efConstruction);
275 WRITE1 (hnsw->efSearch);
276 WRITE1 (hnsw->upper_beam);
// Writes the part common to all IndexIVF variants: the generic index header,
// nprobe, the coarse quantizer (recursively, as a full index), and the
// direct-map flag and table.
280 static void write_ivf_header (
const IndexIVF * ivf, FILE *f) {
281 write_index_header (ivf, f);
283 WRITE1 (ivf->nprobe);
284 write_index (ivf->quantizer, f);
285 WRITE1 (ivf->maintain_direct_map);
286 WRITEVECTOR (ivf->direct_map);
// Serialize an Index to the open stream f.
// The dynamic type of idx is probed with a chain of dynamic_casts; each branch
// picks a four-character tag and then writes the type-specific payload. Nested
// indexes (quantizers, wrapped indexes, HNSW storage) are written by recursive
// calls. A type matching no branch fails with
// "don't know how to serialize this type of index".
289 void write_index (
const Index *idx, FILE *f) {
290 if (
const IndexFlat * idxf = dynamic_cast<const IndexFlat *> (idx)) {
// IndexFlat: "IxFI" for inner product, "IxF2" for L2.
// NOTE(review): any other metric_type passes nullptr to fourcc, which
// dereferences its argument.
291 uint32_t h = fourcc (
292 idxf->metric_type == METRIC_INNER_PRODUCT ?
"IxFI" :
293 idxf->metric_type == METRIC_L2 ?
"IxF2" :
nullptr);
295 write_index_header (idx, f);
296 WRITEVECTOR (idxf->xb);
297 }
else if(
const IndexLSH * idxl = dynamic_cast<const IndexLSH *> (idx)) {
// IndexLSH: "IxHe"; the embedded rotation is written as a VectorTransform.
298 uint32_t h = fourcc (
"IxHe");
300 write_index_header (idx, f);
301 WRITE1 (idxl->nbits);
302 WRITE1 (idxl->rotate_data);
303 WRITE1 (idxl->train_thresholds);
304 WRITEVECTOR (idxl->thresholds);
305 WRITE1 (idxl->bytes_per_vec);
306 write_VectorTransform (&idxl->rrot, f);
307 WRITEVECTOR (idxl->codes);
308 }
else if(
const IndexPQ * idxp = dynamic_cast<const IndexPQ *> (idx)) {
// IndexPQ: "IxPq"; codes are followed by the polysemous search settings.
309 uint32_t h = fourcc (
"IxPq");
311 write_index_header (idx, f);
312 write_ProductQuantizer (&idxp->pq, f);
313 WRITEVECTOR (idxp->codes);
315 WRITE1 (idxp->search_type);
316 WRITE1 (idxp->encode_signs);
317 WRITE1 (idxp->polysemous_ht);
318 }
else if(
const Index2Layer * idxp =
319 dynamic_cast<const Index2Layer *> (idx)) {
// Index2Layer: "Ix2L"; first-level quantizer, PQ, code sizes, then codes.
320 uint32_t h = fourcc (
"Ix2L");
322 write_index_header (idx, f);
323 write_index (idxp->q1.quantizer, f);
324 WRITE1 (idxp->q1.nlist);
325 WRITE1 (idxp->q1.quantizer_trains_alone);
326 write_ProductQuantizer (&idxp->pq, f);
327 WRITE1 (idxp->code_size_1);
328 WRITE1 (idxp->code_size_2);
329 WRITE1 (idxp->code_size);
330 WRITEVECTOR (idxp->codes);
331 }
else if(
const IndexScalarQuantizer * idxs =
332 dynamic_cast<const IndexScalarQuantizer *> (idx)) {
// IndexScalarQuantizer: "IxSQ".
333 uint32_t h = fourcc (
"IxSQ");
335 write_index_header (idx, f);
336 write_ScalarQuantizer (&idxs->sq, f);
337 WRITEVECTOR (idxs->codes);
338 }
else if(
const IndexIVFFlat * ivfl =
339 dynamic_cast<const IndexIVFFlat *> (idx)) {
// IndexIVFFlat: "IwFl"; IVF header followed by the inverted lists.
340 uint32_t h = fourcc (
"IwFl");
342 write_ivf_header (ivfl, f);
343 write_InvertedLists (ivfl->invlists, f);
344 }
else if(
const IndexIVFScalarQuantizer * ivsc =
345 dynamic_cast<const IndexIVFScalarQuantizer *> (idx)) {
// IndexIVFScalarQuantizer: "IwSQ".
346 uint32_t h = fourcc (
"IwSQ");
348 write_ivf_header (ivsc, f);
349 write_ScalarQuantizer (&ivsc->sq, f);
350 WRITE1 (ivsc->code_size);
351 write_InvertedLists (ivsc->invlists, f);
352 }
else if(
const IndexIVFPQ * ivpq =
353 dynamic_cast<const IndexIVFPQ *> (idx)) {
// IndexIVFPQ: "IwPQ", or "IwQR" when the object is an IndexIVFPQR
// (ivfpqr is then non-null).
354 const IndexIVFPQR * ivfpqr =
dynamic_cast<const IndexIVFPQR *
> (idx);
356 uint32_t h = fourcc (ivfpqr ?
"IwQR" :
"IwPQ");
358 write_ivf_header (ivpq, f);
359 WRITE1 (ivpq->by_residual);
360 WRITE1 (ivpq->code_size);
361 write_ProductQuantizer (&ivpq->pq, f);
362 write_InvertedLists (ivpq->invlists, f);
// Refinement data of the IndexIVFPQR variant.
// NOTE(review): ivfpqr is null for a plain IndexIVFPQ; the guard that
// presumably surrounds these three statements is not visible in this excerpt.
364 write_ProductQuantizer (&ivfpqr->refine_pq, f);
365 WRITEVECTOR (ivfpqr->refine_codes);
366 WRITE1 (ivfpqr->k_factor);
369 }
else if(
const IndexPreTransform * ixpt =
370 dynamic_cast<const IndexPreTransform *> (idx)) {
// IndexPreTransform: "IxPT"; every transform of the chain in order, then
// the wrapped index.
371 uint32_t h = fourcc (
"IxPT");
373 write_index_header (ixpt, f);
374 int nt = ixpt->chain.size();
376 for (
int i = 0; i < nt; i++)
377 write_VectorTransform (ixpt->chain[i], f);
378 write_index (ixpt->index, f);
379 }
else if(
const MultiIndexQuantizer * imiq =
380 dynamic_cast<const MultiIndexQuantizer *> (idx)) {
// MultiIndexQuantizer: "Imiq"; header plus its product quantizer.
381 uint32_t h = fourcc (
"Imiq");
383 write_index_header (imiq, f);
384 write_ProductQuantizer (&imiq->pq, f);
385 }
else if(
const IndexRefineFlat * idxrf =
386 dynamic_cast<const IndexRefineFlat *> (idx)) {
// IndexRefineFlat: "IxRF"; base index, the embedded refine index, k_factor.
387 uint32_t h = fourcc (
"IxRF");
389 write_index_header (idxrf, f);
390 write_index (idxrf->base_index, f);
391 write_index (&idxrf->refine_index, f);
392 WRITE1 (idxrf->k_factor);
393 }
else if(
const IndexIDMap * idxmap =
394 dynamic_cast<const IndexIDMap *> (idx)) {
// IndexIDMap: "IxM2" when the object is an IndexIDMap2; the alternative tag
// is on a line not visible in this excerpt.
396 dynamic_cast<const IndexIDMap2 *
> (idx) ? fourcc (
"IxM2") :
400 write_index_header (idxmap, f);
401 write_index (idxmap->index, f);
402 WRITEVECTOR (idxmap->id_map);
403 }
else if(
const IndexHNSW * idxhnsw =
404 dynamic_cast<const IndexHNSW *> (idx)) {
// IndexHNSW: the tag depends on the concrete subclass ("IHNf" flat, "IHNp"
// PQ, "IHNs" SQ, "IHN2" two-level); the check below rejects h == 0.
406 dynamic_cast<const IndexHNSWFlat*
>(idx) ? fourcc(
"IHNf") :
407 dynamic_cast<const IndexHNSWPQ*
>(idx) ? fourcc(
"IHNp") :
408 dynamic_cast<const IndexHNSWSQ*
>(idx) ? fourcc(
"IHNs") :
409 dynamic_cast<const IndexHNSW2Level*
>(idx) ? fourcc(
"IHN2") :
411 FAISS_THROW_IF_NOT (h != 0);
413 write_index_header (idxhnsw, f);
414 write_HNSW (&idxhnsw->hnsw, f);
415 write_index (idxhnsw->storage, f);
417 FAISS_THROW_MSG (
"don't know how to serialize this type of index");
421 void write_index (
const Index *idx,
const char *fname) {
422 FILE *f = fopen (fname,
"w");
423 FAISS_THROW_IF_NOT_FMT (f,
"cannot open %s for writing", fname);
424 ScopeFileCloser closer(f);
425 write_index (idx, f);
428 void write_VectorTransform (
const VectorTransform *vt,
const char *fname) {
429 FILE *f = fopen (fname,
"w");
430 FAISS_THROW_IF_NOT_FMT (f,
"cannot open %s for writing", fname);
431 ScopeFileCloser closer(f);
432 write_VectorTransform (vt, f);
// Loads the generic Index header from f into idx. The statements visible in
// this excerpt read is_trained and metric_type and then force verbose off
// (it is assigned, not read from the file); earlier lines of the body are
// not shown here.
439 static void read_index_header (Index *idx, FILE *f) {
445 READ1 (idx->is_trained);
446 READ1 (idx->metric_type);
447 idx->verbose =
false;
// Deserialize a VectorTransform from the open stream f.
// The tag h (read on a line not visible in this excerpt) selects the concrete
// class: "rrot", "PCAm"/"PcAm", "LTra" (all LinearTransform flavours), "RmDT"
// or "VNrm". An unknown tag fails with "fourcc not recognized".
450 VectorTransform* read_VectorTransform (FILE *f) {
453 VectorTransform *vt =
nullptr;
455 if (h == fourcc (
"rrot") || h == fourcc (
"PCAm") ||
456 h == fourcc (
"LTra") || h == fourcc (
"PcAm")) {
457 LinearTransform *lt =
nullptr;
458 if (h == fourcc (
"rrot")) {
459 lt =
new RandomRotationMatrix ();
460 }
else if (h == fourcc (
"PCAm") ||
461 h == fourcc (
"PcAm")) {
462 PCAMatrix * pca =
new PCAMatrix ();
463 READ1 (pca->eigen_power);
464 READ1 (pca->random_rotation);
// balanced_bins is only present in files tagged "PcAm"; files tagged
// "PCAm" do not carry it.
465 if (h == fourcc (
"PcAm"))
466 READ1 (pca->balanced_bins);
467 READVECTOR (pca->mean);
468 READVECTOR (pca->eigenvalues);
469 READVECTOR (pca->PCAMat);
471 }
else if (h == fourcc (
"LTra")) {
472 lt =
new LinearTransform ();
474 READ1 (lt->have_bias);
// Consistency checks on what was loaded: the matrix must hold at least
// d_in * d_out coefficients and, when a bias is present, b at least d_out.
477 FAISS_THROW_IF_NOT (lt->A.size() >= lt->d_in * lt->d_out);
478 FAISS_THROW_IF_NOT (!lt->have_bias || lt->b.size() >= lt->d_out);
479 lt->set_is_orthonormal();
481 }
else if (h == fourcc (
"RmDT")) {
482 RemapDimensionsTransform *rdt =
new RemapDimensionsTransform ();
483 READVECTOR (rdt->map);
485 }
else if (h == fourcc (
"VNrm")) {
486 NormalizationTransform *nt =
new NormalizationTransform ();
490 FAISS_THROW_MSG(
"fourcc not recognized");
// The trained flag follows the type-specific payload.
494 READ1 (vt->is_trained);
// Reads the per-list sizes of an "ilar" section into `sizes`, which the
// caller has already sized to the number of lists.
// Two encodings are accepted (any other fails with "invalid list_type"):
//   "full"  the size of `sizes` must be unchanged after reading
//   "sprs"  a flat vector of (list index, size) pairs
499 static void read_ArrayInvertedLists_sizes (
500 FILE *f, std::vector<size_t> & sizes)
502 size_t nlist = sizes.size();
505 if (list_type == fourcc(
"full")) {
// Remember the expected count so a file with a different count is rejected.
506 size_t os = sizes.size();
508 FAISS_THROW_IF_NOT (os == sizes.size());
509 }
else if (list_type == fourcc(
"sprs")) {
510 std::vector<size_t> idsizes;
511 READVECTOR (idsizes);
// NOTE(review): idsizes[j + 1] is read without checking that idsizes has an
// even number of entries; only the list index idsizes[j] is bounds-checked.
512 for (
size_t j = 0; j < idsizes.size(); j += 2) {
513 FAISS_THROW_IF_NOT (idsizes[j] < sizes.size());
514 sizes[idsizes[j]] = idsizes[j + 1];
517 FAISS_THROW_MSG (
"invalid list_type");
// Deserialize inverted lists from f. Behaviour by tag and io_flags:
//   "il00"                       branch body not visible in this excerpt
//   "ilar" without IO_FLAG_MMAP  load everything into an ArrayInvertedLists
//   "ilar" with IO_FLAG_MMAP     map the file and expose it through a
//                                read-only OnDiskInvertedLists
//   "ilod"                       rebuild an OnDiskInvertedLists descriptor
// Any other combination fails with
// "read_InvertedLists: unsupported invlist type".
522 InvertedLists *read_InvertedLists (FILE *f,
int io_flags) {
525 if (h == fourcc (
"il00")) {
527 }
else if (h == fourcc (
"ilar") && !(io_flags & IO_FLAG_MMAP)) {
528 auto ails =
new ArrayInvertedLists (0, 0);
530 READ1 (ails->code_size);
531 ails->ids.resize (ails->nlist);
532 ails->codes.resize (ails->nlist);
533 std::vector<size_t> sizes (ails->nlist);
534 read_ArrayInvertedLists_sizes (f, sizes);
// First pass: allocate every list to its final size.
535 for (
size_t i = 0; i < ails->nlist; i++) {
536 ails->ids[i].resize (sizes[i]);
537 ails->codes[i].resize (sizes[i] * ails->code_size);
// Second pass: read codes then ids for each list, the order in which
// write_InvertedLists emitted them.
539 for (
size_t i = 0; i < ails->nlist; i++) {
540 size_t n = ails->ids[i].size();
542 READANDCHECK (ails->codes[i].data(), n * ails->code_size);
543 READANDCHECK (ails->ids[i].data(), n);
547 }
else if (h == fourcc (
"ilar") && (io_flags & IO_FLAG_MMAP)) {
548 auto ails =
new OnDiskInvertedLists ();
550 READ1 (ails->code_size);
551 ails->read_only =
true;
552 ails->lists.resize (ails->nlist);
553 std::vector<size_t> sizes (ails->nlist);
554 read_ArrayInvertedLists_sizes (f, sizes);
// o0 is the stream position where the list payload starts; o is advanced
// per list in the loop below.
555 size_t o0 = ftell (f), o = o0;
// The mapping length is the file size reported by fstat (st_size).
558 int ret = fstat (fileno(f), &buf);
559 FAISS_THROW_IF_NOT_FMT (ret == 0,
560 "fstat failed: %s", strerror(errno));
561 ails->totsize = buf.st_size;
562 ails->ptr = (uint8_t*)mmap (
nullptr, ails->totsize,
563 PROT_READ, MAP_SHARED,
565 FAISS_THROW_IF_NOT_FMT (ails->ptr != MAP_FAILED,
566 "could not mmap: %s",
// Each list gets size == capacity, and o advances by the bytes it occupies.
569 for (
size_t i = 0; i < ails->nlist; i++) {
570 OnDiskInvertedLists::List & l = ails->lists[i];
571 l.size = l.capacity = sizes[i];
573 o += l.size * (
sizeof(OnDiskInvertedLists::idx_t) +
// Reposition the stream at offset o (advanced past the lists above).
// NOTE(review): the fseek result is not checked.
577 fseek (f, o, SEEK_SET);
579 }
else if (h == fourcc (
"ilod")) {
580 OnDiskInvertedLists *od =
new OnDiskInvertedLists();
581 od->read_only = io_flags & IO_FLAG_READ_ONLY;
583 READ1 (od->code_size);
585 READVECTOR (od->lists);
// Slots and filename are assigned from the temporaries v and x; the
// statements that fill them are not visible in this excerpt.
587 std::vector<OnDiskInvertedLists::Slot> v;
589 od->slots.assign(v.begin(), v.end());
594 od->filename.assign(x.begin(), x.end());
600 FAISS_THROW_MSG (
"read_InvertedLists: unsupported invlist type");
// Reads inverted lists from f for the IVF index `ivf`. The loaded lists must
// agree with the index on nlist and code_size, and the index is marked as
// owning its lists (the assignment of the lists to the index itself is on a
// line not visible in this excerpt).
// NOTE(review): ils is dereferenced unconditionally; confirm what the stream
// overload returns for the "il00" tag that the writer emits for null lists.
604 static void read_InvertedLists (IndexIVF *ivf, FILE *f,
int io_flags) {
605 InvertedLists *ils = read_InvertedLists (f, io_flags);
606 FAISS_THROW_IF_NOT (ils->nlist == ivf->nlist &&
607 ils->code_size == ivf->code_size);
609 ivf->own_invlists =
true;
// Stream overload: fills *pq from f. In the visible statements,
// set_derived_values() is called before the centroid table is loaded; the
// reads that precede it are not shown in this excerpt.
613 static void read_ProductQuantizer (ProductQuantizer *pq, FILE *f) {
617 pq->set_derived_values ();
618 READVECTOR (pq->centroids);
// Fills *ivsc from f. Visible statements read rangestat, rangestat_arg,
// code_size and the `trained` vector, in the same relative order as
// write_ScalarQuantizer writes them.
621 static void read_ScalarQuantizer (ScalarQuantizer *ivsc, FILE *f) {
623 READ1 (ivsc->rangestat);
624 READ1 (ivsc->rangestat_arg);
626 READ1 (ivsc->code_size);
627 READVECTOR (ivsc->trained);
// Fills *hnsw from f: five length-prefixed vectors followed by five scalars,
// in the same order write_HNSW writes them.
631 static void read_HNSW (HNSW *hnsw, FILE *f) {
632 READVECTOR (hnsw->assign_probas);
633 READVECTOR (hnsw->cum_nneighbor_per_level);
634 READVECTOR (hnsw->levels);
635 READVECTOR (hnsw->offsets);
636 READVECTOR (hnsw->neighbors);
638 READ1 (hnsw->entry_point);
639 READ1 (hnsw->max_level);
640 READ1 (hnsw->efConstruction);
641 READ1 (hnsw->efSearch);
642 READ1 (hnsw->upper_beam);
645 ProductQuantizer * read_ProductQuantizer (
const char*fname) {
646 FILE *f = fopen (fname,
"r");
647 FAISS_THROW_IF_NOT_FMT (f,
"cannot open %s for writing", fname);
648 ScopeFileCloser closer(f);
649 ProductQuantizer *pq =
new ProductQuantizer();
650 ScopeDeleter1<ProductQuantizer> del (pq);
651 read_ProductQuantizer(pq, f);
// Reads the part common to all IndexIVF variants: the generic header, then
// the coarse quantizer (a full nested index that the IVF index is marked as
// owning through own_fields), then either per-list id vectors or the
// direct-map flag and table.
// `ids` defaults to nullptr; callers reading the older "Iv.." tags pass a
// vector to receive per-list ids.
// NOTE(review): the condition that selects between the `ids` path and the
// direct-map path is on lines not visible in this excerpt; presumably the
// `ids->...` statements only run when ids is non-null.
656 static void read_ivf_header (
657 IndexIVF * ivf, FILE *f,
658 std::vector<std::vector<Index::idx_t> > *ids =
nullptr)
660 read_index_header (ivf, f);
663 ivf->quantizer = read_index (f);
664 ivf->own_fields =
true;
666 ids->resize (ivf->nlist);
667 for (
size_t i = 0; i < ivf->nlist; i++)
668 READVECTOR ((*ids)[i]);
670 READ1 (ivf->maintain_direct_map);
671 READVECTOR (ivf->direct_map);
// Creates an ArrayInvertedLists sized from ivf (nlist, code_size), moves the
// caller's per-list ids into it by swapping (so `ids` holds the new object's
// previous contents afterwards), and marks ivf as owning its inverted lists.
// Attaching the lists to ivf and returning them happen on lines not visible
// in this excerpt.
675 static ArrayInvertedLists *set_array_invlist(
676 IndexIVF *ivf, std::vector<std::vector<Index::idx_t> > &ids)
678 ArrayInvertedLists *ail =
new ArrayInvertedLists (
679 ivf->nlist, ivf->code_size);
680 std::swap (ail->ids, ids);
682 ivf->own_invlists =
true;
// Reads an IndexIVFPQ or IndexIVFPQR whose tag h has already been consumed.
//   "IvPQ" / "IvQR"  legacy layout: ids come with the IVF header and the
//                    codes follow as one vector per list
//   "IwPQ" / "IwQR"  inverted lists are read by read_InvertedLists
// The "..QR" tags create an IndexIVFPQR, whose refinement data follows.
686 static IndexIVFPQ *read_ivfpq (FILE *f, uint32_t h,
int io_flags)
688 bool legacy = h == fourcc (
"IvQR") || h == fourcc (
"IvPQ");
// ivfpqr is non-null only for the two "..QR" tags; ivpq aliases it then.
690 IndexIVFPQR *ivfpqr =
691 h == fourcc (
"IvQR") || h == fourcc (
"IwQR") ?
692 new IndexIVFPQR () : nullptr;
693 IndexIVFPQ * ivpq = ivfpqr ? ivfpqr :
new IndexIVFPQ ();
695 std::vector<std::vector<Index::idx_t> > ids;
696 read_ivf_header (ivpq, f, legacy ? &ids :
nullptr);
697 READ1 (ivpq->by_residual);
698 READ1 (ivpq->code_size);
699 read_ProductQuantizer (&ivpq->pq, f);
// Two ways of loading the lists; the branch that selects between them is
// not visible in this excerpt (presumably on `legacy`).
702 ArrayInvertedLists *ail = set_array_invlist (ivpq, ids);
703 for (
size_t i = 0; i < ail->nlist; i++)
704 READVECTOR (ail->codes[i]);
706 read_InvertedLists (ivpq, f, io_flags);
// The precomputed table is not stored: the flag is reset and the table is
// recomputed when the index works on residuals.
710 ivpq->use_precomputed_table = 0;
711 if (ivpq->by_residual)
712 ivpq->precompute_table ();
// Refinement data of the IndexIVFPQR variant.
// NOTE(review): ivfpqr is null for the plain "..PQ" tags; the guard that
// presumably surrounds these statements is not visible in this excerpt.
714 read_ProductQuantizer (&ivfpqr->refine_pq, f);
715 READVECTOR (ivfpqr->refine_codes);
716 READ1 (ivfpqr->k_factor);
// Global switch, 0 by default. read_index tests it against the value 2 while
// loading an "IxPT" (IndexPreTransform) section; the effect of that branch is
// on lines not visible in this excerpt.
721 int read_old_fmt_hack = 0;
// Deserialize an Index from the open stream f.
// The four-character tag h (read on a line not visible in this excerpt)
// selects the concrete class to allocate and the payload layout to parse.
// Several older tags are accepted besides the ones write_index emits. Nested
// indexes are read by recursive calls. io_flags is passed on to the
// inverted-list readers. An unknown tag fails with
// "Index type 0x%08x not supported\n".
723 Index *read_index (FILE * f,
int io_flags) {
724 Index * idx =
nullptr;
// Flat index: "IxFI" -> IndexFlatIP, "IxF2" -> IndexFlatL2.
727 if (h == fourcc (
"IxFI") || h == fourcc (
"IxF2")) {
729 if (h == fourcc (
"IxFI")) idxf =
new IndexFlatIP ();
730 else idxf =
new IndexFlatL2 ();
731 read_index_header (idxf, f);
732 READVECTOR (idxf->xb);
// The stored vectors must match the header: ntotal vectors of d values.
733 FAISS_THROW_IF_NOT (idxf->xb.size() == idxf->ntotal * idxf->d);
736 }
else if (h == fourcc(
"IxHE") || h == fourcc(
"IxHe")) {
// IndexLSH: "IxHe" is the tag write_index emits; "IxHE" is also accepted
// with the extra handling below.
737 IndexLSH * idxl =
new IndexLSH ();
738 read_index_header (idxl, f);
740 READ1 (idxl->rotate_data);
741 READ1 (idxl->train_thresholds);
742 READVECTOR (idxl->thresholds);
743 READ1 (idxl->bytes_per_vec);
// "IxHE" files are only accepted when nbits is a multiple of 64, and their
// bytes_per_vec is multiplied by 8 after loading.
744 if (h == fourcc(
"IxHE")) {
745 FAISS_THROW_IF_NOT_FMT (idxl->nbits % 64 == 0,
746 "can only read old format IndexLSH with "
747 "nbits multiple of 64 (got %d)",
750 idxl->bytes_per_vec *= 8;
// The embedded transform must deserialize to a RandomRotationMatrix.
753 RandomRotationMatrix *rrot =
dynamic_cast<RandomRotationMatrix *
>
754 (read_VectorTransform (f));
755 FAISS_THROW_IF_NOT_MSG(rrot,
"expected a random rotation");
759 READVECTOR (idxl->codes);
// Cross-checks between the rotation, the header and the code array.
760 FAISS_THROW_IF_NOT (idxl->rrot.d_in == idxl->d &&
761 idxl->rrot.d_out == idxl->nbits);
763 idxl->codes.size() == idxl->ntotal * idxl->bytes_per_vec);
765 }
else if (h == fourcc (
"IxPQ") || h == fourcc (
"IxPo") ||
766 h == fourcc (
"IxPq")) {
// IndexPQ: three tags. Only "IxPo" and "IxPq" carry the polysemous
// settings; "IxPQ" and "IxPo" get their metric forced to L2.
768 IndexPQ * idxp =
new IndexPQ ();
769 read_index_header (idxp, f);
770 read_ProductQuantizer (&idxp->pq, f);
771 READVECTOR (idxp->codes);
772 if (h == fourcc (
"IxPo") || h == fourcc (
"IxPq")) {
773 READ1 (idxp->search_type);
774 READ1 (idxp->encode_signs);
775 READ1 (idxp->polysemous_ht);
780 if (h == fourcc (
"IxPQ") || h == fourcc (
"IxPo")) {
781 idxp->metric_type = METRIC_L2;
784 }
else if (h == fourcc (
"IvFl") || h == fourcc(
"IvFL")) {
// IndexIVFFlat, "Iv.." layouts: ids arrive with the IVF header and the
// codes follow as one vector per list.
785 IndexIVFFlat * ivfl =
new IndexIVFFlat ();
786 std::vector<std::vector<Index::idx_t> > ids;
787 read_ivf_header (ivfl, f, &ids);
788 ivfl->code_size = ivfl->d *
sizeof(float);
789 ArrayInvertedLists *ail = set_array_invlist (ivfl, ids);
// "IvFL" stores the code bytes directly; the other loop goes through a
// temporary float vector and copies its bytes into the code array.
791 if (h == fourcc (
"IvFL")) {
792 for (
size_t i = 0; i < ivfl->nlist; i++) {
793 READVECTOR (ail->codes[i]);
796 for (
size_t i = 0; i < ivfl->nlist; i++) {
797 std::vector<float> vec;
799 ail->codes[i].resize(vec.size() *
sizeof(float));
800 memcpy(ail->codes[i].data(), vec.data(),
801 ail->codes[i].size());
805 }
else if (h == fourcc (
"IwFl")) {
// IndexIVFFlat as written by write_index; code_size is derived from d,
// not read from the file.
806 IndexIVFFlat * ivfl =
new IndexIVFFlat ();
807 read_ivf_header (ivfl, f);
808 ivfl->code_size = ivfl->d *
sizeof(float);
809 read_InvertedLists (ivfl, f, io_flags);
811 }
else if (h == fourcc (
"IxSQ")) {
// IndexScalarQuantizer; code_size is copied from the quantizer after loading.
812 IndexScalarQuantizer * idxs =
new IndexScalarQuantizer ();
813 read_index_header (idxs, f);
814 read_ScalarQuantizer (&idxs->sq, f);
815 READVECTOR (idxs->codes);
816 idxs->code_size = idxs->sq.code_size;
818 }
else if(h == fourcc (
"IvSQ")) {
// IndexIVFScalarQuantizer, "Iv.." layout with per-list code vectors.
819 IndexIVFScalarQuantizer * ivsc =
new IndexIVFScalarQuantizer();
820 std::vector<std::vector<Index::idx_t> > ids;
821 read_ivf_header (ivsc, f, &ids);
822 read_ScalarQuantizer (&ivsc->sq, f);
823 READ1 (ivsc->code_size);
824 ArrayInvertedLists *ail = set_array_invlist (ivsc, ids);
825 for(
int i = 0; i < ivsc->nlist; i++)
826 READVECTOR (ail->codes[i]);
828 }
else if(h == fourcc (
"IwSQ")) {
// IndexIVFScalarQuantizer as written by write_index.
829 IndexIVFScalarQuantizer * ivsc =
new IndexIVFScalarQuantizer();
830 read_ivf_header (ivsc, f);
831 read_ScalarQuantizer (&ivsc->sq, f);
832 READ1 (ivsc->code_size);
833 read_InvertedLists (ivsc, f, io_flags);
835 }
else if(h == fourcc (
"IvPQ") || h == fourcc (
"IvQR") ||
836 h == fourcc (
"IwPQ") || h == fourcc (
"IwQR")) {
// All IVFPQ flavours are handled by read_ivfpq.
838 idx = read_ivfpq (f, h, io_flags);
840 }
else if(h == fourcc (
"IxPT")) {
// IndexPreTransform: nt transforms, then the wrapped index, which the
// wrapper is marked as owning.
841 IndexPreTransform * ixpt =
new IndexPreTransform();
842 ixpt->own_fields =
true;
843 read_index_header (ixpt, f);
845 if (read_old_fmt_hack == 2) {
850 for (
int i = 0; i < nt; i++) {
851 ixpt->chain.push_back (read_VectorTransform (f));
853 ixpt->index = read_index (f);
855 }
else if(h == fourcc (
"Imiq")) {
// MultiIndexQuantizer: header plus its product quantizer.
856 MultiIndexQuantizer * imiq =
new MultiIndexQuantizer ();
857 read_index_header (imiq, f);
858 read_ProductQuantizer (&imiq->pq, f);
860 }
else if(h == fourcc (
"IxRF")) {
// IndexRefineFlat: base index, then the refine index, which is read as a
// separate object and swapped into the embedded member.
// NOTE(review): the dynamic_cast result rf is dereferenced without a
// visible null check.
861 IndexRefineFlat *idxrf =
new IndexRefineFlat ();
862 read_index_header (idxrf, f);
863 idxrf->base_index = read_index(f);
864 idxrf->own_fields =
true;
865 IndexFlat *rf =
dynamic_cast<IndexFlat*
> (read_index (f));
866 std::swap (*rf, idxrf->refine_index);
868 READ1 (idxrf->k_factor);
870 }
else if(h == fourcc (
"IxMp") || h == fourcc (
"IxM2")) {
// IndexIDMap ("IxMp") or IndexIDMap2 ("IxM2"); construct_rev_map() is
// called through a cast to IndexIDMap2 (the guard on is_map2 is presumably
// on a line not visible in this excerpt).
871 bool is_map2 = h == fourcc (
"IxM2");
872 IndexIDMap * idxmap = is_map2 ?
new IndexIDMap2 () : new IndexIDMap ();
873 read_index_header (idxmap, f);
874 idxmap->index = read_index (f);
875 idxmap->own_fields =
true;
876 READVECTOR (idxmap->id_map);
878 static_cast<IndexIDMap2*
>(idxmap)->construct_rev_map ();
881 }
else if (h == fourcc (
"Ix2L")) {
// Index2Layer: same field order as the "Ix2L" branch of write_index.
882 Index2Layer * idxp =
new Index2Layer ();
883 read_index_header (idxp, f);
884 idxp->q1.quantizer = read_index (f);
885 READ1 (idxp->q1.nlist);
886 READ1 (idxp->q1.quantizer_trains_alone);
887 read_ProductQuantizer (&idxp->pq, f);
888 READ1 (idxp->code_size_1);
889 READ1 (idxp->code_size_2);
890 READ1 (idxp->code_size);
891 READVECTOR (idxp->codes);
893 }
else if(h == fourcc(
"IHNf") || h == fourcc(
"IHNp") ||
894 h == fourcc(
"IHNs") || h == fourcc(
"IHN2")) {
// IndexHNSW family: the tag picks the concrete subclass; the graph and the
// storage index follow the header.
895 IndexHNSW *idxhnsw =
nullptr;
896 if (h == fourcc(
"IHNf")) idxhnsw =
new IndexHNSWFlat ();
897 if (h == fourcc(
"IHNp")) idxhnsw =
new IndexHNSWPQ ();
898 if (h == fourcc(
"IHNs")) idxhnsw =
new IndexHNSWSQ ();
899 if (h == fourcc(
"IHN2")) idxhnsw =
new IndexHNSW2Level ();
900 read_index_header (idxhnsw, f);
901 read_HNSW (&idxhnsw->hnsw, f);
902 idxhnsw->storage = read_index (f);
903 idxhnsw->own_fields =
true;
// For the PQ flavour the SDC table is recomputed after loading.
// NOTE(review): the dynamic_cast to IndexPQ* is dereferenced without a
// null check.
904 if (h == fourcc(
"IHNp")) {
905 dynamic_cast<IndexPQ*
>(idxhnsw->storage)->pq.compute_sdc_table ();
909 FAISS_THROW_FMT(
"Index type 0x%08x not supported\n", h);
917 Index *read_index (
const char *fname,
int io_flags) {
918 FILE *f = fopen (fname,
"r");
919 FAISS_THROW_IF_NOT_FMT (f,
"cannot open %s for reading:", fname);
920 Index *idx = read_index (f, io_flags);
925 VectorTransform *read_VectorTransform (
const char *fname) {
926 FILE *f = fopen (fname,
"r");
928 fprintf (stderr,
"cannot open %s for reading:", fname);
932 VectorTransform *vt = read_VectorTransform (f);
// Free-function entry point for cloning: forwards to clone_Index on the
// object `cl` (its declaration is on a line not visible in this excerpt).
943 Index * clone_index (
const Index *index)
946 return cl.clone_Index (index);
// TRYCLONE(classname, obj): if obj's dynamic type is (or derives from)
// classname, return a copy made with classname's copy constructor. Because
// the dynamic_cast also matches derived classes, the order in which the macro
// is invoked matters.
951 #define TRYCLONE(classname, obj) \
952 if (const classname *clo = dynamic_cast<const classname *>(obj)) { \
953 return new classname(*clo); \
// Copies a VectorTransform by trying each supported concrete type in turn.
// LinearTransform is tried last, after OPQMatrix, PCAMatrix and
// RandomRotationMatrix. Unsupported types fail with
// "clone not supported for this type of VectorTransform".
956 VectorTransform *Cloner::clone_VectorTransform (
const VectorTransform *vt)
958 TRYCLONE (RemapDimensionsTransform, vt)
959 TRYCLONE (OPQMatrix, vt)
960 TRYCLONE (PCAMatrix, vt)
961 TRYCLONE (RandomRotationMatrix, vt)
962 TRYCLONE (LinearTransform, vt)
964 FAISS_THROW_MSG(
"clone not supported for this type of VectorTransform");
// Copies the IVF object itself through its copy constructor. IndexIVFPQR is
// tried before IndexIVFPQ. Unsupported types fail with
// "clone not supported for this type of IndexIVF". clone_Index is the caller
// that then replaces the copy's inverted lists and quantizer.
969 IndexIVF * Cloner::clone_IndexIVF (
const IndexIVF *ivf)
971 TRYCLONE (IndexIVFPQR, ivf)
972 TRYCLONE (IndexIVFPQ, ivf)
973 TRYCLONE (IndexIVFFlat, ivf)
974 TRYCLONE (IndexIVFScalarQuantizer, ivf)
976 FAISS_THROW_MSG(
"clone not supported for this type of IndexIVF");
// Copy of an index. Types handled by TRYCLONE are copied with their copy
// constructor. The IVF, pre-transform and id-map branches additionally clone
// the index they refer to and mark the copy as owning it. Unsupported types
// fail with "clone not supported for this type of Index".
981 Index *Cloner::clone_Index (
const Index *index)
// IndexFlatL2 and IndexFlatIP are tried before the generic IndexFlat.
983 TRYCLONE (IndexPQ, index)
984 TRYCLONE (IndexLSH, index)
985 TRYCLONE (IndexFlatL2, index)
986 TRYCLONE (IndexFlatIP, index)
987 TRYCLONE (IndexFlat, index)
988 TRYCLONE (IndexScalarQuantizer, index)
989 TRYCLONE (MultiIndexQuantizer, index)
990 if (const IndexIVF * ivf = dynamic_cast<const IndexIVF*>(index)) {
// IVF: copy the object, then give the copy its own inverted lists (only
// ArrayInvertedLists or a null pointer are supported) and its own clone
// of the quantizer.
991 IndexIVF *res = clone_IndexIVF (ivf);
992 if (ivf->invlists ==
nullptr) {
993 res->invlists =
nullptr;
994 }
else if (
auto *ails = dynamic_cast<const ArrayInvertedLists*>
996 res->invlists =
new ArrayInvertedLists(*ails);
997 res->own_invlists =
true;
999 FAISS_THROW_MSG(
"clone not supported for this type of inverted lists");
1001 res->own_fields =
true;
1002 res->quantizer = clone_Index (ivf->quantizer);
1004 }
else if (
const IndexPreTransform * ipt =
1005 dynamic_cast<const IndexPreTransform*> (index)) {
// Pre-transform: built from a default-constructed object; the wrapped index
// and every transform of the chain are cloned individually.
1006 IndexPreTransform *res =
new IndexPreTransform ();
1008 res->index = clone_Index (ipt->index);
1009 for (
int i = 0; i < ipt->chain.size(); i++)
1010 res->chain.push_back (clone_VectorTransform (ipt->chain[i]));
1011 res->own_fields =
true;
1013 }
else if (
const IndexIDMap *idmap =
1014 dynamic_cast<const IndexIDMap*> (index)) {
// Id map: copy-construct, then replace the wrapped index with an owned clone.
1015 IndexIDMap *res =
new IndexIDMap (*idmap);
1016 res->own_fields =
true;
1017 res->index = clone_Index (idmap->index);
1020 FAISS_THROW_MSG(
"clone not supported for this type of Index");
long idx_t
all indices are this type
idx_t ntotal
total number of indexed vectors
MetricType metric_type
type of metric this index uses for search
bool is_trained
set if the Index does not require training, or if training is done already