18 #include "FaissAssert.h"
20 #include "IndexFlat.h"
21 #include "VectorTransform.h"
25 #include "IndexIVFPQ.h"
26 #include "MetaIndexes.h"
27 #include "IndexScalarQuantizer.h"
28 #include "IndexHNSW.h"
/**
 * Pack a four-character tag into a 32-bit code.
 *
 * The bytes are combined so that sx[0] ends up in the least significant
 * byte of the result and sx[3] in the most significant one.
 *
 * @param sx  pointer to at least 4 readable chars (no terminator needed)
 * @return    sx[0] | sx[1] << 8 | sx[2] << 16 | sx[3] << 24, with every
 *            byte taken as unsigned
 */
static uint32_t fourcc (const char sx[4]) {
    const unsigned char *x = reinterpret_cast<const unsigned char *> (sx);
    // Widen each byte to uint32_t before shifting: an unsigned char is
    // promoted to int, and shifting a byte >= 0x80 left by 24 would
    // overflow that int.
    return  static_cast<uint32_t> (x[0])        |
           (static_cast<uint32_t> (x[1]) <<  8) |
           (static_cast<uint32_t> (x[2]) << 16) |
           (static_cast<uint32_t> (x[3]) << 24);
}
/** Write n items starting at ptr to the FILE* named `f` in the enclosing
 *  scope. The item size is sizeof(*ptr). Fails through
 *  FAISS_THROW_IF_NOT_MSG ("write error") when fwrite reports a count
 *  different from n. Note that n is evaluated twice. */
#define WRITEANDCHECK(ptr, n) {                                 \
        size_t ret = fwrite (ptr, sizeof (* (ptr)), n, f);      \
        FAISS_THROW_IF_NOT_MSG (ret == (n), "write error");     \
    }
/** Read n items into the buffer at ptr from the FILE* named `f` in the
 *  enclosing scope. The item size is sizeof(*ptr). Fails through
 *  FAISS_THROW_IF_NOT_MSG ("read error") when fread reports a count
 *  different from n (short read or I/O error). Note that n is evaluated
 *  twice. */
#define READANDCHECK(ptr, n) {                                  \
        size_t ret = fread (ptr, sizeof (* (ptr)), n, f);       \
        FAISS_THROW_IF_NOT_MSG (ret == (n), "read error");      \
    }
/** Write the single object x to `f`. x must be an lvalue because its
 *  address is taken. */
#define WRITE1(x) WRITEANDCHECK(&(x), 1)
/** Read a single object from `f` into the lvalue x. */
#define READ1(x)  READANDCHECK(&(x), 1)
/** Serialize a contiguous container (anything with size() and data()) to
 *  `f`: first the element count as a size_t, then the raw elements. */
#define WRITEVECTOR(vec) {                      \
        size_t size = (vec).size ();            \
        WRITEANDCHECK (&size, 1);               \
        WRITEANDCHECK ((vec).data (), size);    \
    }
88 #define READVECTOR(vec) { \
90 READANDCHECK (&size, 1); \
91 FAISS_THROW_IF_NOT (size >= 0 && size < (1L << 40)); \
92 (vec).resize (size); \
93 READANDCHECK ((vec).data (), size); \
/** Write a raw table to `f` so that its payload starts on a file offset
 *  that is a multiple of 16.
 *
 *  Layout: the element count (size_t), then 16 - idx padding bytes where
 *  idx is the current offset modulo 16, then size_in elements of tab.
 *  The first padding byte stores 15 - idx, i.e. how many padding bytes
 *  follow it; the others are 0. When idx is 0 a full 16 bytes of padding
 *  are written.
 *
 *  Fails through FAISS_THROW_IF_NOT_MSG if the stream position cannot be
 *  determined or if any write comes up short. */
#define WRITETABPAD16(tab, size_in) {                                   \
        size_t size = (size_in);                                        \
        WRITEANDCHECK (&size, 1);                                       \
        uint8_t padding[16] = {0};                                      \
        /* ftell returns -1 on failure: reject it before it is used */  \
        /* as an index into padding[].                              */  \
        long tabpad_pos = ftell (f);                                    \
        FAISS_THROW_IF_NOT_MSG (tabpad_pos >= 0, "ftell failed");       \
        int idx = tabpad_pos % 16;                                      \
        padding [idx] = 15 - idx;                                       \
        WRITEANDCHECK (padding + idx, 16 - idx);                        \
        WRITEANDCHECK ((tab), size);                                    \
    }
115 #define READTABPAD16(tab, basetype, expected_size) { \
117 READANDCHECK (&size, 1); \
118 FAISS_THROW_IF_NOT ((expected_size) == size); \
119 uint8_t padding[16], npad; \
121 FAISS_THROW_IF_NOT (npad < 16); \
122 READANDCHECK (padding, npad); \
123 (tab) = new basetype [size]; \
124 READANDCHECK ((tab), size); \
128 #define TABOFFSETPAD16(taboffset, basetype, expected_size) { \
130 READANDCHECK (&size, 1); \
131 FAISS_THROW_IF_NOT ((expected_size) == size); \
132 uint8_t padding[16], npad; \
134 FAISS_THROW_IF_NOT (npad < 16); \
135 READANDCHECK (padding, npad); \
136 taboffset = ftell(f); \
137 fseek (f, sizeof(basetype) * size, SEEK_CUR); \
147 static void write_index_header (
const Index *idx, FILE *f) {
157 void write_VectorTransform (
const VectorTransform *vt, FILE *f) {
158 if (
const LinearTransform * lt =
159 dynamic_cast < const LinearTransform *> (vt)) {
160 if (dynamic_cast<const RandomRotationMatrix *>(lt)) {
161 uint32_t h = fourcc (
"rrot");
163 }
else if (
const PCAMatrix * pca =
164 dynamic_cast<const PCAMatrix *>(lt)) {
165 uint32_t h = fourcc (
"PcAm");
167 WRITE1 (pca->eigen_power);
168 WRITE1 (pca->random_rotation);
169 WRITE1 (pca->balanced_bins);
170 WRITEVECTOR (pca->mean);
171 WRITEVECTOR (pca->eigenvalues);
172 WRITEVECTOR (pca->PCAMat);
175 uint32_t h = fourcc (
"LTra");
178 WRITE1 (lt->have_bias);
181 }
else if (
const RemapDimensionsTransform *rdt =
182 dynamic_cast<const RemapDimensionsTransform *>(vt)) {
183 uint32_t h = fourcc (
"RmDT");
185 WRITEVECTOR (rdt->map);
186 }
else if (
const NormalizationTransform *nt =
187 dynamic_cast<const NormalizationTransform *>(vt)) {
188 uint32_t h = fourcc (
"VNrm");
192 FAISS_THROW_MSG (
"cannot serialize this");
197 WRITE1 (vt->is_trained);
200 static void write_ProductQuantizer (
const ProductQuantizer *pq, FILE *f) {
204 WRITEVECTOR (pq->centroids);
207 static void write_ScalarQuantizer (
const ScalarQuantizer *ivsc, FILE *f) {
208 WRITE1 (ivsc->qtype);
209 WRITE1 (ivsc->rangestat);
210 WRITE1 (ivsc->rangestat_arg);
212 WRITE1 (ivsc->code_size);
213 WRITEVECTOR (ivsc->trained);
216 void write_ProductQuantizer (
const ProductQuantizer*pq,
const char *fname) {
217 FILE *f = fopen (fname,
"w");
218 FAISS_THROW_IF_NOT_FMT (f,
"cannot open %s for writing", fname);
219 ScopeFileCloser closer(f);
220 write_ProductQuantizer (pq, f);
223 static void write_HNSW (
const HNSW *hnsw, FILE *f) {
225 WRITEVECTOR (hnsw->assign_probas);
226 WRITEVECTOR (hnsw->cum_nneighbor_per_level);
227 WRITEVECTOR (hnsw->levels);
228 WRITEVECTOR (hnsw->offsets);
229 WRITEVECTOR (hnsw->neighbors);
231 WRITE1 (hnsw->entry_point);
232 WRITE1 (hnsw->max_level);
233 WRITE1 (hnsw->efConstruction);
234 WRITE1 (hnsw->efSearch);
235 WRITE1 (hnsw->upper_beam);
239 static void write_ivf_header (
const IndexIVF * ivf, FILE *f,
240 bool include_ids =
true) {
241 write_index_header (ivf, f);
243 WRITE1 (ivf->nprobe);
244 write_index (ivf->quantizer, f);
246 for (
size_t i = 0; i < ivf->nlist; i++)
247 WRITEVECTOR (ivf->ids[i]);
249 WRITE1 (ivf->maintain_direct_map);
250 WRITEVECTOR (ivf->direct_map);
253 void write_index (
const Index *idx, FILE *f) {
254 if (
const IndexFlat * idxf = dynamic_cast<const IndexFlat *> (idx)) {
255 uint32_t h = fourcc (
256 idxf->metric_type == METRIC_INNER_PRODUCT ?
"IxFI" :
257 idxf->metric_type == METRIC_L2 ?
"IxF2" :
nullptr);
259 write_index_header (idx, f);
260 WRITEVECTOR (idxf->xb);
261 }
else if(
const IndexLSH * idxl = dynamic_cast<const IndexLSH *> (idx)) {
262 uint32_t h = fourcc (
"IxHe");
264 write_index_header (idx, f);
265 WRITE1 (idxl->nbits);
266 WRITE1 (idxl->rotate_data);
267 WRITE1 (idxl->train_thresholds);
268 WRITEVECTOR (idxl->thresholds);
269 WRITE1 (idxl->bytes_per_vec);
270 write_VectorTransform (&idxl->rrot, f);
271 WRITEVECTOR (idxl->codes);
272 }
else if(
const IndexPQ * idxp = dynamic_cast<const IndexPQ *> (idx)) {
273 uint32_t h = fourcc (
"IxPq");
275 write_index_header (idx, f);
276 write_ProductQuantizer (&idxp->pq, f);
277 WRITEVECTOR (idxp->codes);
279 WRITE1 (idxp->search_type);
280 WRITE1 (idxp->encode_signs);
281 WRITE1 (idxp->polysemous_ht);
282 }
else if(
const Index2Layer * idxp =
283 dynamic_cast<const Index2Layer *> (idx)) {
284 uint32_t h = fourcc (
"Ix2L");
286 write_index_header (idx, f);
287 write_index (idxp->q1.quantizer, f);
288 WRITE1 (idxp->q1.nlist);
289 WRITE1 (idxp->q1.quantizer_trains_alone);
290 write_ProductQuantizer (&idxp->pq, f);
291 WRITE1 (idxp->code_size_1);
292 WRITE1 (idxp->code_size_2);
293 WRITE1 (idxp->code_size);
294 WRITEVECTOR (idxp->codes);
295 }
else if(
const IndexScalarQuantizer * idxs =
296 dynamic_cast<const IndexScalarQuantizer *> (idx)) {
297 uint32_t h = fourcc (
"IxSQ");
299 write_index_header (idx, f);
300 write_ScalarQuantizer (&idxs->sq, f);
301 WRITEVECTOR (idxs->codes);
302 }
else if(
const IndexIVFFlat * ivfl =
303 dynamic_cast<const IndexIVFFlat *> (idx)) {
304 uint32_t h = fourcc (
"IvFL");
306 write_ivf_header (ivfl, f);
307 for(
int i = 0; i < ivfl->nlist; i++)
308 WRITEVECTOR (ivfl->codes[i]);
309 }
else if(
const IndexIVFScalarQuantizer * ivsc =
310 dynamic_cast<const IndexIVFScalarQuantizer *> (idx)) {
311 uint32_t h = fourcc (
"IvSQ");
313 write_ivf_header (ivsc, f);
314 write_ScalarQuantizer (&ivsc->sq, f);
315 WRITE1 (ivsc->code_size);
316 for(
int i = 0; i < ivsc->nlist; i++)
317 WRITEVECTOR (ivsc->codes[i]);
318 }
else if(
const IndexIVFPQ * ivpq =
319 dynamic_cast<const IndexIVFPQ *> (idx)) {
320 const IndexIVFPQR * ivfpqr =
dynamic_cast<const IndexIVFPQR *
> (idx);
321 const IndexIVFPQCompact * ivfpqc =
322 dynamic_cast<const IndexIVFPQCompact *
> (idx);
323 uint32_t h = fourcc (ivfpqr ?
"IvQR" : ivfpqc ?
"IvPC" :
"IvPQ");
325 write_ivf_header (ivpq, f, !ivfpqc);
326 WRITE1 (ivpq->by_residual);
327 WRITE1 (ivpq->code_size);
328 write_ProductQuantizer (&ivpq->pq, f);
330 for(
int i = 0; i < ivpq->codes.size(); i++)
331 WRITEVECTOR (ivpq->codes[i]);
334 write_ProductQuantizer (&ivfpqr->refine_pq, f);
335 WRITEVECTOR (ivfpqr->refine_codes);
336 WRITE1 (ivfpqr->k_factor);
339 WRITETABPAD16 (ivfpqc->limits, ivfpqc->nlist + 1);
340 WRITETABPAD16 (ivfpqc->compact_ids, ivfpqc->ntotal);
341 WRITETABPAD16 (ivfpqc->compact_codes,
342 ivfpqc->ntotal * ivfpqc->code_size);
344 }
else if(
const IndexPreTransform * ixpt =
345 dynamic_cast<const IndexPreTransform *> (idx)) {
346 uint32_t h = fourcc (
"IxPT");
348 write_index_header (ixpt, f);
349 int nt = ixpt->chain.size();
351 for (
int i = 0; i < nt; i++)
352 write_VectorTransform (ixpt->chain[i], f);
353 write_index (ixpt->index, f);
354 }
else if(
const MultiIndexQuantizer * imiq =
355 dynamic_cast<const MultiIndexQuantizer *> (idx)) {
356 uint32_t h = fourcc (
"Imiq");
358 write_index_header (imiq, f);
359 write_ProductQuantizer (&imiq->pq, f);
360 }
else if(
const IndexRefineFlat * idxrf =
361 dynamic_cast<const IndexRefineFlat *> (idx)) {
362 uint32_t h = fourcc (
"IxRF");
364 write_index_header (idxrf, f);
365 write_index (idxrf->base_index, f);
366 write_index (&idxrf->refine_index, f);
367 WRITE1 (idxrf->k_factor);
368 }
else if(
const IndexIDMap * idxmap =
369 dynamic_cast<const IndexIDMap *> (idx)) {
371 dynamic_cast<const IndexIDMap2 *
> (idx) ? fourcc (
"IxM2") :
375 write_index_header (idxmap, f);
376 write_index (idxmap->index, f);
377 WRITEVECTOR (idxmap->id_map);
378 }
else if(
const IndexHNSW * idxhnsw =
379 dynamic_cast<const IndexHNSW *> (idx)) {
381 dynamic_cast<const IndexHNSWFlat*
>(idx) ? fourcc(
"IHNf") :
382 dynamic_cast<const IndexHNSWPQ*
>(idx) ? fourcc(
"IHNp") :
383 dynamic_cast<const IndexHNSWSQ*
>(idx) ? fourcc(
"IHNs") :
384 dynamic_cast<const IndexHNSW2Level*
>(idx) ? fourcc(
"IHN2") :
386 FAISS_THROW_IF_NOT (h != 0);
388 write_index_header (idxhnsw, f);
389 write_HNSW (&idxhnsw->hnsw, f);
390 write_index (idxhnsw->storage, f);
392 FAISS_THROW_MSG (
"don't know how to serialize this type of index");
396 void write_index (
const Index *idx,
const char *fname) {
397 FILE *f = fopen (fname,
"w");
398 FAISS_THROW_IF_NOT_FMT (f,
"cannot open %s for writing", fname);
399 ScopeFileCloser closer(f);
400 write_index (idx, f);
403 void write_VectorTransform (
const VectorTransform *vt,
const char *fname) {
404 FILE *f = fopen (fname,
"w");
405 FAISS_THROW_IF_NOT_FMT (f,
"cannot open %s for writing", fname);
406 ScopeFileCloser closer(f);
407 write_VectorTransform (vt, f);
414 static void read_index_header (Index *idx, FILE *f) {
420 READ1 (idx->is_trained);
421 READ1 (idx->metric_type);
422 idx->verbose =
false;
425 VectorTransform* read_VectorTransform (FILE *f) {
428 VectorTransform *vt =
nullptr;
430 if (h == fourcc (
"rrot") || h == fourcc (
"PCAm") ||
431 h == fourcc (
"LTra") || h == fourcc (
"PcAm")) {
432 LinearTransform *lt =
nullptr;
433 if (h == fourcc (
"rrot")) {
434 lt =
new RandomRotationMatrix ();
435 }
else if (h == fourcc (
"PCAm") ||
436 h == fourcc (
"PcAm")) {
437 PCAMatrix * pca =
new PCAMatrix ();
438 READ1 (pca->eigen_power);
439 READ1 (pca->random_rotation);
440 if (h == fourcc (
"PcAm"))
441 READ1 (pca->balanced_bins);
442 READVECTOR (pca->mean);
443 READVECTOR (pca->eigenvalues);
444 READVECTOR (pca->PCAMat);
446 }
else if (h == fourcc (
"LTra")) {
447 lt =
new LinearTransform ();
449 READ1 (lt->have_bias);
452 FAISS_THROW_IF_NOT (lt->A.size() >= lt->d_in * lt->d_out);
453 FAISS_THROW_IF_NOT (!lt->have_bias || lt->b.size() >= lt->d_out);
454 lt->set_is_orthonormal();
456 }
else if (h == fourcc (
"RmDT")) {
457 RemapDimensionsTransform *rdt =
new RemapDimensionsTransform ();
458 READVECTOR (rdt->map);
460 }
else if (h == fourcc (
"VNrm")) {
461 NormalizationTransform *nt =
new NormalizationTransform ();
465 FAISS_THROW_MSG(
"fourcc not recognized");
469 READ1 (vt->is_trained);
473 static void read_ProductQuantizer (ProductQuantizer *pq, FILE *f) {
477 pq->set_derived_values ();
478 READVECTOR (pq->centroids);
481 static void read_ScalarQuantizer (ScalarQuantizer *ivsc, FILE *f) {
483 READ1 (ivsc->rangestat);
484 READ1 (ivsc->rangestat_arg);
486 READ1 (ivsc->code_size);
487 READVECTOR (ivsc->trained);
490 static void read_HNSW (HNSW *hnsw, FILE *f) {
491 READVECTOR (hnsw->assign_probas);
492 READVECTOR (hnsw->cum_nneighbor_per_level);
493 READVECTOR (hnsw->levels);
494 READVECTOR (hnsw->offsets);
495 READVECTOR (hnsw->neighbors);
497 READ1 (hnsw->entry_point);
498 READ1 (hnsw->max_level);
499 READ1 (hnsw->efConstruction);
500 READ1 (hnsw->efSearch);
501 READ1 (hnsw->upper_beam);
504 ProductQuantizer * read_ProductQuantizer (
const char*fname) {
505 FILE *f = fopen (fname,
"r");
506 FAISS_THROW_IF_NOT_FMT (f,
"cannot open %s for writing", fname);
507 ScopeFileCloser closer(f);
508 ProductQuantizer *pq =
new ProductQuantizer();
509 ScopeDeleter1<ProductQuantizer> del (pq);
510 read_ProductQuantizer(pq, f);
515 static void read_ivf_header (IndexIVF * ivf, FILE *f,
516 bool include_ids =
true) {
517 read_index_header (ivf, f);
521 ivf->own_fields =
true;
523 ivf->ids.resize (ivf->nlist);
524 for (
size_t i = 0; i < ivf->nlist; i++)
525 READVECTOR (ivf->ids[i]);
527 READ1 (ivf->maintain_direct_map);
528 READVECTOR (ivf->direct_map);
531 static IndexIVFPQ *read_ivfpq (FILE *f, uint32_t h,
bool try_mmap)
534 IndexIVFPQR *ivfpqr =
535 h == fourcc (
"IvQR") ?
new IndexIVFPQR () : nullptr;
536 IndexIVFPQCompact *ivfpqc =
537 h == fourcc (
"IvPC") ?
new IndexIVFPQCompact () : nullptr;
538 IndexIVFPQ * ivpq = ivfpqr ? ivfpqr : ivfpqc ? ivfpqc :
new IndexIVFPQ ();
539 read_ivf_header (ivpq, f, !ivfpqc);
540 READ1 (ivpq->by_residual);
541 READ1 (ivpq->code_size);
542 read_ProductQuantizer (&ivpq->pq, f);
544 ivpq->codes.resize (ivpq->nlist);
545 for (
size_t i = 0; i < ivpq->nlist; i++)
546 READVECTOR (ivpq->codes[i]);
549 ivpq->use_precomputed_table = 0;
550 if (ivpq->by_residual)
551 ivpq->precompute_table ();
553 read_ProductQuantizer (&ivfpqr->refine_pq, f);
554 READVECTOR (ivfpqr->refine_codes);
555 READ1 (ivfpqr->k_factor);
559 READTABPAD16 (ivfpqc->limits, uint32_t, ivfpqc->nlist + 1);
560 READTABPAD16 (ivfpqc->compact_ids, uint32_t, ivfpqc->ntotal);
561 READTABPAD16 (ivfpqc->compact_codes, uint8_t,
562 ivfpqc->ntotal * ivfpqc->code_size);
564 long offset_limits, offset_compact_ids, offset_compact_codes;
565 TABOFFSETPAD16 (offset_limits, uint32_t, ivfpqc->nlist + 1);
566 TABOFFSETPAD16 (offset_compact_ids, uint32_t, ivfpqc->ntotal);
567 TABOFFSETPAD16 (offset_compact_codes, uint8_t,
568 ivfpqc->ntotal * ivfpqc->code_size);
569 ivfpqc->mmap_length = ftell (f);
571 ivfpqc->mmap_buffer = (
char*)mmap (
572 nullptr, ivfpqc->mmap_length,
573 PROT_READ, MAP_SHARED, fileno (f), 0);
574 if (!ivfpqc->mmap_buffer) {
575 perror (
"mmap failed");
580 ivfpqc->limits = (uint32_t*)(ivfpqc->mmap_buffer + offset_limits);
581 ivfpqc->compact_ids = (uint32_t*)(ivfpqc->mmap_buffer +
583 ivfpqc->compact_codes = (uint8_t*)(ivfpqc->mmap_buffer +
584 offset_compact_codes);
590 int read_old_fmt_hack = 0;
593 Index * idx =
nullptr;
596 if (h == fourcc (
"IxFI") || h == fourcc (
"IxF2")) {
598 if (h == fourcc (
"IxFI")) idxf =
new IndexFlatIP ();
600 read_index_header (idxf, f);
601 READVECTOR (idxf->
xb);
602 FAISS_THROW_IF_NOT (idxf->
xb.size() == idxf->
ntotal * idxf->
d);
605 }
else if (h == fourcc(
"IxHE") || h == fourcc(
"IxHe")) {
607 read_index_header (idxl, f);
613 if (h == fourcc(
"IxHE")) {
614 FAISS_THROW_IF_NOT_FMT (idxl->
nbits % 64 == 0,
615 "can only read old format IndexLSH with "
616 "nbits multiple of 64 (got %d)",
623 (read_VectorTransform (f));
624 FAISS_THROW_IF_NOT_MSG(rrot,
"expected a random rotation");
628 READVECTOR (idxl->
codes);
629 FAISS_THROW_IF_NOT (idxl->
rrot.d_in == idxl->
d &&
634 }
else if (h == fourcc (
"IxPQ") || h == fourcc (
"IxPo") ||
635 h == fourcc (
"IxPq")) {
638 read_index_header (idxp, f);
639 read_ProductQuantizer (&idxp->
pq, f);
640 READVECTOR (idxp->
codes);
641 if (h == fourcc (
"IxPo") || h == fourcc (
"IxPq")) {
642 READ1 (idxp->search_type);
643 READ1 (idxp->encode_signs);
649 if (h == fourcc (
"IxPQ") || h == fourcc (
"IxPo")) {
653 }
else if (h == fourcc (
"IvFl") || h == fourcc(
"IvFL")) {
655 read_ivf_header (ivfl, f);
657 ivfl->codes.resize (ivfl->
nlist);
658 if (h == fourcc (
"IvFL")) {
659 for (
size_t i = 0; i < ivfl->
nlist; i++) {
660 READVECTOR (ivfl->codes[i]);
663 for (
size_t i = 0; i < ivfl->
nlist; i++) {
664 std::vector<float> vec;
666 ivfl->codes[i].resize(vec.size() *
sizeof(float));
667 memcpy(ivfl->codes[i].data(), vec.data(),
668 ivfl->codes[i].size());
672 }
else if (h == fourcc (
"IxSQ")) {
674 read_index_header (idxs, f);
675 read_ScalarQuantizer (&idxs->
sq, f);
676 READVECTOR (idxs->
codes);
679 }
else if(h == fourcc (
"IvSQ")) {
681 read_ivf_header (ivsc, f);
682 ivsc->codes.resize(ivsc->
nlist);
683 read_ScalarQuantizer (&ivsc->sq, f);
685 for(
int i = 0; i < ivsc->
nlist; i++)
686 READVECTOR (ivsc->codes[i]);
688 }
else if(h == fourcc (
"IvPQ") || h == fourcc (
"IvQR") ||
689 h == fourcc (
"IvPC")) {
691 idx = read_ivfpq (f, h, try_mmap);
693 }
else if(h == fourcc (
"IxPT")) {
696 read_index_header (ixpt, f);
698 if (read_old_fmt_hack == 2) {
703 for (
int i = 0; i < nt; i++) {
704 ixpt->chain.push_back (read_VectorTransform (f));
708 }
else if(h == fourcc (
"Imiq")) {
710 read_index_header (imiq, f);
711 read_ProductQuantizer (&imiq->pq, f);
713 }
else if(h == fourcc (
"IxRF")) {
715 read_index_header (idxrf, f);
723 }
else if(h == fourcc (
"IxMp") || h == fourcc (
"IxM2")) {
724 bool is_map2 = h == fourcc (
"IxM2");
726 read_index_header (idxmap, f);
729 READVECTOR (idxmap->
id_map);
731 static_cast<IndexIDMap2*
>(idxmap)->construct_rev_map ();
734 }
else if (h == fourcc (
"Ix2L")) {
736 read_index_header (idxp, f);
740 read_ProductQuantizer (&idxp->
pq, f);
744 READVECTOR (idxp->
codes);
746 }
else if(h == fourcc(
"IHNf") || h == fourcc(
"IHNp") ||
747 h == fourcc(
"IHNs") || h == fourcc(
"IHN2")) {
750 if (h == fourcc(
"IHNp")) idxhnsw =
new IndexHNSWPQ ();
751 if (h == fourcc(
"IHNs")) idxhnsw =
new IndexHNSWSQ ();
753 read_index_header (idxhnsw, f);
754 read_HNSW (&idxhnsw->hnsw, f);
756 idxhnsw->own_fields =
true;
757 if (h == fourcc(
"IHNp")) {
758 dynamic_cast<IndexPQ*
>(idxhnsw->storage)->pq.compute_sdc_table ();
762 FAISS_THROW_FMT(
"Index type 0x%08x not supported\n", h);
770 Index *
read_index (
const char *fname,
bool try_mmap) {
771 FILE *f = fopen (fname,
"r");
772 FAISS_THROW_IF_NOT_FMT (f,
"cannot open %s for reading:", fname);
778 VectorTransform *read_VectorTransform (
const char *fname) {
779 FILE *f = fopen (fname,
"r");
781 fprintf (stderr,
"cannot open %s for reading:", fname);
785 VectorTransform *vt = read_VectorTransform (f);
796 Index * clone_index (
const Index *index)
799 return cl.clone_Index (index);
804 #define TRYCLONE(classname, obj) \
805 if (const classname *clo = dynamic_cast<const classname *>(obj)) { \
806 return new classname(*clo); \
809 VectorTransform *Cloner::clone_VectorTransform (
const VectorTransform *vt)
811 TRYCLONE (RemapDimensionsTransform, vt)
812 TRYCLONE (OPQMatrix, vt)
813 TRYCLONE (PCAMatrix, vt)
814 TRYCLONE (RandomRotationMatrix, vt)
815 TRYCLONE (LinearTransform, vt)
817 FAISS_THROW_MSG(
"clone not supported for this type of VectorTransform");
822 IndexIVF * Cloner::clone_IndexIVF (
const IndexIVF *ivf)
824 TRYCLONE (IndexIVFPQR, ivf)
825 TRYCLONE (IndexIVFPQ, ivf)
826 TRYCLONE (IndexIVFFlat, ivf)
827 TRYCLONE (IndexIVFScalarQuantizer, ivf)
829 FAISS_THROW_MSG(
"clone not supported for this type of IndexIVF");
834 Index *Cloner::clone_Index (
const Index *index)
836 TRYCLONE (IndexPQ, index)
837 TRYCLONE (IndexLSH, index)
838 TRYCLONE (IndexFlatL2, index)
839 TRYCLONE (IndexFlatIP, index)
840 TRYCLONE (IndexFlat, index)
841 TRYCLONE (IndexScalarQuantizer, index)
842 TRYCLONE (MultiIndexQuantizer, index)
843 if (const IndexIVF * ivf = dynamic_cast<const IndexIVF*>(index)) {
844 IndexIVF *res = clone_IndexIVF (ivf);
845 res->own_fields =
true;
846 res->quantizer = clone_Index (ivf->quantizer);
848 }
else if (
const IndexPreTransform * ipt =
849 dynamic_cast<const IndexPreTransform*> (index)) {
850 IndexPreTransform *res =
new IndexPreTransform ();
852 res->index = clone_Index (ipt->index);
853 for (
int i = 0; i < ipt->chain.size(); i++)
854 res->chain.push_back (clone_VectorTransform (ipt->chain[i]));
855 res->own_fields =
true;
857 }
else if (
const IndexIDMap *idmap =
858 dynamic_cast<const IndexIDMap*> (index)) {
859 IndexIDMap *res =
new IndexIDMap (*idmap);
860 res->own_fields =
true;
861 res->index = clone_Index (idmap->index);
864 FAISS_THROW_MSG(
"clone not supported for this type of Index");
std::vector< uint8_t > codes
Codes. Size ntotal * pq.code_size.
size_t code_size
bytes per vector
size_t code_size_2
size of the code for the second level
Randomly rotate a set of vectors.
size_t code_size
code_size_1 + code_size_2
Index * read_index(FILE *f, bool try_mmap)
int bytes_per_vec
nb of 8-bits per encoded vector
std::vector< float > thresholds
thresholds to compare with
bool train_thresholds
whether we train thresholds or use 0
Level1Quantizer q1
first level quantizer
std::vector< uint8_t > codes
Codes. Size ntotal * code_size.
Index * base_index
faster index to pre-select the vectors that should be filtered
IndexFlat refine_index
storage for full vectors
bool own_fields
should the base index be deallocated?
std::vector< long > id_map
! whether pointers are deleted in destructor
std::vector< uint8_t > codes
Codes. Size ntotal * pq.code_size.
char quantizer_trains_alone
RandomRotationMatrix rrot
optional random rotation
ScalarQuantizer sq
Used to encode the vectors.
long idx_t
all indices are this type
ProductQuantizer pq
The product quantizer used to encode the vectors.
idx_t ntotal
total nb of indexed vectors
MetricType metric_type
type of metric this index uses for search
size_t code_size_1
size of the code for the first level (ceil(log8(q1.nlist)))
int nbits
nb of bits per vector
Index * quantizer
quantizer that maps vectors to inverted lists
bool is_trained
set if the Index does not require training, or if training is done already
ProductQuantizer pq
second level quantizer is always a PQ
std::vector< float > xb
database vectors, size ntotal * d
int polysemous_ht
Hamming threshold used for polysemy.
size_t nlist
number of possible key values
size_t code_size
code size per vector in bytes
bool rotate_data
whether to apply a random rotation to input
std::vector< uint8_t > codes
encoded dataset
bool own_fields
! the sub-index