Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
/tmp/faiss/IndexHNSW.h
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 // -*- c++ -*-
10 
11 #pragma once
12 
13 #include <vector>
14 
15 #include "HNSW.h"
16 #include "IndexFlat.h"
17 #include "IndexPQ.h"
18 #include "IndexScalarQuantizer.h"
19 #include "utils.h"
20 
21 
22 namespace faiss {
23 
24 struct IndexHNSW;
25 
27  typedef Index::idx_t idx_t;
28  typedef HNSW::storage_idx_t storage_idx_t;
29 
30  const IndexHNSW & index;
31  size_t M; // number of neighbors
32  size_t k; // number of codebook entries
33  size_t nsq; // number of subvectors
34  size_t code_size;
35  int k_reorder; // nb to reorder. -1 = all
36 
37  std::vector<float> codebook; // size nsq * k * (M + 1)
38 
39  std::vector<uint8_t> codes; // size ntotal * code_size
40  size_t ntotal;
41  size_t d, dsub; // derived values
42 
43  explicit ReconstructFromNeighbors(const IndexHNSW& index,
44  size_t k=256, size_t nsq=1);
45 
46  /// codes must be added in the correct order and the IndexHNSW
47  /// must be populated and sorted
48  void add_codes(size_t n, const float *x);
49 
50  size_t compute_distances(size_t n, const idx_t *shortlist,
51  const float *query, float *distances) const;
52 
53  /// called by add_codes
54  void estimate_code(const float *x, storage_idx_t i, uint8_t *code) const;
55 
56  /// called by compute_distances
57  void reconstruct(storage_idx_t i, float *x, float *tmp) const;
58 
59  void reconstruct_n(storage_idx_t n0, storage_idx_t ni, float *x) const;
60 
61  /// get the M+1 -by-d table for neighbor coordinates for vector i
62  void get_neighbor_table(storage_idx_t i, float *out) const;
63 
64 };
65 
66 
67 /** The HNSW index is a normal random-access index with a HNSW
68  * link structure built on top */
69 
70 struct IndexHNSW : Index {
71 
72  typedef HNSW::storage_idx_t storage_idx_t;
73 
74  // the link structure
75  HNSW hnsw;
76 
77  // the sequential storage
78  bool own_fields;
79  Index *storage;
80 
81  ReconstructFromNeighbors *reconstruct_from_neighbors;
82 
83  explicit IndexHNSW (int d = 0, int M = 32);
84  explicit IndexHNSW (Index *storage, int M = 32);
85 
86  ~IndexHNSW() override;
87 
88  // get a DistanceComputer object for this kind of storage
89  virtual HNSW::DistanceComputer *get_distance_computer() const = 0;
90 
91  void add(idx_t n, const float *x) override;
92 
93  /// Trains the storage if needed
94  void train(idx_t n, const float* x) override;
95 
96  /// entry point for search
97  void search (idx_t n, const float *x, idx_t k,
98  float *distances, idx_t *labels) const override;
99 
100  void reconstruct(idx_t key, float* recons) const override;
101 
102  void reset () override;
103 
104  void shrink_level_0_neighbors(int size);
105 
106  /** Perform search only on level 0, given the starting points for
107  * each vertex.
108  *
109  * @param search_type 1:perform one search per nprobe, 2: enqueue
110  * all entry points
111  */
112  void search_level_0(idx_t n, const float *x, idx_t k,
113  const storage_idx_t *nearest, const float *nearest_d,
114  float *distances, idx_t *labels, int nprobe = 1,
115  int search_type = 1) const;
116 
117  /// alternative graph building
119  int k, const float *D, const idx_t *I);
120 
121  /// alternative graph building
123  int npt, const storage_idx_t *points,
124  const storage_idx_t *nearests);
125 
126  // reorder links from nearest to farthest
127  void reorder_links();
128 
129  void link_singletons();
130 };
131 
132 
133 
134 /** Flat index topped with a HNSW structure to access elements
135  * more efficiently.
136  */
137 
139  IndexHNSWFlat();
140  IndexHNSWFlat(int d, int M);
142  get_distance_computer() const override;
143 };
144 
145 /** PQ index topped with a HNSW structure to access elements
146  * more efficiently.
147  */
149  IndexHNSWPQ();
150  IndexHNSWPQ(int d, int pq_m, int M);
151  void train(idx_t n, const float* x) override;
153  get_distance_computer() const override;
154 };
155 
156 /** SQ index topped with a HNSW structure to access elements
157  * more efficiently.
158  */
160  IndexHNSWSQ();
161  IndexHNSWSQ(int d, ScalarQuantizer::QuantizerType qtype, int M);
163  get_distance_computer() const override;
164 };
165 
166 /** 2-level code structure with fast random access
167  */
169  IndexHNSW2Level();
170  IndexHNSW2Level(Index *quantizer, size_t nlist, int m_pq, int M);
172  get_distance_computer() const override;
173  void flip_to_ivf();
174 
175  /// entry point for search
176  void search (idx_t n, const float *x, idx_t k,
177  float *distances, idx_t *labels) const override;
178 
179 };
180 
181 
182 } // namespace faiss
void train(idx_t n, const float *x) override
Trains the storage if needed.
Definition: IndexHNSW.cpp:979
void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override
entry point for search
Definition: IndexHNSW.cpp:1324
void get_neighbor_table(storage_idx_t i, float *out) const
get the M+1 -by-d table for neighbor coordinates for vector i
Definition: IndexHNSW.cpp:748
void train(idx_t n, const float *x) override
Trains the storage if needed.
Definition: IndexHNSW.cpp:206
int d
vector dimension
Definition: Index.h:66
void add(idx_t n, const float *x) override
Definition: IndexHNSW.cpp:259
void reconstruct(storage_idx_t i, float *x, float *tmp) const
called by compute_distances
Definition: IndexHNSW.cpp:613
long idx_t
all indices are this type
Definition: Index.h:64
void estimate_code(const float *x, storage_idx_t i, uint8_t *code) const
called by add_codes
Definition: IndexHNSW.cpp:767
void add_codes(size_t n, const float *x)
Definition: IndexHNSW.cpp:807
void reset() override
removes all elements from the database.
Definition: IndexHNSW.cpp:270
void init_level_0_from_entry_points(int npt, const storage_idx_t *points, const storage_idx_t *nearests)
alternative graph building
Definition: IndexHNSW.cpp:430
void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override
entry point for search
Definition: IndexHNSW.cpp:213
void init_level_0_from_knngraph(int k, const float *D, const idx_t *I)
alternative graph building
Definition: IndexHNSW.cpp:392
void search_level_0(idx_t n, const float *x, idx_t k, const storage_idx_t *nearest, const float *nearest_d, float *distances, idx_t *labels, int nprobe=1, int search_type=1) const
Definition: IndexHNSW.cpp:320
void reconstruct(idx_t key, float *recons) const override
Definition: IndexHNSW.cpp:277
int storage_idx_t
internal storage of vectors (32 bits: this is expensive)
Definition: HNSW.h:49