Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
/data/users/hoss/faiss/IndexHNSW.h
1 /**
2  * Copyright (c) Facebook, Inc. and its affiliates.
3  *
4  * This source code is licensed under the MIT license found in the
5  * LICENSE file in the root directory of this source tree.
6  */
7 
8 // -*- c++ -*-
9 
10 #pragma once
11 
12 #include <vector>
13 
14 #include "HNSW.h"
15 #include "IndexFlat.h"
16 #include "IndexPQ.h"
17 #include "IndexScalarQuantizer.h"
18 #include "utils.h"
19 
20 
21 namespace faiss {
22 
23 struct IndexHNSW;
24 
26  typedef Index::idx_t idx_t;
27  typedef HNSW::storage_idx_t storage_idx_t;
28 
29  const IndexHNSW & index;
30  size_t M; // number of neighbors
31  size_t k; // number of codebook entries
32  size_t nsq; // number of subvectors
33  size_t code_size;
34  int k_reorder; // nb to reorder. -1 = all
35 
36  std::vector<float> codebook; // size nsq * k * (M + 1)
37 
38  std::vector<uint8_t> codes; // size ntotal * code_size
39  size_t ntotal;
40  size_t d, dsub; // derived values
41 
42  explicit ReconstructFromNeighbors(const IndexHNSW& index,
43  size_t k=256, size_t nsq=1);
44 
45  /// codes must be added in the correct order and the IndexHNSW
46  /// must be populated and sorted
47  void add_codes(size_t n, const float *x);
48 
49  size_t compute_distances(size_t n, const idx_t *shortlist,
50  const float *query, float *distances) const;
51 
52  /// called by add_codes
53  void estimate_code(const float *x, storage_idx_t i, uint8_t *code) const;
54 
55  /// called by compute_distances
56  void reconstruct(storage_idx_t i, float *x, float *tmp) const;
57 
58  void reconstruct_n(storage_idx_t n0, storage_idx_t ni, float *x) const;
59 
60  /// get the M+1 -by-d table for neighbor coordinates for vector i
61  void get_neighbor_table(storage_idx_t i, float *out) const;
62 
63 };
64 
65 
66 /** The HNSW index is a normal random-access index with a HNSW
67  * link structure built on top: vectors are kept in `storage`, links in `hnsw` */
68 
69 struct IndexHNSW : Index {
70 
71  typedef HNSW::storage_idx_t storage_idx_t;
72 
73  // the link structure
74  HNSW hnsw;
75 
76  // the sequential storage
77  bool own_fields; // presumably: whether `storage` is owned by this index -- TODO confirm
78  Index *storage;
79 
80  ReconstructFromNeighbors *reconstruct_from_neighbors;
81 
82  explicit IndexHNSW (int d = 0, int M = 32);
83  explicit IndexHNSW (Index *storage, int M = 32);
84 
85  ~IndexHNSW() override;
86 
87  // get a DistanceComputer object for this kind of storage (pure virtual: each concrete subclass overrides it)
88  virtual DistanceComputer *get_distance_computer() const = 0;
89 
90  void add(idx_t n, const float *x) override;
91 
92  /// Trains the storage if needed
93  void train(idx_t n, const float* x) override;
94 
95  /// entry point for search
96  void search (idx_t n, const float *x, idx_t k,
97  float *distances, idx_t *labels) const override;
98 
99  void reconstruct(idx_t key, float* recons) const override;
100 
101  void reset () override; // removes all elements from the database
102 
103  void shrink_level_0_neighbors(int size);
104 
105  /** Perform search only on level 0, given the starting points for
106  * each vertex.
107  *
108  * @param search_type 1: perform one search per nprobe, 2: enqueue
109  * all entry points
110  */
111  void search_level_0(idx_t n, const float *x, idx_t k,
112  const storage_idx_t *nearest, const float *nearest_d,
113  float *distances, idx_t *labels, int nprobe = 1,
114  int search_type = 1) const;
115 
116  /// alternative graph building: init_level_0_from_knngraph -- NOTE(review): listing line 117, which opens this declaration, is missing from this extract
118  int k, const float *D, const idx_t *I);
119 
120  /// alternative graph building: init_level_0_from_entry_points -- NOTE(review): listing line 121, which opens this declaration, is missing from this extract
122  int npt, const storage_idx_t *points,
123  const storage_idx_t *nearests);
124 
125  // reorder links from nearest to farthest
126  void reorder_links();
127 
128  void link_singletons();
129 };
130 
131 
132 
133 /** Flat index topped with a HNSW structure to access elements
134  * more efficiently.
135  */
136 
138  IndexHNSWFlat();
139  IndexHNSWFlat(int d, int M);
141  get_distance_computer() const override;
142 };
143 
144 /** PQ index topped with a HNSW structure to access elements
145  * more efficiently.
146  */
148  IndexHNSWPQ();
149  IndexHNSWPQ(int d, int pq_m, int M);
150  void train(idx_t n, const float* x) override;
152  get_distance_computer() const override;
153 };
154 
155 /** SQ index topped with a HNSW structure to access elements
156  * more efficiently.
157  */
159  IndexHNSWSQ();
160  IndexHNSWSQ(int d, ScalarQuantizer::QuantizerType qtype, int M);
162  get_distance_computer() const override;
163 };
164 
165 /** 2-level code structure with fast random access
166  */
168  IndexHNSW2Level();
169  IndexHNSW2Level(Index *quantizer, size_t nlist, int m_pq, int M);
171  get_distance_computer() const override;
172  void flip_to_ivf();
173 
174  /// entry point for search
175  void search (idx_t n, const float *x, idx_t k,
176  float *distances, idx_t *labels) const override;
177 
178 };
179 
180 
181 } // namespace faiss
void train(idx_t n, const float *x) override
Trains the storage if needed.
Definition: IndexHNSW.cpp:1016
void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override
entry point for search
Definition: IndexHNSW.cpp:1316
void get_neighbor_table(storage_idx_t i, float *out) const
get the M+1 -by-d table for neighbor coordinates for vector i
Definition: IndexHNSW.cpp:783
void train(idx_t n, const float *x) override
Trains the storage if needed.
Definition: IndexHNSW.cpp:236
int d
vector dimension
Definition: Index.h:66
long idx_t
all indices are this type
Definition: Index.h:62
void add(idx_t n, const float *x) override
Definition: IndexHNSW.cpp:294
void reconstruct(storage_idx_t i, float *x, float *tmp) const
called by compute_distances
Definition: IndexHNSW.cpp:648
void estimate_code(const float *x, storage_idx_t i, uint8_t *code) const
called by add_codes
Definition: IndexHNSW.cpp:802
void add_codes(size_t n, const float *x)
Definition: IndexHNSW.cpp:842
void reset() override
removes all elements from the database.
Definition: IndexHNSW.cpp:305
void init_level_0_from_entry_points(int npt, const storage_idx_t *points, const storage_idx_t *nearests)
alternative graph building
Definition: IndexHNSW.cpp:465
void search(idx_t n, const float *x, idx_t k, float *distances, idx_t *labels) const override
entry point for search
Definition: IndexHNSW.cpp:243
void init_level_0_from_knngraph(int k, const float *D, const idx_t *I)
alternative graph building
Definition: IndexHNSW.cpp:427
void search_level_0(idx_t n, const float *x, idx_t k, const storage_idx_t *nearest, const float *nearest_d, float *distances, idx_t *labels, int nprobe=1, int search_type=1) const
Definition: IndexHNSW.cpp:355
void reconstruct(idx_t key, float *recons) const override
Definition: IndexHNSW.cpp:312
int storage_idx_t
internal storage of vectors (32 bits: this is expensive)
Definition: HNSW.h:48