Faiss
 All Classes Namespaces Functions Variables Typedefs Enumerations Enumerator Friends
Clustering_c.h
1 /**
2  * Copyright (c) 2015-present, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under the BSD+Patents license found in the
6  * LICENSE file in the root directory of this source tree.
7  */
8 
9 // Copyright 2004-present Facebook. All Rights Reserved
10 // -*- c -*-
11 
12 #ifndef FAISS_CLUSTERING_C_H
13 #define FAISS_CLUSTERING_C_H
14 
15 #include "Index_c.h"
16 #include "faiss_c.h"
17 
18 #ifdef __cplusplus
19 extern "C" {
20 #endif
21 
22 /** Parameters controlling a clustering run. Can be passed to the
23  * constructor of the Clustering object (faiss_Clustering_new_with_params).
24  */
25 typedef struct FaissClusteringParameters {
26  int niter; ///< number of clustering iterations
27  int nredo; ///< redo clustering this many times and keep the best result
28 
29  int verbose; ///< (bool) verbosity flag
30  int spherical; ///< (bool) do we want normalized centroids?
31  int update_index; ///< (bool) update index after each iteration?
32  int frozen_centroids; ///< (bool) use the centroids provided as input and do not change them during iterations
33 
34  int min_points_per_centroid; ///< having fewer points per centroid than this gives a warning
35  int max_points_per_centroid; ///< upper bound on points per centroid, to limit the size of the dataset
36 
37  int seed; ///< seed for the random number generator
38 } FaissClusteringParameters;
39 
40 
41 /// Initializes the ClusteringParameters object pointed to by params with reasonable defaults
42 void faiss_ClusteringParameters_init(FaissClusteringParameters* params);
43 
44 
45 /** Clustering based on assignment - centroid update iterations
46  *
47  * The clustering is based on an Index object that assigns training
48  * points to the centroids. Therefore, at each iteration the centroids
49  * are added to the index.
50  *
51  * On output, the centroids table is set to the latest version
52  * of the centroids and they are also added to the index. If the
53  * centroids table is not empty on input, it is also used for
54  * initialization.
55  *
56  * To do several clusterings, just call faiss_Clustering_train() several
57  * times on different training sets, clearing the centroid table in between.
58  */
59 FAISS_DECLARE_CLASS(Clustering)
60 /// Getters for the clustering settings; the names match the FaissClusteringParameters fields. NOTE(review): FAISS_DECLARE_GETTER is defined outside this file; confirm the generated signatures there.
61 FAISS_DECLARE_GETTER(Clustering, int, niter)
62 FAISS_DECLARE_GETTER(Clustering, int, nredo)
63 FAISS_DECLARE_GETTER(Clustering, int, verbose)
64 FAISS_DECLARE_GETTER(Clustering, int, spherical)
65 FAISS_DECLARE_GETTER(Clustering, int, update_index)
66 FAISS_DECLARE_GETTER(Clustering, int, frozen_centroids)
67 
68 FAISS_DECLARE_GETTER(Clustering, int, min_points_per_centroid)
69 FAISS_DECLARE_GETTER(Clustering, int, max_points_per_centroid)
70 
71 FAISS_DECLARE_GETTER(Clustering, int, seed)
72 
73 /// getter for d (presumably the vector dimension given at construction; confirm)
74 FAISS_DECLARE_GETTER(Clustering, size_t, d)
75 
76 /// getter for k (presumably the number of centroids given at construction; confirm)
77 FAISS_DECLARE_GETTER(Clustering, size_t, k)
78 /// NOTE(review): presumably stores the table pointer in *centroids and its element count in *size; confirm ownership of the returned memory.
79 /// getter for centroids (size = k * d)
80 void faiss_Clustering_centroids(
81  FaissClustering* clustering, float** centroids, size_t* size);
82 /// NOTE(review): presumably stores the array pointer in *obj and its element count in *size; confirm ownership of the returned memory.
83 /// getter for objective values (sum of distances reported by index)
84 /// over iterations
85 void faiss_Clustering_obj(
86  FaissClustering* clustering, float** obj, size_t* size);
87 /// NOTE(review): presumably hands the new Clustering object back through *p_clustering and returns an error code; confirm.
88 /// the only mandatory parameters are k and d
89 int faiss_Clustering_new(FaissClustering** p_clustering, int d, int k);
90 /// Variant of faiss_Clustering_new() that also takes a FaissClusteringParameters object (cp). NOTE(review): presumably returns an error code; confirm.
91 int faiss_Clustering_new_with_params(
92  FaissClustering** p_clustering, int d, int k, const FaissClusteringParameters* cp);
93 /// Trains the clustering on the training set x, using index to assign points to centroids. NOTE(review): assumes x holds n vectors (n * d floats) and that the int result is an error code; confirm.
94 int faiss_Clustering_train(
95  FaissClustering* clustering, idx_t n, const float* x, FaissIndex* index);
96 /// NOTE(review): presumably releases a Clustering object obtained from faiss_Clustering_new() or faiss_Clustering_new_with_params(); confirm.
97 void faiss_Clustering_free(FaissClustering* clustering);
98 
99 /** Simplified interface
100  *
101  * @param d dimension of the data
102  * @param n number of training vectors
103  * @param k number of output centroids
104  * @param x training set (size n * d)
105  * @param centroids output centroids (size k * d)
106  * @param q_error output: final quantization error
107  * @return error code
108  */
109 int faiss_kmeans_clustering (size_t d, size_t n, size_t k,
110  const float *x,
111  float *centroids,
112  float *q_error);
113 
114 #ifdef __cplusplus
115 }
116 #endif
117 
118 #endif
int nredo
redo clustering this many times and keep best
Definition: Clustering_c.h:27
int spherical
(bool) do we want normalized centroids?
Definition: Clustering_c.h:30
int frozen_centroids
(bool) use the centroids provided as input and do not change them during iterations ...
Definition: Clustering_c.h:32
int max_points_per_centroid
to limit size of dataset
Definition: Clustering_c.h:35
int seed
seed for the random number generator
Definition: Clustering_c.h:37
int niter
clustering iterations
Definition: Clustering_c.h:26
int update_index
(bool) update index after each iteration?
Definition: Clustering_c.h:31
int min_points_per_centroid
otherwise you get a warning
Definition: Clustering_c.h:34