/*
 * Copyright (c) Meta Platforms, Inc. and affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
|
|
|
|
// -*- c -*-
|
|
|
|
#ifndef FAISS_CLUSTERING_C_H
|
|
#define FAISS_CLUSTERING_C_H
|
|
|
|
#include "Index_c.h"
|
|
#include "faiss_c.h"
|
|
|
|
#ifdef __cplusplus
|
|
extern "C" {
|
|
#endif
|
|
|
|
/** Parameters controlling a clustering run. Can be passed to the
 * constructor of the Clustering object.
 *
 * Fields tagged (bool) are declared as int and used as flags.
 */
typedef struct FaissClusteringParameters {
    int niter; ///< clustering iterations
    int nredo; ///< redo clustering this many times and keep best

    int verbose;          ///< (bool)
    int spherical;        ///< (bool) do we want normalized centroids?
    int int_centroids;    ///< (bool) round centroids coordinates to integer
    int update_index;     ///< (bool) update index after each iteration?
    int frozen_centroids; ///< (bool) use the centroids provided as input and
                          ///< do not change them during iterations

    int min_points_per_centroid; ///< otherwise you get a warning
    int max_points_per_centroid; ///< to limit size of dataset

    int seed;                 ///< seed for the random number generator
    size_t decode_block_size; ///< how many vectors at a time to decode
} FaissClusteringParameters;
|
|
|
|
/// Sets the ClusteringParameters object with reasonable defaults
|
|
void faiss_ClusteringParameters_init(FaissClusteringParameters* params);
|
|
|
|
/** clustering based on assignment - centroid update iterations
|
|
*
|
|
* The clustering is based on an Index object that assigns training
|
|
* points to the centroids. Therefore, at each iteration the centroids
|
|
* are added to the index.
|
|
*
|
|
* On output, the centroids table is set to the latest version
|
|
* of the centroids and they are also added to the index. If the
|
|
* centroids table it is not empty on input, it is also used for
|
|
* initialization.
|
|
*
|
|
* To do several clusterings, just call train() several times on
|
|
* different training sets, clearing the centroid table in between.
|
|
*/
|
|
FAISS_DECLARE_CLASS(Clustering)
|
|
|
|
/* Getters for the clustering parameters held by a Clustering object.
 * The names, types and order mirror the fields of FaissClusteringParameters;
 * see that struct for the meaning of each value.
 *
 * NOTE(review): FAISS_DECLARE_GETTER is defined outside this header;
 * presumably each line declares one accessor function for the named
 * field -- confirm the generated signature in faiss_c.h.
 */
FAISS_DECLARE_GETTER(Clustering, int, niter)
FAISS_DECLARE_GETTER(Clustering, int, nredo)
FAISS_DECLARE_GETTER(Clustering, int, verbose)
FAISS_DECLARE_GETTER(Clustering, int, spherical)
FAISS_DECLARE_GETTER(Clustering, int, int_centroids)
FAISS_DECLARE_GETTER(Clustering, int, update_index)
FAISS_DECLARE_GETTER(Clustering, int, frozen_centroids)

FAISS_DECLARE_GETTER(Clustering, int, min_points_per_centroid)
FAISS_DECLARE_GETTER(Clustering, int, max_points_per_centroid)

FAISS_DECLARE_GETTER(Clustering, int, seed)
FAISS_DECLARE_GETTER(Clustering, size_t, decode_block_size)
|
|
|
|
/// getter for d
|
|
FAISS_DECLARE_GETTER(Clustering, size_t, d)
|
|
|
|
/// getter for k
|
|
FAISS_DECLARE_GETTER(Clustering, size_t, k)
|
|
|
|
FAISS_DECLARE_CLASS(ClusteringIterationStats)
|
|
FAISS_DECLARE_GETTER(ClusteringIterationStats, float, obj)
|
|
FAISS_DECLARE_GETTER(ClusteringIterationStats, double, time)
|
|
FAISS_DECLARE_GETTER(ClusteringIterationStats, double, time_search)
|
|
FAISS_DECLARE_GETTER(ClusteringIterationStats, double, imbalance_factor)
|
|
FAISS_DECLARE_GETTER(ClusteringIterationStats, int, nsplit)
|
|
|
|
/// getter for centroids (size = k * d)
|
|
void faiss_Clustering_centroids(
|
|
FaissClustering* clustering,
|
|
float** centroids,
|
|
size_t* size);
|
|
|
|
/// getter for iteration stats
|
|
void faiss_Clustering_iteration_stats(
|
|
FaissClustering* clustering,
|
|
FaissClusteringIterationStats** iteration_stats,
|
|
size_t* size);
|
|
|
|
/// the only mandatory parameters are k and d
|
|
int faiss_Clustering_new(FaissClustering** p_clustering, int d, int k);
|
|
|
|
int faiss_Clustering_new_with_params(
|
|
FaissClustering** p_clustering,
|
|
int d,
|
|
int k,
|
|
const FaissClusteringParameters* cp);
|
|
|
|
int faiss_Clustering_train(
|
|
FaissClustering* clustering,
|
|
idx_t n,
|
|
const float* x,
|
|
FaissIndex* index);
|
|
|
|
/** Frees a clustering object.
 *
 * @param clustering  object to release
 *
 * NOTE(review): presumably the counterpart of faiss_Clustering_new /
 * faiss_Clustering_new_with_params -- confirm whether NULL is accepted.
 */
void faiss_Clustering_free(FaissClustering* clustering);
|
|
|
|
/** Simplified interface.
 *
 * @param d          dimension of the data
 * @param n          nb of training vectors
 * @param k          nb of output centroids
 * @param x          training set (size n * d)
 * @param centroids  output centroids (size k * d)
 * @param q_error    final quantization error
 * @return error code
 */
int faiss_kmeans_clustering(
        size_t d,
        size_t n,
        size_t k,
        const float* x,
        float* centroids,
        float* q_error);
|
|
|
|
#ifdef __cplusplus
|
|
}
|
|
#endif
|
|
|
|
#endif
|