From f6d2efd1dffd3e8cac7ee6241395c8557892f814 Mon Sep 17 00:00:00 2001 From: Alexander Andreev Date: Tue, 8 Jun 2021 15:32:56 -0700 Subject: [PATCH] Cover more types for C_API (#1917) Summary: Exported some global variables and statistics. Supported downcast for IndexIDMap and IndexIDMap2 from faiss::Index Fixes https://github.com/facebookresearch/faiss/issues/1863 Pull Request resolved: https://github.com/facebookresearch/faiss/pull/1917 Reviewed By: beauby Differential Revision: D28834039 Pulled By: mdouze fbshipit-source-id: c1f7739dcdc23055780ebc665082609641dff861 --- c_api/CMakeLists.txt | 4 ++- c_api/IndexIVF_c.cpp | 4 +++ c_api/IndexIVF_c.h | 12 ++++++--- c_api/Index_c.h | 6 ++--- c_api/MetaIndexes_c.cpp | 7 ++++++ c_api/MetaIndexes_c.h | 20 ++++++++++++++- c_api/utils/distances_c.cpp | 44 ++++++++++++++++++++++++++++++++ c_api/utils/distances_c.h | 50 +++++++++++++++++++++++++++++++++++++ faiss/IndexIVF.h | 2 +- faiss/utils/distances.h | 6 ++--- faiss/utils/utils.h | 2 +- 11 files changed, 144 insertions(+), 13 deletions(-) create mode 100644 c_api/utils/distances_c.cpp create mode 100644 c_api/utils/distances_c.h diff --git a/c_api/CMakeLists.txt b/c_api/CMakeLists.txt index 747ce50c3..f786ab69f 100644 --- a/c_api/CMakeLists.txt +++ b/c_api/CMakeLists.txt @@ -27,6 +27,7 @@ set(FAISS_C_SRC index_factory_c.cpp index_io_c.cpp impl/AuxIndexStructures_c.cpp + utils/distances_c.cpp ) add_library(faiss_c ${FAISS_C_SRC}) target_link_libraries(faiss_c PRIVATE faiss) @@ -43,7 +44,8 @@ endfunction() file(GLOB FAISS_C_API_HEADERS RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} "*.h" - "impl/*.h") + "impl/*.h" + "utils/*.h") faiss_install_headers("${FAISS_C_API_HEADERS}" c_api) diff --git a/c_api/IndexIVF_c.cpp b/c_api/IndexIVF_c.cpp index e9f2260d3..768a3d8d6 100644 --- a/c_api/IndexIVF_c.cpp +++ b/c_api/IndexIVF_c.cpp @@ -125,3 +125,7 @@ void faiss_IndexIVF_invlists_get_ids( void faiss_IndexIVFStats_reset(FaissIndexIVFStats* stats) { reinterpret_cast(stats)->reset(); } + 
+FaissIndexIVFStats* faiss_get_indexIVF_stats() { + return reinterpret_cast<FaissIndexIVFStats*>(&faiss::indexIVF_stats); +} diff --git a/c_api/IndexIVF_c.h b/c_api/IndexIVF_c.h index 1d5782a5d..4d6f1d6ac 100644 --- a/c_api/IndexIVF_c.h +++ b/c_api/IndexIVF_c.h @@ -139,9 +139,12 @@ void faiss_IndexIVF_invlists_get_ids( idx_t* invlist); typedef struct FaissIndexIVFStats { - size_t nq; // nb of queries run - size_t nlist; // nb of inverted lists scanned - size_t ndis; // nb of distances computed + size_t nq; // nb of queries run + size_t nlist; // nb of inverted lists scanned + size_t ndis; // nb of distances computed + size_t nheap_updates; // nb of times the heap was updated + double quantization_time; // time spent quantizing vectors (in ms) + double search_time; // time spent searching lists (in ms) } FaissIndexIVFStats; void faiss_IndexIVFStats_reset(FaissIndexIVFStats* stats); @@ -150,6 +153,9 @@ inline void faiss_IndexIVFStats_init(FaissIndexIVFStats* stats) { faiss_IndexIVFStats_reset(stats); } +/// global var that collects all statistics +FaissIndexIVFStats* faiss_get_indexIVF_stats(); + #ifdef __cplusplus } #endif diff --git a/c_api/Index_c.h b/c_api/Index_c.h index 234b4d68b..4271fade4 100644 --- a/c_api/Index_c.h +++ b/c_api/Index_c.h @@ -68,7 +68,7 @@ int faiss_Index_train(FaissIndex* index, idx_t n, const float* x); /** Add n vectors of dimension d to the index. * * Vectors are implicitly assigned labels ntotal .. ntotal + n - 1 - * This function slices the input vectors in chuncks smaller than + * This function slices the input vectors in chunks smaller than * blocksize_add and calls add_core. 
* @param index opaque pointer to index object * @param x input matrix, size n * d @@ -158,7 +158,7 @@ int faiss_Index_remove_ids( * this function may not be defined for some indexes * @param index opaque pointer to index object * @param key id of the vector to reconstruct - * @param recons reconstucted vector (size d) + * @param recons reconstructed vector (size d) */ int faiss_Index_reconstruct(const FaissIndex* index, idx_t key, float* recons); @@ -166,7 +166,7 @@ int faiss_Index_reconstruct(const FaissIndex* index, idx_t key, float* recons); * * this function may not be defined for some indexes * @param index opaque pointer to index object - * @param recons reconstucted vector (size ni * d) + * @param recons reconstructed vector (size ni * d) */ int faiss_Index_reconstruct_n( const FaissIndex* index, diff --git a/c_api/MetaIndexes_c.cpp b/c_api/MetaIndexes_c.cpp index 4471a5ce0..e3e0fbd42 100644 --- a/c_api/MetaIndexes_c.cpp +++ b/c_api/MetaIndexes_c.cpp @@ -19,6 +19,13 @@ using faiss::IndexIDMap2; DEFINE_GETTER(IndexIDMap, int, own_fields) DEFINE_SETTER(IndexIDMap, int, own_fields) +DEFINE_INDEX_DOWNCAST(IndexIDMap) + +DEFINE_GETTER(IndexIDMap2, int, own_fields) +DEFINE_SETTER(IndexIDMap2, int, own_fields) + +DEFINE_INDEX_DOWNCAST(IndexIDMap2) + int faiss_IndexIDMap_new(FaissIndexIDMap** p_index, FaissIndex* index) { try { auto out = new IndexIDMap(reinterpret_cast(index)); diff --git a/c_api/MetaIndexes_c.h b/c_api/MetaIndexes_c.h index 1c798e68e..9f24cecba 100644 --- a/c_api/MetaIndexes_c.h +++ b/c_api/MetaIndexes_c.h @@ -25,6 +25,14 @@ FAISS_DECLARE_GETTER_SETTER(IndexIDMap, int, own_fields) int faiss_IndexIDMap_new(FaissIndexIDMap** p_index, FaissIndex* index); +/** attempt a dynamic cast to an IDMap, thus checking + * whether the underlying index type is `IndexIDMap`. 
+ * + * @param index opaque pointer to index object + * @return the same pointer if the index is an IDMap index, NULL otherwise + */ +FAISS_DECLARE_INDEX_DOWNCAST(IndexIDMap) + /** get a pointer to the index map's internal ID vector (the `id_map` field). * The outputs of this function become invalid after any operation that can * modify the index. @@ -40,13 +48,23 @@ void faiss_IndexIDMap_id_map( /** same as IndexIDMap but also provides an efficient reconstruction implementation via a 2-way index */ -FAISS_DECLARE_CLASS_INHERITED(IndexIDMap2, IndexIDMap) +FAISS_DECLARE_CLASS_INHERITED(IndexIDMap2, Index) + +FAISS_DECLARE_GETTER_SETTER(IndexIDMap2, int, own_fields) int faiss_IndexIDMap2_new(FaissIndexIDMap2** p_index, FaissIndex* index); /// make the rev_map from scratch int faiss_IndexIDMap2_construct_rev_map(FaissIndexIDMap2* index); +/** attempt a dynamic cast to an IDMap2, thus checking + * whether the underlying index type is `IndexIDMap2`. + * + * @param index opaque pointer to index object + * @return the same pointer if the index is an IDMap2 index, NULL otherwise + */ +FAISS_DECLARE_INDEX_DOWNCAST(IndexIDMap2) + #ifdef __cplusplus } #endif diff --git a/c_api/utils/distances_c.cpp b/c_api/utils/distances_c.cpp new file mode 100644 index 000000000..534b286e7 --- /dev/null +++ b/c_api/utils/distances_c.cpp @@ -0,0 +1,44 @@ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +// Copyright 2004-present Facebook. All Rights Reserved. 
+// -*- c++ -*- + +#include "distances_c.h" +#include <faiss/utils/distances.h> + +void faiss_set_distance_compute_blas_threshold(int value) { + faiss::distance_compute_blas_threshold = value; +} + +int faiss_get_distance_compute_blas_threshold() { + return faiss::distance_compute_blas_threshold; +} + +void faiss_set_distance_compute_blas_query_bs(int value) { + faiss::distance_compute_blas_query_bs = value; +} + +int faiss_get_distance_compute_blas_query_bs() { + return faiss::distance_compute_blas_query_bs; +} + +void faiss_set_distance_compute_blas_database_bs(int value) { + faiss::distance_compute_blas_database_bs = value; +} + +int faiss_get_distance_compute_blas_database_bs() { + return faiss::distance_compute_blas_database_bs; +} + +void faiss_set_distance_compute_min_k_reservoir(int value) { + faiss::distance_compute_min_k_reservoir = value; +} + +int faiss_get_distance_compute_min_k_reservoir() { + return faiss::distance_compute_min_k_reservoir; +} diff --git a/c_api/utils/distances_c.h b/c_api/utils/distances_c.h new file mode 100644 index 000000000..0d82cdf1c --- /dev/null +++ b/c_api/utils/distances_c.h @@ -0,0 +1,50 @@ +/** + * Copyright (c) Facebook, Inc. and its affiliates. + * + * This source code is licensed under the MIT license found in the + * LICENSE file in the root directory of this source tree. + */ + +// Copyright 2004-present Facebook. All Rights Reserved. 
+// -*- c -*- + +#ifndef FAISS_DISTANCES_C_H +#define FAISS_DISTANCES_C_H + +#ifdef __cplusplus +extern "C" { +#endif + +/// Setter of threshold value on nx above which we switch to BLAS to compute +/// distances +void faiss_set_distance_compute_blas_threshold(int value); + +/// Getter of threshold value on nx above which we switch to BLAS to compute +/// distances +int faiss_get_distance_compute_blas_threshold(); + +/// Setter of the query block size for BLAS distance computations +void faiss_set_distance_compute_blas_query_bs(int value); + +/// Getter of the query block size for BLAS distance computations +int faiss_get_distance_compute_blas_query_bs(); + +/// Setter of the database block size for BLAS distance computations +void faiss_set_distance_compute_blas_database_bs(int value); + +/// Getter of the database block size for BLAS distance computations +int faiss_get_distance_compute_blas_database_bs(); + +/// Setter of number of results above which we switch to a reservoir to +/// collect results rather than a heap +void faiss_set_distance_compute_min_k_reservoir(int value); + +/// Getter of number of results above which we switch to a reservoir to +/// collect results rather than a heap +int faiss_get_distance_compute_min_k_reservoir(); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/faiss/IndexIVF.h b/faiss/IndexIVF.h index 9a68bd5b8..aa631f92f 100644 --- a/faiss/IndexIVF.h +++ b/faiss/IndexIVF.h @@ -396,7 +396,7 @@ struct InvertedListScanner { struct IndexIVFStats { size_t nq; // nb of queries run size_t nlist; // nb of inverted lists scanned - size_t ndis; // nb of distancs computed + size_t ndis; // nb of distances computed size_t nheap_updates; // nb of times the heap was updated double quantization_time; // time spent quantizing vectors (in ms) double search_time; // time spent searching lists (in ms) diff --git a/faiss/utils/distances.h b/faiss/utils/distances.h index fbe676ff9..cfcc5eea9 100644 --- a/faiss/utils/distances.h +++ b/faiss/utils/distances.h @@ -40,7 +40,7 @@ float 
fvec_Linf(const float* x, const float* y, size_t d); * @param nq nb of query vectors * @param nb nb of database vectors * @param xq query vectors (size nq * d) - * @param xb database vectros (size nb * d) + * @param xb database vectors (size nb * d) * @param dis output distances (size nq * nb) * @param ldq,ldb, ldd strides for the matrices */ @@ -63,7 +63,7 @@ void fvec_inner_products_ny( size_t d, size_t ny); -/* compute ny square L2 distance bewteen x and a set of contiguous y vectors */ +/* compute ny square L2 distance between x and a set of contiguous y vectors */ void fvec_L2sqr_ny( float* dis, const float* x, @@ -87,7 +87,7 @@ void fvec_norms_L2sqr(float* norms, const float* x, size_t d, size_t nx); /* L2-renormalize a set of vector. Nothing done if the vector is 0-normed */ void fvec_renorm_L2(size_t d, size_t nx, float* x); -/* This function exists because the Torch counterpart is extremly slow +/* This function exists because the Torch counterpart is extremely slow (not multi-threaded + unexpected overhead even in single thread). It is here to implement the usual property |x-y|^2=|x|^2+|y|^2-2 */ void inner_product_to_L2sqr( diff --git a/faiss/utils/utils.h b/faiss/utils/utils.h index 28863b74f..d7b324d5b 100644 --- a/faiss/utils/utils.h +++ b/faiss/utils/utils.h @@ -80,7 +80,7 @@ void matrix_qr(int m, int n, float* a); /** distances are supposed to be sorted. Sorts indices with same distance*/ void ranklist_handle_ties(int k, int64_t* idx, const float* dis); -/** count the number of comon elements between v1 and v2 +/** count the number of common elements between v1 and v2 * algorithm = sorting + bissection to avoid double-counting duplicates */ size_t ranklist_intersection_size(