sync with FB version 2017-07-18

- implemented ScalarQuantizer (without IVF)
- implemented update for IndexIVFFlat
- implemented L2 normalization preprocessing
parent 602debae7b
commit f7aedbdfc0
AutoTune.cpp (76 lines changed)
@ -22,7 +22,7 @@
 #include "IndexIVF.h"
 #include "IndexIVFPQ.h"
 #include "MetaIndexes.h"
-#include "IndexIVFScalarQuantizer.h"
+#include "IndexScalarQuantizer.h"


 namespace faiss {

@ -623,18 +623,28 @@ void ParameterSpace::explore (Index *index,
  * index_factory
  ***************************************************************/

+namespace {
+
+struct VTChain {
+    std::vector<VectorTransform *> chain;
+    ~VTChain () {
+        for (int i = 0; i < chain.size(); i++) {
+            delete chain[i];
+        }
+    }
+};
+
+}
+
 Index *index_factory (int d, const char *description_in, MetricType metric)
 {
-    VectorTransform *vt = nullptr;
+    VTChain vts;
     Index *coarse_quantizer = nullptr;
     Index *index = nullptr;
     bool add_idmap = false;
     bool make_IndexRefineFlat = false;

     ScopeDeleter1<Index> del_coarse_quantizer, del_index;
-    ScopeDeleter1<VectorTransform> del_vt;

     char description[strlen(description_in) + 1];
     char *ptr;

@ -656,17 +666,26 @@ Index *index_factory (int d, const char *description_in, MetricType metric)
         Index *index_1 = nullptr;

         // VectorTransforms
-        if (!vt && sscanf (tok, "PCA%d", &d_out) == 1) {
+        if (sscanf (tok, "PCA%d", &d_out) == 1) {
             vt_1 = new PCAMatrix (d, d_out);
             d = d_out;
-        } else if (!vt && sscanf (tok, "PCAR%d", &d_out) == 1) {
+        } else if (sscanf (tok, "PCAR%d", &d_out) == 1) {
             vt_1 = new PCAMatrix (d, d_out, 0, true);
             d = d_out;
-        } else if (!vt && sscanf (tok, "OPQ%d_%d", &opq_M, &d_out) == 2) {
+        } else if (sscanf (tok, "PCAW%d", &d_out) == 1) {
+            vt_1 = new PCAMatrix (d, d_out, -0.5, false);
+            d = d_out;
+        } else if (sscanf (tok, "PCAWR%d", &d_out) == 1) {
+            vt_1 = new PCAMatrix (d, d_out, -0.5, true);
+            d = d_out;
+        } else if (sscanf (tok, "OPQ%d_%d", &opq_M, &d_out) == 2) {
             vt_1 = new OPQMatrix (d, opq_M, d_out);
             d = d_out;
-        } else if (!vt && sscanf (tok, "OPQ%d", &opq_M) == 1) {
+        } else if (sscanf (tok, "OPQ%d", &opq_M) == 1) {
             vt_1 = new OPQMatrix (d, opq_M);
+        } else if (stok == "L2norm") {
+            vt_1 = new NormalizationTransform (d, 2.0);

         // coarse quantizers
         } else if (!coarse_quantizer &&
                    sscanf (tok, "IVF%d", &ncentroids) == 1) {

@ -698,21 +717,15 @@ Index *index_factory (int d, const char *description_in, MetricType metric)
             index_1 = index_ivf;
         } else {
             index_1 = new IndexFlat (d, metric);
-            if (add_idmap) {
-                IndexIDMap *idmap = new IndexIDMap(index_1);
-                idmap->own_fields = true;
-                index_1 = idmap;
-                add_idmap = false;
-            }
         }
     } else if (!index && (stok == "SQ8" || stok == "SQ4")) {
-        FAISS_THROW_IF_NOT_MSG(coarse_quantizer,
-                               "ScalarQuantizer works only with an IVF");
         ScalarQuantizer::QuantizerType qt =
             stok == "SQ8" ? ScalarQuantizer::QT_8bit :
             stok == "SQ4" ? ScalarQuantizer::QT_4bit :
             ScalarQuantizer::QT_4bit;
-        IndexIVFScalarQuantizer *index_ivf = new IndexIVFScalarQuantizer (
+        if (coarse_quantizer) {
+            IndexIVFScalarQuantizer *index_ivf =
+                new IndexIVFScalarQuantizer (
                     coarse_quantizer, d, ncentroids, qt, metric);
             index_ivf->quantizer_trains_alone =
                 dynamic_cast<MultiIndexQuantizer*>(coarse_quantizer)

@ -720,6 +733,9 @@ Index *index_factory (int d, const char *description_in, MetricType metric)
             del_coarse_quantizer.release ();
             index_ivf->own_fields = true;
             index_1 = index_ivf;
+        } else {
+            index_1 = new IndexScalarQuantizer (d, qt, metric);
+        }
     } else if (!index && sscanf (tok, "PQ%d+%d", &M, &M2) == 2) {
         FAISS_THROW_IF_NOT_MSG(coarse_quantizer,
                                "PQ with + works only with an IVF");

@ -750,13 +766,6 @@ Index *index_factory (int d, const char *description_in, MetricType metric)
         IndexPQ *index_pq = new IndexPQ (d, M, 8, metric);
         index_pq->do_polysemous_training = true;
         index_1 = index_pq;
-        if (add_idmap) {
-            IndexIDMap *idmap = new IndexIDMap(index_1);
-            del_index.set (idmap);
-            idmap->own_fields = true;
-            index_1 = idmap;
-            add_idmap = false;
-        }
     }
 } else if (stok == "RFlat") {
     make_IndexRefineFlat = true;

@ -765,9 +774,16 @@ Index *index_factory (int d, const char *description_in, MetricType metric)
                 tok, description_in);
     }

+    if (index_1 && add_idmap) {
+        IndexIDMap *idmap = new IndexIDMap(index_1);
+        del_index.set (idmap);
+        idmap->own_fields = true;
+        index_1 = idmap;
+        add_idmap = false;
+    }

     if (vt_1) {
-        vt = vt_1;
-        del_vt.set (vt);
+        vts.chain.push_back (vt_1);
     }

     if (coarse_quantizer_1) {

@ -793,10 +809,14 @@ Index *index_factory (int d, const char *description_in, MetricType metric)
                 "IDMap option not used\n");
     }

-    if (vt) {
-        IndexPreTransform *index_pt = new IndexPreTransform (vt, index);
-        del_vt.release ();
+    if (vts.chain.size() > 0) {
+        IndexPreTransform *index_pt = new IndexPreTransform (index);
         index_pt->own_fields = true;
+        // add from back
+        while (vts.chain.size() > 0) {
+            index_pt->prepend_transform (vts.chain.back());
+            vts.chain.pop_back ();
+        }
         index = index_pt;
     }
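The new factory tokens can be exercised from the Python layer. A minimal sketch, assuming the swig-wrapped `faiss` module built from this tree and random data standing in for a real training set:

import numpy as np
import faiss

d = 64
xt = np.random.rand(10000, d).astype('float32')

# "PCAW64"/"PCAWR64": whitened PCA (eigenvalue power -0.5), without/with rotation
# "L2norm": NormalizationTransform, i.e. renormalize vectors to unit L2 norm
# "SQ8"/"SQ4": scalar quantizer; with no preceding IVF token it now builds a
# flat IndexScalarQuantizer instead of throwing
ivf_sq = faiss.index_factory(d, "L2norm,IVF256,SQ8")
ivf_sq.train(xt)

flat_sq = faiss.index_factory(d, "SQ8")   # no coarse quantizer required anymore
flat_sq.train(xt)
flat_sq.add(xt)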
AuxIndexStructures.cpp

@ -158,6 +158,10 @@ void RangeSearchPartialResult::set_result (bool incremental)
 }

+
+/***********************************************************************
+ * IDSelectorRange
+ ***********************************************************************/

 IDSelectorRange::IDSelectorRange (idx_t imin, idx_t imax):
     imin (imin), imax (imax)
 {

@ -169,6 +173,9 @@ bool IDSelectorRange::is_member (idx_t id) const
 }

+/***********************************************************************
+ * IDSelectorBatch
+ ***********************************************************************/

 IDSelectorBatch::IDSelectorBatch (long n, const idx_t *indices)
 {
AuxIndexStructures.h

@ -15,12 +15,7 @@
 #define FAISS_AUX_INDEX_STRUCTURES_H

 #include <vector>
-
-#if __cplusplus >= 201103L
 #include <unordered_set>
-#endif
-
-#include <set>

 #include "Index.h"

@ -80,11 +75,7 @@ struct IDSelectorRange: IDSelector {
  * hash collisions if lsb's are always the same */
 struct IDSelectorBatch: IDSelector {

-#if __cplusplus >= 201103L
     std::unordered_set<idx_t> set;
-#else
-    std::set<idx_t> set;
-#endif

     typedef unsigned char uint8_t;
     std::vector<uint8_t> bloom; // assumes low bits of id are a good hash value
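IDSelectorRange and IDSelectorBatch are the selectors behind `remove_ids`. A small usage sketch; the numpy-array path is the one faiss.py wires up via `replacement_remove_ids`, and the index and id values here are arbitrary:

import numpy as np
import faiss

d = 32
sub = faiss.IndexFlatL2(d)        # keep a reference: the IDMap does not own it here
index = faiss.IndexIDMap(sub)
index.add_with_ids(np.random.rand(100, d).astype('float32'),
                   np.arange(1000, 1100))

# faiss.py turns the numpy array into an IDSelectorBatch under the hood;
# IDSelectorRange(imin, imax) is the contiguous-range alternative
index.remove_ids(np.array([1000, 1001, 1002]))
assert index.ntotal == 97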
FaissException.cpp

@ -9,7 +9,6 @@
 // Copyright 2004-present Facebook. All Rights Reserved.

 #include "FaissException.h"
-#include <cstdio>

 namespace faiss {

@ -28,4 +27,9 @@ FaissException::FaissException(const std::string& m,
              funcName, file, line, m.c_str());
 }

+const char*
+FaissException::what() const noexcept {
+    return msg.c_str();
+}
+
 }
FaissException.h

@ -27,9 +27,7 @@ class FaissException : public std::exception {
               int line);

     /// from std::exception
-    const char* what() const noexcept override
-    { return msg.c_str(); }
-    ~FaissException () noexcept override {}
+    const char* what() const noexcept override;

     std::string msg;
 };
IndexIVF.cpp (65 lines changed)
@ -65,21 +65,28 @@ void IndexIVF::add (idx_t n, const float * x)
     add_with_ids (n, x, nullptr);
 }

-void IndexIVF::make_direct_map ()
+void IndexIVF::make_direct_map (bool new_maintain_direct_map)
 {
-    if (maintain_direct_map) return;
+    // nothing to do
+    if (new_maintain_direct_map == maintain_direct_map)
+        return;

+    if (new_maintain_direct_map) {
         direct_map.resize (ntotal, -1);
         for (size_t key = 0; key < nlist; key++) {
             const std::vector<long> & idlist = ids[key];

             for (long ofs = 0; ofs < idlist.size(); ofs++) {
-                direct_map [idlist [ofs]] =
-                    key << 32 | ofs;
+                FAISS_THROW_IF_NOT_MSG (
+                        0 <= idlist [ofs] && idlist[ofs] < ntotal,
+                        "direct map supported only for sequential ids");
+                direct_map [idlist [ofs]] = key << 32 | ofs;
             }
         }

-    maintain_direct_map = true;
+    } else {
+        direct_map.clear ();
+    }
+    maintain_direct_map = new_maintain_direct_map;
 }

@ -183,7 +190,6 @@ void IndexIVF::merge_from (IndexIVF &other, idx_t add_id)



-
 IndexIVF::~IndexIVF()
 {
     if (own_fields) delete quantizer;

@ -217,6 +223,8 @@ void IndexIVFFlat::add_core (idx_t n, const float * x, const long *xids,
 {
     FAISS_THROW_IF_NOT (is_trained);
+    FAISS_THROW_IF_NOT_MSG (!(maintain_direct_map && xids),
+                            "cannot have direct map and add with ids");
     const long * idx;
     ScopeDeleter<long> del;

@ -477,6 +485,49 @@ void IndexIVFFlat::copy_subset_to (IndexIVFFlat & other, int subset_type,
     }
 }

+void IndexIVFFlat::update_vectors (int n, idx_t *new_ids, const float *x)
+{
+    FAISS_THROW_IF_NOT (maintain_direct_map);
+    FAISS_THROW_IF_NOT (is_trained);
+    std::vector<idx_t> assign (n);
+    quantizer->assign (n, x, assign.data());
+
+    for (int i = 0; i < n; i++) {
+        idx_t id = new_ids[i];
+        FAISS_THROW_IF_NOT_MSG (0 <= id && id < ntotal,
+                                "id to update out of range");
+        { // remove old one
+            long dm = direct_map[id];
+            long ofs = dm & 0xffffffff;
+            long il = dm >> 32;
+            size_t l = ids[il].size();
+            if (ofs != l - 1) {
+                long id2 = ids[il].back();
+                ids[il][ofs] = id2;
+                direct_map[id2] = (il << 32) | ofs;
+                memcpy (vecs[il].data() + ofs * d,
+                        vecs[il].data() + (l - 1) * d,
+                        d * sizeof(vecs[il][0]));
+            }
+            ids[il].pop_back();
+            vecs[il].resize((l - 1) * d);
+        }
+        { // insert new one
+            long il = assign[i];
+            size_t l = ids[il].size();
+            long dm = (il << 32) | l;
+            direct_map[id] = dm;
+            ids[il].push_back (id);
+            vecs[il].resize((l + 1) * d);
+            memcpy (vecs[il].data() + l * d,
+                    x + i * d,
+                    d * sizeof(vecs[il][0]));
+        }
+    }
+
+}
+
+
 void IndexIVFFlat::reset()
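The direct map packs, for each sequential id, the inverted-list number into the high 32 bits and the offset within that list into the low 32 bits (`key << 32 | ofs`); `update_vectors` uses it to splice the old entry out of its list and re-insert the vector under its new coarse assignment. A sketch of the packing and of the new API from Python (dimensions, list sizes and ids are arbitrary):

import numpy as np
import faiss

# the direct_map packing: list number in the high 32 bits,
# offset inside that list in the low 32 bits
def pack(list_no, ofs):
    return (list_no << 32) | ofs

def unpack(dm):
    return dm >> 32, dm & 0xffffffff

assert unpack(pack(7, 42)) == (7, 42)

d = 16
quantizer = faiss.IndexFlatL2(d)   # must stay alive: the IVF does not own it
index = faiss.IndexIVFFlat(quantizer, d, 64)
xb = np.random.rand(5000, d).astype('float32')
index.train(xb)
index.add(xb)                      # sequential ids 0..4999, as the direct map requires

index.make_direct_map()            # mandatory before update_vectors
keys = np.arange(10).astype('int64')
index.update_vectors(keys, np.random.rand(10, d).astype('float32'))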
IndexIVF.h (19 lines changed)
@ -91,9 +91,12 @@ struct IndexIVF: Index {
     size_t get_list_size (size_t list_no) const
     { return ids[list_no].size(); }

-    /// initialize a direct map
-    void make_direct_map ();
+    /** initialize a direct map
+     *
+     * @param new_maintain_direct_map    if true, create a direct map,
+     *                                   else clear it
+     */
+    void make_direct_map (bool new_maintain_direct_map=true);

     /// 1= perfectly balanced, >1: imbalanced
     double imbalance_factor () const;

@ -184,6 +187,16 @@ struct IndexIVFFlat: IndexIVF {
                       const long * keys,
                       float_maxheap_array_t * res) const;

+    /** Update a subset of vectors.
+     *
+     * The index must have a direct_map
+     *
+     * @param nv     nb of vectors to update
+     * @param idx    vector indices to update, size nv
+     * @param v      vectors of new values, size nv*d
+     */
+    void update_vectors (int nv, idx_t *idx, const float *v);
+
     void reconstruct(idx_t key, float* recons) const override;

     void merge_from_residuals(IndexIVF& other) override;
IndexIVFScalarQuantizer.cpp (file deleted)

@ -1,895 +0,0 @@
/**
 * Copyright (c) 2015-present, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under the CC-by-NC license found in the
 * LICENSE file in the root directory of this source tree.
 */

#include "IndexIVFScalarQuantizer.h"

#include <cstdio>
#include <algorithm>

#include <omp.h>

#include <immintrin.h>

#include "utils.h"

#include "FaissAssert.h"

namespace faiss {

/*******************************************************************
 * IndexIVFScalarQuantizer implementation
 *
 * The main source of complexity is to support combinations of 4
 * variants without incurring runtime tests or virtual function calls:
 *
 * - 4 / 8 bits per code component
 * - uniform / non-uniform
 * - IP / L2 distance search
 * - scalar / AVX distance computation
 *
 * The appropriate Quantizer object is returned via select_quantizer
 * that hides the template mess.
 ********************************************************************/

#ifdef __AVX__
#define USE_AVX
#endif


namespace {

typedef Index::idx_t idx_t;
typedef ScalarQuantizer::QuantizerType QuantizerType;
typedef ScalarQuantizer::RangeStat RangeStat;


/*******************************************************************
 * Codec: converts between values in [0, 1] and an index in a code
 * array. The "i" parameter is the vector component index (not byte
 * index).
 */

struct Codec8bit {

    static void encode_component (float x, uint8_t *code, int i) {
        code[i] = (int)(255 * x);
    }

    static float decode_component (const uint8_t *code, int i) {
        return (code[i] + 0.5f) / 255.0f;
    }

#ifdef USE_AVX
    static __m256 decode_8_components (const uint8_t *code, int i) {
        uint64_t c8 = *(uint64_t*)(code + i);
        __m128i c4lo = _mm_cvtepu8_epi32 (_mm_set1_epi32(c8));
        __m128i c4hi = _mm_cvtepu8_epi32 (_mm_set1_epi32(c8 >> 32));
        // __m256i i8 = _mm256_set_m128i(c4lo, c4hi);
        __m256i i8 = _mm256_castsi128_si256 (c4lo);
        i8 = _mm256_insertf128_si256 (i8, c4hi, 1);
        __m256 f8 = _mm256_cvtepi32_ps (i8);
        __m256 half = _mm256_set1_ps (0.5f);
        f8 += half;
        __m256 one_255 = _mm256_set1_ps (1.f / 255.f);
        return f8 * one_255;
    }
#endif
};


struct Codec4bit {

    static void encode_component (float x, uint8_t *code, int i) {
        code [i / 2] |= (int)(x * 15.0) << ((i & 1) << 2);
    }

    static float decode_component (const uint8_t *code, int i) {
        return (((code[i / 2] >> ((i & 1) << 2)) & 0xf) + 0.5f) / 15.0f;
    }


#ifdef USE_AVX
    static __m256 decode_8_components (const uint8_t *code, int i) {
        uint32_t c4 = *(uint32_t*)(code + (i >> 1));
        uint32_t mask = 0x0f0f0f0f;
        uint32_t c4ev = c4 & mask;
        uint32_t c4od = (c4 >> 4) & mask;

        // the 8 lower bytes of c8 contain the values
        __m128i c8 = _mm_unpacklo_epi8 (_mm_set1_epi32(c4ev),
                                        _mm_set1_epi32(c4od));
        __m128i c4lo = _mm_cvtepu8_epi32 (c8);
        __m128i c4hi = _mm_cvtepu8_epi32 (_mm_srli_si128(c8, 4));
        __m256i i8 = _mm256_castsi128_si256 (c4lo);
        i8 = _mm256_insertf128_si256 (i8, c4hi, 1);
        __m256 f8 = _mm256_cvtepi32_ps (i8);
        __m256 half = _mm256_set1_ps (0.5f);
        f8 += half;
        __m256 one_255 = _mm256_set1_ps (1.f / 15.f);
        return f8 * one_255;
    }
#endif
};
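For intuition, the codecs' arithmetic mirrored in plain Python (illustration only, not a faiss API): a normalized component x in [0, 1] is truncated to an integer code, and decoding returns the center of the code's bin, so the worst-case reconstruction error is half a bin.

# Python mirror of Codec8bit / Codec4bit
def encode_8bit(x):            # x in [0, 1]
    return int(255 * x)        # truncate into codes 0..255

def decode_8bit(c):
    return (c + 0.5) / 255.0   # decode to the center of the bin

def encode_4bit(x):
    return int(15 * x)         # codes 0..15; the C++ layout packs two per byte

def decode_4bit(c):
    return (c + 0.5) / 15.0

x = 0.3
assert abs(decode_8bit(encode_8bit(x)) - x) <= 0.5 / 255   # half-bin worst case
assert abs(decode_4bit(encode_4bit(x)) - x) <= 0.5 / 15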
/*******************************************************************
 * Similarity: gets vector components and computes a similarity wrt. a
 * query vector stored in the object
 */

struct SimilarityL2 {
    const float *y, *yi;
    explicit SimilarityL2 (const float * y): y(y) {}


    /******* scalar accumulator *******/

    float accu;

    void begin () {
        accu = 0;
        yi = y;
    }

    void add_component (float x) {
        float tmp = *yi++ - x;
        accu += tmp * tmp;
    }

    float result () {
        return accu;
    }

#ifdef USE_AVX
    /******* AVX accumulator *******/

    __m256 accu8;

    void begin_8 () {
        accu8 = _mm256_setzero_ps();
        yi = y;
    }

    void add_8_components (__m256 x) {
        __m256 yiv = _mm256_loadu_ps (yi);
        yi += 8;
        __m256 tmp = yiv - x;
        accu8 += tmp * tmp;
    }

    float result_8 () {
        __m256 sum = _mm256_hadd_ps(accu8, accu8);
        __m256 sum2 = _mm256_hadd_ps(sum, sum);
        // now add the 0th and 4th component
        return
            _mm_cvtss_f32 (_mm256_castps256_ps128(sum2)) +
            _mm_cvtss_f32 (_mm256_extractf128_ps(sum2, 1));
    }
#endif
};

struct SimilarityIP {
    const float *y, *yi;
    const float accu0;

    /******* scalar accumulator *******/

    float accu;

    SimilarityIP (const float * y, float accu0):
        y (y), accu0 (accu0) {}

    void begin () {
        accu = accu0;
        yi = y;
    }

    void add_component (float x) {
        accu += *yi++ * x;
    }

    float result () {
        return accu;
    }

#ifdef USE_AVX
    /******* AVX accumulator *******/

    __m256 accu8;

    void begin_8 () {
        accu8 = _mm256_setzero_ps();
        yi = y;
    }

    void add_8_components (__m256 x) {
        __m256 yiv = _mm256_loadu_ps (yi);
        yi += 8;
        accu8 += yiv * x;
    }

    float result_8 () {
        __m256 sum = _mm256_hadd_ps(accu8, accu8);
        __m256 sum2 = _mm256_hadd_ps(sum, sum);
        // now add the 0th and 4th component
        return
            accu0 +
            _mm_cvtss_f32 (_mm256_castps256_ps128(sum2)) +
            _mm_cvtss_f32 (_mm256_extractf128_ps(sum2, 1));
    }
#endif
};


/*******************************************************************
 * templatized distance functions
 */


template<class Quantizer, class Similarity>
float compute_distance(const Quantizer & quant, Similarity & sim,
                       const uint8_t *code)
{
    sim.begin();
    for (size_t i = 0; i < quant.d; i++) {
        float xi = quant.reconstruct_component (code, i);
        sim.add_component (xi);
    }
    return sim.result();
}

#ifdef USE_AVX
template<class Quantizer, class Similarity>
float compute_distance_8(const Quantizer & quant, Similarity & sim,
                         const uint8_t *code)
{
    sim.begin_8();
    for (size_t i = 0; i < quant.d; i += 8) {
        __m256 xi = quant.reconstruct_8_components (code, i);
        sim.add_8_components (xi);
    }
    return sim.result_8();
}
#endif
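Search never reconstructs the database vectors up front: the query stays in float and each code is decoded component by component against it. A Python mirror of the scalar path, assuming per-dimension vmin/vdiff as in the non-uniform quantizer and a per-component decoder such as decode_8bit above (illustration only):

def asymmetric_l2(query, code, vmin, vdiff, decode):
    """Mirror of compute_distance<Quantizer, SimilarityL2>: decode one
    component at a time and accumulate against the float query."""
    accu = 0.0
    for i, c in enumerate(code):
        xi = vmin[i] + decode(c) * vdiff[i]   # reconstruct_component
        diff = query[i] - xi                  # SimilarityL2.add_component
        accu += diff * diff
    return accu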
/*******************************************************************
 * Quantizer range training
 */

static float sqr (float x) {
    return x * x;
}


void train_Uniform(RangeStat rs, float rs_arg,
                   idx_t n, int k, const float *x,
                   std::vector<float> & trained)
{
    trained.resize (2);
    float & vmin = trained[0];
    float & vmax = trained[1];

    if (rs == ScalarQuantizer::RS_minmax) {
        vmin = HUGE_VAL; vmax = -HUGE_VAL;
        for (size_t i = 0; i < n; i++) {
            if (x[i] < vmin) vmin = x[i];
            if (x[i] > vmax) vmax = x[i];
        }
        float vexp = (vmax - vmin) * rs_arg;
        vmin -= vexp;
        vmax += vexp;
    } else if (rs == ScalarQuantizer::RS_meanstd) {
        double sum = 0, sum2 = 0;
        for (size_t i = 0; i < n; i++) {
            sum += x[i];
            sum2 += x[i] * x[i];
        }
        float mean = sum / n;
        float var = sum2 / n - mean * mean;
        float std = var <= 0 ? 1.0 : sqrt(var);

        vmin = mean - std * rs_arg ;
        vmax = mean + std * rs_arg ;
    } else if (rs == ScalarQuantizer::RS_quantiles) {
        std::vector<float> x_copy(n);
        memcpy(x_copy.data(), x, n * sizeof(*x));
        // TODO just do a quickselect
        std::sort(x_copy.begin(), x_copy.end());
        int o = int(rs_arg * n);
        if (o < 0) o = 0;
        if (o > n - o) o = n / 2;
        vmin = x_copy[o];
        vmax = x_copy[n - 1 - o];

    } else if (rs == ScalarQuantizer::RS_optim) {
        float a, b;
        float sx = 0;
        {
            vmin = HUGE_VAL, vmax = -HUGE_VAL;
            for (size_t i = 0; i < n; i++) {
                if (x[i] < vmin) vmin = x[i];
                if (x[i] > vmax) vmax = x[i];
                sx += x[i];
            }
            b = vmin;
            a = (vmax - vmin) / (k - 1);
        }
        int verbose = false;
        int niter = 2000;
        float last_err = -1;
        int iter_last_err = 0;
        for (int it = 0; it < niter; it++) {
            float sn = 0, sn2 = 0, sxn = 0, err1 = 0;

            for (idx_t i = 0; i < n; i++) {
                float xi = x[i];
                float ni = floor ((xi - b) / a + 0.5);
                if (ni < 0) ni = 0;
                if (ni >= k) ni = k - 1;
                err1 += sqr (xi - (ni * a + b));
                sn  += ni;
                sn2 += ni * ni;
                sxn += ni * xi;
            }

            if (err1 == last_err) {
                iter_last_err ++;
                if (iter_last_err == 16) break;
            } else {
                last_err = err1;
                iter_last_err = 0;
            }

            float det = sqr (sn) - sn2 * n;

            b = (sn * sxn - sn2 * sx) / det;
            a = (sn * sx - n * sxn) / det;
            if (verbose) {
                printf ("it %d, err1=%g    \r", it, err1);
                fflush(stdout);
            }
        }
        if (verbose) printf("\n");

        vmin = b;
        vmax = b + a * (k - 1);

    } else {
        FAISS_THROW_MSG ("Invalid qtype");
    }
    vmax -= vmin;
}

void train_NonUniform(RangeStat rs, float rs_arg,
                      idx_t n, int d, int k, const float *x,
                      std::vector<float> & trained)
{
    trained.resize (2 * d);
    float * vmin = trained.data();
    float * vmax = trained.data() + d;
    if (rs == ScalarQuantizer::RS_minmax) {
        memcpy (vmin, x, sizeof(*x) * d);
        memcpy (vmax, x, sizeof(*x) * d);
        for (size_t i = 1; i < n; i++) {
            const float *xi = x + i * d;
            for (size_t j = 0; j < d; j++) {
                if (xi[j] < vmin[j]) vmin[j] = xi[j];
                if (xi[j] > vmax[j]) vmax[j] = xi[j];
            }
        }
        float *vdiff = vmax;
        for (size_t j = 0; j < d; j++) {
            float vexp = (vmax[j] - vmin[j]) * rs_arg;
            vmin[j] -= vexp;
            vmax[j] += vexp;
            vdiff [j] = vmax[j] - vmin[j];
        }
    } else {
        // transpose
        std::vector<float> xt(n * d);
        for (size_t i = 1; i < n; i++) {
            const float *xi = x + i * d;
            for (size_t j = 0; j < d; j++) {
                xt[j * n + i] = xi[j];
            }
        }
        std::vector<float> trained_d(2);
#pragma omp parallel for
        for (size_t j = 0; j < d; j++) {
            train_Uniform(rs, rs_arg,
                          n, k, xt.data() + j * n,
                          trained_d);
            vmin[j] = trained_d[0];
            vmax[j] = trained_d[1];
        }
    }
}
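The RangeStat variants differ only in how [vmin, vmax] is derived from the training data; note the final `vmax -= vmin`, so what train_Uniform actually stores is vmin and vdiff. Two of the variants in numpy terms (the rs_arg values are arbitrary):

import numpy as np

x = np.random.randn(100000).astype('float32')

# RS_minmax: exact extrema, widened by rs_arg * (max - min) on each side
rs_arg = 0.0
vmin, vmax = float(x.min()), float(x.max())
vexp = (vmax - vmin) * rs_arg
vmin, vmax = vmin - vexp, vmax + vexp

# RS_meanstd: mean +/- rs_arg standard deviations
rs_arg = 3.0
mean, std = float(x.mean()), float(x.std())
vmin2, vmax2 = mean - std * rs_arg, mean + std * rs_arg

# what gets stored in `trained`: [vmin, vdiff]
trained = [vmin, vmax - vmin]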
/*******************************************************************
 * Quantizer: normalizes scalar vector components, then passes them
 * through a codec
 */


struct Quantizer {
    virtual void encode_vector(const float *x, uint8_t *code) const = 0;
    virtual void decode_vector(const uint8_t *code, float *x) const = 0;

    virtual float compute_distance_L2 (SimilarityL2 &sim,
                                       const uint8_t * codes) const = 0;
    virtual float compute_distance_IP (SimilarityIP &sim,
                                       const uint8_t * codes) const = 0;

    virtual ~Quantizer() {}
};


template<class Codec>
struct QuantizerUniform: Quantizer {
    const size_t d;
    const float vmin, vdiff;

    QuantizerUniform(size_t d, const std::vector<float> &trained):
        d(d), vmin(trained[0]), vdiff(trained[1]) {
    }

    void encode_vector(const float* x, uint8_t* code) const override {
        for (size_t i = 0; i < d; i++) {
            float xi = (x[i] - vmin) / vdiff;
            if (xi < 0)
                xi = 0;
            if (xi > 1.0)
                xi = 1.0;
            Codec::encode_component(xi, code, i);
        }
    }

    void decode_vector(const uint8_t* code, float* x) const override {
        for (size_t i = 0; i < d; i++) {
            float xi = Codec::decode_component(code, i);
            x[i] = vmin + xi * vdiff;
        }
    }

    float reconstruct_component (const uint8_t * code, int i) const
    {
        float xi = Codec::decode_component (code, i);
        return vmin + xi * vdiff;
    }

#ifdef USE_AVX
    __m256 reconstruct_8_components (const uint8_t * code, int i) const
    {
        __m256 xi = Codec::decode_8_components (code, i);
        return _mm256_set1_ps(vmin) + xi * _mm256_set1_ps (vdiff);
    }
#endif

    float compute_distance_L2(SimilarityL2& sim, const uint8_t* codes)
        const override {
        return compute_distance(*this, sim, codes);
    }

    float compute_distance_IP(SimilarityIP& sim, const uint8_t* codes)
        const override {
        return compute_distance(*this, sim, codes);
    }
};

#ifdef USE_AVX
template<class Codec>
struct QuantizerUniform8: QuantizerUniform<Codec> {

    QuantizerUniform8 (size_t d, const std::vector<float> &trained):
        QuantizerUniform<Codec> (d, trained) {}

    float compute_distance_L2(SimilarityL2& sim, const uint8_t* codes)
        const override {
        return compute_distance_8(*this, sim, codes);
    }

    float compute_distance_IP(SimilarityIP& sim, const uint8_t* codes)
        const override {
        return compute_distance_8(*this, sim, codes);
    }
};
#endif


template<class Codec>
struct QuantizerNonUniform: Quantizer {
    const size_t d;
    const float *vmin, *vdiff;

    QuantizerNonUniform(size_t d, const std::vector<float> &trained):
        d(d), vmin(trained.data()), vdiff(trained.data() + d) {}

    void encode_vector(const float* x, uint8_t* code) const override {
        for (size_t i = 0; i < d; i++) {
            float xi = (x[i] - vmin[i]) / vdiff[i];
            if (xi < 0)
                xi = 0;
            if (xi > 1.0)
                xi = 1.0;
            Codec::encode_component(xi, code, i);
        }
    }

    void decode_vector(const uint8_t* code, float* x) const override {
        for (size_t i = 0; i < d; i++) {
            float xi = Codec::decode_component(code, i);
            x[i] = vmin[i] + xi * vdiff[i];
        }
    }

    float reconstruct_component (const uint8_t * code, int i) const
    {
        float xi = Codec::decode_component (code, i);
        return vmin[i] + xi * vdiff[i];
    }

#ifdef USE_AVX
    __m256 reconstruct_8_components (const uint8_t * code, int i) const
    {
        __m256 xi = Codec::decode_8_components (code, i);
        return _mm256_loadu_ps(vmin + i) + xi * _mm256_loadu_ps (vdiff + i);
    }
#endif

    float compute_distance_L2(SimilarityL2& sim, const uint8_t* codes)
        const override {
        return compute_distance(*this, sim, codes);
    }

    float compute_distance_IP(SimilarityIP& sim, const uint8_t* codes)
        const override {
        return compute_distance(*this, sim, codes);
    }
};

#ifdef USE_AVX
template<class Codec>
struct QuantizerNonUniform8: QuantizerNonUniform<Codec> {

    QuantizerNonUniform8 (size_t d, const std::vector<float> &trained):
        QuantizerNonUniform<Codec> (d, trained) {}

    float compute_distance_L2(SimilarityL2& sim, const uint8_t* codes)
        const override {
        return compute_distance_8(*this, sim, codes);
    }

    float compute_distance_IP(SimilarityIP& sim, const uint8_t* codes)
        const override {
        return compute_distance_8(*this, sim, codes);
    }
};
#endif


Quantizer *select_quantizer (
       QuantizerType qtype,
       size_t d, const std::vector<float> & trained)
{
#ifdef USE_AVX
    if (d % 8 == 0) {
        switch(qtype) {
        case ScalarQuantizer::QT_8bit:
            return new QuantizerNonUniform8<Codec8bit>(d, trained);
        case ScalarQuantizer::QT_4bit:
            return new QuantizerNonUniform8<Codec4bit>(d, trained);
        case ScalarQuantizer::QT_8bit_uniform:
            return new QuantizerUniform8<Codec8bit>(d, trained);
        case ScalarQuantizer::QT_4bit_uniform:
            return new QuantizerUniform8<Codec4bit>(d, trained);
        }
    } else
#endif
    {
        switch(qtype) {
        case ScalarQuantizer::QT_8bit:
            return new QuantizerNonUniform<Codec8bit>(d, trained);
        case ScalarQuantizer::QT_4bit:
            return new QuantizerNonUniform<Codec4bit>(d, trained);
        case ScalarQuantizer::QT_8bit_uniform:
            return new QuantizerUniform<Codec8bit>(d, trained);
        case ScalarQuantizer::QT_4bit_uniform:
            return new QuantizerUniform<Codec4bit>(d, trained);
        }
    }
    FAISS_THROW_MSG ("unknown qtype");
    return nullptr;
}

Quantizer *select_quantizer (const ScalarQuantizer &sq)
{
    return select_quantizer (sq.qtype, sq.d, sq.trained);
}


} // anonymous namespace



/*******************************************************************
 * ScalarQuantizer implementation
 ********************************************************************/

ScalarQuantizer::ScalarQuantizer
          (size_t d, QuantizerType qtype):
              qtype (qtype), rangestat(RS_minmax), rangestat_arg(0), d (d)
{
    switch (qtype) {
    case QT_8bit: case QT_8bit_uniform:
        code_size = d;
        break;
    case QT_4bit: case QT_4bit_uniform:
        code_size = (d + 1) / 2;
        break;
    }

}

ScalarQuantizer::ScalarQuantizer ():
    qtype(QT_8bit),
    rangestat(RS_minmax), rangestat_arg(0), d (0), code_size(0)
{}

void ScalarQuantizer::train (size_t n, const float *x)
{
    int bit_per_dim =
        qtype == QT_4bit_uniform ? 4 :
        qtype == QT_4bit ? 4 :
        qtype == QT_8bit_uniform ? 8 :
        qtype == QT_8bit ? 8 : -1;

    switch (qtype) {
    case QT_4bit_uniform: case QT_8bit_uniform:
        train_Uniform (rangestat, rangestat_arg,
                       n * d, 1 << bit_per_dim, x, trained);
        break;
    case QT_4bit: case QT_8bit:
        train_NonUniform (rangestat, rangestat_arg,
                          n, d, 1 << bit_per_dim, x, trained);
        break;
    }
}

void ScalarQuantizer::compute_codes (const float * x,
                                     uint8_t * codes,
                                     size_t n) const
{
    Quantizer *squant = select_quantizer (*this);
#pragma omp parallel for
    for (size_t i = 0; i < n; i++)
        squant->encode_vector (x + i * d, codes + i * code_size);
    delete squant;
}

void ScalarQuantizer::decode (const uint8_t *codes, float *x, size_t n) const
{
    Quantizer *squant = select_quantizer (*this);
#pragma omp parallel for
    for (size_t i = 0; i < n; i++)
        squant->decode_vector (codes + i * code_size, x + i * d);
    delete squant;
}



/*******************************************************************
 * IndexIVFScalarQuantizer implementation
 ********************************************************************/

IndexIVFScalarQuantizer::IndexIVFScalarQuantizer
          (Index *quantizer, size_t d, size_t nlist,
           QuantizerType qtype, MetricType metric):
              IndexIVF (quantizer, d, nlist, metric),
              sq (d, qtype)
{
    code_size = sq.code_size;
    codes.resize(nlist);
}

IndexIVFScalarQuantizer::IndexIVFScalarQuantizer ():
      IndexIVF (), code_size (0)
{}

void IndexIVFScalarQuantizer::train_residual (idx_t n, const float *x)
{
    long * idx = new long [n];
    ScopeDeleter<long> del (idx);
    quantizer->assign (n, x, idx);
    float *residuals = new float [n * d];
    ScopeDeleter<float> del2 (residuals);

#pragma omp parallel for
    for (idx_t i = 0; i < n; i++) {
        quantizer->compute_residual (x + i * d, residuals + i * d, idx[i]);
    }

    sq.train (n, residuals);

}


void IndexIVFScalarQuantizer::add_with_ids
       (idx_t n, const float * x, const long *xids)
{
    FAISS_THROW_IF_NOT (is_trained);
    long * idx = new long [n];
    ScopeDeleter<long> del (idx);
    quantizer->assign (n, x, idx);
    size_t nadd = 0;
    Quantizer *squant = select_quantizer (sq);
    ScopeDeleter1<Quantizer> del2 (squant);

#pragma omp parallel reduction(+: nadd)
    {
        std::vector<float> residual (d);
        int nt = omp_get_num_threads();
        int rank = omp_get_thread_num();

        for (size_t i = 0; i < n; i++) {

            long list_no = idx [i];
            if (list_no >= 0 && list_no % nt == rank) {
                long id = xids ? xids[i] : ntotal + i;

                assert (list_no < nlist);

                ids[list_no].push_back (id);
                nadd++;
                quantizer->compute_residual (
                      x + i * d, residual.data(), list_no);

                size_t cur_size = codes[list_no].size();
                codes[list_no].resize (cur_size + code_size);

                squant->encode_vector (residual.data(),
                                       codes[list_no].data() + cur_size);
            }
        }
    }
    ntotal += nadd;
}
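add_with_ids parallelizes over inverted lists rather than vectors: each of the nt threads scans all n assignments but appends only the vectors whose list number falls in its shard (list_no % nt == rank), so every list is written by exactly one thread and no locking is needed. The invariant in a few lines of Python (thread count and assignments are made up):

# sharding rule from the OpenMP region: thread `rank` of `nt` owns the
# inverted lists with list_no % nt == rank, so each list has a single writer
nt = 8
assignments = [3, 11, 3, 8, 19, 11]   # made-up coarse-quantizer output
for rank in range(nt):
    mine = [i for i, list_no in enumerate(assignments)
            if list_no % nt == rank]
    # thread `rank` would encode exactly the vectors in `mine`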
void search_with_probes_ip (const IndexIVFScalarQuantizer & index,
                            const float *x,
                            const idx_t *cent_ids, const float *cent_dis,
                            const Quantizer & quant,
                            int k, float *simi, idx_t *idxi)
{
    int nprobe = index.nprobe;
    size_t code_size = index.code_size;
    size_t d = index.d;
    std::vector<float> decoded(d);
    minheap_heapify (k, simi, idxi);
    for (int i = 0; i < nprobe; i++) {
        idx_t list_no = cent_ids[i];
        if (list_no < 0) break;
        float accu0 = cent_dis[i];

        const std::vector<idx_t> & ids = index.ids[list_no];
        const uint8_t* codes = index.codes[list_no].data();

        SimilarityIP sim(x, accu0);

        for (size_t j = 0; j < ids.size(); j++) {

            float accu = quant.compute_distance_IP(sim, codes);

            if (accu > simi [0]) {
                minheap_pop (k, simi, idxi);
                minheap_push (k, simi, idxi, accu, ids[j]);
            }
            codes += code_size;
        }

    }
    minheap_reorder (k, simi, idxi);
}

void search_with_probes_L2 (const IndexIVFScalarQuantizer & index,
                            const float *x_in,
                            const idx_t *cent_ids,
                            const Index *quantizer,
                            const Quantizer & quant,
                            int k, float *simi, idx_t *idxi)
{
    int nprobe = index.nprobe;
    size_t code_size = index.code_size;
    size_t d = index.d;
    std::vector<float> decoded(d), x(d);
    maxheap_heapify (k, simi, idxi);
    for (int i = 0; i < nprobe; i++) {
        idx_t list_no = cent_ids[i];
        if (list_no < 0) break;

        const std::vector<idx_t> & ids = index.ids[list_no];
        const uint8_t* codes = index.codes[list_no].data();

        // shift of x_in wrt centroid
        quantizer->compute_residual (x_in, x.data(), list_no);

        SimilarityL2 sim(x.data());

        for (size_t j = 0; j < ids.size(); j++) {

            float dis = quant.compute_distance_L2 (sim, codes);

            if (dis < simi [0]) {
                maxheap_pop (k, simi, idxi);
                maxheap_push (k, simi, idxi, dis, ids[j]);
            }
            codes += code_size;
        }
    }
    maxheap_reorder (k, simi, idxi);
}


void IndexIVFScalarQuantizer::search (idx_t n, const float *x, idx_t k,
                                      float *distances, idx_t *labels) const
{
    FAISS_THROW_IF_NOT (is_trained);
    idx_t *idx = new idx_t [n * nprobe];
    ScopeDeleter<idx_t> del (idx);
    float *dis = new float [n * nprobe];
    ScopeDeleter<float> del2 (dis);

    quantizer->search (n, x, nprobe, dis, idx);

    Quantizer *squant = select_quantizer (sq);
    ScopeDeleter1<Quantizer> del3(squant);

    if (metric_type == METRIC_INNER_PRODUCT) {
#pragma omp parallel for
        for (size_t i = 0; i < n; i++) {
            search_with_probes_ip (*this, x + i * d,
                                   idx + i * nprobe, dis + i * nprobe, *squant,
                                   k, distances + i * k, labels + i * k);
        }
    } else {
#pragma omp parallel for
        for (size_t i = 0; i < n; i++) {
            search_with_probes_L2 (*this, x + i * d,
                                   idx + i * nprobe, quantizer, *squant,
                                   k, distances + i * k, labels + i * k);
        }
    }

}


void IndexIVFScalarQuantizer::merge_from_residuals (IndexIVF & other_in) {
    IndexIVFScalarQuantizer &other =
        dynamic_cast<IndexIVFScalarQuantizer &> (other_in);
    for (int i = 0; i < nlist; i++) {
        std::vector<uint8_t> & src = other.codes[i];
        std::vector<uint8_t> & dest = codes[i];
        dest.insert (dest.end(), src.begin (), src.end ());
        src.clear ();
    }

}


}
IndexIVFScalarQuantizer.h (file deleted)

@ -1,118 +0,0 @@
/**
 * Copyright (c) 2015-present, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under the CC-by-NC license found in the
 * LICENSE file in the root directory of this source tree.
 */

#ifndef FAISS_INDEX_IVF_SCALAR_QUANTIZER_H
#define FAISS_INDEX_IVF_SCALAR_QUANTIZER_H

#include <stdint.h>

#include <vector>

#include "IndexIVF.h"


namespace faiss {

/** An IVF implementation where the components of the residuals are
 * encoded with a scalar uniform quantizer. All distance computations
 * are asymmetric, so the encoded vectors are decoded and approximate
 * distances are computed.
 *
 * The uniform quantizer has a range [vmin, vmax]. The range can be
 * the same for all dimensions (uniform) or specific per dimension
 * (default).
 */


struct ScalarQuantizer {

    enum QuantizerType {
        QT_8bit,             ///< 8 bits per component
        QT_4bit,             ///< 4 bits per component
        QT_8bit_uniform,     ///< same, shared range for all dimensions
        QT_4bit_uniform,
    };

    QuantizerType qtype;

    /** The uniform encoder can estimate the range of representable
     * values using different statistics. Here rs = rangestat_arg */

    // rangestat_arg.
    enum RangeStat {
        RS_minmax,    ///< [min - rs*(max-min), max + rs*(max-min)]
        RS_meanstd,   ///< [mean - std * rs, mean + std * rs]
        RS_quantiles, ///< [Q(rs), Q(1-rs)]
        RS_optim,     ///< alternate optimization of reconstruction error
    };

    RangeStat rangestat;
    float rangestat_arg;

    /// dimension of input vectors
    size_t d;

    /// bytes per vector
    size_t code_size;

    /// trained values (including the range)
    std::vector<float> trained;

    ScalarQuantizer (size_t d, QuantizerType qtype);
    ScalarQuantizer ();

    void train (size_t n, const float *x);


    /// same as compute_code for several vectors
    void compute_codes (const float * x,
                        uint8_t * codes,
                        size_t n) const ;

    /// decode a vector from a given code (or n vectors if third argument)
    void decode (const uint8_t *code, float *x, size_t n) const;

};


struct IndexIVFScalarQuantizer: IndexIVF {
    ScalarQuantizer sq;

    size_t code_size;

    /// inverted list codes.
    std::vector<std::vector<uint8_t> > codes;

    IndexIVFScalarQuantizer(Index *quantizer, size_t d, size_t nlist,
                            ScalarQuantizer::QuantizerType qtype,
                            MetricType metric = METRIC_L2);

    IndexIVFScalarQuantizer();

    void train_residual(idx_t n, const float* x) override;

    void add_with_ids(idx_t n, const float* x, const long* xids) override;

    void search(
        idx_t n,
        const float* x,
        idx_t k,
        float* distances,
        idx_t* labels) const override;

    void merge_from_residuals(IndexIVF& other) override;
};


}


#endif
Makefile (17 lines changed)
@ -29,7 +29,7 @@ LIBOBJ=hamming.o utils.o \
     Clustering.o Heap.o VectorTransform.o index_io.o \
     PolysemousTraining.o MetaIndexes.o Index.o \
     ProductQuantizer.o AutoTune.o AuxIndexStructures.o \
-    IndexIVFScalarQuantizer.o FaissException.o
+    IndexScalarQuantizer.o FaissException.o


 $(LIBNAME).a: $(LIBOBJ)

@ -71,7 +71,7 @@ tests/demo_sift1M: tests/demo_sift1M.cpp $(LIBNAME).a
 HFILES = IndexFlat.h Index.h IndexLSH.h IndexPQ.h IndexIVF.h \
     IndexIVFPQ.h VectorTransform.h index_io.h utils.h \
     PolysemousTraining.h Heap.h MetaIndexes.h AuxIndexStructures.h \
-    Clustering.h hamming.h AutoTune.h IndexIVFScalarQuantizer.h FaissException.h
+    Clustering.h hamming.h AutoTune.h IndexScalarQuantizer.h FaissException.h

 # also silently generates python/swigfaiss.py
 python/swigfaiss_wrap.cxx: swigfaiss.swig $(HFILES)

@ -89,11 +89,12 @@ _swigfaiss.so: python/_swigfaiss.so
 #############################
 # Dependencies

-# for i in *.cpp ; do gcc -I.. -MM $i -msse4; done
+# for i in *.cpp ; do g++ -std=c++11 -I.. -MM $i -msse4; done

 AutoTune.o: AutoTune.cpp AutoTune.h Index.h FaissAssert.h \
     FaissException.h utils.h Heap.h IndexFlat.h VectorTransform.h IndexLSH.h \
     IndexPQ.h ProductQuantizer.h Clustering.h PolysemousTraining.h \
-    IndexIVF.h IndexIVFPQ.h MetaIndexes.h IndexIVFScalarQuantizer.h
+    IndexIVF.h IndexIVFPQ.h MetaIndexes.h IndexScalarQuantizer.h
 AuxIndexStructures.o: AuxIndexStructures.cpp AuxIndexStructures.h Index.h
 Clustering.o: Clustering.cpp Clustering.h Index.h utils.h Heap.h \
     FaissAssert.h FaissException.h IndexFlat.h

@ -106,7 +107,7 @@ IndexFlat.o: IndexFlat.cpp IndexFlat.h Index.h utils.h Heap.h \
 index_io.o: index_io.cpp index_io.h FaissAssert.h FaissException.h \
     IndexFlat.h Index.h VectorTransform.h IndexLSH.h IndexPQ.h \
     ProductQuantizer.h Clustering.h Heap.h PolysemousTraining.h IndexIVF.h \
-    IndexIVFPQ.h MetaIndexes.h IndexIVFScalarQuantizer.h
+    IndexIVFPQ.h MetaIndexes.h IndexScalarQuantizer.h
 IndexIVF.o: IndexIVF.cpp IndexIVF.h Index.h Clustering.h Heap.h utils.h \
     hamming.h FaissAssert.h FaissException.h IndexFlat.h \
     AuxIndexStructures.h

@ -114,13 +115,13 @@ IndexIVFPQ.o: IndexIVFPQ.cpp IndexIVFPQ.h IndexIVF.h Index.h Clustering.h \
     Heap.h IndexPQ.h ProductQuantizer.h PolysemousTraining.h utils.h \
     IndexFlat.h hamming.h FaissAssert.h FaissException.h \
     AuxIndexStructures.h
-IndexIVFScalarQuantizer.o: IndexIVFScalarQuantizer.cpp \
-    IndexIVFScalarQuantizer.h IndexIVF.h Index.h Clustering.h Heap.h utils.h \
-    FaissAssert.h FaissException.h
 IndexLSH.o: IndexLSH.cpp IndexLSH.h Index.h VectorTransform.h utils.h \
     Heap.h hamming.h FaissAssert.h FaissException.h
 IndexPQ.o: IndexPQ.cpp IndexPQ.h Index.h ProductQuantizer.h Clustering.h \
     Heap.h PolysemousTraining.h FaissAssert.h FaissException.h hamming.h
+IndexScalarQuantizer.o: IndexScalarQuantizer.cpp IndexScalarQuantizer.h \
+    IndexIVF.h Index.h Clustering.h Heap.h utils.h FaissAssert.h \
+    FaissException.h
 MetaIndexes.o: MetaIndexes.cpp MetaIndexes.h Index.h FaissAssert.h \
     FaissException.h Heap.h AuxIndexStructures.h
 PolysemousTraining.o: PolysemousTraining.cpp PolysemousTraining.h \
MetaIndexes.cpp

@ -120,6 +120,48 @@ IndexIDMap::~IndexIDMap ()
     if (own_fields) delete index;
 }

+/*****************************************************
+ * IndexIDMap2 implementation
+ *******************************************************/
+
+IndexIDMap2::IndexIDMap2 (Index *index): IndexIDMap (index)
+{}
+
+void IndexIDMap2::add_with_ids(idx_t n, const float* x, const long* xids)
+{
+    size_t prev_ntotal = ntotal;
+    IndexIDMap::add_with_ids (n, x, xids);
+    for (size_t i = prev_ntotal; i < ntotal; i++) {
+        rev_map [id_map [i]] = i;
+    }
+}
+
+void IndexIDMap2::construct_rev_map ()
+{
+    rev_map.clear ();
+    for (size_t i = 0; i < ntotal; i++) {
+        rev_map [id_map [i]] = i;
+    }
+}
+
+
+long IndexIDMap2::remove_ids(const IDSelector& sel)
+{
+    // This is quite inefficient
+    long nremove = IndexIDMap::remove_ids (sel);
+    construct_rev_map ();
+    return nremove;
+}
+
+void IndexIDMap2::reconstruct (idx_t key, float * recons) const
+{
+    try {
+        index->reconstruct (rev_map.at (key), recons);
+    } catch (const std::out_of_range& e) {
+        FAISS_THROW_FMT ("key %ld not found", key);
+    }
+}
+
+
+
 /*****************************************************
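IndexIDMap2 adds the reverse mapping user-id -> sequential id that plain IndexIDMap lacks, which is what makes reconstruct work with arbitrary ids. A usage sketch, assuming the class is exposed through the swig layer like the rest of MetaIndexes.h:

import numpy as np
import faiss

d = 8
sub = faiss.IndexFlatL2(d)            # keep alive; the mapping wraps it
index = faiss.IndexIDMap2(sub)

xb = np.random.rand(10, d).astype('float32')
ids = (np.arange(10) * 100).astype('int64')   # arbitrary, non-sequential ids
index.add_with_ids(xb, ids)

rec = index.reconstruct(300)          # resolved through rev_map, then the sub-index
assert np.allclose(rec, xb[3])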
MetaIndexes.h

@ -14,6 +14,7 @@


 #include <vector>
+#include <unordered_map>

 #include "Index.h"

@ -54,6 +55,28 @@ struct IndexIDMap : Index {
     IndexIDMap () {own_fields=false; index=nullptr; }
 };

+/** same as IndexIDMap but also provides an efficient reconstruction
+    implementation via a 2-way index */
+struct IndexIDMap2 : IndexIDMap {
+
+    std::unordered_map<idx_t, idx_t> rev_map;
+
+    explicit IndexIDMap2 (Index *index);
+
+    /// make the rev_map from scratch
+    void construct_rev_map ();
+
+    void add_with_ids(idx_t n, const float* x, const long* xids) override;
+
+    long remove_ids(const IDSelector& sel) override;
+
+    void reconstruct (idx_t key, float * recons) const override;
+
+    ~IndexIDMap2() override {}
+    IndexIDMap2 () {}
+};
+
 /** Index that concatenates the results from several sub-indexes
  *
  */
VectorTransform.cpp

@ -711,6 +711,32 @@ void OPQMatrix::reverse_transform (idx_t n, const float * xt,
     transform_transpose (n, xt, x);
 }

+/*********************************************
+ * NormalizationTransform
+ *********************************************/
+
+NormalizationTransform::NormalizationTransform (int d, float norm):
+    VectorTransform (d, d), norm (norm)
+{
+}
+
+NormalizationTransform::NormalizationTransform ():
+    VectorTransform (-1, -1), norm (-1)
+{
+}
+
+void NormalizationTransform::apply_noalloc
+      (idx_t n, const float* x, float* xt) const
+{
+    if (norm == 2.0) {
+        memcpy (xt, x, sizeof (x[0]) * n * d_in);
+        fvec_renorm_L2 (d_in, n, xt);
+    } else {
+        FAISS_THROW_MSG ("not implemented");
+    }
+}
+
 /*********************************************
  * IndexPreTransform
  *********************************************/

@ -730,8 +756,6 @@ IndexPreTransform::IndexPreTransform (
 }


-
-
 IndexPreTransform::IndexPreTransform (
     VectorTransform * ltrans,
     Index * index):

@ -766,9 +790,16 @@ IndexPreTransform::~IndexPreTransform ()
 void IndexPreTransform::train (idx_t n, const float *x)
 {
     int last_untrained = 0;
-    for (int i = 0; i < chain.size(); i++)
-        if (!chain[i]->is_trained) last_untrained = i;
-    if (!index->is_trained) last_untrained = chain.size();
+    if (!index->is_trained) {
+        last_untrained = chain.size();
+    } else {
+        for (int i = chain.size() - 1; i >= 0; i--) {
+            if (!chain[i]->is_trained) {
+                last_untrained = i;
+                break;
+            }
+        }
+    }
     const float *prev_x = x;
     ScopeDeleter<float> del;
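The L2norm transform copies its input and renormalizes each row to unit L2 norm; any norm other than 2.0 throws. What apply_noalloc computes, in numpy terms (the NormalizationTransform construction goes through the swig wrapper added below):

import numpy as np
import faiss

d = 24
vt = faiss.NormalizationTransform(d, 2.0)   # same object the "L2norm" factory token builds

# apply_noalloc in numpy terms: copy, then renormalize each row
x = np.random.rand(5, d).astype('float32')
xt = x / np.linalg.norm(x, axis=1, keepdims=True)
assert np.allclose(np.linalg.norm(xt, axis=1), 1.0, atol=1e-5)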
VectorTransform.h

@ -76,7 +76,6 @@ struct VectorTransform {
  */
 struct LinearTransform: VectorTransform {

-
     bool have_bias; ///! whether to use the bias term

     /// Transformation matrix, size d_out * d_in

@ -85,7 +84,6 @@ struct LinearTransform: VectorTransform {
     /// bias vector, size d_out
     std::vector<float> b;

-
     /// both d_in > d_out and d_out < d_in are supported
     explicit LinearTransform (int d_in = 0, int d_out = 0,
                               bool have_bias = false);

@ -204,7 +202,6 @@ struct OPQMatrix: LinearTransform {
  * to compute it with matrix multiplies */
 struct RemapDimensionsTransform: VectorTransform {

-
     /// map from output dimension to input, size d_out
     /// -1 -> set output to 0
     std::vector<int> map;

@ -225,6 +222,18 @@ struct RemapDimensionsTransform: VectorTransform {
 };


+/** per-vector normalization */
+struct NormalizationTransform: VectorTransform {
+    float norm;
+
+    explicit NormalizationTransform (int d, float norm = 2.0);
+    NormalizationTransform ();
+
+    void apply_noalloc(idx_t n, const float* x, float* xt) const override;
+};
+
+
 /** Index that applies a LinearTransform transform on vectors before
  * handing them over to a sub-index */
 struct IndexPreTransform: Index {
faiss.py (34 lines changed)
@ -34,8 +34,13 @@ except ImportError as e:
 ##################################################################


-def replace_method(the_class, name, replacement):
-    orig_method = getattr(the_class, name)
+def replace_method(the_class, name, replacement, ignore_missing=False):
+    try:
+        orig_method = getattr(the_class, name)
+    except AttributeError:
+        if ignore_missing:
+            return
+        raise
     if orig_method.__name__ == 'replacement_' + name:
         # replacement was done in parent class
         return

@ -123,12 +128,31 @@ def handle_Index(the_class):
         sel = IDSelectorBatch(x.size, swig_ptr(x))
         return self.remove_ids_c(sel)

+    def replacement_reconstruct(self, key):
+        x = np.empty(self.d, dtype=np.float32)
+        self.reconstruct_c(key, swig_ptr(x))
+        return x
+
+    def replacement_reconstruct_n(self, n0, ni):
+        x = np.empty((ni, self.d), dtype=np.float32)
+        self.reconstruct_n_c(n0, ni, swig_ptr(x))
+        return x
+
+    def replacement_update_vectors(self, keys, x):
+        n = keys.size
+        assert keys.shape == (n, )
+        assert x.shape == (n, self.d)
+        self.update_vectors_c(n, swig_ptr(keys), swig_ptr(x))
+
     replace_method(the_class, 'add', replacement_add)
     replace_method(the_class, 'add_with_ids', replacement_add_with_ids)
     replace_method(the_class, 'train', replacement_train)
     replace_method(the_class, 'search', replacement_search)
     replace_method(the_class, 'remove_ids', replacement_remove_ids)
+    replace_method(the_class, 'reconstruct', replacement_reconstruct)
+    replace_method(the_class, 'reconstruct_n', replacement_reconstruct_n)
+    replace_method(the_class, 'update_vectors', replacement_update_vectors,
+                   ignore_missing=True)


 def handle_VectorTransform(the_class):

@ -228,12 +252,13 @@ def vector_float_to_array(v):

 class Kmeans:

-    def __init__(self, d, k, niter=25, verbose=False):
+    def __init__(self, d, k, niter=25, verbose=False, spherical=False):
         self.d = d
         self.k = k
         self.cp = ClusteringParameters()
         self.cp.niter = niter
         self.cp.verbose = verbose
+        self.cp.spherical = spherical
         self.centroids = None

     def train(self, x):

@ -241,6 +266,9 @@ class Kmeans:
         n, d = x.shape
         assert d == self.d
         clus = Clustering(d, self.k, self.cp)
-        self.index = IndexFlatL2(d)
+        if self.cp.spherical:
+            self.index = IndexFlatIP(d)
+        else:
+            self.index = IndexFlatL2(d)
         clus.train(x, self.index)
         centroids = vector_float_to_array(clus.centroids)
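With spherical=True the Kmeans helper assigns points by inner product (IndexFlatIP); the ClusteringParameters.spherical flag it sets is what keeps centroids L2-normalized during training. A sketch with unit-normalized input (data is random; km.centroids is filled in by train() as elsewhere in this file):

import numpy as np
import faiss

d, k = 32, 10
x = np.random.rand(1000, d).astype('float32')
x /= np.linalg.norm(x, axis=1, keepdims=True)   # spherical k-means wants unit vectors

km = faiss.Kmeans(d, k, niter=25, verbose=False, spherical=True)
km.train(x)
# km.centroids now holds the k centroids; with spherical=True the assignment
# index is IndexFlatIP instead of IndexFlatL2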
index_io.cpp (43 lines changed)
|
@ -24,7 +24,7 @@
|
|||
#include "IndexIVF.h"
|
||||
#include "IndexIVFPQ.h"
|
||||
#include "MetaIndexes.h"
|
||||
#include "IndexIVFScalarQuantizer.h"
|
||||
#include "IndexScalarQuantizer.h"
|
||||
|
||||
/*************************************************************
|
||||
* The I/O format is the content of the class. For objects that are
|
||||
|
@ -184,6 +184,11 @@ void write_VectorTransform (const VectorTransform *vt, FILE *f) {
|
|||
uint32_t h = fourcc ("RmDT");
|
||||
WRITE1 (h);
|
||||
WRITEVECTOR (rdt->map);
|
||||
} else if (const NormalizationTransform *nt =
|
||||
dynamic_cast<const NormalizationTransform *>(vt)) {
|
||||
uint32_t h = fourcc ("VNrm");
|
||||
WRITE1 (h);
|
||||
WRITE1 (nt->norm);
|
||||
} else {
|
||||
FAISS_THROW_MSG ("cannot serialize this");
|
||||
}
|
||||
|
@ -261,6 +266,13 @@ void write_index (const Index *idx, FILE *f) {
|
|||
WRITE1 (idxp->search_type);
|
||||
WRITE1 (idxp->encode_signs);
|
||||
WRITE1 (idxp->polysemous_ht);
|
||||
} else if(const IndexScalarQuantizer * idxs =
|
||||
dynamic_cast<const IndexScalarQuantizer *> (idx)) {
|
||||
uint32_t h = fourcc ("IxSQ");
|
||||
WRITE1 (h);
|
||||
write_index_header (idx, f);
|
||||
write_ScalarQuantizer (&idxs->sq, f);
|
||||
WRITEVECTOR (idxs->codes);
|
||||
} else if(const IndexIVFFlat * ivfl =
|
||||
dynamic_cast<const IndexIVFFlat *> (idx)) {
|
||||
uint32_t h = fourcc ("IvFl");
|
||||
|
@ -329,7 +341,10 @@ void write_index (const Index *idx, FILE *f) {
|
|||
WRITE1 (idxrf->k_factor);
|
||||
} else if(const IndexIDMap * idxmap =
|
||||
dynamic_cast<const IndexIDMap *> (idx)) {
|
||||
uint32_t h = fourcc ("IxMp");
|
||||
uint32_t h =
|
||||
dynamic_cast<const IndexIDMap2 *> (idx) ? fourcc ("IxM2") :
|
||||
fourcc ("IxMp");
|
||||
// no need to store additional info for IndexIDMap2
|
||||
WRITE1 (h);
|
||||
write_index_header (idxmap, f);
|
||||
write_index (idxmap->index, f);
|
||||
|
@ -400,6 +415,10 @@ VectorTransform* read_VectorTransform (FILE *f) {
|
|||
RemapDimensionsTransform *rdt = new RemapDimensionsTransform ();
|
||||
READVECTOR (rdt->map);
|
||||
vt = rdt;
|
||||
} else if (h == fourcc ("VNrm")) {
|
||||
NormalizationTransform *nt = new NormalizationTransform ();
|
||||
READ1 (nt->norm);
|
||||
vt = nt;
|
||||
} else {
|
||||
FAISS_THROW_MSG("fourcc not recognized");
|
||||
}
|
||||
|
@@ -582,6 +601,13 @@ Index *read_index (FILE * f, bool try_mmap) {
        for (size_t i = 0; i < ivfl->nlist; i++)
            READVECTOR (ivfl->vecs[i]);
        idx = ivfl;
    } else if (h == fourcc ("IxSQ")) {
        IndexScalarQuantizer * idxs = new IndexScalarQuantizer ();
        read_index_header (idxs, f);
        read_ScalarQuantizer (&idxs->sq, f);
        READVECTOR (idxs->codes);
        idxs->code_size = idxs->sq.code_size;
        idx = idxs;
    } else if(h == fourcc ("IvSQ")) {
        IndexIVFScalarQuantizer * ivsc = new IndexIVFScalarQuantizer();
        read_ivf_header (ivsc, f);

@@ -606,8 +632,9 @@ Index *read_index (FILE * f, bool try_mmap) {
        } else {
            READ1 (nt);
        }
        for (int i = 0; i < nt; i++)
        for (int i = 0; i < nt; i++) {
            ixpt->chain.push_back (read_VectorTransform (f));
        }
        ixpt->index = read_index (f);
        idx = ixpt;
    } else if(h == fourcc ("Imiq")) {
@@ -625,12 +652,16 @@ Index *read_index (FILE * f, bool try_mmap) {
        delete rf;
        READ1 (idxrf->k_factor);
        idx = idxrf;
    } else if(h == fourcc ("IxMp")) {
        IndexIDMap * idxmap = new IndexIDMap ();
    } else if(h == fourcc ("IxMp") || h == fourcc ("IxM2")) {
        bool is_map2 = h == fourcc ("IxM2");
        IndexIDMap * idxmap = is_map2 ? new IndexIDMap2 () : new IndexIDMap ();
        read_index_header (idxmap, f);
        idxmap->index = read_index (f);
        idxmap->own_fields = true;
        READVECTOR (idxmap->id_map);
        if (is_map2) {
            static_cast<IndexIDMap2*>(idxmap)->construct_rev_map ();
        }
        idx = idxmap;
    } else {
        fprintf (stderr, "Index type 0x%08x not supported\n", h);
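On load, the "IxM2" branch rebuilds rev_map, so id-based reconstruct() keeps working after deserialization. A small usage sketch (ids and data made up):

    import numpy as np
    import faiss

    d = 16
    xb = np.random.random((100, d)).astype('float32')
    ids = np.arange(100).astype('int64') + 1000

    sub = faiss.IndexFlatL2(d)
    index = faiss.IndexIDMap2(sub)
    index.add_with_ids(xb, ids)
    vec = index.reconstruct(1042)      # resolved through rev_map
    assert np.allclose(vec, xb[42])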
@@ -698,6 +729,7 @@ IndexIVF * Cloner::clone_IndexIVF (const IndexIVF *ivf)
    TRYCLONE (IndexIVFPQR, ivf)
    TRYCLONE (IndexIVFPQ, ivf)
    TRYCLONE (IndexIVFFlat, ivf)
    TRYCLONE (IndexIVFScalarQuantizer, ivf)
    {
        FAISS_THROW_MSG("clone not supported for this type of IndexIVF");
    }

@@ -711,6 +743,7 @@ Index *Cloner::clone_Index (const Index *index)
    TRYCLONE (IndexFlatL2, index)
    TRYCLONE (IndexFlatIP, index)
    TRYCLONE (IndexFlat, index)
    TRYCLONE (IndexScalarQuantizer, index)
    TRYCLONE (MultiIndexQuantizer, index)
    if (const IndexIVF * ivf = dynamic_cast<const IndexIVF*>(index)) {
        IndexIVF *res = clone_IndexIVF (ivf);
swigfaiss.py

@@ -1094,6 +1094,27 @@ class RemapDimensionsTransform(VectorTransform):
RemapDimensionsTransform_swigregister = _swigfaiss.RemapDimensionsTransform_swigregister
RemapDimensionsTransform_swigregister(RemapDimensionsTransform)

class NormalizationTransform(VectorTransform):
    __swig_setmethods__ = {}
    for _s in [VectorTransform]: __swig_setmethods__.update(getattr(_s,'__swig_setmethods__',{}))
    __setattr__ = lambda self, name, value: _swig_setattr(self, NormalizationTransform, name, value)
    __swig_getmethods__ = {}
    for _s in [VectorTransform]: __swig_getmethods__.update(getattr(_s,'__swig_getmethods__',{}))
    __getattr__ = lambda self, name: _swig_getattr(self, NormalizationTransform, name)
    __repr__ = _swig_repr
    __swig_setmethods__["norm"] = _swigfaiss.NormalizationTransform_norm_set
    __swig_getmethods__["norm"] = _swigfaiss.NormalizationTransform_norm_get
    if _newclass:norm = _swig_property(_swigfaiss.NormalizationTransform_norm_get, _swigfaiss.NormalizationTransform_norm_set)
    def __init__(self, *args):
        this = _swigfaiss.new_NormalizationTransform(*args)
        try: self.this.append(this)
        except: self.this = this
    def apply_noalloc(self, *args): return _swigfaiss.NormalizationTransform_apply_noalloc(self, *args)
    __swig_destroy__ = _swigfaiss.delete_NormalizationTransform
    __del__ = lambda self : None;
NormalizationTransform_swigregister = _swigfaiss.NormalizationTransform_swigregister
NormalizationTransform_swigregister(NormalizationTransform)

class IndexPreTransform(Index):
    __swig_setmethods__ = {}
    for _s in [Index]: __swig_setmethods__.update(getattr(_s,'__swig_setmethods__',{}))
@@ -1635,7 +1656,7 @@ class IndexIVF(Index):
    __swig_destroy__ = _swigfaiss.delete_IndexIVF
    __del__ = lambda self : None;
    def get_list_size(self, *args): return _swigfaiss.IndexIVF_get_list_size(self, *args)
    def make_direct_map(self): return _swigfaiss.IndexIVF_make_direct_map(self)
    def make_direct_map(self, new_maintain_direct_map=True): return _swigfaiss.IndexIVF_make_direct_map(self, new_maintain_direct_map)
    def imbalance_factor(self): return _swigfaiss.IndexIVF_imbalance_factor(self)
    def print_stats(self): return _swigfaiss.IndexIVF_print_stats(self)
IndexIVF_swigregister = _swigfaiss.IndexIVF_swigregister
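make_direct_map now takes a boolean, so the id-to-entry map can be dropped as well as built. A hedged sketch on a small IVF index (parameters illustrative):

    import numpy as np
    import faiss

    d = 32
    xb = np.random.random((1000, d)).astype('float32')
    quantizer = faiss.IndexFlatL2(d)
    index = faiss.IndexIVFFlat(quantizer, d, 16)
    index.train(xb)
    index.add(xb)
    index.make_direct_map()        # build the direct map; enables reconstruct()
    v = index.reconstruct(5)       # copy of the vector stored with id 5
    index.make_direct_map(False)   # new in this sync: release the map again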
@@ -1690,6 +1711,7 @@ class IndexIVFFlat(IndexIVF):
    def remove_ids(self, *args): return _swigfaiss.IndexIVFFlat_remove_ids(self, *args)
    def search_knn_inner_product(self, *args): return _swigfaiss.IndexIVFFlat_search_knn_inner_product(self, *args)
    def search_knn_L2sqr(self, *args): return _swigfaiss.IndexIVFFlat_search_knn_L2sqr(self, *args)
    def update_vectors(self, *args): return _swigfaiss.IndexIVFFlat_update_vectors(self, *args)
    def reconstruct(self, *args): return _swigfaiss.IndexIVFFlat_reconstruct(self, *args)
    def merge_from_residuals(self, *args): return _swigfaiss.IndexIVFFlat_merge_from_residuals(self, *args)
    def __init__(self, *args):
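update_vectors is the Python side of the new IndexIVFFlat update: it replaces the stored copies of the vectors with the given ids, in place. A sketch against the raw SWIG signature (count, id pointer, vector pointer); it assumes the direct map must be maintained first, as the C++ implementation suggests:

    import numpy as np
    import faiss

    d = 32
    xb = np.random.random((1000, d)).astype('float32')
    quantizer = faiss.IndexFlatL2(d)
    index = faiss.IndexIVFFlat(quantizer, d, 16)
    index.train(xb)
    index.add(xb)
    index.make_direct_map()                      # assumed prerequisite
    ids = np.arange(5).astype('int64')
    new_vecs = np.random.random((5, d)).astype('float32')
    index.update_vectors(5, faiss.swig_ptr(ids), faiss.swig_ptr(new_vecs))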
@@ -1770,6 +1792,38 @@ class ScalarQuantizer(_object):
ScalarQuantizer_swigregister = _swigfaiss.ScalarQuantizer_swigregister
ScalarQuantizer_swigregister(ScalarQuantizer)

class IndexScalarQuantizer(Index):
    __swig_setmethods__ = {}
    for _s in [Index]: __swig_setmethods__.update(getattr(_s,'__swig_setmethods__',{}))
    __setattr__ = lambda self, name, value: _swig_setattr(self, IndexScalarQuantizer, name, value)
    __swig_getmethods__ = {}
    for _s in [Index]: __swig_getmethods__.update(getattr(_s,'__swig_getmethods__',{}))
    __getattr__ = lambda self, name: _swig_getattr(self, IndexScalarQuantizer, name)
    __repr__ = _swig_repr
    __swig_setmethods__["sq"] = _swigfaiss.IndexScalarQuantizer_sq_set
    __swig_getmethods__["sq"] = _swigfaiss.IndexScalarQuantizer_sq_get
    if _newclass:sq = _swig_property(_swigfaiss.IndexScalarQuantizer_sq_get, _swigfaiss.IndexScalarQuantizer_sq_set)
    __swig_setmethods__["codes"] = _swigfaiss.IndexScalarQuantizer_codes_set
    __swig_getmethods__["codes"] = _swigfaiss.IndexScalarQuantizer_codes_get
    if _newclass:codes = _swig_property(_swigfaiss.IndexScalarQuantizer_codes_get, _swigfaiss.IndexScalarQuantizer_codes_set)
    __swig_setmethods__["code_size"] = _swigfaiss.IndexScalarQuantizer_code_size_set
    __swig_getmethods__["code_size"] = _swigfaiss.IndexScalarQuantizer_code_size_get
    if _newclass:code_size = _swig_property(_swigfaiss.IndexScalarQuantizer_code_size_get, _swigfaiss.IndexScalarQuantizer_code_size_set)
    def __init__(self, *args):
        this = _swigfaiss.new_IndexScalarQuantizer(*args)
        try: self.this.append(this)
        except: self.this = this
    def train(self, *args): return _swigfaiss.IndexScalarQuantizer_train(self, *args)
    def add(self, *args): return _swigfaiss.IndexScalarQuantizer_add(self, *args)
    def search(self, *args): return _swigfaiss.IndexScalarQuantizer_search(self, *args)
    def reset(self): return _swigfaiss.IndexScalarQuantizer_reset(self)
    def reconstruct_n(self, *args): return _swigfaiss.IndexScalarQuantizer_reconstruct_n(self, *args)
    def reconstruct(self, *args): return _swigfaiss.IndexScalarQuantizer_reconstruct(self, *args)
    __swig_destroy__ = _swigfaiss.delete_IndexScalarQuantizer
    __del__ = lambda self : None;
IndexScalarQuantizer_swigregister = _swigfaiss.IndexScalarQuantizer_swigregister
IndexScalarQuantizer_swigregister(IndexScalarQuantizer)
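The new non-IVF scalar quantizer behaves like any other trained index; the constructor arguments match the test code further down in this commit. A minimal sketch with synthetic data:

    import numpy as np
    import faiss

    d = 64
    xt = np.random.random((2000, d)).astype('float32')
    xb = np.random.random((10000, d)).astype('float32')

    # QT_8bit stores one byte per component, so code_size == d
    index = faiss.IndexScalarQuantizer(d, faiss.ScalarQuantizer.QT_8bit, faiss.METRIC_L2)
    index.train(xt)    # estimates per-dimension quantization ranges
    index.add(xb)
    D, I = index.search(xb[:5], 10)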

class IndexIVFScalarQuantizer(IndexIVF):
    __swig_setmethods__ = {}
    for _s in [IndexIVF]: __swig_setmethods__.update(getattr(_s,'__swig_setmethods__',{}))
@@ -2024,6 +2078,30 @@ class IndexIDMap(Index):
IndexIDMap_swigregister = _swigfaiss.IndexIDMap_swigregister
IndexIDMap_swigregister(IndexIDMap)

class IndexIDMap2(IndexIDMap):
    __swig_setmethods__ = {}
    for _s in [IndexIDMap]: __swig_setmethods__.update(getattr(_s,'__swig_setmethods__',{}))
    __setattr__ = lambda self, name, value: _swig_setattr(self, IndexIDMap2, name, value)
    __swig_getmethods__ = {}
    for _s in [IndexIDMap]: __swig_getmethods__.update(getattr(_s,'__swig_getmethods__',{}))
    __getattr__ = lambda self, name: _swig_getattr(self, IndexIDMap2, name)
    __repr__ = _swig_repr
    __swig_setmethods__["rev_map"] = _swigfaiss.IndexIDMap2_rev_map_set
    __swig_getmethods__["rev_map"] = _swigfaiss.IndexIDMap2_rev_map_get
    if _newclass:rev_map = _swig_property(_swigfaiss.IndexIDMap2_rev_map_get, _swigfaiss.IndexIDMap2_rev_map_set)
    def construct_rev_map(self): return _swigfaiss.IndexIDMap2_construct_rev_map(self)
    def add_with_ids(self, *args): return _swigfaiss.IndexIDMap2_add_with_ids(self, *args)
    def remove_ids(self, *args): return _swigfaiss.IndexIDMap2_remove_ids(self, *args)
    def reconstruct(self, *args): return _swigfaiss.IndexIDMap2_reconstruct(self, *args)
    __swig_destroy__ = _swigfaiss.delete_IndexIDMap2
    __del__ = lambda self : None;
    def __init__(self, *args):
        this = _swigfaiss.new_IndexIDMap2(*args)
        try: self.this.append(this)
        except: self.this = this
IndexIDMap2_swigregister = _swigfaiss.IndexIDMap2_swigregister
IndexIDMap2_swigregister(IndexIDMap2)

class IndexShards(Index):
    __swig_setmethods__ = {}
    for _s in [Index]: __swig_setmethods__.update(getattr(_s,'__swig_setmethods__',{}))
swigfaiss_gpu.py

@@ -1163,6 +1163,27 @@ class RemapDimensionsTransform(VectorTransform):
RemapDimensionsTransform_swigregister = _swigfaiss_gpu.RemapDimensionsTransform_swigregister
RemapDimensionsTransform_swigregister(RemapDimensionsTransform)

class NormalizationTransform(VectorTransform):
    __swig_setmethods__ = {}
    for _s in [VectorTransform]: __swig_setmethods__.update(getattr(_s,'__swig_setmethods__',{}))
    __setattr__ = lambda self, name, value: _swig_setattr(self, NormalizationTransform, name, value)
    __swig_getmethods__ = {}
    for _s in [VectorTransform]: __swig_getmethods__.update(getattr(_s,'__swig_getmethods__',{}))
    __getattr__ = lambda self, name: _swig_getattr(self, NormalizationTransform, name)
    __repr__ = _swig_repr
    __swig_setmethods__["norm"] = _swigfaiss_gpu.NormalizationTransform_norm_set
    __swig_getmethods__["norm"] = _swigfaiss_gpu.NormalizationTransform_norm_get
    if _newclass:norm = _swig_property(_swigfaiss_gpu.NormalizationTransform_norm_get, _swigfaiss_gpu.NormalizationTransform_norm_set)
    def __init__(self, *args):
        this = _swigfaiss_gpu.new_NormalizationTransform(*args)
        try: self.this.append(this)
        except: self.this = this
    def apply_noalloc(self, *args): return _swigfaiss_gpu.NormalizationTransform_apply_noalloc(self, *args)
    __swig_destroy__ = _swigfaiss_gpu.delete_NormalizationTransform
    __del__ = lambda self : None;
NormalizationTransform_swigregister = _swigfaiss_gpu.NormalizationTransform_swigregister
NormalizationTransform_swigregister(NormalizationTransform)

class IndexPreTransform(Index):
    __swig_setmethods__ = {}
    for _s in [Index]: __swig_setmethods__.update(getattr(_s,'__swig_setmethods__',{}))
@@ -1704,7 +1725,7 @@ class IndexIVF(Index):
    __swig_destroy__ = _swigfaiss_gpu.delete_IndexIVF
    __del__ = lambda self : None;
    def get_list_size(self, *args): return _swigfaiss_gpu.IndexIVF_get_list_size(self, *args)
    def make_direct_map(self): return _swigfaiss_gpu.IndexIVF_make_direct_map(self)
    def make_direct_map(self, new_maintain_direct_map=True): return _swigfaiss_gpu.IndexIVF_make_direct_map(self, new_maintain_direct_map)
    def imbalance_factor(self): return _swigfaiss_gpu.IndexIVF_imbalance_factor(self)
    def print_stats(self): return _swigfaiss_gpu.IndexIVF_print_stats(self)
IndexIVF_swigregister = _swigfaiss_gpu.IndexIVF_swigregister
@@ -1759,6 +1780,7 @@ class IndexIVFFlat(IndexIVF):
    def remove_ids(self, *args): return _swigfaiss_gpu.IndexIVFFlat_remove_ids(self, *args)
    def search_knn_inner_product(self, *args): return _swigfaiss_gpu.IndexIVFFlat_search_knn_inner_product(self, *args)
    def search_knn_L2sqr(self, *args): return _swigfaiss_gpu.IndexIVFFlat_search_knn_L2sqr(self, *args)
    def update_vectors(self, *args): return _swigfaiss_gpu.IndexIVFFlat_update_vectors(self, *args)
    def reconstruct(self, *args): return _swigfaiss_gpu.IndexIVFFlat_reconstruct(self, *args)
    def merge_from_residuals(self, *args): return _swigfaiss_gpu.IndexIVFFlat_merge_from_residuals(self, *args)
    def __init__(self, *args):
@@ -1839,6 +1861,38 @@ class ScalarQuantizer(_object):
ScalarQuantizer_swigregister = _swigfaiss_gpu.ScalarQuantizer_swigregister
ScalarQuantizer_swigregister(ScalarQuantizer)

class IndexScalarQuantizer(Index):
    __swig_setmethods__ = {}
    for _s in [Index]: __swig_setmethods__.update(getattr(_s,'__swig_setmethods__',{}))
    __setattr__ = lambda self, name, value: _swig_setattr(self, IndexScalarQuantizer, name, value)
    __swig_getmethods__ = {}
    for _s in [Index]: __swig_getmethods__.update(getattr(_s,'__swig_getmethods__',{}))
    __getattr__ = lambda self, name: _swig_getattr(self, IndexScalarQuantizer, name)
    __repr__ = _swig_repr
    __swig_setmethods__["sq"] = _swigfaiss_gpu.IndexScalarQuantizer_sq_set
    __swig_getmethods__["sq"] = _swigfaiss_gpu.IndexScalarQuantizer_sq_get
    if _newclass:sq = _swig_property(_swigfaiss_gpu.IndexScalarQuantizer_sq_get, _swigfaiss_gpu.IndexScalarQuantizer_sq_set)
    __swig_setmethods__["codes"] = _swigfaiss_gpu.IndexScalarQuantizer_codes_set
    __swig_getmethods__["codes"] = _swigfaiss_gpu.IndexScalarQuantizer_codes_get
    if _newclass:codes = _swig_property(_swigfaiss_gpu.IndexScalarQuantizer_codes_get, _swigfaiss_gpu.IndexScalarQuantizer_codes_set)
    __swig_setmethods__["code_size"] = _swigfaiss_gpu.IndexScalarQuantizer_code_size_set
    __swig_getmethods__["code_size"] = _swigfaiss_gpu.IndexScalarQuantizer_code_size_get
    if _newclass:code_size = _swig_property(_swigfaiss_gpu.IndexScalarQuantizer_code_size_get, _swigfaiss_gpu.IndexScalarQuantizer_code_size_set)
    def __init__(self, *args):
        this = _swigfaiss_gpu.new_IndexScalarQuantizer(*args)
        try: self.this.append(this)
        except: self.this = this
    def train(self, *args): return _swigfaiss_gpu.IndexScalarQuantizer_train(self, *args)
    def add(self, *args): return _swigfaiss_gpu.IndexScalarQuantizer_add(self, *args)
    def search(self, *args): return _swigfaiss_gpu.IndexScalarQuantizer_search(self, *args)
    def reset(self): return _swigfaiss_gpu.IndexScalarQuantizer_reset(self)
    def reconstruct_n(self, *args): return _swigfaiss_gpu.IndexScalarQuantizer_reconstruct_n(self, *args)
    def reconstruct(self, *args): return _swigfaiss_gpu.IndexScalarQuantizer_reconstruct(self, *args)
    __swig_destroy__ = _swigfaiss_gpu.delete_IndexScalarQuantizer
    __del__ = lambda self : None;
IndexScalarQuantizer_swigregister = _swigfaiss_gpu.IndexScalarQuantizer_swigregister
IndexScalarQuantizer_swigregister(IndexScalarQuantizer)

class IndexIVFScalarQuantizer(IndexIVF):
    __swig_setmethods__ = {}
    for _s in [IndexIVF]: __swig_setmethods__.update(getattr(_s,'__swig_setmethods__',{}))
@@ -2093,6 +2147,30 @@ class IndexIDMap(Index):
IndexIDMap_swigregister = _swigfaiss_gpu.IndexIDMap_swigregister
IndexIDMap_swigregister(IndexIDMap)

class IndexIDMap2(IndexIDMap):
    __swig_setmethods__ = {}
    for _s in [IndexIDMap]: __swig_setmethods__.update(getattr(_s,'__swig_setmethods__',{}))
    __setattr__ = lambda self, name, value: _swig_setattr(self, IndexIDMap2, name, value)
    __swig_getmethods__ = {}
    for _s in [IndexIDMap]: __swig_getmethods__.update(getattr(_s,'__swig_getmethods__',{}))
    __getattr__ = lambda self, name: _swig_getattr(self, IndexIDMap2, name)
    __repr__ = _swig_repr
    __swig_setmethods__["rev_map"] = _swigfaiss_gpu.IndexIDMap2_rev_map_set
    __swig_getmethods__["rev_map"] = _swigfaiss_gpu.IndexIDMap2_rev_map_get
    if _newclass:rev_map = _swig_property(_swigfaiss_gpu.IndexIDMap2_rev_map_get, _swigfaiss_gpu.IndexIDMap2_rev_map_set)
    def construct_rev_map(self): return _swigfaiss_gpu.IndexIDMap2_construct_rev_map(self)
    def add_with_ids(self, *args): return _swigfaiss_gpu.IndexIDMap2_add_with_ids(self, *args)
    def remove_ids(self, *args): return _swigfaiss_gpu.IndexIDMap2_remove_ids(self, *args)
    def reconstruct(self, *args): return _swigfaiss_gpu.IndexIDMap2_reconstruct(self, *args)
    __swig_destroy__ = _swigfaiss_gpu.delete_IndexIDMap2
    __del__ = lambda self : None;
    def __init__(self, *args):
        this = _swigfaiss_gpu.new_IndexIDMap2(*args)
        try: self.this.append(this)
        except: self.this = this
IndexIDMap2_swigregister = _swigfaiss_gpu.IndexIDMap2_swigregister
IndexIDMap2_swigregister(IndexIDMap2)

class IndexShards(Index):
    __swig_setmethods__ = {}
    for _s in [Index]: __swig_setmethods__.update(getattr(_s,'__swig_setmethods__',{}))
swigfaiss.swig

@@ -74,7 +74,7 @@ extern "C" {
#include "IndexPQ.h"
#include "IndexIVF.h"
#include "IndexIVFPQ.h"
#include "IndexIVFScalarQuantizer.h"
#include "IndexScalarQuantizer.h"

#include "MetaIndexes.h"
#include "FaissAssert.h"

@@ -240,7 +240,7 @@ int get_num_gpus()
%include "PolysemousTraining.h"
%include "IndexPQ.h"
%include "IndexIVF.h"
%include "IndexIVFScalarQuantizer.h"
%include "IndexScalarQuantizer.h"

%ignore faiss::IndexIVFPQ::alloc_type;
%include "IndexIVFPQ.h"

@@ -426,6 +426,7 @@ struct AsyncIndexSearchC {
    DOWNCAST ( IndexIVF )
    DOWNCAST ( IndexFlat )
    DOWNCAST ( IndexPQ )
    DOWNCAST ( IndexScalarQuantizer )
    DOWNCAST ( IndexLSH )
    DOWNCAST ( IndexPreTransform )
    DOWNCAST ( MultiIndexQuantizer )

@@ -457,6 +458,7 @@ struct AsyncIndexSearchC {
    DOWNCAST (PCAMatrix)
    DOWNCAST (RandomRotationMatrix)
    DOWNCAST (LinearTransform)
    DOWNCAST (NormalizationTransform)
    DOWNCAST (VectorTransform)
    {
        assert(false);
test_build_blocks.py

@@ -11,6 +11,7 @@ import numpy as np
import faiss
import unittest


class TestClustering(unittest.TestCase):

    def test_clustering(self):

@@ -34,6 +35,17 @@ class TestClustering(unittest.TestCase):
        # check that 64 centroids give a lower quantization error than 32
        self.assertGreater(err32, err64)

    def test_nasty_clustering(self):
        d = 2
        np.random.seed(123)
        x = np.zeros((100, d), dtype='float32')
        for i in range(5):
            x[i * 20:i * 20 + 20] = np.random.random(size=d)

        # we have 5 distinct points but ask for 10 centroids...
        km = faiss.Kmeans(d, 10, niter=10, verbose=True)
        km.train(x)


class TestPCA(unittest.TestCase):
test_index.py

@@ -6,10 +6,8 @@

#! /usr/bin/env python2

"""this is a basic test script that works with fbmake to check if
some simple indices work"""
"""this is a basic test script for simple indices"""

import sys
import numpy as np
import unittest
import faiss
@@ -75,9 +73,9 @@ class TestMultiIndexQuantizer(unittest.TestCase):
        self.assertEqual(np.abs(D1[:, :1] - D5[:, :1]).max(), 0)


class TestIVFScalarQuantizer(unittest.TestCase):
class TestScalarQuantizer(unittest.TestCase):

    def test_4variants(self):
    def test_4variants_ivf(self):
        d = 32
        nt = 1500
        nq = 200
@@ -127,19 +125,39 @@ class TestIVFScalarQuantizer(unittest.TestCase):
        self.assertGreaterEqual(nok['QT_8bit'], nok['QT_8bit_uniform'])
        self.assertGreaterEqual(nok['QT_4bit'], nok['QT_4bit_uniform'])

    def test_4variants(self):
        d = 32
        nt = 1500
        nq = 200
        nb = 10000

        np.random.seed(123)

        xt = np.random.random(size=(nt, d)).astype('float32')
        xq = np.random.random(size=(nq, d)).astype('float32')
        xb = np.random.random(size=(nb, d)).astype('float32')

        index_gt = faiss.IndexFlatL2(d)
        index_gt.add(xb)
        D, I_ref = index_gt.search(xq, 10)

        nok = {}

        for qname in "QT_4bit QT_4bit_uniform QT_8bit QT_8bit_uniform".split():
            qtype = getattr(faiss.ScalarQuantizer, qname)
            index = faiss.IndexScalarQuantizer(d, qtype, faiss.METRIC_L2)
            index.train(xt)
            index.add(xb)
            D, I = index.search(xq, 10)
            nok[qname] = (I[:, 0] == I_ref[:, 0]).sum()

        print(nok)

        self.assertGreaterEqual(nok['QT_8bit'], nok['QT_4bit'])
        self.assertGreaterEqual(nok['QT_8bit'], nok['QT_8bit_uniform'])
        self.assertGreaterEqual(nok['QT_4bit'], nok['QT_4bit_uniform'])


class TestRemove(unittest.TestCase):

    def test_remove(self):
        # only tests the python interface
        index = faiss.IndexFlat(5)
        xb = np.zeros((10, 5), dtype='float32')
        xb[:, 0] = np.arange(10) + 1000
        index.add(xb)
        index.remove_ids(np.arange(5) * 2)
        xb2 = faiss.vector_float_to_array(index.xb).reshape(5, 5)
        assert np.all(xb2[:, 0] == xb[np.arange(5) * 2 + 1, 0])


if __name__ == '__main__':
10
utils.cpp
@@ -1418,7 +1418,7 @@ int km_update_centroids (const float * x,
    for (size_t ci = 0; ci < k; ci++) {
        if (hassign[ci] == 0) { /* need to redefine a centroid */
            size_t cj;
            for (cj = 0; 1; cj = (cj+1) % k) {
            for (cj = 0; 1; cj = (cj + 1) % k) {
                /* probability to pick this cluster for split */
                float p = (hassign[cj] - 1.0) / (float) (n - k);
                float r = rng.rand_float ();

@@ -1429,14 +1429,14 @@ int km_update_centroids (const float * x,
            memcpy (centroids+ci*d, centroids+cj*d, sizeof(*centroids) * d);

            /* small symmetric perturbation */
            for (size_t j = 0; j < d; j++)
            for (size_t j = 0; j < d; j++) {
                if (j % 2 == 0) {
                    centroids[ci * d + j] *= 1 + EPS;
                    centroids[cj * d + j] *= 1 - EPS;
                } else {
                    centroids[ci * d + j] *= 1 - EPS;
                    centroids[cj * d + j] *= 1 + EPS;
                }
                else {
                    centroids[ci * d + j] *= 1 + EPS;
                    centroids[cj * d + j] *= 1 - EPS;
                }
            }

            /* assume even split of the cluster */
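For intuition: an empty centroid ci copies a donor centroid cj, picked with probability proportional to (cluster size - 1) / (n - k), and the two copies are pushed apart by a per-dimension factor of 1 +/- EPS with alternating sign. A hedged numpy sketch of the same rule (names and the EPS value mirror the C++ above, but this is illustrative, not the library code):

    import numpy as np

    def split_empty_cluster(centroids, hassign, ci, rng, EPS=1 / 1024.):
        # pick a donor cluster cj with prob. proportional to (size - 1) / (n - k)
        k, d = centroids.shape
        n = hassign.sum()
        cj = 0
        while True:
            p = (hassign[cj] - 1.0) / float(n - k)
            if rng.random() < p:
                break
            cj = (cj + 1) % k
        centroids[ci] = centroids[cj]
        # small symmetric perturbation, sign alternating per dimension
        sign = np.where(np.arange(d) % 2 == 0, 1.0, -1.0)
        centroids[ci] *= 1 + sign * EPS
        centroids[cj] *= 1 - sign * EPS
        # assume even split of the donor cluster
        hassign[ci] = hassign[cj] // 2
        hassign[cj] -= hassign[ci]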