mirror of
https://github.com/facebookresearch/faiss.git
synced 2025-06-03 21:54:02 +08:00
Summary: Pull Request resolved: https://github.com/facebookresearch/faiss/pull/2568 Add a fused kernel for the exhaustive_L2sqr_blas() call that combines the computation of the dot product with the search for the nearest centroid. As a result, no temporary dot product values are written to or read from RAM. This speeds up the training of PQx indices for dsub = 1, 2, 4, 8, and the effect is larger for higher values of x. The AVX512 version provides additional overloads for dsub = 12, 16. The speedup is also beneficial for higher values of pq.cp.max_points_per_centroid (which is 256 by default). It speeds up IVFPQ training as well. The AVX512 kernel is not enabled by default, but I have seen it make training about twice as fast as the AVX2 version, so please feel free to use it by enabling AVX512 manually. Reviewed By: mdouze Differential Revision: D41166766 fbshipit-source-id: 443014e2e59396b3a90b9171fec8c8191052bcf4
31 lines
1.1 KiB
C++
31 lines
1.1 KiB
C++
#include <gtest/gtest.h>
|
|
|
|
#include <faiss/utils/simdlib.h>
|
|
|
|
using namespace faiss;
|
|
|
|
// Feeds three candidate (value, index) sets into cmplt_and_blend_inplace()
// one after another and checks the accumulated per-lane state.
//
// Outcome expected by this test: only lanes 6 and 7 change, taking value 5
// and indices 16 / 17 from the first candidate set. The second set (6 in
// every lane) and the third set (values equal to the expected final values)
// must leave both the values and the indices untouched.
TEST(TEST_SIMDLIB, TestCmpltAndBlendInplace) {
    // Accumulated state that every call below updates in place.
    simd8float32 runningValues(0, 1, 2, 3, 4, 5, 6, 7);
    simd8uint32 runningIndices(0, 1, 2, 3, 4, 5, 6, 7);

    // Candidate sets; each one uses its own index range (10s, 20s, 30s) so
    // the origin of every lane in the final result can be identified.
    simd8float32 firstValues(5, 5, 5, 5, 5, 5, 5, 5);
    simd8uint32 firstIndices(10, 11, 12, 13, 14, 15, 16, 17);

    simd8float32 secondValues(6, 6, 6, 6, 6, 6, 6, 6);
    simd8uint32 secondIndices(20, 21, 22, 23, 24, 25, 26, 27);

    simd8float32 thirdValues(0, 1, 2, 3, 4, 5, 5, 5);
    simd8uint32 thirdIndices(30, 31, 32, 33, 34, 35, 36, 37);

    // Apply the candidates in order; the call sequence matters because the
    // running state is carried from one call to the next.
    cmplt_and_blend_inplace(
            firstValues, firstIndices, runningValues, runningIndices);
    cmplt_and_blend_inplace(
            secondValues, secondIndices, runningValues, runningIndices);
    cmplt_and_blend_inplace(
            thirdValues, thirdIndices, runningValues, runningIndices);

    // Indices 16 and 17 (not 36 and 37) are expected in the last two lanes:
    // the third set only ties the stored values there.
    simd8float32 wantValues(0, 1, 2, 3, 4, 5, 5, 5);
    simd8uint32 wantIndices(0, 1, 2, 3, 4, 5, 16, 17);

    ASSERT_EQ(runningValues, wantValues);
    ASSERT_EQ(runningIndices, wantIndices);
}
|