mirror of
https://github.com/facebookresearch/faiss.git
synced 2025-06-03 21:54:02 +08:00
Changelog: - changed license: BSD+Patents -> MIT - propagates exceptions raised in sub-indexes of IndexShards and IndexReplicas - support for searching several inverted lists in parallel (parallel_mode != 0) - better support for PQ codes where nbit != 8 or 16 - IVFSpectralHash implementation: spectral hash codes inside an IVF - 6-bit per component scalar quantizer (4 and 8 bit were already supported) - combinations of inverted lists: HStackInvertedLists and VStackInvertedLists - configurable number of threads for OnDiskInvertedLists prefetching (including 0=no prefetch) - more test and demo code compatible with Python 3 (print with parentheses) - refactored benchmark code: data loading is now in a single file
48 lines
2.6 KiB
Plaintext
48 lines
2.6 KiB
Plaintext
/**
 * Copyright (c) Facebook, Inc. and its affiliates.
 *
 * This source code is licensed under the MIT license found in the
 * LICENSE file in the root directory of this source tree.
 */
|
|
|
|
#include "../WarpSelectKernel.cuh"
|
|
#include "../Limits.cuh"
|
|
|
|
// Declares (without defining) the warp-select launcher specialised for one
// (TYPE, DIR, WARP_Q) combination.
//
// The function name is assembled by token pasting:
//   runWarpSelect_<TYPE>_<DIR>_<WARP_Q>_
//
// Parameters of the declared function:
//   in     - 2-d tensor of TYPE read by the launcher
//   outK   - 2-d tensor of TYPE, passed to the kernel as its key output
//   outV   - 2-d tensor of int, passed to the kernel as its value output
//   dir    - runtime selection direction
//   k      - number of entries to select
//   stream - CUDA stream the work is submitted to
//
// The expansion carries no trailing semicolon; the use site must supply one.
#define WARP_SELECT_DECL(TYPE, DIR, WARP_Q)                               \
  extern void runWarpSelect_ ## TYPE ## _ ## DIR ## _ ## WARP_Q ## _(     \
    Tensor<TYPE, 2, true>& in,                                            \
    Tensor<TYPE, 2, true>& outK,                                          \
    Tensor<int, 2, true>& outV,                                           \
    bool dir,                                                             \
    int k,                                                                \
    cudaStream_t stream)
|
|
|
|
// Defines the warp-select launcher runWarpSelect_<TYPE>_<DIR>_<WARP_Q>_
// matching the declaration produced by WARP_SELECT_DECL.
//
// Macro arguments:
//   TYPE     - element type of the `in` / `outK` tensors
//   DIR      - compile-time selection direction; the runtime `dir` argument
//              must equal it (asserted)
//   WARP_Q   - compile-time queue size; `k` must not exceed it (asserted)
//   THREAD_Q - forwarded unchanged as a template argument of warpSelect
//
// Launch configuration: blocks of 128 threads, and
// divUp(in.getSize(0), 128 / kWarpSize) blocks, i.e. one warp's worth of
// threads per entry along dimension 0 of `in`.
// NOTE(review): presumably dimension 0 is the row/query dimension and each
// warp handles one row - confirm against the warpSelect kernel.
//
// The key accumulator is seeded with Limits<TYPE>::getMin() when `dir` is
// true and Limits<TYPE>::getMax() otherwise; the value accumulator is
// seeded with -1.
//
// The kernel is launched on `stream` and CUDA_TEST_ERROR() is invoked right
// after the launch.
#define WARP_SELECT_IMPL(TYPE, DIR, WARP_Q, THREAD_Q)                     \
  void runWarpSelect_ ## TYPE ## _ ## DIR ## _ ## WARP_Q ## _(            \
    Tensor<TYPE, 2, true>& in,                                            \
    Tensor<TYPE, 2, true>& outK,                                          \
    Tensor<int, 2, true>& outV,                                           \
    bool dir,                                                             \
    int k,                                                                \
    cudaStream_t stream) {                                                \
                                                                          \
    constexpr int kWarpSelectNumThreads = 128;                            \
                                                                          \
    FAISS_ASSERT(k <= WARP_Q);                                            \
    FAISS_ASSERT(dir == DIR);                                             \
                                                                          \
    /* A grid with zero blocks is an invalid launch configuration. */     \
    /* With no entries along dimension 0 there is nothing to select, */   \
    /* so return before attempting the launch. */                         \
    if (in.getSize(0) == 0) {                                             \
      return;                                                             \
    }                                                                     \
                                                                          \
    auto grid = dim3(utils::divUp(in.getSize(0),                          \
                                  (kWarpSelectNumThreads / kWarpSize)));  \
    auto block = dim3(kWarpSelectNumThreads);                             \
                                                                          \
    auto kInit = dir ? Limits<TYPE>::getMin() : Limits<TYPE>::getMax();   \
    auto vInit = -1;                                                      \
                                                                          \
    warpSelect<TYPE, int, DIR, WARP_Q, THREAD_Q, kWarpSelectNumThreads>   \
      <<<grid, block, 0, stream>>>(in, outK, outV, kInit, vInit, k);      \
    CUDA_TEST_ERROR();                                                    \
  }
|
|
|
|
// Expands to a call of runWarpSelect_<TYPE>_<DIR>_<WARP_Q>_.
//
// The arguments are not macro parameters: the expansion refers to the
// identifiers `in`, `outK`, `outV`, `dir`, `k` and `stream` by name, so all
// six must be in scope at the point of use. The expansion carries no
// trailing semicolon.
#define WARP_SELECT_CALL(TYPE, DIR, WARP_Q)                               \
  runWarpSelect_ ## TYPE ## _ ## DIR ## _ ## WARP_Q ## _(                 \
    in, outK, outV, dir, k, stream)
|