11 #include "VectorResidual.cuh"
12 #include "../../FaissAssert.h"
13 #include "../utils/ConversionOperators.cuh"
14 #include "../utils/DeviceUtils.h"
15 #include "../utils/Tensor.cuh"
16 #include "../utils/StaticUtils.h"
17 #include <math_constants.h>
19 namespace faiss {
namespace gpu {
// Kernel: writes residual = vec - centroid for one vector per thread block.
//
// blockIdx.x selects the row of `vecs` / `residuals` and the entry of
// `vecToCentroid` that this block works on. When that entry is -1 no
// centroid is read and the whole residual row is filled with NaN.
//
// Expected launch layout (1-D grid, 1-D block, no shared memory):
//   gridDim.x == vecs.getSize(0)
//   LargeDim == true : threads stride over the row by blockDim.x, so any
//                      block size covers the full dimension.
//   LargeDim == false: each thread handles only element threadIdx.x, so the
//                      launcher must use blockDim.x == vecs.getSize(1).
//
// Centroid values are converted to float via ConvertTo<float>::to before the
// subtraction, so CentroidT may differ from float.
//
// NOTE(review): the LargeDim branches and the early return for the -1 case
// were restored from a damaged copy of this file -- confirm against the
// upstream source.
template <typename CentroidT, bool LargeDim>
__global__ void calcResidual(Tensor<float, 2, true> vecs,
                             Tensor<CentroidT, 2, true> centroids,
                             Tensor<int, 1, true> vecToCentroid,
                             Tensor<float, 2, true> residuals) {
  auto vec = vecs[blockIdx.x];
  auto residual = residuals[blockIdx.x];

  int centroidId = vecToCentroid[blockIdx.x];

  // Loop bound is uniform across the block; read it once.
  const int dim = vecs.getSize(1);

  // -1 means there is no centroid to subtract: emit an all-NaN residual and
  // leave before `centroids` is indexed with an invalid row.
  if (centroidId == -1) {
    if (LargeDim) {
      for (int i = threadIdx.x; i < dim; i += blockDim.x) {
        residual[i] = CUDART_NAN_F;
      }
    } else {
      residual[threadIdx.x] = CUDART_NAN_F;
    }

    return;
  }

  auto centroid = centroids[centroidId];

  // LargeDim is a template parameter, so this branch is resolved at compile
  // time and only one of the two paths is instantiated into each kernel.
  if (LargeDim) {
    for (int i = threadIdx.x; i < dim; i += blockDim.x) {
      residual[i] = vec[i] - ConvertTo<float>::to(centroid[i]);
    }
  } else {
    residual[threadIdx.x] = vec[threadIdx.x] -
      ConvertTo<float>::to(centroid[threadIdx.x]);
  }
}
// Host-side launcher for the calcResidual kernel.
//
// Preconditions (asserted):
//   vecs, centroids and residuals share the same size along dimension 1;
//   vecs, vecToCentroid and residuals share the same size along dimension 0.
//
// Launches one block per row of `vecs` on `stream`. The launch is
// asynchronous; this function does not synchronize the stream.
//
// vecs          : input vectors, indexed [vector][component]
// centroids     : centroid table, indexed [centroid][component]
// vecToCentroid : per-vector centroid row index; -1 yields a NaN residual
// residuals     : output, same extents as vecs
// stream        : CUDA stream the kernel is enqueued on
template <typename CentroidT>
void calcResidual(Tensor<float, 2, true>& vecs,
                  Tensor<CentroidT, 2, true>& centroids,
                  Tensor<int, 1, true>& vecToCentroid,
                  Tensor<float, 2, true>& residuals,
                  cudaStream_t stream) {
  FAISS_ASSERT(vecs.getSize(1) == centroids.getSize(1));
  FAISS_ASSERT(vecs.getSize(1) == residuals.getSize(1));
  FAISS_ASSERT(vecs.getSize(0) == vecToCentroid.getSize(0));
  FAISS_ASSERT(vecs.getSize(0) == residuals.getSize(0));

  // Nothing to compute for empty input. A grid or block with a zero extent
  // is an invalid launch configuration, so skip the launch entirely.
  if (vecs.getSize(0) == 0 || vecs.getSize(1) == 0) {
    return;
  }

  dim3 grid(vecs.getSize(0));

  // presumably the per-block thread limit of the current device -- verify
  // against getMaxThreadsCurrentDevice() in DeviceUtils.h
  int maxThreads = getMaxThreadsCurrentDevice();

  // When the vector dimension does not fit in a single block, the kernel
  // must loop over the row (LargeDim = true); otherwise one thread per
  // component suffices and block.x equals the dimension exactly.
  bool largeDim = vecs.getSize(1) > maxThreads;
  dim3 block(std::min(vecs.getSize(1), maxThreads));

  if (largeDim) {
    calcResidual<CentroidT, true><<<grid, block, 0, stream>>>(
      vecs, centroids, vecToCentroid, residuals);
  } else {
    calcResidual<CentroidT, false><<<grid, block, 0, stream>>>(
      vecs, centroids, vecToCentroid, residuals);
  }

  // Kernel launches do not return a status; launch-configuration errors are
  // only visible through cudaGetLastError().
  cudaError_t launchStatus = cudaGetLastError();
  FAISS_ASSERT(launchStatus == cudaSuccess);
}
// Computes per-vector residuals against float centroids.
//
// Thin non-template entry point: forwards all arguments to
// calcResidual<float>, which enqueues the work on `stream`.
void runCalcResidual(Tensor<float, 2, true>& vecs,
                     Tensor<float, 2, true>& centroids,
                     Tensor<int, 1, true>& vecToCentroid,
                     Tensor<float, 2, true>& residuals,
                     cudaStream_t stream) {
  calcResidual<float>(vecs, centroids, vecToCentroid, residuals, stream);
}
90 #ifdef FAISS_USE_FLOAT16
// Computes per-vector residuals against half-precision centroids.
//
// Thin non-template entry point: forwards all arguments to
// calcResidual<half>, which enqueues the work on `stream`. The residuals
// themselves are still written as float.
void runCalcResidual(Tensor<float, 2, true>& vecs,
                     Tensor<half, 2, true>& centroids,
                     Tensor<int, 1, true>& vecToCentroid,
                     Tensor<float, 2, true>& residuals,
                     cudaStream_t stream) {
  calcResidual<half>(vecs, centroids, vecToCentroid, residuals, stream);
}