9 #include "VectorResidual.cuh"
10 #include "../../FaissAssert.h"
11 #include "../utils/ConversionOperators.cuh"
12 #include "../utils/DeviceUtils.h"
13 #include "../utils/Tensor.cuh"
14 #include "../utils/StaticUtils.h"
15 #include <math_constants.h>
17 namespace faiss {
namespace gpu {
// Computes, for every input vector, its residual against the centroid it is
// mapped to:
//   residuals[v][i] = vecs[v][i] - float(centroids[vecToCentroid[v]][i])
//
// Launch layout: one block per vector; blockIdx.x selects the row of `vecs`,
// `vecToCentroid` and `residuals` that the block works on.
//  - LargeDim == true:  threads walk the row in steps of blockDim.x, so any
//    block size covers the whole row.
//  - LargeDim == false: each thread handles only element threadIdx.x, so the
//    launch must use blockDim.x == vecs.getSize(1).
//
// A centroid id of -1 is treated as "no centroid" (presumably an unassigned
// vector -- confirm against callers); the whole residual row is set to NaN.
template <typename CentroidT, bool LargeDim>
__global__ void calcResidual(Tensor<float, 2, true> vecs,
                             Tensor<CentroidT, 2, true> centroids,
                             Tensor<int, 1, true> vecToCentroid,
                             Tensor<float, 2, true> residuals) {
  auto vec = vecs[blockIdx.x];
  auto residual = residuals[blockIdx.x];

  int centroidId = vecToCentroid[blockIdx.x];

  if (centroidId == -1) {
    // No centroid to subtract: fill the row with NaN and stop, so that
    // `centroids` is never indexed with -1.
    if (LargeDim) {
      for (int i = threadIdx.x; i < vecs.getSize(1); i += blockDim.x) {
        residual[i] = CUDART_NAN_F;
      }
    } else {
      residual[threadIdx.x] = CUDART_NAN_F;
    }

    return;
  }

  auto centroid = centroids[centroidId];

  // LargeDim is a template constant, so only one of these branches is
  // executed for a given instantiation.
  if (LargeDim) {
    for (int i = threadIdx.x; i < vecs.getSize(1); i += blockDim.x) {
      residual[i] = vec[i] - ConvertTo<float>::to(centroid[i]);
    }
  } else {
    residual[threadIdx.x] = vec[threadIdx.x] -
      ConvertTo<float>::to(centroid[threadIdx.x]);
  }
}
// Host-side launcher for the calcResidual kernel.
//
// Checks that `vecs`, `centroids`, `vecToCentroid` and `residuals` have
// consistent sizes, then launches one block per row of `vecs` on `stream`.
// The launch is asynchronous; this function does not synchronize the stream.
//
// The block size is the vector dimension, capped at the device's maximum
// threads per block. When the dimension exceeds that cap, the LargeDim=true
// kernel (strided loop over the row) is used; otherwise the LargeDim=false
// kernel (one thread per element) is used.
template <typename CentroidT>
void calcResidual(Tensor<float, 2, true>& vecs,
                  Tensor<CentroidT, 2, true>& centroids,
                  Tensor<int, 1, true>& vecToCentroid,
                  Tensor<float, 2, true>& residuals,
                  cudaStream_t stream) {
  FAISS_ASSERT(vecs.getSize(1) == centroids.getSize(1));
  FAISS_ASSERT(vecs.getSize(1) == residuals.getSize(1));
  FAISS_ASSERT(vecs.getSize(0) == vecToCentroid.getSize(0));
  FAISS_ASSERT(vecs.getSize(0) == residuals.getSize(0));

  int numVecs = vecs.getSize(0);
  int dim = vecs.getSize(1);

  // Nothing to compute. A zero-sized grid or block is also an invalid launch
  // configuration, so bail out before attempting the launch.
  if (numVecs == 0 || dim == 0) {
    return;
  }

  int maxThreads = getMaxThreadsCurrentDevice();
  bool largeDim = dim > maxThreads;

  dim3 grid(numVecs);
  // Equivalent to min(dim, maxThreads): in the non-large case the block has
  // exactly one thread per vector element.
  dim3 block(largeDim ? maxThreads : dim);

  if (largeDim) {
    calcResidual<CentroidT, true><<<grid, block, 0, stream>>>(
      vecs, centroids, vecToCentroid, residuals);
  } else {
    calcResidual<CentroidT, false><<<grid, block, 0, stream>>>(
      vecs, centroids, vecToCentroid, residuals);
  }
}
// Computes the residuals of `vecs` against float centroids by forwarding to
// the templated calcResidual launcher; work is enqueued on `stream`.
void runCalcResidual(Tensor<float, 2, true>& vecs,
                     Tensor<float, 2, true>& centroids,
                     Tensor<int, 1, true>& vecToCentroid,
                     Tensor<float, 2, true>& residuals,
                     cudaStream_t stream) {
  calcResidual<float>(vecs, centroids, vecToCentroid, residuals, stream);
}
90 #ifdef FAISS_USE_FLOAT16
// Overload for half-precision centroids: forwards to the templated
// calcResidual launcher with CentroidT = half; work is enqueued on `stream`.
// The residuals themselves are still written as float.
void runCalcResidual(Tensor<float, 2, true>& vecs,
                     Tensor<half, 2, true>& centroids,
                     Tensor<int, 1, true>& vecToCentroid,
                     Tensor<float, 2, true>& residuals,
                     cudaStream_t stream) {
  calcResidual<half>(vecs, centroids, vecToCentroid, residuals, stream);
}