8 #include "VectorResidual.cuh"
9 #include "../../FaissAssert.h"
10 #include "../utils/ConversionOperators.cuh"
11 #include "../utils/DeviceUtils.h"
12 #include "../utils/Tensor.cuh"
13 #include "../utils/StaticUtils.h"
14 #include <math_constants.h>
16 namespace faiss {
namespace gpu {
// Kernel: writes residuals[v][d] = vecs[v][d] - float(centroids[c][d]), where
// c = vecToCentroid[v], for the vector v selected by blockIdx.x.
//
// Launch layout: the row processed is chosen by blockIdx.x alone, so the grid
// is expected to have one block per row of `vecs`; no shared memory is used.
//
// LargeDim selects how a block covers one row:
//   - true:  threads stride over the row in steps of blockDim.x, so any
//            block size covers the full row.
//   - false: each thread writes only the element at threadIdx.x, so
//            blockDim.x must not exceed vecs.getSize(1), and must equal it
//            for every element of the row to be written.
//
// A centroid id of -1 is handled specially: the whole residual row is filled
// with NaN and no centroid is read.
template <typename CentroidT, bool LargeDim>
__global__ void calcResidual(Tensor<float, 2, true> vecs,
                             Tensor<CentroidT, 2, true> centroids,
                             Tensor<int, 1, true> vecToCentroid,
                             Tensor<float, 2, true> residuals) {
  auto vec = vecs[blockIdx.x];
  auto residual = residuals[blockIdx.x];

  const int dim = vecs.getSize(1);
  const int centroidId = vecToCentroid[blockIdx.x];

  if (centroidId == -1) {
    // No usable centroid for this vector: poison the output row with NaN
    if (LargeDim) {
      for (int i = threadIdx.x; i < dim; i += blockDim.x) {
        residual[i] = CUDART_NAN_F;
      }
    } else {
      residual[threadIdx.x] = CUDART_NAN_F;
    }

    // Must not fall through: centroids[-1] would be an out-of-bounds read
    return;
  }

  auto centroid = centroids[centroidId];

  if (LargeDim) {
    for (int i = threadIdx.x; i < dim; i += blockDim.x) {
      residual[i] = vec[i] - ConvertTo<float>::to(centroid[i]);
    }
  } else {
    residual[threadIdx.x] =
        vec[threadIdx.x] - ConvertTo<float>::to(centroid[threadIdx.x]);
  }
}
// Host-side launcher for the calcResidual kernel on `stream`.
//
// Preconditions (asserted): vecs, centroids and residuals share the same
// second dimension; vecs, vecToCentroid and residuals share the same first
// dimension.
//
// Launches one block per row of `vecs`. The block size is the vector
// dimension, capped at the device's maximum threads per block; when the
// dimension exceeds that cap the strided (LargeDim = true) kernel variant is
// used so that every element is still covered.
//
// The launch is asynchronous with respect to the host; only launch
// configuration errors are checked here.
template <typename CentroidT>
void calcResidual(Tensor<float, 2, true>& vecs,
                  Tensor<CentroidT, 2, true>& centroids,
                  Tensor<int, 1, true>& vecToCentroid,
                  Tensor<float, 2, true>& residuals,
                  cudaStream_t stream) {
  FAISS_ASSERT(vecs.getSize(1) == centroids.getSize(1));
  FAISS_ASSERT(vecs.getSize(1) == residuals.getSize(1));
  FAISS_ASSERT(vecs.getSize(0) == vecToCentroid.getSize(0));
  FAISS_ASSERT(vecs.getSize(0) == residuals.getSize(0));

  const int numVecs = vecs.getSize(0);
  const int dim = vecs.getSize(1);

  // Nothing to compute; a zero-sized grid or block is an invalid launch
  // configuration, so do not launch at all
  if (numVecs == 0 || dim == 0) {
    return;
  }

  dim3 grid(numVecs);

  int maxThreads = getMaxThreadsCurrentDevice();
  bool largeDim = dim > maxThreads;
  dim3 block(std::min(dim, maxThreads));

  if (largeDim) {
    calcResidual<CentroidT, true><<<grid, block, 0, stream>>>(
        vecs, centroids, vecToCentroid, residuals);
  } else {
    calcResidual<CentroidT, false><<<grid, block, 0, stream>>>(
        vecs, centroids, vecToCentroid, residuals);
  }

  // Kernel launches do not return a status; surface bad-configuration errors
  // here rather than at some unrelated later CUDA call
  cudaError_t launchStatus = cudaGetLastError();
  FAISS_ASSERT(launchStatus == cudaSuccess);
}
// Computes per-vector residuals against float centroids on `stream`:
// residuals[v] = vecs[v] - centroids[vecToCentroid[v]].
// Forwards to the templated calcResidual launcher with CentroidT = float.
void runCalcResidual(Tensor<float, 2, true>& vecs,
                     Tensor<float, 2, true>& centroids,
                     Tensor<int, 1, true>& vecToCentroid,
                     Tensor<float, 2, true>& residuals,
                     cudaStream_t stream) {
  calcResidual<float>(vecs, centroids, vecToCentroid, residuals, stream);
}
89 #ifdef FAISS_USE_FLOAT16
// Computes per-vector residuals against half-precision centroids on `stream`:
// residuals[v] = vecs[v] - float(centroids[vecToCentroid[v]]).
// Forwards to the templated calcResidual launcher with CentroidT = half;
// the output residuals are float.
void runCalcResidual(Tensor<float, 2, true>& vecs,
                     Tensor<half, 2, true>& centroids,
                     Tensor<int, 1, true>& vecToCentroid,
                     Tensor<float, 2, true>& residuals,
                     cudaStream_t stream) {
  calcResidual<half>(vecs, centroids, vecToCentroid, residuals, stream);
}