[Fix] Apply kernel loop for bicubic interpolate (#271)

2025-01-14 08:09:43 +08:00 · 2021-12-09 12:56:32 +08:00 · 2021-12-09 12:56:32 +08:00 · d5c3be9739
commit d5c3be9739
parent c213879103
1 changed file with 43 additions and 45 deletions
--- a/csrc/backend_ops/tensorrt/bicubic_interpolate/trt_bicubic_interpolate_kernel.cu
+++ b/csrc/backend_ops/tensorrt/bicubic_interpolate/trt_bicubic_interpolate_kernel.cu
@@ -89,10 +89,7 @@ __global__ void resize_cubic_kernel_torch(const int num_elements, const scalar_t
                                          int srcHeight, scalar_t *dst, int dstWidth, int dstHeight,
                                          bool align_corners, float height_scale,
                                          float width_scale) {
-  int index = threadIdx.x + blockIdx.x * blockDim.x;
-  if (index >= num_elements) {
-    return;
-  }
+  CUDA_1D_KERNEL_LOOP(index, num_elements) {
    // Special case: input and output are the same size, just copy
    const int output_x = index % dstWidth;
    const int output_y = index / dstWidth;
@@ -141,6 +138,7 @@ __global__ void resize_cubic_kernel_torch(const int num_elements, const scalar_t
                                                coefficients[3], t_y));
      }
    }
+  }
 }

 template <typename scalar_t>