mirror of https://github.com/open-mmlab/mmcv.git
[Feature] Add Ascend support for RoIPool op (#2483)
Co-authored-by: wangxiaoxin_sherie <wangxiaoxin7@huawei.com>
pull/2544/head^2
parent
48ea88ab9f
commit
2810718a99
|
@ -43,7 +43,7 @@ We implement common ops used in detection, segmentation, etc.
|
|||
| PSAMask | √ | √ | √ | | √ |
|
||||
| RotatedFeatureAlign | √ | √ | | | |
|
||||
| RoIPointPool3d | | √ | √ | | |
|
||||
| RoIPool | | √ | √ | | |
|
||||
| RoIPool | | √ | √ | | √ |
|
||||
| RoIAlignRotated | √ | √ | √ | | |
|
||||
| RiRoIAlignRotated | | √ | | | |
|
||||
| RoIAlign | √ | √ | √ | | |
|
||||
|
|
|
@ -43,7 +43,7 @@ MMCV 提供了检测、分割等任务中常用的算子
|
|||
| PSAMask | √ | √ | √ | | √ |
|
||||
| RotatedFeatureAlign | √ | √ | | | |
|
||||
| RoIPointPool3d | | √ | √ | | |
|
||||
| RoIPool | | √ | √ | | |
|
||||
| RoIPool | | √ | √ | | √ |
|
||||
| RoIAlignRotated | √ | √ | √ | | |
|
||||
| RiRoIAlignRotated | | √ | | | |
|
||||
| RoIAlign | √ | √ | √ | | |
|
||||
|
|
|
@ -0,0 +1,34 @@
|
|||
#include "pytorch_npu_helper.hpp"
|
||||
|
||||
using namespace NPU_NAME_SPACE;
|
||||
using namespace std;
|
||||
|
||||
void roi_pool_forward_npu(Tensor input, Tensor rois, Tensor output,
|
||||
Tensor argmax, int pooled_height, int pooled_width,
|
||||
float spatial_scale) {
|
||||
int64_t pooled_height_64 = pooled_height;
|
||||
int64_t pooled_width_64 = pooled_width;
|
||||
int64_t pooled_channel = 1;
|
||||
at::Tensor roi_actual_num = at_npu::native::OpPreparation::ApplyTensor(
|
||||
{}, rois.options().dtype(at::kInt), rois);
|
||||
|
||||
OpCommand cmd;
|
||||
cmd.Name("RoiPoolingWithArgMax")
|
||||
.Input(input)
|
||||
.Input(rois)
|
||||
.Input(roi_actual_num)
|
||||
.Output(output)
|
||||
.Output(argmax)
|
||||
.Attr("pooled_h", pooled_height_64)
|
||||
.Attr("pooled_w", pooled_width_64)
|
||||
.Attr("spatial_scale_h", spatial_scale)
|
||||
.Attr("spatial_scale_w", spatial_scale)
|
||||
.Attr("pool_channel", pooled_channel)
|
||||
.Run();
|
||||
}
|
||||
|
||||
void roi_pool_forward_impl(Tensor input, Tensor rois, Tensor output,
|
||||
Tensor argmax, int pooled_height, int pooled_width,
|
||||
float spatial_scale);
|
||||
|
||||
REGISTER_NPU_IMPL(roi_pool_forward_impl, roi_pool_forward_npu);
|
|
@ -5,7 +5,7 @@ import numpy as np
|
|||
import pytest
|
||||
import torch
|
||||
|
||||
from mmcv.utils import IS_CUDA_AVAILABLE, IS_MLU_AVAILABLE
|
||||
from mmcv.utils import IS_CUDA_AVAILABLE, IS_MLU_AVAILABLE, IS_NPU_AVAILABLE
|
||||
|
||||
_USING_PARROTS = True
|
||||
try:
|
||||
|
@ -69,14 +69,20 @@ class TestRoiPool:
|
|||
np_output = np.array(output[0])
|
||||
np_grad = np.array(output[1])
|
||||
|
||||
x = torch.tensor(
|
||||
np_input, dtype=dtype, device=device, requires_grad=True)
|
||||
rois = torch.tensor(np_rois, dtype=dtype, device=device)
|
||||
|
||||
output = roi_pool(x, rois, (pool_h, pool_w), spatial_scale)
|
||||
output.backward(torch.ones_like(output))
|
||||
assert np.allclose(output.data.cpu().numpy(), np_output, 1e-3)
|
||||
assert np.allclose(x.grad.data.cpu().numpy(), np_grad, 1e-3)
|
||||
if device == 'npu':
|
||||
import torch_npu # noqa: F401
|
||||
x = torch.tensor(np_input, dtype=dtype).npu()
|
||||
rois = torch.tensor(np_rois, dtype=dtype).npu()
|
||||
output = roi_pool(x, rois, (pool_h, pool_w), spatial_scale)
|
||||
assert np.allclose(output.data.cpu().numpy(), np_output, 1e-3)
|
||||
else:
|
||||
x = torch.tensor(
|
||||
np_input, dtype=dtype, device=device, requires_grad=True)
|
||||
rois = torch.tensor(np_rois, dtype=dtype, device=device)
|
||||
output = roi_pool(x, rois, (pool_h, pool_w), spatial_scale)
|
||||
output.backward(torch.ones_like(output))
|
||||
assert np.allclose(output.data.cpu().numpy(), np_output, 1e-3)
|
||||
assert np.allclose(x.grad.data.cpu().numpy(), np_grad, 1e-3)
|
||||
|
||||
@pytest.mark.parametrize('device', [
|
||||
pytest.param(
|
||||
|
@ -86,7 +92,11 @@ class TestRoiPool:
|
|||
pytest.param(
|
||||
'mlu',
|
||||
marks=pytest.mark.skipif(
|
||||
not IS_MLU_AVAILABLE, reason='requires MLU support'))
|
||||
not IS_MLU_AVAILABLE, reason='requires MLU support')),
|
||||
pytest.param(
|
||||
'npu',
|
||||
marks=pytest.mark.skipif(
|
||||
not IS_NPU_AVAILABLE, reason='requires NPU support'))
|
||||
])
|
||||
@pytest.mark.parametrize('dtype', [
|
||||
torch.float,
|
||||
|
|
Loading…
Reference in New Issue