From 2810718a99045bb762d24f465a69b7add34f3123 Mon Sep 17 00:00:00 2001
From: sherie <963372609@qq.com>
Date: Thu, 12 Jan 2023 11:52:28 +0800
Subject: [PATCH] [Feature] Add Ascend support for RoIPool op (#2483)

Co-authored-by: wangxiaoxin_sherie <wangxiaoxin7@huawei.com>
---
 docs/en/understand_mmcv/ops.md             |  2 +-
 docs/zh_cn/understand_mmcv/ops.md          |  2 +-
 mmcv/ops/csrc/pytorch/npu/roi_pool_npu.cpp | 34 ++++++++++++++++++++++
 tests/test_ops/test_roi_pool.py            | 30 ++++++++++++-------
 4 files changed, 56 insertions(+), 12 deletions(-)
 create mode 100644 mmcv/ops/csrc/pytorch/npu/roi_pool_npu.cpp

diff --git a/docs/en/understand_mmcv/ops.md b/docs/en/understand_mmcv/ops.md
index b1e3ea28f..6a0241412 100644
--- a/docs/en/understand_mmcv/ops.md
+++ b/docs/en/understand_mmcv/ops.md
@@ -43,7 +43,7 @@ We implement common ops used in detection, segmentation, etc.
 | PSAMask                      | √   | √    | √   |     | √      |
 | RotatedFeatureAlign          | √   | √    |     |     |        |
 | RoIPointPool3d               |     | √    | √   |     |        |
-| RoIPool                      |     | √    | √   |     |        |
+| RoIPool                      |     | √    | √   |     | √      |
 | RoIAlignRotated              | √   | √    | √   |     |        |
 | RiRoIAlignRotated            |     | √    |     |     |        |
 | RoIAlign                     | √   | √    | √   |     |        |
diff --git a/docs/zh_cn/understand_mmcv/ops.md b/docs/zh_cn/understand_mmcv/ops.md
index 650cd537a..deeb60eed 100644
--- a/docs/zh_cn/understand_mmcv/ops.md
+++ b/docs/zh_cn/understand_mmcv/ops.md
@@ -43,7 +43,7 @@ MMCV 提供了检测、分割等任务中常用的算子
 | PSAMask                      | √   | √    | √   |     | √      |
 | RotatedFeatureAlign          | √   | √    |     |     |        |
 | RoIPointPool3d               |     | √    | √   |     |        |
-| RoIPool                      |     | √    | √   |     |        |
+| RoIPool                      |     | √    | √   |     | √      |
 | RoIAlignRotated              | √   | √    | √   |     |        |
 | RiRoIAlignRotated            |     | √    |     |     |        |
 | RoIAlign                     | √   | √    | √   |     |        |
diff --git a/mmcv/ops/csrc/pytorch/npu/roi_pool_npu.cpp b/mmcv/ops/csrc/pytorch/npu/roi_pool_npu.cpp
new file mode 100644
index 000000000..36bd9c7a8
--- /dev/null
+++ b/mmcv/ops/csrc/pytorch/npu/roi_pool_npu.cpp
@@ -0,0 +1,34 @@
+#include "pytorch_npu_helper.hpp"
+
+using namespace NPU_NAME_SPACE;
+using namespace std;
+
+void roi_pool_forward_npu(Tensor input, Tensor rois, Tensor output,
+                          Tensor argmax, int pooled_height, int pooled_width,
+                          float spatial_scale) {  // RoIPool forward on the NPU
+  int64_t pooled_height_64 = pooled_height;       // widen to int64_t for Attr()
+  int64_t pooled_width_64 = pooled_width;         // widen to int64_t for Attr()
+  int64_t pooled_channel = 1;                     // pool_channel is fixed to 1
+  at::Tensor roi_actual_num = at_npu::native::OpPreparation::ApplyTensor(
+      {}, rois.options().dtype(at::kInt), rois);  // {} sizes, kInt; not filled
+
+  OpCommand cmd;
+  cmd.Name("RoiPoolingWithArgMax")
+      .Input(input)
+      .Input(rois)
+      .Input(roi_actual_num)  // int tensor built above
+      .Output(output)
+      .Output(argmax)
+      .Attr("pooled_h", pooled_height_64)
+      .Attr("pooled_w", pooled_width_64)
+      .Attr("spatial_scale_h", spatial_scale)  // same value for h and w
+      .Attr("spatial_scale_w", spatial_scale)
+      .Attr("pool_channel", pooled_channel)
+      .Run();
+}
+
+void roi_pool_forward_impl(Tensor input, Tensor rois, Tensor output,
+                           Tensor argmax, int pooled_height, int pooled_width,
+                           float spatial_scale);  // declaration only, no body
+
+REGISTER_NPU_IMPL(roi_pool_forward_impl, roi_pool_forward_npu);
diff --git a/tests/test_ops/test_roi_pool.py b/tests/test_ops/test_roi_pool.py
index 39d0ddea9..be5ab9296 100644
--- a/tests/test_ops/test_roi_pool.py
+++ b/tests/test_ops/test_roi_pool.py
@@ -5,7 +5,7 @@ import numpy as np
 import pytest
 import torch
 
-from mmcv.utils import IS_CUDA_AVAILABLE, IS_MLU_AVAILABLE
+from mmcv.utils import IS_CUDA_AVAILABLE, IS_MLU_AVAILABLE, IS_NPU_AVAILABLE
 
 _USING_PARROTS = True
 try:
@@ -69,14 +69,20 @@ class TestRoiPool:
             np_output = np.array(output[0])
             np_grad = np.array(output[1])
 
-            x = torch.tensor(
-                np_input, dtype=dtype, device=device, requires_grad=True)
-            rois = torch.tensor(np_rois, dtype=dtype, device=device)
-
-            output = roi_pool(x, rois, (pool_h, pool_w), spatial_scale)
-            output.backward(torch.ones_like(output))
-            assert np.allclose(output.data.cpu().numpy(), np_output, 1e-3)
-            assert np.allclose(x.grad.data.cpu().numpy(), np_grad, 1e-3)
+            if device == 'npu':
+                import torch_npu  # noqa: F401
+                x = torch.tensor(np_input, dtype=dtype).npu()
+                rois = torch.tensor(np_rois, dtype=dtype).npu()
+                output = roi_pool(x, rois, (pool_h, pool_w), spatial_scale)
+                assert np.allclose(output.data.cpu().numpy(), np_output, 1e-3)
+            else:
+                x = torch.tensor(
+                    np_input, dtype=dtype, device=device, requires_grad=True)
+                rois = torch.tensor(np_rois, dtype=dtype, device=device)
+                output = roi_pool(x, rois, (pool_h, pool_w), spatial_scale)
+                output.backward(torch.ones_like(output))
+                assert np.allclose(output.data.cpu().numpy(), np_output, 1e-3)
+                assert np.allclose(x.grad.data.cpu().numpy(), np_grad, 1e-3)
 
     @pytest.mark.parametrize('device', [
         pytest.param(
@@ -86,7 +92,11 @@ class TestRoiPool:
         pytest.param(
             'mlu',
             marks=pytest.mark.skipif(
-                not IS_MLU_AVAILABLE, reason='requires MLU support'))
+                not IS_MLU_AVAILABLE, reason='requires MLU support')),
+        pytest.param(
+            'npu',
+            marks=pytest.mark.skipif(
+                not IS_NPU_AVAILABLE, reason='requires NPU support'))
     ])
     @pytest.mark.parametrize('dtype', [
         torch.float,