From 0d1b224fb1bd6aedffbdd54999d7ef8370aacee9 Mon Sep 17 00:00:00 2001
From: liuyuan1-v <125547457+liuyuan1-v@users.noreply.github.com>
Date: Thu, 23 Mar 2023 11:36:32 +0800
Subject: [PATCH] [Feature] Support NmsRotated with cambricon MLU backend (#2643)

* [Feature] Support NmsRotated with cambricon MLU backend
* [Feature] remove foolproofs in nms_rotated_mlu.cpp
* [Feature] fix lint in test_nms_rotated.py
* [Feature] fix kMLU not found in nms_rotated.cpp
* [Feature] modify mlu support in nms.py
* [Feature] modify nms_rotated support in ops.md
* [Feature] modify ops/nms.py
---
 docs/en/understand_mmcv/ops.md                |  2 +-
 docs/zh_cn/understand_mmcv/ops.md             |  2 +-
 mmcv/ops/csrc/pytorch/mlu/nms_rotated_mlu.cpp | 53 +++++++++++++++++++
 mmcv/ops/csrc/pytorch/nms_rotated.cpp         |  9 ++++
 mmcv/ops/nms.py                               |  9 ++--
 tests/test_ops/test_nms_rotated.py            | 14 +++--
 6 files changed, 80 insertions(+), 9 deletions(-)
 create mode 100644 mmcv/ops/csrc/pytorch/mlu/nms_rotated_mlu.cpp

diff --git a/docs/en/understand_mmcv/ops.md b/docs/en/understand_mmcv/ops.md
index 95cf94de5..e7212bbdd 100644
--- a/docs/en/understand_mmcv/ops.md
+++ b/docs/en/understand_mmcv/ops.md
@@ -35,7 +35,7 @@ We implement common ops used in detection, segmentation, etc.
 | ModulatedDeformConv2d | √ | √ | √ | | √ |
 | MultiScaleDeformableAttn | | √ | √ | | |
 | NMS | √ | √ | √ | | √ |
-| NMSRotated | √ | √ | | | √ |
+| NMSRotated | √ | √ | √ | | √ |
 | NMSQuadri | √ | √ | | | |
 | PixelGroup | √ | | | | |
 | PointsInBoxes | √ | √ | | | |
diff --git a/docs/zh_cn/understand_mmcv/ops.md b/docs/zh_cn/understand_mmcv/ops.md
index b4ace828d..81092144a 100644
--- a/docs/zh_cn/understand_mmcv/ops.md
+++ b/docs/zh_cn/understand_mmcv/ops.md
@@ -35,7 +35,7 @@ MMCV 提供了检测、分割等任务中常用的算子
 | ModulatedDeformConv2d | √ | √ | √ | | √ |
 | MultiScaleDeformableAttn | | √ | √ | | |
 | NMS | √ | √ | √ | | √ |
-| NMSRotated | √ | √ | | | √ |
+| NMSRotated | √ | √ | √ | | √ |
 | NMSQuadri | √ | √ | | | |
 | PixelGroup | √ | | | | |
 | PointsInBoxes | √ | √ | | | |
diff --git a/mmcv/ops/csrc/pytorch/mlu/nms_rotated_mlu.cpp b/mmcv/ops/csrc/pytorch/mlu/nms_rotated_mlu.cpp
new file mode 100644
index 000000000..9b45a1780
--- /dev/null
+++ b/mmcv/ops/csrc/pytorch/mlu/nms_rotated_mlu.cpp
@@ -0,0 +1,53 @@
+/*************************************************************************
+ * Copyright (C) 2021 Cambricon.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *************************************************************************/
+#include "mlu_common_helper.h"
+
+Tensor nms_rotated_mlu(Tensor boxes, Tensor scores, float iou_threshold) {
+  if (boxes.numel() == 0) {
+    return at::empty({0}, boxes.options().dtype(at::kLong));
+  }
+
+  int boxes_num = boxes.size(0);
+  auto boxes_ = torch_mlu::cnnl::ops::cnnl_contiguous(boxes);
+  auto scores_ = torch_mlu::cnnl::ops::cnnl_contiguous(scores);
+  auto output = at::empty({boxes_num}, boxes.options().dtype(at::kInt));
+  auto output_size = at::empty({1}, scores.options().dtype(at::kInt));
+
+  MluOpTensorDescriptor boxes_desc, scores_desc, output_desc;
+  boxes_desc.set(boxes_);
+  scores_desc.set(scores_);
+  output_desc.set(output);
+
+  // workspace
+  size_t workspace_size = 0;
+  auto handle = mluOpGetCurrentHandle();
+  mluOpGetNmsRotatedWorkspaceSize(handle, boxes_desc.desc(), &workspace_size);
+  auto workspace = at::empty(workspace_size, boxes.options().dtype(at::kByte));
+
+  auto boxes_impl = torch_mlu::getMluTensorImpl(boxes_);
+  auto boxes_ptr = boxes_impl->cnnlMalloc();
+  auto scores_impl = torch_mlu::getMluTensorImpl(scores_);
+  auto scores_ptr = scores_impl->cnnlMalloc();
+  auto workspace_impl = torch_mlu::getMluTensorImpl(workspace);
+  auto workspace_ptr = workspace_impl->cnnlMalloc();
+  auto output_impl = torch_mlu::getMluTensorImpl(output);
+  auto output_ptr = output_impl->cnnlMalloc();
+  auto output_size_impl = torch_mlu::getMluTensorImpl(output_size);
+  auto output_size_ptr = output_size_impl->cnnlMalloc();
+
+  mluOpNmsRotated(handle, iou_threshold, boxes_desc.desc(), boxes_ptr,
+                  scores_desc.desc(), scores_ptr, workspace_ptr,
+                  workspace_size, output_desc.desc(), output_ptr,
+                  (int *)output_size_ptr);
+  int output_num = *static_cast<int *>(output_size.cpu().data_ptr());
+  auto ret = output.to(boxes.options().dtype(at::kLong));
+  return ret.slice(0, 0, output_num);
+}
diff --git a/mmcv/ops/csrc/pytorch/nms_rotated.cpp b/mmcv/ops/csrc/pytorch/nms_rotated.cpp
index b07ed5aa1..1d49c37dd 100644
--- a/mmcv/ops/csrc/pytorch/nms_rotated.cpp
+++ b/mmcv/ops/csrc/pytorch/nms_rotated.cpp
@@ -17,6 +17,11 @@ Tensor nms_rotated_npu(const Tensor dets, const Tensor scores,
                        const Tensor labels, const float iou_threshold);
 #endif
 
+#ifdef MMCV_WITH_MLU
+Tensor nms_rotated_mlu(const Tensor dets, const Tensor scores,
+                       const float iou_threshold);
+#endif
+
 // Interface for Python
 // inline is needed to prevent multiple function definitions when this header is
 // included by different cpps
@@ -36,6 +41,10 @@ Tensor nms_rotated(const Tensor dets, const Tensor scores, const Tensor order,
     return nms_rotated_npu(dets, scores, labels, iou_threshold);
 #else
     AT_ERROR("Not compiled with NPU support");
+#endif
+#ifdef MMCV_WITH_MLU
+  } else if (dets.device().type() == at::kMLU) {
+    return nms_rotated_mlu(dets, scores, iou_threshold);
 #endif
   }
diff --git a/mmcv/ops/nms.py b/mmcv/ops/nms.py
index 00d22f2ac..5115a95f6 100644
--- a/mmcv/ops/nms.py
+++ b/mmcv/ops/nms.py
@@ -458,11 +458,12 @@ def nms_rotated(dets: Tensor,
         input_labels = scores.new_empty(0, dtype=torch.int)
     else:
         input_labels = labels
-    if dets.device.type == 'npu':
+    if dets.device.type in ('npu', 'mlu'):
         order = scores.new_empty(0, dtype=torch.long)
-        coefficient = 57.29578  # 180 / PI
-        for i in range(dets.size()[0]):
-            dets_cw[i][4] *= coefficient  # radians to angle
+        if dets.device.type == 'npu':
+            coefficient = 57.29578  # 180 / PI
+            for i in range(dets.size()[0]):
+                dets_cw[i][4] *= coefficient  # radians to angle
         keep_inds = ext_module.nms_rotated(dets_cw, scores,
                                            order, dets_cw, input_labels,
                                            iou_threshold, multi_label)
diff --git a/tests/test_ops/test_nms_rotated.py b/tests/test_ops/test_nms_rotated.py
index bee562a6f..88b41fec8 100644
--- a/tests/test_ops/test_nms_rotated.py
+++ b/tests/test_ops/test_nms_rotated.py
@@ -3,7 +3,7 @@ import numpy as np
 import pytest
 import torch
 
-from mmcv.utils import IS_CUDA_AVAILABLE, IS_NPU_AVAILABLE
+from mmcv.utils import IS_CUDA_AVAILABLE, IS_MLU_AVAILABLE, IS_NPU_AVAILABLE
 
 
 class TestNmsRotated:
@@ -16,7 +16,11 @@ class TestNmsRotated:
         pytest.param(
             'cuda',
             marks=pytest.mark.skipif(
-                not IS_CUDA_AVAILABLE, reason='requires CUDA support'))
+                not IS_CUDA_AVAILABLE, reason='requires CUDA support')),
+        pytest.param(
+            'mlu',
+            marks=pytest.mark.skipif(
+                not IS_MLU_AVAILABLE, reason='requires MLU support'))
     ])
     def test_ml_nms_rotated(self, device):
         from mmcv.ops import nms_rotated
@@ -58,7 +62,11 @@ class TestNmsRotated:
         pytest.param(
             'cuda',
             marks=pytest.mark.skipif(
-                not IS_CUDA_AVAILABLE, reason='requires CUDA support'))
+                not IS_CUDA_AVAILABLE, reason='requires CUDA support')),
+        pytest.param(
+            'mlu',
+            marks=pytest.mark.skipif(
+                not IS_MLU_AVAILABLE, reason='requires MLU support'))
     ])
     def test_nms_rotated(self, device):
         from mmcv.ops import nms_rotated
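
A minimal usage sketch of the MLU path added by this patch, exercised through the public
Python API in the same way as the updated test. It assumes an MLU-enabled build of mmcv
(compiled with MMCV_WITH_MLU) and an MLU device visible to torch_mlu; the box and score
values below are illustrative only.

import numpy as np
import torch

from mmcv.ops import nms_rotated

# Each row is (x_ctr, y_ctr, w, h, angle in radians); scores are kept separately.
np_boxes = np.array([[6.0, 3.0, 8.0, 7.0, 0.5],
                     [3.0, 6.0, 9.0, 8.0, 0.1],
                     [3.0, 7.0, 10.0, 8.0, 0.3]], dtype=np.float32)
np_scores = np.array([0.9, 0.8, 0.7], dtype=np.float32)

device = 'mlu'  # 'cpu' and 'cuda' follow the pre-existing branches unchanged
boxes = torch.from_numpy(np_boxes).to(device)
scores = torch.from_numpy(np_scores).to(device)

# dets holds the kept (box, score) rows; keep_inds are their indices in the input.
dets, keep_inds = nms_rotated(boxes, scores, iou_threshold=0.5)
print(dets.shape, keep_inds)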