From 0d1b224fb1bd6aedffbdd54999d7ef8370aacee9 Mon Sep 17 00:00:00 2001
From: liuyuan1-v <125547457+liuyuan1-v@users.noreply.github.com>
Date: Thu, 23 Mar 2023 11:36:32 +0800
Subject: [PATCH] [Feature] Support NmsRotated with cambricon MLU backend (#2643)

* [Feature] Support NmsRotated with cambricon MLU backend
* [Feature] remove foolproofs in nms_rotated_mlu.cpp
* [Feature] fix lint in test_nms_rotated.py
* [Feature] fix kMLU not found in nms_rotated.cpp
* [Feature] modify mlu support in nms.py
* [Feature] modify nms_rotated support in ops.md
* [Feature] modify ops/nms.py
---
 docs/en/understand_mmcv/ops.md                |  2 +-
 docs/zh_cn/understand_mmcv/ops.md             |  2 +-
 mmcv/ops/csrc/pytorch/mlu/nms_rotated_mlu.cpp | 53 +++++++++++++++++++
 mmcv/ops/csrc/pytorch/nms_rotated.cpp         |  9 ++++
 mmcv/ops/nms.py                               |  9 ++--
 tests/test_ops/test_nms_rotated.py            | 14 +++--
 6 files changed, 80 insertions(+), 9 deletions(-)
 create mode 100644 mmcv/ops/csrc/pytorch/mlu/nms_rotated_mlu.cpp

diff --git a/docs/en/understand_mmcv/ops.md b/docs/en/understand_mmcv/ops.md
index 95cf94de5..e7212bbdd 100644
--- a/docs/en/understand_mmcv/ops.md
+++ b/docs/en/understand_mmcv/ops.md
@@ -35,7 +35,7 @@ We implement common ops used in detection, segmentation, etc.
 | ModulatedDeformConv2d | √ | √ | √ | | √ |
 | MultiScaleDeformableAttn | | √ | √ | | |
 | NMS | √ | √ | √ | | √ |
-| NMSRotated | √ | √ | | | √ |
+| NMSRotated | √ | √ | √ | | √ |
 | NMSQuadri | √ | √ | | | |
 | PixelGroup | √ | | | | |
 | PointsInBoxes | √ | √ | | | |
diff --git a/docs/zh_cn/understand_mmcv/ops.md b/docs/zh_cn/understand_mmcv/ops.md
index b4ace828d..81092144a 100644
--- a/docs/zh_cn/understand_mmcv/ops.md
+++ b/docs/zh_cn/understand_mmcv/ops.md
@@ -35,7 +35,7 @@ MMCV 提供了检测、分割等任务中常用的算子
 | ModulatedDeformConv2d | √ | √ | √ | | √ |
 | MultiScaleDeformableAttn | | √ | √ | | |
 | NMS | √ | √ | √ | | √ |
-| NMSRotated | √ | √ | | | √ |
+| NMSRotated | √ | √ | √ | | √ |
 | NMSQuadri | √ | √ | | | |
 | PixelGroup | √ | | | | |
 | PointsInBoxes | √ | √ | | | |
diff --git a/mmcv/ops/csrc/pytorch/mlu/nms_rotated_mlu.cpp b/mmcv/ops/csrc/pytorch/mlu/nms_rotated_mlu.cpp
new file mode 100644
index 000000000..9b45a1780
--- /dev/null
+++ b/mmcv/ops/csrc/pytorch/mlu/nms_rotated_mlu.cpp
@@ -0,0 +1,53 @@
+/*************************************************************************
+ * Copyright (C) 2021 Cambricon.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *************************************************************************/
+#include "mlu_common_helper.h"
+
+Tensor nms_rotated_mlu(Tensor boxes, Tensor scores, float iou_threshold) {
+  if (boxes.numel() == 0) {
+    return at::empty({0}, boxes.options().dtype(at::kLong));
+  }
+
+  int boxes_num = boxes.size(0);
+  auto boxes_ = torch_mlu::cnnl::ops::cnnl_contiguous(boxes);
+  auto scores_ = torch_mlu::cnnl::ops::cnnl_contiguous(scores);
+  auto output = at::empty({boxes_num}, boxes.options().dtype(at::kInt));
+  auto output_size = at::empty({1}, scores.options().dtype(at::kInt));
+
+  MluOpTensorDescriptor boxes_desc, scores_desc, output_desc;
+  boxes_desc.set(boxes_);
+  scores_desc.set(scores_);
+  output_desc.set(output);
+
+  // workspace
+  size_t workspace_size = 0;
+  auto handle = mluOpGetCurrentHandle();
+  mluOpGetNmsRotatedWorkspaceSize(handle, boxes_desc.desc(), &workspace_size);
+  auto workspace = at::empty(workspace_size, boxes.options().dtype(at::kByte));
+
+  auto boxes_impl = torch_mlu::getMluTensorImpl(boxes_);
+  auto boxes_ptr = boxes_impl->cnnlMalloc();
+  auto scores_impl = torch_mlu::getMluTensorImpl(scores_);
+  auto scores_ptr = scores_impl->cnnlMalloc();
+  auto workspace_impl = torch_mlu::getMluTensorImpl(workspace);
+  auto workspace_ptr = workspace_impl->cnnlMalloc();
+  auto output_impl = torch_mlu::getMluTensorImpl(output);
+  auto output_ptr = output_impl->cnnlMalloc();
+  auto output_size_impl = torch_mlu::getMluTensorImpl(output_size);
+  auto output_size_ptr = output_size_impl->cnnlMalloc();
+
+  mluOpNmsRotated(handle, iou_threshold, boxes_desc.desc(), boxes_ptr,
+                  scores_desc.desc(), scores_ptr, workspace_ptr,
+                  workspace_size, output_desc.desc(), output_ptr,
+                  (int *)output_size_ptr);
+  int output_num = *static_cast<int *>(output_size.cpu().data_ptr());
+  auto ret = output.to(boxes.options().dtype(at::kLong));
+  return ret.slice(0, 0, output_num);
+}
diff --git a/mmcv/ops/csrc/pytorch/nms_rotated.cpp b/mmcv/ops/csrc/pytorch/nms_rotated.cpp
index b07ed5aa1..1d49c37dd 100644
--- a/mmcv/ops/csrc/pytorch/nms_rotated.cpp
+++ b/mmcv/ops/csrc/pytorch/nms_rotated.cpp
@@ -17,6 +17,11 @@ Tensor nms_rotated_npu(const Tensor dets, const Tensor scores,
                        const Tensor labels, const float iou_threshold);
 #endif
 
+#ifdef MMCV_WITH_MLU
+Tensor nms_rotated_mlu(const Tensor dets, const Tensor scores,
+                       const float iou_threshold);
+#endif
+
 // Interface for Python
 // inline is needed to prevent multiple function definitions when this header is
 // included by different cpps
@@ -36,6 +41,10 @@ Tensor nms_rotated(const Tensor dets, const Tensor scores, const Tensor order,
     return nms_rotated_npu(dets, scores, labels, iou_threshold);
 #else
     AT_ERROR("Not compiled with NPU support");
+#endif
+#ifdef MMCV_WITH_MLU
+  } else if (dets.device().type() == at::kMLU) {
+    return nms_rotated_mlu(dets, scores, iou_threshold);
 #endif
   }
diff --git a/mmcv/ops/nms.py b/mmcv/ops/nms.py
index 00d22f2ac..5115a95f6 100644
--- a/mmcv/ops/nms.py
+++ b/mmcv/ops/nms.py
@@ -458,11 +458,12 @@ def nms_rotated(dets: Tensor,
         input_labels = scores.new_empty(0, dtype=torch.int)
     else:
         input_labels = labels
-    if dets.device.type == 'npu':
+    if dets.device.type in ('npu', 'mlu'):
         order = scores.new_empty(0, dtype=torch.long)
-        coefficient = 57.29578  # 180 / PI
-        for i in range(dets.size()[0]):
-            dets_cw[i][4] *= coefficient  # radians to angle
+        if dets.device.type == 'npu':
+            coefficient = 57.29578  # 180 / PI
+            for i in range(dets.size()[0]):
+                dets_cw[i][4] *= coefficient  # radians to angle
         keep_inds = ext_module.nms_rotated(dets_cw, scores,
                                            order, dets_cw, input_labels,
                                            iou_threshold, multi_label)
diff --git a/tests/test_ops/test_nms_rotated.py b/tests/test_ops/test_nms_rotated.py
index bee562a6f..88b41fec8 100644
--- a/tests/test_ops/test_nms_rotated.py
+++ b/tests/test_ops/test_nms_rotated.py
@@ -3,7 +3,7 @@ import numpy as np
 import pytest
 import torch
 
-from mmcv.utils import IS_CUDA_AVAILABLE, IS_NPU_AVAILABLE
+from mmcv.utils import IS_CUDA_AVAILABLE, IS_MLU_AVAILABLE, IS_NPU_AVAILABLE
 
 
 class TestNmsRotated:
@@ -16,7 +16,11 @@ class TestNmsRotated:
         pytest.param(
             'cuda',
             marks=pytest.mark.skipif(
-                not IS_CUDA_AVAILABLE, reason='requires CUDA support'))
+                not IS_CUDA_AVAILABLE, reason='requires CUDA support')),
+        pytest.param(
+            'mlu',
+            marks=pytest.mark.skipif(
+                not IS_MLU_AVAILABLE, reason='requires MLU support'))
     ])
     def test_ml_nms_rotated(self, device):
         from mmcv.ops import nms_rotated
@@ -58,7 +62,11 @@ class TestNmsRotated:
         pytest.param(
             'cuda',
             marks=pytest.mark.skipif(
-                not IS_CUDA_AVAILABLE, reason='requires CUDA support'))
+                not IS_CUDA_AVAILABLE, reason='requires CUDA support')),
+        pytest.param(
+            'mlu',
+            marks=pytest.mark.skipif(
+                not IS_MLU_AVAILABLE, reason='requires MLU support'))
     ])
     def test_nms_rotated(self, device):
         from mmcv.ops import nms_rotated
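
A minimal usage sketch of the MLU path added by this patch, exercised through the public
Python API in the same way as the updated test. It assumes an MLU-enabled build of mmcv
(compiled with MMCV_WITH_MLU) and an MLU device visible to torch_mlu; the box and score
values below are illustrative only.

import numpy as np
import torch

from mmcv.ops import nms_rotated

# Each row is (x_ctr, y_ctr, w, h, angle in radians); scores are kept separately.
np_boxes = np.array([[6.0, 3.0, 8.0, 7.0, 0.5],
                     [3.0, 6.0, 9.0, 8.0, 0.1],
                     [3.0, 7.0, 10.0, 8.0, 0.3]], dtype=np.float32)
np_scores = np.array([0.9, 0.8, 0.7], dtype=np.float32)

device = 'mlu'  # 'cpu' and 'cuda' follow the pre-existing branches unchanged
boxes = torch.from_numpy(np_boxes).to(device)
scores = torch.from_numpy(np_scores).to(device)

# dets holds the kept (box, score) rows; keep_inds are their indices in the input.
dets, keep_inds = nms_rotated(boxes, scores, iou_threshold=0.5)
print(dets.shape, keep_inds)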