add npu roipoint_pool3d_forward (#3147)

* add npu roipoint_pool3d_forward * nms_rotated npu add new attr is_angle
2024-07-19 01:33:34 +08:00 · 2024-07-19 01:33:34 +08:00 · 9f43b8c310
parent 44eab261b9
commit 9f43b8c310
4 changed files with 47 additions and 7 deletions
--- a/mmcv/ops/csrc/pytorch/npu/nms_rotated_npu.cpp
+++ b/mmcv/ops/csrc/pytorch/npu/nms_rotated_npu.cpp
@ -27,6 +27,7 @@ Tensor nms_rotated_npu(const Tensor dets, const Tensor scores,
      .Output(selectedBox)
      .Output(selectedIndex)
      .Attr("iou_threshold", (float)iou_threshold)
+      .Attr("is_angle", false)
      .Run();
  selectedIndex = selectedIndex.to(at::kLong);
  return selectedIndex;
--- a/mmcv/ops/csrc/pytorch/npu/roipoint_pool3d_forward.cpp
+++ b/mmcv/ops/csrc/pytorch/npu/roipoint_pool3d_forward.cpp
@ -0,0 +1,38 @@
+#include "pytorch_npu_helper.hpp"
+
+using namespace NPU_NAME_SPACE;
+using namespace std;
+
+/**
+ * NPU forward pass for RoIPoint pooling, executed through the
+ * aclnnRoipointPool3dForward operator.
+ *
+ * @param batch_size       not read here; sizes are taken from the tensors
+ * @param pts_num          not read here
+ * @param boxes_num        not read here
+ * @param feature_in_len   not read here
+ * @param sampled_pts_num  forwarded to the operator; also the last dimension
+ *                         of the intermediate output buffer
+ * @param xyz              point tensor; dims 1 and 2 are swapped before the
+ *                         operator call
+ * @param boxes3d          box tensor, passed to the operator unchanged
+ * @param pts_feature      per-point feature tensor; dims 1 and 2 are swapped
+ *                         before the operator call
+ * @param pooled_features  destination tensor; receives the operator output
+ *                         (with dims 2 and 3 swapped back) via copy_
+ * @param pooled_empty_flag passed straight to the operator
+ *                         (presumably filled in by it -- confirm against the
+ *                         operator definition)
+ */
+void roipoint_pool3d_forward_impl_npu(int batch_size, int pts_num,
+                                      int boxes_num, int feature_in_len,
+                                      int sampled_pts_num, const Tensor xyz,
+                                      const Tensor boxes3d,
+                                      const Tensor pts_feature,
+                                      Tensor pooled_features,
+                                      Tensor pooled_empty_flag) {
+  // The operator is fed the inputs with dims 1 and 2 swapped, made contiguous.
+  auto points_trans = xyz.transpose(1, 2).contiguous();
+  auto point_features_trans = pts_feature.transpose(1, 2).contiguous();
+  // Intermediate output buffer: the channel dimension (xyz.size(2) +
+  // pts_feature.size(2)) comes before the sampled-points dimension.
+  c10::SmallVector<int64_t, SIZE> features_trans_size = {
+      xyz.size(0), boxes3d.size(1), xyz.size(2) + pts_feature.size(2),
+      sampled_pts_num};
+  at::Tensor pooled_features_trans =
+      at::empty(features_trans_size, xyz.options());
+  EXEC_NPU_CMD(aclnnRoipointPool3dForward, points_trans, point_features_trans,
+               boxes3d, sampled_pts_num, pooled_features_trans,
+               pooled_empty_flag);
+  // Swap the last two dims back and copy into the caller's output tensor.
+  auto pooled_features_cache =
+      pooled_features_trans.transpose(2, 3).contiguous();
+  pooled_features.copy_(pooled_features_cache);
+}
+
+void roipoint_pool3d_forward_impl(int batch_size, int pts_num, int boxes_num,
+                                  int feature_in_len, int sampled_pts_num,
+                                  const Tensor xyz, const Tensor boxes3d,
+                                  const Tensor pts_feature,
+                                  Tensor pooled_features,
+                                  Tensor pooled_empty_flag);  // declaration only
+
+REGISTER_NPU_IMPL(roipoint_pool3d_forward_impl,  // stub declared above
+                  roipoint_pool3d_forward_impl_npu);  // its NPU counterpart
--- a/mmcv/ops/nms.py
+++ b/mmcv/ops/nms.py
@ -413,10 +413,6 @@ def nms_rotated(dets: Tensor,
        input_labels = labels
    if dets.device.type in ('npu', 'mlu'):
        order = scores.new_empty(0, dtype=torch.long)
-        if dets.device.type == 'npu':
-            coefficient = 57.29578  # 180 / PI
-            for i in range(dets.size()[0]):
-                dets_cw[i][4] *= coefficient  # radians to angle
        keep_inds = ext_module.nms_rotated(dets_cw, scores, order, dets_cw,
                                           input_labels, iou_threshold,
                                           multi_label)
--- a/tests/test_ops/test_roipoint_pool3d.py
+++ b/tests/test_ops/test_roipoint_pool3d.py
@ -3,7 +3,7 @@ import pytest
 import torch

 from mmcv.ops import RoIPointPool3d
-from mmcv.utils import IS_CUDA_AVAILABLE, IS_MLU_AVAILABLE
+from mmcv.utils import IS_CUDA_AVAILABLE, IS_MLU_AVAILABLE, IS_NPU_AVAILABLE


@pytest.mark.parametrize('device', [
@ -14,14 +14,19 @@ from mmcv.utils import IS_CUDA_AVAILABLE, IS_MLU_AVAILABLE
    pytest.param(
        'mlu',
        marks=pytest.mark.skipif(
-            not IS_MLU_AVAILABLE, reason='requires MLU support'))
+            not IS_MLU_AVAILABLE, reason='requires MLU support')),
+    pytest.param(
+        'npu',
+        marks=pytest.mark.skipif(
+            not IS_NPU_AVAILABLE, reason='requires NPU support'))
 ])
@pytest.mark.parametrize('dtype', [
    torch.float, torch.half,
    pytest.param(
        torch.double,
        marks=pytest.mark.skipif(
-            IS_MLU_AVAILABLE, reason='MLU does not support for double'))
+            IS_MLU_AVAILABLE or IS_NPU_AVAILABLE,
+            reason='MLU and NPU does not support for double'))
 ])
 def test_roipoint(device, dtype):
    points = torch.tensor(