[Enhancement] Refine the implementation of NMS op in Ascend device (#2658)

2023-03-15 23:09:33 +08:00 · 2023-03-15 23:09:33 +08:00 · 75290685b7
parent 9a671e48fb
commit 75290685b7
1 changed file with 2 additions and 4 deletions
--- a/mmcv/ops/csrc/pytorch/npu/nms_npu.cpp
+++ b/mmcv/ops/csrc/pytorch/npu/nms_npu.cpp
@@ -4,10 +4,7 @@ using namespace NPU_NAME_SPACE;
 using namespace std;

 Tensor nms_npu(Tensor boxes, Tensor scores, float iou_threshold, int offset) {
-  at::Tensor boxed_offest = at_npu::native::OpPreparation::ApplyTensor(boxes);
-  at::Tensor ones_tensor =
-      at_npu::native::OpPreparation::ApplyTensor(boxes).fill_(1);
-  at::add_out(boxed_offest, boxes, ones_tensor, offset);
+  int64_t offset_64 = offset;
  at::Tensor iou_threshold_y = at_npu::native::OpPreparation::ApplyTensor(
                                   {}, boxes.options().dtype(at::kFloat), boxes)
                                   .fill_(iou_threshold);
@@ -29,6 +26,7 @@ Tensor nms_npu(Tensor boxes, Tensor scores, float iou_threshold, int offset) {
      .Input(max_outputsize_y)
      .Input(iou_threshold_y)
      .Input(scores_threshold_y)
+      .Attr("offset", offset_64)
      .Output(output)
      .Run();
  auto outputsizeBool = at::gt(output, -1);