mmcv/mmcv/ops/csrc/pytorch/npu/nms_npu.cpp

46 lines
1.9 KiB
C++

#include "pytorch_npu_helper.hpp"
using namespace NPU_NAME_SPACE;
using namespace std;
Tensor nms_npu(Tensor boxes, Tensor scores, float iou_threshold, int offset) {
at::Tensor boxed_offest = at_npu::native::OpPreparation::ApplyTensor(boxes);
at::Tensor ones_tensor =
at_npu::native::OpPreparation::ApplyTensor(boxes).fill_(1);
at::add_out(boxed_offest, boxes, ones_tensor, offset);
at::Tensor iou_threshold_y = at_npu::native::OpPreparation::ApplyTensor(
{}, boxes.options().dtype(at::kFloat), boxes)
.fill_(iou_threshold);
at::Tensor scores_threshold_y =
at_npu::native::OpPreparation::ApplyTensor(
{}, boxes.options().dtype(at::kFloat), boxes)
.fill_(0);
at::Tensor max_outputsize_y = at_npu::native::OpPreparation::ApplyTensor(
{}, boxes.options().dtype(at::kInt), boxes)
.fill_(boxes.size(0));
c10::SmallVector<int64_t, SIZE> outputsize = {boxes.size(0)};
at::Tensor output = at_npu::native::OpPreparation::ApplyTensor(
outputsize, boxes.options().dtype(at::kInt), boxes)
.fill_(-1);
OpCommand cmd;
cmd.Name("NonMaxSuppressionV3")
.Input(boxes)
.Input(scores)
.Input(max_outputsize_y)
.Input(iou_threshold_y)
.Input(scores_threshold_y)
.Output(output)
.Run();
auto outputsizeBool = at::gt(output, -1);
auto outputsizeInt = outputsizeBool.to(at::ScalarType::Int);
auto countLen = at::sum(outputsizeInt, at::ScalarType::Int);
at::Tensor actual_output = output.slice(0, 0, countLen.item().toLong());
actual_output = at_npu::native::NPUNativeFunctions::npu_dtype_cast(
actual_output, at::kLong);
return actual_output;
}
Tensor nms_impl(Tensor boxes, Tensor scores, float iou_threshold, int offset);
REGISTER_NPU_IMPL(nms_impl, nms_npu);