mirror of https://github.com/open-mmlab/mmcv.git
46 lines
1.9 KiB
C++
46 lines
1.9 KiB
C++
#include "pytorch_npu_helper.hpp"
|
|
|
|
using namespace NPU_NAME_SPACE;
|
|
using namespace std;
|
|
|
|
/**
 * NPU implementation of non-maximum suppression.
 *
 * Launches the `NonMaxSuppressionV3` operator on `boxes` and `scores` and
 * returns the selected indices as an int64 tensor.
 *
 * @param boxes          Candidate boxes. `boxes.size(0)` is used both as the
 *                       operator's maximum output size and as the length of
 *                       the raw output buffer.
 * @param scores         Scores handed to the operator together with `boxes`.
 * @param iou_threshold  IoU threshold forwarded to the operator as a float
 *                       scalar tensor.
 * @param offset         Accepted for interface compatibility; it is not
 *                       forwarded to the operator (see note in the body).
 * @return Long tensor holding the leading entries of the operator output,
 *         one for each entry that is greater than -1.
 */
Tensor nms_npu(Tensor boxes, Tensor scores, float iou_threshold, int offset) {
  // NOTE(review): the previous code computed `boxes + offset` into a scratch
  // tensor that was never read -- the operator was always fed the original
  // `boxes`. That dead device work (two allocations, a fill and an add) has
  // been removed; results are unchanged. `offset` therefore has no effect
  // here. Shifting every coordinate by the same amount would not alter IoU
  // anyway, so honouring `offset` needs operator-level support -- TODO confirm
  // the intended semantics with the operator owners.
  (void)offset;

  // Scalar (0-dim) operator inputs: IoU threshold, score threshold (fixed at
  // 0) and the maximum number of boxes the operator may select.
  at::Tensor iou_threshold_y = at_npu::native::OpPreparation::ApplyTensor(
                                   {}, boxes.options().dtype(at::kFloat), boxes)
                                   .fill_(iou_threshold);
  at::Tensor scores_threshold_y =
      at_npu::native::OpPreparation::ApplyTensor(
          {}, boxes.options().dtype(at::kFloat), boxes)
          .fill_(0);
  at::Tensor max_outputsize_y = at_npu::native::OpPreparation::ApplyTensor(
                                    {}, boxes.options().dtype(at::kInt), boxes)
                                    .fill_(boxes.size(0));

  // The output buffer has one int slot per input box and is pre-filled with
  // -1 so that slots the operator does not write can be recognised afterwards.
  c10::SmallVector<int64_t, SIZE> outputsize = {boxes.size(0)};
  at::Tensor output = at_npu::native::OpPreparation::ApplyTensor(
                          outputsize, boxes.options().dtype(at::kInt), boxes)
                          .fill_(-1);

  OpCommand cmd;
  cmd.Name("NonMaxSuppressionV3")
      .Input(boxes)
      .Input(scores)
      .Input(max_outputsize_y)
      .Input(iou_threshold_y)
      .Input(scores_threshold_y)
      .Output(output)
      .Run();

  // Count the slots that were written (value > -1) and keep that many leading
  // entries. This assumes the operator packs its results at the front of the
  // buffer -- TODO confirm against the operator specification.
  auto outputsizeBool = at::gt(output, -1);
  auto outputsizeInt = outputsizeBool.to(at::ScalarType::Int);
  auto countLen = at::sum(outputsizeInt, at::ScalarType::Int);
  // `.item()` copies the count into a host-side scalar so it can be used as
  // the slice bound.
  at::Tensor actual_output = output.slice(0, 0, countLen.item().toLong());

  // The result is returned as int64.
  actual_output = at_npu::native::NPUNativeFunctions::npu_dtype_cast(
      actual_output, at::kLong);
  return actual_output;
}
|
|
|
|
// Declaration of `nms_impl`, which has the same signature as `nms_npu`. Its
// definition is not in this file; the declaration lets the registration below
// refer to it by name.
Tensor nms_impl(Tensor boxes, Tensor scores, float iou_threshold, int offset);
|
|
|
|
// Associates `nms_npu` with `nms_impl` through the REGISTER_NPU_IMPL macro.
// NOTE(review): the macro is defined outside this file (presumably reached via
// pytorch_npu_helper.hpp) -- confirm its exact registration semantics there.
REGISTER_NPU_IMPL(nms_impl, nms_npu);
|