diff --git a/csrc/codebase/mmseg/segment.cpp b/csrc/codebase/mmseg/segment.cpp
index 9735fb484..009ed23d7 100644
--- a/csrc/codebase/mmseg/segment.cpp
+++ b/csrc/codebase/mmseg/segment.cpp
@@ -1,6 +1,7 @@
 // Copyright (c) OpenMMLab. All rights reserved.
 
 #include "codebase/mmseg/mmseg.h"
+#include "core/logger.h"
 #include "core/tensor.h"
 #include "core/utils/device_utils.h"
 #include "core/utils/formatter.h"
@@ -9,6 +10,9 @@
 
 namespace mmdeploy::mmseg {
 
+// TODO: resize masks on device
+// TODO: when network output is on device, cast it to a smaller type (e.g. int16_t or int8_t
+// according to num classes) to reduce DtoH footprint
 class ResizeMask : public MMSegmentation {
  public:
   explicit ResizeMask(const Value &cfg) : MMSegmentation(cfg) {
@@ -39,45 +43,38 @@ class ResizeMask : public MMSegmentation {
     Device host{"cpu"};
     OUTCOME_TRY(auto host_tensor, MakeAvailableOnDevice(mask, host, stream_));
     OUTCOME_TRY(stream_.Wait());
-    if (mask.data_type() == DataType::kINT64) {
-      // change kINT64 to 2 INT32
-      TensorDesc desc{
-          host_tensor.device(), DataType::kINT32, {1, 2, height, width}, host_tensor.name()};
-      Tensor _host_tensor(desc, host_tensor.buffer());
-      return MaskResize(_host_tensor, input_height, input_width);
-    } else if (mask.data_type() == DataType::kINT32) {
-      return MaskResize(host_tensor, input_height, input_width);
-    } else {
-      MMDEPLOY_ERROR("unsupported `output` tensor, dtype: {}", (int)mask.data_type());
-      return Status(eNotSupported);
+
+    OUTCOME_TRY(auto cv_type, GetCvType(mask.data_type()));
+    cv::Mat mask_mat(height, width, cv_type, host_tensor.data());
+
+    if (mask_mat.channels() > 1) {
+      cv::extractChannel(mask_mat, mask_mat, little_endian_ ? 0 : mask_mat.channels() - 1);
     }
+    if (mask_mat.type() != CV_32S) {
+      mask_mat.convertTo(mask_mat, CV_32S);
+    }
+
+    cv::Mat resized_mask = cpu::Resize(mask_mat, input_height, input_width, "nearest");
+
+    SegmentorOutput output{cpu::CVMat2Tensor(resized_mask), input_height, input_width, classes_};
+    return to_value(output);
   }

 private:
-  Result<Value> MaskResize(Tensor &tensor, int dst_height, int dst_width) {
-    auto channel = tensor.shape(1);
-    auto height = tensor.shape(2);
-    auto width = tensor.shape(3);
-
-    // reshape tensor to convert it to cv::Mat
-    tensor.Reshape({1, height, width, channel});
-    auto mat = cpu::Tensor2CVMat(tensor);
-    auto dst = cpu::Resize(mat, dst_height, dst_width, "nearest");
-    if (channel == 1) {
-      auto output_tensor = cpu::CVMat2Tensor(dst);
-      SegmentorOutput output{output_tensor, dst_height, dst_width, classes_};
-      return to_value(output);
-    } else {
-      cv::Mat _dst;
-      int channel = little_endian_ ? 0 : dst.dims - 1;
-      cv::extractChannel(dst, _dst, channel);
-      auto output_tensor = cpu::CVMat2Tensor(_dst);
-      SegmentorOutput output{output_tensor, dst_height, dst_width, classes_};
-      return to_value(output);
+  static Result<int> GetCvType(DataType type) {
+    switch (type) {
+      case DataType::kFLOAT:
+        return CV_32F;
+      case DataType::kINT64:
+        return CV_32SC2;
+      case DataType::kINT32:
+        return CV_32S;
+      default:
+        return Status(eNotSupported);
     }
   }

-  bool IsLittleEndian() {
+  static bool IsLittleEndian() {
     union Un {
       char a;
       int b;
diff --git a/tests/test_codebase/test_mmdet/test_mmdet_core.py b/tests/test_codebase/test_mmdet/test_mmdet_core.py
index bf48bc2ae..892e7bfdf 100644
--- a/tests/test_codebase/test_mmdet/test_mmdet_core.py
+++ b/tests/test_codebase/test_mmdet/test_mmdet_core.py
@@ -213,7 +213,7 @@ def test_multiclass_nms_with_keep_top_k(pre_top_k):
     model_inputs = {'boxes': test_boxes, 'scores': test_scores}

     import mmdeploy.backend.onnxruntime as ort_apis
-    backend_model = ort_apis.ORTWrapper(onnx_model_path, 'cuda:0', None)
+    backend_model = ort_apis.ORTWrapper(onnx_model_path, 'cpu', None)
     output = backend_model.forward(model_inputs)
     output = backend_model.output_to_list(output)
     dets = output[0]
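
The sketch below is not part of the patch; it only illustrates the idea behind the new ResizeMask path for an int64 class-id mask: the buffer is viewed as a 2-channel CV_32S Mat, the significant half of each label is pulled out with cv::extractChannel, and the label map is resized with nearest-neighbor interpolation. It uses plain OpenCV calls (cv::resize with INTER_NEAREST) in place of mmdeploy's cpu::Resize / cpu::CVMat2Tensor helpers, and the sizes and label values are made up for illustration.

// Standalone illustration only; mirrors the post-processing in the patched ResizeMask,
// but with plain OpenCV instead of mmdeploy's cpu::Resize / cpu::CVMat2Tensor wrappers.
#include <cstdint>
#include <vector>

#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>

int main() {
  // Pretend network output: a 2x3 mask of int64 class ids (values fit in 32 bits).
  std::vector<int64_t> mask64 = {0, 1, 2, 2, 1, 0};
  const int height = 2, width = 3;

  // View the int64 buffer as a 2-channel 32-bit integer Mat (CV_32SC2), no copy.
  cv::Mat mask_mat(height, width, CV_32SC2, mask64.data());

  // On a little-endian host the low 32 bits of each label live in channel 0;
  // a big-endian host would need the last channel instead (what little_endian_ selects).
  cv::Mat labels;
  cv::extractChannel(mask_mat, labels, 0);  // labels is single-channel CV_32S

  // Nearest-neighbor resize back to the original input size, as the patch does.
  cv::Mat resized;
  cv::resize(labels, resized, cv::Size(6, 4), 0, 0, cv::INTER_NEAREST);

  CV_Assert(resized.type() == CV_32S && resized.rows == 4 && resized.cols == 6);
  return 0;
}

Viewing the int64 output as two int32 channels avoids a per-element cast on the host before resizing; the TODO in the patch notes that casting to a narrower type while still on the device would also shrink the device-to-host copy.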