[Feature] Add tensorrt single stage partition (#88)
* Add tensorrt single stage partition
* add docstring & type hint
parent ec66d47057
commit 5453f9befa
@@ -0,0 +1,3 @@
+_base_ = ['./single-stage_tensorrt_dynamic-320x320-1344x1344.py']
+
+partition_config = dict(type='single_stage', apply_marks=True)
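The new config only inherits the dynamic-shape TensorRT base and switches on single-stage partitioning. As a hedged sketch (the file path below is a placeholder, not the actual name of the new config file), loading it with mmcv resolves `_base_` and exposes the merged `partition_config`:

import mmcv

# Placeholder path for the partition config added in this commit.
deploy_cfg = mmcv.Config.fromfile(
    'configs/mmdet/single-stage_partition_tensorrt_dynamic-320x320-1344x1344.py')
print(deploy_cfg.partition_config)
# {'type': 'single_stage', 'apply_marks': True}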
@@ -43,7 +43,7 @@ def onnx2tensorrt(work_dir: str,
         int8_param['calib_file'] = osp.join(work_dir, calib_file)
         int8_param['model_type'] = partition_type
 
-    assert device.startswith('cuda')
+    assert device.startswith('cuda'), 'TensorRT requires a cuda device.'
     device_id = parse_device_id(device)
     engine = create_trt_engine(
         onnx_model,
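For reference, a minimal sketch of the int8 calibration parameters assembled above, assuming the surrounding variables hold the values passed into onnx2tensorrt; recording `model_type` presumably tells the calibrator whether it is reading end-to-end or partitioned calibration data:

import os.path as osp

work_dir, calib_file = 'work_dir', 'calib_data.h5'    # assumed example values
partition_type, device = 'single_stage', 'cuda:0'

int8_param = dict(
    calib_file=osp.join(work_dir, calib_file),  # calibration data produced earlier
    model_type=partition_type)                  # 'end2end' or a partition type

assert device.startswith('cuda'), 'TensorRT requires a cuda device.'
device_id = int(device.split(':')[-1]) if ':' in device else 0  # mimics parse_device_id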
@@ -1,5 +1,5 @@
 from functools import partial
-from typing import Union
+from typing import Sequence, Tuple, Union
 
 import mmcv
 import numpy as np
@@ -183,7 +183,7 @@ class PartitionSingleStageDetector(DeployBaseDetector):
         score_threshold = cfg.get('score_thr', post_params.score_threshold)
         pre_top_k = post_params.pre_top_k
         keep_top_k = cfg.get('max_per_img', post_params.keep_top_k)
-        return multiclass_nms(
+        ret = multiclass_nms(
             bboxes,
             scores,
             max_output_boxes_per_class,
@@ -191,6 +191,8 @@ class PartitionSingleStageDetector(DeployBaseDetector):
             score_threshold=score_threshold,
             pre_top_k=pre_top_k,
             keep_top_k=keep_top_k)
+        ret = [r.cpu() for r in ret]
+        return ret
 
 
 class ONNXRuntimePSSDetector(PartitionSingleStageDetector):
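The added `ret = [r.cpu() for r in ret]` is the functional change in this hunk: `multiclass_nms` presumably returns a (dets, labels) pair of GPU tensors when the model runs on CUDA, and moving both to CPU avoids device mismatches in downstream evaluation code. A minimal sketch of the contract assumed here (shapes and class count are illustrative only):

import torch

device = 'cuda' if torch.cuda.is_available() else 'cpu'
dets = torch.rand(1, 100, 5, device=device)              # [batch, num_boxes, x1y1x2y2 + score]
labels = torch.randint(0, 80, (1, 100), device=device)   # [batch, num_boxes]
ret = [r.cpu() for r in (dets, labels)]                   # same move as in the diff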
@@ -214,6 +216,48 @@ class ONNXRuntimePSSDetector(PartitionSingleStageDetector):
         return self.partition0_postprocess(scores, bboxes)
 
 
+class TensorRTPSSDetector(PartitionSingleStageDetector):
+    """TensorRT wrapper for partition single stage detector.
+
+    Args:
+        model_file (str): Path of the engine file.
+        class_names (list[str] | tuple[str]): Class names of the detector.
+        model_cfg (str | mmcv.Config): Model config file or Config object.
+        deploy_cfg (str | mmcv.Config): Deployment config file or Config
+            object.
+        device_id (int): Device index, should be same as the engine.
+    """
+
+    def __init__(self, model_file: str, class_names: Sequence[str],
+                 model_cfg: Union[str, mmcv.Config],
+                 deploy_cfg: Union[str, mmcv.Config], device_id: int,
+                 **kwargs):
+        super(TensorRTPSSDetector,
+              self).__init__(class_names, model_cfg, deploy_cfg, device_id,
+                             **kwargs)
+        from mmdeploy.apis.tensorrt import TRTWrapper
+
+        self.model = TRTWrapper(model_file)
+        self.output_names = ['scores', 'boxes']
+
+    def forward_test(self, imgs: Sequence[torch.Tensor], *args,
+                     **kwargs) -> Tuple[torch.Tensor, torch.Tensor]:
+        """Run forward test.
+
+        Args:
+            imgs (Sequence[torch.Tensor]): The input images.
+
+        Return:
+            Tuple[torch.Tensor, torch.Tensor]: Output dets and labels.
+        """
+        input_data = imgs[0].contiguous()
+        with torch.cuda.device(self.device_id), torch.no_grad():
+            outputs = self.model({'input': input_data})
+        outputs = [outputs[name] for name in self.output_names]
+        scores, bboxes = outputs[:2]
+        return self.partition0_postprocess(scores, bboxes)
+
+
 class NCNNPSSDetector(PartitionSingleStageDetector):
     """Wrapper for detector's inference with NCNN."""
 
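A hedged usage sketch of the wrapper added above; the engine path, class names, and config files are placeholders, and the input is assumed to be an already-preprocessed batch tensor on the matching CUDA device:

import torch

detector = TensorRTPSSDetector(
    model_file='work_dir/partition0.engine',        # serialized TensorRT engine (placeholder)
    class_names=('person', 'car'),                  # placeholder class names
    model_cfg='configs/ssd/ssd300_coco.py',         # placeholder mmdet model config
    deploy_cfg='single-stage_tensorrt_dynamic-320x320-1344x1344.py',
    device_id=0)

img = torch.rand(1, 3, 320, 320).cuda(0)            # preprocessed input, NCHW
dets, labels = detector.forward_test([img])          # post-processed detections and labels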
@@ -504,7 +548,9 @@ ONNXRUNTIME_DETECTOR_MAP = dict(
     two_stage=ONNXRuntimePTSDetector)
 
 TENSORRT_DETECTOR_MAP = dict(
-    end2end=TensorRTDetector, two_stage=TensorRTPTSDetector)
+    end2end=TensorRTDetector,
+    single_stage=TensorRTPSSDetector,
+    two_stage=TensorRTPTSDetector)
 
 PPL_DETECTOR_MAP = dict(end2end=PPLDetector)
 
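Registering `single_stage` here is presumably what lets the builder pick the new wrapper based on the partition type in the deploy config. A minimal sketch of that lookup, assuming a `partition_config` like the one added in the new config file above:

partition_config = dict(type='single_stage', apply_marks=True)   # from the new config
partition_type = partition_config['type'] if partition_config else 'end2end'
detector_cls = TENSORRT_DETECTOR_MAP[partition_type]              # -> TensorRTPSSDetector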