[Feature] Add tensorrt single stage partition (#88)

* Add tensorrt single stage partition

* Add docstrings and type hints
pull/12/head
q.yao 2021-09-23 15:49:36 +08:00 committed by GitHub
parent ec66d47057
commit 5453f9befa
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 53 additions and 4 deletions

View File

@ -0,0 +1,3 @@
_base_ = ['./single-stage_tensorrt_dynamic-320x320-1344x1344.py']
partition_config = dict(type='single_stage', apply_marks=True)

View File

@ -43,7 +43,7 @@ def onnx2tensorrt(work_dir: str,
int8_param['calib_file'] = osp.join(work_dir, calib_file)
int8_param['model_type'] = partition_type
assert device.startswith('cuda')
assert device.startswith('cuda'), 'TensorRT require cuda device.'
device_id = parse_device_id(device)
engine = create_trt_engine(
onnx_model,

View File

@ -1,5 +1,5 @@
from functools import partial
from typing import Union
from typing import Sequence, Tuple, Union
import mmcv
import numpy as np
@ -183,7 +183,7 @@ class PartitionSingleStageDetector(DeployBaseDetector):
score_threshold = cfg.get('score_thr', post_params.score_threshold)
pre_top_k = post_params.pre_top_k
keep_top_k = cfg.get('max_per_img', post_params.keep_top_k)
return multiclass_nms(
ret = multiclass_nms(
bboxes,
scores,
max_output_boxes_per_class,
@ -191,6 +191,8 @@ class PartitionSingleStageDetector(DeployBaseDetector):
score_threshold=score_threshold,
pre_top_k=pre_top_k,
keep_top_k=keep_top_k)
ret = [r.cpu() for r in ret]
return ret
class ONNXRuntimePSSDetector(PartitionSingleStageDetector):
@ -214,6 +216,48 @@ class ONNXRuntimePSSDetector(PartitionSingleStageDetector):
return self.partition0_postprocess(scores, bboxes)
class TensorRTPSSDetector(PartitionSingleStageDetector):
    """TensorRT wrapper for the partitioned single stage detector.

    The engine is loaded through ``TRTWrapper`` and its ``scores`` and
    ``boxes`` outputs are handed to ``partition0_postprocess``.

    Args:
        model_file (str): Path of the engine file.
        class_names (list[str] | tuple[str]): Class names of the detector.
        model_cfg (str | mmcv.Config): Model config file or Config object.
        deploy_cfg (str | mmcv.Config): Deployment config file or Config
            object.
        device_id (int): Device index, should be same as the engine.
    """

    def __init__(self, model_file: str, class_names: Sequence[str],
                 model_cfg: Union[str, mmcv.Config],
                 deploy_cfg: Union[str, mmcv.Config], device_id: int,
                 **kwargs):
        super().__init__(class_names, model_cfg, deploy_cfg, device_id,
                         **kwargs)

        # Function-scope import: TRTWrapper is only pulled in when this
        # class is actually instantiated.
        from mmdeploy.apis.tensorrt import TRTWrapper

        self.model = TRTWrapper(model_file)
        # Names used to pick the engine outputs, in the order expected by
        # `forward_test` (scores first, then boxes).
        self.output_names = ['scores', 'boxes']

    def forward_test(self, imgs: Sequence[torch.Tensor], *args,
                     **kwargs) -> Tuple[torch.Tensor, torch.Tensor]:
        """Run forward test.

        Only the first element of ``imgs`` is fed to the engine.

        Args:
            imgs (Sequence[torch.Tensor]): The input images.

        Returns:
            Tuple[torch.Tensor, torch.Tensor]: Output dets and labels, as
                produced by ``partition0_postprocess``.
        """
        # The tensor is made contiguous before it is passed to the engine.
        engine_input = {'input': imgs[0].contiguous()}

        # Run the engine on the configured CUDA device without autograd.
        with torch.cuda.device(self.device_id), torch.no_grad():
            raw_outputs = self.model(engine_input)

        # Select outputs by name, then keep the first two as scores/bboxes.
        picked = [raw_outputs[key] for key in self.output_names]
        scores, bboxes = picked[:2]
        return self.partition0_postprocess(scores, bboxes)
class NCNNPSSDetector(PartitionSingleStageDetector):
"""Wrapper for detector's inference with NCNN."""
@ -504,7 +548,9 @@ ONNXRUNTIME_DETECTOR_MAP = dict(
two_stage=ONNXRuntimePTSDetector)
TENSORRT_DETECTOR_MAP = dict(
end2end=TensorRTDetector, two_stage=TensorRTPTSDetector)
end2end=TensorRTDetector,
single_stage=TensorRTPSSDetector,
two_stage=TensorRTPTSDetector)
PPL_DETECTOR_MAP = dict(end2end=PPLDetector)