2021-01-06 11:05:19 +08:00
|
|
|
import os
|
[Feature] : Add NonMaxSuppression TensorRT Plugin (#787)
* start trt plugin prototype
* Add test module, modify roialign convertor
* finish roi_align trt plugin
* fix conflict of RoiAlign and MMCVRoiAlign
* fix for lint
* fix test tensorrt module
* test_tensorrt move import to test func
* add except error type
* add tensorrt to setup.cfg
* code format with yapf
* fix for clang-format
* move tensorrt_utils to mmcv/tensorrt, add comments, better test module
* fix line endings, docformatter
* isort init, remove trailing whitespace
* add except type
* fix setup.py
* put import extension inside trt setup
* change c++ guard, update pytest script, better setup, etc
* sort import with isort
* sort import with isort
* move init of plugin lib to init_plugins.py
* add scatternd, nms plugin (WIP)
* fix bugs of trt_nms
* add trt nms test module
* fix bugs of scatternd
* code optimize, add comment about nms kernel
* fix transform_if bug of trt_nms_kernel
* fix struct name
* default nms offset=0, fix bugs of batched input
* format with clang-format
* onnx preprocess
* much better nms implementation, no need to transfer memory between host and device
* update preprocess_onnx
* parse constant tensor from initializer in preprocess_onnx
* update nms
* remove unnecessary codes
* workspace aligned address
* format trt_plugin_helper.hpp
* fix index memory bugs
* set alignment to 16 by default
* fix lint
* fix nms offset
* fix bugs of preprocess onnx
* update test for nms
* tensorrt only accept int32, not int64
* update nms comments
* fix indexing for scores in nms
* update trt temp
* make trt-nms compatiable to #803
* fix lint
* add docstring to trt_nms_kernel.cuda, add description to preprocess_onnx
* add comment to score indexing
* fix bugs of max output boxes
Co-authored-by: maningsheng <maningsheng@sensetime.com>
2021-02-23 15:09:49 +08:00
|
|
|
from functools import partial
|
2021-01-06 11:05:19 +08:00
|
|
|
|
|
|
|
import numpy as np
|
|
|
|
import onnx
|
|
|
|
import pytest
|
|
|
|
import torch
|
[Feature] : Add NonMaxSuppression TensorRT Plugin (#787)
* start trt plugin prototype
* Add test module, modify roialign convertor
* finish roi_align trt plugin
* fix conflict of RoiAlign and MMCVRoiAlign
* fix for lint
* fix test tensorrt module
* test_tensorrt move import to test func
* add except error type
* add tensorrt to setup.cfg
* code format with yapf
* fix for clang-format
* move tensorrt_utils to mmcv/tensorrt, add comments, better test module
* fix line endings, docformatter
* isort init, remove trailing whitespace
* add except type
* fix setup.py
* put import extension inside trt setup
* change c++ guard, update pytest script, better setup, etc
* sort import with isort
* sort import with isort
* move init of plugin lib to init_plugins.py
* add scatternd, nms plugin (WIP)
* fix bugs of trt_nms
* add trt nms test module
* fix bugs of scatternd
* code optimize, add comment about nms kernel
* fix transform_if bug of trt_nms_kernel
* fix struct name
* default nms offset=0, fix bugs of batched input
* format with clang-format
* onnx preprocess
* much better nms implementation, no need to transfer memory between host and device
* update preprocess_onnx
* parse constant tensor from initializer in preprocess_onnx
* update nms
* remove unnecessary codes
* workspace aligned address
* format trt_plugin_helper.hpp
* fix index memory bugs
* set alignment to 16 by default
* fix lint
* fix nms offset
* fix bugs of preprocess onnx
* update test for nms
* tensorrt only accept int32, not int64
* update nms comments
* fix indexing for scores in nms
* update trt temp
* make trt-nms compatiable to #803
* fix lint
* add docstring to trt_nms_kernel.cuda, add description to preprocess_onnx
* add comment to score indexing
* fix bugs of max output boxes
Co-authored-by: maningsheng <maningsheng@sensetime.com>
2021-02-23 15:09:49 +08:00
|
|
|
import torch.nn as nn
|
2021-01-06 11:05:19 +08:00
|
|
|
|
2021-01-20 11:15:07 +08:00
|
|
|
# Skip the whole module early when its hard requirements are missing, so the
# tests below never run in an environment that cannot build a TensorRT engine.
try:
    from mmcv.tensorrt import (TRTWraper, is_tensorrt_plugin_loaded, onnx2trt,
                               save_trt_engine)
except ImportError:
    pytest.skip(
        'TensorRT should be installed from source.', allow_module_level=True)

# Every test moves tensors to the GPU with ``.cuda()``.
if not torch.cuda.is_available():
    pytest.skip(
        'CUDA is required for this test module', allow_module_level=True)

# The tests exercise mmcv's custom TensorRT plugins, not only stock TensorRT.
if not is_tensorrt_plugin_loaded():
    pytest.skip(
        'Test requires to complie TensorRT plugins in mmcv',
        allow_module_level=True)
|
|
|
|
|
|
|
|
|
[Feature] : Add NonMaxSuppression TensorRT Plugin (#787)
* start trt plugin prototype
* Add test module, modify roialign convertor
* finish roi_align trt plugin
* fix conflict of RoiAlign and MMCVRoiAlign
* fix for lint
* fix test tensorrt module
* test_tensorrt move import to test func
* add except error type
* add tensorrt to setup.cfg
* code format with yapf
* fix for clang-format
* move tensorrt_utils to mmcv/tensorrt, add comments, better test module
* fix line endings, docformatter
* isort init, remove trailing whitespace
* add except type
* fix setup.py
* put import extension inside trt setup
* change c++ guard, update pytest script, better setup, etc
* sort import with isort
* sort import with isort
* move init of plugin lib to init_plugins.py
* add scatternd, nms plugin (WIP)
* fix bugs of trt_nms
* add trt nms test module
* fix bugs of scatternd
* code optimize, add comment about nms kernel
* fix transform_if bug of trt_nms_kernel
* fix struct name
* default nms offset=0, fix bugs of batched input
* format with clang-format
* onnx preprocess
* much better nms implementation, no need to transfer memory between host and device
* update preprocess_onnx
* parse constant tensor from initializer in preprocess_onnx
* update nms
* remove unnecessary codes
* workspace aligned address
* format trt_plugin_helper.hpp
* fix index memory bugs
* set alignment to 16 by default
* fix lint
* fix nms offset
* fix bugs of preprocess onnx
* update test for nms
* tensorrt only accept int32, not int64
* update nms comments
* fix indexing for scores in nms
* update trt temp
* make trt-nms compatiable to #803
* fix lint
* add docstring to trt_nms_kernel.cuda, add description to preprocess_onnx
* add comment to score indexing
* fix bugs of max output boxes
Co-authored-by: maningsheng <maningsheng@sensetime.com>
2021-02-23 15:09:49 +08:00
|
|
|
class WrapFunction(nn.Module):
    """Expose a plain callable as an ``nn.Module``.

    The tests in this file pass instances of this class to
    ``torch.onnx.export`` in place of a model.

    Args:
        wrapped_function (callable): Callable invoked by :meth:`forward`.
    """

    def __init__(self, wrapped_function):
        super().__init__()
        self.wrapped_function = wrapped_function

    def forward(self, *args, **kwargs):
        """Call the wrapped callable with the given arguments.

        All positional and keyword arguments are forwarded unchanged and the
        callable's return value is returned as-is.
        """
        return self.wrapped_function(*args, **kwargs)
|
|
|
|
|
|
|
|
|
2021-01-06 11:05:19 +08:00
|
|
|
# Scratch files shared by every test in this module: the exported ONNX model
# and the serialized TensorRT engine. Each test removes them when it is done.
onnx_file = 'tmp.onnx'
trt_file = 'tmp.engine'
|
|
|
|
|
|
|
|
|
|
|
|
def test_roialign():
    """Compare the TensorRT RoIAlign result with the PyTorch op.

    For every sample case the op is exported to ONNX, converted to a
    TensorRT engine, executed, and its ``roi_feat`` output is compared with
    the output of the PyTorch module on the same tensors.

    The temporary ONNX/engine files are removed in a ``finally`` clause so
    they do not leak when export or engine building raises.
    """
    try:
        from mmcv.ops import RoIAlign
    except (ImportError, ModuleNotFoundError):
        pytest.skip('test requires compilation')

    # trt config
    fp16_mode = False
    max_workspace_size = 1 << 30

    # roi align config
    pool_h = 2
    pool_w = 2
    spatial_scale = 1.0
    sampling_ratio = 2

    # Each case is a (feature map, rois) pair given as nested lists.
    inputs = [([[[[1., 2.], [3., 4.]]]], [[0., 0., 0., 1., 1.]]),
              ([[[[1., 2.], [3., 4.]], [[4., 3.],
                                        [2., 1.]]]], [[0., 0., 0., 1., 1.]]),
              ([[[[1., 2., 5., 6.], [3., 4., 7., 8.], [9., 10., 13., 14.],
                  [11., 12., 15., 16.]]]], [[0., 0., 0., 3., 3.]])]

    wrapped_model = RoIAlign((pool_w, pool_h), spatial_scale, sampling_ratio,
                             'avg', True).cuda()
    for case in inputs:
        np_input = np.array(case[0], dtype=np.float32)
        np_rois = np.array(case[1], dtype=np.float32)
        # Named ``feats`` rather than ``input`` to avoid shadowing the builtin.
        feats = torch.from_numpy(np_input).cuda()
        rois = torch.from_numpy(np_rois).cuda()

        try:
            with torch.no_grad():
                torch.onnx.export(
                    wrapped_model, (feats, rois),
                    onnx_file,
                    export_params=True,
                    keep_initializers_as_inputs=True,
                    input_names=['input', 'rois'],
                    output_names=['roi_feat'],
                    opset_version=11)
            onnx_model = onnx.load(onnx_file)

            # create trt engine and wraper
            # The same shape is given three times per input -- presumably
            # the [min, opt, max] profile expected by onnx2trt (TODO confirm).
            opt_shape_dict = {
                'input': [list(feats.shape) for _ in range(3)],
                'rois': [list(rois.shape) for _ in range(3)]
            }
            trt_engine = onnx2trt(
                onnx_model,
                opt_shape_dict,
                fp16_mode=fp16_mode,
                max_workspace_size=max_workspace_size)
            save_trt_engine(trt_engine, trt_file)
            trt_model = TRTWraper(trt_file, ['input', 'rois'], ['roi_feat'])

            with torch.no_grad():
                trt_outputs = trt_model({'input': feats, 'rois': rois})
                trt_roi_feat = trt_outputs['roi_feat']

            # compute pytorch_output
            with torch.no_grad():
                pytorch_roi_feat = wrapped_model(feats, rois)
        finally:
            # Always drop the scratch files, even if a step above failed.
            for tmp_path in (onnx_file, trt_file):
                if os.path.exists(tmp_path):
                    os.remove(tmp_path)

        # allclose
        assert torch.allclose(pytorch_roi_feat, trt_roi_feat)
|
2021-01-20 11:15:07 +08:00
|
|
|
|
|
|
|
|
[Feature] : Add NonMaxSuppression TensorRT Plugin (#787)
* start trt plugin prototype
* Add test module, modify roialign convertor
* finish roi_align trt plugin
* fix conflict of RoiAlign and MMCVRoiAlign
* fix for lint
* fix test tensorrt module
* test_tensorrt move import to test func
* add except error type
* add tensorrt to setup.cfg
* code format with yapf
* fix for clang-format
* move tensorrt_utils to mmcv/tensorrt, add comments, better test module
* fix line endings, docformatter
* isort init, remove trailing whitespace
* add except type
* fix setup.py
* put import extension inside trt setup
* change c++ guard, update pytest script, better setup, etc
* sort import with isort
* sort import with isort
* move init of plugin lib to init_plugins.py
* add scatternd, nms plugin (WIP)
* fix bugs of trt_nms
* add trt nms test module
* fix bugs of scatternd
* code optimize, add comment about nms kernel
* fix transform_if bug of trt_nms_kernel
* fix struct name
* default nms offset=0, fix bugs of batched input
* format with clang-format
* onnx preprocess
* much better nms implementation, no need to transfer memory between host and device
* update preprocess_onnx
* parse constant tensor from initializer in preprocess_onnx
* update nms
* remove unnecessary codes
* workspace aligned address
* format trt_plugin_helper.hpp
* fix index memory bugs
* set alignment to 16 by default
* fix lint
* fix nms offset
* fix bugs of preprocess onnx
* update test for nms
* tensorrt only accept int32, not int64
* update nms comments
* fix indexing for scores in nms
* update trt temp
* make trt-nms compatiable to #803
* fix lint
* add docstring to trt_nms_kernel.cuda, add description to preprocess_onnx
* add comment to score indexing
* fix bugs of max output boxes
Co-authored-by: maningsheng <maningsheng@sensetime.com>
2021-02-23 15:09:49 +08:00
|
|
|
def test_nms():
    """Compare the TensorRT NMS result with the PyTorch ``nms`` op.

    ``nms`` is exported to ONNX with ``ONNX_BACKEND`` set to
    ``'MMCVTensorRT'``, converted to a TensorRT engine and executed. The kept
    scores and indices are then compared with the PyTorch results.

    The environment variable and the temporary ONNX/engine files are
    restored/removed in a ``finally`` clause so a failure in this test cannot
    leak state into other tests.
    """
    try:
        import mmcv
        from mmcv.ops import nms
    except (ImportError, ModuleNotFoundError):
        pytest.skip('test requires compilation')

    # Remember any pre-existing value so it can be put back afterwards.
    previous_backend = os.environ.get('ONNX_BACKEND')
    os.environ['ONNX_BACKEND'] = 'MMCVTensorRT'
    try:
        # trt config
        fp16_mode = False
        max_workspace_size = 1 << 30
        data = mmcv.load('./tests/data/batched_nms_data.pkl')
        boxes = data['boxes'].cuda()
        scores = data['scores'].cuda()
        nms_func = partial(nms, iou_threshold=0.7, offset=0)
        wrapped_model = WrapFunction(nms_func)
        wrapped_model.cpu().eval()
        with torch.no_grad():
            torch.onnx.export(
                wrapped_model, (boxes.detach().cpu(), scores.detach().cpu()),
                onnx_file,
                export_params=True,
                keep_initializers_as_inputs=True,
                input_names=['boxes', 'scores'],
                output_names=['dets', 'inds'],
                opset_version=11)
        onnx_model = onnx.load(onnx_file)

        # create trt engine and wraper
        # The same shape is given three times per input -- presumably the
        # [min, opt, max] profile expected by onnx2trt (TODO confirm).
        opt_shape_dict = {
            'boxes': [list(boxes.shape) for _ in range(3)],
            'scores': [list(scores.shape) for _ in range(3)]
        }
        trt_engine = onnx2trt(
            onnx_model,
            opt_shape_dict,
            fp16_mode=fp16_mode,
            max_workspace_size=max_workspace_size)
        save_trt_engine(trt_engine, trt_file)
        trt_model = TRTWraper(trt_file, ['boxes', 'scores'], ['dets', 'inds'])

        with torch.no_grad():
            trt_outputs = trt_model({'boxes': boxes, 'scores': scores})
            trt_dets = trt_outputs['dets']
            trt_inds = trt_outputs['inds']
            # Cast to int64 before comparing with the PyTorch indices.
            trt_inds = trt_inds.long()

        # compute pytorch_output
        with torch.no_grad():
            pytorch_outputs = wrapped_model(boxes, scores)
            pytorch_dets, pytorch_inds = pytorch_outputs
    finally:
        if previous_backend is None:
            os.environ.pop('ONNX_BACKEND', None)
        else:
            os.environ['ONNX_BACKEND'] = previous_backend
        for tmp_path in (onnx_file, trt_file):
            if os.path.exists(tmp_path):
                os.remove(tmp_path)

    # allclose
    # Only the first ``num_boxes`` TensorRT rows are compared; the engine
    # output is presumably padded beyond that (TODO confirm).
    num_boxes = pytorch_dets.shape[0]
    trt_dets = trt_dets[:num_boxes, ...]
    trt_inds = trt_inds[:num_boxes]
    trt_scores = trt_dets[:, 4]
    pytorch_scores = pytorch_dets[:, 4]
    assert torch.allclose(pytorch_scores, trt_scores, atol=1e-3)
    assert torch.equal(pytorch_inds, trt_inds)
|
|
|
|
|
|
|
|
|
|
|
|
def test_batched_nms():
    """Compare the TensorRT result of ``batched_nms`` with PyTorch.

    ``batched_nms`` is exported to ONNX with ``ONNX_BACKEND`` set to
    ``'MMCVTensorRT'``, converted to a TensorRT engine and executed. The kept
    scores and indices are then compared with the PyTorch results.

    The environment variable and the temporary ONNX/engine files are
    restored/removed in a ``finally`` clause so a failure in this test cannot
    leak state into other tests.
    """
    try:
        import mmcv
        from mmcv.ops import batched_nms
    except (ImportError, ModuleNotFoundError):
        pytest.skip('test requires compilation')

    # trt config
    # Remember any pre-existing value so it can be put back afterwards.
    previous_backend = os.environ.get('ONNX_BACKEND')
    os.environ['ONNX_BACKEND'] = 'MMCVTensorRT'
    try:
        fp16_mode = False
        max_workspace_size = 1 << 30
        data = mmcv.load('./tests/data/batched_nms_data.pkl')
        nms_cfg = dict(type='nms', iou_threshold=0.7)
        boxes = data['boxes'].cuda()
        scores = data['scores'].cuda()
        idxs = data['idxs'].cuda()
        class_agnostic = False

        batched_nms_func = partial(
            batched_nms, nms_cfg=nms_cfg, class_agnostic=class_agnostic)
        wrapped_model = WrapFunction(batched_nms_func)
        wrapped_model.cpu().eval()
        input_data = (boxes.detach().cpu(), scores.detach().cpu(),
                      idxs.detach().cpu())
        input_names = ['boxes', 'scores', 'idxs']
        output_names = ['dets', 'inds']
        with torch.no_grad():
            torch.onnx.export(
                wrapped_model,
                input_data,
                onnx_file,
                export_params=True,
                keep_initializers_as_inputs=True,
                input_names=input_names,
                output_names=output_names,
                opset_version=11)
        onnx_model = onnx.load(onnx_file)

        # create trt engine and wraper
        # The same shape is given three times per input -- presumably the
        # [min, opt, max] profile expected by onnx2trt (TODO confirm).
        opt_shape_dict = {
            'boxes': [list(boxes.shape) for _ in range(3)],
            'scores': [list(scores.shape) for _ in range(3)],
            'idxs': [list(idxs.shape) for _ in range(3)]
        }
        trt_engine = onnx2trt(
            onnx_model,
            opt_shape_dict,
            fp16_mode=fp16_mode,
            max_workspace_size=max_workspace_size)
        save_trt_engine(trt_engine, trt_file)
        trt_model = TRTWraper(trt_file, input_names, output_names)

        with torch.no_grad():
            trt_outputs = trt_model({
                'boxes': boxes,
                'scores': scores,
                'idxs': idxs
            })
            trt_dets = trt_outputs['dets']
            trt_inds = trt_outputs['inds']
            # Cast to int64 before comparing with the PyTorch indices.
            trt_inds = trt_inds.long()

        # compute pytorch_output
        with torch.no_grad():
            pytorch_outputs = wrapped_model(boxes, scores, idxs)
            pytorch_dets, pytorch_inds = pytorch_outputs
    finally:
        if previous_backend is None:
            os.environ.pop('ONNX_BACKEND', None)
        else:
            os.environ['ONNX_BACKEND'] = previous_backend
        for tmp_path in (onnx_file, trt_file):
            if os.path.exists(tmp_path):
                os.remove(tmp_path)

    # allclose
    # Only the first ``num_boxes`` TensorRT rows are compared; the engine
    # output is presumably padded beyond that (TODO confirm).
    num_boxes = pytorch_dets.shape[0]
    trt_dets = trt_dets[:num_boxes, ...]
    trt_inds = trt_inds[:num_boxes]
    trt_scores = trt_dets[:, 4]
    pytorch_scores = pytorch_dets[:, 4]

    assert torch.allclose(pytorch_scores, trt_scores)
    assert torch.equal(pytorch_inds, trt_inds)
|
|
|
|
|
|
|
|
|
2021-01-20 11:15:07 +08:00
|
|
|
def test_scatternd():
    """Compare the TensorRT result of in-place slice updates with PyTorch.

    A small function that updates two slices of its input in place is
    exported to ONNX, converted to a TensorRT engine and executed; the
    ``output`` tensor is compared with the PyTorch result. The test name
    suggests the export is expected to produce ScatterND nodes (not
    verifiable from this file).

    The temporary ONNX/engine files are removed in a ``finally`` clause so
    they do not leak when export or engine building raises.
    """

    def func(data):
        # Both statements modify ``data`` in place, which is why every call
        # site below passes ``data.clone()``.
        data[:, :-2] += 1
        data[:2, :] -= 1
        return data

    data = torch.zeros(4, 4).cuda()
    wrapped_model = WrapFunction(func).eval().cuda()

    input_names = ['input']
    output_names = ['output']

    try:
        with torch.no_grad():
            torch.onnx.export(
                wrapped_model, (data.clone(), ),
                onnx_file,
                export_params=True,
                keep_initializers_as_inputs=True,
                input_names=input_names,
                output_names=output_names,
                opset_version=11)

        onnx_model = onnx.load(onnx_file)

        # create trt engine and wraper
        # The same shape is given three times -- presumably the
        # [min, opt, max] profile expected by onnx2trt (TODO confirm).
        opt_shape_dict = {
            'input': [list(data.shape) for _ in range(3)],
        }
        # trt config
        fp16_mode = False
        max_workspace_size = 1 << 30

        trt_engine = onnx2trt(
            onnx_model,
            opt_shape_dict,
            fp16_mode=fp16_mode,
            max_workspace_size=max_workspace_size)

        save_trt_engine(trt_engine, trt_file)
        trt_model = TRTWraper(trt_file, input_names, output_names)

        with torch.no_grad():
            trt_outputs = trt_model({'input': data.clone()})
            trt_results = trt_outputs['output']

        # compute pytorch_output
        with torch.no_grad():
            pytorch_results = wrapped_model(data.clone())
    finally:
        # Always drop the scratch files, even if a step above failed.
        for tmp_path in (onnx_file, trt_file):
            if os.path.exists(tmp_path):
                os.remove(tmp_path)

    # allclose
    assert torch.allclose(pytorch_results, trt_results)
|