[Feature] Add ppl backend. (#32)

* add ppl support

* save

* add ppl speed test

* drop "add ppl speed test"

This reverts commit 467504a23074a2defdadeff9819c8d19bfefda71.

* add ppl backend

* resolve comments

* fix DeployBaseClassifier
This commit is contained in:
AllentDan 2021-08-12 14:44:10 +08:00 committed by GitHub
parent 4f530bc758
commit dcb88e4439
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 168 additions and 5 deletions

View File

@ -1,2 +1,2 @@
[settings]
known_third_party = mmcls,mmcv,mmdet,numpy,onnx,packaging,pytest,setuptools,tensorrt,torch
known_third_party = mmcls,mmcv,mmdet,numpy,onnx,packaging,pyppl,pytest,setuptools,tensorrt,torch

View File

@ -0,0 +1 @@
backend = 'ppl'

View File

@ -0,0 +1 @@
_base_ = ['./mmcls_base.py', '../_base_/backends/ppl.py']

View File

@ -0,0 +1 @@
_base_ = ['./mask_base.py', '../_base_/backends/ppl.py']

1
configs/mmdet/ppl.py Normal file
View File

@ -0,0 +1 @@
_base_ = ['./base.py', '../_base_/backends/ppl.py']

View File

@ -0,0 +1,3 @@
from .ppl_utils import register_engines
__all__ = ['register_engines']

View File

@ -0,0 +1,53 @@
import logging
import sys
import pyppl.common as pplcommon
import pyppl.nn as pplnn
def register_engines(device_id: int,
                     disable_avx512: bool = False,
                     quick_select: bool = False):
    """Register engines for ppl runtime.

    Creates either an x86 (CPU) engine or a CUDA engine depending on
    ``device_id`` and wraps it for use by a ppl ``Runtime``. On any engine
    creation/configuration failure the error is logged and the process
    exits with status -1.

    Args:
        device_id (int): ID of the CUDA device to use; -1 selects the CPU
            (x86) engine.
        disable_avx512 (bool): Whether to disable avx512 for x86.
        quick_select (bool): Whether to use default algorithms (skips
            algorithm selection, trading speed of setup for tuned kernels).

    Returns:
        list[pplnn.Engine]: The registered engines (a single-element list).
    """
    engines = []
    if device_id == -1:
        # CPU path: x86 engine, optionally with avx512 disabled.
        x86_engine = pplnn.X86EngineFactory.Create()
        if not x86_engine:
            logging.error('Failed to create x86 engine')
            sys.exit(-1)
        if disable_avx512:
            status = x86_engine.Configure(pplnn.X86_CONF_DISABLE_AVX512)
            if status != pplcommon.RC_SUCCESS:
                logging.error('x86 engine Configure() failed: ' +
                              pplcommon.GetRetCodeStr(status))
                sys.exit(-1)
        engines.append(pplnn.Engine(x86_engine))
    else:
        # GPU path: CUDA engine bound to the requested device.
        cuda_options = pplnn.CudaEngineOptions()
        cuda_options.device_id = device_id
        cuda_engine = pplnn.CudaEngineFactory.Create(cuda_options)
        if not cuda_engine:
            logging.error('Failed to create cuda engine.')
            sys.exit(-1)
        if quick_select:
            status = cuda_engine.Configure(
                pplnn.CUDA_CONF_USE_DEFAULT_ALGORITHMS)
            if status != pplcommon.RC_SUCCESS:
                logging.error('cuda engine Configure() failed: ' +
                              pplcommon.GetRetCodeStr(status))
                sys.exit(-1)
        engines.append(pplnn.Engine(cuda_engine))
    return engines

View File

@ -111,6 +111,10 @@ def init_backend_model(model_files: Sequence[str],
model_files[1],
class_names=class_names,
device_id=device_id)
elif backend == 'ppl':
from mmdeploy.mmcls.export import PPLClassifier
backend_model = PPLClassifier(
model_files[0], class_names=class_names, device_id=device_id)
else:
raise NotImplementedError(f'Unsupported backend type: {backend}')
return backend_model
@ -125,6 +129,10 @@ def init_backend_model(model_files: Sequence[str],
from mmdeploy.mmdet.export import TensorRTDetector
backend_model = TensorRTDetector(
model_files[0], class_names=class_names, device_id=device_id)
elif backend == 'ppl':
from mmdeploy.mmdet.export import PPLDetector
backend_model = PPLDetector(
model_files[0], class_names=class_names, device_id=device_id)
else:
raise NotImplementedError(f'Unsupported backend type: {backend}')
return backend_model

View File

@ -1,8 +1,8 @@
from .model_wrappers import (NCNNClassifier, ONNXRuntimeClassifier,
TensorRTClassifier)
PPLClassifier, TensorRTClassifier)
from .prepare_input import create_input
__all__ = [
'create_input', 'NCNNClassifier', 'ONNXRuntimeClassifier',
'TensorRTClassifier'
'TensorRTClassifier', 'PPLClassifier'
]

View File

@ -121,3 +121,50 @@ class NCNNClassifier(DeployBaseClassifier):
return [results]
else:
raise NotImplementedError('GPU device is not implemented.')
class PPLClassifier(DeployBaseClassifier):
    """Wrapper for classifier's inference with PPL.

    Args:
        onnx_file (str): Path of the ONNX model file to load.
        class_names (Sequence[str]): Class names of the classifier.
        device_id (int): Device id; -1 for cpu, otherwise the CUDA device.
    """

    def __init__(self, onnx_file, class_names, device_id):
        super(PPLClassifier, self).__init__(class_names, device_id)
        import pyppl.nn as pplnn
        from mmdeploy.apis.ppl import register_engines

        # enable quick select by default to speed up pipeline
        # TODO: open it to users after ppl supports saving serialized models
        # TODO: disable_avx512 will be removed or open to users in config
        engines = register_engines(
            device_id, disable_avx512=False, quick_select=True)
        # NOTE: register_engines already configures the device via
        # CudaEngineOptions internally, so no extra options are needed here.
        runtime_builder = pplnn.OnnxRuntimeBuilderFactory.CreateFromFile(
            onnx_file, engines)
        assert runtime_builder is not None, 'Failed to create '\
            'OnnxRuntimeBuilder.'
        runtime_options = pplnn.RuntimeOptions()
        runtime = runtime_builder.CreateRuntime(runtime_options)
        assert runtime is not None, 'Failed to create the instance of Runtime.'

        self.runtime = runtime
        self.CLASSES = class_names
        self.device_id = device_id
        # Cache input tensors so forward_test can feed them directly.
        self.inputs = [
            runtime.GetInputTensor(i) for i in range(runtime.GetInputCount())
        ]

    def forward_test(self, imgs, *args, **kwargs):
        """Run inference on *imgs* and return per-image results.

        Args:
            imgs (torch.Tensor): Input tensor; copied to host memory and fed
                to the first model input.

        Returns:
            list[np.ndarray]: Output of the first model output tensor, split
                along the batch dimension.
        """
        import pyppl.common as pplcommon
        input_data = imgs
        self.inputs[0].ConvertFromHost(input_data.cpu().numpy())
        status = self.runtime.Run()
        assert status == pplcommon.RC_SUCCESS, 'Run() '\
            'failed: ' + pplcommon.GetRetCodeStr(status)
        status = self.runtime.Sync()
        assert status == pplcommon.RC_SUCCESS, 'Sync() '\
            'failed: ' + pplcommon.GetRetCodeStr(status)
        results = self.runtime.GetOutputTensor(0).ConvertToHost()
        results = np.array(results, copy=False)
        return list(results)

View File

@ -1,9 +1,9 @@
from .model_wrappers import ONNXRuntimeDetector, TensorRTDetector
from .model_wrappers import ONNXRuntimeDetector, PPLDetector, TensorRTDetector
from .onnx_helper import clip_bboxes
from .prepare_input import create_input
from .tensorrt_helper import pad_with_value
__all__ = [
'clip_bboxes', 'TensorRTDetector', 'create_input', 'ONNXRuntimeDetector',
'pad_with_value'
'pad_with_value', 'PPLDetector'
]

View File

@ -174,3 +174,51 @@ class TensorRTDetector(DeployBaseDetector):
*ori_shape[2:])[inds, ...].reshape(
batch_size, -1, *ori_shape[2:])
return outputs
class PPLDetector(DeployBaseDetector):
    """Wrapper for detector's inference with PPL.

    Args:
        onnx_file (str): Path of the ONNX model file to load.
        class_names (Sequence[str]): Class names of the detector.
        device_id (int): Device id; -1 for cpu, otherwise the CUDA device.
    """

    def __init__(self, onnx_file, class_names, device_id):
        super(PPLDetector, self).__init__(class_names, device_id)
        import pyppl.nn as pplnn
        from mmdeploy.apis.ppl import register_engines

        # enable quick select by default to speed up pipeline
        # TODO: open it to users after ppl supports saving serialized models
        # TODO: disable_avx512 will be removed or open to users in config
        engines = register_engines(
            device_id, disable_avx512=False, quick_select=True)
        # NOTE: register_engines already configures the device via
        # CudaEngineOptions internally, so no extra options are needed here.
        runtime_builder = pplnn.OnnxRuntimeBuilderFactory.CreateFromFile(
            onnx_file, engines)
        assert runtime_builder is not None, 'Failed to create '\
            'OnnxRuntimeBuilder.'
        runtime_options = pplnn.RuntimeOptions()
        runtime = runtime_builder.CreateRuntime(runtime_options)
        assert runtime is not None, 'Failed to create the instance of Runtime.'

        self.runtime = runtime
        self.CLASSES = class_names
        self.device_id = device_id
        # Cache input tensors so forward_test can feed them directly.
        self.inputs = [
            runtime.GetInputTensor(i) for i in range(runtime.GetInputCount())
        ]

    def forward_test(self, imgs, *args, **kwargs):
        """Run inference on *imgs* and return all model outputs.

        Args:
            imgs (Sequence[torch.Tensor]): Input images; only the first
                element is used and fed to the first model input.

        Returns:
            list[np.ndarray]: One host array per model output tensor.
        """
        import pyppl.common as pplcommon
        input_data = imgs[0].contiguous()
        self.inputs[0].ConvertFromHost(input_data.cpu().numpy())
        status = self.runtime.Run()
        assert status == pplcommon.RC_SUCCESS, 'Run() '\
            'failed: ' + pplcommon.GetRetCodeStr(status)
        status = self.runtime.Sync()
        assert status == pplcommon.RC_SUCCESS, 'Sync() '\
            'failed: ' + pplcommon.GetRetCodeStr(status)
        outputs = []
        for i in range(self.runtime.GetOutputCount()):
            out_tensor = self.runtime.GetOutputTensor(i).ConvertToHost()
            outputs.append(np.array(out_tensor, copy=False))
        return outputs