mirror of
https://github.com/open-mmlab/mmdeploy.git
synced 2025-01-14 08:09:43 +08:00
[Feature] Add ppl backend. (#32)
* add ppl support * save * add ppl speed test * drop "add ppl speed test" This reverts commit 467504a23074a2defdadeff9819c8d19bfefda71. * add ppl backend * resolve comments * fix baseDeployClassifier
This commit is contained in:
parent
4f530bc758
commit
dcb88e4439
@ -1,2 +1,2 @@
|
||||
[settings]
|
||||
known_third_party = mmcls,mmcv,mmdet,numpy,onnx,packaging,pytest,setuptools,tensorrt,torch
|
||||
known_third_party = mmcls,mmcv,mmdet,numpy,onnx,packaging,pyppl,pytest,setuptools,tensorrt,torch
|
||||
|
1
configs/_base_/backends/ppl.py
Normal file
1
configs/_base_/backends/ppl.py
Normal file
@ -0,0 +1 @@
|
||||
backend = 'ppl'
|
1
configs/mmcls/mmcls_ppl.py
Normal file
1
configs/mmcls/mmcls_ppl.py
Normal file
@ -0,0 +1 @@
|
||||
_base_ = ['./mmcls_base.py', '../_base_/backends/ppl.py']
|
1
configs/mmdet/mask_ppl.py
Normal file
1
configs/mmdet/mask_ppl.py
Normal file
@ -0,0 +1 @@
|
||||
_base_ = ['./mask_base.py', '../_base_/backends/ppl.py']
|
1
configs/mmdet/ppl.py
Normal file
1
configs/mmdet/ppl.py
Normal file
@ -0,0 +1 @@
|
||||
_base_ = ['./base.py', '../_base_/backends/ppl.py']
|
3
mmdeploy/apis/ppl/__init__.py
Normal file
3
mmdeploy/apis/ppl/__init__.py
Normal file
@ -0,0 +1,3 @@
|
||||
# Public API of the ppl backend helpers: only engine registration is exported.
from .ppl_utils import register_engines

__all__ = ['register_engines']
|
53
mmdeploy/apis/ppl/ppl_utils.py
Normal file
53
mmdeploy/apis/ppl/ppl_utils.py
Normal file
@ -0,0 +1,53 @@
|
||||
import logging
|
||||
import sys
|
||||
|
||||
import pyppl.common as pplcommon
|
||||
import pyppl.nn as pplnn
|
||||
|
||||
|
||||
def register_engines(device_id: int,
                     disable_avx512: bool = False,
                     quick_select: bool = False):
    """Create the list of pyppl engines used to run a model.

    Exactly one engine is built: the x86 CPU engine when ``device_id`` is
    -1, otherwise the CUDA engine bound to that device. Any creation or
    configuration failure is logged and terminates the process.

    Args:
        device_id (int): -1 for cpu.
        disable_avx512 (bool): Whether to disable avx512 for x86.
        quick_select (bool): Whether to use default algorithms.

    Returns:
        list: A single-element list wrapping the configured engine in
            ``pplnn.Engine``.
    """
    if device_id == -1:
        raw_engine = pplnn.X86EngineFactory.Create()
        if not raw_engine:
            logging.error('Failed to create x86 engine')
            sys.exit(-1)
        if disable_avx512:
            status = raw_engine.Configure(pplnn.X86_CONF_DISABLE_AVX512)
            if status != pplcommon.RC_SUCCESS:
                logging.error('x86 engine Configure() failed: ' +
                              pplcommon.GetRetCodeStr(status))
                sys.exit(-1)
    else:
        options = pplnn.CudaEngineOptions()
        options.device_id = device_id
        raw_engine = pplnn.CudaEngineFactory.Create(options)
        if not raw_engine:
            logging.error('Failed to create cuda engine.')
            sys.exit(-1)
        if quick_select:
            status = raw_engine.Configure(
                pplnn.CUDA_CONF_USE_DEFAULT_ALGORITHMS)
            if status != pplcommon.RC_SUCCESS:
                logging.error('cuda engine Configure() failed: ' +
                              pplcommon.GetRetCodeStr(status))
                sys.exit(-1)
    return [pplnn.Engine(raw_engine)]
|
@ -111,6 +111,10 @@ def init_backend_model(model_files: Sequence[str],
|
||||
model_files[1],
|
||||
class_names=class_names,
|
||||
device_id=device_id)
|
||||
elif backend == 'ppl':
|
||||
from mmdeploy.mmcls.export import PPLClassifier
|
||||
backend_model = PPLClassifier(
|
||||
model_files[0], class_names=class_names, device_id=device_id)
|
||||
else:
|
||||
raise NotImplementedError(f'Unsupported backend type: {backend}')
|
||||
return backend_model
|
||||
@ -125,6 +129,10 @@ def init_backend_model(model_files: Sequence[str],
|
||||
from mmdeploy.mmdet.export import TensorRTDetector
|
||||
backend_model = TensorRTDetector(
|
||||
model_files[0], class_names=class_names, device_id=device_id)
|
||||
elif backend == 'ppl':
|
||||
from mmdeploy.mmdet.export import PPLDetector
|
||||
backend_model = PPLDetector(
|
||||
model_files[0], class_names=class_names, device_id=device_id)
|
||||
else:
|
||||
raise NotImplementedError(f'Unsupported backend type: {backend}')
|
||||
return backend_model
|
||||
|
@ -1,8 +1,8 @@
|
||||
from .model_wrappers import (NCNNClassifier, ONNXRuntimeClassifier,
|
||||
TensorRTClassifier)
|
||||
PPLClassifier, TensorRTClassifier)
|
||||
from .prepare_input import create_input
|
||||
|
||||
__all__ = [
|
||||
'create_input', 'NCNNClassifier', 'ONNXRuntimeClassifier',
|
||||
'TensorRTClassifier'
|
||||
'TensorRTClassifier', 'PPLClassifier'
|
||||
]
|
||||
|
@ -121,3 +121,50 @@ class NCNNClassifier(DeployBaseClassifier):
|
||||
return [results]
|
||||
else:
|
||||
raise NotImplementedError('GPU device is not implemented.')
|
||||
|
||||
|
||||
class PPLClassifier(DeployBaseClassifier):
    """Wrapper for classifier's inference with PPL.

    Builds a pyppl runtime from an ONNX file once at construction and
    reuses it for every forward call.

    Args:
        onnx_file (str): Path to the ONNX model file.
        class_names (Sequence[str]): Names of the output classes.
        device_id (int): -1 for cpu, otherwise the cuda device index.
    """

    def __init__(self, onnx_file, class_names, device_id):
        super(PPLClassifier, self).__init__(class_names, device_id)
        import pyppl.nn as pplnn
        from mmdeploy.apis.ppl import register_engines

        # enable quick select by default to speed up pipeline
        # TODO: open it to users after ppl supports saving serialized models
        # TODO: disable_avx512 will be removed or open to users in config
        engines = register_engines(
            device_id, disable_avx512=False, quick_select=True)
        # NOTE(review): register_engines() already builds the CUDA engine
        # options internally; the unused local CudaEngineOptions that used
        # to be created here was dead code and has been removed.
        runtime_builder = pplnn.OnnxRuntimeBuilderFactory.CreateFromFile(
            onnx_file, engines)
        assert runtime_builder is not None, 'Failed to create '\
            'ONNXRuntimeBuilder.'

        runtime_options = pplnn.RuntimeOptions()
        runtime = runtime_builder.CreateRuntime(runtime_options)
        assert runtime is not None, 'Failed to create the instance of Runtime.'

        self.runtime = runtime
        self.CLASSES = class_names
        self.device_id = device_id
        # Cache input tensor handles; forward_test writes into inputs[0].
        self.inputs = [
            runtime.GetInputTensor(i) for i in range(runtime.GetInputCount())
        ]

    def forward_test(self, imgs, *args, **kwargs):
        """Run inference on a batch tensor and return the scores.

        Args:
            imgs (torch.Tensor): Preprocessed input batch; copied to host
                memory before being handed to the ppl runtime.

        Returns:
            list[np.ndarray]: Per-image outputs of the first (and only
                consumed) output tensor.
        """
        import pyppl.common as pplcommon
        input_data = imgs
        self.inputs[0].ConvertFromHost(input_data.cpu().numpy())
        status = self.runtime.Run()
        assert status == pplcommon.RC_SUCCESS, 'Run() '\
            'failed: ' + pplcommon.GetRetCodeStr(status)
        # Sync() blocks until the asynchronous Run() has finished.
        status = self.runtime.Sync()
        assert status == pplcommon.RC_SUCCESS, 'Sync() '\
            'failed: ' + pplcommon.GetRetCodeStr(status)
        results = self.runtime.GetOutputTensor(0).ConvertToHost()
        results = np.array(results, copy=False)

        return list(results)
|
||||
|
@ -1,9 +1,9 @@
|
||||
from .model_wrappers import ONNXRuntimeDetector, TensorRTDetector
|
||||
from .model_wrappers import ONNXRuntimeDetector, PPLDetector, TensorRTDetector
|
||||
from .onnx_helper import clip_bboxes
|
||||
from .prepare_input import create_input
|
||||
from .tensorrt_helper import pad_with_value
|
||||
|
||||
__all__ = [
|
||||
'clip_bboxes', 'TensorRTDetector', 'create_input', 'ONNXRuntimeDetector',
|
||||
'pad_with_value'
|
||||
'pad_with_value', 'PPLDetector'
|
||||
]
|
||||
|
@ -174,3 +174,51 @@ class TensorRTDetector(DeployBaseDetector):
|
||||
*ori_shape[2:])[inds, ...].reshape(
|
||||
batch_size, -1, *ori_shape[2:])
|
||||
return outputs
|
||||
|
||||
|
||||
class PPLDetector(DeployBaseDetector):
    """Wrapper for detector's inference with PPL.

    Builds a pyppl runtime from an ONNX file once at construction and
    reuses it for every forward call.

    Args:
        onnx_file (str): Path to the ONNX model file.
        class_names (Sequence[str]): Names of the detector's classes.
        device_id (int): -1 for cpu, otherwise the cuda device index.
    """

    def __init__(self, onnx_file, class_names, device_id):
        super(PPLDetector, self).__init__(class_names, device_id)
        import pyppl.nn as pplnn
        from mmdeploy.apis.ppl import register_engines

        # enable quick select by default to speed up pipeline
        # TODO: open it to users after ppl supports saving serialized models
        # TODO: disable_avx512 will be removed or open to users in config
        engines = register_engines(
            device_id, disable_avx512=False, quick_select=True)
        # NOTE(review): register_engines() already builds the CUDA engine
        # options internally; the unused local CudaEngineOptions that used
        # to be created here was dead code and has been removed.
        runtime_builder = pplnn.OnnxRuntimeBuilderFactory.CreateFromFile(
            onnx_file, engines)
        assert runtime_builder is not None, 'Failed to create '\
            'OnnxRuntimeBuilder.'

        runtime_options = pplnn.RuntimeOptions()
        runtime = runtime_builder.CreateRuntime(runtime_options)
        assert runtime is not None, 'Failed to create the instance of Runtime.'

        self.runtime = runtime
        self.CLASSES = class_names
        self.device_id = device_id
        # Cache input tensor handles; forward_test writes into inputs[0].
        self.inputs = [
            runtime.GetInputTensor(i) for i in range(runtime.GetInputCount())
        ]

    def forward_test(self, imgs, *args, **kwargs):
        """Run inference on the first image tensor of the batch list.

        Args:
            imgs (Sequence[torch.Tensor]): Preprocessed inputs; only
                ``imgs[0]`` is consumed, made contiguous, and copied to
                host memory for the ppl runtime.

        Returns:
            list[np.ndarray]: One array per runtime output tensor.
        """
        import pyppl.common as pplcommon
        input_data = imgs[0].contiguous()
        self.inputs[0].ConvertFromHost(input_data.cpu().numpy())
        status = self.runtime.Run()
        assert status == pplcommon.RC_SUCCESS, 'Run() '\
            'failed: ' + pplcommon.GetRetCodeStr(status)
        # Sync() blocks until the asynchronous Run() has finished.
        status = self.runtime.Sync()
        assert status == pplcommon.RC_SUCCESS, 'Sync() '\
            'failed: ' + pplcommon.GetRetCodeStr(status)
        outputs = []
        for i in range(self.runtime.GetOutputCount()):
            out_tensor = self.runtime.GetOutputTensor(i).ConvertToHost()
            outputs.append(np.array(out_tensor, copy=False))
        return outputs
|
||||
|
Loading…
x
Reference in New Issue
Block a user