[Enhancement] Support tvm (#1216)
* finish framework
* add autotvm and auto-scheduler tuner
* add python deploy api
* add SDK net (WIP)
* add sdk support
* support det, support vm
* fix vm sdk
* support two stage detector
* add instance seg support
* add docstring
* update docs and ut
* add quantize
* update doc
* update docs
* synchronize stream
* support dlpack
* remove submodule
* fix stride
* add alignment
* support dlpack
* remove submodule
* replace exclusive_scan
* add backend check
* add build script
* fix comment
* add ci
* fix ci
* ci fix2
* update build script
* update ci
* add pytest
* update sed command
* update sed again
* add xgboost
* remove tvm ut
* update ansor runner
* add stream sync
* fix topk
* sync default stream
* fix tvm net
* fix window

parent ac47cad407
commit 7cb4b9b18a
@ -1,2 +1,3 @@
cann
CANN
nd
@ -0,0 +1,41 @@
name: backend-tvm

on:
  push:
    paths-ignore:
      - "demo/**"
      - "tools/**"

  pull_request:
    paths-ignore:
      - "demo/**"
      - "tools/**"
      - "docs/**"

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  script_install:
    runs-on: ubuntu-20.04
    strategy:
      matrix:
        python-version: [3.7]
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          submodules: 'recursive'
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install mmdeploy
        run: |
          python3 tools/scripts/build_ubuntu_x64_tvm.py
          source ~/mmdeploy.env
          python3 -m pip install torch==1.8.2 torchvision==0.9.2 --extra-index-url https://download.pytorch.org/whl/lts/1.8/cpu
          python3 -m pip install mmcv-full==1.5.1 -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.8.0/index.html
          python3 -m pip install decorator psutil scipy attrs tornado pytest
          python3 -c 'import mmdeploy.apis.tvm as tvm_api; assert tvm_api.is_available()'
@ -1,9 +1,9 @@
[submodule "third_party/cub"]
	path = third_party/cub
	url = https://github.com/NVIDIA/cub.git
[submodule "third_party/pybind11"]
	path = third_party/pybind11
	url = https://github.com/pybind/pybind11.git
[submodule "third_party/spdlog"]
	path = third_party/spdlog
	url = https://github.com/gabime/spdlog.git
README.md
@ -58,18 +58,18 @@ The supported Device-Platform-InferenceBackend matrix is presented as following,
The benchmark can be found from [here](docs/en/03-benchmark/benchmark.md)
| Device / Platform | Linux | Windows | macOS | Android |
| ----------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------- |
| x86_64 CPU | [![Build Status][pass-backend-ort]][ci-backend-ort]ONNXRuntime<br>[![Build Status][pass-backend-pplnn]][ci-backend-pplnn]pplnn<br>[![Build Status][pass-backend-ncnn]][ci-backend-ncnn]ncnn<br>[![Build Status][pass-backend-torchscript]][ci-backend-torchscript]LibTorch<br>[![Build Status][pass-build-rknpu]][ci-build-rknpu]OpenVINO | [![Build Status][pass-build-rknpu]][ci-build-rknpu]ONNXRuntime<br>[![Build Status][pass-build-rknpu]][ci-build-rknpu]OpenVINO | - | - |
| ARM CPU | [![Build Status][pass-build-rknpu]][ci-build-rknpu]ncnn | - | - | [![Build Status][pass-build-rknpu]][ci-build-rknpu]ncnn |
| RISC-V | [![Build Status][pass-build-riscv64-gcc]][ci-build-riscv64-gcc]ncnn | - | - | - |
| NVIDIA GPU | [![Build Status][pass-build-rknpu]][ci-build-rknpu]ONNXRuntime<br>[![Build Status][pass-build-rknpu]][ci-build-rknpu]TensorRT<br>[![Build Status][pass-build-rknpu]][ci-build-rknpu]pplnn<br>[![Build Status][pass-build-rknpu]][ci-build-rknpu]LibTorch | [![Build Status][pass-build-rknpu]][ci-build-rknpu]ONNXRuntime<br>[![Build Status][pass-build-rknpu]][ci-build-rknpu]TensorRT<br>[![Build Status][pass-build-rknpu]][ci-build-rknpu]pplnn | - | - |
| NVIDIA Jetson | [![Build Status][pass-build-rknpu]][ci-build-rknpu]TensorRT | [![Build Status][pass-build-rknpu]][ci-build-rknpu]TensorRT | - | - |
| Huawei ascend310 | [![Build Status][pass-backend-ascend]][ci-backend-ascend]CANN | - | - | - |
| Rockchip | [![Build Status][pass-backend-rknn]][ci-backend-rknn]RKNN | - | - | - |
| Apple M1 | - | - | [![Build Status][pass-backend-coreml]][ci-backend-coreml]CoreML | - |
| Adreno GPU | - | - | - | [![Build Status][pass-backend-snpe]][ci-backend-snpe]SNPE<br>[![Build Status][pass-build-rknpu]][ci-build-rknpu]ncnn |
| Hexagon DSP | - | - | - | [![Build Status][pass-backend-snpe]][ci-backend-snpe]SNPE |
| Device / Platform | Linux | Windows | macOS | Android |
| ----------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------- |
| x86_64 CPU | [![Build Status][pass-backend-ort]][ci-backend-ort]ONNXRuntime<br>[![Build Status][pass-backend-pplnn]][ci-backend-pplnn]pplnn<br>[![Build Status][pass-backend-ncnn]][ci-backend-ncnn]ncnn<br>[![Build Status][pass-backend-torchscript]][ci-backend-torchscript]LibTorch<br>[![Build Status][pass-build-rknpu]][ci-build-rknpu]OpenVINO<br>[![Build Status][pass-build-tvm]][ci-build-tvm]TVM | [![Build Status][pass-build-rknpu]][ci-build-rknpu]ONNXRuntime<br>[![Build Status][pass-build-rknpu]][ci-build-rknpu]OpenVINO | - | - |
| ARM CPU | [![Build Status][pass-build-rknpu]][ci-build-rknpu]ncnn | - | - | [![Build Status][pass-build-rknpu]][ci-build-rknpu]ncnn |
| RISC-V | [![Build Status][pass-build-riscv64-gcc]][ci-build-riscv64-gcc]ncnn | - | - | - |
| NVIDIA GPU | [![Build Status][pass-build-rknpu]][ci-build-rknpu]ONNXRuntime<br>[![Build Status][pass-build-rknpu]][ci-build-rknpu]TensorRT<br>[![Build Status][pass-build-rknpu]][ci-build-rknpu]pplnn<br>[![Build Status][pass-build-rknpu]][ci-build-rknpu]LibTorch<br>[![Build Status][pass-build-rknpu]][ci-build-tvm]TVM | [![Build Status][pass-build-rknpu]][ci-build-rknpu]ONNXRuntime<br>[![Build Status][pass-build-rknpu]][ci-build-rknpu]TensorRT<br>[![Build Status][pass-build-rknpu]][ci-build-rknpu]pplnn | - | - |
| NVIDIA Jetson | [![Build Status][pass-build-rknpu]][ci-build-rknpu]TensorRT | [![Build Status][pass-build-rknpu]][ci-build-rknpu]TensorRT | - | - |
| Huawei ascend310 | [![Build Status][pass-backend-ascend]][ci-backend-ascend]CANN | - | - | - |
| Rockchip | [![Build Status][pass-backend-rknn]][ci-backend-rknn]RKNN | - | - | - |
| Apple M1 | - | - | [![Build Status][pass-backend-coreml]][ci-backend-coreml]CoreML | - |
| Adreno GPU | - | - | - | [![Build Status][pass-backend-snpe]][ci-backend-snpe]SNPE<br>[![Build Status][pass-build-rknpu]][ci-build-rknpu]ncnn |
| Hexagon DSP | - | - | - | [![Build Status][pass-backend-snpe]][ci-backend-snpe]SNPE |
### Efficient and scalable C/C++ SDK Framework
@ -178,6 +178,7 @@ This project is released under the [Apache 2.0 license](LICENSE).
[ci-backend-torchscript]: https://github.com/open-mmlab/mmdeploy/actions/workflows/backend-torchscript.yml
[ci-build-riscv64-gcc]: https://github.com/open-mmlab/mmdeploy/actions/workflows/linux-riscv64-gcc.yml
[ci-build-rknpu]: https://github.com/open-mmlab/mmdeploy/actions/workflows/linux-rknpu.yml
[ci-build-tvm]: https://github.com/open-mmlab/mmdeploy/actions/workflows/backend-tvm.yml
[pass-backend-ascend]: https://img.shields.io/github/workflow/status/open-mmlab/mmdeploy/backend-ascend
[pass-backend-coreml]: https://img.shields.io/github/workflow/status/open-mmlab/mmdeploy/backend-coreml
[pass-backend-ncnn]: https://img.shields.io/github/workflow/status/open-mmlab/mmdeploy/backend-ncnn
@ -188,3 +189,4 @@ This project is released under the [Apache 2.0 license](LICENSE).
[pass-backend-torchscript]: https://img.shields.io/github/workflow/status/open-mmlab/mmdeploy/backend-ort
[pass-build-riscv64-gcc]: https://img.shields.io/github/workflow/status/open-mmlab/mmdeploy/build_riscv64_gcc
[pass-build-rknpu]: https://img.shields.io/github/workflow/status/open-mmlab/mmdeploy/build_rknpu
[pass-build-tvm]: https://img.shields.io/github/workflow/status/open-mmlab/mmdeploy/build_tvm
@ -56,18 +56,18 @@ MMDeploy 是 [OpenMMLab](https://openmmlab.com/) 模型部署工具箱,**为

The supported device/platform and inference-engine matrix is shown in the table below. The benchmark can be found [here](docs/zh_cn/03-benchmark/benchmark.md)

| Device / Platform | Linux | Windows | macOS | Android |
| ----------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------- |
| x86_64 CPU | [![Build Status][pass-backend-ort]][ci-backend-ort]ONNXRuntime<br>[![Build Status][pass-backend-pplnn]][ci-backend-pplnn]pplnn<br>[![Build Status][pass-backend-ncnn]][ci-backend-ncnn]ncnn<br>[![Build Status][pass-backend-torchscript]][ci-backend-torchscript]LibTorch<br>[![Build Status][pass-build-rknpu]][ci-build-rknpu]OpenVINO | [![Build Status][pass-build-rknpu]][ci-build-rknpu]ONNXRuntime<br>[![Build Status][pass-build-rknpu]][ci-build-rknpu]OpenVINO | - | - |
| ARM CPU | [![Build Status][pass-build-rknpu]][ci-build-rknpu]ncnn | - | - | [![Build Status][pass-build-rknpu]][ci-build-rknpu]ncnn |
| RISC-V | [![Build Status][pass-build-riscv64-gcc]][ci-build-riscv64-gcc]ncnn | - | - | - |
| NVIDIA GPU | [![Build Status][pass-build-rknpu]][ci-build-rknpu]ONNXRuntime<br>[![Build Status][pass-build-rknpu]][ci-build-rknpu]TensorRT<br>[![Build Status][pass-build-rknpu]][ci-build-rknpu]pplnn<br>[![Build Status][pass-build-rknpu]][ci-build-rknpu]LibTorch | [![Build Status][pass-build-rknpu]][ci-build-rknpu]ONNXRuntime<br>[![Build Status][pass-build-rknpu]][ci-build-rknpu]TensorRT<br>[![Build Status][pass-build-rknpu]][ci-build-rknpu]pplnn | - | - |
| NVIDIA Jetson | [![Build Status][pass-build-rknpu]][ci-build-rknpu]TensorRT | [![Build Status][pass-build-rknpu]][ci-build-rknpu]TensorRT | - | - |
| Huawei ascend310 | [![Build Status][pass-backend-ascend]][ci-backend-ascend]CANN | - | - | - |
| Rockchip | [![Build Status][pass-backend-rknn]][ci-backend-rknn]RKNN | - | - | - |
| Apple M1 | - | - | [![Build Status][pass-backend-coreml]][ci-backend-coreml]CoreML | - |
| Adreno GPU | - | - | - | [![Build Status][pass-backend-snpe]][ci-backend-snpe]SNPE<br>[![Build Status][pass-build-rknpu]][ci-build-rknpu]ncnn |
| Hexagon DSP | - | - | - | [![Build Status][pass-backend-snpe]][ci-backend-snpe]SNPE |
| Device / Platform | Linux | Windows | macOS | Android |
| ----------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------- |
| x86_64 CPU | [![Build Status][pass-backend-ort]][ci-backend-ort]ONNXRuntime<br>[![Build Status][pass-backend-pplnn]][ci-backend-pplnn]pplnn<br>[![Build Status][pass-backend-ncnn]][ci-backend-ncnn]ncnn<br>[![Build Status][pass-backend-torchscript]][ci-backend-torchscript]LibTorch<br>[![Build Status][pass-build-rknpu]][ci-build-rknpu]OpenVINO<br>[![Build Status][pass-build-tvm]][ci-build-tvm]TVM | [![Build Status][pass-build-rknpu]][ci-build-rknpu]ONNXRuntime<br>[![Build Status][pass-build-rknpu]][ci-build-rknpu]OpenVINO | - | - |
| ARM CPU | [![Build Status][pass-build-rknpu]][ci-build-rknpu]ncnn | - | - | [![Build Status][pass-build-rknpu]][ci-build-rknpu]ncnn |
| RISC-V | [![Build Status][pass-build-riscv64-gcc]][ci-build-riscv64-gcc]ncnn | - | - | - |
| NVIDIA GPU | [![Build Status][pass-build-rknpu]][ci-build-rknpu]ONNXRuntime<br>[![Build Status][pass-build-rknpu]][ci-build-rknpu]TensorRT<br>[![Build Status][pass-build-rknpu]][ci-build-rknpu]pplnn<br>[![Build Status][pass-build-rknpu]][ci-build-rknpu]LibTorch<br>[![Build Status][pass-build-rknpu]][ci-build-tvm]TVM | [![Build Status][pass-build-rknpu]][ci-build-rknpu]ONNXRuntime<br>[![Build Status][pass-build-rknpu]][ci-build-rknpu]TensorRT<br>[![Build Status][pass-build-rknpu]][ci-build-rknpu]pplnn | - | - |
| NVIDIA Jetson | [![Build Status][pass-build-rknpu]][ci-build-rknpu]TensorRT | [![Build Status][pass-build-rknpu]][ci-build-rknpu]TensorRT | - | - |
| Huawei ascend310 | [![Build Status][pass-backend-ascend]][ci-backend-ascend]CANN | - | - | - |
| Rockchip | [![Build Status][pass-backend-rknn]][ci-backend-rknn]RKNN | - | - | - |
| Apple M1 | - | - | [![Build Status][pass-backend-coreml]][ci-backend-coreml]CoreML | - |
| Adreno GPU | - | - | - | [![Build Status][pass-backend-snpe]][ci-backend-snpe]SNPE<br>[![Build Status][pass-build-rknpu]][ci-build-rknpu]ncnn |
| Hexagon DSP | - | - | - | [![Build Status][pass-backend-snpe]][ci-backend-snpe]SNPE |

### Highly customizable SDK

@ -204,6 +204,7 @@ MMDeploy 是 [OpenMMLab](https://openmmlab.com/) 模型部署工具箱,**为
[ci-backend-torchscript]: https://github.com/open-mmlab/mmdeploy/actions/workflows/backend-torchscript.yml
[ci-build-riscv64-gcc]: https://github.com/open-mmlab/mmdeploy/actions/workflows/linux-riscv64-gcc.yml
[ci-build-rknpu]: https://github.com/open-mmlab/mmdeploy/actions/workflows/linux-rknpu.yml
[ci-build-tvm]: https://github.com/open-mmlab/mmdeploy/actions/workflows/backend-tvm.yml
[pass-backend-ascend]: https://img.shields.io/github/workflow/status/open-mmlab/mmdeploy/backend-ascend
[pass-backend-coreml]: https://img.shields.io/github/workflow/status/open-mmlab/mmdeploy/backend-coreml
[pass-backend-ncnn]: https://img.shields.io/github/workflow/status/open-mmlab/mmdeploy/backend-ncnn
@ -214,3 +215,4 @@ MMDeploy 是 [OpenMMLab](https://openmmlab.com/) 模型部署工具箱,**为
[pass-backend-torchscript]: https://img.shields.io/github/workflow/status/open-mmlab/mmdeploy/backend-ort
[pass-build-riscv64-gcc]: https://img.shields.io/github/workflow/status/open-mmlab/mmdeploy/build_riscv64_gcc
[pass-build-rknpu]: https://img.shields.io/github/workflow/status/open-mmlab/mmdeploy/build_rknpu
[pass-build-tvm]: https://img.shields.io/github/workflow/status/open-mmlab/mmdeploy/build_tvm
@ -0,0 +1,47 @@
# Copyright (c) OpenMMLab. All rights reserved.

if (NOT DEFINED TVM_DIR)
    set(TVM_DIR $ENV{TVM_DIR})
endif ()
if (NOT TVM_DIR)
    message(FATAL_ERROR "Please set TVM_DIR with cmake -D option.")
endif()

find_path(
    TVM_INCLUDE_DIR tvm/runtime/c_runtime_api.h
    HINTS ${TVM_DIR}
    PATH_SUFFIXES include)

find_path(
    DMLC_CORE_INCLUDE_DIR dmlc/io.h
    HINTS ${TVM_DIR}/3rdparty/dmlc-core
    PATH_SUFFIXES include)

find_path(
    DLPACK_INCLUDE_DIR dlpack/dlpack.h
    HINTS ${TVM_DIR}/3rdparty/dlpack
    PATH_SUFFIXES include)

find_library(
    TVM_LIBRARY_PATH tvm_runtime
    HINTS ${TVM_DIR}
    PATH_SUFFIXES build lib build/${CMAKE_BUILD_TYPE})
if (NOT (TVM_INCLUDE_DIR AND DMLC_CORE_INCLUDE_DIR AND DLPACK_INCLUDE_DIR AND TVM_LIBRARY_PATH))
    message(FATAL_ERROR "Couldn't find tvm in TVM_DIR: "
            "${TVM_DIR}, please check if the path is correct.")
endif()

add_library(tvm_runtime SHARED IMPORTED)
set_property(TARGET tvm_runtime APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE)
if (MSVC)
    set_target_properties(tvm_runtime PROPERTIES
        IMPORTED_IMPLIB_RELEASE ${TVM_LIBRARY_PATH}
        INTERFACE_INCLUDE_DIRECTORIES ${TVM_INCLUDE_DIR} ${DMLC_CORE_INCLUDE_DIR} ${DLPACK_INCLUDE_DIR}
    )
else()
    set_target_properties(tvm_runtime PROPERTIES
        IMPORTED_LOCATION_RELEASE ${TVM_LIBRARY_PATH}
        INTERFACE_INCLUDE_DIRECTORIES ${TVM_INCLUDE_DIR} ${DMLC_CORE_INCLUDE_DIR} ${DLPACK_INCLUDE_DIR}
    )
endif()
@ -0,0 +1 @@
backend_config = dict(type='tvm')
@ -0,0 +1,12 @@
_base_ = ['./classification_static.py', '../_base_/backends/tvm.py']

onnx_config = dict(input_shape=[224, 224])
backend_config = dict(model_inputs=[
    dict(
        shape=dict(input=[1, 3, 224, 224]),
        dtype=dict(input='float32'),
        tuner=dict(
            type='AutoScheduleTuner',
            log_file='tvm_tune_log.log',
            num_measure_trials=2000))
])
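A deploy config like the one above is handed to MMDeploy's model converter. A hypothetical invocation might look like the following sketch, assuming the config lands under `configs/mmcls/` as its `_base_` references suggest; the MMClassification model config, checkpoint and test image are placeholders:

```bash
python3 tools/deploy.py \
    configs/mmcls/classification_tvm-autotvm_static-224x224.py \
    ${MMCLS_MODEL_CONFIG} ${MMCLS_CHECKPOINT} ${TEST_IMAGE} \
    --work-dir work_dir/resnet18-tvm \
    --device cpu
```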
@ -0,0 +1,16 @@
_base_ = ['./classification_tvm-autotvm_static-224x224.py']

calib_config = dict(create_calib=True, calib_file='calib_data.h5')
backend_config = dict(model_inputs=[
    dict(
        shape=dict(input=[1, 3, 224, 224]),
        dtype=dict(input='float32'),
        tuner=dict(
            type='AutoTVMTuner',
            log_file='tvm_tune_log.log',
            n_trial=1000,
            tuner=dict(type='XGBTuner'),
        ),
        qconfig=dict(calibrate_mode='kl_divergence', weight_scale='max'),
    )
])
@ -0,0 +1,13 @@
_base_ = ['./classification_static.py', '../_base_/backends/tvm.py']

onnx_config = dict(input_shape=[224, 224])
backend_config = dict(model_inputs=[
    dict(
        shape=dict(input=[1, 3, 224, 224]),
        dtype=dict(input='float32'),
        tuner=dict(
            type='AutoTVMTuner',
            log_file='tvm_tune_log.log',
            n_trial=1000,
            tuner=dict(type='XGBTuner')))
])
@ -0,0 +1,13 @@
_base_ = ['../_base_/base_static.py', '../../_base_/backends/tvm.py']

onnx_config = dict(input_shape=[1344, 800])
backend_config = dict(model_inputs=[
    dict(
        use_vm=True,
        shape=dict(input=[1, 3, 800, 1344]),
        dtype=dict(input='float32'),
        tuner=dict(
            type='AutoScheduleTuner',
            log_file='tvm_tune_log.log',
            num_measure_trials=2000))
])
@ -0,0 +1,15 @@
_base_ = ['../_base_/base_static.py', '../../_base_/backends/tvm.py']

onnx_config = dict(input_shape=[300, 300])
backend_config = dict(model_inputs=[
    dict(
        use_vm=True,
        shape=dict(input=[1, 3, 300, 300]),
        dtype=dict(input='float32'),
        tuner=dict(
            type='AutoTVMTuner',
            log_file='tvm_tune_log.log',
            n_trial=1000,
            tuner=dict(type='XGBTuner'),
        ))
])
@ -0,0 +1,15 @@
_base_ = ['../_base_/base_static.py', '../../_base_/backends/tvm.py']

onnx_config = dict(input_shape=[1344, 800])
backend_config = dict(model_inputs=[
    dict(
        use_vm=True,
        shape=dict(input=[1, 3, 800, 1344]),
        dtype=dict(input='float32'),
        tuner=dict(
            type='AutoTVMTuner',
            log_file='tvm_tune_log.log',
            n_trial=1000,
            tuner=dict(type='XGBTuner'),
        ))
])
@ -0,0 +1,15 @@
_base_ = [
    '../_base_/base_instance-seg_static.py', '../../_base_/backends/tvm.py'
]

onnx_config = dict(input_shape=[1344, 800])
backend_config = dict(model_inputs=[
    dict(
        use_vm=True,
        shape=dict(input=[1, 3, 800, 1344]),
        dtype=dict(input='float32'),
        tuner=dict(
            type='AutoScheduleTuner',
            log_file='tvm_tune_log.log',
            num_measure_trials=20000))
])
@ -0,0 +1,17 @@
_base_ = [
    '../_base_/base_instance-seg_static.py', '../../_base_/backends/tvm.py'
]

onnx_config = dict(input_shape=[1344, 800])
backend_config = dict(model_inputs=[
    dict(
        use_vm=True,
        shape=dict(input=[1, 3, 800, 1344]),
        dtype=dict(input='float32'),
        tuner=dict(
            type='AutoTVMTuner',
            log_file='tvm_tune_log.log',
            n_trial=10000,
            tuner=dict(type='XGBTuner'),
        ))
])
@ -0,0 +1,12 @@
_base_ = ['./segmentation_static.py', '../_base_/backends/tvm.py']

onnx_config = dict(input_shape=[1024, 512])
backend_config = dict(model_inputs=[
    dict(
        shape=dict(input=[1, 3, 512, 1024]),
        dtype=dict(input='float32'),
        tuner=dict(
            type='AutoScheduleTuner',
            log_file='tvm_tune_log.log',
            num_measure_trials=2000))
])
@ -0,0 +1,13 @@
_base_ = ['./segmentation_static.py', '../_base_/backends/tvm.py']

onnx_config = dict(input_shape=[1024, 512])
backend_config = dict(model_inputs=[
    dict(
        shape=dict(input=[1, 3, 512, 1024]),
        dtype=dict(input='float32'),
        tuner=dict(
            type='AutoTVMTuner',
            log_file='tvm_tune_log.log',
            n_trial=1000,
            tuner=dict(type='XGBTuner')))
])
@ -14,6 +14,7 @@ namespace mmdeploy {
namespace framework {

using TensorShape = std::vector<int64_t>;

struct TensorDesc {
  Device device;
  DataType data_type{DataType::kFLOAT};
@ -11,13 +11,16 @@ class CpuHostMemory : public NonCopyable {
 public:
  CpuHostMemory() : size_(), data_(), owned_data_{false} {}
  Result<void> Init(size_t size, size_t alignment) {
    if (alignment != 1) {
      return Status(eNotSupported);
    }
    data_ = std::malloc(size);
    size_t space = (size + alignment - 1) / alignment * alignment;
#ifdef _MSC_VER
    data_ = _aligned_malloc(space, alignment);
#else
    data_ = std::aligned_alloc(alignment, space);
#endif
    if (!data_) {
      return Status(eOutOfMemory);
    }
    aligned_data_ = data_;
    size_ = size;
    owned_data_ = true;
    return success();
@ -38,7 +41,11 @@ class CpuHostMemory : public NonCopyable {
  ~CpuHostMemory() {
    if (data_) {
      if (owned_data_) {
#ifdef _MSC_VER
        _aligned_free(data_);
#else
        std::free(data_);
#endif
        owned_data_ = false;
      }
      data_ = nullptr;
@ -47,11 +54,12 @@ class CpuHostMemory : public NonCopyable {
    size_ = 0;
  }
  size_t size() const { return size_; }
  void* data() const { return data_; }
  void* data() const { return owned_data_ ? aligned_data_ : data_; }

 private:
  size_t size_;
  void* data_;
  void* aligned_data_{nullptr};
  bool owned_data_;
  std::shared_ptr<void> external_;
};
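A side note on the rounding introduced above (an explanatory sketch, not part of the diff): `std::aligned_alloc` requires the requested size to be an integral multiple of the alignment, which is why `Init` rounds `size` up to `space` before allocating:

```cpp
// e.g. size = 1000, alignment = 256
size_t space = (size + alignment - 1) / alignment * alignment;  // -> 1024
void* p = std::aligned_alloc(alignment, space);  // valid: space is a multiple of alignment (C++17, <cstdlib>)
```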
@ -69,7 +69,7 @@ class CudaDeviceMemory : public NonCopyable {
 public:
  explicit CudaDeviceMemory(int device_id) : device_id_(device_id), size_(), owned_block_() {}
  Result<void> Init(size_t size, Allocator allocator, size_t alignment, uint64_t flags) {
    if (alignment != 1) {
    if (alignment > 256 || 256 % alignment != 0) {
      return Status(eNotSupported);
    }
    allocator_ = std::move(allocator);
@ -38,6 +38,10 @@ if ("coreml" IN_LIST MMDEPLOY_TARGET_BACKENDS)
    add_subdirectory(coreml)
endif ()

if ("tvm" IN_LIST MMDEPLOY_TARGET_BACKENDS)
    add_subdirectory(tvm)
endif ()

if ("rknn" IN_LIST MMDEPLOY_TARGET_BACKENDS)
    add_subdirectory(rknn)
endif ()
@ -0,0 +1,11 @@
# Copyright (c) OpenMMLab. All rights reserved.

project(mmdeploy_tvm_net)

include(${CMAKE_SOURCE_DIR}/cmake/modules/FindTVM.cmake)

mmdeploy_add_module(${PROJECT_NAME} tvm_net.cpp)
target_include_directories(${PROJECT_NAME} PRIVATE ${TVM_INCLUDE_DIR} ${DLPACK_INCLUDE_DIR} ${DMLC_CORE_INCLUDE_DIR})
target_link_libraries(${PROJECT_NAME} PRIVATE tvm_runtime mmdeploy_dlpack_utils)

add_library(mmdeploy::tvm_net ALIAS ${PROJECT_NAME})
@ -0,0 +1,282 @@
// Copyright (c) OpenMMLab. All rights reserved.

#include "tvm_net.h"

#include <tvm/runtime/container/adt.h>
#include <tvm/runtime/device_api.h>
#include <tvm/runtime/vm/executable.h>
#include <tvm/runtime/vm/vm.h>

#include <fstream>
#include <sstream>

#include "mmdeploy/core/model.h"
#include "mmdeploy/core/utils/filesystem.h"
#include "mmdeploy/core/utils/formatter.h"
#include "mmdeploy/utils/dlpack/dlpack_utils.h"

namespace mmdeploy::framework {

static DLDevice GetDLDevice(const Device& device) {
  DLDevice dev;
  if (device.is_device()) {
    dev = {kDLCUDA, device.device_id()};
  } else {
    dev = {kDLCPU, 0};
  }
  return dev;
}

static Result<DLDataType> FromDataType(DataType data_type) {
  switch (data_type) {
    case DataType::kFLOAT:
      return DLDataType{kDLFloat, 32, 1};
    case DataType::kINT32:
      return DLDataType{kDLInt, 32, 1};
    case DataType::kINT64:
      return DLDataType{kDLInt, 64, 1};
    case DataType::kINT8:
      return DLDataType{kDLInt, 8, 1};
    default:
      MMDEPLOY_ERROR("Unsupported mmdeploy::DataType");
      return Status(eNotSupported);
  }
}

static Result<DataType> ToDataType(DLDataType scalar_type) {
  if (scalar_type.lanes != 1) {
    MMDEPLOY_ERROR("scalar_type.lanes != 1 is not supported.");
    return Status(eNotSupported);
  }

  if (scalar_type.code == kDLFloat && scalar_type.bits == 32) {
    return DataType::kFLOAT;
  } else if (scalar_type.code == kDLInt) {
    switch (scalar_type.bits) {
      case 32:
        return DataType::kINT32;
      case 64:
        return DataType::kINT64;
      case 8:
        return DataType::kINT8;
      default:
        break;
    }
  }

  MMDEPLOY_ERROR("Unsupported code: {}, bits: {}, lanes: {}.", std::to_string(scalar_type.code),
                 std::to_string(scalar_type.bits), std::to_string(scalar_type.lanes));
  return Status(eNotSupported);
}

static std::vector<std::string> split_str(const std::string& s, char delim) {
  using namespace std;
  vector<string> result;
  stringstream ss(s);
  string item;

  while (getline(ss, item, delim)) {
    result.push_back(item);
  }

  return result;
}

Result<void> TVMNet::Init(const Value& args) {
  auto& context = args["context"];
  device_ = context["device"].get<Device>();
  stream_ = context["stream"].get<Stream>();

  auto name = args["name"].get<std::string>();
  auto model = context["model"].get<Model>();
  OUTCOME_TRY(auto config, model.GetModelConfig(name));

  auto tmp_dir = fs::temp_directory_path();
  std::string tmp_lib = (tmp_dir / fs::path(config.net)).string();
  OUTCOME_TRY(auto raw_lib, model.ReadFile(config.net));
  std::string tmp_label = (tmp_dir / fs::path(config.weights)).string();
  OUTCOME_TRY(auto raw_label, model.ReadFile(config.weights));

  try {
    std::ofstream lib_out(tmp_lib, std::ios::binary);
    lib_out << raw_lib;
    lib_out.close();
  } catch (const std::exception& e) {
    MMDEPLOY_ERROR("unhandled exception when creating tmp library: {}", e.what());
    return Status(eFail);
  }

  try {
    auto io_names = split_str(raw_label, '\n');
    auto input_names = split_str(io_names[0], ',');
    auto output_names = split_str(io_names[1], ',');
    DLDevice dev = GetDLDevice(device_);

    mod_factory_ = tvm::runtime::Module::LoadFromFile(tmp_lib);

    use_vm_ = false;
    if (io_names.size() > 2) {
      use_vm_ = true;
      OUTCOME_TRY(auto bytecode, model.ReadFile(io_names[2]));
      auto exec = tvm::runtime::vm::Executable::Load(bytecode, mod_factory_);
      const auto runtime_create = *tvm::runtime::Registry::Get("runtime._VirtualMachine");
      tvm::runtime::Module vm_ = runtime_create(exec);

      // init vm
      auto func_init = vm_.GetFunction("init", false);
      auto alloc_type = static_cast<int>(tvm::runtime::vm::AllocatorType::kPooled);
      if (dev.device_type != kDLCPU) {
        func_init(static_cast<int>(kDLCPU), 0, alloc_type, int(dev.device_type), int(dev.device_id),
                  alloc_type);
      } else {
        func_init(int(dev.device_type), int(dev.device_id), alloc_type);
      }

      // get input ids
      auto func_input_index_ = vm_.GetFunction("get_input_index", false);
      for (auto name : input_names) {
        input_ids_[name] = func_input_index_(name, "main");
      }

      // get function
      func_set_input_ = vm_.GetFunction("set_input");
      func_run_ = vm_.GetFunction("invoke");
    } else {
      // graph executor won't do synchronize stream after run?
      if (device_.is_device())
        tvm::runtime::DeviceAPI::Get(dev)->SetStream(dev, stream_.GetNative());
      tvm::runtime::Module gmod = mod_factory_.GetFunction("default")(dev);

      // get function
      func_set_input_ = gmod.GetFunction("set_input");
      func_get_output_ = gmod.GetFunction("get_output");
      func_run_ = gmod.GetFunction("run");
    }

    auto ToDesc = [&](const std::string& name) {
      return TensorDesc{device_, DataType::kFLOAT, {}, name};
    };

    for (auto name : input_names) {
      input_tensors_.emplace_back(ToDesc(name));
    }

    for (auto name : output_names) {
      output_tensors_.emplace_back(ToDesc(name));
    }

  } catch (const std::exception& e) {
    MMDEPLOY_ERROR("unhandled exception when creating TVM Net: {}", e.what());
    return Status(eFail);
  }

  return success();
}

Result<void> TVMNet::ForwardAsync(Event* event) { return Status(eNotSupported); }

Result<void> TVMNet::Deinit() { return success(); }

Result<Span<Tensor>> TVMNet::GetInputTensors() { return input_tensors_; }

Result<Span<Tensor>> TVMNet::GetOutputTensors() { return output_tensors_; }

Result<void> TVMNet::Reshape(Span<TensorShape> input_shapes) {
  for (size_t i = 0; i < input_shapes.size(); ++i) {
    input_tensors_[i].Reshape(input_shapes[i]);
  }
  return success();
}

Result<void> TVMNet::Forward() {
  DLDevice dev = GetDLDevice(device_);
  try {
    OUTCOME_TRY(stream_.Wait());

    if (use_vm_) {
      // vm

      // set input
      int num_inputs = input_tensors_.size();
      std::vector<tvm::runtime::NDArray> args_arr(num_inputs);
      std::vector<TVMValue> tvm_values(num_inputs + 1);
      std::vector<int> tvm_type_codes(num_inputs + 1);
      tvm::runtime::TVMArgsSetter setter(tvm_values.data(), tvm_type_codes.data());
      setter(0, "main");
      for (int k = 0; k < num_inputs; ++k) {
        auto v = input_tensors_[k];
        OUTCOME_TRY(auto managed_tensor, ToDLPack(v, stream_));
        OUTCOME_TRY(stream_.Wait());
        args_arr[k] = tvm::runtime::NDArray::FromDLPack(managed_tensor);

        int input_id = input_ids_[v.name()];
        setter(input_id + 1, args_arr[k]);
      }
      func_set_input_.CallPacked(
          tvm::runtime::TVMArgs(tvm_values.data(), tvm_type_codes.data(), num_inputs + 1), nullptr);

      // run
      tvm::runtime::TVMRetValue ret = func_run_("main");
      if (device_.is_device()) {
        // the tvm virtual machine uses the default stream.
        OUTCOME_TRY(Stream(device_, nullptr).Wait());
      }

      // get output
      if (ret.type_code() == kTVMNDArrayHandle) {
        tvm::runtime::NDArray ndarray = ret.AsObjectRef<tvm::runtime::NDArray>();
        Tensor& v = output_tensors_[0];
        OUTCOME_TRY(v, FromDLPack(ndarray.ToDLPack(), v.name(), stream_));
      } else if (ret.type_code() == kTVMObjectHandle) {
        const auto& adt = ret.AsObjectRef<tvm::runtime::ADT>();
        for (int i = 0; i < output_tensors_.size(); ++i) {
          tvm::runtime::NDArray ndarray = tvm::runtime::Downcast<tvm::runtime::NDArray>(adt[i]);
          Tensor& v = output_tensors_[i];
          OUTCOME_TRY(v, FromDLPack(ndarray.ToDLPack(), v.name(), stream_));
        }
      } else {
        MMDEPLOY_ERROR("unexpected return type code {}", ret.type_code());
        return Status(eFail);
      }
    } else {
      // graph executor

      // set input
      for (auto v : input_tensors_) {
        OUTCOME_TRY(auto managed_tensor, ToDLPack(v, stream_));
        OUTCOME_TRY(stream_.Wait());
        auto ndarray = tvm::runtime::NDArray::FromDLPack(managed_tensor);

        func_set_input_(v.name(), ndarray);
      }

      // run
      func_run_();

      // get output
      for (int i = 0; i < output_tensors_.size(); ++i) {
        tvm::runtime::NDArray ndarray = func_get_output_(i);
        Tensor& v = output_tensors_[i];
        OUTCOME_TRY(v, FromDLPack(ndarray.ToDLPack(), v.name(), stream_));
      }

      OUTCOME_TRY(stream_.Wait());
    }
  } catch (const std::exception& e) {
    MMDEPLOY_ERROR(e.what());
    return Status(eFail);
  }
  return success();
}

static std::unique_ptr<Net> Create(const Value& args) {
  auto p = std::make_unique<TVMNet>();
  if (auto status = p->Init(args)) {
    return p;
  } else {
    MMDEPLOY_ERROR("Failed to create TVMNet with config: {}", args);
  }
  return nullptr;
}

MMDEPLOY_REGISTER_FACTORY_FUNC(Net, (tvm, 0), Create);
}  // namespace mmdeploy::framework
@ -0,0 +1,40 @@
// Copyright (c) OpenMMLab. All rights reserved.

#ifndef MMDEPLOY_SRC_NET_TVM_TVM_NET_H_
#define MMDEPLOY_SRC_NET_TVM_TVM_NET_H_

#include <tvm/runtime/module.h>

#include "mmdeploy/core/net.h"

namespace mmdeploy::framework {

class TVMNet : public Net {
 public:
  ~TVMNet() override = default;
  Result<void> Init(const Value& cfg) override;
  Result<void> Deinit() override;
  Result<Span<Tensor>> GetInputTensors() override;
  Result<Span<Tensor>> GetOutputTensors() override;
  Result<void> Reshape(Span<TensorShape> input_shapes) override;
  Result<void> Forward() override;
  Result<void> ForwardAsync(Event* event) override;

 private:
  tvm::runtime::Module mod_factory_;

  tvm::runtime::PackedFunc func_set_input_;
  tvm::runtime::PackedFunc func_get_output_;
  tvm::runtime::PackedFunc func_run_;
  bool use_vm_;

  std::map<std::string, int> input_ids_;
  std::vector<Tensor> input_tensors_;
  std::vector<Tensor> output_tensors_;
  Device device_;
  Stream stream_;
};

}  // namespace mmdeploy::framework

#endif  // MMDEPLOY_SRC_NET_TVM_TVM_NET_H_
@ -1,3 +1,4 @@
# Copyright (c) OpenMMLab. All rights reserved.

add_subdirectory(dlpack)
add_subdirectory(opencv)
@ -0,0 +1,14 @@
# Copyright (c) OpenMMLab. All rights reserved.

project(mmdeploy_dlpack_utils)

mmdeploy_add_library(${PROJECT_NAME} STATIC dlpack_utils.cpp)

target_link_libraries(${PROJECT_NAME}
    PRIVATE mmdeploy::core)

target_include_directories(${PROJECT_NAME}
    INTERFACE $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>)

target_include_directories(${PROJECT_NAME} PRIVATE
    $<BUILD_INTERFACE:${CMAKE_SOURCE_DIR}/third_party/dlpack>)
@ -0,0 +1,187 @@
// Copyright (c) OpenMMLab. All rights reserved.

#include "dlpack_utils.h"

#include <numeric>

#include "dlpack.h"
#include "mmdeploy/core/device.h"
#include "mmdeploy/core/logger.h"
#include "mmdeploy/core/status_code.h"
#include "mmdeploy/core/tensor.h"
#include "mmdeploy/core/types.h"

namespace mmdeploy {

using mmdeploy::framework::Device;
using mmdeploy::framework::Stream;
using mmdeploy::framework::Tensor;
using mmdeploy::framework::TensorShape;

static inline int64_t element_size(DataType data_type) {
  switch (data_type) {
    case DataType::kFLOAT:
      return 4;
    case DataType::kHALF:
      return 2;
    case DataType::kINT8:
      return 1;
    case DataType::kINT32:
      return 4;
    case DataType::kINT64:
      return 8;
    default:
      return 0;
  }
}

static inline int64_t get_size(const std::vector<int64_t>& shape) {
  if (shape.empty()) {
    return 0;
  }
  auto _size = std::accumulate(begin(shape), end(shape), 1LL, std::multiplies<>());
  return std::max(0LL, _size);
}

inline static Result<Device> FromDLDevice(const DLDevice& device) {
  int device_id = device.device_id;

  switch (device.device_type) {
    case kDLCPU:
      return Device("cpu", device_id);
    case kDLCUDA:
      return Device("cuda", device_id);
    default:
      MMDEPLOY_ERROR("Unsupported DLDevice.");
      return Status(eNotSupported);
  }
}

inline static DLDevice ToDLDevice(const Device& device) {
  auto device_type = device.is_device() ? kDLCUDA : kDLCPU;
  int device_id = device.device_id();
  return DLDevice{device_type, device_id};
}

inline static Result<DataType> FromDLDataType(const DLDataType& dtype) {
  if (dtype.lanes != 1) {
    MMDEPLOY_ERROR("DLDataType.lanes != 1 is not supported.");
    return Status(eNotSupported);
  }
  switch (dtype.code) {
    case kDLFloat:
      if (dtype.bits == 32)
        return DataType::kFLOAT;
      else {
        MMDEPLOY_ERROR("Unsupported bits. {}", dtype.bits);
        return Status(eNotSupported);
      }
    case kDLInt:
      if (dtype.bits == 32) return DataType::kINT32;
      if (dtype.bits == 64) return DataType::kINT64;
      if (dtype.bits == 8)
        return DataType::kINT8;
      else {
        MMDEPLOY_ERROR("Unsupported bits. {}", dtype.bits);
        return Status(eNotSupported);
      }
      break;
    default:
      MMDEPLOY_ERROR("Unsupported DLDataType.");
      return Status(eNotSupported);
  }
}

inline static Result<DLDataType> ToDLDataType(const DataType& dtype) {
  switch (dtype) {
    case DataType::kFLOAT:
      return DLDataType{kDLFloat, 32, 1};
    case DataType::kINT32:
      return DLDataType{kDLInt, 32, 1};
    case DataType::kINT64:
      return DLDataType{kDLInt, 64, 1};
    case DataType::kINT8:
      return DLDataType{kDLInt, 8, 1};
    default:
      MMDEPLOY_ERROR("Unsupported mmdeploy::DataType");
      return Status(eNotSupported);
  }
}

static void TensorDeleter(struct DLManagedTensor* self) {
  auto tensor = static_cast<Tensor*>(self->manager_ctx);
  delete tensor;
}

static bool IsContiguous(const int64_t* shape, const int64_t* stride, int ndim) {
  if (ndim <= 1 || stride == nullptr) return true;
  for (auto i = 1; i < ndim; ++i) {
    if (stride[i - 1] != shape[i] * stride[i]) return false;
  }
  return true;
}

Result<DLManagedTensor*> ToDLPack(Tensor& tensor, Stream stream) {
  using mmdeploy::framework::Buffer;
  auto managed_tensor = new DLManagedTensor();

  // set deleter
  managed_tensor->deleter = TensorDeleter;
  Tensor* new_tensor = nullptr;

  // create manager_ctx
  {
    auto desc = tensor.desc();
    uint64_t data_val = reinterpret_cast<uint64_t>(tensor.data());
    if ((data_val & 0xff) != 0) {
      // copy buffer if data is not aligned.
      new_tensor =
          new Tensor(desc, Buffer(desc.device, tensor.byte_size(), tensor.allocator(), 256));
      OUTCOME_TRY(tensor.CopyTo(*new_tensor, stream));
    } else {
      // reuse buffer
      new_tensor = new Tensor(desc, tensor.buffer());
    }
    managed_tensor->manager_ctx = static_cast<void*>(new_tensor);
  }

  // setup dl_tensor
  {
    auto& dl_tensor = managed_tensor->dl_tensor;
    auto& desc = new_tensor->desc();
    dl_tensor.data = new_tensor->data();
    dl_tensor.device = ToDLDevice(desc.device);
    OUTCOME_TRY(dl_tensor.dtype, ToDLDataType(desc.data_type));
    dl_tensor.ndim = desc.shape.size();
    dl_tensor.byte_offset = 0;
    dl_tensor.shape = (int64_t*)(&(desc.shape[0]));
    dl_tensor.strides = nullptr;
  }

  return managed_tensor;
}

Result<Tensor> FromDLPack(DLManagedTensor* managed_tensor, const std::string& name, Stream stream) {
  using mmdeploy::framework::TensorDesc;
  auto& dl_tensor = managed_tensor->dl_tensor;
  if (!IsContiguous(dl_tensor.shape, dl_tensor.strides, dl_tensor.ndim)) {
    MMDEPLOY_ERROR("Only contiguous DLTensor is supported now.");
    return Status(eNotSupported);
  }

  TensorShape shape(dl_tensor.shape, dl_tensor.shape + dl_tensor.ndim);
  OUTCOME_TRY(auto device, FromDLDevice(dl_tensor.device));
  OUTCOME_TRY(auto dtype, FromDLDataType(dl_tensor.dtype));

  // create tensor
  TensorDesc desc{device, dtype, shape, name};
  auto buffer_size = get_size(shape) * element_size(dtype);
  auto raw_data = static_cast<void*>(static_cast<uint8_t*>(dl_tensor.data) + dl_tensor.byte_offset);
  Tensor ret(desc);
  OUTCOME_TRY(ret.CopyFrom(raw_data, stream));

  // delete old tensor
  if (managed_tensor->deleter != nullptr) managed_tensor->deleter(managed_tensor);
  return ret;
}
}  // namespace mmdeploy
@ -0,0 +1,17 @@
// Copyright (c) OpenMMLab. All rights reserved.

#ifndef MMDEPLOY_CSRC_UTILS_DLPACK_DLPACK_UTILS_H_
#define MMDEPLOY_CSRC_UTILS_DLPACK_DLPACK_UTILS_H_

#include "mmdeploy/core/device.h"
#include "mmdeploy/core/tensor.h"

struct DLManagedTensor;
namespace mmdeploy {

Result<DLManagedTensor*> ToDLPack(framework::Tensor& tensor, framework::Stream stream = {});
Result<framework::Tensor> FromDLPack(DLManagedTensor* managed_tensor, const std::string& name = "",
                                     framework::Stream stream = {});
}  // namespace mmdeploy

#endif  // MMDEPLOY_CSRC_UTILS_DLPACK_DLPACK_UTILS_H_
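For illustration, a minimal sketch of how these helpers are used by the TVM net above (error handling elided; `desc` and `stream` are assumed to already exist):

```cpp
// hand an mmdeploy tensor to TVM ...
mmdeploy::framework::Tensor input(desc);
OUTCOME_TRY(auto dlm, mmdeploy::ToDLPack(input, stream));
auto arr = tvm::runtime::NDArray::FromDLPack(dlm);  // NDArray now owns the DLManagedTensor

// ... and copy a TVM result back into an mmdeploy tensor
OUTCOME_TRY(auto output, mmdeploy::FromDLPack(arr.ToDLPack(), "output", stream));
```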
@ -49,4 +49,5 @@ Here is the verified installation script. If you want mmdeploy to support multip
| build_ubuntu_x64_ort.py | 18.04/20.04 |
| build_ubuntu_x64_pplnn.py | 18.04/20.04 |
| build_ubuntu_x64_torchscript.py | 18.04/20.04 |
| build_ubuntu_x64_tvm.py | 18.04/20.04 |
| build_jetson_orin_python38.sh | JetPack5.0 L4T 34.1 |
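For reference, a hypothetical end-to-end run of the new TVM build script, mirroring the CI job added in this PR:

```bash
python3 tools/scripts/build_ubuntu_x64_tvm.py
source ~/mmdeploy.env   # environment file written by the build script
python3 -c 'import mmdeploy.apis.tvm as tvm_api; assert tvm_api.is_available()'
```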
@ -100,7 +100,7 @@
</tr>
<tr>
<td>MMDEPLOY_TARGET_BACKENDS</td>
<td>{"trt", "ort", "pplnn", "ncnn", "openvino", "torchscript", "snpe"}</td>
<td>{"trt", "ort", "pplnn", "ncnn", "openvino", "torchscript", "snpe", "tvm"}</td>
<td>N/A</td>
<td>Enable inference engines. <b>By default, no target inference engine is set, since it highly depends on the use case.</b> When more than one engine is specified, it has to be set as a semicolon-separated list of inference backend names, e.g. <pre><code>-DMMDEPLOY_TARGET_BACKENDS="trt;ort;pplnn;ncnn;openvino"</code></pre>
After specifying the inference engine, its package path has to be passed to cmake as follows, <br>
@ -120,7 +120,9 @@
6. <b>torchscript</b>: TorchScript. <code>Torch_DIR</code> is needed.
<pre><code>-DTorch_DIR=${Torch_DIR}</code></pre>
Currently, <b>the Model Converter supports torchscript, but the SDK doesn't</b>.<br>
7. <b>snpe</b>: qcom snpe. <code>SNPE_ROOT</code> must exist in the environment variables because of C/S mode.
7. <b>snpe</b>: qcom snpe. <code>SNPE_ROOT</code> must exist in the environment variables because of C/S mode.<br>
8. <b>coreml</b>: CoreML. <code>Torch_DIR</code> is required. <br>
9. <b>tvm</b>: TVM. <code>TVM_DIR</code> is required. <pre><code>-DTVM_DIR=${TVM_DIR}</code></pre>
</td>
</tr>
<tr>
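Putting the two options together, a minimal configure sketch for a TVM-enabled SDK build might look like this (the TVM install path is a placeholder):

```bash
cmake .. \
    -DMMDEPLOY_TARGET_BACKENDS="tvm" \
    -DTVM_DIR=${TVM_DIR}
```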
@ -7,10 +7,8 @@
- [Install Dependencies for SDK](#install-dependencies-for-sdk)
- [Install Inference Engines for MMDeploy](#install-inference-engines-for-mmdeploy)
- [Build MMDeploy](#build-mmdeploy)
- [Build Options Spec](#build-options-spec)
- [Build Model Converter](#build-model-converter)
- [Build Custom Ops](#build-custom-ops)
- [Install Model Converter](#install-model-converter)
- [Install Model Converter](#install-model-converter)
- [Build SDK and Demo](#build-sdk-and-demo)

______________________________________________________________________
@ -249,6 +247,18 @@ export ASCEND_TOOLKIT_HOME="/usr/local/Ascend/ascend-toolkit/latest"
</code></pre>
</td>
</tr>
<tr>
<td>TVM</td>
<td>TVM</td>
<td>
1. Install TVM following the <a href="https://tvm.apache.org/docs/install/from_source.html">official guide</a>.<br>
2. Set up the environment
<pre><code>
export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${TVM_HOME}/build
export PYTHONPATH=${TVM_HOME}/python:${PYTHONPATH}
</code></pre>
</td>
</tr>
</tbody>
</table>
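Once the environment is set up, a quick sanity check that MMDeploy can see the TVM backend is the same probe used by the CI workflow added in this PR:

```python
# assumes both TVM and MMDeploy are importable in the current environment
import mmdeploy.apis.tvm as tvm_api

assert tvm_api.is_available()
```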
@ -0,0 +1,51 @@
# Test on TVM

## Supported Models

| Model | Codebase | Model config |
| :---- | :------- | :----------: |
| RetinaNet | MMDetection | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/retinanet) |
| Faster R-CNN | MMDetection | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn) |
| YOLOv3 | MMDetection | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/yolo) |
| YOLOX | MMDetection | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/yolox) |
| Mask R-CNN | MMDetection | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn) |
| SSD | MMDetection | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/ssd) |
| ResNet | MMClassification | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/resnet) |
| ResNeXt | MMClassification | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/resnext) |
| SE-ResNet | MMClassification | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/seresnet) |
| MobileNetV2 | MMClassification | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/mobilenet_v2) |
| ShuffleNetV1 | MMClassification | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/shufflenet_v1) |
| ShuffleNetV2 | MMClassification | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/shufflenet_v2) |
| VisionTransformer | MMClassification | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/vision_transformer) |
| FCN | MMSegmentation | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/fcn) |
| PSPNet | MMSegmentation | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/pspnet) |
| DeepLabV3 | MMSegmentation | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/deeplabv3) |
| DeepLabV3+ | MMSegmentation | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/deeplabv3plus) |
| UNet | MMSegmentation | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/unet) |

The table above lists the models that we have tested. Models not listed in the table may still be convertible. Please have a try.

## Test

- Ubuntu 20.04
- tvm 0.9.0
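Results like those below can be reproduced with MMDeploy's test tool. A hypothetical run for the ResNet-18 entry (the MMClassification model config and the converted backend-model files are placeholders):

```bash
python3 tools/test.py \
    configs/mmcls/classification_tvm-autotvm_static-224x224.py \
    ${MMCLS_MODEL_CONFIG} \
    --model ${TVM_BACKEND_MODEL_FILES} \
    --metrics accuracy \
    --device cpu
```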
| mmcls | metric | PyTorch | TVM |
| :---- | :----: | :-----: | :-: |
| [ResNet-18](https://github.com/open-mmlab/mmclassification/tree/master/configs/resnet/resnet18_b32x8_imagenet.py) | top-1 | 69.90 | 69.90 |
| [ResNeXt-50](https://github.com/open-mmlab/mmclassification/tree/master/configs/resnext/resnext50_32x4d_b32x8_imagenet.py) | top-1 | 77.90 | 77.90 |
| [ShuffleNet V2](https://github.com/open-mmlab/mmclassification/tree/master/configs/shufflenet_v2/shufflenet_v2_1x_b64x16_linearlr_bn_nowd_imagenet.py) | top-1 | 69.55 | 69.55 |
| [MobileNet V2](https://github.com/open-mmlab/mmclassification/tree/master/configs/mobilenet_v2/mobilenet-v2_8xb32_in1k.py) | top-1 | 71.86 | 71.86 |

<!-- | [Vision Transformer](https://github.com/open-mmlab/mmclassification/blob/master/configs/vision_transformer/vit-base-p16_ft-64xb64_in1k-384.py) | top-1 | 85.43 | 84.01 | -->

| mmdet(\*) | metric | PyTorch | TVM |
| :-------- | :----: | :-----: | :-: |
| [SSD](https://github.com/open-mmlab/mmdetection/tree/master/configs/ssd/ssd300_coco.py) | box AP | 25.5 | 25.5 |

\*: We only test SSD since dynamic shape is not supported for now.

| mmseg | metric | PyTorch | TVM |
| :---- | :----: | :-----: | :-: |
| [FCN](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/fcn/fcn_r50-d8_512x1024_40k_cityscapes.py) | mIoU | 72.25 | 72.36 |
| [PSPNet](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/pspnet/pspnet_r50-d8_512x1024_80k_cityscapes.py) | mIoU | 78.55 | 77.90 |
@ -0,0 +1,8 @@
# TVM feature support

MMDeploy has integrated TVM for both model conversion and the SDK. Supported features include:

- AutoTVM tuner
- Ansor tuner
- Graph Executor runtime
- Virtual Machine runtime
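The deploy configs added in this PR show how these pieces are selected; a condensed sketch of the relevant `backend_config` fields:

```python
backend_config = dict(
    type='tvm',
    model_inputs=[
        dict(
            # Virtual Machine runtime when True; Graph Executor otherwise
            use_vm=False,
            shape=dict(input=[1, 3, 224, 224]),
            dtype=dict(input='float32'),
            # Ansor: AutoScheduleTuner with num_measure_trials;
            # AutoTVM: AutoTVMTuner with n_trial and an inner tuner such as XGBTuner
            tuner=dict(
                type='AutoScheduleTuner',
                log_file='tvm_tune_log.log',
                num_measure_trials=2000))
    ])
```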
@ -35,6 +35,7 @@ You can switch between Chinese and English documents in the lower-left corner of
   03-benchmark/supported_models.md
   03-benchmark/benchmark.md
   03-benchmark/benchmark_edge.md
   03-benchmark/benchmark_tvm.md
   03-benchmark/quantization.md

.. toctree::
@ -49,4 +49,5 @@ $ python3 tools/check_env.py
| build_ubuntu_x64_ort.py | 18.04/20.04 |
| build_ubuntu_x64_pplnn.py | 18.04/20.04 |
| build_ubuntu_x64_torchscript.py | 18.04/20.04 |
| build_ubuntu_x64_tvm.py | 18.04/20.04 |
| build_jetson_orin_python38.sh | JetPack5.0 L4T 34.1 |
@ -104,7 +104,7 @@

<tr>
<td>MMDEPLOY_TARGET_BACKENDS</td>
<td>{"trt", "ort", "pplnn", "ncnn", "openvino", "torchscript", "snpe", "coreml"}</td>
<td>{"trt", "ort", "pplnn", "ncnn", "openvino", "torchscript", "snpe", "coreml", "tvm"}</td>
<td>N/A</td>
<td> <b>By default, the SDK enables no backend</b>, since this is highly use-case dependent. When multiple backends are selected, separate them with semicolons, e.g. <pre><code>-DMMDEPLOY_TARGET_BACKENDS="trt;ort;pplnn;ncnn;openvino"</code></pre>
At build time, almost every backend needs some path variables set so that its dependencies can be located.<br>
@ -121,6 +121,7 @@
6. <b>torchscript</b>: TorchScript. Currently only the Model Converter supports the torchscript format; the SDK does not support it yet.<br>
7. <b>snpe</b>: qcom snpe. The environment variable SNPE_ROOT must be set.<br>
8. <b>coreml</b>: Core ML. <code>Torch_DIR</code> has to be set for model conversion.<br>
9. <b>tvm</b>: TVM. <code>TVM_DIR</code> has to be set.<br>
</td>
</tr>
@ -8,10 +8,8 @@
- [Install MMDeploy SDK dependencies](#安装-mmdeploy-sdk-依赖)
- [Install inference engines](#安装推理引擎)
- [Build MMDeploy](#编译-mmdeploy)
- [Build options](#编译选项说明)
- [Build and install the Model Converter](#编译安装-model-converter)
- [Build custom ops](#编译自定义算子)
- [Install the Model Converter](#安装-model-converter)
- [Build the Model Converter](#编译-model-converter)
- [Install the Model Converter](#安装-model-converter)
- [Build SDK and demos](#编译-sdk-和-demos)

______________________________________________________________________
@ -246,6 +244,18 @@ export ASCEND_TOOLKIT_HOME="/usr/local/Ascend/ascend-toolkit/latest"
</code></pre>
</td>
</tr>
<tr>
<td>TVM</td>
<td>TVM</td>
<td>
1. Install TVM following the <a href="https://tvm.apache.org/docs/install/from_source.html">official guide</a>.<br>
2. Set up the environment
<pre><code>
export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${TVM_HOME}/build
export PYTHONPATH=${TVM_HOME}/python:${PYTHONPATH}
</code></pre>
</td>
</tr>
</tbody>
</table>
@ -0,0 +1,51 @@
|
|||
# TVM 测试
|
||||
|
||||
## 支持模型列表
|
||||
|
||||
| Model | Codebase | Model config |
|
||||
| :---------------- | :--------------- | :---------------------------------------------------------------------------------------------: |
|
||||
| RetinaNet | MMDetection | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/retinanet) |
|
||||
| Faster R-CNN | MMDetection | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn) |
|
||||
| YOLOv3 | MMDetection | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/yolo) |
|
||||
| YOLOX | MMDetection | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/yolox) |
|
||||
| Mask R-CNN | MMDetection | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn) |
|
||||
| SSD | MMDetection | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/ssd) |
|
||||
| ResNet | MMClassification | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/resnet) |
|
||||
| ResNeXt | MMClassification | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/resnext) |
|
||||
| SE-ResNet | MMClassification | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/seresnet) |
|
||||
| MobileNetV2 | MMClassification | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/mobilenet_v2) |
|
||||
| ShuffleNetV1 | MMClassification | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/shufflenet_v1) |
|
||||
| ShuffleNetV2 | MMClassification | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/shufflenet_v2) |
|
||||
| VisionTransformer | MMClassification | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/vision_transformer) |
|
||||
| FCN | MMSegmentation | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/fcn) |
|
||||
| PSPNet | MMSegmentation | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/pspnet) |
|
||||
| DeepLabV3 | MMSegmentation | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/deeplabv3) |
|
||||
| DeepLabV3+ | MMSegmentation | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/deeplabv3plus) |
|
||||
| UNet | MMSegmentation | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/unet) |
|
||||
|
||||
Only the tested models are listed in the table; models that are not listed may also be supported, and you are welcome to try converting them yourself.
|
||||
|
||||
## Test
|
||||
|
||||
- Ubuntu 20.04
|
||||
- tvm 0.9.0
|
||||
|
||||
| mmcls | metric | PyTorch | TVM |
|
||||
| :----------------------------------------------------------------------------------------------------------------------------------------------------: | :----: | :-----: | :---: |
|
||||
| [ResNet-18](https://github.com/open-mmlab/mmclassification/tree/master/configs/resnet/resnet18_b32x8_imagenet.py) | top-1 | 69.90 | 69.90 |
|
||||
| [ResNeXt-50](https://github.com/open-mmlab/mmclassification/tree/master/configs/resnext/resnext50_32x4d_b32x8_imagenet.py) | top-1 | 77.90 | 77.90 |
|
||||
| [ShuffleNet V2](https://github.com/open-mmlab/mmclassification/tree/master/configs/shufflenet_v2/shufflenet_v2_1x_b64x16_linearlr_bn_nowd_imagenet.py) | top-1 | 69.55 | 69.55 |
|
||||
| [MobileNet V2](https://github.com/open-mmlab/mmclassification/tree/master/configs/mobilenet_v2/mobilenet-v2_8xb32_in1k.py) | top-1 | 71.86 | 71.86 |
|
||||
|
||||
<!-- | [Vision Transformer](https://github.com/open-mmlab/mmclassification/blob/master/configs/vision_transformer/vit-base-p16_ft-64xb64_in1k-384.py) | top-1 | 85.43 | 84.01 | -->
|
||||
|
||||
| mmdet(\*) | metric | PyTorch | TVM |
|
||||
| :-------------------------------------------------------------------------------------: | :----: | :-----: | :--: |
|
||||
| [SSD](https://github.com/open-mmlab/mmdetection/tree/master/configs/ssd/ssd300_coco.py) | box AP | 25.5 | 25.5 |
|
||||
|
||||
\*: Since dynamic shape conversion is not supported yet, only the accuracy result of SSD is provided.
|
||||
|
||||
| mmseg | metric | PyTorch | TVM |
|
||||
| :------------------------------------------------------------------------------------------------------------------------: | :----: | :-----: | :---: |
|
||||
| [FCN](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/fcn/fcn_r50-d8_512x1024_40k_cityscapes.py) | mIoU | 72.25 | 72.36 |
|
||||
| [PSPNet](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/pspnet/pspnet_r50-d8_512x1024_80k_cityscapes.py) | mIoU | 78.55 | 77.90 |
|
|
@ -0,0 +1,8 @@
|
|||
# TVM feature support
|
||||
|
||||
MMDeploy has integrated TVM into both the model converter and the SDK. The available features include (an example deploy config follows the list):
|
||||
|
||||
- AutoTVM tuner
|
||||
- Ansor tuner
|
||||
- Graph Executor runtime
|
||||
- Virtual Machine runtime
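
A minimal deploy-config sketch for picking these features; the field names (`shape`, `dtype`, `use_vm`, `tuner` under `backend_config.model_inputs`) follow the converter code in this PR, while the concrete values are only illustrative:

backend_config = dict(
    type='tvm',
    model_inputs=[
        dict(
            # static input shape and dtype passed to relay's ONNX frontend
            shape=dict(input=[1, 3, 224, 224]),
            dtype=dict(input='float32'),
            # True switches from the Graph Executor to the Virtual Machine runtime
            use_vm=False,
            # any registered tuner works: DefaultTuner, AutoTVMTuner, AutoScheduleTuner
            tuner=dict(
                type='AutoScheduleTuner',
                log_file='tvm_tune.json',
                num_measure_trials=2000))
    ])

With `use_vm=True`, the converter additionally writes a `.code` bytecode file next to the compiled library, and `tuner.target` is filled in by `tools/deploy.py` from the `--device` argument when it is not set explicitly.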
|
|
@ -35,6 +35,7 @@
|
|||
03-benchmark/supported_models.md
|
||||
03-benchmark/benchmark.md
|
||||
03-benchmark/benchmark_edge.md
|
||||
03-benchmark/benchmark_tvm.md
|
||||
03-benchmark/quantization.md
|
||||
|
||||
.. toctree::
|
||||
|
|
|
@ -0,0 +1,12 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
from mmdeploy.backend.tvm import get_library_ext, is_available
|
||||
from ..core import PIPELINE_MANAGER
|
||||
|
||||
__all__ = ['is_available', 'get_library_ext']
|
||||
|
||||
if is_available():
|
||||
from mmdeploy.backend.tvm import HDF5Dataset
|
||||
from mmdeploy.backend.tvm import from_onnx as _from_onnx
|
||||
from_onnx = PIPELINE_MANAGER.register_pipeline()(_from_onnx)
|
||||
|
||||
__all__ += ['from_onnx', 'HDF5Dataset']
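
A short usage sketch of this API module, assuming TVM is installed; the file names and shapes are hypothetical:

import mmdeploy.apis.tvm as tvm_api

if tvm_api.is_available():
    ext = tvm_api.get_library_ext()  # '.so' on Linux/macOS, '.dll' on Windows
    # from_onnx is the pipeline-registered converter imported above
    tvm_api.from_onnx(
        'end2end.onnx',
        'end2end' + ext,
        shape={'input': [1, 3, 224, 224]},
        dtype={'input': 'float32'})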
|
|
@ -59,8 +59,6 @@ def visualize_model(model_cfg: Union[str, mmcv.Config],
|
|||
|
||||
if isinstance(model, (list, tuple)):
|
||||
assert len(model) > 0, 'Model should have at least one element.'
|
||||
assert all([isinstance(m, str) for m in model]), \
|
||||
'All elements in the list should be str'
|
||||
|
||||
if backend == Backend.PYTORCH:
|
||||
model = task_processor.init_pytorch_model(model[0])
|
||||
|
|
|
@ -10,6 +10,7 @@ from mmdeploy.utils import (Backend, Task, get_backend, get_codebase,
|
|||
get_common_config, get_ir_config,
|
||||
get_partition_config, get_root_logger,
|
||||
get_task_type, is_dynamic_batch, load_config)
|
||||
from mmdeploy.utils.config_utils import get_backend_config
|
||||
from mmdeploy.utils.constants import SDK_TASK_MAP as task_map
|
||||
from .tracer import add_transform_tag, get_transform_static
|
||||
|
||||
|
@ -80,7 +81,7 @@ def get_model_name_customs(deploy_cfg: mmcv.Config, model_cfg: mmcv.Config,
|
|||
def get_models(deploy_cfg: Union[str, mmcv.Config],
|
||||
model_cfg: Union[str, mmcv.Config], work_dir: str,
|
||||
device: str) -> List:
|
||||
"""Get the output model informantion for deploy.json.
|
||||
"""Get the output model information for deploy.json.
|
||||
|
||||
Args:
|
||||
deploy_cfg (mmcv.Config): Deploy config dict.
|
||||
|
@ -90,7 +91,7 @@ def get_models(deploy_cfg: Union[str, mmcv.Config],
|
|||
|
||||
Return:
|
||||
list[dict]: The list contains dicts composed of the model name, net,
|
||||
weghts, backend, precision batchsize and dynamic_shape.
|
||||
weights, backend, precision batch_size and dynamic_shape.
|
||||
"""
|
||||
name, _ = get_model_name_customs(deploy_cfg, model_cfg, work_dir, device)
|
||||
precision = 'FP32'
|
||||
|
@ -148,6 +149,26 @@ def get_models(deploy_cfg: Union[str, mmcv.Config],
|
|||
convert_to = deploy_cfg.backend_config.convert_to
|
||||
suffix = get_model_suffix(convert_to)
|
||||
net = replace_suffix(ir_name, suffix)
|
||||
elif backend == Backend.TVM:
|
||||
import os.path as osp
|
||||
|
||||
from mmdeploy.backend.tvm import get_library_ext
|
||||
ext = get_library_ext()
|
||||
net = replace_suffix(ir_name, ext)
|
||||
# get input and output name
|
||||
ir_cfg = get_ir_config(deploy_cfg)
|
||||
backend_cfg = get_backend_config(deploy_cfg)
|
||||
input_names = ir_cfg['input_names']
|
||||
output_names = ir_cfg['output_names']
|
||||
weights = replace_suffix(ir_name, '.txt')
|
||||
weights_path = osp.join(work_dir, weights)
|
||||
use_vm = backend_cfg.model_inputs[0].get('use_vm', False)
|
||||
bytecode_path = replace_suffix(ir_name, '.code')
|
||||
with open(weights_path, 'w') as f:
|
||||
f.write(','.join(input_names) + '\n')
|
||||
f.write(','.join(output_names) + '\n')
|
||||
if use_vm:
|
||||
f.write(bytecode_path + '\n')
|
||||
else:
|
||||
raise NotImplementedError(f'Not supported backend: {backend.value}.')
|
||||
|
||||
|
|
|
@ -1,6 +1,4 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import mmdeploy_python as c_api
|
||||
|
||||
from mmdeploy.utils import Backend, parse_device_id, parse_device_type
|
||||
from mmdeploy.utils.timer import TimeCounter
|
||||
from ..base import BACKEND_WRAPPER, BaseWrapper
|
||||
|
@ -11,6 +9,7 @@ class SDKWrapper(BaseWrapper):
|
|||
|
||||
def __init__(self, model_file, task_name, device):
|
||||
super().__init__([])
|
||||
import mmdeploy_python as c_api
|
||||
creator = getattr(c_api, task_name)
|
||||
device_id = parse_device_id(device)
|
||||
device_type = parse_device_type(device)
|
||||
|
|
|
@ -0,0 +1,43 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import importlib
|
||||
import sys
|
||||
|
||||
from .backend_manager import TVMManager
|
||||
|
||||
|
||||
def is_available() -> bool:
|
||||
"""Check whether tvm package is installed.
|
||||
|
||||
Returns:
|
||||
bool: True if tvm package is installed.
|
||||
"""
|
||||
|
||||
return importlib.util.find_spec('tvm') is not None
|
||||
|
||||
|
||||
def get_library_ext() -> str:
|
||||
"""Get the extension of the library.
|
||||
|
||||
Returns:
|
||||
str: The extension name
|
||||
"""
|
||||
platform = sys.platform.lower()
|
||||
if platform == 'win32' or platform == 'cygwin':
|
||||
return '.dll'
|
||||
elif platform == 'linux' or platform == 'darwin' or platform == 'freebsd':
|
||||
return '.so'
|
||||
|
||||
|
||||
if is_available():
|
||||
from .onnx2tvm import from_onnx
|
||||
from .quantize import HDF5Dataset
|
||||
from .tuner import build_tvm_tuner
|
||||
|
||||
__all__ = ['from_onnx', 'build_tvm_tuner', 'HDF5Dataset', 'TVMManager']
|
||||
|
||||
try:
|
||||
# import wrapper if pytorch is available
|
||||
from .wrapper import TVMWrapper
|
||||
__all__ += ['TVMWrapper']
|
||||
except Exception:
|
||||
pass
|
|
@ -0,0 +1,37 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
|
||||
from typing import Any, Optional, Sequence
|
||||
|
||||
from ..base import BACKEND_MANAGERS, BaseBackendManager
|
||||
|
||||
|
||||
@BACKEND_MANAGERS.register('tvm')
|
||||
class TVMManager(BaseBackendManager):
|
||||
|
||||
@classmethod
|
||||
def build_wrapper(cls,
|
||||
backend_files: Sequence[str],
|
||||
device: str = 'cpu',
|
||||
input_names: Optional[Sequence[str]] = None,
|
||||
output_names: Optional[Sequence[str]] = None,
|
||||
deploy_cfg: Optional[Any] = None,
|
||||
**kwargs):
|
||||
"""Build the wrapper for the backend model.
|
||||
|
||||
Args:
|
||||
backend_files (Sequence[str]): Backend files.
|
||||
device (str, optional): The device info. Defaults to 'cpu'.
|
||||
input_names (Optional[Sequence[str]], optional): input names.
|
||||
Defaults to None.
|
||||
output_names (Optional[Sequence[str]], optional): output names.
|
||||
Defaults to None.
|
||||
deploy_cfg (Optional[Any], optional): The deploy config. Defaults
|
||||
to None.
|
||||
"""
|
||||
from .wrapper import TVMWrapper
|
||||
bytecode = None if len(backend_files) <= 1 else backend_files[1]
|
||||
return TVMWrapper(
|
||||
backend_files[0],
|
||||
bytecode=bytecode,
|
||||
output_names=output_names,
|
||||
device=device)
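
For reference, a hedged sketch of calling this manager directly; the backend file names are hypothetical, and the second file only exists when the model was converted with `use_vm=True`:

from mmdeploy.backend.tvm import TVMManager

wrapper = TVMManager.build_wrapper(
    backend_files=['end2end.so', 'end2end.code'],
    device='cpu',
    output_names=['output'])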
|
|
@ -0,0 +1,97 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
from typing import Callable, Dict, Optional, Union
|
||||
|
||||
import onnx
|
||||
from tvm.relay.frontend import from_onnx as relay_from_onnx
|
||||
from tvm.relay.quantize import QConfig
|
||||
from tvm.relay.quantize import qconfig as create_qconfig
|
||||
from tvm.relay.quantize import quantize
|
||||
from tvm.target import Target
|
||||
|
||||
from mmdeploy.utils import get_root_logger
|
||||
from .tuner import TVMTunerBase, build_tvm_tuner
|
||||
|
||||
|
||||
def from_onnx(onnx_model: Union[str, onnx.ModelProto],
|
||||
output_file: str,
|
||||
use_vm: bool = False,
|
||||
bytecode_file: str = '',
|
||||
shape: Optional[Dict] = None,
|
||||
dtype: Union[str, Dict] = 'float32',
|
||||
tuner: Optional[Union[TVMTunerBase, Dict]] = None,
|
||||
qconfig: Optional[Union[QConfig, Dict]] = None,
|
||||
dataset: Optional[Callable] = None):
|
||||
"""Convert ONNX model to tvm lib.
|
||||
|
||||
Args:
|
||||
onnx_model (Union[str, onnx.ModelProto]): ONNX model or model path
|
||||
output_file (str): output library path
|
||||
use_vm (bool, optional): Enable tvm virtual machine runtime.
|
||||
Defaults to False.
|
||||
bytecode_file (str, optional): output bytecode path for virtual
|
||||
machine. Defaults to ''.
|
||||
shape (Optional[Dict], optional): The input shape dictionary. Defaults
|
||||
to None.
|
||||
dtype (Union[str, Dict], optional): The input data type dictionary.
|
||||
Defaults to 'float32'.
|
||||
tuner (Optional[Union[TVMTunerBase, Dict]], optional): The tuner
|
||||
config. Defaults to None.
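qconfig (Optional[Union[QConfig, Dict]], optional): The quantization
config. Defaults to None.
dataset (Optional[Callable], optional): The calibration dataset
generator, used together with qconfig. Defaults to None.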
|
||||
|
||||
Return:
|
||||
lib: The converted tvm lib
|
||||
bytecode: The bytecode of virtual machine runtime.
|
||||
None if use_vm==False.
|
||||
|
||||
Examples:
|
||||
>>> from mmdeploy.backend.tvm import from_onnx
|
||||
>>> onnx_path = 'model.onnx'
|
||||
>>> output_file = 'model.so'
|
||||
>>> shape = {'input':[1,3,224,224]}
|
||||
>>> dtype = {'input':'float32'}
|
||||
>>> from_onnx(onnx_path, output_file, shape=shape, dtype=dtype)
|
||||
"""
|
||||
logger = get_root_logger()
|
||||
|
||||
if shape is not None and isinstance(dtype, Dict):
|
||||
assert len(shape) == len(dtype)
|
||||
for name in shape:
|
||||
assert name in dtype
|
||||
|
||||
if isinstance(onnx_model, str):
|
||||
onnx_model = onnx.load(onnx_model)
|
||||
assert isinstance(onnx_model, onnx.ModelProto
|
||||
), f'Expect onnx.ModelProto, but get {type(onnx_model)}.'
|
||||
|
||||
logger.info('Convert onnx to IRModule.')
|
||||
mod, params = relay_from_onnx(onnx_model, shape, dtype=dtype, opset=11)
|
||||
|
||||
# quantization
|
||||
if qconfig is not None:
|
||||
logger.info('Quantization')
|
||||
|
||||
if isinstance(qconfig, Dict):
|
||||
qconfig = create_qconfig(**qconfig)
|
||||
|
||||
with qconfig:
|
||||
mod = quantize(mod, params, dataset)
|
||||
|
||||
if tuner is None:
|
||||
# use default tuner
|
||||
tuner = dict(type='DefaultTuner', target=Target('llvm'))
|
||||
|
||||
if not issubclass(type(tuner), TVMTunerBase):
|
||||
tuner['use_vm'] = use_vm
|
||||
tuner = build_tvm_tuner(tuner)
|
||||
|
||||
logger.info(f'Tuning with {type(tuner).__name__} .')
|
||||
tuner.tune(mod, params)
|
||||
lib = tuner.build(mod, params)
|
||||
|
||||
logger.info(f'Export library to {output_file} .')
|
||||
bytecode = None
|
||||
if tuner.use_vm:
|
||||
bytecode, lib = lib.save()
|
||||
with open(bytecode_file, mode='wb') as f:
|
||||
f.write(bytecode)
|
||||
lib.export_library(output_file)
|
||||
return lib, bytecode
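
The docstring example above covers the default Graph Executor path; below is a hedged sketch of the Virtual Machine path with AutoTVM tuning, mirroring the unit test added in this PR (paths and trial counts are illustrative):

from mmdeploy.backend.tvm import from_onnx

lib, bytecode = from_onnx(
    'model.onnx',
    'model.so',
    use_vm=True,
    bytecode_file='model.code',
    shape={'input': [1, 3, 224, 224]},
    dtype={'input': 'float32'},
    tuner=dict(
        type='AutoTVMTuner',
        target='llvm',
        log_file='model_autotvm.log',
        n_trial=1000,
        tuner=dict(type='XGBTuner')))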
|
|
@ -0,0 +1,73 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
from typing import Any, Dict, Sequence, Union
|
||||
|
||||
import numpy as np
|
||||
import tvm
|
||||
from tvm.runtime.ndarray import array
|
||||
|
||||
|
||||
class HDF5Dataset:
|
||||
"""HDF5 dataset.
|
||||
|
||||
Args:
|
||||
calib_file (str | h5py.File): Input calibration file.
|
||||
input_shapes (Dict[str, Sequence[int]]): The shape of
|
||||
each input.
|
||||
model_type (str): Input model type, defaults to 'end2end'.
|
||||
device (str): Device type, defaults to 'llvm'.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
calib_file: Union[str, Any],
|
||||
input_shapes: Dict[str, Sequence[int]],
|
||||
model_type: str = 'end2end',
|
||||
device: str = 'llvm',
|
||||
) -> None:
|
||||
import h5py
|
||||
if isinstance(calib_file, str):
|
||||
calib_file = h5py.File(calib_file, mode='r')
|
||||
|
||||
assert 'calib_data' in calib_file
|
||||
calib_data = calib_file['calib_data']
|
||||
assert model_type in calib_data
|
||||
calib_data = calib_data[model_type]
|
||||
|
||||
self.calib_file = calib_file
|
||||
self.calib_data = calib_data
|
||||
self.device = device
|
||||
self.input_shapes = input_shapes
|
||||
|
||||
first_input_group = calib_data[list(calib_data.keys())[0]]
|
||||
self.dataset_length = len(first_input_group)
|
||||
|
||||
def __call__(self):
|
||||
"""Create dataset generator.
|
||||
|
||||
Yields:
|
||||
Iterator[Any]: data in the dataset
|
||||
"""
|
||||
for idx in range(self.dataset_length):
|
||||
|
||||
ret = dict()
|
||||
for name, opt_shape in self.input_shapes.items():
|
||||
input_group = self.calib_data[name]
|
||||
data_np = input_group[str(idx)][...].astype(np.float32)
|
||||
|
||||
data_shape = data_np.shape
|
||||
|
||||
# tile the input data
|
||||
reps = [
|
||||
int(np.ceil(opt_s / data_s))
|
||||
for opt_s, data_s in zip(opt_shape, data_shape)
|
||||
]
|
||||
|
||||
data_np = np.tile(data_np, reps)
|
||||
|
||||
slice_list = tuple(slice(0, end) for end in opt_shape)
|
||||
data_np = data_np[slice_list]
|
||||
|
||||
data_nd = array(data_np, tvm.device(self.device))
|
||||
|
||||
ret[name] = data_nd
|
||||
yield ret
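
A hedged sketch of how this calibration dataset feeds a quantized conversion; the calibration file name is hypothetical and the qconfig keys are only examples of what `tvm.relay.quantize.qconfig` accepts:

from mmdeploy.backend.tvm import HDF5Dataset, from_onnx

dataset = HDF5Dataset(
    'calib_data.h5',
    input_shapes={'input': [1, 3, 224, 224]},
    model_type='end2end',
    device='llvm')

from_onnx(
    'end2end.onnx',
    'end2end.so',
    shape={'input': [1, 3, 224, 224]},
    dtype={'input': 'float32'},
    qconfig=dict(calibrate_mode='kl_divergence', weight_scale='max'),
    dataset=dataset())  # pass the generator returned by __call__, not the object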
|
|
@ -0,0 +1,414 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import os
|
||||
from abc import abstractmethod
|
||||
from typing import Any, Dict, Optional, Union
|
||||
|
||||
import tvm
|
||||
from mmcv.utils import Registry
|
||||
from tvm import IRModule, auto_scheduler, autotvm, relay
|
||||
from tvm.target import Target
|
||||
|
||||
from mmdeploy.utils import get_root_logger
|
||||
|
||||
TVM_TUNER = Registry('tvm_tuner')
|
||||
AUTOTVM_TUNER = Registry('autotvm_tuner')
|
||||
AUTOTVM_BUILDER = Registry('autotvm_builder')
|
||||
AUTOTVM_RUNNER = Registry('autotvm_runner')
|
||||
AUTO_SCHEDULER_BUILDER = Registry('auto_scheduler_builder')
|
||||
AUTO_SCHEDULER_RUNNER = Registry('auto_scheduler_runner')
|
||||
|
||||
|
||||
def build_tvm_tuner(cfg: Dict):
|
||||
"""Build the tvm tuner.
|
||||
|
||||
Args:
|
||||
cfg (Dict): The build config
|
||||
|
||||
Returns:
|
||||
Any: The tvm tuner instance
|
||||
"""
|
||||
return TVM_TUNER.build(cfg)
|
||||
|
||||
|
||||
def build_autotvm_tuner(cfg: Dict):
|
||||
"""Build the autotvm tuner.
|
||||
|
||||
Args:
|
||||
cfg (Dict): The build config
|
||||
|
||||
Returns:
|
||||
Any: The autotvm tuner instance
|
||||
"""
|
||||
return AUTOTVM_TUNER.build(cfg)
|
||||
|
||||
|
||||
def build_autotvm_builder(cfg: Dict):
|
||||
"""Build the autotvm builder.
|
||||
|
||||
Args:
|
||||
cfg (Dict): The build config
|
||||
|
||||
Returns:
|
||||
Any: The autotvm builder instance
|
||||
"""
|
||||
return AUTOTVM_BUILDER.build(cfg)
|
||||
|
||||
|
||||
def build_autotvm_runner(cfg: Dict):
|
||||
"""Build the autotvm runner.
|
||||
|
||||
Args:
|
||||
cfg (Dict): The build config
|
||||
|
||||
Returns:
|
||||
Any: The autotvm runner instance
|
||||
"""
|
||||
return AUTOTVM_RUNNER.build(cfg)
|
||||
|
||||
|
||||
def build_auto_scheduler_builder(cfg: Dict):
|
||||
"""Build the ansor builder.
|
||||
|
||||
Args:
|
||||
cfg (Dict): The build config
|
||||
|
||||
Returns:
|
||||
Any: The ansor builder instance
|
||||
"""
|
||||
return AUTO_SCHEDULER_BUILDER.build(cfg)
|
||||
|
||||
|
||||
def build_auto_scheduler_runner(cfg: Dict):
|
||||
"""Build the ansor tuner.
|
||||
|
||||
Args:
|
||||
cfg (Dict): The build config
|
||||
|
||||
Returns:
|
||||
Any: The ansor tuner instance
|
||||
"""
|
||||
return AUTO_SCHEDULER_RUNNER.build(cfg)
|
||||
|
||||
|
||||
AUTOTVM_TUNER.register_module()(autotvm.tuner.XGBTuner)
|
||||
AUTOTVM_TUNER.register_module()(autotvm.tuner.GATuner)
|
||||
AUTOTVM_TUNER.register_module()(autotvm.tuner.GridSearchTuner)
|
||||
AUTOTVM_TUNER.register_module()(autotvm.tuner.RandomTuner)
|
||||
|
||||
AUTOTVM_BUILDER.register_module()(autotvm.LocalBuilder)
|
||||
|
||||
AUTOTVM_RUNNER.register_module()(autotvm.LocalRunner)
|
||||
AUTOTVM_RUNNER.register_module()(autotvm.RPCRunner)
|
||||
|
||||
AUTO_SCHEDULER_BUILDER.register_module()(auto_scheduler.LocalBuilder)
|
||||
|
||||
AUTO_SCHEDULER_RUNNER.register_module()(auto_scheduler.LocalRunner)
|
||||
AUTO_SCHEDULER_RUNNER.register_module()(auto_scheduler.RPCRunner)
|
||||
|
||||
|
||||
class TVMTunerBase:
|
||||
"""The base class of TVM tuner.
|
||||
|
||||
Args:
|
||||
target (Union[str, Target]): The target platform to be tuned.
|
||||
opt_level (int): The optimization level.
|
||||
use_vm (bool): Enable tvm virtual machine runtime.
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
target: Union[str, Target],
|
||||
opt_level: int = 3,
|
||||
use_vm: bool = False) -> None:
|
||||
if isinstance(target, str):
|
||||
target = Target(target)
|
||||
self._target = target
|
||||
self._opt_level = opt_level
|
||||
self._use_vm = use_vm
|
||||
|
||||
@property
|
||||
def use_vm(self) -> bool:
|
||||
"""Get use_vm.
|
||||
|
||||
Returns:
|
||||
bool: use_vm
|
||||
"""
|
||||
return self._use_vm
|
||||
|
||||
@abstractmethod
|
||||
def tune(self, mod: IRModule, params: Dict):
|
||||
"""Tune the graph.
|
||||
|
||||
Args:
|
||||
mod (IRModule): The graph module.
|
||||
params (Dict): The graph parameters.
|
||||
"""
|
||||
raise NotImplementedError('tune method not implemented.')
|
||||
|
||||
def build(self, mod: IRModule, params: Dict):
|
||||
"""Build tuning library.
|
||||
|
||||
Args:
|
||||
mod (IRModule): IRModule to build
|
||||
params (Dict): Parameter of the mod
|
||||
|
||||
Returns:
|
||||
lib: The runtime factory for the graph executor
|
||||
"""
|
||||
with tvm.transform.PassContext(opt_level=self._opt_level):
|
||||
if self._use_vm:
|
||||
ret = relay.vm.compile(mod, target=self._target, params=params)
|
||||
else:
|
||||
ret = relay.build_module.build(
|
||||
mod, target=self._target, params=params)
|
||||
|
||||
return ret
|
||||
|
||||
|
||||
@TVM_TUNER.register_module
|
||||
class DefaultTuner(TVMTunerBase):
|
||||
"""The Default tuner, do nothing when tuning.
|
||||
|
||||
Args:
|
||||
target (Union[str, Target]): The target platform to be tuned.
|
||||
opt_level (int): The optimization level.
|
||||
use_vm (bool): Enable tvm virtual machine runtime.
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
target: Union[str, Target],
|
||||
opt_level: int = 3,
|
||||
use_vm: bool = False) -> None:
|
||||
super().__init__(target, opt_level, use_vm)
|
||||
|
||||
def tune(self, mod: IRModule, params: Dict):
|
||||
"""Tune model, Default tuner does nothing."""
|
||||
pass
|
||||
|
||||
|
||||
@TVM_TUNER.register_module
|
||||
class AutoTVMTuner(TVMTunerBase):
|
||||
|
||||
def __init__(self,
|
||||
target: Union[str, Target],
|
||||
log_file: str,
|
||||
n_trial: int,
|
||||
tuner: Dict,
|
||||
opt_level: int = 3,
|
||||
use_vm: bool = False,
|
||||
early_stopping: Optional[int] = None,
|
||||
builder: Union[Dict,
|
||||
Any] = dict(type='LocalBuilder', timeout=10),
|
||||
runner: Union[Dict, Any] = dict(
|
||||
type='LocalRunner',
|
||||
number=20,
|
||||
repeat=3,
|
||||
timeout=4,
|
||||
min_repeat_ms=150),
|
||||
use_transfer_learning: bool = True) -> None:
|
||||
"""The AutoTVM tuner.
|
||||
|
||||
Args:
|
||||
target (Union[str, Target]): The target platform to tune.
|
||||
log_file (str): the log file path.
|
||||
n_trial (int): Maximum number of configs to try.
|
||||
tuner (Dict): The autotvm tuner config.
|
||||
opt_level (int, optional): The optimization level. Defaults to 3.
|
||||
use_vm (bool, optional): Enable tvm virtual machine.
|
||||
Defaults to False.
|
||||
early_stopping (Optional[int], optional): Early stop the tuning
|
||||
when not finding better configs in this number of trials.
|
||||
builder (Union[Dict, Any], optional): The builder config.
|
||||
runner (Union[Dict, Any], optional): The runner config.
|
||||
use_transfer_learning (bool, optional): Whether to use transfer
|
||||
learning. Defaults to True.
|
||||
"""
|
||||
super().__init__(target, opt_level, use_vm)
|
||||
self._log_file = log_file
|
||||
self._n_trial = n_trial
|
||||
self._tuner = tuner
|
||||
self._early_stopping = early_stopping
|
||||
self._use_transfer_learning = use_transfer_learning
|
||||
|
||||
if isinstance(builder, Dict):
|
||||
builder = build_autotvm_builder(builder)
|
||||
|
||||
if isinstance(runner, Dict):
|
||||
runner = build_autotvm_runner(runner)
|
||||
|
||||
self._measure_option = autotvm.measure_option(
|
||||
builder=builder, runner=runner)
|
||||
|
||||
def tune(self, mod: IRModule, params: Dict):
|
||||
"""Tune the graph.
|
||||
|
||||
Args:
|
||||
mod (IRModule): The graph module.
|
||||
params (Dict): The graph parameters.
|
||||
"""
|
||||
logger = get_root_logger()
|
||||
target = self._target
|
||||
logger.info('Create autotvm task.')
|
||||
tasks = autotvm.task.extract_from_program(
|
||||
mod['main'], target=target, params=params)
|
||||
|
||||
# create tmp log file
|
||||
if os.path.exists(self._log_file):
|
||||
os.remove(self._log_file)
|
||||
tmp_log_file = self._log_file + '.tmp'
|
||||
if os.path.exists(tmp_log_file):
|
||||
os.remove(tmp_log_file)
|
||||
|
||||
tuner_cfg = self._tuner
|
||||
for i, task in enumerate(reversed(tasks)):
|
||||
prefix = '[Task %3d/%3d] ' % (i + 1, len(tasks))
|
||||
|
||||
tuner_cfg['task'] = task
|
||||
tuner_obj = build_autotvm_tuner(tuner_cfg)
|
||||
|
||||
if self._use_transfer_learning:
|
||||
if os.path.isfile(tmp_log_file) and os.path.exists(
|
||||
tmp_log_file):
|
||||
tuner_obj.load_history(
|
||||
autotvm.record.load_from_file(tmp_log_file))
|
||||
|
||||
# do tuning
|
||||
tsk_trial = min(self._n_trial, len(task.config_space))
|
||||
tuner_obj.tune(
|
||||
n_trial=tsk_trial,
|
||||
early_stopping=self._early_stopping,
|
||||
measure_option=self._measure_option,
|
||||
callbacks=[
|
||||
autotvm.callback.progress_bar(tsk_trial, prefix=prefix),
|
||||
autotvm.callback.log_to_file(tmp_log_file),
|
||||
],
|
||||
)
|
||||
|
||||
# pick best records to a cache file
|
||||
autotvm.record.pick_best(tmp_log_file, self._log_file)
|
||||
if os.path.exists(tmp_log_file):
|
||||
os.remove(tmp_log_file)
|
||||
|
||||
def build(self, mod: IRModule, params: Dict):
|
||||
"""Build tuning library.
|
||||
|
||||
Args:
|
||||
mod (IRModule): IRModule to build
|
||||
params (Dict): Parameter of the mod
|
||||
|
||||
Returns:
|
||||
lib: The runtime factory for the graph executor
|
||||
"""
|
||||
with autotvm.apply_history_best(self._log_file):
|
||||
with tvm.transform.PassContext(opt_level=self._opt_level):
|
||||
if self._use_vm:
|
||||
ret = relay.vm.compile(
|
||||
mod, target=self._target, params=params)
|
||||
else:
|
||||
ret = relay.build_module.build(
|
||||
mod, target=self._target, params=params)
|
||||
|
||||
return ret
|
||||
|
||||
|
||||
@TVM_TUNER.register_module
|
||||
class AutoScheduleTuner(TVMTunerBase):
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
target: Union[str, Target],
|
||||
log_file: str,
|
||||
num_measure_trials: int,
|
||||
opt_level: int = 3,
|
||||
use_vm: bool = False,
|
||||
early_stopping: Optional[int] = None,
|
||||
builder: Union[Dict, Any] = dict(type='LocalBuilder', timeout=15),
|
||||
runner: Union[Dict, Any] = dict(
|
||||
type='LocalRunner', repeat=10, enable_cpu_cache_flush=True)
|
||||
) -> None:
|
||||
"""The Ansor tuner.
|
||||
|
||||
Args:
|
||||
target (Union[str, Target]): The target platform to tune.
|
||||
log_file (str): the log file path.
|
||||
num_measure_trials (int): Maximum number of configs to try.
|
||||
opt_level (int, optional): The optimization level. Defaults to 3.
|
||||
use_vm (bool, optional): Enable tvm virtual machine.
|
||||
Defaults to False.
|
||||
early_stopping (Optional[int], optional): Early stop the tuning
|
||||
when not finding better configs in this number of trials.
|
||||
builder (Union[Dict, Any], optional): The builder config.
|
||||
runner (Union[Dict, Any], optional): The runner config.
|
||||
"""
|
||||
super().__init__(target, opt_level, use_vm)
|
||||
self._log_file = log_file
|
||||
self._num_measure_trials = num_measure_trials
|
||||
self._early_stopping = early_stopping
|
||||
|
||||
if isinstance(builder, Dict):
|
||||
builder = build_auto_scheduler_builder(builder)
|
||||
|
||||
if isinstance(runner, Dict):
|
||||
# CUDA devices need a separate process for measurement
|
||||
if runner['type'] == 'LocalRunner':
|
||||
runner.pop('type')
|
||||
if Target(target).kind != 'llvm':
|
||||
if 'enable_cpu_cache_flush' in runner:
|
||||
runner['enable_cpu_cache_flush'] = False
|
||||
self._measure_ctx = auto_scheduler.LocalRPCMeasureContext(
|
||||
**runner)
|
||||
runner = self._measure_ctx.runner
|
||||
else:
|
||||
runner = build_auto_scheduler_runner(runner)
|
||||
|
||||
tune_option = auto_scheduler.TuningOptions(
|
||||
num_measure_trials=num_measure_trials,
|
||||
runner=runner,
|
||||
builder=builder,
|
||||
measure_callbacks=[auto_scheduler.RecordToFile(log_file)],
|
||||
)
|
||||
self._tune_option = tune_option
|
||||
|
||||
def tune(self, mod: IRModule, params: Dict):
|
||||
"""Tune the graph.
|
||||
|
||||
Args:
|
||||
mod (IRModule): The graph module.
|
||||
params (Dict): The graph parameters.
|
||||
"""
|
||||
logger = get_root_logger()
|
||||
target = self._target
|
||||
|
||||
if os.path.exists(self._log_file):
|
||||
os.remove(self._log_file)
|
||||
|
||||
logger.info('Create auto scheduler task.')
|
||||
tasks, task_weights = auto_scheduler.extract_tasks(
|
||||
mod['main'], params, target)
|
||||
|
||||
tuner = auto_scheduler.TaskScheduler(tasks, task_weights)
|
||||
|
||||
logger.info('Begin tuning.')
|
||||
tuner.tune(self._tune_option)
|
||||
|
||||
def build(self, mod: IRModule, params: Dict):
|
||||
"""Build tuning library.
|
||||
|
||||
Args:
|
||||
mod (IRModule): IRModule to build
|
||||
params (Dict): Parameter of the mod
|
||||
|
||||
Returns:
|
||||
lib: The runtime factory for the graph executor
|
||||
"""
|
||||
with auto_scheduler.ApplyHistoryBest(self._log_file):
|
||||
with tvm.transform.PassContext(
|
||||
opt_level=self._opt_level,
|
||||
config={'relay.backend.use_auto_scheduler': True}):
|
||||
if self._use_vm:
|
||||
ret = relay.vm.compile(
|
||||
mod, target=self._target, params=params)
|
||||
else:
|
||||
ret = relay.build_module.build(
|
||||
mod, target=self._target, params=params)
|
||||
|
||||
return ret
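
As a usage note, a hedged sketch of building tuners from config dicts through the registries above (log file names and trial counts are illustrative):

from mmdeploy.backend.tvm import build_tvm_tuner

# AutoTVM with an XGBoost cost model
autotvm_tuner = build_tvm_tuner(
    dict(
        type='AutoTVMTuner',
        target='llvm',
        log_file='autotvm.log',
        n_trial=1000,
        tuner=dict(type='XGBTuner')))

# Ansor / auto-scheduler
ansor_tuner = build_tvm_tuner(
    dict(
        type='AutoScheduleTuner',
        target='llvm',
        log_file='ansor.json',
        num_measure_trials=2000))

Either tuner can then be passed directly as the `tuner` argument of `from_onnx`, which calls `tuner.tune(mod, params)` followed by `tuner.build(mod, params)`.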
|
|
@ -0,0 +1,119 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import re
|
||||
from typing import Dict, Optional, Sequence, Union
|
||||
|
||||
import torch
|
||||
import tvm
|
||||
import tvm.contrib.graph_executor as runtime
|
||||
from tvm.runtime.vm import Executable, VirtualMachine
|
||||
|
||||
from mmdeploy.utils import Backend
|
||||
from mmdeploy.utils.timer import TimeCounter
|
||||
from ..base import BACKEND_WRAPPER, BaseWrapper
|
||||
|
||||
|
||||
@BACKEND_WRAPPER.register_module(Backend.TVM.value)
|
||||
class TVMWrapper(BaseWrapper):
|
||||
"""TVM runtime wrapper.
|
||||
|
||||
Args:
|
||||
lib (str): The path to the generated lib
|
||||
output_names (Sequence[str]): The output names.
|
||||
bytecode (Union[bytearray, str]): The bytecode for virtual machine.
|
||||
device (str): Device used to do the inference.
|
||||
|
||||
|
||||
Examples:
|
||||
>>> from mmdeploy.backend.tvm import TVMWrapper
|
||||
>>> lib_file = 'resnet.so'
|
||||
>>> model = TVMWrapper(lib_file, ['output'])
|
||||
>>> inputs = dict(input=torch.randn(1, 3, 224, 224))
|
||||
>>> outputs = model(inputs)
|
||||
>>> print(outputs)
|
||||
"""
|
||||
|
||||
def __init__(self,
|
||||
lib: str,
|
||||
output_names: Sequence[str],
|
||||
bytecode: Optional[Union[bytearray, str]] = None,
|
||||
device: str = 'cpu'):
|
||||
super().__init__(output_names)
|
||||
self.use_vm = False
|
||||
|
||||
if isinstance(lib, str):
|
||||
lib = tvm.runtime.load_module(lib)
|
||||
|
||||
match_result = re.match('([^:]+)(:[0-9]+)?$', device)
|
||||
assert match_result is not None, f'Can not parse device {device}.'
|
||||
device_type = match_result.group(1).lower()
|
||||
device_id = 0 if match_result.lastindex == 1 else int(
|
||||
match_result.group(2)[1:])
|
||||
device = tvm.device(device_type, device_id)
|
||||
|
||||
if bytecode is not None:
|
||||
self.use_vm = True
|
||||
if isinstance(bytecode, str):
|
||||
with open(bytecode, 'rb') as f:
|
||||
bytecode = f.read()
|
||||
|
||||
if self.use_vm:
|
||||
exec = Executable.load_exec(bytecode, lib)
|
||||
|
||||
module = VirtualMachine(exec, device)
|
||||
else:
|
||||
module = runtime.GraphModule(lib['default'](device))
|
||||
num_output = module.get_num_outputs()
|
||||
assert isinstance(output_names, Sequence)
|
||||
assert len(output_names) == num_output
|
||||
|
||||
self._lib = lib
|
||||
self._device = device
|
||||
self._module = module
|
||||
|
||||
def forward(self, inputs: Dict[str,
|
||||
torch.Tensor]) -> Dict[str, torch.Tensor]:
|
||||
"""Run forward inference.
|
||||
|
||||
Args:
|
||||
inputs (Dict[str, torch.Tensor]): The input name and tensor pairs.
|
||||
|
||||
Return:
|
||||
Dict[str, torch.Tensor]: The output name and tensor pairs.
|
||||
"""
|
||||
module = self._module
|
||||
device = self._device
|
||||
|
||||
mod_inputs = dict()
|
||||
for name, tensor in inputs.items():
|
||||
if tensor.device.type == 'cuda':
|
||||
mod_inputs[name] = tvm.nd.from_dlpack(tensor)
|
||||
else:
|
||||
mod_inputs[name] = tvm.nd.array(tensor.cpu().numpy(), device)
|
||||
|
||||
if self.use_vm:
|
||||
module.set_input('main', **mod_inputs)
|
||||
self.__tvm_execute()
|
||||
vm_ret = module.get_outputs()
|
||||
ret = dict()
|
||||
for idx, name in enumerate(self._output_names):
|
||||
ndarray = vm_ret[idx]
|
||||
tensor = torch.from_dlpack(ndarray.to_dlpack())
|
||||
ret[name] = tensor
|
||||
return ret
|
||||
|
||||
else:
|
||||
module.set_input(**mod_inputs)
|
||||
|
||||
self.__tvm_execute()
|
||||
|
||||
ret = dict()
|
||||
for idx, name in enumerate(self._output_names):
|
||||
ndarray = module.get_output(idx)
|
||||
tensor = torch.from_dlpack(ndarray.to_dlpack())
|
||||
ret[name] = tensor.clone()
|
||||
return ret
|
||||
|
||||
@TimeCounter.count_time(Backend.TVM.value)
|
||||
def __tvm_execute(self):
|
||||
module = self._module
|
||||
module.run()
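
The class docstring demonstrates the Graph Executor path; here is a hedged sketch of loading a Virtual Machine build instead (file and output names are illustrative):

import torch

from mmdeploy.backend.tvm import TVMWrapper

wrapper = TVMWrapper(
    'end2end.so',
    output_names=['dets', 'labels'],
    bytecode='end2end.code',  # written by from_onnx(..., use_vm=True)
    device='cuda:0')
outputs = wrapper(dict(input=torch.rand(1, 3, 224, 224).cuda()))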
|
|
@ -318,6 +318,9 @@ def single_roi_extractor__forward__openvino(ctx,
|
|||
return result
|
||||
|
||||
|
||||
@FUNCTION_REWRITER.register_rewriter(
|
||||
func_name='mmdet.models.roi_heads.SingleRoIExtractor.forward',
|
||||
backend=Backend.TVM.value)
|
||||
@FUNCTION_REWRITER.register_rewriter(
|
||||
func_name='mmdet.models.roi_heads.SingleRoIExtractor.forward',
|
||||
backend=Backend.COREML.value)
|
||||
|
@ -328,6 +331,7 @@ def single_roi_extractor__forward__coreml(ctx,
|
|||
rois,
|
||||
roi_scale_factor=None):
|
||||
"""Rewrite `forward` of SingleRoIExtractor for coreml."""
|
||||
backend = get_backend(ctx.cfg)
|
||||
out_size = self.roi_layers[0].output_size
|
||||
num_levels = len(feats)
|
||||
roi_feats = feats[0].new_zeros(rois.shape[0], self.out_channels, *out_size)
|
||||
|
@ -346,7 +350,8 @@ def single_roi_extractor__forward__coreml(ctx,
|
|||
# inds = mask.nonzero(as_tuple=False).squeeze(1)
|
||||
rois_t = rois * mask.unsqueeze(-1)
|
||||
# use the roi align in torchvision
|
||||
self.roi_layers[i].use_torchvision = True
|
||||
if backend == Backend.COREML:
|
||||
self.roi_layers[i].use_torchvision = True
|
||||
roi_feats_t = self.roi_layers[i](feats[i], rois_t)
|
||||
roi_feats = roi_feats + roi_feats_t * (rois_t[:, -1] > 0).reshape(
|
||||
-1, 1, 1, 1)
|
||||
|
|
|
@ -65,6 +65,7 @@ class Backend(AdvancedEnum):
|
|||
RKNN = 'rknn'
|
||||
ASCEND = 'ascend'
|
||||
COREML = 'coreml'
|
||||
TVM = 'tvm'
|
||||
DEFAULT = 'default'
|
||||
|
||||
|
||||
|
|
|
@ -42,7 +42,7 @@ def get_backend_version():
|
|||
Returns:
|
||||
Dict: The name and the version of some supported backend.
|
||||
"""
|
||||
backend_library_list = ['tensorrt', 'onnxruntime', 'ncnn']
|
||||
backend_library_list = ['tensorrt', 'onnxruntime', 'ncnn', 'tvm']
|
||||
version_dict = dict()
|
||||
for backend in backend_library_list:
|
||||
version_dict[backend] = get_library_version(backend)
|
||||
|
|
|
@ -51,6 +51,8 @@ def backend_checker(backend: Backend, require_plugin: bool = False):
|
|||
from mmdeploy.apis.rknn import is_available
|
||||
elif backend == Backend.ASCEND:
|
||||
from mmdeploy.apis.ascend import is_available
|
||||
elif backend == Backend.TVM:
|
||||
from mmdeploy.apis.tvm import is_available
|
||||
else:
|
||||
warnings.warn('The backend checker is not available')
|
||||
return
|
||||
|
@ -110,6 +112,8 @@ def check_backend(backend: Backend, require_plugin: bool = False):
|
|||
from mmdeploy.backend.rknn import device_available as is_available
|
||||
elif backend == Backend.ASCEND:
|
||||
from mmdeploy.backend.ascend import is_available
|
||||
elif backend == Backend.TVM:
|
||||
from mmdeploy.backend.tvm import is_available
|
||||
else:
|
||||
warnings.warn('The backend checker is not available')
|
||||
return
|
||||
|
|
|
@ -0,0 +1,105 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import os.path as osp
|
||||
import tempfile
|
||||
|
||||
import pytest
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
|
||||
from mmdeploy.utils import Backend
|
||||
from mmdeploy.utils.test import backend_checker
|
||||
|
||||
onnx_file = tempfile.NamedTemporaryFile(suffix='.onnx').name
|
||||
test_img = torch.rand([1, 3, 8, 8])
|
||||
|
||||
|
||||
@pytest.mark.skip(reason='This is not a test class but a utility class.')
|
||||
class TestModel(nn.Module):
|
||||
|
||||
def __init__(self):
|
||||
super().__init__()
|
||||
self.conv = torch.nn.Conv2d(3, 8, 3, 1, 1)
|
||||
|
||||
def forward(self, x):
|
||||
return self.conv(x)
|
||||
|
||||
|
||||
test_model = TestModel().eval()
|
||||
|
||||
|
||||
def generate_onnx_file(model):
|
||||
with torch.no_grad():
|
||||
torch.onnx.export(
|
||||
model,
|
||||
test_img,
|
||||
onnx_file,
|
||||
output_names=['output'],
|
||||
input_names=['input'],
|
||||
keep_initializers_as_inputs=True,
|
||||
do_constant_folding=True,
|
||||
verbose=False,
|
||||
opset_version=11)
|
||||
assert osp.exists(onnx_file)
|
||||
|
||||
|
||||
@backend_checker(Backend.TVM)
|
||||
def test_onnx2tvm():
|
||||
from mmdeploy.apis.tvm import from_onnx, get_library_ext
|
||||
model = test_model
|
||||
generate_onnx_file(model)
|
||||
|
||||
work_dir, _ = osp.split(onnx_file)
|
||||
file_name = osp.splitext(onnx_file)[0]
|
||||
ext = get_library_ext()
|
||||
lib_path = osp.join(work_dir, file_name + ext)
|
||||
bytecode_path = osp.join(work_dir, file_name + '.code')
|
||||
log_file = osp.join(work_dir, file_name + '.log')
|
||||
shape = {'input': test_img.shape}
|
||||
dtype = {'input': 'float32'}
|
||||
target = 'llvm'
|
||||
|
||||
# test default tuner
|
||||
tuner_dict = dict(type='DefaultTuner', target=target)
|
||||
from_onnx(onnx_file, lib_path, shape=shape, dtype=dtype, tuner=tuner_dict)
|
||||
assert osp.exists(lib_path)
|
||||
|
||||
# test autotvm
|
||||
lib_path = osp.join(work_dir, file_name + '_autotvm' + ext)
|
||||
bytecode_path = osp.join(work_dir, file_name + '_autotvm.code')
|
||||
log_file = osp.join(work_dir, file_name + '_autotvm.log')
|
||||
tuner_dict = dict(
|
||||
type='AutoTVMTuner',
|
||||
target=target,
|
||||
log_file=log_file,
|
||||
n_trial=1,
|
||||
tuner=dict(type='XGBTuner'))
|
||||
from_onnx(
|
||||
onnx_file,
|
||||
lib_path,
|
||||
use_vm=True,
|
||||
bytecode_file=bytecode_path,
|
||||
shape=shape,
|
||||
dtype=dtype,
|
||||
tuner=tuner_dict)
|
||||
assert osp.exists(lib_path)
|
||||
assert osp.exists(bytecode_path)
|
||||
|
||||
# test ansor
|
||||
lib_path = osp.join(work_dir, file_name + '_ansor' + ext)
|
||||
bytecode_path = osp.join(work_dir, file_name + '_ansor.code')
|
||||
log_file = osp.join(work_dir, file_name + '_ansor.log')
|
||||
tuner_dict = dict(
|
||||
type='AutoScheduleTuner',
|
||||
target=target,
|
||||
log_file=log_file,
|
||||
num_measure_trials=2)
|
||||
from_onnx(
|
||||
onnx_file,
|
||||
lib_path,
|
||||
use_vm=True,
|
||||
bytecode_file=bytecode_path,
|
||||
shape=shape,
|
||||
dtype=dtype,
|
||||
tuner=tuner_dict)
|
||||
assert osp.exists(lib_path)
|
||||
assert osp.exists(bytecode_path)
|
|
@ -131,6 +131,18 @@ def onnx2backend(backend, onnx_file):
|
|||
dict(input_shapes=dict(input=test_img.shape)))
|
||||
from_onnx(onnx_file, work_dir, model_inputs)
|
||||
return backend_file
|
||||
elif backend == Backend.TVM:
|
||||
from mmdeploy.backend.tvm import from_onnx, get_library_ext
|
||||
ext = get_library_ext()
|
||||
lib_file = tempfile.NamedTemporaryFile(suffix=ext).name
|
||||
shape = {'input': test_img.shape}
|
||||
dtype = {'input': 'float32'}
|
||||
target = 'llvm'
|
||||
tuner_dict = dict(type='DefaultTuner', target=target)
|
||||
from_onnx(
|
||||
onnx_file, lib_file, shape=shape, dtype=dtype, tuner=tuner_dict)
|
||||
assert osp.exists(lib_file)
|
||||
return lib_file
|
||||
|
||||
|
||||
def create_wrapper(backend, model_files):
|
||||
|
@ -172,6 +184,10 @@ def create_wrapper(backend, model_files):
|
|||
from mmdeploy.backend.ascend import AscendWrapper
|
||||
ascend_model = AscendWrapper(model_files)
|
||||
return ascend_model
|
||||
elif backend == Backend.TVM:
|
||||
from mmdeploy.backend.tvm import TVMWrapper
|
||||
tvm_model = TVMWrapper(model_files, output_names=output_names)
|
||||
return tvm_model
|
||||
else:
|
||||
raise NotImplementedError(f'Unknown backend type: {backend.value}')
|
||||
|
||||
|
@ -207,13 +223,17 @@ def run_wrapper(backend, wrapper, input):
|
|||
elif backend == Backend.ASCEND:
|
||||
results = wrapper({'input': input})['output']
|
||||
return results
|
||||
elif backend == Backend.TVM:
|
||||
results = wrapper({'input': input})['output']
|
||||
return results
|
||||
else:
|
||||
raise NotImplementedError(f'Unknown backend type: {backend.value}')
|
||||
|
||||
|
||||
ALL_BACKEND = [
|
||||
Backend.TENSORRT, Backend.ONNXRUNTIME, Backend.PPLNN, Backend.NCNN,
|
||||
Backend.OPENVINO, Backend.TORCHSCRIPT, Backend.ASCEND, Backend.RKNN
|
||||
Backend.OPENVINO, Backend.TORCHSCRIPT, Backend.ASCEND, Backend.RKNN,
|
||||
Backend.TVM
|
||||
]
|
||||
|
||||
|
||||
|
|
|
@ -0,0 +1,233 @@
|
|||
|
||||
|
||||
// copy from:
|
||||
// https://github.com/dmlc/dlpack/blob/v0.7/include/dlpack/dlpack.h
|
||||
/*!
|
||||
* Copyright (c) 2017 by Contributors
|
||||
* \file dlpack.h
|
||||
* \brief The common header of DLPack.
|
||||
*/
|
||||
#ifndef DLPACK_DLPACK_H_
|
||||
#define DLPACK_DLPACK_H_
|
||||
|
||||
/**
|
||||
* \brief Compatibility with C++
|
||||
*/
|
||||
#ifdef __cplusplus
|
||||
#define DLPACK_EXTERN_C extern "C"
|
||||
#else
|
||||
#define DLPACK_EXTERN_C
|
||||
#endif
|
||||
|
||||
/*! \brief The current version of dlpack */
|
||||
#define DLPACK_VERSION 70
|
||||
|
||||
/*! \brief The current ABI version of dlpack */
|
||||
#define DLPACK_ABI_VERSION 1
|
||||
|
||||
/*! \brief DLPACK_DLL prefix for windows */
|
||||
#ifdef _WIN32
|
||||
#ifdef DLPACK_EXPORTS
|
||||
#define DLPACK_DLL __declspec(dllexport)
|
||||
#else
|
||||
#define DLPACK_DLL __declspec(dllimport)
|
||||
#endif
|
||||
#else
|
||||
#define DLPACK_DLL
|
||||
#endif
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
/*!
|
||||
* \brief The device type in DLDevice.
|
||||
*/
|
||||
#ifdef __cplusplus
|
||||
typedef enum : int32_t {
|
||||
#else
|
||||
typedef enum {
|
||||
#endif
|
||||
/*! \brief CPU device */
|
||||
kDLCPU = 1,
|
||||
/*! \brief CUDA GPU device */
|
||||
kDLCUDA = 2,
|
||||
/*!
|
||||
* \brief Pinned CUDA CPU memory by cudaMallocHost
|
||||
*/
|
||||
kDLCUDAHost = 3,
|
||||
/*! \brief OpenCL devices. */
|
||||
kDLOpenCL = 4,
|
||||
/*! \brief Vulkan buffer for next generation graphics. */
|
||||
kDLVulkan = 7,
|
||||
/*! \brief Metal for Apple GPU. */
|
||||
kDLMetal = 8,
|
||||
/*! \brief Verilog simulator buffer */
|
||||
kDLVPI = 9,
|
||||
/*! \brief ROCm GPUs for AMD GPUs */
|
||||
kDLROCM = 10,
|
||||
/*!
|
||||
* \brief Pinned ROCm CPU memory allocated by hipMallocHost
|
||||
*/
|
||||
kDLROCMHost = 11,
|
||||
/*!
|
||||
* \brief Reserved extension device type,
|
||||
* used for quickly test extension device
|
||||
* The semantics can differ depending on the implementation.
|
||||
*/
|
||||
kDLExtDev = 12,
|
||||
/*!
|
||||
* \brief CUDA managed/unified memory allocated by cudaMallocManaged
|
||||
*/
|
||||
kDLCUDAManaged = 13,
|
||||
/*!
|
||||
* \brief Unified shared memory allocated on a oneAPI non-partititioned
|
||||
* device. Call to oneAPI runtime is required to determine the device
|
||||
* type, the USM allocation type and the sycl context it is bound to.
|
||||
*
|
||||
*/
|
||||
kDLOneAPI = 14,
|
||||
/*! \brief GPU support for next generation WebGPU standard. */
|
||||
kDLWebGPU = 15,
|
||||
/*! \brief Qualcomm Hexagon DSP */
|
||||
kDLHexagon = 16,
|
||||
} DLDeviceType;
|
||||
|
||||
/*!
|
||||
* \brief A Device for Tensor and operator.
|
||||
*/
|
||||
typedef struct {
|
||||
/*! \brief The device type used in the device. */
|
||||
DLDeviceType device_type;
|
||||
/*!
|
||||
* \brief The device index.
|
||||
* For vanilla CPU memory, pinned memory, or managed memory, this is set to 0.
|
||||
*/
|
||||
int32_t device_id;
|
||||
} DLDevice;
|
||||
|
||||
/*!
|
||||
* \brief The type code options DLDataType.
|
||||
*/
|
||||
typedef enum {
|
||||
/*! \brief signed integer */
|
||||
kDLInt = 0U,
|
||||
/*! \brief unsigned integer */
|
||||
kDLUInt = 1U,
|
||||
/*! \brief IEEE floating point */
|
||||
kDLFloat = 2U,
|
||||
/*!
|
||||
* \brief Opaque handle type, reserved for testing purposes.
|
||||
* Frameworks need to agree on the handle data type for the exchange to be well-defined.
|
||||
*/
|
||||
kDLOpaqueHandle = 3U,
|
||||
/*! \brief bfloat16 */
|
||||
kDLBfloat = 4U,
|
||||
/*!
|
||||
* \brief complex number
|
||||
* (C/C++/Python layout: compact struct per complex number)
|
||||
*/
|
||||
kDLComplex = 5U,
|
||||
} DLDataTypeCode;
|
||||
|
||||
/*!
|
||||
* \brief The data type the tensor can hold. The data type is assumed to follow the
|
||||
* native endian-ness. An explicit error message should be raised when attempting to
|
||||
* export an array with non-native endianness
|
||||
*
|
||||
* Examples
|
||||
* - float: type_code = 2, bits = 32, lanes=1
|
||||
* - float4(vectorized 4 float): type_code = 2, bits = 32, lanes=4
|
||||
* - int8: type_code = 0, bits = 8, lanes=1
|
||||
* - std::complex<float>: type_code = 5, bits = 64, lanes = 1
|
||||
*/
|
||||
typedef struct {
|
||||
/*!
|
||||
* \brief Type code of base types.
|
||||
* We keep it uint8_t instead of DLDataTypeCode for minimal memory
|
||||
* footprint, but the value should be one of DLDataTypeCode enum values.
|
||||
* */
|
||||
uint8_t code;
|
||||
/*!
|
||||
* \brief Number of bits, common choices are 8, 16, 32.
|
||||
*/
|
||||
uint8_t bits;
|
||||
/*! \brief Number of lanes in the type, used for vector types. */
|
||||
uint16_t lanes;
|
||||
} DLDataType;
|
||||
|
||||
/*!
|
||||
* \brief Plain C Tensor object, does not manage memory.
|
||||
*/
|
||||
typedef struct {
|
||||
/*!
|
||||
* \brief The data pointer points to the allocated data. This will be CUDA
|
||||
* device pointer or cl_mem handle in OpenCL. It may be opaque on some device
|
||||
* types. This pointer is always aligned to 256 bytes as in CUDA. The
|
||||
* `byte_offset` field should be used to point to the beginning of the data.
|
||||
*
|
||||
* Note that as of Nov 2021, multiply libraries (CuPy, PyTorch, TensorFlow,
|
||||
* TVM, perhaps others) do not adhere to this 256 byte aligment requirement
|
||||
* on CPU/CUDA/ROCm, and always use `byte_offset=0`. This must be fixed
|
||||
* (after which this note will be updated); at the moment it is recommended
|
||||
* to not rely on the data pointer being correctly aligned.
|
||||
*
|
||||
* For given DLTensor, the size of memory required to store the contents of
|
||||
* data is calculated as follows:
|
||||
*
|
||||
* \code{.c}
|
||||
* static inline size_t GetDataSize(const DLTensor* t) {
|
||||
* size_t size = 1;
|
||||
* for (tvm_index_t i = 0; i < t->ndim; ++i) {
|
||||
* size *= t->shape[i];
|
||||
* }
|
||||
* size *= (t->dtype.bits * t->dtype.lanes + 7) / 8;
|
||||
* return size;
|
||||
* }
|
||||
* \endcode
|
||||
*/
|
||||
void* data;
|
||||
/*! \brief The device of the tensor */
|
||||
DLDevice device;
|
||||
/*! \brief Number of dimensions */
|
||||
int32_t ndim;
|
||||
/*! \brief The data type of the pointer*/
|
||||
DLDataType dtype;
|
||||
/*! \brief The shape of the tensor */
|
||||
int64_t* shape;
|
||||
/*!
|
||||
* \brief strides of the tensor (in number of elements, not bytes)
|
||||
* can be NULL, indicating tensor is compact and row-majored.
|
||||
*/
|
||||
int64_t* strides;
|
||||
/*! \brief The offset in bytes to the beginning pointer to data */
|
||||
uint64_t byte_offset;
|
||||
} DLTensor;
|
||||
|
||||
/*!
|
||||
* \brief C Tensor object, manage memory of DLTensor. This data structure is
|
||||
* intended to facilitate the borrowing of DLTensor by another framework. It is
|
||||
* not meant to transfer the tensor. When the borrowing framework doesn't need
|
||||
* the tensor, it should call the deleter to notify the host that the resource
|
||||
* is no longer needed.
|
||||
*/
|
||||
typedef struct DLManagedTensor {
|
||||
/*! \brief DLTensor which is being memory managed */
|
||||
DLTensor dl_tensor;
|
||||
/*! \brief the context of the original host framework of DLManagedTensor in
|
||||
* which DLManagedTensor is used in the framework. It can also be NULL.
|
||||
*/
|
||||
void* manager_ctx;
|
||||
/*! \brief Destructor signature void (*)(void*) - this should be called
|
||||
* to destruct manager_ctx which holds the DLManagedTensor. It can be NULL
|
||||
* if there is no way for the caller to provide a reasonable destructor.
|
||||
* The destructors deletes the argument self as well.
|
||||
*/
|
||||
void (*deleter)(struct DLManagedTensor* self);
|
||||
} DLManagedTensor;
|
||||
#ifdef __cplusplus
|
||||
} // DLPACK_EXTERN_C
|
||||
#endif
|
||||
#endif // DLPACK_DLPACK_H_
|
|
@ -21,6 +21,7 @@ def check_backend():
|
|||
ort_version = backend_versions['onnxruntime']
|
||||
trt_version = backend_versions['tensorrt']
|
||||
ncnn_version = backend_versions['ncnn']
|
||||
tvm_version = backend_versions['tvm']
|
||||
|
||||
import mmdeploy.apis.onnxruntime as ort_apis
|
||||
logger = get_root_logger()
|
||||
|
@ -35,6 +36,8 @@ def check_backend():
|
|||
logger.info(f'ncnn: {ncnn_version}\tops_is_avaliable : '
|
||||
f'{ncnn_apis.is_custom_ops_available()}')
|
||||
|
||||
logger.info(f'tvm: {tvm_version}')
|
||||
|
||||
import mmdeploy.apis.pplnn as pplnn_apis
|
||||
logger.info(f'pplnn_is_avaliable: {pplnn_apis.is_available()}')
|
||||
|
||||
|
|
|
@ -410,6 +410,51 @@ def main():
|
|||
deploy_cfg, coreml_files)
|
||||
|
||||
backend_files = coreml_files
|
||||
elif backend == Backend.TVM:
|
||||
import copy
|
||||
|
||||
from mmdeploy.apis.tvm import from_onnx, get_library_ext
|
||||
PIPELINE_MANAGER.set_log_level(log_level, [from_onnx])
|
||||
model_inputs = get_model_inputs(deploy_cfg)
|
||||
|
||||
if args.device.startswith('cuda'):
|
||||
target = 'cuda'
|
||||
else:
|
||||
target = 'llvm'
|
||||
|
||||
lib_ext = get_library_ext()
|
||||
|
||||
tvm_files = []
|
||||
for model_id, onnx_path in enumerate(ir_files):
|
||||
model_input = copy.deepcopy(model_inputs[model_id])
|
||||
use_vm = model_input.get('use_vm', False)
|
||||
if 'target' not in model_input['tuner']:
|
||||
model_input['tuner']['target'] = target
|
||||
lib_path = osp.splitext(onnx_path)[0] + lib_ext
|
||||
code_path = osp.splitext(
|
||||
onnx_path)[0] + '.code' if use_vm else None
|
||||
model_input['output_file'] = lib_path
|
||||
model_input['onnx_model'] = onnx_path
|
||||
model_input['bytecode_file'] = code_path
|
||||
|
||||
# create calibration dataset
|
||||
if 'qconfig' in model_input:
|
||||
calib_path = osp.join(args.work_dir, calib_filename)
|
||||
from mmdeploy.backend.tvm import HDF5Dataset
|
||||
partition_type = 'end2end' if partition_cfgs is None \
|
||||
else onnx_name
|
||||
dataset = HDF5Dataset(
|
||||
calib_path,
|
||||
model_input['shape'],
|
||||
model_type=partition_type,
|
||||
device=target)
|
||||
model_input['dataset'] = dataset()
|
||||
|
||||
from_onnx(**model_input)
|
||||
|
||||
tvm_files += [lib_path, code_path]
|
||||
|
||||
backend_files = tvm_files
|
||||
|
||||
if args.test_img is None:
|
||||
args.test_img = args.img
|
||||
|
|
|
@ -0,0 +1,163 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
import os
|
||||
import os.path as osp
|
||||
import sys
|
||||
import time
|
||||
|
||||
from ubuntu_utils import cmd_result, ensure_base_env, get_job
|
||||
|
||||
|
||||
def install_llvm(dep_dir):
|
||||
print('-' * 10 + 'install llvm' + '-' * 10)
|
||||
|
||||
os.chdir(dep_dir)
|
||||
os.system(
|
||||
'wget --no-check-certificate -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add -' # noqa: E501
|
||||
)
|
||||
|
||||
ubuntu = cmd_result(
|
||||
""" lsb_release -a 2>/dev/null | grep "Release" | tail -n 1 | awk '{print $NF}' """ # noqa: E501
|
||||
)
|
||||
|
||||
nickname_dict = {
|
||||
'18.04': 'bionic',
|
||||
'20.04': 'focal',
|
||||
'22.04': 'jammy',
|
||||
'22.10': 'kinetic'
|
||||
}
|
||||
nickname = nickname_dict.get(ubuntu, None)
|
||||
if nickname is None:
|
||||
raise NotImplementedError(f'Unsupported ubuntu version {ubuntu}.')
|
||||
os.system(
|
||||
f"add-apt-repository 'deb http://apt.llvm.org/{nickname}/ llvm-toolchain-{nickname}-10 main'" # noqa: E501
|
||||
)
|
||||
os.system('sudo apt update')
|
||||
os.system(
|
||||
'sudo apt-get install llvm-10 lldb-10 llvm-10-dev libllvm10 llvm-10-runtime' # noqa: E501
|
||||
)
|
||||
|
||||
|
||||
def install_tvm(dep_dir):
|
||||
print('-' * 10 + 'build and install tvm' + '-' * 10)
|
||||
time.sleep(2)
|
||||
|
||||
os.system('sudo apt-get update')
|
||||
os.system(
|
||||
'sudo apt-get install -y python3 python3-dev python3-setuptools gcc libtinfo-dev zlib1g-dev build-essential cmake libedit-dev libxml2-dev' # noqa: E501
|
||||
)
|
||||
|
||||
# generate unzip and build dir
|
||||
os.chdir(dep_dir)
|
||||
|
||||
# git clone
|
||||
if not osp.exists('tvm'):
|
||||
os.system(
|
||||
'git clone --branch v0.10.0 --depth 1 --recursive https://github.com/apache/tvm tvm' # noqa: E501
|
||||
)
|
||||
|
||||
tvm_dir = osp.join(dep_dir, 'tvm')
|
||||
os.chdir(tvm_dir)
|
||||
|
||||
# build
|
||||
if not osp.exists('build'):
|
||||
os.system('mkdir build')
|
||||
os.system('cp cmake/config.cmake build')
|
||||
|
||||
os.chdir(osp.join(tvm_dir, 'build'))
|
||||
|
||||
os.system(
|
||||
""" sed -i "s@set(USE_LLVM OFF)@set(USE_LLVM /usr/bin/llvm-config-10)@g" config.cmake """ # noqa: E501
|
||||
)
|
||||
|
||||
os.system('cmake .. && make -j {} && make runtime'.format(g_jobs))
|
||||
|
||||
# set env
|
||||
os.system(
|
||||
""" echo 'export LD_LIBRARY_PATH={}:$LD_LIBRARY_PATH' >> ~/mmdeploy.env """ # noqa: E501
|
||||
.format(os.path.join(tvm_dir, 'build')))
|
||||
|
||||
# install python package
|
||||
os.chdir(osp.join(tvm_dir, 'python'))
|
||||
os.system(""" python3 setup.py install --user """)
|
||||
|
||||
# install dependency
|
||||
os.system(
|
||||
""" python3 -m pip install xgboost decorator psutil scipy attrs tornado """ # noqa: E501
|
||||
)
|
||||
|
||||
return tvm_dir
|
||||
|
||||
|
||||
def install_mmdeploy(work_dir, tvm_dir):
|
||||
print('-' * 10 + 'build and install mmdeploy' + '-' * 10)
|
||||
time.sleep(3)
|
||||
|
||||
os.chdir(work_dir)
|
||||
os.system('git submodule init')
|
||||
os.system('git submodule update')
|
||||
|
||||
if not os.path.exists('build'):
|
||||
os.system('mkdir build')
|
||||
|
||||
os.system('rm -rf build/CMakeCache.txt')
|
||||
|
||||
cmd = 'cd build && cmake ..'
|
||||
cmd += ' -DMMDEPLOY_BUILD_SDK=ON '
|
||||
cmd += ' -DMMDEPLOY_BUILD_EXAMPLES=ON '
|
||||
cmd += ' -DMMDEPLOY_BUILD_SDK_PYTHON_API=ON '
|
||||
cmd += ' -DMMDEPLOY_TARGET_DEVICES=cpu '
|
||||
cmd += ' -DMMDEPLOY_TARGET_BACKENDS=tvm '
|
||||
cmd += ' -DTVM_DIR={} '.format(tvm_dir)
|
||||
os.system(cmd)
|
||||
|
||||
os.system('cd build && make -j {} && make install'.format(g_jobs))
|
||||
os.system('python3 -m pip install -v -e .')
|
||||
os.system(""" echo 'export PATH={}:$PATH' >> ~/mmdeploy.env """.format(
|
||||
os.path.join(work_dir, 'mmdeploy', 'backend', 'tvm')))
|
||||
try:
|
||||
import mmcv
|
||||
print(mmcv.__version__)
|
||||
os.system('python3 tools/check_env.py')
|
||||
except Exception:
|
||||
print('Please install torch & mmcv later...')
|
||||
return 0
|
||||
|
||||
|
||||
def main():
|
||||
"""Auto install mmdeploy with tvm. To verify this script:
|
||||
|
||||
1) use `sudo docker run -v /path/to/mmdeploy:/root/mmdeploy -v /path/to/Miniconda3-latest-Linux-x86_64.sh:/root/miniconda.sh -it ubuntu:18.04 /bin/bash` # noqa: E501
|
||||
2) install conda and setup python environment
|
||||
3) run `python3 tools/scripts/build_ubuntu_x64_tvm.py`
|
||||
|
||||
Returns:
|
||||
int: -1 on failure, otherwise None.
|
||||
"""
|
||||
global g_jobs
|
||||
g_jobs = get_job(sys.argv)
|
||||
print('g_jobs {}'.format(g_jobs))
|
||||
|
||||
work_dir = osp.abspath(osp.join(__file__, '..', '..', '..'))
|
||||
dep_dir = osp.abspath(osp.join(work_dir, '..', 'mmdeploy-dep'))
|
||||
if not osp.exists(dep_dir):
|
||||
if osp.isfile(dep_dir):
|
||||
print('{} already exists and it is a file, exit.'.format(dep_dir))
|
||||
return -1
|
||||
os.mkdir(dep_dir)
|
||||
|
||||
success = ensure_base_env(work_dir, dep_dir)
|
||||
if success != 0:
|
||||
return -1
|
||||
|
||||
install_llvm(dep_dir)
|
||||
tvm_dir = install_tvm(dep_dir)
|
||||
if install_mmdeploy(work_dir, tvm_dir) != 0:
|
||||
return -1
|
||||
|
||||
if osp.exists(osp.expanduser('~/mmdeploy.env')):
|
||||
print('Please source ~/mmdeploy.env to setup your env !')
|
||||
os.system('cat ~/mmdeploy.env')
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|