[Enhancement] Support tvm (#1216)

* finish framework

* add autotvm and auto-scheduler tuner

* add python deploy api

* add SDK net (WIP)

* add sdk support

* support det, support vm

* fix vm sdk

* support two stage detector

* add instance seg support

* add docstring

* update docs and ut

* add quantize

* update doc

* update docs

* synchronize stream

* support dlpack

* remove submodule

* fix stride

* add alignment

* support dlpack

* remove submodule

* replace exclusive_scan

* add backend check

* add build script

* fix comment

* add ci

* fix ci

* ci fix2

* update build script

* update ci

* add pytest

* update sed command

* update sed again

* add xgboost

* remove tvm ut

* update ansor runner

* add stream sync

* fix topk

* sync default stream

* fix tvm net

* fix window
q.yao 2022-12-12 21:19:40 +08:00 committed by GitHub
parent ac47cad407
commit 7cb4b9b18a
60 changed files with 2392 additions and 55 deletions

View File

@ -1,2 +1,3 @@
cann
CANN
nd

View File

@ -0,0 +1,41 @@
name: backend-tvm
on:
push:
paths-ignore:
- "demo/**"
- "tools/**"
pull_request:
paths-ignore:
- "demo/**"
- "tools/**"
- "docs/**"
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
jobs:
script_install:
runs-on: ubuntu-20.04
strategy:
matrix:
python-version: [3.7]
steps:
- name: Checkout repository
uses: actions/checkout@v3
with:
submodules: 'recursive'
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: Install mmdeploy
run: |
python3 tools/scripts/build_ubuntu_x64_tvm.py
source ~/mmdeploy.env
python3 -m pip install torch==1.8.2 torchvision==0.9.2 --extra-index-url https://download.pytorch.org/whl/lts/1.8/cpu
python3 -m pip install mmcv-full==1.5.1 -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.8.0/index.html
python3 -m pip install decorator psutil scipy attrs tornado pytest
python3 -c 'import mmdeploy.apis.tvm as tvm_api; assert tvm_api.is_available()'

.gitmodules
View File

@ -1,9 +1,9 @@
[submodule "third_party/cub"]
path = third_party/cub
url = https://github.com/NVIDIA/cub.git
path = third_party/cub
url = https://github.com/NVIDIA/cub.git
[submodule "third_party/pybind11"]
path = third_party/pybind11
url = https://github.com/pybind/pybind11.git
path = third_party/pybind11
url = https://github.com/pybind/pybind11.git
[submodule "third_party/spdlog"]
path = third_party/spdlog
url = https://github.com/gabime/spdlog.git
path = third_party/spdlog
url = https://github.com/gabime/spdlog.git

View File

@ -58,18 +58,18 @@ The supported Device-Platform-InferenceBackend matrix is presented as following,
The benchmark can be found from [here](docs/en/03-benchmark/benchmark.md)
| Device / Platform | Linux | Windows | macOS | Android |
| ----------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------- |
| x86_64 CPU | [![Build Status][pass-backend-ort]][ci-backend-ort]ONNXRuntime<br>[![Build Status][pass-backend-pplnn]][ci-backend-pplnn]pplnn<br>[![Build Status][pass-backend-ncnn]][ci-backend-ncnn]ncnn<br>[![Build Status][pass-backend-torchscript]][ci-backend-torchscript]LibTorch<br>[![Build Status][pass-build-rknpu]][ci-build-rknpu]OpenVINO | [![Build Status][pass-build-rknpu]][ci-build-rknpu]ONNXRuntime<br>[![Build Status][pass-build-rknpu]][ci-build-rknpu]OpenVINO | - | - |
| ARM CPU | [![Build Status][pass-build-rknpu]][ci-build-rknpu]ncnn | - | - | [![Build Status][pass-build-rknpu]][ci-build-rknpu]ncnn |
| RISC-V | [![Build Status][pass-build-riscv64-gcc]][ci-build-riscv64-gcc]ncnn | - | - | - |
| NVIDIA GPU | [![Build Status][pass-build-rknpu]][ci-build-rknpu]ONNXRuntime<br>[![Build Status][pass-build-rknpu]][ci-build-rknpu]TensorRT<br>[![Build Status][pass-build-rknpu]][ci-build-rknpu]pplnn<br>[![Build Status][pass-build-rknpu]][ci-build-rknpu]LibTorch | [![Build Status][pass-build-rknpu]][ci-build-rknpu]ONNXRuntime<br>[![Build Status][pass-build-rknpu]][ci-build-rknpu]TensorRT<br>[![Build Status][pass-build-rknpu]][ci-build-rknpu]pplnn | - | - |
| NVIDIA Jetson | [![Build Status][pass-build-rknpu]][ci-build-rknpu]TensorRT | [![Build Status][pass-build-rknpu]][ci-build-rknpu]TensorRT | - | - |
| Huawei ascend310 | [![Build Status][pass-backend-ascend]][ci-backend-ascend]CANN | - | - | - |
| Rockchip | [![Build Status][pass-backend-rknn]][ci-backend-rknn]RKNN | - | - | - |
| Apple M1 | - | - | [![Build Status][pass-backend-coreml]][ci-backend-coreml]CoreML | - |
| Adreno GPU | - | - | - | [![Build Status][pass-backend-snpe]][ci-backend-snpe]SNPE<br>[![Build Status][pass-build-rknpu]][ci-build-rknpu]ncnn |
| Hexagon DSP | - | - | - | [![Build Status][pass-backend-snpe]][ci-backend-snpe]SNPE |
| Device / Platform | Linux | Windows | macOS | Android |
| ----------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------- |
| x86_64 CPU | [![Build Status][pass-backend-ort]][ci-backend-ort]ONNXRuntime<br>[![Build Status][pass-backend-pplnn]][ci-backend-pplnn]pplnn<br>[![Build Status][pass-backend-ncnn]][ci-backend-ncnn]ncnn<br>[![Build Status][pass-backend-torchscript]][ci-backend-torchscript]LibTorch<br>[![Build Status][pass-build-rknpu]][ci-build-rknpu]OpenVINO<br>[![Build Status][pass-build-tvm]][ci-build-tvm]TVM | [![Build Status][pass-build-rknpu]][ci-build-rknpu]ONNXRuntime<br>[![Build Status][pass-build-rknpu]][ci-build-rknpu]OpenVINO | - | - |
| ARM CPU | [![Build Status][pass-build-rknpu]][ci-build-rknpu]ncnn | - | - | [![Build Status][pass-build-rknpu]][ci-build-rknpu]ncnn |
| RISC-V | [![Build Status][pass-build-riscv64-gcc]][ci-build-riscv64-gcc]ncnn | - | - | - |
| NVIDIA GPU | [![Build Status][pass-build-rknpu]][ci-build-rknpu]ONNXRuntime<br>[![Build Status][pass-build-rknpu]][ci-build-rknpu]TensorRT<br>[![Build Status][pass-build-rknpu]][ci-build-rknpu]pplnn<br>[![Build Status][pass-build-rknpu]][ci-build-rknpu]LibTorch<br>[![Build Status][pass-build-rknpu]][ci-build-tvm]TVM | [![Build Status][pass-build-rknpu]][ci-build-rknpu]ONNXRuntime<br>[![Build Status][pass-build-rknpu]][ci-build-rknpu]TensorRT<br>[![Build Status][pass-build-rknpu]][ci-build-rknpu]pplnn | - | - |
| NVIDIA Jetson | [![Build Status][pass-build-rknpu]][ci-build-rknpu]TensorRT | [![Build Status][pass-build-rknpu]][ci-build-rknpu]TensorRT | - | - |
| Huawei ascend310 | [![Build Status][pass-backend-ascend]][ci-backend-ascend]CANN | - | - | - |
| Rockchip | [![Build Status][pass-backend-rknn]][ci-backend-rknn]RKNN | - | - | - |
| Apple M1 | - | - | [![Build Status][pass-backend-coreml]][ci-backend-coreml]CoreML | - |
| Adreno GPU | - | - | - | [![Build Status][pass-backend-snpe]][ci-backend-snpe]SNPE<br>[![Build Status][pass-build-rknpu]][ci-build-rknpu]ncnn |
| Hexagon DSP | - | - | - | [![Build Status][pass-backend-snpe]][ci-backend-snpe]SNPE |
### Efficient and scalable C/C++ SDK Framework
@ -178,6 +178,7 @@ This project is released under the [Apache 2.0 license](LICENSE).
[ci-backend-torchscript]: https://github.com/open-mmlab/mmdeploy/actions/workflows/backend-torchscript.yml
[ci-build-riscv64-gcc]: https://github.com/open-mmlab/mmdeploy/actions/workflows/linux-riscv64-gcc.yml
[ci-build-rknpu]: https://github.com/open-mmlab/mmdeploy/actions/workflows/linux-rknpu.yml
[ci-build-tvm]: https://github.com/open-mmlab/mmdeploy/actions/workflows/backend-tvm.yml
[pass-backend-ascend]: https://img.shields.io/github/workflow/status/open-mmlab/mmdeploy/backend-ascend
[pass-backend-coreml]: https://img.shields.io/github/workflow/status/open-mmlab/mmdeploy/backend-coreml
[pass-backend-ncnn]: https://img.shields.io/github/workflow/status/open-mmlab/mmdeploy/backend-ncnn
@ -188,3 +189,4 @@ This project is released under the [Apache 2.0 license](LICENSE).
[pass-backend-torchscript]: https://img.shields.io/github/workflow/status/open-mmlab/mmdeploy/backend-ort
[pass-build-riscv64-gcc]: https://img.shields.io/github/workflow/status/open-mmlab/mmdeploy/build_riscv64_gcc
[pass-build-rknpu]: https://img.shields.io/github/workflow/status/open-mmlab/mmdeploy/build_rknpu
[pass-build-tvm]: https://img.shields.io/github/workflow/status/open-mmlab/mmdeploy/build_tvm

View File

@ -56,18 +56,18 @@ MMDeploy 是 [OpenMMLab](https://openmmlab.com/) 模型部署工具箱,**为
支持的设备平台和推理引擎如下表所示。benchmark请参考[这里](docs/zh_cn/03-benchmark/benchmark.md)
| Device / Platform | Linux | Windows | macOS | Android |
| ----------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------- |
| x86_64 CPU | [![Build Status][pass-backend-ort]][ci-backend-ort]ONNXRuntime<br>[![Build Status][pass-backend-pplnn]][ci-backend-pplnn]pplnn<br>[![Build Status][pass-backend-ncnn]][ci-backend-ncnn]ncnn<br>[![Build Status][pass-backend-torchscript]][ci-backend-torchscript]LibTorch<br>[![Build Status][pass-build-rknpu]][ci-build-rknpu]OpenVINO | [![Build Status][pass-build-rknpu]][ci-build-rknpu]ONNXRuntime<br>[![Build Status][pass-build-rknpu]][ci-build-rknpu]OpenVINO | - | - |
| ARM CPU | [![Build Status][pass-build-rknpu]][ci-build-rknpu]ncnn | - | - | [![Build Status][pass-build-rknpu]][ci-build-rknpu]ncnn |
| RISC-V | [![Build Status][pass-build-riscv64-gcc]][ci-build-riscv64-gcc]ncnn | - | - | - |
| NVIDIA GPU | [![Build Status][pass-build-rknpu]][ci-build-rknpu]ONNXRuntime<br>[![Build Status][pass-build-rknpu]][ci-build-rknpu]TensorRT<br>[![Build Status][pass-build-rknpu]][ci-build-rknpu]pplnn<br>[![Build Status][pass-build-rknpu]][ci-build-rknpu]LibTorch | [![Build Status][pass-build-rknpu]][ci-build-rknpu]ONNXRuntime<br>[![Build Status][pass-build-rknpu]][ci-build-rknpu]TensorRT<br>[![Build Status][pass-build-rknpu]][ci-build-rknpu]pplnn | - | - |
| NVIDIA Jetson | [![Build Status][pass-build-rknpu]][ci-build-rknpu]TensorRT | [![Build Status][pass-build-rknpu]][ci-build-rknpu]TensorRT | - | - |
| Huawei ascend310 | [![Build Status][pass-backend-ascend]][ci-backend-ascend]CANN | - | - | - |
| Rockchip | [![Build Status][pass-backend-rknn]][ci-backend-rknn]RKNN | - | - | - |
| Apple M1 | - | - | [![Build Status][pass-backend-coreml]][ci-backend-coreml]CoreML | - |
| Adreno GPU | - | - | - | [![Build Status][pass-backend-snpe]][ci-backend-snpe]SNPE<br>[![Build Status][pass-build-rknpu]][ci-build-rknpu]ncnn |
| Hexagon DSP | - | - | - | [![Build Status][pass-backend-snpe]][ci-backend-snpe]SNPE |
| Device / Platform | Linux | Windows | macOS | Android |
| ----------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------- |
| x86_64 CPU | [![Build Status][pass-backend-ort]][ci-backend-ort]ONNXRuntime<br>[![Build Status][pass-backend-pplnn]][ci-backend-pplnn]pplnn<br>[![Build Status][pass-backend-ncnn]][ci-backend-ncnn]ncnn<br>[![Build Status][pass-backend-torchscript]][ci-backend-torchscript]LibTorch<br>[![Build Status][pass-build-rknpu]][ci-build-rknpu]OpenVINO<br>[![Build Status][pass-build-tvm]][ci-build-tvm]TVM | [![Build Status][pass-build-rknpu]][ci-build-rknpu]ONNXRuntime<br>[![Build Status][pass-build-rknpu]][ci-build-rknpu]OpenVINO | - | - |
| ARM CPU | [![Build Status][pass-build-rknpu]][ci-build-rknpu]ncnn | - | - | [![Build Status][pass-build-rknpu]][ci-build-rknpu]ncnn |
| RISC-V | [![Build Status][pass-build-riscv64-gcc]][ci-build-riscv64-gcc]ncnn | - | - | - |
| NVIDIA GPU | [![Build Status][pass-build-rknpu]][ci-build-rknpu]ONNXRuntime<br>[![Build Status][pass-build-rknpu]][ci-build-rknpu]TensorRT<br>[![Build Status][pass-build-rknpu]][ci-build-rknpu]pplnn<br>[![Build Status][pass-build-rknpu]][ci-build-rknpu]LibTorch<br>[![Build Status][pass-build-rknpu]][ci-build-tvm]TVM | [![Build Status][pass-build-rknpu]][ci-build-rknpu]ONNXRuntime<br>[![Build Status][pass-build-rknpu]][ci-build-rknpu]TensorRT<br>[![Build Status][pass-build-rknpu]][ci-build-rknpu]pplnn | - | - |
| NVIDIA Jetson | [![Build Status][pass-build-rknpu]][ci-build-rknpu]TensorRT | [![Build Status][pass-build-rknpu]][ci-build-rknpu]TensorRT | - | - |
| Huawei ascend310 | [![Build Status][pass-backend-ascend]][ci-backend-ascend]CANN | - | - | - |
| Rockchip | [![Build Status][pass-backend-rknn]][ci-backend-rknn]RKNN | - | - | - |
| Apple M1 | - | - | [![Build Status][pass-backend-coreml]][ci-backend-coreml]CoreML | - |
| Adreno GPU | - | - | - | [![Build Status][pass-backend-snpe]][ci-backend-snpe]SNPE<br>[![Build Status][pass-build-rknpu]][ci-build-rknpu]ncnn |
| Hexagon DSP | - | - | - | [![Build Status][pass-backend-snpe]][ci-backend-snpe]SNPE |
### SDK 可高度定制化
@ -204,6 +204,7 @@ MMDeploy 是 [OpenMMLab](https://openmmlab.com/) 模型部署工具箱,**为
[ci-backend-torchscript]: https://github.com/open-mmlab/mmdeploy/actions/workflows/backend-torchscript.yml
[ci-build-riscv64-gcc]: https://github.com/open-mmlab/mmdeploy/actions/workflows/linux-riscv64-gcc.yml
[ci-build-rknpu]: https://github.com/open-mmlab/mmdeploy/actions/workflows/linux-rknpu.yml
[ci-build-tvm]: https://github.com/open-mmlab/mmdeploy/actions/workflows/backend-tvm.yml
[pass-backend-ascend]: https://img.shields.io/github/workflow/status/open-mmlab/mmdeploy/backend-ascend
[pass-backend-coreml]: https://img.shields.io/github/workflow/status/open-mmlab/mmdeploy/backend-coreml
[pass-backend-ncnn]: https://img.shields.io/github/workflow/status/open-mmlab/mmdeploy/backend-ncnn
@ -214,3 +215,4 @@ MMDeploy 是 [OpenMMLab](https://openmmlab.com/) 模型部署工具箱,**为
[pass-backend-torchscript]: https://img.shields.io/github/workflow/status/open-mmlab/mmdeploy/backend-ort
[pass-build-riscv64-gcc]: https://img.shields.io/github/workflow/status/open-mmlab/mmdeploy/build_riscv64_gcc
[pass-build-rknpu]: https://img.shields.io/github/workflow/status/open-mmlab/mmdeploy/build_rknpu
[pass-build-tvm]: https://img.shields.io/github/workflow/status/open-mmlab/mmdeploy/build_tvm

View File

@ -0,0 +1,47 @@
# Copyright (c) OpenMMLab. All rights reserved.
if (NOT DEFINED TVM_DIR)
set(TVM_DIR $ENV{TVM_DIR})
endif ()
if (NOT TVM_DIR)
message(FATAL_ERROR "Please set TVM_DIR with cmake -D option.")
endif()
find_path(
TVM_INCLUDE_DIR tvm/runtime/c_runtime_api.h
HINTS ${TVM_DIR}
PATH_SUFFIXES include)
find_path(
DMLC_CORE_INCLUDE_DIR dmlc/io.h
HINTS ${TVM_DIR}/3rdparty/dmlc-core
PATH_SUFFIXES include)
find_path(
DLPACK_INCLUDE_DIR dlpack/dlpack.h
HINTS ${TVM_DIR}/3rdparty/dlpack
PATH_SUFFIXES include)
find_library(
TVM_LIBRARY_PATH tvm_runtime
HINTS ${TVM_DIR}
PATH_SUFFIXES build lib build/${CMAKE_BUILD_TYPE})
if (NOT (TVM_INCLUDE_DIR AND DMLC_CORE_INCLUDE_DIR AND DLPACK_INCLUDE_DIR AND TVM_LIBRARY_PATH))
message(FATAL_ERROR "Couldn't find tvm in TVM_DIR: "
"${TVM_DIR}, please check if the path is correct.")
endif()
add_library(tvm_runtime SHARED IMPORTED)
set_property(TARGET tvm_runtime APPEND PROPERTY IMPORTED_CONFIGURATIONS RELEASE)
if (MSVC)
set_target_properties(tvm_runtime PROPERTIES
IMPORTED_IMPLIB_RELEASE ${TVM_LIBRARY_PATH}
INTERFACE_INCLUDE_DIRECTORIES ${TVM_INCLUDE_DIR} ${DMLC_CORE_INCLUDE_DIR} ${DLPACK_INCLUDE_DIR}
)
else()
set_target_properties(tvm_runtime PROPERTIES
IMPORTED_LOCATION_RELEASE ${TVM_LIBRARY_PATH}
INTERFACE_INCLUDE_DIRECTORIES ${TVM_INCLUDE_DIR} ${DMLC_CORE_INCLUDE_DIR} ${DLPACK_INCLUDE_DIR}
)
endif()

View File

@ -0,0 +1 @@
backend_config = dict(type='tvm')

View File

@ -0,0 +1,12 @@
_base_ = ['./classification_static.py', '../_base_/backends/tvm.py']
onnx_config = dict(input_shape=[224, 224])
backend_config = dict(model_inputs=[
dict(
shape=dict(input=[1, 3, 224, 224]),
dtype=dict(input='float32'),
tuner=dict(
type='AutoScheduleTuner',
log_file='tvm_tune_log.log',
num_measure_trials=2000))
])
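For reference, the model_inputs entry above maps onto the arguments of mmdeploy.backend.tvm.from_onnx introduced later in this PR. A minimal sketch, assuming the exported ONNX file and the output library are named end2end.onnx and end2end.so (the converter may fill in additional tuner fields, such as the build target, that are not shown here):

from mmdeploy.backend.tvm import from_onnx

# shape, dtype and tuner mirror the config entry above; file names are illustrative.
from_onnx(
    'end2end.onnx',
    'end2end.so',
    shape={'input': [1, 3, 224, 224]},
    dtype={'input': 'float32'},
    tuner=dict(
        type='AutoScheduleTuner',
        log_file='tvm_tune_log.log',
        num_measure_trials=2000))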

View File

@ -0,0 +1,16 @@
_base_ = ['./classification_tvm-autotvm_static-224x224.py']
calib_config = dict(create_calib=True, calib_file='calib_data.h5')
backend_config = dict(model_inputs=[
dict(
shape=dict(input=[1, 3, 224, 224]),
dtype=dict(input='float32'),
tuner=dict(
type='AutoTVMTuner',
log_file='tvm_tune_log.log',
n_trial=1000,
tuner=dict(type='XGBTuner'),
),
qconfig=dict(calibrate_mode='kl_divergence', weight_scale='max'),
)
])
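The calib_config and qconfig entries above enable post-training quantization. A rough sketch of how they reach from_onnx, assuming the calibration samples come from the generated calib_data.h5 through HDF5Dataset (whose exact constructor arguments are not shown in this diff):

from mmdeploy.backend.tvm import from_onnx

# qconfig configures tvm.relay.quantize; the dataset argument (a callable built
# from 'calib_data.h5', see HDF5Dataset) is omitted in this sketch.
from_onnx(
    'end2end.onnx',
    'end2end.so',
    shape={'input': [1, 3, 224, 224]},
    dtype={'input': 'float32'},
    qconfig=dict(calibrate_mode='kl_divergence', weight_scale='max'))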

View File

@ -0,0 +1,13 @@
_base_ = ['./classification_static.py', '../_base_/backends/tvm.py']
onnx_config = dict(input_shape=[224, 224])
backend_config = dict(model_inputs=[
dict(
shape=dict(input=[1, 3, 224, 224]),
dtype=dict(input='float32'),
tuner=dict(
type='AutoTVMTuner',
log_file='tvm_tune_log.log',
n_trial=1000,
tuner=dict(type='XGBTuner')))
])

View File

@ -0,0 +1,13 @@
_base_ = ['../_base_/base_static.py', '../../_base_/backends/tvm.py']
onnx_config = dict(input_shape=[1344, 800])
backend_config = dict(model_inputs=[
dict(
use_vm=True,
shape=dict(input=[1, 3, 800, 1344]),
dtype=dict(input='float32'),
tuner=dict(
type='AutoScheduleTuner',
log_file='tvm_tune_log.log',
num_measure_trials=2000))
])

View File

@ -0,0 +1,15 @@
_base_ = ['../_base_/base_static.py', '../../_base_/backends/tvm.py']
onnx_config = dict(input_shape=[300, 300])
backend_config = dict(model_inputs=[
dict(
use_vm=True,
shape=dict(input=[1, 3, 300, 300]),
dtype=dict(input='float32'),
tuner=dict(
type='AutoTVMTuner',
log_file='tvm_tune_log.log',
n_trial=1000,
tuner=dict(type='XGBTuner'),
))
])

View File

@ -0,0 +1,15 @@
_base_ = ['../_base_/base_static.py', '../../_base_/backends/tvm.py']
onnx_config = dict(input_shape=[1344, 800])
backend_config = dict(model_inputs=[
dict(
use_vm=True,
shape=dict(input=[1, 3, 800, 1344]),
dtype=dict(input='float32'),
tuner=dict(
type='AutoTVMTuner',
log_file='tvm_tune_log.log',
n_trial=1000,
tuner=dict(type='XGBTuner'),
))
])

View File

@ -0,0 +1,15 @@
_base_ = [
'../_base_/base_instance-seg_static.py', '../../_base_/backends/tvm.py'
]
onnx_config = dict(input_shape=[1344, 800])
backend_config = dict(model_inputs=[
dict(
use_vm=True,
shape=dict(input=[1, 3, 800, 1344]),
dtype=dict(input='float32'),
tuner=dict(
type='AutoScheduleTuner',
log_file='tvm_tune_log.log',
num_measure_trials=20000))
])

View File

@ -0,0 +1,17 @@
_base_ = [
'../_base_/base_instance-seg_static.py', '../../_base_/backends/tvm.py'
]
onnx_config = dict(input_shape=[1344, 800])
backend_config = dict(model_inputs=[
dict(
use_vm=True,
shape=dict(input=[1, 3, 800, 1344]),
dtype=dict(input='float32'),
tuner=dict(
type='AutoTVMTuner',
log_file='tvm_tune_log.log',
n_trial=10000,
tuner=dict(type='XGBTuner'),
))
])

View File

@ -0,0 +1,12 @@
_base_ = ['./segmentation_static.py', '../_base_/backends/tvm.py']
onnx_config = dict(input_shape=[1024, 512])
backend_config = dict(model_inputs=[
dict(
shape=dict(input=[1, 3, 512, 1024]),
dtype=dict(input='float32'),
tuner=dict(
type='AutoScheduleTuner',
log_file='tvm_tune_log.log',
num_measure_trials=2000))
])

View File

@ -0,0 +1,13 @@
_base_ = ['./segmentation_static.py', '../_base_/backends/tvm.py']
onnx_config = dict(input_shape=[1024, 512])
backend_config = dict(model_inputs=[
dict(
shape=dict(input=[1, 3, 512, 1024]),
dtype=dict(input='float32'),
tuner=dict(
type='AutoTVMTuner',
log_file='tvm_tune_log.log',
n_trial=1000,
tuner=dict(type='XGBTuner')))
])

View File

@ -14,6 +14,7 @@ namespace mmdeploy {
namespace framework {
using TensorShape = std::vector<int64_t>;
struct TensorDesc {
Device device;
DataType data_type{DataType::kFLOAT};

View File

@ -11,13 +11,16 @@ class CpuHostMemory : public NonCopyable {
public:
CpuHostMemory() : size_(), data_(), owned_data_{false} {}
Result<void> Init(size_t size, size_t alignment) {
if (alignment != 1) {
return Status(eNotSupported);
}
data_ = std::malloc(size);
size_t space = (size + alignment - 1) / alignment * alignment;
#ifdef _MSC_VER
data_ = _aligned_malloc(space, alignment);
#else
data_ = std::aligned_alloc(alignment, space);
#endif
if (!data_) {
return Status(eOutOfMemory);
}
aligned_data_ = data_;
size_ = size;
owned_data_ = true;
return success();
@ -38,7 +41,11 @@ class CpuHostMemory : public NonCopyable {
~CpuHostMemory() {
if (data_) {
if (owned_data_) {
#ifdef _MSC_VER
_aligned_free(data_);
#else
std::free(data_);
#endif
owned_data_ = false;
}
data_ = nullptr;
@ -47,11 +54,12 @@ class CpuHostMemory : public NonCopyable {
size_ = 0;
}
size_t size() const { return size_; }
void* data() const { return data_; }
void* data() const { return owned_data_ ? aligned_data_ : data_; }
private:
size_t size_;
void* data_;
void* aligned_data_{nullptr};
bool owned_data_;
std::shared_ptr<void> external_;
};

View File

@ -69,7 +69,7 @@ class CudaDeviceMemory : public NonCopyable {
public:
explicit CudaDeviceMemory(int device_id) : device_id_(device_id), size_(), owned_block_() {}
Result<void> Init(size_t size, Allocator allocator, size_t alignment, uint64_t flags) {
if (alignment != 1) {
if (alignment > 256 || 256 % alignment != 0) {
return Status(eNotSupported);
}
allocator_ = std::move(allocator);

View File

@ -38,6 +38,10 @@ if ("coreml" IN_LIST MMDEPLOY_TARGET_BACKENDS)
add_subdirectory(coreml)
endif ()
if ("tvm" IN_LIST MMDEPLOY_TARGET_BACKENDS)
add_subdirectory(tvm)
endif ()
if ("rknn" IN_LIST MMDEPLOY_TARGET_BACKENDS)
add_subdirectory(rknn)
endif ()

View File

@ -0,0 +1,11 @@
# Copyright (c) OpenMMLab. All rights reserved.
project(mmdeploy_tvm_net)
include(${CMAKE_SOURCE_DIR}/cmake/modules/FindTVM.cmake)
mmdeploy_add_module(${PROJECT_NAME} tvm_net.cpp)
target_include_directories(${PROJECT_NAME} PRIVATE ${TVM_INCLUDE_DIR} ${DLPACK_INCLUDE_DIR} ${DMLC_CORE_INCLUDE_DIR})
target_link_libraries(${PROJECT_NAME} PRIVATE tvm_runtime mmdeploy_dlpack_utils)
add_library(mmdeploy::tvm_net ALIAS ${PROJECT_NAME})

View File

@ -0,0 +1,282 @@
// Copyright (c) OpenMMLab. All rights reserved.
#include "tvm_net.h"
#include <tvm/runtime/container/adt.h>
#include <tvm/runtime/device_api.h>
#include <tvm/runtime/vm/executable.h>
#include <tvm/runtime/vm/vm.h>
#include <fstream>
#include "mmdeploy/core/model.h"
#include "mmdeploy/core/utils/filesystem.h"
#include "mmdeploy/core/utils/formatter.h"
#include "mmdeploy/utils/dlpack/dlpack_utils.h"
namespace mmdeploy::framework {
static DLDevice GetDLDevice(const Device& device) {
DLDevice dev;
if (device.is_device()) {
dev = {kDLCUDA, device.device_id()};
} else {
dev = {kDLCPU, 0};
}
return dev;
}
static Result<DLDataType> FromDataType(DataType data_type) {
switch (data_type) {
case DataType::kFLOAT:
return DLDataType{kDLFloat, 32, 1};
case DataType::kINT32:
return DLDataType{kDLInt, 32, 1};
case DataType::kINT64:
return DLDataType{kDLInt, 64, 1};
case DataType::kINT8:
return DLDataType{kDLInt, 8, 1};
default:
MMDEPLOY_ERROR("Unsupported mmdeploy::DataType");
return Status(eNotSupported);
}
}
static Result<DataType> ToDataType(DLDataType scalar_type) {
if (scalar_type.lanes != 1) {
MMDEPLOY_ERROR("Only scalar_type.lanes == 1 is supported.");
return Status(eNotSupported);
}
if (scalar_type.code == kDLFloat && scalar_type.bits == 32) {
return DataType::kFLOAT;
} else if (scalar_type.code == kDLInt) {
switch (scalar_type.bits) {
case 32:
return DataType::kINT32;
case 64:
return DataType::kINT64;
case 8:
return DataType::kINT8;
default:
break;
}
}
MMDEPLOY_ERROR("Unsupported code: {}, bits: {}, lanes: {}.", std::to_string(scalar_type.code),
std::to_string(scalar_type.bits), std::to_string(scalar_type.lanes));
return Status(eNotSupported);
}
static std::vector<std::string> split_str(const std::string& s, char delim) {
using namespace std;
vector<string> result;
stringstream ss(s);
string item;
while (getline(ss, item, delim)) {
result.push_back(item);
}
return result;
}
Result<void> TVMNet::Init(const Value& args) {
auto& context = args["context"];
device_ = context["device"].get<Device>();
stream_ = context["stream"].get<Stream>();
auto name = args["name"].get<std::string>();
auto model = context["model"].get<Model>();
OUTCOME_TRY(auto config, model.GetModelConfig(name));
auto tmp_dir = fs::temp_directory_path();
std::string tmp_lib = (tmp_dir / fs::path(config.net)).string();
OUTCOME_TRY(auto raw_lib, model.ReadFile(config.net));
std::string tmp_label = (tmp_dir / fs::path(config.weights)).string();
OUTCOME_TRY(auto raw_label, model.ReadFile(config.weights));
try {
std::ofstream lib_out(tmp_lib, std::ios::binary);
lib_out << raw_lib;
lib_out.close();
} catch (const std::exception& e) {
MMDEPLOY_ERROR("unhandled exception when creating tmp library: {}", e.what());
return Status(eFail);
}
try {
auto io_names = split_str(raw_label, '\n');
auto input_names = split_str(io_names[0], ',');
auto output_names = split_str(io_names[1], ',');
DLDevice dev = GetDLDevice(device_);
mod_factory_ = tvm::runtime::Module::LoadFromFile(tmp_lib);
use_vm_ = false;
if (io_names.size() > 2) {
use_vm_ = true;
OUTCOME_TRY(auto bytecode, model.ReadFile(io_names[2]));
auto exec = tvm::runtime::vm::Executable::Load(bytecode, mod_factory_);
const auto runtime_create = *tvm::runtime::Registry::Get("runtime._VirtualMachine");
tvm::runtime::Module vm_ = runtime_create(exec);
// init vm
auto func_init = vm_.GetFunction("init", false);
auto alloc_type = static_cast<int>(tvm::runtime::vm::AllocatorType::kPooled);
if (dev.device_type != kDLCPU) {
func_init(static_cast<int>(kDLCPU), 0, alloc_type, int(dev.device_type), int(dev.device_id),
alloc_type);
} else {
func_init(int(dev.device_type), int(dev.device_id), alloc_type);
}
// get input ids
auto func_input_index_ = vm_.GetFunction("get_input_index", false);
for (auto name : input_names) {
input_ids_[name] = func_input_index_(name, "main");
}
// get function
func_set_input_ = vm_.GetFunction("set_input");
func_run_ = vm_.GetFunction("invoke");
} else {
// the graph executor does not synchronize the stream after run
if (device_.is_device())
tvm::runtime::DeviceAPI::Get(dev)->SetStream(dev, stream_.GetNative());
tvm::runtime::Module gmod = mod_factory_.GetFunction("default")(dev);
// get function
func_set_input_ = gmod.GetFunction("set_input");
func_get_output_ = gmod.GetFunction("get_output");
func_run_ = gmod.GetFunction("run");
}
auto ToDesc = [&](const std::string& name) {
return TensorDesc{device_, DataType::kFLOAT, {}, name};
};
for (auto name : input_names) {
input_tensors_.emplace_back(ToDesc(name));
}
for (auto name : output_names) {
output_tensors_.emplace_back(ToDesc(name));
}
} catch (const std::exception& e) {
MMDEPLOY_ERROR("unhandled exception when creating TVM Net: {}", e.what());
return Status(eFail);
}
return success();
}
Result<void> TVMNet::ForwardAsync(Event* event) { return Status(eNotSupported); }
Result<void> TVMNet::Deinit() { return success(); }
Result<Span<Tensor>> TVMNet::GetInputTensors() { return input_tensors_; }
Result<Span<Tensor>> TVMNet::GetOutputTensors() { return output_tensors_; }
Result<void> TVMNet::Reshape(Span<TensorShape> input_shapes) {
for (size_t i = 0; i < input_shapes.size(); ++i) {
input_tensors_[i].Reshape(input_shapes[i]);
}
return success();
}
Result<void> TVMNet::Forward() {
DLDevice dev = GetDLDevice(device_);
try {
OUTCOME_TRY(stream_.Wait());
if (use_vm_) {
// vm
// set input
int num_inputs = input_tensors_.size();
std::vector<tvm::runtime::NDArray> args_arr(num_inputs);
std::vector<TVMValue> tvm_values(num_inputs + 1);
std::vector<int> tvm_type_codes(num_inputs + 1);
tvm::runtime::TVMArgsSetter setter(tvm_values.data(), tvm_type_codes.data());
setter(0, "main");
for (int k = 0; k < num_inputs; ++k) {
auto v = input_tensors_[k];
OUTCOME_TRY(auto managed_tensor, ToDLPack(v, stream_));
OUTCOME_TRY(stream_.Wait());
args_arr[k] = tvm::runtime::NDArray::FromDLPack(managed_tensor);
int input_id = input_ids_[v.name()];
setter(input_id + 1, args_arr[k]);
}
func_set_input_.CallPacked(
tvm::runtime::TVMArgs(tvm_values.data(), tvm_type_codes.data(), num_inputs + 1), nullptr);
// run
tvm::runtime::TVMRetValue ret = func_run_("main");
if (device_.is_device()) {
// the tvm virtual machine uses the default stream.
OUTCOME_TRY(Stream(device_, nullptr).Wait());
}
// get output
if (ret.type_code() == kTVMNDArrayHandle) {
tvm::runtime::NDArray ndarray = ret.AsObjectRef<tvm::runtime::NDArray>();
Tensor& v = output_tensors_[0];
OUTCOME_TRY(v, FromDLPack(ndarray.ToDLPack(), v.name(), stream_));
} else if (ret.type_code() == kTVMObjectHandle) {
const auto& adt = ret.AsObjectRef<tvm::runtime::ADT>();
for (int i = 0; i < output_tensors_.size(); ++i) {
tvm::runtime::NDArray ndarray = tvm::runtime::Downcast<tvm::runtime::NDArray>(adt[i]);
Tensor& v = output_tensors_[i];
OUTCOME_TRY(v, FromDLPack(ndarray.ToDLPack(), v.name(), stream_));
}
} else {
MMDEPLOY_ERROR("error return type code {}", ret.type_code());
return Status(eFail);
}
} else {
// graph executor
// set input
for (auto v : input_tensors_) {
OUTCOME_TRY(auto managed_tensor, ToDLPack(v, stream_));
OUTCOME_TRY(stream_.Wait());
auto ndarray = tvm::runtime::NDArray::FromDLPack(managed_tensor);
func_set_input_(v.name(), ndarray);
}
// run
func_run_();
// get output
for (int i = 0; i < output_tensors_.size(); ++i) {
tvm::runtime::NDArray ndarray = func_get_output_(i);
Tensor& v = output_tensors_[i];
OUTCOME_TRY(v, FromDLPack(ndarray.ToDLPack(), v.name(), stream_));
}
OUTCOME_TRY(stream_.Wait());
}
} catch (const std::exception& e) {
MMDEPLOY_ERROR(e.what());
return Status(eFail);
}
return success();
}
static std::unique_ptr<Net> Create(const Value& args) {
auto p = std::make_unique<TVMNet>();
if (auto status = p->Init(args)) {
return p;
} else {
MMDEPLOY_ERROR("Failed to create TVMNet with config: {}", args);
}
return nullptr;
}
MMDEPLOY_REGISTER_FACTORY_FUNC(Net, (tvm, 0), Create);
} // namespace mmdeploy::framework

View File

@ -0,0 +1,40 @@
// Copyright (c) OpenMMLab. All rights reserved.
#ifndef MMDEPLOY_SRC_NET_TVM_TVM_NET_H_
#define MMDEPLOY_SRC_NET_TVM_TVM_NET_H_
#include <tvm/runtime/module.h>
#include "mmdeploy/core/net.h"
namespace mmdeploy::framework {
class TVMNet : public Net {
public:
~TVMNet() override = default;
Result<void> Init(const Value& cfg) override;
Result<void> Deinit() override;
Result<Span<Tensor>> GetInputTensors() override;
Result<Span<Tensor>> GetOutputTensors() override;
Result<void> Reshape(Span<TensorShape> input_shapes) override;
Result<void> Forward() override;
Result<void> ForwardAsync(Event* event) override;
private:
tvm::runtime::Module mod_factory_;
tvm::runtime::PackedFunc func_set_input_;
tvm::runtime::PackedFunc func_get_output_;
tvm::runtime::PackedFunc func_run_;
bool use_vm_;
std::map<std::string, int> input_ids_;
std::vector<Tensor> input_tensors_;
std::vector<Tensor> output_tensors_;
Device device_;
Stream stream_;
};
} // namespace mmdeploy::framework
#endif // MMDEPLOY_SRC_NET_TVM_TVM_NET_H_

View File

@ -1,3 +1,4 @@
# Copyright (c) OpenMMLab. All rights reserved.
add_subdirectory(dlpack)
add_subdirectory(opencv)

View File

@ -0,0 +1,14 @@
# Copyright (c) OpenMMLab. All rights reserved.
project(mmdeploy_dlpack_utils)
mmdeploy_add_library(${PROJECT_NAME} STATIC dlpack_utils.cpp)
target_link_libraries(${PROJECT_NAME}
PRIVATE mmdeploy::core)
target_include_directories(${PROJECT_NAME}
INTERFACE $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>)
target_include_directories(${PROJECT_NAME} PRIVATE
$<BUILD_INTERFACE:${CMAKE_SOURCE_DIR}/third_party/dlpack>)

View File

@ -0,0 +1,187 @@
// Copyright (c) OpenMMLab. All rights reserved.
#include "dlpack_utils.h"
#include <numeric>
#include "dlpack.h"
#include "mmdeploy/core/device.h"
#include "mmdeploy/core/logger.h"
#include "mmdeploy/core/status_code.h"
#include "mmdeploy/core/tensor.h"
#include "mmdeploy/core/types.h"
namespace mmdeploy {
using mmdeploy::framework::Device;
using mmdeploy::framework::Stream;
using mmdeploy::framework::Tensor;
using mmdeploy::framework::TensorShape;
static inline int64_t element_size(DataType data_type) {
switch (data_type) {
case DataType::kFLOAT:
return 4;
case DataType::kHALF:
return 2;
case DataType::kINT8:
return 1;
case DataType::kINT32:
return 4;
case DataType::kINT64:
return 8;
default:
return 0;
}
}
static inline int64_t get_size(const std::vector<int64_t>& shape) {
if (shape.empty()) {
return 0;
}
auto _size = std::accumulate(begin(shape), end(shape), 1LL, std::multiplies<>());
return std::max(0LL, _size);
}
inline static Result<Device> FromDLDevice(const DLDevice& device) {
int device_id = device.device_id;
switch (device.device_type) {
case kDLCPU:
return Device("cpu", device_id);
case kDLCUDA:
return Device("cuda", device_id);
default:
MMDEPLOY_ERROR("Unsupported DLDevice.");
return Status(eNotSupported);
}
}
inline static DLDevice ToDLDevice(const Device& device) {
auto device_type = device.is_device() ? kDLCUDA : kDLCPU;
int device_id = device.device_id();
return DLDevice{device_type, device_id};
}
inline static Result<DataType> FromDLDataType(const DLDataType& dtype) {
if (dtype.lanes != 1) {
MMDEPLOY_ERROR("DLDataType.lanes != 1 is not supported.");
return Status(eNotSupported);
}
switch (dtype.code) {
case kDLFloat:
if (dtype.bits == 32)
return DataType::kFLOAT;
else {
MMDEPLOY_ERROR("Unsupported bits. {}", dtype.bits);
return Status(eNotSupported);
}
case kDLInt:
if (dtype.bits == 32) return DataType::kINT32;
if (dtype.bits == 64) return DataType::kINT64;
if (dtype.bits == 8)
return DataType::kINT8;
else {
MMDEPLOY_ERROR("Unsupported bits. {}", dtype.bits);
return Status(eNotSupported);
}
break;
default:
MMDEPLOY_ERROR("Unsupported DLDataType.");
return Status(eNotSupported);
}
}
inline static Result<DLDataType> ToDLDataType(const DataType& dtype) {
switch (dtype) {
case DataType::kFLOAT:
return DLDataType{kDLFloat, 32, 1};
case DataType::kINT32:
return DLDataType{kDLInt, 32, 1};
case DataType::kINT64:
return DLDataType{kDLInt, 64, 1};
case DataType::kINT8:
return DLDataType{kDLInt, 8, 1};
default:
MMDEPLOY_ERROR("Unsupported mmdeploy::DataType");
return Status(eNotSupported);
}
}
static void TensorDeleter(struct DLManagedTensor* self) {
auto tensor = static_cast<Tensor*>(self->manager_ctx);
delete tensor;
}
static bool IsContiguous(const int64_t* shape, const int64_t* stride, int ndim) {
if (ndim <= 1 || stride == nullptr) return true;
for (auto i = 1; i < ndim; ++i) {
if (stride[i - 1] != shape[i] * stride[i]) return false;
}
return true;
}
Result<DLManagedTensor*> ToDLPack(Tensor& tensor, Stream stream) {
using mmdeploy::framework::Buffer;
auto managed_tensor = new DLManagedTensor();
// set deleter
managed_tensor->deleter = TensorDeleter;
Tensor* new_tensor = nullptr;
// create manager_ctx
{
auto desc = tensor.desc();
uint64_t data_val = reinterpret_cast<uint64_t>(tensor.data());
if ((data_val & 0xff) != 0) {
// copy buffer if data is not aligned.
new_tensor =
new Tensor(desc, Buffer(desc.device, tensor.byte_size(), tensor.allocator(), 256));
OUTCOME_TRY(tensor.CopyTo(*new_tensor, stream));
} else {
// reuse buffer
new_tensor = new Tensor(desc, tensor.buffer());
}
managed_tensor->manager_ctx = static_cast<void*>(new_tensor);
}
// setup dl_tensor
{
auto& dl_tensor = managed_tensor->dl_tensor;
auto& desc = new_tensor->desc();
dl_tensor.data = new_tensor->data();
dl_tensor.device = ToDLDevice(desc.device);
OUTCOME_TRY(dl_tensor.dtype, ToDLDataType(desc.data_type));
dl_tensor.ndim = desc.shape.size();
dl_tensor.byte_offset = 0;
dl_tensor.shape = (int64_t*)(&(desc.shape[0]));
dl_tensor.strides = nullptr;
}
return managed_tensor;
}
Result<Tensor> FromDLPack(DLManagedTensor* managed_tensor, const std::string& name, Stream stream) {
using mmdeploy::framework::TensorDesc;
auto& dl_tensor = managed_tensor->dl_tensor;
if (!IsContiguous(dl_tensor.shape, dl_tensor.strides, dl_tensor.ndim)) {
MMDEPLOY_ERROR("Only contiguous DLTensor is supported now.");
return Status(eNotSupported);
}
TensorShape shape(dl_tensor.shape, dl_tensor.shape + dl_tensor.ndim);
OUTCOME_TRY(auto device, FromDLDevice(dl_tensor.device));
OUTCOME_TRY(auto dtype, FromDLDataType(dl_tensor.dtype));
// create tensor
TensorDesc desc{device, dtype, shape, name};
auto buffer_size = get_size(shape) * element_size(dtype);
auto raw_data = static_cast<void*>(static_cast<uint8_t*>(dl_tensor.data) + dl_tensor.byte_offset);
Tensor ret(desc);
OUTCOME_TRY(ret.CopyFrom(raw_data, stream));
// delete old tensor
if (managed_tensor->deleter != nullptr) managed_tensor->deleter(managed_tensor);
return ret;
}
} // namespace mmdeploy

View File

@ -0,0 +1,17 @@
// Copyright (c) OpenMMLab. All rights reserved.
#ifndef MMDEPLOY_CSRC_UTILS_DLPACK_DLPACK_UTILS_H_
#define MMDEPLOY_CSRC_UTILS_DLPACK_DLPACK_UTILS_H_
#include "mmdeploy/core/device.h"
#include "mmdeploy/core/tensor.h"
struct DLManagedTensor;
namespace mmdeploy {
Result<DLManagedTensor*> ToDLPack(framework::Tensor& tensor, framework::Stream stream = {});
Result<framework::Tensor> FromDLPack(DLManagedTensor* managed_tensor, const std::string& name = "",
framework::Stream stream = {});
} // namespace mmdeploy
#endif // MMDEPLOY_CSRC_UTILS_DLPACK_DLPACK_UTILS_H_

View File

@ -49,4 +49,5 @@ Here is the verified installation script. If you want mmdeploy to support multip
| build_ubuntu_x64_ort.py | 18.04/20.04 |
| build_ubuntu_x64_pplnn.py | 18.04/20.04 |
| build_ubuntu_x64_torchscript.py | 18.04/20.04 |
| build_ubuntu_x64_tvm.py | 18.04/20.04 |
| build_jetson_orin_python38.sh | JetPack5.0 L4T 34.1 |

View File

@ -100,7 +100,7 @@
</tr>
<tr>
<td>MMDEPLOY_TARGET_BACKENDS</td>
<td>{"trt", "ort", "pplnn", "ncnn", "openvino", "torchscript", "snpe"}</td>
<td>{"trt", "ort", "pplnn", "ncnn", "openvino", "torchscript", "snpe", "tvm"}</td>
<td>N/A</td>
<td>Enabling inference engine. <b>By default, no target inference engine is set, since it highly depends on the use case.</b> When more than one engine are specified, it has to be set with a semicolon separated list of inference backend names, e.g. <pre><code>-DMMDEPLOY_TARGET_BACKENDS="trt;ort;pplnn;ncnn;openvino"</code></pre>
After specifying the inference engine, it's package path has to be passed to cmake as follows, <br>
@ -120,7 +120,9 @@
6. <b>torchscript</b>: TorchScript. <code>Torch_DIR</code> is needed.
<pre><code>-DTorch_DIR=${Torch_DIR}</code></pre>
Currently, <b>The Model Converter supports torchscript, but SDK doesn't</b>.<br>
7. <b>snpe</b>: qcom snpe. <code>SNPE_ROOT</code> must exist in the environment variables because of C/S mode.
7. <b>snpe</b>: qcom snpe. <code>SNPE_ROOT</code> must exist in the environment variables because of C/S mode.<br>
8. <b>coreml</b>: CoreML. <code>Torch_DIR</code> is required for model conversion.<br>
9. <b>TVM</b>: TVM. <code>TVM_DIR</code> is required. <pre><code>-DTVM_DIR=${TVM_DIR}</code></pre>
</td>
</tr>
<tr>

View File

@ -7,10 +7,8 @@
- [Install Dependencies for SDK](#install-dependencies-for-sdk)
- [Install Inference Engines for MMDeploy](#install-inference-engines-for-mmdeploy)
- [Build MMDeploy](#build-mmdeploy)
- [Build Options Spec](#build-options-spec)
- [Build Model Converter](#build-model-converter)
- [Build Custom Ops](#build-custom-ops)
- [Install Model Converter](#install-model-converter)
- [Install Model Converter](#install-model-converter)
- [Build SDK and Demo](#build-sdk-and-demo)
______________________________________________________________________
@ -249,6 +247,18 @@ export ASCEND_TOOLKIT_HOME="/usr/local/Ascend/ascend-toolkit/latest"
</code></pre>
</td>
</tr>
<tr>
<td>TVM</td>
<td>TVM</td>
<td>
1. Install TVM following the <a href="https://tvm.apache.org/docs/install/from_source.html">official guide</a>.<br>
2. Set up the environment
<pre><code>
export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${TVM_HOME}/build
export PYTHONPATH=${TVM_HOME}/python:${PYTHONPATH}
</code></pre>
</td>
</tr>
</tbody>
</table>

View File

@ -0,0 +1,51 @@
# Test on TVM
## Supported Models
| Model | Codebase | Model config |
| :---------------- | :--------------- | :---------------------------------------------------------------------------------------------: |
| RetinaNet | MMDetection | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/retinanet) |
| Faster R-CNN | MMDetection | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn) |
| YOLOv3 | MMDetection | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/yolo) |
| YOLOX | MMDetection | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/yolox) |
| Mask R-CNN | MMDetection | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn) |
| SSD | MMDetection | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/ssd) |
| ResNet | MMClassification | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/resnet) |
| ResNeXt | MMClassification | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/resnext) |
| SE-ResNet | MMClassification | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/seresnet) |
| MobileNetV2 | MMClassification | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/mobilenet_v2) |
| ShuffleNetV1 | MMClassification | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/shufflenet_v1) |
| ShuffleNetV2 | MMClassification | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/shufflenet_v2) |
| VisionTransformer | MMClassification | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/vision_transformer) |
| FCN | MMSegmentation | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/fcn) |
| PSPNet | MMSegmentation | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/pspnet) |
| DeepLabV3 | MMSegmentation | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/deeplabv3) |
| DeepLabV3+ | MMSegmentation | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/deeplabv3plus) |
| UNet | MMSegmentation | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/unet) |
The table above lists the models that we have tested. Models not listed in the table might still be convertible. Please have a try.
## Test
- Ubuntu 20.04
- tvm 0.9.0
| mmcls | metric | PyTorch | TVM |
| :----------------------------------------------------------------------------------------------------------------------------------------------------: | :----: | :-----: | :---: |
| [ResNet-18](https://github.com/open-mmlab/mmclassification/tree/master/configs/resnet/resnet18_b32x8_imagenet.py) | top-1 | 69.90 | 69.90 |
| [ResNeXt-50](https://github.com/open-mmlab/mmclassification/tree/master/configs/resnext/resnext50_32x4d_b32x8_imagenet.py) | top-1 | 77.90 | 77.90 |
| [ShuffleNet V2](https://github.com/open-mmlab/mmclassification/tree/master/configs/shufflenet_v2/shufflenet_v2_1x_b64x16_linearlr_bn_nowd_imagenet.py) | top-1 | 69.55 | 69.55 |
| [MobileNet V2](https://github.com/open-mmlab/mmclassification/tree/master/configs/mobilenet_v2/mobilenet-v2_8xb32_in1k.py) | top-1 | 71.86 | 71.86 |
<!-- | [Vision Transformer](https://github.com/open-mmlab/mmclassification/blob/master/configs/vision_transformer/vit-base-p16_ft-64xb64_in1k-384.py) | top-1 | 85.43 | 84.01 | -->
| mmdet(\*) | metric | PyTorch | TVM |
| :-------------------------------------------------------------------------------------: | :----: | :-----: | :--: |
| [SSD](https://github.com/open-mmlab/mmdetection/tree/master/configs/ssd/ssd300_coco.py) | box AP | 25.5 | 25.5 |
\*: We only test SSD since dynamic shape is not supported for now.
| mmseg | metric | PyTorch | TVM |
| :------------------------------------------------------------------------------------------------------------------------: | :----: | :-----: | :---: |
| [FCN](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/fcn/fcn_r50-d8_512x1024_40k_cityscapes.py) | mIoU | 72.25 | 72.36 |
| [PSPNet](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/pspnet/pspnet_r50-d8_512x1024_80k_cityscapes.py) | mIoU | 78.55 | 77.90 |

View File

@ -0,0 +1,8 @@
# TVM feature support
MMDeploy has integrated TVM into both the model converter and the SDK. Features include (see the configuration sketch after this list):
- AutoTVM tuner
- Ansor tuner
- Graph Executor runtime
- Virtual machine runtime
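Which tuner and which runtime are used is selected through backend_config in the deploy config. A minimal sketch based on the config files added in this PR (input shapes and trial counts are illustrative):

# AutoTVM tuner, graph executor runtime
backend_config = dict(
    type='tvm',
    model_inputs=[
        dict(
            shape=dict(input=[1, 3, 224, 224]),
            dtype=dict(input='float32'),
            tuner=dict(
                type='AutoTVMTuner',
                log_file='tvm_tune_log.log',
                n_trial=1000,
                tuner=dict(type='XGBTuner')))
    ])

# Ansor (auto-scheduler) tuner, virtual machine runtime selected via use_vm
backend_config = dict(
    type='tvm',
    model_inputs=[
        dict(
            use_vm=True,
            shape=dict(input=[1, 3, 800, 1344]),
            dtype=dict(input='float32'),
            tuner=dict(
                type='AutoScheduleTuner',
                log_file='tvm_tune_log.log',
                num_measure_trials=2000))
    ])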

View File

@ -35,6 +35,7 @@ You can switch between Chinese and English documents in the lower-left corner of
03-benchmark/supported_models.md
03-benchmark/benchmark.md
03-benchmark/benchmark_edge.md
03-benchmark/benchmark_tvm.md
03-benchmark/quantization.md
.. toctree::

View File

@ -49,4 +49,5 @@ $ python3 tools/check_env.py
| build_ubuntu_x64_ort.py | 18.04/20.04 |
| build_ubuntu_x64_pplnn.py | 18.04/20.04 |
| build_ubuntu_x64_torchscript.py | 18.04/20.04 |
| build_ubuntu_x64_tvm.py | 18.04/20.04 |
| build_jetson_orin_python38.sh | JetPack5.0 L4T 34.1 |

View File

@ -104,7 +104,7 @@
<tr>
<td>MMDEPLOY_TARGET_BACKENDS</td>
<td>{"trt", "ort", "pplnn", "ncnn", "openvino", "torchscript", "snpe", "coreml"}</td>
<td>{"trt", "ort", "pplnn", "ncnn", "openvino", "torchscript", "snpe", "coreml", "tvm"}</td>
<td>N/A</td>
<td> <b>默认情况下SDK不设置任何后端</b>, 因为它与应用场景高度相关。 当选择多个后端时, 中间使用分号隔开。比如,<pre><code>-DMMDEPLOY_TARGET_BACKENDS="trt;ort;pplnn;ncnn;openvino"</code></pre>
构建时,几乎每个后端,都需设置一些路径变量,用来查找依赖包。<br>
@ -121,6 +121,7 @@
6. <b>torchscript</b>: 表示 TorchScript。目前仅模型转换支持 torchscript 格式SDK 尚未支持。<br>
7. <b>snpe</b>: 表示 qcom snpe。需要环境变量设置 SNPE_ROOT。<br>
8. <b>coreml</b>: 表示 Core ML。目前在进行模型转换时需要设置 <code>Torch_DIR</code><br>
9. <b>tvm</b>: 表示 TVM。需要设置 <code>TVM_DIR</code><br>
</td>
</tr>

View File

@ -8,10 +8,8 @@
- [安装 MMDeploy SDK 依赖](#安装-mmdeploy-sdk-依赖)
- [安装推理引擎](#安装推理引擎)
- [编译 MMDeploy](#编译-mmdeploy)
- [编译选项说明](#编译选项说明)
- [编译安装 Model Converter](#编译安装-model-converter)
- [编译自定义算子](#编译自定义算子)
- [安装 Model Converter](#安装-model-converter)
- [编译 Model Converter](#编译-model-converter)
- [安装 Model Converter](#安装-model-converter)
- [编译 SDK 和 Demos](#编译-sdk-和-demos)
______________________________________________________________________
@ -246,6 +244,18 @@ export ASCEND_TOOLKIT_HOME="/usr/local/Ascend/ascend-toolkit/latest"
</code></pre>
</td>
</tr>
<tr>
<td>TVM</td>
<td>TVM</td>
<td>
1. 按照 <a href="https://tvm.apache.org/docs/install/from_source.html">官方指引</a>安装 TVM.<br>
2. 配置环境
<pre><code>
export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${TVM_HOME}/build
export PYTHONPATH=${TVM_HOME}/python:${PYTHONPATH}
</code></pre>
</td>
</tr>
</tbody>
</table>

View File

@ -0,0 +1,51 @@
# TVM 测试
## 支持模型列表
| Model | Codebase | Model config |
| :---------------- | :--------------- | :---------------------------------------------------------------------------------------------: |
| RetinaNet | MMDetection | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/retinanet) |
| Faster R-CNN | MMDetection | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn) |
| YOLOv3 | MMDetection | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/yolo) |
| YOLOX | MMDetection | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/yolox) |
| Mask R-CNN | MMDetection | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn) |
| SSD | MMDetection | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/ssd) |
| ResNet | MMClassification | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/resnet) |
| ResNeXt | MMClassification | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/resnext) |
| SE-ResNet | MMClassification | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/seresnet) |
| MobileNetV2 | MMClassification | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/mobilenet_v2) |
| ShuffleNetV1 | MMClassification | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/shufflenet_v1) |
| ShuffleNetV2 | MMClassification | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/shufflenet_v2) |
| VisionTransformer | MMClassification | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/vision_transformer) |
| FCN | MMSegmentation | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/fcn) |
| PSPNet | MMSegmentation | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/pspnet) |
| DeepLabV3 | MMSegmentation | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/deeplabv3) |
| DeepLabV3+ | MMSegmentation | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/deeplabv3plus) |
| UNet | MMSegmentation | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/unet) |
表中仅列出已测试模型,未列出的模型可能同样支持,可以自行尝试转换。
## Test
- Ubuntu 20.04
- tvm 0.9.0
| mmcls | metric | PyTorch | TVM |
| :----------------------------------------------------------------------------------------------------------------------------------------------------: | :----: | :-----: | :---: |
| [ResNet-18](https://github.com/open-mmlab/mmclassification/tree/master/configs/resnet/resnet18_b32x8_imagenet.py) | top-1 | 69.90 | 69.90 |
| [ResNeXt-50](https://github.com/open-mmlab/mmclassification/tree/master/configs/resnext/resnext50_32x4d_b32x8_imagenet.py) | top-1 | 77.90 | 77.90 |
| [ShuffleNet V2](https://github.com/open-mmlab/mmclassification/tree/master/configs/shufflenet_v2/shufflenet_v2_1x_b64x16_linearlr_bn_nowd_imagenet.py) | top-1 | 69.55 | 69.55 |
| [MobileNet V2](https://github.com/open-mmlab/mmclassification/tree/master/configs/mobilenet_v2/mobilenet-v2_8xb32_in1k.py) | top-1 | 71.86 | 71.86 |
<!-- | [Vision Transformer](https://github.com/open-mmlab/mmclassification/blob/master/configs/vision_transformer/vit-base-p16_ft-64xb64_in1k-384.py) | top-1 | 85.43 | 84.01 | -->
| mmdet(\*) | metric | PyTorch | TVM |
| :-------------------------------------------------------------------------------------: | :----: | :-----: | :--: |
| [SSD](https://github.com/open-mmlab/mmdetection/tree/master/configs/ssd/ssd300_coco.py) | box AP | 25.5 | 25.5 |
\*: 由于暂时不支持动态转换,因此仅提供 SSD 的精度测试结果。
| mmseg | metric | PyTorch | TVM |
| :------------------------------------------------------------------------------------------------------------------------: | :----: | :-----: | :---: |
| [FCN](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/fcn/fcn_r50-d8_512x1024_40k_cityscapes.py) | mIoU | 72.25 | 72.36 |
| [PSPNet](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/pspnet/pspnet_r50-d8_512x1024_80k_cityscapes.py) | mIoU | 78.55 | 77.90 |

View File

@ -0,0 +1,8 @@
# TVM 特性支持
MMDeploy 已经将 TVM 集成到模型转换工具以及 SDK 当中。可用的特性包括:
- AutoTVM 调优器
- Ansor 调优器
- Graph Executor 运行时
- Virtual Machine 运行时

View File

@ -35,6 +35,7 @@
03-benchmark/supported_models.md
03-benchmark/benchmark.md
03-benchmark/benchmark_edge.md
03-benchmark/benchmark_tvm.md
03-benchmark/quantization.md
.. toctree::

View File

@ -0,0 +1,12 @@
# Copyright (c) OpenMMLab. All rights reserved.
from mmdeploy.backend.tvm import get_library_ext, is_available
from ..core import PIPELINE_MANAGER
__all__ = ['is_available', 'get_library_ext']
if is_available():
from mmdeploy.backend.tvm import HDF5Dataset
from mmdeploy.backend.tvm import from_onnx as _from_onnx
from_onnx = PIPELINE_MANAGER.register_pipeline()(_from_onnx)
__all__ += ['from_onnx', 'HDF5Dataset']

View File

@ -59,8 +59,6 @@ def visualize_model(model_cfg: Union[str, mmcv.Config],
if isinstance(model, (list, tuple)):
assert len(model) > 0, 'Model should have at least one element.'
assert all([isinstance(m, str) for m in model]), \
'All elements in the list should be str'
if backend == Backend.PYTORCH:
model = task_processor.init_pytorch_model(model[0])

View File

@ -10,6 +10,7 @@ from mmdeploy.utils import (Backend, Task, get_backend, get_codebase,
get_common_config, get_ir_config,
get_partition_config, get_root_logger,
get_task_type, is_dynamic_batch, load_config)
from mmdeploy.utils.config_utils import get_backend_config
from mmdeploy.utils.constants import SDK_TASK_MAP as task_map
from .tracer import add_transform_tag, get_transform_static
@ -80,7 +81,7 @@ def get_model_name_customs(deploy_cfg: mmcv.Config, model_cfg: mmcv.Config,
def get_models(deploy_cfg: Union[str, mmcv.Config],
model_cfg: Union[str, mmcv.Config], work_dir: str,
device: str) -> List:
"""Get the output model informantion for deploy.json.
"""Get the output model information for deploy.json.
Args:
deploy_cfg (mmcv.Config): Deploy config dict.
@ -90,7 +91,7 @@ def get_models(deploy_cfg: Union[str, mmcv.Config],
Return:
list[dict]: The list contains dicts composed of the model name, net,
weghts, backend, precision batchsize and dynamic_shape.
weights, backend, precision batch_size and dynamic_shape.
"""
name, _ = get_model_name_customs(deploy_cfg, model_cfg, work_dir, device)
precision = 'FP32'
@ -148,6 +149,26 @@ def get_models(deploy_cfg: Union[str, mmcv.Config],
convert_to = deploy_cfg.backend_config.convert_to
suffix = get_model_suffix(convert_to)
net = replace_suffix(ir_name, suffix)
elif backend == Backend.TVM:
import os.path as osp
from mmdeploy.backend.tvm import get_library_ext
ext = get_library_ext()
net = replace_suffix(ir_name, ext)
# get input and output name
ir_cfg = get_ir_config(deploy_cfg)
backend_cfg = get_backend_config(deploy_cfg)
input_names = ir_cfg['input_names']
output_names = ir_cfg['output_names']
weights = replace_suffix(ir_name, '.txt')
weights_path = osp.join(work_dir, weights)
use_vm = backend_cfg.model_inputs[0].get('use_vm', False)
bytecode_path = replace_suffix(ir_name, '.code')
with open(weights_path, 'w') as f:
f.write(','.join(input_names) + '\n')
f.write(','.join(output_names) + '\n')
if use_vm:
f.write(bytecode_path + '\n')
else:
raise NotImplementedError(f'Not supported backend: {backend.value}.')
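For the TVM backend, the text file written above is a plain-text descriptor that the C++ TVMNet loader parses: the first line holds the comma-separated input names, the second line the output names, and, when use_vm is enabled, the third line the virtual machine bytecode path. With hypothetical names (ir_name end2end.onnx, inputs ['input'], outputs ['dets', 'labels']) and use_vm=True, the generated file would read:

input
dets,labels
end2end.code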

View File

@ -1,6 +1,4 @@
# Copyright (c) OpenMMLab. All rights reserved.
import mmdeploy_python as c_api
from mmdeploy.utils import Backend, parse_device_id, parse_device_type
from mmdeploy.utils.timer import TimeCounter
from ..base import BACKEND_WRAPPER, BaseWrapper
@ -11,6 +9,7 @@ class SDKWrapper(BaseWrapper):
def __init__(self, model_file, task_name, device):
super().__init__([])
import mmdeploy_python as c_api
creator = getattr(c_api, task_name)
device_id = parse_device_id(device)
device_type = parse_device_type(device)

View File

@ -0,0 +1,43 @@
# Copyright (c) OpenMMLab. All rights reserved.
import importlib
import sys
from .backend_manager import TVMManager
def is_available() -> bool:
"""Check whether tvm package is installed.
Returns:
bool: True if tvm package is installed.
"""
return importlib.util.find_spec('tvm') is not None
def get_library_ext() -> str:
"""Get the extension of the library.
Returns:
str: The extension name
"""
platform = sys.platform.lower()
if platform == 'win32' or platform == 'cygwin':
return '.dll'
elif platform == 'linux' or platform == 'darwin' or platform == 'freebsd':
return '.so'
if is_available():
from .onnx2tvm import from_onnx
from .quantize import HDF5Dataset
from .tuner import build_tvm_tuner
__all__ = ['from_onnx', 'build_tvm_tuner', 'HDF5Dataset', 'TVMManager']
try:
# import wrapper if pytorch is available
from .wrapper import TVMWrapper
__all__ += ['TVMWrapper']
except Exception:
pass
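A small usage sketch of the helpers above, assuming the tvm package is installed (the output name end2end is a placeholder):
from mmdeploy.backend.tvm import get_library_ext, is_available
if is_available():
    ext = get_library_ext()  # '.dll' on Windows, '.so' on Linux/macOS/FreeBSD
    lib_path = 'end2end' + ext  # placeholder artifact name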

View File

@ -0,0 +1,37 @@
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Any, Optional, Sequence
from ..base import BACKEND_MANAGERS, BaseBackendManager
@BACKEND_MANAGERS.register('tvm')
class TVMManager(BaseBackendManager):
@classmethod
def build_wrapper(cls,
backend_files: Sequence[str],
device: str = 'cpu',
input_names: Optional[Sequence[str]] = None,
output_names: Optional[Sequence[str]] = None,
deploy_cfg: Optional[Any] = None,
**kwargs):
"""Build the wrapper for the backend model.
Args:
backend_files (Sequence[str]): Backend files.
device (str, optional): The device info. Defaults to 'cpu'.
input_names (Optional[Sequence[str]], optional): input names.
Defaults to None.
output_names (Optional[Sequence[str]], optional): output names.
Defaults to None.
deploy_cfg (Optional[Any], optional): The deploy config. Defaults
to None.
"""
from .wrapper import TVMWrapper
bytecode = None if len(backend_files) <= 1 else backend_files[1]
return TVMWrapper(
backend_files[0],
bytecode=bytecode,
output_names=output_names,
device=device)
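A hedged sketch of building a runtime wrapper through the manager; end2end.so and end2end.code are placeholder artifacts, and the bytecode file is only needed when the library was compiled with use_vm=True:
from mmdeploy.backend.tvm import TVMManager
wrapper = TVMManager.build_wrapper(
    backend_files=['end2end.so', 'end2end.code'],  # [library, optional bytecode]
    device='cpu',
    output_names=['output'])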

View File

@ -0,0 +1,97 @@
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Callable, Dict, Optional, Union
import onnx
from tvm.relay.frontend import from_onnx as relay_from_onnx
from tvm.relay.quantize import QConfig
from tvm.relay.quantize import qconfig as create_qconfig
from tvm.relay.quantize import quantize
from tvm.target import Target
from mmdeploy.utils import get_root_logger
from .tuner import TVMTunerBase, build_tvm_tuner
def from_onnx(onnx_model: Union[str, onnx.ModelProto],
output_file: str,
use_vm: bool = False,
bytecode_file: str = '',
shape: Optional[Dict] = None,
dtype: Union[str, Dict] = 'float32',
tuner: Optional[Union[TVMTunerBase, Dict]] = None,
qconfig: Optional[Union[QConfig, Dict]] = None,
dataset: Optional[Callable] = None):
"""Convert ONNX model to tvm lib.
Args:
onnx_model (Union[str, onnx.ModelProto]): ONNX model or model path
output_file (str): output library path
use_vm (bool, optional): Enable tvm virtual machine runtime.
Defaults to False.
bytecode_file (str, optional): output bytecode path for virtual
machine. Defaults to ''.
shape (Optional[Dict], optional): The input shape dictionary. Defaults
to None.
dtype (Union[str, Dict], optional): The input data type dictionary.
Defaults to 'float32'.
tuner (Optional[Union[TVMTunerBase, Dict]], optional): The tuner
config. Defaults to None.
qconfig (Optional[Union[QConfig, Dict]], optional): The quantization
config. Defaults to None.
dataset (Optional[Callable], optional): The calibration dataset
generator, used together with qconfig. Defaults to None.
Return:
lib: The converted tvm lib
bytecode: The bytecode of virtual machine runtime.
None if use_vm==False.
Examples:
>>> from mmdeploy.backend.tvm import from_onnx
>>> onnx_path = 'model.onnx'
>>> output_file = 'model.so'
>>> shape = {'input':[1,3,224,224]}
>>> dtype = {'input':'float32'}
>>> from_onnx(onnx_path, output_file, shape=shape, dtype=dtype)
"""
logger = get_root_logger()
if shape is not None and isinstance(dtype, Dict):
assert len(shape) == len(dtype)
for name in shape:
assert name in dtype
if isinstance(onnx_model, str):
onnx_model = onnx.load(onnx_model)
assert isinstance(onnx_model, onnx.ModelProto
), f'Expect onnx.ModelProto, but get {type(onnx_model)}.'
logger.info('Convert onnx to IRModule.')
mod, params = relay_from_onnx(onnx_model, shape, dtype=dtype, opset=11)
# quantization
if qconfig is not None:
logger.info('Quantization')
if isinstance(qconfig, Dict):
qconfig = create_qconfig(**qconfig)
with qconfig:
mod = quantize(mod, params, dataset)
if tuner is None:
# use default tuner
tuner = dict(type='DefaultTuner', target=Target('llvm'))
if not issubclass(type(tuner), TVMTunerBase):
tuner['use_vm'] = use_vm
tuner = build_tvm_tuner(tuner)
logger.info(f'Tuning with {type(tuner).__name__} .')
tuner.tune(mod, params)
lib = tuner.build(mod, params)
logger.info(f'Export library to {output_file} .')
bytecode = None
if tuner.use_vm:
bytecode, lib = lib.save()
with open(bytecode_file, mode='wb') as f:
f.write(bytecode)
lib.export_library(output_file)
return lib, bytecode
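Beyond the docstring example, a hedged sketch of converting with an AutoTVM tuner and the virtual machine runtime; paths and the trial count are illustrative only:
from mmdeploy.backend.tvm import from_onnx
tuner_cfg = dict(
    type='AutoTVMTuner',
    target='llvm',
    log_file='tune.log',  # placeholder tuning record
    n_trial=64,
    tuner=dict(type='XGBTuner'))
from_onnx(
    'model.onnx',  # placeholder input model
    'model.so',
    use_vm=True,
    bytecode_file='model.code',
    shape={'input': [1, 3, 224, 224]},
    dtype={'input': 'float32'},
    tuner=tuner_cfg)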

View File

@ -0,0 +1,73 @@
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Any, Dict, Sequence, Union
import numpy as np
import tvm
from tvm.runtime.ndarray import array
class HDF5Dataset:
"""HDF5 dataset.
Args:
calib_file (str | h5py.File): Input calibration file.
input_shapes (Dict[str, Sequence[int]]): The shape of
each input.
model_type (str): Input model type, defaults to 'end2end'.
device (str): Device type, defaults to 'llvm'.
"""
def __init__(
self,
calib_file: Union[str, Any],
input_shapes: Dict[str, Sequence[int]],
model_type: str = 'end2end',
device: str = 'llvm',
) -> None:
import h5py
if isinstance(calib_file, str):
calib_file = h5py.File(calib_file, mode='r')
assert 'calib_data' in calib_file
calib_data = calib_file['calib_data']
assert model_type in calib_data
calib_data = calib_data[model_type]
self.calib_file = calib_file
self.calib_data = calib_data
self.device = device
self.input_shapes = input_shapes
first_input_group = calib_data[list(calib_data.keys())[0]]
self.dataset_length = len(first_input_group)
def __call__(self):
"""Create dataset generator.
Yields:
Iterator[Any]: data in the dataset
"""
for idx in range(self.dataset_length):
ret = dict()
for name, opt_shape in self.input_shapes.items():
input_group = self.calib_data[name]
data_np = input_group[str(idx)][...].astype(np.float32)
data_shape = data_np.shape
# tile the input data
reps = [
int(np.ceil(opt_s / data_s))
for opt_s, data_s in zip(opt_shape, data_shape)
]
data_np = np.tile(data_np, reps)
slice_list = tuple(slice(0, end) for end in opt_shape)
data_np = data_np[slice_list]
data_nd = array(data_np, tvm.device(self.device))
ret[name] = data_nd
yield ret
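A hedged sketch of wiring the calibration data into quantization; calib_data.h5 is a hypothetical file produced by the calibration step, and the qconfig keys follow tvm.relay.quantize.qconfig:
from mmdeploy.backend.tvm import HDF5Dataset, from_onnx
shape = {'input': [1, 3, 224, 224]}
dataset = HDF5Dataset('calib_data.h5', input_shapes=shape, device='llvm')
from_onnx(
    'model.onnx',  # placeholder input model
    'model_int8.so',
    shape=shape,
    dtype={'input': 'float32'},
    qconfig=dict(calibrate_mode='kl_divergence', weight_scale='max'),
    dataset=dataset())  # pass the generator, not the dataset object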

View File

@ -0,0 +1,414 @@
# Copyright (c) OpenMMLab. All rights reserved.
import os
from abc import abstractmethod
from typing import Any, Dict, Optional, Union
import tvm
from mmcv.utils import Registry
from tvm import IRModule, auto_scheduler, autotvm, relay
from tvm.target import Target
from mmdeploy.utils import get_root_logger
TVM_TUNER = Registry('tvm_tuner')
AUTOTVM_TUNER = Registry('autotvm_tuner')
AUTOTVM_BUILDER = Registry('autotvm_builder')
AUTOTVM_RUNNER = Registry('autotvm_runner')
AUTO_SCHEDULER_BUILDER = Registry('auto_scheduler_builder')
AUTO_SCHEDULER_RUNNER = Registry('auto_scheduler_runner')
def build_tvm_tuner(cfg: Dict):
"""Build the tvm tuner.
Args:
cfg (Dict): The build config
Returns:
Any: The tvm tuner instance
"""
return TVM_TUNER.build(cfg)
def build_autotvm_tuner(cfg: Dict):
"""Build the autotvm tuner.
Args:
cfg (Dict): The build config
Returns:
Any: The autotvm tuner instance
"""
return AUTOTVM_TUNER.build(cfg)
def build_autotvm_builder(cfg: Dict):
"""Build the autotvm builder.
Args:
cfg (Dict): The build config
Returns:
Any: The autotvm builder instance
"""
return AUTOTVM_BUILDER.build(cfg)
def build_autotvm_runner(cfg: Dict):
"""Build the autotvm runner.
Args:
cfg (Dict): The build config
Returns:
Any: The autotvm runner instance
"""
return AUTOTVM_RUNNER.build(cfg)
def build_auto_scheduler_builder(cfg: Dict):
"""Build the ansor builder.
Args:
cfg (Dict): The build config
Returns:
Any: The ansor builder instance
"""
return AUTO_SCHEDULER_BUILDER.build(cfg)
def build_auto_scheduler_runner(cfg: Dict):
"""Build the ansor tuner.
Args:
cfg (Dict): The build config
Returns:
Any: The ansor tuner instance
"""
return AUTO_SCHEDULER_RUNNER.build(cfg)
AUTOTVM_TUNER.register_module()(autotvm.tuner.XGBTuner)
AUTOTVM_TUNER.register_module()(autotvm.tuner.GATuner)
AUTOTVM_TUNER.register_module()(autotvm.tuner.GridSearchTuner)
AUTOTVM_TUNER.register_module()(autotvm.tuner.RandomTuner)
AUTOTVM_BUILDER.register_module()(autotvm.LocalBuilder)
AUTOTVM_RUNNER.register_module()(autotvm.LocalRunner)
AUTOTVM_RUNNER.register_module()(autotvm.RPCRunner)
AUTO_SCHEDULER_BUILDER.register_module()(auto_scheduler.LocalBuilder)
AUTO_SCHEDULER_RUNNER.register_module()(auto_scheduler.LocalRunner)
AUTO_SCHEDULER_RUNNER.register_module()(auto_scheduler.RPCRunner)
class TVMTunerBase:
"""The base class of TVM tuner.
Args:
target (Union[str, Target]): The target platform to be tuned.
opt_level (int): The optimization level.
use_vm (bool): Enable tvm virtual machine runtime.
"""
def __init__(self,
target: Union[str, Target],
opt_level: int = 3,
use_vm: bool = False) -> None:
if isinstance(target, str):
target = Target(target)
self._target = target
self._opt_level = opt_level
self._use_vm = use_vm
@property
def use_vm(self) -> bool:
"""Get use_vm.
Returns:
bool: use_vm
"""
return self._use_vm
@abstractmethod
def tune(self, mod: IRModule, params: Dict):
"""Tune the graph.
Args:
mod (IRModule): The graph module.
params (Dict): The graph parameters.
"""
raise NotImplementedError('tune method not implemented.')
def build(self, mod: IRModule, params: Dict):
"""Build tuning library.
Args:
mod (IRModule): IRModule to build
params (Dict): Parameter of the mod
Returns:
lib: The runtime factory for the graph executor
"""
with tvm.transform.PassContext(opt_level=self._opt_level):
if self._use_vm:
ret = relay.vm.compile(mod, target=self._target, params=params)
else:
ret = relay.build_module.build(
mod, target=self._target, params=params)
return ret
@TVM_TUNER.register_module
class DefaultTuner(TVMTunerBase):
"""The Default tuner, do nothing when tuning.
Args:
target (Union[str, Target]): The target platform to be tuned.
opt_level (int): The optimization level.
use_vm (bool): Enable tvm virtual machine runtime.
"""
def __init__(self,
target: Union[str, Target],
opt_level: int = 3,
use_vm: bool = False) -> None:
super().__init__(target, opt_level, use_vm)
def tune(self, mod: IRModule, params: Dict):
"""Tune model, Default tuner does nothing."""
pass
@TVM_TUNER.register_module
class AutoTVMTuner(TVMTunerBase):
def __init__(self,
target: Union[str, Target],
log_file: str,
n_trial: int,
tuner: Dict,
opt_level: int = 3,
use_vm: bool = False,
early_stopping: Optional[int] = None,
builder: Union[Dict,
Any] = dict(type='LocalBuilder', timeout=10),
runner: Union[Dict, Any] = dict(
type='LocalRunner',
number=20,
repeat=3,
timeout=4,
min_repeat_ms=150),
use_transfer_learning: bool = True) -> None:
"""The AutoTVM tuner.
Args:
target (Union[str, Target]): The target platform to tune.
log_file (str): the log file path.
n_trial (int): Maximum number of configs to try.
tuner (Dict): The autotvm tuner config.
opt_level (int, optional): The optimization level. Defaults to 3.
use_vm (bool, optional): Enable tvm virtual machine.
Defaults to False.
early_stopping (Optional[int], optional): Early stop the tuning
when not finding better configs in this number of trials.
builder (Union[Dict, Any], optional): The builder config.
runner (Union[Dict, Any], optional): The runner config.
use_transfer_learning (bool, optional): Whether to use transfer
learning. Defaults to True.
"""
super().__init__(target, opt_level, use_vm)
self._log_file = log_file
self._n_trial = n_trial
self._tuner = tuner
self._early_stopping = early_stopping
self._use_transfer_learning = use_transfer_learning
if isinstance(builder, Dict):
builder = build_autotvm_builder(builder)
if isinstance(runner, Dict):
runner = build_autotvm_runner(runner)
self._measure_option = autotvm.measure_option(
builder=builder, runner=runner)
def tune(self, mod: IRModule, params: Dict):
"""Tune the graph.
Args:
mod (IRModule): The graph module.
params (Dict): The graph parameters.
"""
logger = get_root_logger()
target = self._target
logger.info('Create autotvm task.')
tasks = autotvm.task.extract_from_program(
mod['main'], target=target, params=params)
# create tmp log file
if os.path.exists(self._log_file):
os.remove(self._log_file)
tmp_log_file = self._log_file + '.tmp'
if os.path.exists(tmp_log_file):
os.remove(tmp_log_file)
tuner_cfg = self._tuner
for i, task in enumerate(reversed(tasks)):
prefix = '[Task %3d/%3d] ' % (i + 1, len(tasks))
tuner_cfg['task'] = task
tuner_obj = build_autotvm_tuner(tuner_cfg)
if self._use_transfer_learning:
if os.path.isfile(tmp_log_file) and os.path.exists(
tmp_log_file):
tuner_obj.load_history(
autotvm.record.load_from_file(tmp_log_file))
# do tuning
tsk_trial = min(self._n_trial, len(task.config_space))
tuner_obj.tune(
n_trial=tsk_trial,
early_stopping=self._early_stopping,
measure_option=self._measure_option,
callbacks=[
autotvm.callback.progress_bar(tsk_trial, prefix=prefix),
autotvm.callback.log_to_file(tmp_log_file),
],
)
# pick best records to a cache file
autotvm.record.pick_best(tmp_log_file, self._log_file)
if os.path.exists(tmp_log_file):
os.remove(tmp_log_file)
def build(self, mod: IRModule, params: Dict):
"""Build tuning library.
Args:
mod (IRModule): IRModule to build
params (Dict): Parameter of the mod
Returns:
lib: The runtime factory for the graph executor
"""
with autotvm.apply_history_best(self._log_file):
with tvm.transform.PassContext(opt_level=self._opt_level):
if self._use_vm:
ret = relay.vm.compile(
mod, target=self._target, params=params)
else:
ret = relay.build_module.build(
mod, target=self._target, params=params)
return ret
@TVM_TUNER.register_module
class AutoScheduleTuner(TVMTunerBase):
def __init__(
self,
target: Union[str, Target],
log_file: str,
num_measure_trials: int,
opt_level: int = 3,
use_vm: bool = False,
early_stopping: Optional[int] = None,
builder: Union[Dict, Any] = dict(type='LocalBuilder', timeout=15),
runner: Union[Dict, Any] = dict(
type='LocalRunner', repeat=10, enable_cpu_cache_flush=True)
) -> None:
"""The Ansor tuner.
Args:
target (Union[str, Target]): The target platform to tune.
log_file (str): the log file path.
num_measure_trials (int): Maximum number of configs to try.
opt_level (int, optional): The optimization level. Defaults to 3.
use_vm (bool, optional): Enable tvm virtual machine.
Defaults to False.
early_stopping (Optional[int], optional): Early stop the tuning
when not finding better configs in this number of trials.
builder (Union[Dict, Any], optional): The builder config.
runner (Union[Dict, Any], optional): The runner config.
"""
super().__init__(target, opt_level, use_vm)
self._log_file = log_file
self._num_measure_trials = num_measure_trials
self._early_stopping = early_stopping
if isinstance(builder, Dict):
builder = build_auto_scheduler_builder(builder)
if isinstance(runner, Dict):
# CUDA device need a different process for measurement
if runner['type'] == 'LocalRunner':
runner.pop('type')
if Target(target).kind != 'llvm':
if 'enable_cpu_cache_flush' in runner:
runner['enable_cpu_cache_flush'] = False
self._measure_ctx = auto_scheduler.LocalRPCMeasureContext(
**runner)
runner = self._measure_ctx.runner
else:
runner = build_auto_scheduler_runner(runner)
tune_option = auto_scheduler.TuningOptions(
num_measure_trials=num_measure_trials,
runner=runner,
builder=builder,
measure_callbacks=[auto_scheduler.RecordToFile(log_file)],
)
self._tune_option = tune_option
def tune(self, mod: IRModule, params: Dict):
"""Tune the graph.
Args:
mod (IRModule): The graph module.
params (Dict): The graph parameters.
"""
logger = get_root_logger()
target = self._target
if os.path.exists(self._log_file):
os.remove(self._log_file)
logger.info('Create auto scheduler task.')
tasks, task_weights = auto_scheduler.extract_tasks(
mod['main'], params, target)
tuner = auto_scheduler.TaskScheduler(tasks, task_weights)
logger.info('Begin tuning.')
tuner.tune(self._tune_option)
def build(self, mod: IRModule, params: Dict):
"""Build tuning library.
Args:
mod (IRModule): IRModule to build
params (Dict): Parameter of the mod
Returns:
lib: The runtime factory for the graph executor
"""
with auto_scheduler.ApplyHistoryBest(self._log_file):
with tvm.transform.PassContext(
opt_level=self._opt_level,
config={'relay.backend.use_auto_scheduler': True}):
if self._use_vm:
ret = relay.vm.compile(
mod, target=self._target, params=params)
else:
ret = relay.build_module.build(
mod, target=self._target, params=params)
return ret
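A short sketch of instantiating one of the registered tuners from a config dict; the log file and trial count are placeholders:
from mmdeploy.backend.tvm import build_tvm_tuner
tuner = build_tvm_tuner(
    dict(
        type='AutoScheduleTuner',
        target='llvm',
        log_file='ansor.log',
        num_measure_trials=200))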

View File

@ -0,0 +1,119 @@
# Copyright (c) OpenMMLab. All rights reserved.
import re
from typing import Dict, Optional, Sequence, Union
import torch
import tvm
import tvm.contrib.graph_executor as runtime
from tvm.runtime.vm import Executable, VirtualMachine
from mmdeploy.utils import Backend
from mmdeploy.utils.timer import TimeCounter
from ..base import BACKEND_WRAPPER, BaseWrapper
@BACKEND_WRAPPER.register_module(Backend.TVM.value)
class TVMWrapper(BaseWrapper):
"""TVM runtime wrapper.
Args:
lib (str): The path to the generated lib
output_names (Sequence[str]): The output names.
bytecode (Union[bytearray, str]): The bytecode for virtual machine.
device (str): Device used to do the inference
Examples:
>>> from mmdeploy.backend.tvm import TVMWrapper
>>> lib_file = 'resnet.so'
>>> model = TVMWrapper(lib_file, ['output'])
>>> inputs = dict(input=torch.randn(1, 3, 224, 224))
>>> outputs = model(inputs)
>>> print(outputs)
"""
def __init__(self,
lib: str,
output_names: Sequence[str],
bytecode: Optional[Union[bytearray, str]] = None,
device: str = 'cpu'):
super().__init__(output_names)
self.use_vm = False
if isinstance(lib, str):
lib = tvm.runtime.load_module(lib)
match_result = re.match('([^:]+)(:[0-9]+)?$', device)
assert match_result is not None, f'Can not parse device {device}.'
device_type = match_result.group(1).lower()
device_id = 0 if match_result.lastindex == 1 else int(
match_result.group(2)[1:])
device = tvm.device(device_type, device_id)
if bytecode is not None:
self.use_vm = True
if isinstance(bytecode, str):
with open(bytecode, 'rb') as f:
bytecode = f.read()
if self.use_vm:
exec = Executable.load_exec(bytecode, lib)
module = VirtualMachine(exec, device)
else:
module = runtime.GraphModule(lib['default'](device))
num_output = module.get_num_outputs()
assert isinstance(output_names, Sequence)
assert len(output_names) == num_output
self._lib = lib
self._device = device
self._module = module
def forward(self, inputs: Dict[str,
torch.Tensor]) -> Dict[str, torch.Tensor]:
"""Run forward inference.
Args:
inputs (Dict[str, torch.Tensor]): The input name and tensor pairs.
Return:
Dict[str, torch.Tensor]: The output name and tensor pairs.
"""
module = self._module
device = self._device
mod_inputs = dict()
for name, tensor in inputs.items():
if tensor.device.type == 'cuda':
mod_inputs[name] = tvm.nd.from_dlpack(tensor)
else:
mod_inputs[name] = tvm.nd.array(tensor.cpu().numpy(), device)
if self.use_vm:
module.set_input('main', **mod_inputs)
self.__tvm_execute()
vm_ret = module.get_outputs()
ret = dict()
for idx, name in enumerate(self._output_names):
ndarray = vm_ret[idx]
tensor = torch.from_dlpack(ndarray.to_dlpack())
ret[name] = tensor
return ret
else:
module.set_input(**mod_inputs)
self.__tvm_execute()
ret = dict()
for idx, name in enumerate(self._output_names):
ndarray = module.get_output(idx)
tensor = torch.from_dlpack(ndarray.to_dlpack())
ret[name] = tensor.clone()
return ret
@TimeCounter.count_time(Backend.TVM.value)
def __tvm_execute(self):
module = self._module
module.run()
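A hedged sketch of running the wrapper on GPU, where CUDA tensors are handed to TVM through DLPack without an extra host copy; resnet.so is a placeholder library built with a CUDA target:
import torch
from mmdeploy.backend.tvm import TVMWrapper
model = TVMWrapper('resnet.so', output_names=['output'], device='cuda:0')
outputs = model(dict(input=torch.randn(1, 3, 224, 224, device='cuda')))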

View File

@ -318,6 +318,9 @@ def single_roi_extractor__forward__openvino(ctx,
return result
@FUNCTION_REWRITER.register_rewriter(
func_name='mmdet.models.roi_heads.SingleRoIExtractor.forward',
backend=Backend.TVM.value)
@FUNCTION_REWRITER.register_rewriter(
func_name='mmdet.models.roi_heads.SingleRoIExtractor.forward',
backend=Backend.COREML.value)
@ -328,6 +331,7 @@ def single_roi_extractor__forward__coreml(ctx,
rois,
roi_scale_factor=None):
"""Rewrite `forward` of SingleRoIExtractor for coreml."""
backend = get_backend(ctx.cfg)
out_size = self.roi_layers[0].output_size
num_levels = len(feats)
roi_feats = feats[0].new_zeros(rois.shape[0], self.out_channels, *out_size)
@ -346,7 +350,8 @@ def single_roi_extractor__forward__coreml(ctx,
# inds = mask.nonzero(as_tuple=False).squeeze(1)
rois_t = rois * mask.unsqueeze(-1)
# use the roi align in torchvision
self.roi_layers[i].use_torchvision = True
if backend == Backend.COREML:
self.roi_layers[i].use_torchvision = True
roi_feats_t = self.roi_layers[i](feats[i], rois_t)
roi_feats = roi_feats + roi_feats_t * (rois_t[:, -1] > 0).reshape(
-1, 1, 1, 1)

View File

@ -65,6 +65,7 @@ class Backend(AdvancedEnum):
RKNN = 'rknn'
ASCEND = 'ascend'
COREML = 'coreml'
TVM = 'tvm'
DEFAULT = 'default'

View File

@ -42,7 +42,7 @@ def get_backend_version():
Returns:
Dict: The name and the version of some supported backend.
"""
backend_library_list = ['tensorrt', 'onnxruntime', 'ncnn']
backend_library_list = ['tensorrt', 'onnxruntime', 'ncnn', 'tvm']
version_dict = dict()
for backend in backend_library_list:
version_dict[backend] = get_library_version(backend)

View File

@ -51,6 +51,8 @@ def backend_checker(backend: Backend, require_plugin: bool = False):
from mmdeploy.apis.rknn import is_available
elif backend == Backend.ASCEND:
from mmdeploy.apis.ascend import is_available
elif backend == Backend.TVM:
from mmdeploy.apis.tvm import is_available
else:
warnings.warn('The backend checker is not available')
return
@ -110,6 +112,8 @@ def check_backend(backend: Backend, require_plugin: bool = False):
from mmdeploy.backend.rknn import device_available as is_available
elif backend == Backend.ASCEND:
from mmdeploy.backend.ascend import is_available
elif backend == Backend.TVM:
from mmdeploy.backend.tvm import is_available
else:
warnings.warn('The backend checker is not available')
return

View File

@ -0,0 +1,105 @@
# Copyright (c) OpenMMLab. All rights reserved.
import os.path as osp
import tempfile
import pytest
import torch
import torch.nn as nn
from mmdeploy.utils import Backend
from mmdeploy.utils.test import backend_checker
onnx_file = tempfile.NamedTemporaryFile(suffix='.onnx').name
test_img = torch.rand([1, 3, 8, 8])
@pytest.mark.skip(reason='This is not a test class but a utility class.')
class TestModel(nn.Module):
def __init__(self):
super().__init__()
self.conv = torch.nn.Conv2d(3, 8, 3, 1, 1)
def forward(self, x):
return self.conv(x)
test_model = TestModel().eval()
def generate_onnx_file(model):
with torch.no_grad():
torch.onnx.export(
model,
test_img,
onnx_file,
output_names=['output'],
input_names=['input'],
keep_initializers_as_inputs=True,
do_constant_folding=True,
verbose=False,
opset_version=11)
assert osp.exists(onnx_file)
@backend_checker(Backend.TVM)
def test_onnx2tvm():
from mmdeploy.apis.tvm import from_onnx, get_library_ext
model = test_model
generate_onnx_file(model)
work_dir, _ = osp.split(onnx_file)
file_name = osp.splitext(onnx_file)[0]
ext = get_library_ext()
lib_path = osp.join(work_dir, file_name + ext)
bytecode_path = osp.join(work_dir, file_name + '.code')
log_file = osp.join(work_dir, file_name + '.log')
shape = {'input': test_img.shape}
dtype = {'input': 'float32'}
target = 'llvm'
# test default tuner
tuner_dict = dict(type='DefaultTuner', target=target)
from_onnx(onnx_file, lib_path, shape=shape, dtype=dtype, tuner=tuner_dict)
assert osp.exists(lib_path)
# test autotvm
lib_path = osp.join(work_dir, file_name + '_autotvm' + ext)
bytecode_path = osp.join(work_dir, file_name + '_autotvm.code')
log_file = osp.join(work_dir, file_name + '_autotvm.log')
tuner_dict = dict(
type='AutoTVMTuner',
target=target,
log_file=log_file,
n_trial=1,
tuner=dict(type='XGBTuner'))
from_onnx(
onnx_file,
lib_path,
use_vm=True,
bytecode_file=bytecode_path,
shape=shape,
dtype=dtype,
tuner=tuner_dict)
assert osp.exists(lib_path)
assert osp.exists(bytecode_path)
# test ansor
lib_path = osp.join(work_dir, file_name + '_ansor' + ext)
bytecode_path = osp.join(work_dir, file_name + '_ansor.code')
log_file = osp.join(work_dir, file_name + '_ansor.log')
tuner_dict = dict(
type='AutoScheduleTuner',
target=target,
log_file=log_file,
num_measure_trials=2)
from_onnx(
onnx_file,
lib_path,
use_vm=True,
bytecode_file=bytecode_path,
shape=shape,
dtype=dtype,
tuner=tuner_dict)
assert osp.exists(lib_path)
assert osp.exists(bytecode_path)

View File

@ -131,6 +131,18 @@ def onnx2backend(backend, onnx_file):
dict(input_shapes=dict(input=test_img.shape)))
from_onnx(onnx_file, work_dir, model_inputs)
return backend_file
elif backend == Backend.TVM:
from mmdeploy.backend.tvm import from_onnx, get_library_ext
ext = get_library_ext()
lib_file = tempfile.NamedTemporaryFile(suffix=ext).name
shape = {'input': test_img.shape}
dtype = {'input': 'float32'}
target = 'llvm'
tuner_dict = dict(type='DefaultTuner', target=target)
from_onnx(
onnx_file, lib_file, shape=shape, dtype=dtype, tuner=tuner_dict)
assert osp.exists(lib_file)
return lib_file
def create_wrapper(backend, model_files):
@ -172,6 +184,10 @@ def create_wrapper(backend, model_files):
from mmdeploy.backend.ascend import AscendWrapper
ascend_model = AscendWrapper(model_files)
return ascend_model
elif backend == Backend.TVM:
from mmdeploy.backend.tvm import TVMWrapper
tvm_model = TVMWrapper(model_files, output_names=output_names)
return tvm_model
else:
raise NotImplementedError(f'Unknown backend type: {backend.value}')
@ -207,13 +223,17 @@ def run_wrapper(backend, wrapper, input):
elif backend == Backend.ASCEND:
results = wrapper({'input': input})['output']
return results
elif backend == Backend.TVM:
results = wrapper({'input': input})['output']
return results
else:
raise NotImplementedError(f'Unknown backend type: {backend.value}')
ALL_BACKEND = [
Backend.TENSORRT, Backend.ONNXRUNTIME, Backend.PPLNN, Backend.NCNN,
Backend.OPENVINO, Backend.TORCHSCRIPT, Backend.ASCEND, Backend.RKNN
Backend.OPENVINO, Backend.TORCHSCRIPT, Backend.ASCEND, Backend.RKNN,
Backend.TVM
]

third_party/dlpack/dlpack.h (vendored)
View File

@ -0,0 +1,233 @@
// copy from:
// https://github.com/dmlc/dlpack/blob/v0.7/include/dlpack/dlpack.h
/*!
* Copyright (c) 2017 by Contributors
* \file dlpack.h
* \brief The common header of DLPack.
*/
#ifndef DLPACK_DLPACK_H_
#define DLPACK_DLPACK_H_
/**
* \brief Compatibility with C++
*/
#ifdef __cplusplus
#define DLPACK_EXTERN_C extern "C"
#else
#define DLPACK_EXTERN_C
#endif
/*! \brief The current version of dlpack */
#define DLPACK_VERSION 70
/*! \brief The current ABI version of dlpack */
#define DLPACK_ABI_VERSION 1
/*! \brief DLPACK_DLL prefix for windows */
#ifdef _WIN32
#ifdef DLPACK_EXPORTS
#define DLPACK_DLL __declspec(dllexport)
#else
#define DLPACK_DLL __declspec(dllimport)
#endif
#else
#define DLPACK_DLL
#endif
#include <stddef.h>
#include <stdint.h>
#ifdef __cplusplus
extern "C" {
#endif
/*!
* \brief The device type in DLDevice.
*/
#ifdef __cplusplus
typedef enum : int32_t {
#else
typedef enum {
#endif
/*! \brief CPU device */
kDLCPU = 1,
/*! \brief CUDA GPU device */
kDLCUDA = 2,
/*!
* \brief Pinned CUDA CPU memory by cudaMallocHost
*/
kDLCUDAHost = 3,
/*! \brief OpenCL devices. */
kDLOpenCL = 4,
/*! \brief Vulkan buffer for next generation graphics. */
kDLVulkan = 7,
/*! \brief Metal for Apple GPU. */
kDLMetal = 8,
/*! \brief Verilog simulator buffer */
kDLVPI = 9,
/*! \brief ROCm GPUs for AMD GPUs */
kDLROCM = 10,
/*!
* \brief Pinned ROCm CPU memory allocated by hipMallocHost
*/
kDLROCMHost = 11,
/*!
* \brief Reserved extension device type,
* used for quickly test extension device
* The semantics can differ depending on the implementation.
*/
kDLExtDev = 12,
/*!
* \brief CUDA managed/unified memory allocated by cudaMallocManaged
*/
kDLCUDAManaged = 13,
/*!
* \brief Unified shared memory allocated on a oneAPI non-partititioned
* device. Call to oneAPI runtime is required to determine the device
* type, the USM allocation type and the sycl context it is bound to.
*
*/
kDLOneAPI = 14,
/*! \brief GPU support for next generation WebGPU standard. */
kDLWebGPU = 15,
/*! \brief Qualcomm Hexagon DSP */
kDLHexagon = 16,
} DLDeviceType;
/*!
* \brief A Device for Tensor and operator.
*/
typedef struct {
/*! \brief The device type used in the device. */
DLDeviceType device_type;
/*!
* \brief The device index.
* For vanilla CPU memory, pinned memory, or managed memory, this is set to 0.
*/
int32_t device_id;
} DLDevice;
/*!
* \brief The type code options DLDataType.
*/
typedef enum {
/*! \brief signed integer */
kDLInt = 0U,
/*! \brief unsigned integer */
kDLUInt = 1U,
/*! \brief IEEE floating point */
kDLFloat = 2U,
/*!
* \brief Opaque handle type, reserved for testing purposes.
* Frameworks need to agree on the handle data type for the exchange to be well-defined.
*/
kDLOpaqueHandle = 3U,
/*! \brief bfloat16 */
kDLBfloat = 4U,
/*!
* \brief complex number
* (C/C++/Python layout: compact struct per complex number)
*/
kDLComplex = 5U,
} DLDataTypeCode;
/*!
* \brief The data type the tensor can hold. The data type is assumed to follow the
* native endian-ness. An explicit error message should be raised when attempting to
* export an array with non-native endianness
*
* Examples
* - float: type_code = 2, bits = 32, lanes=1
* - float4(vectorized 4 float): type_code = 2, bits = 32, lanes=4
* - int8: type_code = 0, bits = 8, lanes=1
* - std::complex<float>: type_code = 5, bits = 64, lanes = 1
*/
typedef struct {
/*!
* \brief Type code of base types.
* We keep it uint8_t instead of DLDataTypeCode for minimal memory
* footprint, but the value should be one of DLDataTypeCode enum values.
* */
uint8_t code;
/*!
* \brief Number of bits, common choices are 8, 16, 32.
*/
uint8_t bits;
/*! \brief Number of lanes in the type, used for vector types. */
uint16_t lanes;
} DLDataType;
/*!
* \brief Plain C Tensor object, does not manage memory.
*/
typedef struct {
/*!
* \brief The data pointer points to the allocated data. This will be CUDA
* device pointer or cl_mem handle in OpenCL. It may be opaque on some device
* types. This pointer is always aligned to 256 bytes as in CUDA. The
* `byte_offset` field should be used to point to the beginning of the data.
*
* Note that as of Nov 2021, multiply libraries (CuPy, PyTorch, TensorFlow,
* TVM, perhaps others) do not adhere to this 256 byte aligment requirement
* on CPU/CUDA/ROCm, and always use `byte_offset=0`. This must be fixed
* (after which this note will be updated); at the moment it is recommended
* to not rely on the data pointer being correctly aligned.
*
* For given DLTensor, the size of memory required to store the contents of
* data is calculated as follows:
*
* \code{.c}
* static inline size_t GetDataSize(const DLTensor* t) {
* size_t size = 1;
* for (tvm_index_t i = 0; i < t->ndim; ++i) {
* size *= t->shape[i];
* }
* size *= (t->dtype.bits * t->dtype.lanes + 7) / 8;
* return size;
* }
* \endcode
*/
void* data;
/*! \brief The device of the tensor */
DLDevice device;
/*! \brief Number of dimensions */
int32_t ndim;
/*! \brief The data type of the pointer*/
DLDataType dtype;
/*! \brief The shape of the tensor */
int64_t* shape;
/*!
* \brief strides of the tensor (in number of elements, not bytes)
* can be NULL, indicating tensor is compact and row-majored.
*/
int64_t* strides;
/*! \brief The offset in bytes to the beginning pointer to data */
uint64_t byte_offset;
} DLTensor;
/*!
* \brief C Tensor object, manage memory of DLTensor. This data structure is
* intended to facilitate the borrowing of DLTensor by another framework. It is
* not meant to transfer the tensor. When the borrowing framework doesn't need
* the tensor, it should call the deleter to notify the host that the resource
* is no longer needed.
*/
typedef struct DLManagedTensor {
/*! \brief DLTensor which is being memory managed */
DLTensor dl_tensor;
/*! \brief the context of the original host framework of DLManagedTensor in
* which DLManagedTensor is used in the framework. It can also be NULL.
*/
void* manager_ctx;
/*! \brief Destructor signature void (*)(void*) - this should be called
* to destruct manager_ctx which holds the DLManagedTensor. It can be NULL
* if there is no way for the caller to provide a reasonable destructor.
* The destructors deletes the argument self as well.
*/
void (*deleter)(struct DLManagedTensor* self);
} DLManagedTensor;
#ifdef __cplusplus
} // DLPACK_EXTERN_C
#endif
#endif // DLPACK_DLPACK_H_

View File

@ -21,6 +21,7 @@ def check_backend():
ort_version = backend_versions['onnxruntime']
trt_version = backend_versions['tensorrt']
ncnn_version = backend_versions['ncnn']
tvm_version = backend_versions['tvm']
import mmdeploy.apis.onnxruntime as ort_apis
logger = get_root_logger()
@ -35,6 +36,8 @@ def check_backend():
logger.info(f'ncnn: {ncnn_version}\tops_is_avaliable : '
f'{ncnn_apis.is_custom_ops_available()}')
logger.info(f'tvm: {tvm_version}')
import mmdeploy.apis.pplnn as pplnn_apis
logger.info(f'pplnn_is_avaliable: {pplnn_apis.is_available()}')

View File

@ -410,6 +410,51 @@ def main():
deploy_cfg, coreml_files)
backend_files = coreml_files
elif backend == Backend.TVM:
import copy
from mmdeploy.apis.tvm import from_onnx, get_library_ext
PIPELINE_MANAGER.set_log_level(log_level, [from_onnx])
model_inputs = get_model_inputs(deploy_cfg)
if args.device.startswith('cuda'):
target = 'cuda'
else:
target = 'llvm'
lib_ext = get_library_ext()
tvm_files = []
for model_id, onnx_path in enumerate(ir_files):
model_input = copy.deepcopy(model_inputs[model_id])
use_vm = model_input.get('use_vm', False)
if 'target' not in model_input['tuner']:
model_input['tuner']['target'] = target
lib_path = osp.splitext(onnx_path)[0] + lib_ext
code_path = osp.splitext(
onnx_path)[0] + '.code' if use_vm else None
model_input['output_file'] = lib_path
model_input['onnx_model'] = onnx_path
model_input['bytecode_file'] = code_path
# create calibration dataset
if 'qconfig' in model_input:
calib_path = osp.join(args.work_dir, calib_filename)
from mmdeploy.backend.tvm import HDF5Dataset
partition_type = 'end2end' if partition_cfgs is None \
else onnx_name
dataset = HDF5Dataset(
calib_path,
model_input['shape'],
model_type=partition_type,
device=target)
model_input['dataset'] = dataset()
from_onnx(**model_input)
tvm_files += [lib_path, code_path]
backend_files = tvm_files
if args.test_img is None:
args.test_img = args.img
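A hedged example of the backend_config this branch expects in a deploy config; the keys mirror the lookups above (use_vm, shape, dtype, tuner, optional qconfig), the tuner target is filled from the device when omitted, and the concrete values are illustrative only:
backend_config = dict(
    type='tvm',
    model_inputs=[
        dict(
            use_vm=True,
            shape=dict(input=[1, 3, 224, 224]),
            dtype=dict(input='float32'),
            tuner=dict(
                type='AutoScheduleTuner',
                log_file='tune.log',  # placeholder
                num_measure_trials=1000))
    ])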

View File

@ -0,0 +1,163 @@
# Copyright (c) OpenMMLab. All rights reserved.
import os
import os.path as osp
import sys
import time
from ubuntu_utils import cmd_result, ensure_base_env, get_job
def install_llvm(dep_dir):
print('-' * 10 + 'install llvm' + '-' * 10)
os.chdir(dep_dir)
os.system(
'wget --no-check-certificate -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add -' # noqa: E501
)
ubuntu = cmd_result(
""" lsb_release -a 2>/dev/null | grep "Release" | tail -n 1 | awk '{print $NF}' """ # noqa: E501
)
nickname_dict = {
'18.04': 'bionic',
'20.04': 'focal',
'22.04': 'jammy',
'22.10': 'kinetic'
}
nickname = nickname_dict.get(ubuntu, None)
if nickname is None:
raise NotImplementedError(f'Unsupported ubuntu version {ubuntu}.')
os.system(
f"add-apt-repository 'deb http://apt.llvm.org/{nickname}/ llvm-toolchain-{nickname}-10 main'" # noqa: E501
)
os.system('sudo apt update')
os.system(
'sudo apt-get install llvm-10 lldb-10 llvm-10-dev libllvm10 llvm-10-runtime' # noqa: E501
)
def install_tvm(dep_dir):
print('-' * 10 + 'build and install tvm' + '-' * 10)
time.sleep(2)
os.system('sudo apt-get update')
os.system(
'sudo apt-get install -y python3 python3-dev python3-setuptools gcc libtinfo-dev zlib1g-dev build-essential cmake libedit-dev libxml2-dev' # noqa: E501
)
# generate unzip and build dir
os.chdir(dep_dir)
# git clone
if not osp.exists('tvm'):
os.system(
'git clone --branch v0.10.0 --depth 1 --recursive https://github.com/apache/tvm tvm' # noqa: E501
)
tvm_dir = osp.join(dep_dir, 'tvm')
os.chdir(tvm_dir)
# build
if not osp.exists('build'):
os.system('mkdir build')
os.system('cp cmake/config.cmake build')
os.chdir(osp.join(tvm_dir, 'build'))
os.system(
""" sed -i "s@set(USE_LLVM OFF)@set(USE_LLVM /usr/bin/llvm-config-10)@g" config.cmake """ # noqa: E501
)
os.system('cmake .. && make -j {} && make runtime'.format(g_jobs))
# set env
os.system(
""" echo 'export LD_LIBRARY_PATH={}:$LD_LIBRARY_PATH' >> ~/mmdeploy.env """ # noqa: E501
.format(os.path.join(tvm_dir, 'build')))
# install python package
os.chdir(osp.join(tvm_dir, 'python'))
os.system(""" python3 setup.py install --user """)
# install dependency
os.system(
""" python3 -m pip install xgboost decorator psutil scipy attrs tornado """ # noqa: E501
)
return tvm_dir
def install_mmdeploy(work_dir, tvm_dir):
print('-' * 10 + 'build and install mmdeploy' + '-' * 10)
time.sleep(3)
os.chdir(work_dir)
os.system('git submodule init')
os.system('git submodule update')
if not os.path.exists('build'):
os.system('mkdir build')
os.system('rm -rf build/CMakeCache.txt')
cmd = 'cd build && cmake ..'
cmd += ' -DMMDEPLOY_BUILD_SDK=ON '
cmd += ' -DMMDEPLOY_BUILD_EXAMPLES=ON '
cmd += ' -DMMDEPLOY_BUILD_SDK_PYTHON_API=ON '
cmd += ' -DMMDEPLOY_TARGET_DEVICES=cpu '
cmd += ' -DMMDEPLOY_TARGET_BACKENDS=tvm '
cmd += ' -DTVM_DIR={} '.format(tvm_dir)
os.system(cmd)
os.system('cd build && make -j {} && make install'.format(g_jobs))
os.system('python3 -m pip install -v -e .')
os.system(""" echo 'export PATH={}:$PATH' >> ~/mmdeploy.env """.format(
os.path.join(work_dir, 'mmdeploy', 'backend', 'tvm')))
try:
import mmcv
print(mmcv.__version__)
os.system('python3 tools/check_env.py')
except Exception:
print('Please install torch & mmcv later...')
return 0
def main():
"""Auto install mmdeploy with tvm. To verify this script:
1) use `sudo docker run -v /path/to/mmdeploy:/root/mmdeploy -v /path/to/Miniconda3-latest-Linux-x86_64.sh:/root/miniconda.sh -it ubuntu:18.04 /bin/bash` # noqa: E501
2) install conda and setup python environment
3) run `python3 tools/scripts/build_ubuntu_x64_tvm.py`
Returns:
int: -1 if any step fails, otherwise None.
"""
global g_jobs
g_jobs = get_job(sys.argv)
print('g_jobs {}'.format(g_jobs))
work_dir = osp.abspath(osp.join(__file__, '..', '..', '..'))
dep_dir = osp.abspath(osp.join(work_dir, '..', 'mmdeploy-dep'))
if not osp.exists(dep_dir):
if osp.isfile(dep_dir):
print('{} already exists and it is a file, exit.'.format(dep_dir))
return -1
os.mkdir(dep_dir)
success = ensure_base_env(work_dir, dep_dir)
if success != 0:
return -1
install_llvm(dep_dir)
tvm_dir = install_tvm(dep_dir)
if install_mmdeploy(work_dir, tvm_dir) != 0:
return -1
if osp.exists(osp.expanduser('~/mmdeploy.env')):
print('Please source ~/mmdeploy.env to setup your env !')
os.system('cat ~/mmdeploy.env')
if __name__ == '__main__':
main()