Merge master:ea7706cb into sync_master

pull/1091/head
RunningLeon 2022-09-22 19:49:50 +08:00
commit 40adb329a3
263 changed files with 8748 additions and 807 deletions

View File

@ -1,48 +0,0 @@
---
name: Bug report
about: Create a report to help us improve
title: ''
labels: ''
assignees: ''
---
Thanks for your bug report. We appreciate it a lot.
**Checklist**
1. I have searched related issues but cannot get the expected help.
2. I have read the [FAQ documentation](https://github.com/open-mmlab/mmdeploy/blob/master/docs/en/faq.md) but cannot get the expected help.
3. The bug has not been fixed in the latest version.
**Describe the bug**
A clear and concise description of what the bug is.
**Reproduction**
1. What command or script did you run?
```none
A placeholder for the command.
```
2. Did you make any modifications to the code or config? Did you understand what you have modified?
**Environment**
1. Please run `python tools/check_env.py` to collect necessary environment information and paste it here.
2. You may add additional information that may be helpful for locating the problem, such as
- How you installed PyTorch \[e.g., pip, conda, source\]
- Other environment variables that may be related (such as `$PATH`, `$LD_LIBRARY_PATH`, `$PYTHONPATH`, etc.)
**Error traceback**
If applicable, paste the error traceback here.
```none
A placeholder for the traceback.
```
**Bug fix**
If you have already identified the reason, you can provide the information here. If you are willing to create a PR to fix it, please also leave a comment here and that would be much appreciated!

View File

@ -0,0 +1,56 @@
name: Bug report
description: Create a report to help us improve
body:
- type: checkboxes
attributes:
label: Checklist
options:
- label: I have searched related issues but cannot get the expected help.
- label: I have read the [FAQ documentation](https://github.com/open-mmlab/mmdeploy/blob/master/docs/en/faq.md) but cannot get the expected help.
- label: The bug has not been fixed in the latest version.
- type: textarea
attributes:
label: Describe the bug
description: A clear and concise description of what the bug is.
validations:
required: true
- type: textarea
attributes:
label: Reproduction
description: |
1. What command or script did you run?
2. Did you make any modifications to the code or config? Did you understand what you have modified?
placeholder: |
A placeholder for the command.
validations:
required: true
- type: textarea
attributes:
label: Environment
description: |
1. Please run `python tools/check_env.py` to collect necessary environment information and paste it here.
2. You may add additional information that may be helpful for locating the problem, such as
- How you installed PyTorch \[e.g., pip, conda, source\]
- Other environment variables that may be related (such as `$PATH`, `$LD_LIBRARY_PATH`, `$PYTHONPATH`, etc.)
placeholder: Environment here.
render: Shell
validations:
required: true
- type: textarea
attributes:
label: Error traceback
description: |
If applicable, paste the error traceback here.
placeholder: Logs and traceback here.
render: Shell
- type: markdown
attributes:
value: >
If you have already identified the reason, you can provide the information here. If you are willing to create a PR to fix it, please also leave a comment here and that would be much appreciated!
Thanks for your bug report. We appreciate it a lot.
labels: ['Bug']

View File

@ -1,23 +0,0 @@
---
name: Feature request
about: Suggest an idea for this project
title: ''
labels: ''
assignees: ''
---
**Describe the feature**
**Motivation**
A clear and concise description of the motivation of the feature.
Ex1. It is inconvenient when \[....\].
**Related resources**
If there is an official code release or third-party implementation, please also provide the information here, which would be very helpful.
**Additional context**
Add any other context or screenshots about the feature request here.
If you would like to implement the feature and create a PR, please leave a comment here and that would be much appreciated.

View File

@ -0,0 +1,27 @@
name: Feature request
description: Suggest an idea for this project
body:
- type: markdown
attributes:
value: >
## Describe the feature
- type: textarea
attributes:
label: Motivation
description: |
A clear and concise description of the motivation of the feature.
Ex1. It is inconvenient when \[....\].
validations:
required: true
- type: textarea
attributes:
label: Related resources
description: |
If there is an official code release or third-party implementation, please also provide the information here, which would be very helpful.
- type: textarea
attributes:
label: Additional context
description: |
Add any other context or screenshots about the feature request here.
If you would like to implement the feature and create a PR, please leave a comment here and that would be much appreciated.

View File

@ -24,21 +24,29 @@ pattern = re.compile(r'\[.*?\]\(.*?\)')
def analyze_doc(home, path):
print('analyze {}'.format(path))
problem_list = []
code_block = False
code_block = 0
with open(path) as f:
lines = f.readlines()
for line in lines:
line = line.strip()
if line.startswith('```'):
code_block = not code_block
continue
code_block = 1 - code_block
if code_block is True:
if code_block > 0:
continue
if '[' in line and ']' in line and '(' in line and ')' in line:
all = pattern.findall(line)
for item in all:
# skip ![]()
if item.find('[') == item.find(']') - 1:
continue
# process the case [text()]()
offset = item.find('](')
if offset == -1:
continue
item = item[offset:]
start = item.find('(')
end = item.find(')')
ref = item[start + 1:end]
@ -62,7 +70,7 @@ def analyze_doc(home, path):
def traverse(target):
if os.path.isfile(target):
analyze_doc('./', target)
analyze_doc(os.path.dirname(target), target)
return
for home, dirs, files in os.walk(target):
for filename in files:
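
For context, the hunk above changes two things: links are now resolved against the markdown file's own directory (`analyze_doc(os.path.dirname(target), target)`), and link targets are extracted by cutting at the `](` separator so that link text containing parentheses, e.g. `[check_env()](faq.md)`, no longer confuses the parser. A minimal, self-contained sketch of that extraction logic follows; the helper name and sample line are illustrative, not part of the patch.

```python
import os
import re

# same pattern as the checker script uses
pattern = re.compile(r'\[.*?\]\(.*?\)')


def extract_refs(home, line):
    """Illustrative helper mirroring the patched extraction logic."""
    refs = []
    for item in pattern.findall(line):
        # skip empty-text links such as []() or ![]()
        if item.find('[') == item.find(']') - 1:
            continue
        # handle [text()](ref): cut at the '](' separator before looking for parentheses
        offset = item.find('](')
        if offset == -1:
            continue
        item = item[offset:]
        ref = item[item.find('(') + 1:item.find(')')]
        # relative refs are joined with the file's directory, matching the traverse() change
        refs.append(ref if ref.startswith('http') else os.path.join(home, ref))
    return refs


print(extract_refs('docs/en', 'run [check_env()](faq.md), see also ![](logo.png)'))
# -> ['docs/en/faq.md']
```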

View File

@ -0,0 +1,70 @@
#!/bin/sh
set -e
# print env
python3 tools/check_env.py
deploy_cfg=configs/mmcls/classification_onnxruntime_dynamic.py
device=cpu
model_cfg=../mmclassification/configs/resnet/resnet18_8xb32_in1k.py
checkpoint=https://download.openmmlab.com/mmclassification/v0/resnet/resnet18_8xb32_in1k_20210831-fbbb1da6.pth
sdk_cfg=configs/mmcls/classification_sdk_dynamic.py
input_img=../mmclassification/demo/demo.JPEG
work_dir=work_dir
echo "------------------------------------------------------------------------------------------------------------"
echo "deploy_cfg=$deploy_cfg"
echo "model_cfg=$model_cfg"
echo "checkpoint=$checkpoint"
echo "device=$device"
echo "------------------------------------------------------------------------------------------------------------"
mkdir -p $work_dir
python3 tools/deploy.py \
$deploy_cfg \
$model_cfg \
$checkpoint \
$input_img \
--device $device \
--work-dir $work_dir \
--dump-info
# prepare dataset
wget -P data/ https://github.com/open-mmlab/mmdeploy/files/9401216/imagenet-val100.zip
unzip data/imagenet-val100.zip -d data/
echo "Running test with ort"
python3 tools/test.py \
$deploy_cfg \
$model_cfg \
--model $work_dir/end2end.onnx \
--device $device \
--out $work_dir/ort_out.pkl \
--metrics accuracy \
--device $device \
--log2file $work_dir/test_ort.log \
--speed-test \
--log-interval 50 \
--warmup 20 \
--batch-size 32
echo "Running test with sdk"
# change topk for test
sed -i 's/"topk": 5/"topk": 1000/g' work_dir/pipeline.json
python3 tools/test.py \
$sdk_cfg \
$model_cfg \
--model $work_dir \
--device $device \
--out $work_dir/sdk_out.pkl \
--metrics accuracy \
--device $device \
--log2file $work_dir/test_sdk.log \
--speed-test \
--log-interval 50 \
--warmup 20 \
--batch-size 1

View File

@ -0,0 +1,54 @@
name: backend-ascend
on:
push:
paths-ignore:
- "demo/**"
- "tools/**"
pull_request:
paths-ignore:
- "demo/**"
- "tools/**"
- "docs/**"
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
jobs:
build_sdk_demo:
runs-on: ubuntu-18.04
strategy:
matrix:
python-version: [3.7]
steps:
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: Checkout repository
uses: actions/checkout@v3
with:
submodules: 'recursive'
- name: update
run: sudo apt update
- name: Install dependencies
run: |
sudo apt update
sudo apt install -y ffmpeg libsm6 libxext6 git ninja-build libglib2.0-0 libxrender-dev libc++1-9 libc++abi1-9
sudo add-apt-repository ppa:ignaciovizzo/opencv3-nonfree
sudo apt install libopencv-dev
pkg-config --libs opencv
- name: Install Ascend Toolkit
run: |
mkdir -p $GITHUB_WORKSPACE/Ascend
wget https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN%205.1.RC2/Ascend-cann-toolkit_5.1.RC2_linux-x86_64.run
sh Ascend-cann-toolkit_5.1.RC2_linux-x86_64.run --install --install-path=$GITHUB_WORKSPACE/Ascend --quiet --chip=Ascend310 --blacklist=devtools
- name: Build SDK Demo with Ascend backend
run: |
mkdir -p build && pushd build
source $GITHUB_WORKSPACE/Ascend/ascend-toolkit/set_env.sh
export LD_LIBRARY_PATH=$GITHUB_WORKSPACE/Ascend/ascend-toolkit/latest/runtime/lib64/stub:$LD_LIBRARY_PATH
cmake .. -DCMAKE_CXX_COMPILER=g++-7 -DMMDEPLOY_SHARED_LIBS=ON -DMMDEPLOY_BUILD_SDK=ON -DMMDEPLOY_BUILD_SDK_PYTHON_API=OFF -DMMDEPLOY_TARGET_DEVICES=cpu -DMMDEPLOY_BUILD_EXAMPLES=ON -DMMDEPLOY_TARGET_BACKENDS=acl -DMMDEPLOY_CODEBASES=all
make install -j4

View File

@ -0,0 +1,71 @@
name: backend-coreml
on:
push:
paths:
- "csrc/**"
- "demo/csrc/**"
- "CMakeLists.txt"
pull_request:
paths:
- "csrc/**"
- "demo/csrc/**"
- "CMakeLists.txt"
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
env:
DEVELOPER_DIR: /Applications/Xcode_13.4.1.app/Contents/Developer
permissions:
contents: read
jobs:
build_macos_arm64:
runs-on: macos-12
steps:
- name: Checkout repository
uses: actions/checkout@v3
with:
submodules: 'recursive'
- name: install opencv
run: |
wget https://github.com/irexyc/mmdeploy-ci-resource/releases/download/opencv/opencv-osx-arm64-4.6.0.tar.gz
mkdir $GITHUB_WORKSPACE/opencv-install
tar xf opencv-osx-arm64-4.6.0.tar.gz -C $GITHUB_WORKSPACE/opencv-install
- name: install libtorch
run: |
wget https://github.com/irexyc/mmdeploy-ci-resource/releases/download/libtorch/libtorch-osx-arm64-1.8.0.tar.gz
mkdir $GITHUB_WORKSPACE/libtorch-install
tar xf libtorch-osx-arm64-1.8.0.tar.gz -C $GITHUB_WORKSPACE/libtorch-install
- name: build
run: |
mkdir build && cd build
cmake .. -DCMAKE_OSX_ARCHITECTURES="arm64" \
-DCMAKE_SYSTEM_PROCESSOR="arm64" \
-DMMDEPLOY_BUILD_SDK=ON \
-DMMDEPLOY_TARGET_DEVICES="cpu" \
-DMMDEPLOY_CODEBASES=all \
-DOpenCV_DIR=$GITHUB_WORKSPACE/opencv-install/lib/cmake/opencv4 \
-DTorch_DIR=$GITHUB_WORKSPACE/libtorch-install/share/cmake/Torch \
-DMMDEPLOY_TARGET_BACKENDS="coreml" \
-DMMDEPLOY_BUILD_EXAMPLES=ON \
-DMMDEPLOY_SHARED_LIBS=OFF
cmake --build . -j 3
cmake --build . --target install
- name: build-shared
run: |
mkdir build-shared && cd build-shared
cmake .. -DCMAKE_OSX_ARCHITECTURES="arm64" \
-DCMAKE_SYSTEM_PROCESSOR="arm64" \
-DMMDEPLOY_BUILD_SDK=ON \
-DMMDEPLOY_TARGET_DEVICES="cpu" \
-DMMDEPLOY_CODEBASES=all \
-DOpenCV_DIR=$GITHUB_WORKSPACE/opencv-install/lib/cmake/opencv4 \
-DTorch_DIR=$GITHUB_WORKSPACE/libtorch-install/share/cmake/Torch \
-DMMDEPLOY_TARGET_BACKENDS="coreml" \
-DMMDEPLOY_BUILD_EXAMPLES=ON \
-DMMDEPLOY_SHARED_LIBS=ON
cmake --build . -j 3
cmake --build . --target install

View File

@ -22,11 +22,6 @@ jobs:
strategy:
matrix:
python-version: [3.7]
torch: [1.9.0]
include:
- torch: 1.9.0
torch_version: torch1.9
torchvision: 0.10.0
steps:
- name: Checkout repository
uses: actions/checkout@v3
@ -65,3 +60,23 @@ jobs:
echo $(pwd)
ln -s build/bin/mmdeploy_onnx2ncnn ./
python3 .github/scripts/test_onnx2ncnn.py --run 1
script_install:
runs-on: ubuntu-20.04
strategy:
matrix:
python-version: [3.7]
steps:
- name: Checkout repository
uses: actions/checkout@v3
with:
submodules: 'recursive'
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: Install mmdeploy
run: |
python3 tools/scripts/build_ubuntu_x64_ncnn.py
python3 -m pip install torch==1.8.2 torchvision==0.9.2 --extra-index-url https://download.pytorch.org/whl/lts/1.8/cpu
python3 -m pip install mmcv-full==1.5.1 -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.8.0/index.html
python3 -c 'import mmdeploy.apis.ncnn as ncnn_api; assert ncnn_api.is_available() and ncnn_api.is_custom_ops_available()'

View File

@ -0,0 +1,48 @@
name: backend-ort
on:
push:
paths-ignore:
- "demo/**"
- "tools/**"
pull_request:
paths-ignore:
- "demo/**"
- "tools/**"
- "docs/**"
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
jobs:
script_install:
runs-on: ubuntu-20.04
strategy:
matrix:
python-version: [3.7]
steps:
- name: Checkout repository
uses: actions/checkout@v3
with:
submodules: 'recursive'
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: Install mmdeploy
run: |
python3 tools/scripts/build_ubuntu_x64_ort.py
python3 -m pip install torch==1.8.2 torchvision==0.9.2 --extra-index-url https://download.pytorch.org/whl/lts/1.8/cpu
python3 -m pip install mmcv-full==1.5.1 -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.8.0/index.html
python3 -c 'import mmdeploy.apis.onnxruntime as ort_api; assert ort_api.is_available() and ort_api.is_custom_ops_available()'
- name: test mmcls full pipeline
run: |
pip install openmim
mim install mmcls
git clone --depth 1 --single-branch --branch master https://github.com/open-mmlab/mmclassification.git ../mmclassification
export MMDEPLOY_DIR=$(pwd)
export ONNXRUNTIME_DIR=$MMDEPLOY_DIR/../mmdeploy-dep/onnxruntime-linux-x64-1.8.1
export LD_LIBRARY_PATH=$ONNXRUNTIME_DIR/lib:$MMDEPLOY_DIR/build/install/lib:$LD_LIBRARY_PATH
bash .github/scripts/test_mmcls_full_pipeline.sh

View File

@ -0,0 +1,39 @@
name: backend-pplnn
on:
push:
paths-ignore:
- "demo/**"
- "tools/**"
pull_request:
paths-ignore:
- "demo/**"
- "tools/**"
- "docs/**"
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
jobs:
script_install:
runs-on: ubuntu-18.04
strategy:
matrix:
python-version: [3.7]
steps:
- name: Checkout repository
uses: actions/checkout@v3
with:
submodules: 'recursive'
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: Install mmdeploy
run: |
python3 tools/scripts/build_ubuntu_x64_pplnn.py
python3 -m pip install torch==1.8.2 torchvision==0.9.2 --extra-index-url https://download.pytorch.org/whl/lts/1.8/cpu
python3 -m pip install mmcv-full==1.5.1 -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.8.0/index.html
python3 -c 'import mmdeploy.apis.pplnn as pplnn_api; assert pplnn_api.is_available()'

View File

@ -0,0 +1,36 @@
name: backend-ort
on:
push:
paths-ignore:
- "demo/**"
- "tools/**"
pull_request:
paths-ignore:
- "demo/**"
- "tools/**"
- "docs/**"
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
jobs:
script_install:
runs-on: ubuntu-18.04
strategy:
matrix:
python-version: [3.7]
steps:
- name: Checkout repository
uses: actions/checkout@v3
with:
submodules: 'recursive'
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: Install mmdeploy
run: |
python3 tools/scripts/build_ubuntu_x64_torchscript.py

View File

@ -121,7 +121,7 @@ jobs:
run: |
python -V
python -m pip install mmcv-full==${{matrix.mmcv}} -f https://download.openmmlab.com/mmcv/dist/cu102/${{matrix.torch_version}}/index.html
python -m pip install -r requirements.txt
CFLAGS=`python -c 'import sysconfig;print("-I"+sysconfig.get_paths()["include"])'` python -m pip install -r requirements.txt
pip install -U pycuda
python -m pip install -U numpy
- name: Build and install
@ -167,7 +167,7 @@ jobs:
run: |
python -V
python -m pip install mmcv-full==${{matrix.mmcv}} -f https://download.openmmlab.com/mmcv/dist/cu111/${{matrix.torch_version}}/index.html
python -m pip install -r requirements.txt
CFLAGS=`python -c 'import sysconfig;print("-I"+sysconfig.get_paths()["include"])'` python -m pip install -r requirements.txt
pip install -U pycuda
python -m pip install -U numpy
- name: Build and install

View File

@ -0,0 +1,56 @@
name: build_riscv64_gcc
on:
push:
paths:
- "csrc/**"
- "demo/csrc/**"
- "CMakeLists.txt"
pull_request:
paths-ignore:
- "csrc/**"
- "demo/csrc/**"
- "CMakeLists.txt"
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
jobs:
build_riscv64_gcc:
runs-on: ubuntu-20.04
steps:
- name: Checkout repository
uses: actions/checkout@v3
with:
submodules: 'recursive'
- name: riscv64-gnu-toolchain
run: |
sudo apt-get update
sudo apt-get install g++-riscv64-linux-gnu
- name: install opencv
run: |
mkdir $GITHUB_WORKSPACE/opencv-install
wget https://github.com/irexyc/mmdeploy-ci-resource/raw/opencv/opencv_4.6.0_linux_riscv64.tar.gz
tar xf opencv_4.6.0_linux_riscv64.tar.gz -C $GITHUB_WORKSPACE/opencv-install
- name: install ncnn
run: |
mkdir $GITHUB_WORKSPACE/ncnn-install
wget https://github.com/irexyc/mmdeploy-ci-resource/raw/ncnn/ncnn_20220729_linux_riscv64.tar.gz
tar xf ncnn_20220729_linux_riscv64.tar.gz -C $GITHUB_WORKSPACE/ncnn-install
- name: build
run: |
mkdir build && cd build
cmake .. \
-DCMAKE_TOOLCHAIN_FILE=../cmake/toolchains/riscv64-linux-gnu.cmake \
-DMMDEPLOY_BUILD_SDK=ON \
-DMMDEPLOY_SHARED_LIBS=ON \
-DMMDEPLOY_BUILD_EXAMPLES=ON \
-DMMDEPLOY_TARGET_DEVICES="cpu" \
-DMMDEPLOY_TARGET_BACKENDS="ncnn" \
-Dncnn_DIR=$GITHUB_WORKSPACE/ncnn-install/lib/cmake/ncnn/ \
-DMMDEPLOY_CODEBASES=all \
-DOpenCV_DIR=$GITHUB_WORKSPACE/opencv-install/lib/cmake/opencv4
make -j$(nproc)
make install

View File

@ -50,7 +50,7 @@ jobs:
run: |
python -V
python -m pip install mmcv-full==${{matrix.mmcv}} -f https://download.openmmlab.com/mmcv/dist/cu111/${{matrix.torch_version}}/index.html
python -m pip install -r requirements.txt
CFLAGS=`python -c 'import sysconfig;print("-I"+sysconfig.get_paths()["include"])'` python -m pip install -r requirements.txt
python -m pip install -U numpy
- name: Install mmcls

.gitignore
View File

@ -155,6 +155,14 @@ mmdeploy/backend/ncnn/onnx2ncnn
# OCR dicts
dicts
# ascend
fusion_result.json
# snpe
grpc-cpp-plugin
service/snpe/grpc_cpp_plugin
# elena-code
csrc/mmdeploy/preprocess/elena/json
csrc/mmdeploy/preprocess/elena/cpu_kernel/*
csrc/mmdeploy/preprocess/elena/cuda_kernel/*

View File

@ -5,7 +5,7 @@ endif ()
message(STATUS "CMAKE_INSTALL_PREFIX: ${CMAKE_INSTALL_PREFIX}")
cmake_minimum_required(VERSION 3.14)
project(MMDeploy VERSION 0.7.0)
project(MMDeploy VERSION 0.8.0)
set(CMAKE_CXX_STANDARD 17)
@ -34,6 +34,7 @@ option(MMDEPLOY_BUILD_EXAMPLES "build examples" OFF)
option(MMDEPLOY_SPDLOG_EXTERNAL "use external spdlog" OFF)
option(MMDEPLOY_ZIP_MODEL "support SDK model in zip format" OFF)
option(MMDEPLOY_COVERAGE "build SDK for coverage" OFF)
option(MMDEPLOY_ELENA_FUSION "use elena to fuse preprocess" OFF)
set(MMDEPLOY_TARGET_DEVICES "cpu" CACHE STRING "target devices to support")
set(MMDEPLOY_TARGET_BACKENDS "" CACHE STRING "target inference engines to support")
@ -77,6 +78,10 @@ if (MSVC)
add_compile_options($<$<COMPILE_LANGUAGE:CXX>:/wd4251>)
endif ()
if(APPLE)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fobjc-arc")
endif()
add_library(MMDeployStaticModules INTERFACE)
add_library(MMDeployDynamicModules INTERFACE)
add_library(MMDeployLibs INTERFACE)

View File

@ -53,9 +53,9 @@ MMDeploy 是 [OpenMMLab](https://openmmlab.com/) 模型部署工具箱,**为
### 支持多种推理后端
| ONNX Runtime | TensorRT | ppl.nn | ncnn | OpenVINO | LibTorch | snpe | Ascend | Core ML | RKNN | more |
| ------------ | -------- | ------ | ---- | -------- | -------- | ---- | ------ | ------- | ---- | ------------------------------------------------- |
| ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | [benchmark](docs/zh_cn/03-benchmark/benchmark.md) |
| ONNX Runtime | TensorRT | ppl.nn | ncnn | OpenVINO | LibTorch | snpe | Ascend | Core ML | RKNN | more |
| ------------ | -------- | ------ | ---- | -------- | -------- | ---- | ------ | ------- | ---- | ---------------------------------------------- |
| ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | [benchmark](docs/en/03-benchmark/benchmark.md) |
### SDK 可高度定制化

View File

@ -16,19 +16,19 @@ find_package(CUDA REQUIRED)
if (MSVC)
set(CMAKE_CUDA_COMPILER ${CUDA_TOOLKIT_ROOT_DIR}/bin/nvcc.exe)
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -Xcompiler=/wd4819,/wd4828")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler=/wd4819,/wd4828")
if (HAVE_CXX_FLAG_UTF_8)
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -Xcompiler=/utf-8")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler=/utf-8")
endif ()
else ()
set(CMAKE_CUDA_COMPILER ${CUDA_TOOLKIT_ROOT_DIR}/bin/nvcc)
# Explicitly set the cuda host compiler. Because the default host compiler #
# selected by cmake maybe wrong.
set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER})
set(CUDA_NVCC_FLAGS
"${CUDA_NVCC_FLAGS} -Xcompiler=-fPIC,-Wall,-fvisibility=hidden")
set(CMAKE_CUDA_FLAGS
"${CMAKE_CUDA_FLAGS} -Xcompiler=-fPIC,-Wall,-fvisibility=hidden")
if (CMAKE_CXX_COMPILER_ID MATCHES "GNU")
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -Xcompiler=-fno-gnu-unique")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler=-fno-gnu-unique")
endif ()
endif ()
@ -62,10 +62,12 @@ if (NOT CMAKE_CUDA_ARCHITECTURES)
endif ()
endif ()
set(CUDA_NVCC_FLAGS_DEBUG "-g -O0")
set(CUDA_NVCC_FLAGS_RELEASE "-O3")
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS}")
set(CMAKE_CUDA_FLAGS_DEBUG "-g -O0")
set(CMAKE_CUDA_FLAGS_RELEASE "-O3")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMMDEPLOY_USE_CUDA=1")
if (NOT MSVC)
set(CMAKE_CUDA_STANDARD 14)
endif ()
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${CUDA_NVCC_FLAGS} ${_NVCC_FLAGS}")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${_NVCC_FLAGS}")

View File

@ -0,0 +1,17 @@
set(CMAKE_SYSTEM_NAME Linux)
set(CMAKE_SYSTEM_PROCESSOR riscv)
set(CMAKE_C_COMPILER "riscv64-linux-gnu-gcc")
set(CMAKE_CXX_COMPILER "riscv64-linux-gnu-g++")
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)
set(CMAKE_C_FLAGS "-march=rv64gc")
set(CMAKE_CXX_FLAGS "-march=rv64gc")
# cache flags
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}" CACHE STRING "c flags")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}" CACHE STRING "c++ flags")

View File

@ -0,0 +1,26 @@
set(CMAKE_SYSTEM_NAME Linux)
set(CMAKE_SYSTEM_PROCESSOR riscv)
if(DEFINED ENV{RISCV_ROOT_PATH})
file(TO_CMAKE_PATH $ENV{RISCV_ROOT_PATH} RISCV_ROOT_PATH)
else()
message(FATAL_ERROR "RISCV_ROOT_PATH env must be defined")
endif()
set(CMAKE_C_COMPILER ${RISCV_ROOT_PATH}/bin/riscv64-unknown-linux-gnu-gcc)
set(CMAKE_CXX_COMPILER ${RISCV_ROOT_PATH}/bin/riscv64-unknown-linux-gnu-g++)
set(CMAKE_SYSROOT "${RISCV_ROOT_PATH}/sysroot" CACHE PATH "riscv sysroot")
set(CMAKE_FIND_ROOT_PATH ${RISCV_ROOT_PATH}/riscv64-unknown-linux-gnu)
set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)
set(CMAKE_C_FLAGS "-march=rv64gc")
set(CMAKE_CXX_FLAGS "-march=rv64gc")
# cache flags
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}" CACHE STRING "c flags")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}" CACHE STRING "c++ flags")

View File

@ -0,0 +1 @@
backend_config = dict(type='ascend')

View File

@ -0,0 +1 @@
backend_config = dict(type='coreml', convert_to='mlprogram')

View File

@ -0,0 +1,8 @@
backend_config = dict(
type='rknn',
common_config=dict(
mean_values=None,
std_values=None,
target_platform='rk3588',
optimization_level=3),
quantization_config=dict(do_quantization=False, dataset=None))

View File

@ -0,0 +1,9 @@
_base_ = ['./classification_dynamic.py', '../_base_/backends/ascend.py']
onnx_config = dict(input_shape=[224, 224])
backend_config = dict(model_inputs=[
dict(
dynamic_batch_size=[1, 2, 4, 8],
input_shapes=dict(input=[-1, 3, 224, 224]))
])

View File

@ -0,0 +1,5 @@
_base_ = ['./classification_static.py', '../_base_/backends/ascend.py']
onnx_config = dict(input_shape=[224, 224])
backend_config = dict(
model_inputs=[dict(input_shapes=dict(input=[1, 3, 224, 224]))])

View File

@ -0,0 +1,12 @@
_base_ = ['../_base_/torchscript_config.py', '../_base_/backends/coreml.py']
codebase_config = dict(type='mmcls', task='Classification')
backend_config = dict(model_inputs=[
dict(
input_shapes=dict(
input=dict(
min_shape=[1, 3, 224, 224],
max_shape=[8, 3, 224, 224],
default_shape=[1, 3, 224, 224])))
])

View File

@ -0,0 +1,5 @@
_base_ = ['./classification_static.py', '../_base_/backends/rknn.py']
onnx_config = dict(input_shape=[224, 224])
codebase_config = dict(model_type='rknn')
backend_config = dict(input_size_list=[[3, 224, 224]])

View File

@ -9,5 +9,5 @@ backend_config = dict(
input=dict(
min_shape=[1, 3, 224, 224],
opt_shape=[4, 3, 224, 224],
max_shape=[64, 3, 224, 224])))
max_shape=[8, 3, 224, 224])))
])

View File

@ -9,5 +9,5 @@ backend_config = dict(
input=dict(
min_shape=[1, 3, 224, 224],
opt_shape=[4, 3, 224, 224],
max_shape=[64, 3, 224, 224])))
max_shape=[8, 3, 224, 224])))
])

View File

@ -9,5 +9,5 @@ backend_config = dict(
input=dict(
min_shape=[1, 3, 224, 224],
opt_shape=[4, 3, 224, 224],
max_shape=[64, 3, 224, 224])))
max_shape=[8, 3, 224, 224])))
])

View File

@ -0,0 +1,11 @@
_base_ = ['./base_torchscript.py', '../../_base_/backends/coreml.py']
ir_config = dict(input_shape=(1344, 800))
backend_config = dict(model_inputs=[
dict(
input_shapes=dict(
input=dict(
min_shape=[1, 3, 800, 1344],
max_shape=[1, 3, 800, 1344],
default_shape=[1, 3, 800, 1344])))
])

View File

@ -0,0 +1,8 @@
_base_ = ['../_base_/base_dynamic.py', '../../_base_/backends/ascend.py']
onnx_config = dict(input_shape=[1344, 800])
backend_config = dict(model_inputs=[
dict(
dynamic_image_size=[(800, 1344), (1344, 800)],
input_shapes=dict(input=[1, 3, -1, -1]))
])

View File

@ -0,0 +1,5 @@
_base_ = ['../_base_/base_static.py', '../../_base_/backends/ascend.py']
onnx_config = dict(input_shape=[640, 640])
backend_config = dict(
model_inputs=[dict(input_shapes=dict(input=[1, 3, 640, 640]))])

View File

@ -0,0 +1,5 @@
_base_ = ['../_base_/base_static.py', '../../_base_/backends/ascend.py']
onnx_config = dict(input_shape=[1344, 800])
backend_config = dict(
model_inputs=[dict(input_shapes=dict(input=[1, 3, 800, 1344]))])

View File

@ -0,0 +1 @@
_base_ = ['../_base_/base_coreml_static-800x1344.py']

View File

@ -0,0 +1,17 @@
_base_ = ['../_base_/base_static.py', '../../_base_/backends/rknn.py']
onnx_config = dict(input_shape=[640, 640])
codebase_config = dict(model_type='rknn')
backend_config = dict(input_size_list=[[3, 640, 640]])
partition_config = dict(
type='rknn', # the partition policy name
apply_marks=True, # should always be set to True
partition_cfg=[
dict(
save_file='model.onnx', # name to save the partitioned onnx model
start=['detector_forward:input'], # [mark_name:input/output, ...]
end=['yolo_head:input']) # [mark_name:input/output, ...]
])

View File

@ -0,0 +1,9 @@
_base_ = ['./voxel-detection_dynamic.py', '../../_base_/backends/openvino.py']
onnx_config = dict(input_shape=None)
backend_config = dict(model_inputs=[
dict(
opt_shapes=dict(
voxels=[20000, 64, 4], num_points=[20000], coors=[20000, 4]))
])

View File

@ -1,6 +1,6 @@
_base_ = ['./voxel-detection_dynamic.py', '../../_base_/backends/tensorrt.py']
backend_config = dict(
common_config=dict(max_workspace_size=1 << 30),
common_config=dict(max_workspace_size=1 << 32),
model_inputs=[
dict(
input_shapes=dict(

View File

@ -0,0 +1,18 @@
_base_ = ['./voxel-detection_dynamic.py', '../../_base_/backends/tensorrt.py']
backend_config = dict(
common_config=dict(max_workspace_size=1 << 32),
model_inputs=[
dict(
input_shapes=dict(
voxels=dict(
min_shape=[5000, 64, 4],
opt_shape=[20000, 64, 4],
max_shape=[30000, 64, 4]),
num_points=dict(
min_shape=[5000], opt_shape=[20000], max_shape=[30000]),
coors=dict(
min_shape=[5000, 4],
opt_shape=[20000, 4],
max_shape=[30000, 4]),
))
])

View File

@ -0,0 +1,8 @@
_base_ = ['./text-detection_dynamic.py', '../../_base_/backends/ascend.py']
onnx_config = dict(input_shape=None)
backend_config = dict(model_inputs=[
dict(
input_shapes=dict(input=[-1, 3, -1, -1]),
dynamic_dims=[(1, 640, 640), (4, 640, 640), (1, 1280, 1280)])
])

View File

@ -0,0 +1,5 @@
_base_ = ['./text-detection_static.py', '../../_base_/backends/ascend.py']
onnx_config = dict(input_shape=[640, 640])
backend_config = dict(
model_inputs=[dict(input_shapes=dict(input=[1, 3, 640, 640]))])

View File

@ -0,0 +1,3 @@
_base_ = ['./pose-detection_static.py', '../_base_/backends/ncnn-int8.py']
onnx_config = dict(input_shape=[256, 256])

View File

@ -0,0 +1,5 @@
_base_ = ['./segmentation_static.py', '../_base_/backends/ascend.py']
onnx_config = dict(input_shape=[2048, 1024])
backend_config = dict(
model_inputs=[dict(input_shapes=dict(input=[1, 3, 1024, 2048]))])

View File

@ -0,0 +1,5 @@
_base_ = ['./segmentation_static.py', '../_base_/backends/ascend.py']
onnx_config = dict(input_shape=[1024, 512])
backend_config = dict(
model_inputs=[dict(input_shapes=dict(input=[1, 3, 512, 1024]))])

View File

@ -0,0 +1,14 @@
_base_ = [
'../_base_/torchscript_config.py', '../_base_/backends/coreml.py',
'./segmentation_static.py'
]
ir_config = dict(input_shape=[1024, 512])
backend_config = dict(model_inputs=[
dict(
input_shapes=dict(
input=dict(
min_shape=[1, 3, 512, 1024],
max_shape=[1, 3, 512, 1024],
default_shape=[1, 3, 512, 1024])))
])

View File

@ -0,0 +1,7 @@
_base_ = ['./segmentation_static.py', '../_base_/backends/rknn.py']
onnx_config = dict(input_shape=[512, 512])
codebase_config = dict(model_type='rknn')
backend_config = dict(input_size_list=[[3, 512, 512]])

View File

@ -0,0 +1,3 @@
_base_ = ['./text-detection_static.py', '../../_base_/backends/ncnn-int8.py']
onnx_config = dict(input_shape=None)

View File

@ -32,7 +32,8 @@ if ("ncnn" IN_LIST MMDEPLOY_TARGET_BACKENDS)
endif ()
# build TorchScript ops
if ("torchscript" IN_LIST MMDEPLOY_TARGET_BACKENDS)
message(STATUS "Build torchsciprt custom ops")
if ("torchscript" IN_LIST MMDEPLOY_TARGET_BACKENDS
OR "coreml" IN_LIST MMDEPLOY_TARGET_BACKENDS)
message(STATUS "Build torchscript custom ops")
add_subdirectory(torchscript)
endif ()

View File

@ -10,7 +10,7 @@ else ()
endif ()
if (NOT ANDROID AND NOT IOS)
if (NOT ANDROID AND NOT IOS AND NOT CMAKE_CROSSCOMPILING)
add_subdirectory(ops)
add_subdirectory(onnx2ncnn)
add_subdirectory(pyncnn_ext)

View File

@ -0,0 +1,150 @@
// Copyright (c) OpenMMLab. All rights reserved.
#include "gather_topk.hpp"
#include <assert.h>
#include <stdio.h>
#include <chrono>
#include "NvInferVersion.h"
#include "gather_topk_kernel.hpp"
#include "trt_serialize.hpp"
namespace mmdeploy {
namespace {
static const char *PLUGIN_VERSION{"1"};
static const char *PLUGIN_NAME{"GatherTopk"};
} // namespace
GatherTopk::GatherTopk(const std::string &name) : TRTPluginBase(name) {}
GatherTopk::GatherTopk(const std::string name, const void *data, size_t length)
: TRTPluginBase(name) {}
nvinfer1::IPluginV2DynamicExt *GatherTopk::clone() const TRT_NOEXCEPT {
GatherTopk *plugin = new GatherTopk(mLayerName);
plugin->setPluginNamespace(getPluginNamespace());
return plugin;
}
nvinfer1::DimsExprs GatherTopk::getOutputDimensions(
int outputIndex, const nvinfer1::DimsExprs *inputs, int nbInputs,
nvinfer1::IExprBuilder &exprBuilder) TRT_NOEXCEPT {
assert(inputs[0].nbDims >= inputs[1].nbDims);
nvinfer1::DimsExprs ret;
ret.nbDims = inputs[0].nbDims;
for (int i = 0; i < inputs[1].nbDims; ++i) {
ret.d[i] = inputs[1].d[i];
}
for (int i = inputs[1].nbDims; i < inputs[0].nbDims; ++i) {
ret.d[i] = inputs[0].d[i];
}
return ret;
}
bool GatherTopk::supportsFormatCombination(int pos, const nvinfer1::PluginTensorDesc *ioDesc,
int nbInputs, int nbOutputs) TRT_NOEXCEPT {
switch (pos) {
case 0:
// data
return (ioDesc[pos].type == nvinfer1::DataType::kFLOAT &&
ioDesc[pos].format == nvinfer1::TensorFormat::kLINEAR) ||
(ioDesc[pos].type == nvinfer1::DataType::kINT32 &&
ioDesc[pos].format == nvinfer1::TensorFormat::kLINEAR);
case 1:
// indices
return ioDesc[pos].type == nvinfer1::DataType::kINT32 &&
ioDesc[pos].format == nvinfer1::TensorFormat::kLINEAR;
case 2:
// output
return ioDesc[pos].type == ioDesc[0].type && ioDesc[pos].format == ioDesc[0].format;
default:
return true;
}
return true;
}
void GatherTopk::configurePlugin(const nvinfer1::DynamicPluginTensorDesc *inputs, int nbInputs,
const nvinfer1::DynamicPluginTensorDesc *outputs,
int nbOutputs) TRT_NOEXCEPT {}
size_t GatherTopk::getWorkspaceSize(const nvinfer1::PluginTensorDesc *inputs, int nbInputs,
const nvinfer1::PluginTensorDesc *outputs,
int nbOutputs) const TRT_NOEXCEPT {
return 0;
}
int GatherTopk::enqueue(const nvinfer1::PluginTensorDesc *inputDesc,
const nvinfer1::PluginTensorDesc *outputDesc, const void *const *inputs,
void *const *outputs, void *workSpace, cudaStream_t stream) TRT_NOEXCEPT {
const int *dims = &(inputDesc[0].dims.d[0]);
const int *indices_dims = &(inputDesc[1].dims.d[0]);
int nbDims = inputDesc[0].dims.nbDims;
int indice_nbDims = inputDesc[1].dims.nbDims;
const void *data = inputs[0];
const void *indices = inputs[1];
void *output = outputs[0];
auto data_type = inputDesc[0].type;
switch (data_type) {
case nvinfer1::DataType::kFLOAT:
gather_topk_impl<float>((float *)data, (int *)indices, dims, nbDims, indices_dims,
indice_nbDims, (float *)output, stream);
break;
case nvinfer1::DataType::kINT32:
gather_topk_impl<int>((int *)data, (int *)indices, dims, nbDims, indices_dims, indice_nbDims,
(int *)output, stream);
break;
default:
break;
}
return 0;
}
nvinfer1::DataType GatherTopk::getOutputDataType(int index, const nvinfer1::DataType *inputTypes,
int nbInputs) const TRT_NOEXCEPT {
return inputTypes[0];
}
// IPluginV2 Methods
const char *GatherTopk::getPluginType() const TRT_NOEXCEPT { return PLUGIN_NAME; }
const char *GatherTopk::getPluginVersion() const TRT_NOEXCEPT { return PLUGIN_VERSION; }
int GatherTopk::getNbOutputs() const TRT_NOEXCEPT { return 1; }
size_t GatherTopk::getSerializationSize() const TRT_NOEXCEPT { return 0; }
void GatherTopk::serialize(void *buffer) const TRT_NOEXCEPT {}
GatherTopkCreator::GatherTopkCreator() {
mPluginAttributes.clear();
mFC.nbFields = mPluginAttributes.size();
mFC.fields = mPluginAttributes.data();
}
const char *GatherTopkCreator::getPluginName() const TRT_NOEXCEPT { return PLUGIN_NAME; }
const char *GatherTopkCreator::getPluginVersion() const TRT_NOEXCEPT { return PLUGIN_VERSION; }
nvinfer1::IPluginV2 *GatherTopkCreator::createPlugin(
const char *name, const nvinfer1::PluginFieldCollection *fc) TRT_NOEXCEPT {
auto *plugin = new GatherTopk(name);
plugin->setPluginNamespace(getPluginNamespace());
return plugin;
}
nvinfer1::IPluginV2 *GatherTopkCreator::deserializePlugin(const char *name, const void *serialData,
size_t serialLength) TRT_NOEXCEPT {
auto plugin = new GatherTopk(name, serialData, serialLength);
plugin->setPluginNamespace(getPluginNamespace());
return plugin;
}
REGISTER_TENSORRT_PLUGIN(GatherTopkCreator);
} // namespace mmdeploy

View File

@ -0,0 +1,64 @@
// Copyright (c) OpenMMLab. All rights reserved.
#ifndef TRT_SCATTERND_HPP
#define TRT_SCATTERND_HPP
#include <cublas_v2.h>
#include <memory>
#include <string>
#include <vector>
#include "trt_plugin_base.hpp"
namespace mmdeploy {
class GatherTopk : public TRTPluginBase {
public:
GatherTopk(const std::string &name);
GatherTopk(const std::string name, const void *data, size_t length);
GatherTopk() = delete;
// IPluginV2DynamicExt Methods
nvinfer1::IPluginV2DynamicExt *clone() const TRT_NOEXCEPT override;
nvinfer1::DimsExprs getOutputDimensions(int outputIndex, const nvinfer1::DimsExprs *inputs,
int nbInputs, nvinfer1::IExprBuilder &exprBuilder)
TRT_NOEXCEPT override;
bool supportsFormatCombination(int pos, const nvinfer1::PluginTensorDesc *ioDesc, int nbInputs,
int nbOutputs) TRT_NOEXCEPT override;
void configurePlugin(const nvinfer1::DynamicPluginTensorDesc *in, int nbInputs,
const nvinfer1::DynamicPluginTensorDesc *out,
int nbOutputs) TRT_NOEXCEPT override;
size_t getWorkspaceSize(const nvinfer1::PluginTensorDesc *inputs, int nbInputs,
const nvinfer1::PluginTensorDesc *outputs,
int nbOutputs) const TRT_NOEXCEPT override;
int enqueue(const nvinfer1::PluginTensorDesc *inputDesc,
const nvinfer1::PluginTensorDesc *outputDesc, const void *const *inputs,
void *const *outputs, void *workspace, cudaStream_t stream) TRT_NOEXCEPT override;
// IPluginV2Ext Methods
nvinfer1::DataType getOutputDataType(int index, const nvinfer1::DataType *inputTypes,
int nbInputs) const TRT_NOEXCEPT override;
// IPluginV2 Methods
const char *getPluginType() const TRT_NOEXCEPT override;
const char *getPluginVersion() const TRT_NOEXCEPT override;
int getNbOutputs() const TRT_NOEXCEPT override;
size_t getSerializationSize() const TRT_NOEXCEPT override;
void serialize(void *buffer) const TRT_NOEXCEPT override;
};
class GatherTopkCreator : public TRTPluginCreatorBase {
public:
GatherTopkCreator();
const char *getPluginName() const TRT_NOEXCEPT override;
const char *getPluginVersion() const TRT_NOEXCEPT override;
nvinfer1::IPluginV2 *createPlugin(const char *name, const nvinfer1::PluginFieldCollection *fc)
TRT_NOEXCEPT override;
nvinfer1::IPluginV2 *deserializePlugin(const char *name, const void *serialData,
size_t serialLength) TRT_NOEXCEPT override;
};
} // namespace mmdeploy
#endif // TRT_SCATTERND_HPP

View File

@ -0,0 +1,46 @@
// Copyright (c) OpenMMLab. All rights reserved.
#include <functional>
#include <numeric>
#include <vector>
#include "common_cuda_helper.hpp"
#include "gather_topk_kernel.hpp"
#include "trt_plugin_helper.hpp"
template <typename scalar_t>
__global__ void gather_topk_kernel(const scalar_t* input, const int* indices, scalar_t* output,
int batch, int num_input, int num_indices, int channel) {
CUDA_1D_KERNEL_LOOP(index, batch * num_indices * channel) {
const int b_id = index / (num_indices * channel);
const int n_id = (index / channel) % num_indices;
const int c_id = index % channel;
const int input_n_id = indices[b_id * num_indices + n_id];
const scalar_t value = input[b_id * num_input * channel + input_n_id * channel + c_id];
output[b_id * num_indices * channel + n_id * channel + c_id] = value;
}
}
template <typename scalar_t>
void gather_topk_impl(const scalar_t* input, const int* indices, const int* dims, int nbDims,
const int* indices_dims, int indice_nbDims, scalar_t* output,
cudaStream_t stream) {
int batch = 1;
for (int i = 0; i < indice_nbDims - 1; ++i) batch *= dims[i];
int num_input = dims[indice_nbDims - 1];
int num_indices = indices_dims[indice_nbDims - 1];
int channel = 1;
for (int i = indice_nbDims; i < nbDims; ++i) channel *= dims[i];
const int col_block = DIVUP(batch * num_indices * channel, THREADS_PER_BLOCK);
gather_topk_kernel<<<col_block, THREADS_PER_BLOCK, 0, stream>>>(input, indices, output, batch,
num_input, num_indices, channel);
}
template void gather_topk_impl<float>(const float* input, const int* indices, const int* dims,
int nbDims, const int* indices_dims, int indice_nbDims,
float* output, cudaStream_t stream);
template void gather_topk_impl<int32_t>(const int32_t* input, const int* indices, const int* dims,
int nbDims, const int* indices_dims, int indice_nbDims,
int32_t* output, cudaStream_t stream);
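
To make the indexing in this kernel easier to follow, here is a short NumPy reference of the same gather-top-k semantics (a sketch for illustration, not the plugin's code path): the leading dimensions of `indices` act as batch dimensions, the last `indices` dimension selects entries along the corresponding axis of `data`, and any trailing `data` dimensions are carried through as channels, which also matches `getOutputDimensions` copying the indices dims first and the remaining data dims after.

```python
import numpy as np


def gather_topk_ref(data, indices):
    """NumPy sketch of the GatherTopk semantics.

    data:    (*batch_dims, num_input, *channel_dims)
    indices: (*batch_dims, num_indices)  -> output (*batch_dims, num_indices, *channel_dims)
    """
    batch_dims = indices.shape[:-1]
    num_indices = indices.shape[-1]
    channel_dims = data.shape[indices.ndim:]
    batch = int(np.prod(batch_dims, dtype=np.int64))
    d = data.reshape(batch, data.shape[indices.ndim - 1], -1)   # (batch, num_input, channel)
    i = indices.reshape(batch, num_indices)
    out = np.stack([d[b][i[b]] for b in range(batch)])          # gather rows per batch element
    return out.reshape(*batch_dims, num_indices, *channel_dims)


# e.g. keep the top-100 of 1000 candidate boxes for a batch of 2
data = np.random.rand(2, 1000, 4).astype(np.float32)
topk_idx = np.argsort(-np.random.rand(2, 1000), axis=-1)[:, :100].astype(np.int32)
print(gather_topk_ref(data, topk_idx).shape)  # (2, 100, 4)
```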

View File

@ -0,0 +1,10 @@
// Copyright (c) OpenMMLab. All rights reserved.
#ifndef TRT_GRID_SAMPLER_KERNEL_HPP
#define TRT_GRID_SAMPLER_KERNEL_HPP
#include <cuda_runtime.h>
template <typename scalar_t>
void gather_topk_impl(const scalar_t* input, const int* indices, const int* dims, int nbDims,
const int* indices_dims, int indice_nbDims, scalar_t* output,
cudaStream_t stream);
#endif // TRT_GRID_SAMPLER_KERNEL_HPP

View File

@ -0,0 +1,183 @@
// Copyright (c) OpenMMLab. All rights reserved
#include "scaled_dot_product_attention.hpp"
#include <assert.h>
#include <chrono>
#include "scaled_dot_product_attention_kernel.hpp"
#include "trt_serialize.hpp"
using namespace nvinfer1;
namespace mmdeploy {
namespace {
static const char *PLUGIN_VERSION{"1"};
static const char *PLUGIN_NAME{"ScaledDotProductAttentionTRT"};
} // namespace
ScaledDotProductAttentionTRT::ScaledDotProductAttentionTRT(const std::string &name)
: TRTPluginBase(name), mask_dim(0) {}
ScaledDotProductAttentionTRT::ScaledDotProductAttentionTRT(const std::string name, const void *data,
size_t length)
: TRTPluginBase(name), mask_dim(0) {}
ScaledDotProductAttentionTRT::~ScaledDotProductAttentionTRT() {}
nvinfer1::IPluginV2DynamicExt *ScaledDotProductAttentionTRT::clone() const TRT_NOEXCEPT {
ScaledDotProductAttentionTRT *plugin = new ScaledDotProductAttentionTRT(mLayerName);
plugin->setPluginNamespace(getPluginNamespace());
return plugin;
}
nvinfer1::DimsExprs ScaledDotProductAttentionTRT::getOutputDimensions(
int outputIndex, const nvinfer1::DimsExprs *inputs, int nbInputs,
nvinfer1::IExprBuilder &exprBuilder) TRT_NOEXCEPT {
if (outputIndex == 0) return inputs[0];
nvinfer1::DimsExprs ret;
ret.nbDims = 3;
ret.d[0] = inputs[0].d[0];
ret.d[1] = inputs[0].d[1];
ret.d[2] = inputs[1].d[1];
return ret;
}
bool ScaledDotProductAttentionTRT::supportsFormatCombination(
int pos, const nvinfer1::PluginTensorDesc *ioDesc, int nbInputs, int nbOutputs) TRT_NOEXCEPT {
if (pos == 0) {
return (ioDesc[pos].type == nvinfer1::DataType::kFLOAT &&
ioDesc[pos].format == nvinfer1::TensorFormat::kLINEAR);
} else {
return ioDesc[pos].type == ioDesc[0].type && ioDesc[pos].format == ioDesc[0].format;
}
}
// Attach the plugin object to an execution context and grant the plugin the
// access to some context resource.
void ScaledDotProductAttentionTRT::attachToContext(cudnnContext *cudnnContext,
cublasContext *cublasContext,
IGpuAllocator *gpuAllocator) TRT_NOEXCEPT {
_cublas_handle = cublasContext;
_cudnn_handle = cudnnContext;
cudnnCreateTensorDescriptor(&_x_desc);
cudnnCreateTensorDescriptor(&_y_desc);
cudnnCreateTensorDescriptor(&_mask_desc);
}
// Detach the plugin object from its execution context.
void ScaledDotProductAttentionTRT::detachFromContext() TRT_NOEXCEPT {
cudnnDestroyTensorDescriptor(_y_desc);
cudnnDestroyTensorDescriptor(_x_desc);
cudnnDestroyTensorDescriptor(_mask_desc);
}
void ScaledDotProductAttentionTRT::configurePlugin(const nvinfer1::DynamicPluginTensorDesc *in,
int nbInputs,
const nvinfer1::DynamicPluginTensorDesc *out,
int nbOutputs) TRT_NOEXCEPT {
if (nbInputs != 4) {
mask_dim = 0;
} else {
mask_dim = in[3].desc.dims.nbDims;
}
}
int ScaledDotProductAttentionTRT::enqueue(const nvinfer1::PluginTensorDesc *inputDesc,
const nvinfer1::PluginTensorDesc *outputDesc,
const void *const *inputs, void *const *outputs,
void *workSpace, cudaStream_t stream) TRT_NOEXCEPT {
if (CUDNN_STATUS_SUCCESS != cudnnSetStream(_cudnn_handle, stream)) return 1;
if (CUBLAS_STATUS_SUCCESS != cublasSetStream(_cublas_handle, stream)) return 1;
int B = inputDesc[0].dims.d[0]; // batch * heads
int Nt = inputDesc[0].dims.d[1];
int Ns = inputDesc[1].dims.d[1];
int E = inputDesc[0].dims.d[2];  // embedding size
const void *query = inputs[0];
const void *key = inputs[1];
const void *value = inputs[2];
const void *mask = nullptr;
int mask_dims[3];
mask_dims[0] = 0;
if (mask_dim > 0) {
mask = inputs[3];
// check if mask need broadcast
if (mask_dim == 2) {
mask_dims[0] = 1;
mask_dims[1] = inputDesc[3].dims.d[0];
mask_dims[2] = inputDesc[3].dims.d[1];
} else {
mask_dims[0] = inputDesc[3].dims.d[0];
mask_dims[1] = inputDesc[3].dims.d[1];
mask_dims[2] = inputDesc[3].dims.d[2];
}
}
void *output = outputs[0];
void *attn = outputs[1];
auto data_type = inputDesc[0].type;
cudnnDataType_t cudnn_dtype{};
convert_trt2cudnn_dtype(data_type, &cudnn_dtype);
switch (data_type) {
case nvinfer1::DataType::kFLOAT:
dot_product_attention_impl<float>((float *)query, (float *)key, (float *)value, (float *)mask,
(float *)attn, (float *)output, B, Nt, Ns, E, &mask_dims[0],
_x_desc, _y_desc, _mask_desc, cudnn_dtype, stream,
_cublas_handle, _cudnn_handle);
break;
default:
return 1;
}
return 0;
}
nvinfer1::DataType ScaledDotProductAttentionTRT::getOutputDataType(
int index, const nvinfer1::DataType *inputTypes, int nbInputs) const TRT_NOEXCEPT {
return inputTypes[0];
}
// IPluginV2 Methods
const char *ScaledDotProductAttentionTRT::getPluginType() const TRT_NOEXCEPT { return PLUGIN_NAME; }
const char *ScaledDotProductAttentionTRT::getPluginVersion() const TRT_NOEXCEPT {
return PLUGIN_VERSION;
}
int ScaledDotProductAttentionTRT::getNbOutputs() const TRT_NOEXCEPT { return 2; }
size_t ScaledDotProductAttentionTRT::getSerializationSize() const TRT_NOEXCEPT { return 0; }
void ScaledDotProductAttentionTRT::serialize(void *buffer) const TRT_NOEXCEPT {}
////////////////////// creator /////////////////////////////
ScaledDotProductAttentionTRTCreator::ScaledDotProductAttentionTRTCreator() {}
const char *ScaledDotProductAttentionTRTCreator::getPluginName() const TRT_NOEXCEPT {
return PLUGIN_NAME;
}
const char *ScaledDotProductAttentionTRTCreator::getPluginVersion() const TRT_NOEXCEPT {
return PLUGIN_VERSION;
}
nvinfer1::IPluginV2 *ScaledDotProductAttentionTRTCreator::createPlugin(
const char *name, const nvinfer1::PluginFieldCollection *fc) TRT_NOEXCEPT {
ScaledDotProductAttentionTRT *plugin = new ScaledDotProductAttentionTRT(name);
plugin->setPluginNamespace(getPluginNamespace());
return plugin;
}
nvinfer1::IPluginV2 *ScaledDotProductAttentionTRTCreator::deserializePlugin(
const char *name, const void *serialData, size_t serialLength) TRT_NOEXCEPT {
auto plugin = new ScaledDotProductAttentionTRT(name, serialData, serialLength);
plugin->setPluginNamespace(getPluginNamespace());
return plugin;
}
REGISTER_TENSORRT_PLUGIN(ScaledDotProductAttentionTRTCreator);
} // namespace mmdeploy

View File

@ -0,0 +1,73 @@
// Copyright (c) OpenMMLab. All rights reserved.
#ifndef TRT_SCALED_DOT_PRODUCT_ATTENTION_HPP
#define TRT_SCALED_DOT_PRODUCT_ATTENTION_HPP
#include <cublas_v2.h>
#include <memory>
#include <string>
#include <vector>
#include "trt_plugin_base.hpp"
namespace mmdeploy {
class ScaledDotProductAttentionTRT : public TRTPluginBase {
public:
ScaledDotProductAttentionTRT(const std::string &name);
ScaledDotProductAttentionTRT(const std::string name, const void *data, size_t length);
ScaledDotProductAttentionTRT() = delete;
~ScaledDotProductAttentionTRT() TRT_NOEXCEPT override;
virtual void configurePlugin(const nvinfer1::DynamicPluginTensorDesc *in, int nbInputs,
const nvinfer1::DynamicPluginTensorDesc *out,
int nbOutputs) TRT_NOEXCEPT override;
// IPluginV2DynamicExt Methods
nvinfer1::IPluginV2DynamicExt *clone() const TRT_NOEXCEPT override;
nvinfer1::DimsExprs getOutputDimensions(int outputIndex, const nvinfer1::DimsExprs *inputs,
int nbInputs, nvinfer1::IExprBuilder &exprBuilder)
TRT_NOEXCEPT override;
bool supportsFormatCombination(int pos, const nvinfer1::PluginTensorDesc *ioDesc, int nbInputs,
int nbOutputs) TRT_NOEXCEPT override;
int enqueue(const nvinfer1::PluginTensorDesc *inputDesc,
const nvinfer1::PluginTensorDesc *outputDesc, const void *const *inputs,
void *const *outputs, void *workspace, cudaStream_t stream) TRT_NOEXCEPT override;
// IPluginV2Ext Methods
nvinfer1::DataType getOutputDataType(int index, const nvinfer1::DataType *inputTypes,
int nbInputs) const TRT_NOEXCEPT override;
// IPluginV2 Methods
const char *getPluginType() const TRT_NOEXCEPT override;
const char *getPluginVersion() const TRT_NOEXCEPT override;
int getNbOutputs() const TRT_NOEXCEPT override;
size_t getSerializationSize() const TRT_NOEXCEPT override;
void serialize(void *buffer) const TRT_NOEXCEPT override;
void attachToContext(cudnnContext *cudnn, cublasContext *cublas,
nvinfer1::IGpuAllocator *allocator) TRT_NOEXCEPT override;
void detachFromContext() TRT_NOEXCEPT override;
private:
int mask_dim;
cublasHandle_t _cublas_handle{};
cudnnHandle_t _cudnn_handle{};
cudnnTensorDescriptor_t _x_desc{}, _y_desc{}, _mask_desc{};
};
class ScaledDotProductAttentionTRTCreator : public TRTPluginCreatorBase {
public:
ScaledDotProductAttentionTRTCreator();
const char *getPluginName() const TRT_NOEXCEPT override;
const char *getPluginVersion() const TRT_NOEXCEPT override;
nvinfer1::IPluginV2 *createPlugin(const char *name, const nvinfer1::PluginFieldCollection *fc)
TRT_NOEXCEPT override;
nvinfer1::IPluginV2 *deserializePlugin(const char *name, const void *serialData,
size_t serialLength) TRT_NOEXCEPT override;
};
} // namespace mmdeploy
#endif // TRT_SCALED_DOT_PRODUCT_ATTENTION_HPP

View File

@ -0,0 +1,103 @@
// Copyright (c) OpenMMLab. All rights reserved
#include <thrust/functional.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/transform.h>
#include <cmath>
#include <vector>
#include "common_cuda_helper.hpp"
#include "scaled_dot_product_attention_kernel.hpp"
#include "trt_plugin_helper.hpp"
template <typename scalar_t>
cublasStatus_t cublasgemmStridedBatchedWrap(cublasHandle_t handle, cublasOperation_t transa,
cublasOperation_t transb, int m, int n, int k,
const scalar_t* alpha, const scalar_t* A, int lda,
long long int strideA, const scalar_t* B, int ldb,
long long int strideB, const scalar_t* beta,
scalar_t* C, int ldc, long long int strideC,
int batchCount);
template <>
cublasStatus_t cublasgemmStridedBatchedWrap<float>(cublasHandle_t handle, cublasOperation_t transa,
cublasOperation_t transb, int m, int n, int k,
const float* alpha, const float* A, int lda,
long long int strideA, const float* B, int ldb,
long long int strideB, const float* beta,
float* C, int ldc, long long int strideC,
int batchCount) {
return cublasSgemmStridedBatched(handle, transa, transb, m, n, k, alpha, A, lda, strideA, B, ldb,
strideB, beta, C, ldc, strideC, batchCount);
}
template <>
cublasStatus_t cublasgemmStridedBatchedWrap<__half>(cublasHandle_t handle, cublasOperation_t transa,
cublasOperation_t transb, int m, int n, int k,
const __half* alpha, const __half* A, int lda,
long long int strideA, const __half* B, int ldb,
long long int strideB, const __half* beta,
__half* C, int ldc, long long int strideC,
int batchCount) {
return cublasHgemmStridedBatched(handle, transa, transb, m, n, k, alpha, A, lda, strideA, B, ldb,
strideB, beta, C, ldc, strideC, batchCount);
}
template <typename scalar_t>
void dot_product_attention_impl(const scalar_t* query, const scalar_t* key, const scalar_t* value,
const scalar_t* mask, scalar_t* attn, scalar_t* output, int B,
int Nt, int Ns, int E, const int* mask_dims,
cudnnTensorDescriptor_t& x_desc, cudnnTensorDescriptor_t& y_desc,
cudnnTensorDescriptor_t& mask_desc, cudnnDataType_t cudnn_dtype,
cudaStream_t stream, cublasHandle_t cublas_handle,
cudnnHandle_t cudnn_handle) {
{
// Q @ K
const int m = Ns;
const int n = Nt;
const int k = E;
const auto alpha = scalar_t(1.0f / sqrt(float(E)));
const auto beta = scalar_t(0);
cublasgemmStridedBatchedWrap(cublas_handle, CUBLAS_OP_T, CUBLAS_OP_N, m, n, k, &alpha, key, k,
Ns * E, query, k, Nt * E, &beta, attn, m, Nt * Ns, B);
}
if (mask_dims != nullptr && mask_dims[0] != 0) {
const auto alpha = scalar_t(1);
const auto beta = scalar_t(1);
cudnnSetTensor4dDescriptor(mask_desc, CUDNN_TENSOR_NCHW, cudnn_dtype, 1, mask_dims[0],
mask_dims[1], mask_dims[2]);
cudnnSetTensor4dDescriptor(x_desc, CUDNN_TENSOR_NCHW, cudnn_dtype, 1, B, Nt, Ns);
cudnnAddTensor(cudnn_handle, &alpha, mask_desc, mask, &beta, x_desc, attn);
}
{
// softmax attention
const auto alpha = scalar_t(1);
const auto beta = scalar_t(0);
cudnnSetTensor4dDescriptor(x_desc, CUDNN_TENSOR_NCHW, cudnn_dtype, B * Nt, Ns, 1, 1);
cudnnSetTensor4dDescriptor(y_desc, CUDNN_TENSOR_NCHW, cudnn_dtype, B * Nt, Ns, 1, 1);
cudnnSoftmaxForward(cudnn_handle, CUDNN_SOFTMAX_ACCURATE, CUDNN_SOFTMAX_MODE_INSTANCE, &alpha,
x_desc, attn, &beta, y_desc, attn);
}
{
// attn @ v
const int m = E;
const int n = Nt;
const int k = Ns;
const auto alpha = scalar_t(1);
const auto beta = scalar_t(0);
cublasgemmStridedBatchedWrap(cublas_handle, CUBLAS_OP_N, CUBLAS_OP_N, m, n, k, &alpha, value, m,
Ns * E, (const scalar_t*)(attn), k, Ns * Nt, &beta, output, m,
Nt * E, B);
}
}
template void dot_product_attention_impl<float>(
const float* query, const float* key, const float* value, const float* mask, float* attn,
float* output, int B, int Nt, int Ns, int E, const int* mask_dims,
cudnnTensorDescriptor_t& x_desc, cudnnTensorDescriptor_t& y_desc,
cudnnTensorDescriptor_t& mask_desc, cudnnDataType_t cudnn_dtype, cudaStream_t stream,
cublasHandle_t cublas_handle, cudnnHandle_t cudnn_handle);
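
As a cross-check on the cuBLAS/cuDNN call sequence above (Q @ K^T scaled by 1/sqrt(E), an optional additive mask broadcast over the batch, a softmax over the Ns axis, then the product with V), the following NumPy sketch reproduces the same math. It is a reference for the computation only, with arbitrary example sizes, not the plugin's implementation.

```python
import numpy as np


def scaled_dot_product_attention_ref(q, k, v, mask=None):
    """q: (B, Nt, E), k/v: (B, Ns, E), mask broadcastable to (B, Nt, Ns); B folds batch*heads."""
    E = q.shape[-1]
    attn = q @ k.transpose(0, 2, 1) / np.sqrt(E)        # Q @ K^T, scaled by 1/sqrt(E)
    if mask is not None:
        attn = attn + mask                               # additive mask, as with cudnnAddTensor
    attn = np.exp(attn - attn.max(-1, keepdims=True))    # numerically stable softmax over Ns
    attn /= attn.sum(-1, keepdims=True)
    return attn @ v, attn                                # same two outputs as the plugin: (output, attn)


B, Nt, Ns, E = 2 * 8, 16, 24, 32                         # e.g. batch=2, heads=8
q, k, v = (np.random.rand(B, n, E).astype(np.float32) for n in (Nt, Ns, Ns))
out, attn = scaled_dot_product_attention_ref(q, k, v)
print(out.shape, attn.shape)                             # (16, 16, 32) (16, 16, 24)
```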

View File

@ -0,0 +1,17 @@
// Copyright (c) OpenMMLab. All rights reserved
#ifndef TRT_SCALED_DOT_PRODUCT_ATTENTION_KERNEL_HPP
#define TRT_SCALED_DOT_PRODUCT_ATTENTION_KERNEL_HPP
#include <cublas_v2.h>
#include <cuda_runtime.h>
#include <cudnn.h>
template <typename scalar_t>
void dot_product_attention_impl(const scalar_t* query, const scalar_t* key, const scalar_t* value,
const scalar_t* mask, scalar_t* attn, scalar_t* output, int B,
int Nt, int Ns, int E, const int* mask_dims,
cudnnTensorDescriptor_t& x_desc, cudnnTensorDescriptor_t& y_desc,
cudnnTensorDescriptor_t& mask_desc, cudnnDataType_t cudnn_dtype,
cudaStream_t stream, cublasHandle_t cublas_handle,
cudnnHandle_t cudnn_handle);
#endif

View File

@ -1,10 +0,0 @@
// Copyright (c) OpenMMLab. All rights reserved.
#include "torch/script.h"
TORCH_LIBRARY(mmdeploy, m) {
m.def(
"modulated_deform_conv(Tensor input, Tensor weight, Tensor bias, Tensor offset, Tensor "
"mask, "
"int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h, int pad_w, int "
"dilation_h,int dilation_w, int groups, int deform_groups, bool with_bias) -> Tensor");
}

View File

@ -0,0 +1,13 @@
// Copyright (c) OpenMMLab. All rights reserved.
#include "torch/script.h"
TORCH_LIBRARY(mmdeploy, m) {
m.def(
"modulated_deform_conv(Tensor input, Tensor weight, Tensor bias, Tensor offset, Tensor "
"mask, "
"int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h, int pad_w, int "
"dilation_h,int dilation_w, int groups, int deform_groups, bool with_bias) -> Tensor")
.def(
"coreml_nms(Tensor boxes, Tensor scores, float iou_threshold, "
"float score_threshold, int max_boxes) -> Tensor[]");
}

View File

@ -0,0 +1,31 @@
#include <assert.h>
#include <vector>
#include "torch/script.h"
namespace mmdeploy {
using at::Tensor;
std::vector<Tensor> coreml_nms_cpu(Tensor boxes, Tensor scores, double iou_threshold,
double score_threshold, int64_t max_boxes) {
assert(boxes.dim() == 3); // bboxes with shape (batch_size, num_bboxes, 4)
assert(boxes.size(2) == 4);
assert(boxes.size(0) == scores.size(0)); // check batch size
assert(boxes.size(1) == scores.size(1)); // check num boxes
auto batch_size = boxes.size(0);
auto num_boxes = boxes.size(1);
auto num_classes = scores.size(2);
Tensor ret_boxes = at::zeros({batch_size, max_boxes, 4});
Tensor ret_scores = at::zeros({batch_size, max_boxes, num_classes});
Tensor indices = at::zeros({batch_size, max_boxes}, at::kInt);
Tensor num_outputs = at::zeros({batch_size}, at::kInt);
return std::vector<Tensor>({ret_boxes, ret_scores, indices, num_outputs});
}
TORCH_LIBRARY_IMPL(mmdeploy, CPU, m) { m.impl("coreml_nms", coreml_nms_cpu); }
} // namespace mmdeploy
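
For reference, once the ops library containing the registration above is built, the op is reachable through `torch.ops` under the `mmdeploy` namespace. The snippet below is a usage sketch only: the library filename/path is an assumption about your build tree, and the CPU implementation in this diff returns zero-filled placeholder tensors (it exists so the op can be traced and exported, not to run NMS on CPU).

```python
import torch

# the library path is an assumption; point it at the TorchScript ops library you built
torch.ops.load_library('build/lib/libmmdeploy_torchscript_ops.so')

boxes = torch.rand(1, 200, 4)    # (batch, num_boxes, 4)
scores = torch.rand(1, 200, 80)  # (batch, num_boxes, num_classes)

# matches the registered schema:
# coreml_nms(Tensor boxes, Tensor scores, float iou_threshold,
#            float score_threshold, int max_boxes) -> Tensor[]
ret_boxes, ret_scores, indices, num_outputs = torch.ops.mmdeploy.coreml_nms(
    boxes, scores, 0.5, 0.05, 100)
print(ret_boxes.shape, indices.shape)  # torch.Size([1, 100, 4]) torch.Size([1, 100])
```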

View File

@ -0,0 +1,55 @@
// Copyright (c) OpenMMLab. All rights reserved.
#include <algorithm>
#include <numeric>
#include "mmdeploy/codebase/mmcls/mmcls.h"
#include "mmdeploy/core/tensor.h"
#include "mmdeploy/core/utils/device_utils.h"
#include "mmdeploy/core/utils/formatter.h"
#include "mmdeploy/experimental/module_adapter.h"
using std::vector;
namespace mmdeploy::mmcls {
class MultiLabelLinearClsHead : public MMClassification {
public:
explicit MultiLabelLinearClsHead(const Value& cfg) : MMClassification(cfg) {}
Result<Value> operator()(const Value& infer_res) {
MMDEPLOY_DEBUG("infer_res: {}", infer_res);
auto output = infer_res["output"].get<Tensor>();
if (!(output.shape().size() >= 2 && output.data_type() == DataType::kFLOAT)) {
MMDEPLOY_ERROR("unsupported `output` tensor, shape: {}, dtype: {}", output.shape(),
(int)output.data_type());
return Status(eNotSupported);
}
auto class_num = (int)output.shape(1);
OUTCOME_TRY(auto _scores, MakeAvailableOnDevice(output, kHost, stream()));
OUTCOME_TRY(stream().Wait());
return GetLabels(_scores, class_num);
}
private:
Value GetLabels(const Tensor& scores, int class_num) const {
auto scores_data = scores.data<float>();
ClassifyOutput output;
for (int i = 0; i < class_num; ++i) {
auto label = ClassifyOutput::Label{i, scores_data[i]};
MMDEPLOY_DEBUG("label_id: {}, score: {}", label.label_id, label.score);
output.labels.push_back(label);
}
return to_value(std::move(output));
}
private:
static constexpr const auto kHost = Device{0};
};
REGISTER_CODEBASE_COMPONENT(MMClassification, MultiLabelLinearClsHead);
} // namespace mmdeploy::mmcls

View File

@ -4,6 +4,7 @@
#include "mmdeploy/core/registry.h"
#include "mmdeploy/core/utils/device_utils.h"
#include "mmdeploy/core/utils/formatter.h"
#include "mmdeploy/experimental/module_adapter.h"
using namespace std;

View File

@ -68,7 +68,7 @@ class Device {
constexpr explicit Device(int platform_id, int device_id = 0)
: platform_id_(platform_id), device_id_(device_id) {}
MMDEPLOY_API explicit Device(const char *platform_name, int device_id = 0);
MMDEPLOY_API explicit Device(const char* platform_name, int device_id = 0);
constexpr int device_id() const noexcept { return device_id_; }
@ -78,11 +78,11 @@ class Device {
constexpr bool is_device() const noexcept { return platform_id() > 0; }
constexpr bool operator==(const Device &other) const noexcept {
constexpr bool operator==(const Device& other) const noexcept {
return platform_id_ == other.platform_id_ && device_id_ == other.device_id_;
}
constexpr bool operator!=(const Device &other) const noexcept { return !(*this == other); }
constexpr bool operator!=(const Device& other) const noexcept { return !(*this == other); }
constexpr explicit operator bool() const noexcept { return platform_id_ >= 0 && device_id_ >= 0; }
@ -104,7 +104,7 @@ enum class MemcpyKind : int { HtoD, DtoH, DtoD };
class MMDEPLOY_API Platform {
public:
// throws if not found
explicit Platform(const char *platform_name);
explicit Platform(const char* platform_name);
// throws if not found
explicit Platform(int platform_id);
@ -113,11 +113,11 @@ class MMDEPLOY_API Platform {
int GetPlatformId() const;
// "" if invalid
const char *GetPlatformName() const;
const char* GetPlatformName() const;
bool operator==(const Platform &other) { return impl_ == other.impl_; }
bool operator==(const Platform& other) { return impl_ == other.impl_; }
bool operator!=(const Platform &other) { return !(*this == other); }
bool operator!=(const Platform& other) { return !(*this == other); }
explicit operator bool() const noexcept { return static_cast<bool>(impl_); }
@ -132,7 +132,7 @@ class MMDEPLOY_API Platform {
Platform GetPlatform(int platform_id);
Platform GetPlatform(const char *platform_name);
Platform GetPlatform(const char* platform_name);
class MMDEPLOY_API Stream {
public:
@ -140,7 +140,7 @@ class MMDEPLOY_API Stream {
explicit Stream(Device device, uint64_t flags = 0);
explicit Stream(Device device, void *native, uint64_t flags = 0);
explicit Stream(Device device, void* native, uint64_t flags = 0);
explicit Stream(Device device, std::shared_ptr<void> native, uint64_t flags = 0);
@ -150,25 +150,25 @@ class MMDEPLOY_API Stream {
Result<void> Wait();
Result<void> DependsOn(Event &event);
Result<void> DependsOn(Event& event);
Result<void> Submit(Kernel &kernel);
Result<void> Submit(Kernel& kernel);
void *GetNative(ErrorCode *ec = nullptr);
void* GetNative(ErrorCode* ec = nullptr);
Result<void> Copy(const Buffer &src, Buffer &dst, size_t size = -1, size_t src_offset = 0,
Result<void> Copy(const Buffer& src, Buffer& dst, size_t size = -1, size_t src_offset = 0,
size_t dst_offset = 0);
Result<void> Copy(const void *host_ptr, Buffer &dst, size_t size = -1, size_t dst_offset = 0);
Result<void> Copy(const void* host_ptr, Buffer& dst, size_t size = -1, size_t dst_offset = 0);
Result<void> Copy(const Buffer &src, void *host_ptr, size_t size = -1, size_t src_offset = 0);
Result<void> Copy(const Buffer& src, void* host_ptr, size_t size = -1, size_t src_offset = 0);
Result<void> Fill(const Buffer &dst, void *pattern, size_t pattern_size, size_t size = -1,
Result<void> Fill(const Buffer& dst, void* pattern, size_t pattern_size, size_t size = -1,
size_t offset = 0);
bool operator==(const Stream &other) const { return impl_ == other.impl_; }
bool operator==(const Stream& other) const { return impl_ == other.impl_; }
bool operator!=(const Stream &other) const { return !(*this == other); }
bool operator!=(const Stream& other) const { return !(*this == other); }
explicit operator bool() const noexcept { return static_cast<bool>(impl_); }
@ -184,7 +184,7 @@ class MMDEPLOY_API Stream {
};
template <typename T>
T GetNative(Stream &stream, ErrorCode *ec = nullptr) {
T GetNative(Stream& stream, ErrorCode* ec = nullptr) {
return reinterpret_cast<T>(stream.GetNative(ec));
}
@ -194,7 +194,7 @@ class MMDEPLOY_API Event {
explicit Event(Device device, uint64_t flags = 0);
explicit Event(Device device, void *native, uint64_t flags = 0);
explicit Event(Device device, void* native, uint64_t flags = 0);
explicit Event(Device device, std::shared_ptr<void> native, uint64_t flags = 0);
@ -204,13 +204,13 @@ class MMDEPLOY_API Event {
Result<void> Wait();
Result<void> Record(Stream &stream);
Result<void> Record(Stream& stream);
void *GetNative(ErrorCode *ec = nullptr);
void* GetNative(ErrorCode* ec = nullptr);
bool operator==(const Event &other) const { return impl_ == other.impl_; }
bool operator==(const Event& other) const { return impl_ == other.impl_; }
bool operator!=(const Event &other) const { return !(*this == other); }
bool operator!=(const Event& other) const { return !(*this == other); }
explicit operator bool() const noexcept { return static_cast<bool>(impl_); }
@ -223,7 +223,7 @@ class MMDEPLOY_API Event {
};
template <typename T>
T GetNative(Event &event, ErrorCode *ec = nullptr) {
T GetNative(Event& event, ErrorCode* ec = nullptr) {
return reinterpret_cast<T>(event.GetNative(ec));
}
@ -234,7 +234,7 @@ class MMDEPLOY_API Kernel {
Device GetDevice() const;
void *GetNative(ErrorCode *ec = nullptr);
void* GetNative(ErrorCode* ec = nullptr);
explicit operator bool() const noexcept { return static_cast<bool>(impl_); }
@ -243,7 +243,7 @@ class MMDEPLOY_API Kernel {
};
template <typename T>
T GetNative(Kernel &kernel, ErrorCode *ec = nullptr) {
T GetNative(Kernel& kernel, ErrorCode* ec = nullptr) {
return reinterpret_cast<T>(kernel.GetNative(ec));
}
@ -269,25 +269,25 @@ class MMDEPLOY_API Buffer {
Buffer(Device device, size_t size, Allocator allocator, size_t alignment = 1, uint64_t flags = 0);
Buffer(Device device, size_t size, void *native, uint64_t flags = 0);
Buffer(Device device, size_t size, void* native, uint64_t flags = 0);
Buffer(Device device, size_t size, std::shared_ptr<void> native, uint64_t flags = 0);
// create sub-buffer
Buffer(Buffer &buffer, size_t offset, size_t size, uint64_t flags = 0);
Buffer(Buffer& buffer, size_t offset, size_t size, uint64_t flags = 0);
size_t GetSize(ErrorCode *ec = nullptr) const;
size_t GetSize(ErrorCode* ec = nullptr) const;
// bool IsSubBuffer(ErrorCode *ec = nullptr);
// bool IsSubBuffer(ErrorCode* ec = nullptr);
void *GetNative(ErrorCode *ec = nullptr) const;
void* GetNative(ErrorCode* ec = nullptr) const;
Device GetDevice() const;
Allocator GetAllocator() const;
bool operator==(const Buffer &other) const { return impl_ == other.impl_; }
bool operator==(const Buffer& other) const { return impl_ == other.impl_; }
bool operator!=(const Buffer &other) const { return !(*this == other); }
bool operator!=(const Buffer& other) const { return !(*this == other); }
explicit operator bool() const noexcept { return static_cast<bool>(impl_); }
@ -300,12 +300,12 @@ class MMDEPLOY_API Buffer {
};
template <typename T>
T GetNative(Buffer &buffer, ErrorCode *ec = nullptr) {
T GetNative(Buffer& buffer, ErrorCode* ec = nullptr) {
return reinterpret_cast<T>(buffer.GetNative(ec));
}
template <typename T>
T GetNative(const Buffer &buffer, ErrorCode *ec = nullptr) {
T GetNative(const Buffer& buffer, ErrorCode* ec = nullptr) {
return reinterpret_cast<T>(buffer.GetNative(ec));
}
@ -315,13 +315,15 @@ class MMDEPLOY_API PlatformRegistry {
int Register(Creator creator);
int GetPlatform(const char *name, Platform *platform);
int AddAlias(const char* name, const char* target);
int GetPlatform(int id, Platform *platform);
int GetPlatform(const char* name, Platform* platform);
int GetPlatformId(const char *name);
int GetPlatform(int id, Platform* platform);
PlatformImpl *GetPlatformImpl(PlatformId id);
int GetPlatformId(const char* name);
PlatformImpl* GetPlatformImpl(PlatformId id);
private:
int GetNextId();
@ -335,8 +337,9 @@ class MMDEPLOY_API PlatformRegistry {
Platform platform;
};
std::vector<Entry> entries_;
std::vector<std::pair<std::string, std::string>> aliases_;
};
MMDEPLOY_API PlatformRegistry &gPlatformRegistry();
MMDEPLOY_API PlatformRegistry& gPlatformRegistry();
} // namespace mmdeploy
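
The new alias table lets one platform name resolve to another registered platform during lookup. A minimal sketch of the path added above, using the same "npu" -> "cpu" alias that the ACL device module registers later in this change (the include path follows that module; treat it as an assumption):

```cpp
// Minimal sketch of the alias mechanism declared above.
#include "mmdeploy/core/device_impl.h"  // assumed to expose gPlatformRegistry()

void AliasExample() {
  using namespace mmdeploy;
  gPlatformRegistry().AddAlias("npu", "cpu");  // same call the ACL module makes
  Platform platform;
  if (gPlatformRegistry().GetPlatform("npu", &platform) == 0) {
    // "npu" resolved through the alias table to the host ("cpu") platform
  }
}
```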

View File

@ -321,6 +321,11 @@ int PlatformRegistry::Register(Creator creator) {
return 0;
}
int PlatformRegistry::AddAlias(const char* name, const char* target) {
aliases_.emplace_back(name, target);
return 0;
}
int PlatformRegistry::GetNextId() {
for (int i = 1;; ++i) {
if (IsAvailable(i)) {
@ -339,6 +344,12 @@ bool PlatformRegistry::IsAvailable(int id) {
}
int PlatformRegistry::GetPlatform(const char* name, Platform* platform) {
for (const auto& alias : aliases_) {
if (name == alias.first) {
name = alias.second.c_str();
break;
}
}
for (const auto& entry : entries_) {
if (entry.name == name) {
*platform = entry.platform;
@ -357,7 +368,14 @@ int PlatformRegistry::GetPlatform(int id, Platform* platform) {
}
return -1;
}
int PlatformRegistry::GetPlatformId(const char* name) {
for (const auto& alias : aliases_) {
if (name == alias.first) {
name = alias.second.c_str();
break;
}
}
for (const auto& entry : entries_) {
if (entry.name == name) {
return entry.id;

View File

@ -20,6 +20,7 @@ Model::Model(const std::string& model_path) {
Model::Model(const void* buffer, size_t size) { Init(buffer, size).value(); }
Result<void> Model::Init(const std::string& model_path) {
model_path_ = model_path;
if (!fs::exists(model_path)) {
MMDEPLOY_ERROR("'{}' doesn't exist", model_path);
return Status(eFileNotExist);
@ -45,6 +46,8 @@ Result<void> Model::Init(const std::string& model_path) {
return Status(eNotSupported);
}
const std::string& Model::GetModelPath() const { return model_path_; }
Result<void> Model::Init(const void* buffer, size_t size) {
auto registry = ModelRegistry::Get();
auto entries = registry.ListEntries();

View File

@ -94,7 +94,14 @@ class MMDEPLOY_API Model {
*/
explicit operator bool() const { return impl_ != nullptr; }
/**
* @brief Get the model path used when initializing with a DirectoryModel
* @return the file path of the SDK model
*/
const std::string& GetModelPath() const;
private:
std::string model_path_;
std::shared_ptr<ModelImpl> impl_;
deploy_meta_info_t meta_;
};

View File

@ -94,17 +94,23 @@ class Span {
constexpr Span& operator=(const Span& other) noexcept = default;
friend bool operator==(const Span& a, const Span& b) {
if (a.size() != b.size()) return false;
template <typename U>
friend bool operator!=(const Span& a, const Span<U>& b) {
if (a.size() != b.size()) {
return true;
}
for (size_type i = 0; i < a.size(); ++i) {
if (a[i] != b[i]) {
return false;
return true;
}
}
return true;
return false;
}
friend bool operator!=(const Span& a, const Span& b) { return !(a == b); }
template <typename U>
friend bool operator==(const Span& a, const Span<U>& b) {
return !(a != b);
}
private:
T* data_;
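
The comparison operators are now templated on the element type of the right-hand span, so spans over `T` and `const T` (or otherwise comparable element types) can be compared element-wise. A small sketch, relying on the pointer+size constructor and class template argument deduction that other code in this change already uses (e.g. AclNet::ReshapeStatic):

```cpp
// Sketch of the mixed-element-type comparison enabled above.
// (Span header as in the file above; exact include path omitted.)
#include <cstdint>

void SpanCompareExample() {
  int64_t a[] = {1, 3, 224, 224};
  const int64_t b[] = {1, 3, 224, 224};
  mmdeploy::Span lhs(a, 4);  // Span<int64_t>
  mmdeploy::Span rhs(b, 4);  // Span<const int64_t>
  bool same = (lhs == rhs);  // true: equal sizes and element-wise equal
  (void)same;
}
```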

View File

@ -115,9 +115,9 @@ Result<void> Tensor::CopyFrom(const Tensor& tensor, Stream stream) {
if (!stream) {
auto device = desc_.device.is_device() ? desc_.device : tensor.desc().device;
auto default_stream = Stream::GetDefault(device);
OUTCOME_TRY(default_stream.Copy(tensor.buffer(), buffer_));
OUTCOME_TRY(default_stream.Copy(tensor.buffer(), buffer_, tensor.byte_size()));
} else {
OUTCOME_TRY(stream.Copy(tensor.buffer(), buffer_));
OUTCOME_TRY(stream.Copy(tensor.buffer(), buffer_, tensor.byte_size()));
}
return success();
}
@ -141,9 +141,9 @@ Result<void> Tensor::CopyTo(Tensor& tensor, Stream stream) const {
if (!stream) {
Device device = desc_.device.is_device() ? desc_.device : tensor.desc().device;
Stream default_stream = Stream::GetDefault(device);
return default_stream.Copy(buffer_, tensor.buffer());
return default_stream.Copy(buffer_, tensor.buffer(), byte_size());
} else {
return stream.Copy(buffer_, tensor.buffer());
return stream.Copy(buffer_, tensor.buffer(), byte_size());
}
}
@ -158,9 +158,9 @@ Result<void> Tensor::CopyFrom(void* host_ptr, Stream stream) {
Allocate();
if (!stream) {
auto default_stream = Stream::GetDefault(desc_.device);
return default_stream.Copy(host_ptr, buffer_, buffer_.GetSize());
return default_stream.Copy(host_ptr, buffer_, byte_size());
} else {
return stream.Copy(host_ptr, buffer_, buffer_.GetSize());
return stream.Copy(host_ptr, buffer_, byte_size());
}
}
@ -174,9 +174,9 @@ Result<void> Tensor::CopyTo(void* host_ptr, Stream stream) const {
}
if (!stream) {
auto default_stream = Stream::GetDefault(desc_.device);
return default_stream.Copy(buffer_, host_ptr, buffer_.GetSize());
return default_stream.Copy(buffer_, host_ptr, byte_size());
} else {
return stream.Copy(buffer_, host_ptr, buffer_.GetSize());
return stream.Copy(buffer_, host_ptr, byte_size());
}
}
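
The copies above are now bounded by the tensor's logical byte_size() instead of the capacity of the underlying buffer, so a tensor whose shape describes fewer elements than its allocation no longer over-copies. A hedged sketch of the host-to-tensor case (the Tensor(desc) constructor and transitive includes are assumptions; the other calls appear elsewhere in this change):

```cpp
// Hedged sketch: only byte_size() bytes are copied from the host pointer,
// even if the backing buffer is larger.
#include <vector>
#include "mmdeploy/core/tensor.h"

mmdeploy::Result<void> CopyExample() {
  using namespace mmdeploy;
  TensorDesc desc{Device(0), DataType::kFLOAT, {1, 3, 8, 8}, "input"};
  Tensor tensor(desc);  // assumption: allocates a buffer of at least byte_size() bytes
  std::vector<float> host(1 * 3 * 8 * 8, 1.f);
  // Copies tensor.byte_size() == host.size() * sizeof(float) bytes.
  OUTCOME_TRY(tensor.CopyFrom(host.data(), Stream::GetDefault(Device(0))));
  return success();
}
```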

View File

@ -5,3 +5,7 @@ add_subdirectory(cpu)
if ("cuda" IN_LIST MMDEPLOY_TARGET_DEVICES)
add_subdirectory(cuda)
endif ()
if ("acl" IN_LIST MMDEPLOY_TARGET_BACKENDS)
add_subdirectory(acl)
endif ()

View File

@ -0,0 +1,7 @@
# Copyright (c) OpenMMLab. All rights reserved.
project(mmdeploy_acl_device)
file(GLOB_RECURSE SRCS "*.cpp")
mmdeploy_add_module(${PROJECT_NAME} "${SRCS}")

View File

@ -0,0 +1,14 @@
// Copyright (c) OpenMMLab. All rights reserved.
#include "mmdeploy/core/device_impl.h"
namespace mmdeploy {
class AclPlatformRegisterer {
public:
AclPlatformRegisterer() { gPlatformRegistry().AddAlias("npu", "cpu"); }
};
AclPlatformRegisterer g_acl_platform_registerer;
} // namespace mmdeploy

View File

@ -105,7 +105,7 @@ Result<void> CpuPlatformImpl::CopyImpl(const void* src, void* dst, size_t src_si
task();
return success();
}
if (st.GetDevice() != Device(0, 0)) {
if (st.GetDevice().platform_id() != 0) {
return Status(eInvalidArgument);
}
auto cpu_stream = static_cast<CpuStreamImpl*>(st.GetNative());
@ -126,6 +126,7 @@ Result<void> CpuPlatformImpl::Copy(const void* host_ptr, Buffer dst, size_t size
}
return CopyImpl(host_ptr, dst_ptr, size, dst.GetSize(), 0, dst_offset, size, stream);
}
Result<void> CpuPlatformImpl::Copy(Buffer src, void* host_ptr, size_t size, size_t src_offset,
Stream stream) {
auto src_ptr = src.GetNative();
@ -145,7 +146,7 @@ Result<void> CpuPlatformImpl::Copy(Buffer src, Buffer dst, size_t size, size_t s
return Status(eInvalidArgument);
}
auto device = src.GetDevice();
if (device.platform_id() != 0 || device != dst.GetDevice()) {
if (device.platform_id() != 0 || device.platform_id() != dst.GetDevice().platform_id()) {
return Status(eInvalidArgument);
}
return CopyImpl(src_ptr, dst_ptr, src.GetSize(), dst.GetSize(), src_offset, dst_offset, size,

View File

@ -26,5 +26,17 @@ if ("snpe" IN_LIST MMDEPLOY_TARGET_BACKENDS)
add_subdirectory(snpe)
endif ()
if ("acl" IN_LIST MMDEPLOY_TARGET_BACKENDS)
add_subdirectory(acl)
endif ()
if ("torchscript" IN_LIST MMDEPLOY_TARGET_BACKENDS)
add_subdirectory(torchscript)
endif ()
if ("coreml" IN_LIST MMDEPLOY_TARGET_BACKENDS)
add_subdirectory(coreml)
endif ()
mmdeploy_add_module(${PROJECT_NAME} net_module.cpp)
add_library(mmdeploy::net_module ALIAS ${PROJECT_NAME})

View File

@ -0,0 +1,14 @@
# Copyright (c) OpenMMLab. All rights reserved.
project(mmdeploy_acl_net)
if ("acl" IN_LIST MMDEPLOY_TARGET_BACKENDS)
if (NOT DEFINED ASCEND_TOOLKIT_HOME)
set(ASCEND_TOOLKIT_HOME $ENV{ASCEND_TOOLKIT_HOME})
endif ()
mmdeploy_add_module(${PROJECT_NAME} acl_net.cpp)
target_include_directories(${PROJECT_NAME} PRIVATE
$<BUILD_INTERFACE:${ASCEND_TOOLKIT_HOME}/runtime/include>)
target_link_libraries(${PROJECT_NAME} PRIVATE
$<BUILD_INTERFACE:${ASCEND_TOOLKIT_HOME}/runtime/lib64/stub/libascendcl.so>)
endif ()

View File

@ -0,0 +1,659 @@
// Copyright (c) OpenMMLab. All rights reserved.
#include "mmdeploy/net/acl/acl_net.h"
#include "mmdeploy/core/logger.h"
#include "mmdeploy/core/model.h"
#include "mmdeploy/core/utils/formatter.h"
std::ostream& operator<<(std::ostream& os, const aclmdlIODims& dims) {
os << dims.name << " [";
for (int i = 0; i < dims.dimCount; ++i) {
os << (i ? ", " : "") << dims.dims[i];
}
os << "]";
return os;
}
std::ostream& operator<<(std::ostream& os, const aclmdlBatch& batch) {
os << "batch [";
for (int i = 0; i < batch.batchCount; ++i) {
os << (i ? ", " : "") << batch.batch[i];
}
os << "]";
return os;
}
std::ostream& operator<<(std::ostream& os, const aclmdlHW& hw) {
os << "HW [";
for (int i = 0; i < hw.hwCount; ++i) {
os << (i ? ", " : "") << "(" << hw.hw[i][0] << ", " << hw.hw[i][1] << ")";
}
os << "]";
return os;
}
namespace mmdeploy {
namespace {
inline Result<void> _m(aclError ec, SourceLocation loc = SourceLocation::current()) {
if (ec == ACL_SUCCESS) {
return success();
} else {
return Status(eFail, loc);
}
}
template <typename T>
inline Result<T*> _p(T* ptr, SourceLocation loc = SourceLocation::current()) {
if (ptr) {
return ptr;
} else {
return Status(eFail, loc);
}
}
struct Context {
Context() {
std::lock_guard lock{mutex_};
if (ref_count_++ != 0) {
return;
}
auto ret = aclInit(nullptr);
if (ret == ACL_SUCCESS) {
MMDEPLOY_INFO("ACL initialized.");
owned_acl_ = true;
} else if (ret == ACL_ERROR_REPEAT_INITIALIZE) {
MMDEPLOY_INFO("ACL has already been initialized.");
} else {
MMDEPLOY_ERROR("aclInit() failed: {}", ret);
assert(ret == 0);
}
}
~Context() {
std::lock_guard lock{mutex_};
if (--ref_count_ != 0) {
return;
}
// skip aclFinalize if aclInit is not successfully called by us.
if (owned_acl_) {
auto ret = aclFinalize();
if (ret == ACL_SUCCESS) {
MMDEPLOY_INFO("ACL finalized.");
owned_acl_ = false;
} else if (ret == ACL_ERROR_REPEAT_FINALIZE) {
MMDEPLOY_INFO("ACL has already been finalized.");
} else {
MMDEPLOY_ERROR("aclFinalize() failed: {}", ret);
}
}
}
static bool owned_acl_;
static int ref_count_;
static std::mutex mutex_;
};
bool Context::owned_acl_ = false;
int Context::ref_count_ = 0;
std::mutex Context::mutex_{};
} // namespace
AclNet::~AclNet() {
auto dtor = [&]() -> Result<void> {
auto n_inputs = aclmdlGetDatasetNumBuffers(input_dataset_);
for (int i = 0; i < n_inputs; ++i) {
auto buffer = aclmdlGetDatasetBuffer(input_dataset_, i);
auto data = aclGetDataBufferAddr(buffer);
OUTCOME_TRY(_m(aclrtFree(data)));
}
input_tensor_.clear();
OUTCOME_TRY(_m(aclmdlDestroyDataset(input_dataset_)));
auto n_outputs = aclmdlGetDatasetNumBuffers(output_dataset_);
for (int i = 0; i < n_outputs; ++i) {
auto buffer = aclmdlGetDatasetBuffer(output_dataset_, i);
auto data = aclGetDataBufferAddr(buffer);
OUTCOME_TRY(_m(aclrtFree(data)));
}
output_tensor_.clear();
OUTCOME_TRY(_m(aclmdlDestroyDataset(output_dataset_)));
OUTCOME_TRY(_m(aclmdlDestroyDesc(model_desc_)));
OUTCOME_TRY(_m(aclmdlUnload(model_id_)));
return success();
};
if (auto r = dtor(); !r) {
MMDEPLOY_ERROR("uninit failed: {}", r.error().message().c_str());
}
}
namespace {
Result<DataType> FromAclDataType(aclDataType data_type) {
switch (data_type) {
case ACL_FLOAT:
return DataType::kFLOAT;
case ACL_FLOAT16:
return DataType::kHALF;
case ACL_INT8:
return DataType::kINT8;
case ACL_INT32:
return DataType::kINT32;
case ACL_INT64:
return DataType::kINT64;
default:
return Status(eNotSupported);
}
}
Result<aclDataType> ToAclDataType(DataType data_type) {
switch (data_type) {
case DataType::kFLOAT:
return ACL_FLOAT;
case DataType::kHALF:
return ACL_FLOAT16;
case DataType::kINT8:
return ACL_INT8;
case DataType::kINT32:
return ACL_INT32;
case DataType::kINT64:
return ACL_INT64;
default:
return Status(eNotSupported);
}
}
Result<TensorDesc> ToTensorDesc(const aclmdlIODims& dims, aclDataType data_type) {
auto extract_name = [](const std::string& name) {
if (auto pos = name.find_last_of(':'); pos != std::string::npos) {
return name.substr(pos + 1);
} else {
return name;
}
};
OUTCOME_TRY(auto _data_type, FromAclDataType(data_type));
return TensorDesc{Device(0), _data_type,
TensorShape(&dims.dims[0], &dims.dims[0] + dims.dimCount),
extract_name(dims.name)};
}
Result<size_t> GetByteSize(const aclmdlIODims& dims, aclDataType data_type) {
size_t byte_size = aclDataTypeSize(data_type);
for (int i = 0; i < dims.dimCount; ++i) {
if (dims.dims[i] < 0) {
return Status(eInvalidArgument);
}
byte_size *= dims.dims[i];
}
return byte_size;
}
} // namespace
// all dims must be fixed
auto AclNet::CreateBuffers(const aclmdlIODims& dims, aclDataType data_type) -> Result<Buffers> {
OUTCOME_TRY(auto byte_size, GetByteSize(dims, data_type));
Buffers pair{};
void* dev_ptr{};
OUTCOME_TRY(_m(aclrtMalloc(&dev_ptr, byte_size, ACL_MEM_MALLOC_HUGE_FIRST)));
OUTCOME_TRY(_m(aclrtMemset(dev_ptr, byte_size, 0, byte_size)));
OUTCOME_TRY(pair.device_buffer, _p(aclCreateDataBuffer(dev_ptr, byte_size)));
OUTCOME_TRY(auto desc, ToTensorDesc(dims, data_type));
void* host_ptr{};
OUTCOME_TRY(_m(aclrtMallocHost(&host_ptr, byte_size)));
memset(host_ptr, 0, byte_size);
pair.host_tensor =
Tensor(desc, std::shared_ptr<void>(host_ptr, [](void* p) { aclrtFreeHost(p); }));
return pair;
}
auto AclNet::CreateBuffersDynamicBatchSize(aclmdlIODims dims, aclDataType data_type)
-> Result<Buffers> {
for (int i = 0; i < dims.dimCount; ++i) {
if (dims.dims[i] == -1) {
dims.dims[i] = dynamic_batch_size_.back();
}
}
return CreateBuffers(dims, data_type);
}
auto AclNet::CreateBuffersDynamicImageSize(int index, aclmdlIODims dims, aclDataType data_type)
-> Result<Buffers> {
aclmdlHW hw_desc{};
OUTCOME_TRY(_m(aclmdlGetDynamicHW(model_desc_, index, &hw_desc)));
if (hw_desc.hwCount > 0) {
auto& val = *std::max_element(hw_desc.hw, hw_desc.hw + hw_desc.hwCount,
[](auto u, auto v) { return u[0] * u[1] < v[0] * v[1]; });
int ptr = 0;
for (int i = 0; i < dims.dimCount; ++i) {
if (dims.dims[i] == -1) {
if (ptr == 2) {
return Status(eInvalidArgument);
}
dims.dims[i] = val[ptr++];
}
}
if (ptr != 2) {
return Status(eInvalidArgument);
}
}
return CreateBuffers(dims, data_type);
}
auto AclNet::CreateBuffersDynamicDims(int index, int dim_count, const aclmdlIODims& dims,
aclDataType data_type) -> Result<Buffers> {
int max_index = -1;
size_t max_value = 0;
aclmdlIODims max_shape{};
for (int j = 0; j < dynamic_input_dims_.size(); ++j) {
aclmdlIODims shape{};
strncpy(shape.name, dims.name, sizeof(shape.name));
shape.dimCount = dims.dimCount;
std::copy(dynamic_input_dims_[j].dims + dim_count,
dynamic_input_dims_[j].dims + dim_count + dims.dimCount, shape.dims);
OUTCOME_TRY(auto byte_size, GetByteSize(shape, data_type));
if (byte_size > max_value) {
max_index = j;
max_value = byte_size;
max_shape = shape;
}
}
if (max_index < 0) {
return Status(eInvalidArgument);
}
MMDEPLOY_INFO("max shape for input {}: {}", index, max_shape);
return CreateBuffers(max_shape, data_type);
}
Result<void> AclNet::ConfigDynamicShapes() {
aclError status = ACL_SUCCESS;
{
size_t dynamic_tensor_index{};
status = aclmdlGetInputIndexByName(model_desc_, ACL_DYNAMIC_TENSOR_NAME, &dynamic_tensor_index);
if (status == ACL_SUCCESS) {
dynamic_tensor_index_ = static_cast<int>(dynamic_tensor_index);
MMDEPLOY_INFO("dynamic tensor index: {}", dynamic_tensor_index);
}
}
if (dynamic_tensor_index_ >= 0) {
aclmdlBatch batch_desc{};
status = aclmdlGetDynamicBatch(model_desc_, &batch_desc);
if (status == ACL_SUCCESS && batch_desc.batchCount > 0) {
MMDEPLOY_INFO("{}, status = {}", batch_desc, status);
input_shape_type_ = kDynamicBatchSize;
dynamic_batch_size_.insert(dynamic_batch_size_.end(), batch_desc.batch,
batch_desc.batch + batch_desc.batchCount);
std::sort(dynamic_batch_size_.begin(), dynamic_batch_size_.end());
}
size_t dynamic_gear_count{0};
if (input_shape_type_ == kStatic) {
status = aclmdlGetInputDynamicGearCount(model_desc_, -1, &dynamic_gear_count);
dynamic_input_dims_.resize(dynamic_gear_count);
if (status == ACL_SUCCESS && dynamic_gear_count > 0) {
status = aclmdlGetInputDynamicDims(model_desc_, -1, dynamic_input_dims_.data(),
dynamic_gear_count);
for (const auto& dims : dynamic_input_dims_) {
MMDEPLOY_INFO("dynamic input dims: {}", dims);
}
input_shape_type_ = kDynamicDims;
} else {
input_shape_type_ = kDynamicImageSize;
}
}
}
return success();
}
Result<void> AclNet::CreateInputBuffers() {
input_dataset_ = aclmdlCreateDataset();
auto n_inputs = aclmdlGetNumInputs(model_desc_);
MMDEPLOY_INFO("n_inputs = {}, dynamic_tensor_index_ = {}", n_inputs, dynamic_tensor_index_);
int dim_count = 0;
for (int i = 0; i < n_inputs; ++i) {
if (i == dynamic_tensor_index_) {
void* data{};
auto input_len = aclmdlGetInputSizeByIndex(model_desc_, i);
OUTCOME_TRY(_m(aclrtMalloc(&data, input_len, ACL_MEM_MALLOC_HUGE_FIRST)));
OUTCOME_TRY(auto buffer, _p(aclCreateDataBuffer(data, input_len)));
OUTCOME_TRY(_m(aclmdlAddDatasetBuffer(input_dataset_, buffer)));
} else {
Buffers buffers{};
aclmdlIODims dims{};
OUTCOME_TRY(_m(aclmdlGetInputDims(model_desc_, i, &dims)));
input_dims_.push_back(dims);
auto data_type = aclmdlGetInputDataType(model_desc_, i);
input_data_type_.push_back(data_type);
MMDEPLOY_INFO("{}", dims);
switch (input_shape_type_) {
case kStatic: {
OUTCOME_TRY(buffers, CreateBuffers(dims, data_type));
break;
}
case kDynamicBatchSize: {
OUTCOME_TRY(buffers, CreateBuffersDynamicBatchSize(dims, data_type));
break;
}
case kDynamicImageSize: {
OUTCOME_TRY(buffers, CreateBuffersDynamicImageSize(i, dims, data_type));
break;
}
case kDynamicDims: {
OUTCOME_TRY(buffers, CreateBuffersDynamicDims(i, dim_count, dims, data_type));
break;
}
default:
return Status(eInvalidArgument);
}
OUTCOME_TRY(_m(aclmdlAddDatasetBuffer(input_dataset_, buffers.device_buffer)));
input_tensor_.push_back(std::move(buffers.host_tensor));
dim_count += dims.dimCount;
}
}
return success();
}
Result<void> AclNet::CreateOutputBuffers() {
output_dataset_ = aclmdlCreateDataset();
auto n_outputs = aclmdlGetNumOutputs(model_desc_);
std::vector<aclmdlIODims> output_dims;
for (int i = 0; i < n_outputs; ++i) {
aclmdlIODims dims{};
OUTCOME_TRY(_m(aclmdlGetOutputDims(model_desc_, i, &dims))); // return max dims
output_dims_.push_back(dims);
MMDEPLOY_INFO("{}", dims);
auto data_type = aclmdlGetOutputDataType(model_desc_, i);
output_data_type_.push_back(data_type);
OUTCOME_TRY(auto buffers, CreateBuffers(dims, data_type));
OUTCOME_TRY(_m(aclmdlAddDatasetBuffer(output_dataset_, buffers.device_buffer)));
output_tensor_.push_back(std::move(buffers.host_tensor));
}
return success();
}
Result<void> AclNet::Init(const Value& args) {
auto& context = args["context"];
cpu_stream_ = context["stream"].get<Stream>();
auto name = args["name"].get<std::string>();
auto model = context["model"].get<Model>();
device_id_ = context["device"].get<Device>().device_id();
acl_context_ = std::make_shared<Context>();
OUTCOME_TRY(auto config, model.GetModelConfig(name));
OUTCOME_TRY(auto binary, model.ReadFile(config.net));
OUTCOME_TRY(_m(aclrtSetDevice(device_id_)));
OUTCOME_TRY(_m(aclmdlLoadFromMem(binary.data(), binary.size(), &model_id_)));
model_desc_ = aclmdlCreateDesc();
OUTCOME_TRY(_m(aclmdlGetDesc(model_desc_, model_id_)));
// dynamic_tensor_index_
// input_shape_type_
// dynamic_batch_size_
// dynamic_input_dims_
if (auto r = ConfigDynamicShapes(); !r) {
MMDEPLOY_ERROR("Failed to config dynamic shapes");
return r.as_failure();
}
// input_dataset_
// input_data_type_
// input_dims_
// input_tensor_
if (auto r = CreateInputBuffers(); !r) {
MMDEPLOY_ERROR("Failed to create input buffers");
return r.as_failure();
}
// output_dataset_
// output_data_type_
// output_dims_
// output_tensor_
if (auto r = CreateOutputBuffers(); !r) {
MMDEPLOY_ERROR("Failed to create output buffers");
return r.as_failure();
}
return success();
}
Result<void> AclNet::Deinit() { return success(); }
Result<Span<Tensor>> AclNet::GetInputTensors() { return input_tensor_; }
Result<Span<Tensor>> AclNet::GetOutputTensors() { return output_tensor_; }
Result<void> AclNet::Reshape(Span<TensorShape> input_shapes) {
OUTCOME_TRY(_m(aclrtSetDevice(device_id_)));
// Sanity checks
if (input_shapes.size() != input_dims_.size()) {
MMDEPLOY_ERROR("inconsistent num inputs");
return Status(eInvalidArgument);
}
for (int i = 0; i < input_dims_.size(); ++i) {
if (input_shapes[i].size() != input_dims_[i].dimCount) {
MMDEPLOY_ERROR("inconsistent num of dims");
return Status(eInvalidArgument);
}
}
switch (input_shape_type_) {
case kStatic: {
OUTCOME_TRY(ReshapeStatic(input_shapes));
break;
}
case kDynamicBatchSize: {
OUTCOME_TRY(ReshapeDynamicBatchSize(input_shapes));
break;
}
case kDynamicImageSize: {
OUTCOME_TRY(ReshapeDynamicImageSize(input_shapes));
break;
}
case kDynamicDims: {
OUTCOME_TRY(ReshapeDynamicDims(input_shapes));
break;
}
default:
return Status(eInvalidArgument);
}
for (int i = 0; i < input_shapes.size(); ++i) {
auto buffer = input_tensor_[i].buffer();
auto desc = input_tensor_[i].desc();
desc.shape = input_shapes[i];
input_tensor_[i] = Tensor(std::move(desc), std::move(buffer));
}
for (int i = 0; i < output_dims_.size(); ++i) {
aclmdlIODims dims{};
OUTCOME_TRY(_m(aclmdlGetCurOutputDims(model_desc_, i, &dims)));
auto buffer = output_tensor_[i].buffer();
auto desc = output_tensor_[i].desc();
desc.shape = TensorShape(&dims.dims[0], &dims.dims[0] + dims.dimCount);
output_tensor_[i] = Tensor(std::move(desc), std::move(buffer));
}
return success();
}
Result<void> AclNet::ReshapeStatic(Span<TensorShape> input_shapes) {
for (int i = 0; i < input_dims_.size(); ++i) {
Span src(input_shapes[i]);
Span ref(input_dims_[i].dims, input_dims_[i].dimCount);
if (src != ref) {
MMDEPLOY_ERROR("Shape mismatch {} vs {}", src, ref);
return Status(eInvalidArgument);
}
}
return success();
}
Result<void> AclNet::ReshapeDynamicBatchSize(Span<TensorShape> input_shapes) {
int batch_size = -1;
for (int i = 0; i < input_dims_.size(); ++i) {
for (int j = 0; j < input_dims_[i].dimCount; ++j) {
if (input_dims_[i].dims[j] == -1) {
if (batch_size != -1 && batch_size != input_shapes[i][j]) {
// inconsistent batch size
return Status(eInvalidArgument);
}
batch_size = input_shapes[i][j];
}
}
}
if (batch_size < 0) {
MMDEPLOY_ERROR("unable to determine batch size");
return Status(eFail);
}
MMDEPLOY_INFO("batch size {} {}", batch_size, dynamic_tensor_index_);
auto index =
std::lower_bound(dynamic_batch_size_.begin(), dynamic_batch_size_.end(), batch_size) -
dynamic_batch_size_.begin();
if (index == dynamic_batch_size_.size()) {
MMDEPLOY_ERROR("Unsupported batch size: {}", batch_size);
return Status(eInvalidArgument);
}
// TODO: memset padding memory to avoid potential extra computation
OUTCOME_TRY(_m(aclmdlSetDynamicBatchSize(model_id_, input_dataset_, dynamic_tensor_index_,
dynamic_batch_size_[index])));
return success();
}
Result<void> AclNet::ReshapeDynamicImageSize(Span<TensorShape> input_shapes) {
uint64_t hw[2];
bool found = false;
for (int i = 0; i < input_dims_.size(); ++i) {
uint64_t tmp[2];
int ptr = 0;
for (int j = 0; j < input_dims_[i].dimCount; ++j) {
if (input_dims_[i].dims[j] == -1) {
if (ptr == 2) {
MMDEPLOY_ERROR("dynamic HW size out of bounds: {}", input_dims_[i]);
return Status(eInvalidArgument);
} else {
tmp[ptr++] = input_shapes[i][j];
}
}
}
if (ptr && ptr != 2) {
MMDEPLOY_ERROR("Partially determined dynamic HW size: {}", input_dims_[i]);
return Status(eInvalidArgument);
}
if (ptr == 2) {
if (found) {
if (hw[0] != tmp[0] || hw[1] != tmp[1]) {
MMDEPLOY_ERROR("Inconsistent dynamic HW size: ({}, {}) vs ({}, {})", hw[0], hw[1], tmp[0],
tmp[1]);
return Status(eInvalidArgument);
}
} else {
found = true;
hw[0] = tmp[0];
hw[1] = tmp[1];
}
}
}
if (!found) {
MMDEPLOY_ERROR("Unable to determine image size");
return Status(eInvalidArgument);
}
MMDEPLOY_INFO("dynamic HW size ({}, {})", hw[0], hw[1]);
OUTCOME_TRY(
_m(aclmdlSetDynamicHWSize(model_id_, input_dataset_, dynamic_tensor_index_, hw[0], hw[1])));
return success();
}
Result<void> AclNet::ReshapeDynamicDims(Span<TensorShape> input_shapes) {
std::vector<int> match(dynamic_input_dims_.size(), 1);
aclmdlIODims dims{};
for (int i = 0; i < input_shapes.size(); ++i) {
const auto& shape = input_shapes[i];
for (int j = 0; j < shape.size(); ++j) {
if (input_dims_[i].dims[j] == -1) {
for (int k = 0; k < dynamic_input_dims_.size(); ++k) {
// disable profile when dims mismatch, except for the first dim (batch size)
if (j == 0 && shape[j] < dynamic_input_dims_[k].dims[dims.dimCount]) {
// pass
} else if (shape[j] != dynamic_input_dims_[k].dims[dims.dimCount]) {
match[k] = 0;
}
}
} else {
if (input_dims_[i].dims[j] != shape[j]) {
return Status(eNotSupported);
}
}
dims.dims[dims.dimCount++] = shape[j];
}
}
int dims_index = std::find(match.begin(), match.end(), 1) - match.begin();
if (dims_index == match.size()) {
MMDEPLOY_ERROR("Shape not supported: {}", dims);
return Status(eNotSupported);
}
// TODO: memset padding memory to avoid potential extra computation
OUTCOME_TRY(_m(aclmdlSetInputDynamicDims(model_id_, input_dataset_, dynamic_tensor_index_,
&dynamic_input_dims_[dims_index])));
return success();
}
Result<void> AclNet::Forward() {
OUTCOME_TRY(cpu_stream_.Wait());
OUTCOME_TRY(_m(aclrtSetDevice(device_id_)));
for (int i = 0; i < input_tensor_.size(); ++i) {
auto buffer = aclmdlGetDatasetBuffer(input_dataset_, i);
auto buffer_size = aclGetDataBufferSizeV2(buffer);
auto buffer_data = aclGetDataBufferAddr(buffer);
auto host_ptr = input_tensor_[i].data();
OUTCOME_TRY(_m(aclrtMemcpy(buffer_data, buffer_size, host_ptr, input_tensor_[i].byte_size(),
ACL_MEMCPY_HOST_TO_DEVICE)));
}
OUTCOME_TRY(_m(aclmdlExecute(model_id_, input_dataset_, output_dataset_)));
for (int i = 0; i < output_tensor_.size(); ++i) {
auto buffer = aclmdlGetDatasetBuffer(output_dataset_, i);
auto buffer_data = aclGetDataBufferAddr(buffer);
auto host_ptr = output_tensor_[i].data();
OUTCOME_TRY(_m(aclrtMemcpy(host_ptr, output_tensor_[i].byte_size(), buffer_data,
output_tensor_[i].byte_size(), ACL_MEMCPY_DEVICE_TO_HOST)));
}
return success();
}
Result<void> AclNet::ForwardAsync(Event* event) { return Status(eNotSupported); }
class AclNetCreator : public Creator<Net> {
public:
const char* GetName() const override { return "ascend"; }
int GetVersion() const override { return 0; }
std::unique_ptr<Net> Create(const Value& args) override {
try {
auto p = std::make_unique<AclNet>();
if (auto r = p->Init(args)) {
return p;
} else {
MMDEPLOY_ERROR("error creating AclNet: {}", r.error().message().c_str());
return nullptr;
}
} catch (const std::exception& e) {
MMDEPLOY_ERROR("unhandled exception when creating AclNet: {}", e.what());
return nullptr;
}
}
};
REGISTER_MODULE(Net, AclNetCreator);
} // namespace mmdeploy
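
ReshapeDynamicBatchSize picks the smallest profiled batch gear that can hold the requested batch. The selection step in isolation:

```cpp
// Standalone illustration of the gear selection in ReshapeDynamicBatchSize above:
// dynamic_batch_size_ is kept sorted and the smallest gear >= the requested batch
// is chosen; the extra padded slots are simply unused.
#include <algorithm>
#include <cassert>
#include <vector>

int main() {
  std::vector<size_t> gears{1, 2, 4, 8};  // sorted dynamic batch gears
  size_t requested = 3;
  auto it = std::lower_bound(gears.begin(), gears.end(), requested);
  assert(it != gears.end() && *it == 4);  // the model is executed with batch 4
  return 0;
}
```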

View File

@ -0,0 +1,70 @@
// Copyright (c) OpenMMLab. All rights reserved.
#ifndef MMDEPLOY_SRC_NET_ACL_ACL_NET_H_
#define MMDEPLOY_SRC_NET_ACL_ACL_NET_H_
#include "acl/acl.h"
#include "mmdeploy/core/net.h"
#include "mmdeploy/core/status_code.h"
namespace mmdeploy {
class AclNet : public Net {
public:
~AclNet() override;
Result<void> Init(const Value& cfg) override;
Result<void> Deinit() override;
Result<Span<Tensor>> GetInputTensors() override;
Result<Span<Tensor>> GetOutputTensors() override;
Result<void> Reshape(Span<TensorShape> input_shapes) override;
Result<void> Forward() override;
Result<void> ForwardAsync(Event* event) override;
private:
enum InputShapeType { kStatic, kDynamicBatchSize, kDynamicImageSize, kDynamicDims };
Result<void> ReshapeStatic(Span<TensorShape> input_shapes);
Result<void> ReshapeDynamicBatchSize(Span<TensorShape> input_shapes);
Result<void> ReshapeDynamicImageSize(Span<TensorShape> input_shapes);
Result<void> ReshapeDynamicDims(Span<TensorShape> input_shapes);
struct Buffers {
aclDataBuffer* device_buffer;
Tensor host_tensor;
};
Result<Buffers> CreateBuffers(const aclmdlIODims& dims, aclDataType data_type);
Result<Buffers> CreateBuffersDynamicBatchSize(aclmdlIODims dims, aclDataType data_type);
Result<Buffers> CreateBuffersDynamicImageSize(int index, aclmdlIODims dims,
aclDataType data_type);
Result<Buffers> CreateBuffersDynamicDims(int index, int dim_count, const aclmdlIODims& dims,
aclDataType data_type);
Result<void> ConfigDynamicShapes();
Result<void> CreateInputBuffers();
Result<void> CreateOutputBuffers();
std::shared_ptr<void> acl_context_;
Stream cpu_stream_;
int32_t device_id_{0};
uint32_t model_id_{(uint32_t)-1};
aclmdlDesc* model_desc_{nullptr};
int dynamic_tensor_index_{-1};
InputShapeType input_shape_type_{kStatic};
std::vector<size_t> dynamic_batch_size_;
std::vector<aclmdlIODims> dynamic_input_dims_;
aclmdlDataset* input_dataset_{nullptr};
aclmdlDataset* output_dataset_{nullptr};
std::vector<aclmdlIODims> input_dims_;
std::vector<aclmdlIODims> output_dims_;
std::vector<aclDataType> input_data_type_;
std::vector<aclDataType> output_data_type_;
std::vector<Tensor> input_tensor_;
std::vector<Tensor> output_tensor_;
};
} // namespace mmdeploy
#endif // MMDEPLOY_SRC_NET_ACL_ACL_NET_H_

View File

@ -0,0 +1,14 @@
# Copyright (c) OpenMMLab. All rights reserved.
project(mmdeploy_coreml_net)
if ("cpu" IN_LIST MMDEPLOY_TARGET_DEVICES)
find_library(CORE_ML CoreML)
find_library(FOUNDATION Foundation)
mmdeploy_add_module(${PROJECT_NAME} coreml_net.mm)
target_include_directories(${PROJECT_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
target_link_libraries(${PROJECT_NAME} PRIVATE ${CORE_ML} ${FOUNDATION})
add_library(mmdeploy::coreml_net ALIAS ${PROJECT_NAME})
else ()
message(FATAL_ERROR "'coreml_net' is NOT supported in target devices: ${MMDEPLOY_TARGET_DEVICES}")
endif ()

View File

@ -0,0 +1,37 @@
// Copyright (c) OpenMMLab. All rights reserved.
#ifndef MMDEPLOY_SRC_NET_COREML_COREML_NET_H_
#define MMDEPLOY_SRC_NET_COREML_COREML_NET_H_
#include "mmdeploy/core/net.h"
namespace mmdeploy {
namespace coreml {
class Execution;
} // namespace coreml
class CoreMLNet : public Net {
public:
~CoreMLNet() override = default;
Result<void> Init(const Value& cfg) override;
Result<void> Deinit() override;
Result<Span<Tensor>> GetInputTensors() override;
Result<Span<Tensor>> GetOutputTensors() override;
Result<void> Reshape(Span<TensorShape> input_shapes) override;
Result<void> Forward() override;
Result<void> ForwardAsync(Event* event) override;
private:
std::unique_ptr<coreml::Execution> execution_;
std::vector<Tensor> input_tensors_;
std::vector<Tensor> output_tensors_;
Device device_;
Stream stream_;
friend class coreml::Execution;
};
} // namespace mmdeploy
#endif  // MMDEPLOY_SRC_NET_COREML_COREML_NET_H_

View File

@ -0,0 +1,326 @@
// Copyright (c) OpenMMLab. All rights reserved.
#include "coreml_net.h"
#include "mmdeploy/core/model.h"
#include "mmdeploy/core/status_code.h"
#include "mmdeploy/core/utils/filesystem.h"
#include <fstream>
#import <CoreML/CoreML.h>
#import <Foundation/Foundation.h>
#include <memory>
@interface MMBatchTensorFeatureProvider : NSObject <MLBatchProvider> {
const std::vector<mmdeploy::Tensor> *inputs_;
}
- (instancetype)initWithInputs:(const std::vector<mmdeploy::Tensor> &)inputs;
- (NSInteger)count;
- (id<MLFeatureProvider>)featuresAtIndex:(NSInteger)index;
@end
@implementation MMBatchTensorFeatureProvider
- (instancetype)initWithInputs:(const std::vector<mmdeploy::Tensor> &)inputs {
inputs_ = &inputs;
return self;
}
- (NSInteger)count {
return (*inputs_)[0].shape(0);
}
- (id<MLFeatureProvider>)featuresAtIndex:(NSInteger)index {
MLDictionaryFeatureProvider *feature = nil;
NSMutableDictionary<NSString *, id> *input_dict =
[[NSMutableDictionary<NSString *, id> alloc] init];
for (auto x : *inputs_) {
auto in = x.Slice(index);
NSMutableArray *shape = [[NSMutableArray alloc] init];
for (const auto dim : in.shape()) {
[shape addObject:[NSNumber numberWithLongLong:dim]];
}
NSMutableArray *strides = [[NSMutableArray alloc] init];
int64_t stride = 1;
for (int i = in.shape().size() - 1; i >= 0; i--) {
[strides insertObject:[NSNumber numberWithLongLong:stride] atIndex:0];
stride *= in.shape()[i];
}
MLMultiArrayDataType data_type = MLMultiArrayDataTypeFloat32;
NSError *error = nil;
MLMultiArray *mlArray =
[[MLMultiArray alloc] initWithDataPointer:in.data()
shape:shape
dataType:data_type
strides:strides
deallocator:(^(void *){
})error:&error];
if (error != nil) {
MMDEPLOY_ERROR("init MLMultiArray failed with key: {}, error message: {}",
in.name(), [[error localizedDescription] UTF8String]);
return nil;
}
NSString *key = [NSString stringWithUTF8String:in.name()];
input_dict[key] = mlArray;
}
NSError *error = nil;
feature = [[MLDictionaryFeatureProvider alloc] initWithDictionary:input_dict
error:&error];
if (error != nil) {
MMDEPLOY_ERROR("init MLDictionaryFeatureProvider failed with index: {}, "
"error message: {}",
index, [[error localizedDescription] UTF8String]);
return nil;
}
return feature;
}
@end
namespace mmdeploy {
namespace coreml {
static Result<void> CheckInputOutputFeatureType(MLFeatureType type) {
if (type != MLFeatureTypeMultiArray) {
MMDEPLOY_ERROR("unsupported feature type: {}", type);
return Status(eInvalidArgument);
}
return success();
}
static TensorShape to_shape(NSArray<NSNumber *> *shape) {
TensorShape _shape;
for (int i = 0; i < shape.count; i++) {
_shape.push_back(shape[i].intValue);
}
return _shape;
}
static Result<DataType> ConvertElementType(MLMultiArrayDataType type) {
switch (type) {
case MLMultiArrayDataTypeFloat32:
return DataType::kFLOAT;
case MLMultiArrayDataTypeFloat16:
return DataType::kHALF;
case MLMultiArrayDataTypeInt32:
return DataType::kINT32;
default:
MMDEPLOY_ERROR("unsupported MLMultiArrayDataType: {}",
static_cast<int>(type));
return Status(eNotSupported);
}
}
static Result<Tensor> AsTensor(MLMultiArray *mlArray, const Device &device) {
TensorDesc desc;
desc.device = device;
desc.shape = to_shape(mlArray.shape);
OUTCOME_TRY(desc.data_type, ConvertElementType(mlArray.dataType));
std::shared_ptr<void> data(const_cast<void *>(mlArray.dataPointer),
[](void *) {});
return Tensor(desc, data);
}
class Execution {
public:
Execution(const std::string &path, CoreMLNet *net) : path_(path), net_(net) {}
~Execution() { RemoveModel(); }
Result<void> Init() {
OUTCOME_TRY(LoadModel());
OUTCOME_TRY(SetInputOutputTensor());
return success();
}
Result<void> Forward() {
int batch_size = net_->input_tensors_[0].shape(0);
// prepare input
NSError *error = nil;
MMBatchTensorFeatureProvider *input_feature =
[[MMBatchTensorFeatureProvider alloc]
initWithInputs:net_->input_tensors_];
id<MLBatchProvider> output_feature =
[model_ predictionsFromBatch:input_feature error:&error];
if (error != nil) {
MMDEPLOY_ERROR("coreml forward failed, error message: {}",
[[error localizedDescription] UTF8String]);
return Status(eFail);
}
// extract output
for (size_t i = 0; i < net_->output_tensors_.size(); ++i) {
auto &out = net_->output_tensors_[i];
for (int bid = 0; bid < output_feature.count; bid++) {
NSString *name =
[NSString stringWithCString:out.name()
encoding:[NSString defaultCStringEncoding]];
if (name == nil) {
MMDEPLOY_ERROR("output name must not be nil");
return Status(eFail);
}
MLFeatureValue *output_value =
[[output_feature featuresAtIndex:bid] featureValueForName:name];
if (output_value == nil) {
MMDEPLOY_ERROR("model output doesn't have name tensort: {}",
out.name());
return Status(eFail);
}
MLMultiArray *mlArray = [output_value multiArrayValue];
OUTCOME_TRY(auto tmp, AsTensor(mlArray, out.device()));
if (bid == 0) {
TensorShape batch_shape = tmp.shape();
batch_shape[0] = batch_size;
out.Reshape(batch_shape);
}
auto slice = out.Slice(bid);
OUTCOME_TRY(tmp.CopyTo(slice, net_->stream_));
}
}
return success();
}
Result<void> SetInputOutputTensor() {
// input
auto input_desc = model_.modelDescription.inputDescriptionsByName;
for (NSString *name in input_desc) {
MLFeatureDescription *value = input_desc[name];
OUTCOME_TRY(CheckInputOutputFeatureType(value.type));
// use default shape
auto shape = to_shape(value.multiArrayConstraint.shape);
OUTCOME_TRY(auto data_type,
ConvertElementType(value.multiArrayConstraint.dataType));
net_->input_tensors_.emplace_back(
TensorDesc{net_->device_, data_type, shape, [name UTF8String]});
}
// output
auto output_desc = model_.modelDescription.outputDescriptionsByName;
for (NSString *name in output_desc) {
MLFeatureDescription *value = output_desc[name];
OUTCOME_TRY(auto data_type,
ConvertElementType(value.multiArrayConstraint.dataType));
// can't get output shape
net_->output_tensors_.emplace_back(
TensorDesc{net_->device_, data_type, {}, [name UTF8String]});
}
return success();
}
Result<void> Reshape(Span<TensorShape> input_shapes) {
for (size_t i = 0; i < input_shapes.size(); ++i) {
net_->input_tensors_[i].Reshape(input_shapes[i]);
}
return success();
}
Result<void> LoadModel() {
NSString *model_path = [NSString stringWithUTF8String:path_.c_str()];
NSError *error = nil;
NSURL *model_url = [NSURL URLWithString:model_path];
compiled_model_url_ = [MLModel compileModelAtURL:model_url error:&error];
if (error != nil) {
MMDEPLOY_ERROR("failed to compile model, error message: {}",
[[error localizedDescription] UTF8String]);
return Status(eFail);
}
MLModelConfiguration *config = [[MLModelConfiguration alloc] init];
config.computeUnits = MLComputeUnitsAll;
model_ = [MLModel modelWithContentsOfURL:compiled_model_url_
configuration:config
error:&error];
if (error != nil) {
MMDEPLOY_ERROR("failed to construct model, error message: {}",
[[error localizedDescription] UTF8String]);
return Status(eFail);
}
return success();
}
void RemoveModel() {
NSError *error = nil;
if (compiled_model_url_ != nil) {
[[NSFileManager defaultManager] removeItemAtURL:compiled_model_url_
error:&error];
if (error != nil) {
MMDEPLOY_ERROR("failed to remove compiled model, error message: {}",
[[error localizedDescription] UTF8String]);
}
compiled_model_url_ = nil;
}
}
NSURL *compiled_model_url_{nil};
MLModel *model_{nil};
std::string path_;
CoreMLNet *net_{nullptr};
};
} // namespace coreml
Result<void> CoreMLNet::Init(const Value &cfg) {
auto &context = cfg["context"];
device_ = context["device"].get<Device>();
stream_ = context["stream"].get<Stream>();
auto name = cfg["name"].get<std::string>();
auto model = context["model"].get<Model>();
OUTCOME_TRY(auto config, model.GetModelConfig(name));
std::string coreml_tmp_path =
(fs::path(model.GetModelPath()) / config.net).string();
execution_ = std::make_unique<coreml::Execution>(coreml_tmp_path, this);
OUTCOME_TRY(execution_->Init());
return success();
}
Result<void> CoreMLNet::Deinit() { return success(); }
Result<Span<Tensor>> CoreMLNet::GetInputTensors() { return input_tensors_; }
Result<Span<Tensor>> CoreMLNet::GetOutputTensors() { return output_tensors_; }
Result<void> CoreMLNet::Reshape(Span<TensorShape> input_shapes) {
return execution_->Reshape(input_shapes);
}
Result<void> CoreMLNet::Forward() { return execution_->Forward(); }
Result<void> CoreMLNet::ForwardAsync(Event *event) {
return Status(eNotSupported);
}
class CoreMLNetCreator : public Creator<Net> {
public:
const char *GetName() const override { return "coreml"; }
int GetVersion() const override { return 0; }
std::unique_ptr<Net> Create(const Value &args) override {
auto p = std::make_unique<CoreMLNet>();
if (auto r = p->Init(args)) {
return p;
} else {
MMDEPLOY_ERROR("error creating CoreMLNet: {}",
r.error().message().c_str());
return nullptr;
}
}
};
REGISTER_MODULE(Net, CoreMLNetCreator);
} // namespace mmdeploy
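
featuresAtIndex builds C-contiguous strides for each MLMultiArray from the sliced tensor's shape. The same computation in isolation:

```cpp
// The stride computation used in featuresAtIndex above, in isolation:
// row-major (C-contiguous) strides derived from the tensor shape.
#include <cstdint>
#include <vector>

int main() {
  std::vector<int64_t> shape{1, 3, 224, 224};
  std::vector<int64_t> strides(shape.size());
  int64_t stride = 1;
  for (int i = static_cast<int>(shape.size()) - 1; i >= 0; --i) {
    strides[i] = stride;
    stride *= shape[i];
  }
  // strides == {150528, 50176, 224, 1}
  return 0;
}
```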

View File

@ -14,7 +14,11 @@ if (PPLNN_USE_CUDA AND ("cuda" IN_LIST MMDEPLOY_TARGET_DEVICES))
target_compile_definitions(${PROJECT_NAME} PRIVATE -DPPL_NN_HAS_CUDA=1)
target_include_directories(${PROJECT_NAME} PUBLIC ${CUDA_TOOLKIT_ROOT_DIR}/include)
target_link_directories(${PROJECT_NAME} PUBLIC ${CUDA_TOOLKIT_ROOT_DIR}/lib64)
target_link_libraries(${PROJECT_NAME} PRIVATE nvrtc)
endif ()
target_link_libraries(${PROJECT_NAME}
PRIVATE ${PPLNN_LIBRARIES} nvrtc)
PRIVATE ${PPLNN_LIBRARIES})
if (PPLNN_USE_RISCV AND ("cpu" IN_LIST MMDEPLOY_TARGET_DEVICES))
target_compile_definitions(${PROJECT_NAME} PRIVATE -DPPL_NN_HAS_RISCV=1)
endif ()
add_library(mmdeploy::pplnn_net ALIAS ${PROJECT_NAME})

View File

@ -18,6 +18,11 @@
#include "ppl/nn/engines/cuda/ops.h"
#define PPL_CUDA_IMPORT_FROM_BUFFER 1
#endif
#if PPL_NN_HAS_RISCV
#include "ppl/nn/engines/riscv/engine_factory.h"
#include "ppl/nn/engines/riscv/engine_options.h"
#include "ppl/nn/engines/riscv/ops.h"
#endif
namespace mmdeploy {
@ -92,6 +97,18 @@ Result<void> PPLNet::Init(const Value& args) {
engines_.emplace_back(ppl::nn::x86::EngineFactory::Create({}));
}
#endif
#if PPL_NN_HAS_RISCV
if (device_.is_host()) {
ppl::nn::riscv::RegisterBuiltinOpImpls();
ppl::nn::riscv::EngineOptions options{};
// TODO:
// FP16 -> postprocess
options.forward_precision = ppl::common::DATATYPE_FLOAT32;
options.dynamic_tuning_level = 0;
options.winograd_level = 1;
engines_.emplace_back(ppl::nn::riscv::EngineFactory::Create(options));
}
#endif
std::vector<ppl::nn::Engine*> engines;
for (const auto& engine : engines_) {

View File

@ -0,0 +1,28 @@
# Copyright (c) OpenMMLab. All rights reserved.
project(mmdeploy_torch_net)
option(MMDEPLOY_TORCHSCRIPT_SDK_BACKEND "Build TorchScript SDK backend" OFF)
if (MMDEPLOY_TORCHSCRIPT_SDK_BACKEND)
find_package(Torch REQUIRED)
find_package(TorchVision QUIET)
mmdeploy_add_module(${PROJECT_NAME} torch_net.cpp)
target_link_libraries(${PROJECT_NAME} PRIVATE
${TORCH_LIBRARIES})
target_link_directories(${PROJECT_NAME} INTERFACE
$<BUILD_INTERFACE:${Torch_DIR}/../../../lib>)
target_link_libraries(${PROJECT_NAME} PRIVATE
mmdeploy_torchscript_ops_obj)
if (TorchVision_FOUND)
target_link_libraries(${PROJECT_NAME} PRIVATE TorchVision::TorchVision)
target_compile_definitions(${PROJECT_NAME} PRIVATE -DMMDEPLOY_USE_TORCHVISION=1)
endif ()
add_library(mmdeploy::torch_net ALIAS ${PROJECT_NAME})
endif ()

View File

@ -0,0 +1,237 @@
// Copyright (c) OpenMMLab. All rights reserved.
#include "mmdeploy/net/torchscript/torch_net.h"
#include "mmdeploy/core/model.h"
#include "mmdeploy/core/utils/formatter.h"
#include "torch/torch.h"
#if MMDEPLOY_USE_CUDA
#include "c10/cuda/CUDAGuard.h"
#include "c10/cuda/CUDAStream.h"
#endif
#if MMDEPLOY_USE_TORCHVISION
#include "torchvision/vision.h"
MMDEPLOY_API void _mmdeploy_force_link_torchvision() { vision::detail::_register_ops(); }
#endif
namespace mmdeploy {
namespace {
class InferenceMode {
#if TORCH_VERSION_MAJOR == 1 && TORCH_VERSION_MINOR >= 10
c10::InferenceMode guard_;
#else
at::AutoNonVariableTypeMode guard_;
#endif
};
class StreamGuard {
public:
StreamGuard(const torch::Device& device, Stream stream)
: device_(device), stream_(std::move(stream)), device_guard_(device) {
stream_.Wait().value();
}
~StreamGuard() {
#if MMDEPLOY_USE_CUDA
auto device = stream_.GetDevice();
if (device.is_device()) {
Stream stream(device, (cudaStream_t)c10::cuda::getCurrentCUDAStream(device_.index()));
stream.Wait().value();
}
#endif
}
private:
torch::Device device_;
Stream stream_;
c10::DeviceGuard device_guard_;
};
Result<torch::ScalarType> FromDataType(DataType data_type) {
switch (data_type) {
case DataType::kFLOAT:
return torch::ScalarType::Float;
case DataType::kHALF:
return torch::ScalarType::Half;
case DataType::kINT32:
return torch::ScalarType::Int;
case DataType::kINT64:
return torch::ScalarType::Long;
case DataType::kINT8:
return torch::ScalarType::Char;
default:
MMDEPLOY_ERROR("Unsupported mmdeploy::DataType: {}", to_string(data_type));
return Status(eNotSupported);
}
}
Result<DataType> ToDataType(torch::ScalarType scalar_type) {
switch (scalar_type) {
case torch::ScalarType::Float:
return DataType::kFLOAT;
case torch::ScalarType::Half:
return DataType::kHALF;
case torch::ScalarType::Int:
return DataType::kINT32;
case torch::ScalarType::Long:
return DataType::kINT64;
case torch::ScalarType::Char:
return DataType::kINT8;
default:
MMDEPLOY_ERROR("Unsupported torch::ScalarType: {}", toString(scalar_type));
return Status(eNotSupported);
}
}
} // namespace
TorchNet::~TorchNet() = default;
Result<void> TorchNet::Init(const Value& cfg) {
auto& context = cfg["context"];
device_ = context["device"].get<Device>();
stream_ = context["stream"].get<Stream>();
auto name = cfg["name"].get<std::string>();
auto model = context["model"].get<Model>();
OUTCOME_TRY(auto config, model.GetModelConfig(name));
OUTCOME_TRY(auto bytes, model.ReadFile(config.net));
auto platform = Platform(device_.platform_id());
auto device_name = platform.GetPlatformName();
try {
{
using namespace std::string_literals;
if (device_name == "cpu"s) {
torch_device_ = torch::Device(device_name);
} else {
torch_device_ = torch::Device(device_name + ":"s + std::to_string(device_.device_id()));
}
}
std::istringstream iss(bytes);
InferenceMode guard;
module_ = torch::jit::load(iss);
module_.eval();
module_.to(*torch_device_);
auto forward = module_.get_method("forward");
auto ToDesc = [&](torch::jit::Value* value, const char* type, int index) {
MMDEPLOY_INFO("Found {}: {}", type, value->debugNameBase());
return TensorDesc{device_, DataType::kFLOAT, {}, "#" + std::to_string(index)};
};
auto inputs = forward.graph()->inputs();
int input_count = 0;
for (int i = 1; i < inputs.size(); ++i) {
if (inputs[i]->type()->kind() == c10::TypeKind::TensorType) {
input_tensor_.emplace_back(ToDesc(inputs[i], "input", input_count++));
} else {
MMDEPLOY_ERROR("Unsupported input type: {}", typeKindToString(inputs[i]->type()->kind()));
return Status(eNotSupported);
}
}
auto outputs = forward.graph()->outputs();
int output_count = 0;
for (const auto& output : outputs) {
auto kind = output->type()->kind();
if (kind == c10::TypeKind::TensorType) {
output_tensor_.emplace_back(ToDesc(output, "output", output_count++));
} else if (output->type()->kind() == c10::TypeKind::TupleType) {
for (const auto& v : output->node()->inputs()) {
if (v->type()->kind() == c10::TypeKind::TensorType) {
output_tensor_.emplace_back(ToDesc(v, "output", output_count++));
} else {
MMDEPLOY_ERROR("Unsupported output type: {}", typeKindToString(v->type()->kind()));
return Status(eNotSupported);
}
}
} else {
MMDEPLOY_ERROR("Unsupported output type: {}", typeKindToString(kind));
return Status(eNotSupported);
}
}
return success();
} catch (const std::exception& e) {
MMDEPLOY_ERROR("unhandled exception: {}", e.what());
return Status(eFail);
}
}
Result<void> TorchNet::Deinit() { return success(); }
Result<Span<Tensor>> TorchNet::GetInputTensors() { return input_tensor_; }
Result<Span<Tensor>> TorchNet::GetOutputTensors() { return output_tensor_; }
Result<void> TorchNet::Reshape(Span<TensorShape> input_shapes) {
if (input_shapes.size() != input_tensor_.size()) {
return Status(eInvalidArgument);
}
for (size_t i = 0; i < input_shapes.size(); ++i) {
input_tensor_[i].Reshape(input_shapes[i]);
}
return success();
}
Result<void> TorchNet::Forward() {
try {
StreamGuard stream_guard(*torch_device_, stream_);
InferenceMode inference_guard;
std::vector<torch::jit::IValue> inputs;
for (auto& v : input_tensor_) {
OUTCOME_TRY(auto data_type, FromDataType(v.data_type()));
auto tensor = torch::from_blob(v.data(), v.shape(),
c10::TensorOptions(*torch_device_).dtype(data_type));
inputs.emplace_back(tensor);
}
auto outputs = module_.forward(inputs);
if (outputs.isTensor()) {
OUTCOME_TRY(output_tensor_[0], FromTorchTensor(outputs.toTensor(), output_tensor_[0].name()));
} else if (outputs.isTuple()) {
auto tuple = outputs.toTuple();
size_t index = 0;
for (const auto& x : tuple->elements()) {
OUTCOME_TRY(output_tensor_[index],
FromTorchTensor(x.toTensor(), output_tensor_[index].name()));
++index;
}
} else {
MMDEPLOY_ERROR("{}", toString(outputs.type()));
return Status(eNotSupported);
}
} catch (const std::exception& e) {
MMDEPLOY_ERROR("unhandled exception: {}", e.what());
return Status(eFail);
}
return success();
}
Result<void> TorchNet::ForwardAsync(Event* event) { return success(); }
Result<Tensor> TorchNet::FromTorchTensor(const torch::Tensor& tensor, const std::string& name) {
OUTCOME_TRY(auto data_type, ToDataType(tensor.scalar_type()));
auto shape = tensor.sizes();
TensorDesc desc{device_, data_type, {shape.begin(), shape.end()}, name};
return Tensor(desc, std::shared_ptr<void>(tensor.data_ptr(), [tensor](auto) {}));
}
class TorchNetCreator : public Creator<Net> {
public:
const char* GetName() const override { return "torchscript"; }
std::unique_ptr<Net> Create(const Value& cfg) override {
auto p = std::make_unique<TorchNet>();
if (auto status = p->Init(cfg)) {
return p;
} else {
MMDEPLOY_ERROR("Failed to created TorchNet with config: {}", cfg);
}
return nullptr;
}
};
REGISTER_MODULE(Net, TorchNetCreator);
} // namespace mmdeploy
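
TorchNet implements the same Net interface as the other backends added in this change, so callers drive it through Reshape/GetInputTensors/Forward/GetOutputTensors. A hedged usage sketch against that interface (how the net instance is created from the registry and how inputs are filled is omitted; the implicit vector-to-Span conversion is the one already relied on by GetInputTensors above):

```cpp
// Hedged sketch of driving a Net (such as TorchNet above) through its interface.
#include <vector>
#include "mmdeploy/core/net.h"
#include "mmdeploy/core/status_code.h"

mmdeploy::Result<void> RunOnce(mmdeploy::Net& net) {
  using namespace mmdeploy;
  std::vector<TensorShape> shapes{{1, 3, 224, 224}};
  OUTCOME_TRY(net.Reshape(shapes));  // Span<TensorShape> from the vector
  OUTCOME_TRY(auto inputs, net.GetInputTensors());
  // ... fill inputs[0], e.g. via Tensor::CopyFrom(host_ptr, stream) ...
  OUTCOME_TRY(net.Forward());
  OUTCOME_TRY(auto outputs, net.GetOutputTensors());
  (void)outputs;
  return success();
}
```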

View File

@ -0,0 +1,35 @@
// Copyright (c) OpenMMLab. All rights reserved.
#ifndef MMDEPLOY_CSRC_MMDEPLOY_NET_TORCHSCRIPT_TORCH_NET_H_
#define MMDEPLOY_CSRC_MMDEPLOY_NET_TORCHSCRIPT_TORCH_NET_H_
#include "mmdeploy/core/net.h"
#include "torch/script.h"
namespace mmdeploy {
class TorchNet : public Net {
public:
~TorchNet() override;
Result<void> Init(const Value& cfg) override;
Result<void> Deinit() override;
Result<Span<Tensor>> GetInputTensors() override;
Result<Span<Tensor>> GetOutputTensors() override;
Result<void> Reshape(Span<TensorShape> input_shapes) override;
Result<void> Forward() override;
Result<void> ForwardAsync(Event* event) override;
private:
Result<Tensor> FromTorchTensor(const torch::Tensor& tensor, const std::string& name);
torch::jit::script::Module module_;
std::vector<Tensor> input_tensor_;
std::vector<Tensor> output_tensor_;
Device device_;
Stream stream_;
std::optional<torch::Device> torch_device_;
};
} // namespace mmdeploy
#endif // MMDEPLOY_CSRC_MMDEPLOY_NET_TORCHSCRIPT_TORCH_NET_H_

View File

@ -4,6 +4,9 @@ project(mmdeploy_transform_module)
add_subdirectory(transform)
add_subdirectory(cpu)
if (MMDEPLOY_ELENA_FUSION)
add_subdirectory(elena)
endif ()
if ("cuda" IN_LIST MMDEPLOY_TARGET_DEVICES)
add_subdirectory(cuda)
endif ()


@ -5,6 +5,7 @@ project(mmdeploy_cuda_transform_impl CUDA CXX)
find_package(pplcv REQUIRED)
set(SRCS
collect_impl.cpp
crop_impl.cpp
image2tensor_impl.cpp
default_format_bundle_impl.cpp


@ -0,0 +1,28 @@
// Copyright (c) OpenMMLab. All rights reserved.
#include "mmdeploy/preprocess/transform/collect.h"
namespace mmdeploy {
namespace cuda {
class CollectImpl : public ::mmdeploy::CollectImpl {
public:
CollectImpl(const Value& args) : ::mmdeploy::CollectImpl(args) {}
~CollectImpl() = default;
};
class CollectImplCreator : public Creator<::mmdeploy::CollectImpl> {
public:
const char* GetName() const override { return "cuda"; }
int GetVersion() const override { return 1; }
std::unique_ptr<::mmdeploy::CollectImpl> Create(const Value& args) override {
return std::make_unique<CollectImpl>(args);
}
};
} // namespace cuda
} // namespace mmdeploy
using mmdeploy::CollectImpl;
using mmdeploy::cuda::CollectImplCreator;
REGISTER_MODULE(CollectImpl, CollectImplCreator);
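The file above follows the backend-impl pattern used across the preprocessing module: a device-specific impl plus a Creator registered under a platform name ("cuda" here), so the matching implementation can be selected by name at run time. The sketch below shows the general shape of such a name-keyed factory; it is a simplified stand-in with assumed names (TransformRegistry, CudaCollect), not the actual mmdeploy registry API:

#include <functional>
#include <iostream>
#include <map>
#include <memory>
#include <string>

struct Transform {
  virtual ~Transform() = default;
  virtual void Apply() = 0;
};

// Minimal name-keyed factory; the real registry additionally tracks a version.
class TransformRegistry {
 public:
  using Factory = std::function<std::unique_ptr<Transform>()>;
  static TransformRegistry& Get() {
    static TransformRegistry instance;
    return instance;
  }
  void Add(const std::string& name, Factory f) { factories_[name] = std::move(f); }
  std::unique_ptr<Transform> Create(const std::string& name) const {
    auto it = factories_.find(name);
    return it == factories_.end() ? nullptr : it->second();
  }

 private:
  std::map<std::string, Factory> factories_;
};

struct CudaCollect : Transform {
  void Apply() override { std::cout << "collect running on cuda\n"; }
};

int main() {
  TransformRegistry::Get().Add("cuda", [] { return std::make_unique<CudaCollect>(); });
  if (auto t = TransformRegistry::Get().Create("cuda")) t->Apply();  // picked by platform name
}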


@ -0,0 +1,31 @@
# Copyright (c) OpenMMLab. All rights reserved.
project(mmdeploy_elena_transform_impl)
set(SRCS
crop_impl.cpp
collect_impl.cpp
image2tensor_impl.cpp
default_format_bundle_impl.cpp
load_impl.cpp
normalize_impl.cpp
pad_impl.cpp
resize_impl.cpp
elena_registry.cpp)
file(GLOB CPU_KERNEL_SRCS "cpu_kernel/*.cpp")
set(ALL_SRCS ${SRCS} ${CPU_KERNEL_SRCS})
if ("cuda" IN_LIST MMDEPLOY_TARGET_DEVICES)
file(GLOB CUDA_KERNEL_SRCS "cuda_kernel/*.cu")
set(ALL_SRCS ${ALL_SRCS} ${CUDA_KERNEL_SRCS})
endif ()
mmdeploy_add_module(${PROJECT_NAME} "${ALL_SRCS}")
target_include_directories(${PROJECT_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
target_link_libraries(${PROJECT_NAME}
PRIVATE mmdeploy::transform)
if ("cuda" IN_LIST MMDEPLOY_TARGET_DEVICES)
target_link_libraries(${PROJECT_NAME} PRIVATE cuda)
endif ()
add_library(mmdeploy::transform_impl::elena ALIAS ${PROJECT_NAME})


@ -0,0 +1,145 @@
// Copyright (c) OpenMMLab. All rights reserved.
#include <algorithm>
#include <array>
#include <cassert>
#include <iostream>
#include <set>
#include <string>
#include "elena_registry.h"
#include "mmdeploy/archive/json_archive.h"
#include "mmdeploy/core/mat.h"
#include "mmdeploy/core/tensor.h"
#include "mmdeploy/core/utils/device_utils.h"
#include "mmdeploy/core/utils/formatter.h"
#include "mmdeploy/preprocess/transform/collect.h"
#include "mmdeploy/preprocess/transform/tracer.h"
namespace mmdeploy {
namespace elena {
using namespace trace;
struct ExtractTransParamVisitor {
bool valid{true};
std::set<std::string> st;
std::array<float, 3> mean;
std::array<float, 3> std;
std::array<int, 2> resize_hw;
std::string resize_mode;
float pad_val;
std::array<int, 4> pad_tlbr;
std::array<int, 2> pad_hw;
std::array<int, 4> crop_tlbr;
std::array<int, 2> crop_hw;
void CheckValid(const std::string& name) {
if (st.count(name)) {
valid = false;
return;
}
st.insert(name);
}
void operator()(CvtColorParam&) {}
void operator()(CastParam&) {}
void operator()(HWC2CHWParam&) {}
void operator()(ResizeParam& param) {
CheckValid("Resize");
resize_hw = {param.size[0], param.size[1]};
resize_mode = param.mode;
}
void operator()(PadParam& param) {
CheckValid("Pad");
pad_val = param.pad_val;
std::copy_n(param.tlbr.begin(), 4, pad_tlbr.begin());
std::copy_n(param.size.begin(), 2, pad_hw.begin());
}
void operator()(NormParam& param) {
CheckValid("Normalize");
std::copy(param.mean.begin(), param.mean.end(), mean.begin());
std::copy(param.std.begin(), param.std.end(), std.begin());
}
void operator()(CropParam& param) {
CheckValid("CenterCrop");
std::copy_n(param.tlbr.begin(), 4, crop_tlbr.begin());
std::copy_n(param.size.begin(), 2, crop_hw.begin());
}
};
class CollectImpl : public ::mmdeploy::CollectImpl {
public:
CollectImpl(const Value& args) : ::mmdeploy::CollectImpl(args) {
Platform platform(device_.platform_id());
device_name_ = platform.GetPlatformName();
sha256_ = args["context"].value("sha256", std::string(""));
}
~CollectImpl() = default;
Result<Value> Process(const Value& input) override {
auto tracer = input["__tracer__"].get<Tracer>();
Mat _src_mat = input["ori_img"].get<Mat>();
OUTCOME_TRY(auto src_mat, MakeAvailableOnDevice(_src_mat, device_, stream_));
OUTCOME_TRY(stream_.Wait());
ExtractTransParamVisitor visitor{};
for (auto&& trans : tracer.trans_) {
std::visit(visitor, trans);
}
std::string tag = sha256_ + "_" + device_name_;
FuseFunc func = FuseKernel::Get().GetFunc(tag);
if (!visitor.valid) {
MMDEPLOY_ERROR("unsupported fuse transform");
throw std::invalid_argument("");
}
if (src_mat.type() != DataType::kINT8) {
MMDEPLOY_ERROR("unsupported data type in fuse transform");
throw std::invalid_argument("");
}
if (!func) {
MMDEPLOY_ERROR("can't find fuse function with tag: {}", tag);
throw std::invalid_argument("");
}
Value output = input;
auto img_fields = GetImageFields(input);
for (auto& key : img_fields) {
assert(input.contains(key));
auto src_tensor = input[key].get<Tensor>();
auto desc = src_tensor.desc();
desc.device = device_;
Tensor dst_tensor{desc};
func(stream_.GetNative(), src_mat.data<uint8_t>(), src_mat.height(), src_mat.width(),
to_string(src_mat.pixel_format()).c_str(), visitor.resize_hw[0], visitor.resize_hw[1],
visitor.resize_mode.c_str(), visitor.crop_tlbr[0], visitor.crop_tlbr[1],
visitor.crop_hw[0], visitor.crop_hw[1], visitor.mean[0], visitor.mean[1],
visitor.mean[2], visitor.std[0], visitor.std[1], visitor.std[2], visitor.pad_tlbr[0],
visitor.pad_tlbr[1], visitor.pad_tlbr[2], visitor.pad_tlbr[3], visitor.pad_hw[0],
visitor.pad_hw[1], visitor.pad_val, dst_tensor.data<float>(), dst_tensor.shape(2),
dst_tensor.shape(3));
output[key] = std::move(dst_tensor);
}
return ::mmdeploy::CollectImpl::Process(output);
}
std::string sha256_;
std::string device_name_;
};
class CollectImplCreator : public Creator<::mmdeploy::CollectImpl> {
public:
const char* GetName() const override { return "elena"; }
int GetVersion() const override { return 1; }
std::unique_ptr<::mmdeploy::CollectImpl> Create(const Value& args) override {
return std::make_unique<CollectImpl>(args);
}
};
} // namespace elena
} // namespace mmdeploy
using mmdeploy::CollectImpl;
using mmdeploy::elena::CollectImplCreator;
REGISTER_MODULE(CollectImpl, CollectImplCreator);
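CollectImpl above recovers the fused-kernel arguments by walking the tracer's recorded transforms with std::visit and ExtractTransParamVisitor. A stripped-down, self-contained sketch of that visitor-over-variant pass (the param structs and values below are illustrative, not mmdeploy's trace types):

#include <array>
#include <cstdio>
#include <variant>
#include <vector>

// Illustrative trace entries standing in for the real trace parameter types.
struct ResizeParam { std::array<int, 2> size; };
struct NormParam { std::array<float, 3> mean, stddev; };
using TraceEntry = std::variant<ResizeParam, NormParam>;

// Visitor that collects kernel arguments in a single pass over the trace.
struct ParamCollector {
  std::array<int, 2> resize_hw{};
  std::array<float, 3> mean{}, stddev{};
  void operator()(const ResizeParam& p) { resize_hw = p.size; }
  void operator()(const NormParam& p) { mean = p.mean; stddev = p.stddev; }
};

int main() {
  std::vector<TraceEntry> trace{ResizeParam{{224, 224}},
                                NormParam{{123.7f, 116.3f, 103.5f}, {58.4f, 57.1f, 57.4f}}};
  ParamCollector collector;
  for (const auto& entry : trace) std::visit(collector, entry);
  std::printf("resize %dx%d, mean[0]=%.1f\n", collector.resize_hw[0], collector.resize_hw[1],
              collector.mean[0]);
}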


@ -0,0 +1,44 @@
// Copyright (c) OpenMMLab. All rights reserved.
#include "mmdeploy/preprocess/transform/crop.h"
using namespace std;
namespace mmdeploy {
namespace elena {
class CenterCropImpl : public ::mmdeploy::CenterCropImpl {
public:
explicit CenterCropImpl(const Value& args) : ::mmdeploy::CenterCropImpl(args) {}
protected:
Result<Tensor> CropImage(const Tensor& tensor, int top, int left, int bottom,
int right) override {
auto& src_desc = tensor.desc();
auto data_type = src_desc.data_type;
auto shape = src_desc.shape;
shape[1] = bottom - top + 1; // h
shape[2] = right - left + 1; // w
TensorDesc dummy_desc = {Device{"cpu"}, data_type, shape};
Tensor dummy(dummy_desc, dummy_buffer_);
return dummy;
}
Buffer dummy_buffer_{Device{"cpu"}, 0, nullptr};
};
class CenterCropImplCreator : public Creator<::mmdeploy::CenterCropImpl> {
public:
const char* GetName() const override { return "elena"; }
int GetVersion() const override { return 1; }
ReturnType Create(const Value& args) override { return make_unique<CenterCropImpl>(args); }
};
} // namespace elena
} // namespace mmdeploy
using ::mmdeploy::CenterCropImpl;
using ::mmdeploy::elena::CenterCropImplCreator;
REGISTER_MODULE(CenterCropImpl, CenterCropImplCreator);
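This and the following elena implementations deliberately skip the pixel work: each transform only propagates the output shape, backed by an empty dummy buffer, because the actual computation is performed once by the fused kernel that CollectImpl invokes. A minimal sketch of that shape-only pass, using hypothetical helpers rather than mmdeploy types:

#include <array>
#include <cstdio>

// NHWC shape; only this metadata flows through the "dummy" pass.
using Shape = std::array<long, 4>;

// Hypothetical shape-only steps mirroring CenterCrop and HWC2CHW above.
Shape CenterCrop(Shape s, long top, long left, long bottom, long right) {
  s[1] = bottom - top + 1;  // h
  s[2] = right - left + 1;  // w
  return s;
}
Shape HWC2CHW(Shape s) { return {s[0], s[3], s[1], s[2]}; }

int main() {
  Shape s{1, 256, 256, 3};
  s = CenterCrop(s, 16, 16, 239, 239);  // -> 1x224x224x3
  s = HWC2CHW(s);                       // -> 1x3x224x224
  std::printf("%ldx%ldx%ldx%ld\n", s[0], s[1], s[2], s[3]);
}

The dummy tensors carry this shape information downstream but are never read for data.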


@ -0,0 +1,56 @@
// Copyright (c) OpenMMLab. All rights reserved.
#include "mmdeploy/preprocess/transform/default_format_bundle.h"
namespace mmdeploy {
namespace elena {
class DefaultFormatBundleImpl : public ::mmdeploy::DefaultFormatBundleImpl {
public:
explicit DefaultFormatBundleImpl(const Value& args) : ::mmdeploy::DefaultFormatBundleImpl(args) {}
protected:
Result<Tensor> ToFloat32(const Tensor& tensor, const bool& img_to_float) override {
auto& src_desc = tensor.desc();
auto data_type = src_desc.data_type;
auto shape = src_desc.shape;
if (img_to_float && data_type == DataType::kINT8) {
data_type = DataType::kFLOAT;
}
TensorDesc dummy_desc = {Device{"cpu"}, data_type, shape};
Tensor dummy(dummy_desc, dummy_buffer_);
return dummy;
}
Result<Tensor> HWC2CHW(const Tensor& tensor) override {
auto& src_desc = tensor.desc();
auto data_type = src_desc.data_type;
auto shape = src_desc.shape;
shape = {shape[0], shape[3], shape[1], shape[2]};
TensorDesc dummy_desc = {Device{"cpu"}, data_type, shape};
Tensor dummy(dummy_desc, dummy_buffer_);
return dummy;
}
Buffer dummy_buffer_{Device{"cpu"}, 0, nullptr};
};
class DefaultFormatBundleImplCreator : public Creator<::mmdeploy::DefaultFormatBundleImpl> {
public:
const char* GetName() const override { return "elena"; }
int GetVersion() const override { return 1; }
ReturnType Create(const Value& args) override {
return std::make_unique<DefaultFormatBundleImpl>(args);
}
};
} // namespace elena
} // namespace mmdeploy
using mmdeploy::DefaultFormatBundleImpl;
using mmdeploy::elena::DefaultFormatBundleImplCreator;
REGISTER_MODULE(DefaultFormatBundleImpl, DefaultFormatBundleImplCreator);


@ -0,0 +1,32 @@
// Copyright (c) OpenMMLab. All rights reserved.
#include "elena_registry.h"
#include "mmdeploy/core/logger.h"
namespace mmdeploy {
namespace elena {
FuseKernel& FuseKernel::Get() {
static FuseKernel fuse_kernel;
return fuse_kernel;
}
FuseFunc FuseKernel::GetFunc(const std::string& name) {
if (entries_.count(name)) {
return entries_[name];
}
return nullptr;
}
int FuseKernel::Register(const std::string& name, FuseFunc func) {
if (entries_.count(name)) {
return -1;
}
MMDEPLOY_DEBUG("Register fuse kernel: '{}'", name);
entries_.emplace(name, func);
return 0;
}
} // namespace elena
} // namespace mmdeploy


@ -0,0 +1,45 @@
// Copyright (c) OpenMMLab. All rights reserved.
#ifndef MMDEPLOY_ELENA_REGISTRY_H_
#define MMDEPLOY_ELENA_REGISTRY_H_
#include <cstdint>
#include <map>
#include <string>
#include "mmdeploy/core/macro.h"
namespace mmdeploy {
namespace elena {
using FuseFunc = void (*)(void* stream, uint8_t* data_in, int src_h, int src_w, const char* format,
int resize_h, int resize_w, const char* interpolation, int crop_top,
int crop_left, int crop_h, int crop_w, float mean0, float mean1,
float mean2, float std0, float std1, float std2, int pad_top,
int pad_left, int pad_bottom, int pad_right, int pad_h, int pad_w,
float pad_value, float* data_out, int dst_h, int dst_w);
class MMDEPLOY_API FuseKernel {
public:
static FuseKernel& Get();
int Register(const std::string& name, FuseFunc func);
FuseFunc GetFunc(const std::string& name);
private:
FuseKernel() = default;
std::map<std::string, FuseFunc> entries_;
};
class MMDEPLOY_API FuseKernelRegister {
public:
FuseKernelRegister(const std::string& name, FuseFunc func) {
FuseKernel::Get().Register(name, func);
}
};
} // namespace elena
} // namespace mmdeploy
#define REGISTER_FUSE_KERNEL(name, module_name, func) \
static ::mmdeploy::elena::FuseKernelRegister g_register_##name##_##func(module_name, func);
#endif  // MMDEPLOY_ELENA_REGISTRY_H_
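A code-generated fused kernel is expected to register itself through REGISTER_FUSE_KERNEL and is later looked up by tag — the sha256 read from the pipeline context plus the platform name, as built in CollectImpl::Process. A hedged usage sketch follows; the kernel name, tag value, and empty body are hypothetical, only the macro and the FuseFunc signature come from this header:

// elena_codegen_example.cpp -- illustrative only.
#include <cstdint>

#include "elena_registry.h"

namespace {
// Hypothetical fused preprocess kernel as the code generator might emit it; the
// real body would do resize + crop + normalize + pad in a single pass.
void FusedPreprocessCPU(void* stream, uint8_t* data_in, int src_h, int src_w, const char* format,
                        int resize_h, int resize_w, const char* interpolation, int crop_top,
                        int crop_left, int crop_h, int crop_w, float mean0, float mean1,
                        float mean2, float std0, float std1, float std2, int pad_top, int pad_left,
                        int pad_bottom, int pad_right, int pad_h, int pad_w, float pad_value,
                        float* data_out, int dst_h, int dst_w) {
  // ... generated element-wise loop would go here ...
}
}  // namespace

// The "<sha256>_<platform>" key is what CollectImpl::Process asks FuseKernel for at run time.
REGISTER_FUSE_KERNEL(fused_preprocess_cpu, "0123abcd_cpu", FusedPreprocessCPU);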


@ -0,0 +1,41 @@
// Copyright (c) OpenMMLab. All rights reserved.
#include "mmdeploy/preprocess/transform/image2tensor.h"
namespace mmdeploy {
namespace elena {
class ImageToTensorImpl : public ::mmdeploy::ImageToTensorImpl {
public:
explicit ImageToTensorImpl(const Value& args) : ::mmdeploy::ImageToTensorImpl(args) {}
protected:
Result<Tensor> HWC2CHW(const Tensor& tensor) override {
auto& src_desc = tensor.desc();
auto data_type = src_desc.data_type;
auto shape = src_desc.shape;
shape = {shape[0], shape[3], shape[1], shape[2]};
TensorDesc dummy_desc = {Device{"cpu"}, data_type, shape};
Tensor dummy(dummy_desc, dummy_buffer_);
return dummy;
}
Buffer dummy_buffer_{Device{"cpu"}, 0, nullptr};
};
class ImageToTensorImplCreator : public Creator<::mmdeploy::ImageToTensorImpl> {
public:
const char* GetName() const override { return "elena"; }
int GetVersion() const override { return 1; }
ReturnType Create(const Value& args) override {
return std::make_unique<ImageToTensorImpl>(args);
}
};
} // namespace elena
} // namespace mmdeploy
using mmdeploy::ImageToTensorImpl;
using mmdeploy::elena::ImageToTensorImplCreator;
REGISTER_MODULE(ImageToTensorImpl, ImageToTensorImplCreator);

Some files were not shown because too many files have changed in this diff.