Merge master:ea7706cb into sync_master
commit 40adb329a3
@@ -1,48 +0,0 @@
---
name: Bug report
about: Create a report to help us improve
title: ''
labels: ''
assignees: ''
---

Thanks for your bug report. We appreciate it a lot.

**Checklist**

1. I have searched related issues but cannot get the expected help.
2. I have read the [FAQ documentation](https://github.com/open-mmlab/mmdeploy/blob/master/docs/en/faq.md) but cannot get the expected help.
3. The bug has not been fixed in the latest version.

**Describe the bug**

A clear and concise description of what the bug is.

**Reproduction**

1. What command or script did you run?

```none
A placeholder for the command.
```

2. Did you make any modifications on the code or config? Did you understand what you have modified?

**Environment**

1. Please run `python tools/check_env.py` to collect necessary environment information and paste it here.
2. You may add addition that may be helpful for locating the problem, such as
   - How you installed PyTorch \[e.g., pip, conda, source\]
   - Other environment variables that may be related (such as `$PATH`, `$LD_LIBRARY_PATH`, `$PYTHONPATH`, etc.)

**Error traceback**

If applicable, paste the error trackback here.

```none
A placeholder for trackback.
```

**Bug fix**

If you have already identified the reason, you can provide the information here. If you are willing to create a PR to fix it, please also leave a comment here and that would be much appreciated!

@@ -0,0 +1,56 @@
name: Bug report
description: Create a report to help us improve


body:
  - type: checkboxes
    attributes:
      label: Checklist
      options:
        - label: I have searched related issues but cannot get the expected help.
        - label: 2. I have read the [FAQ documentation](https://github.com/open-mmlab/mmdeploy/blob/master/docs/en/faq.md) but cannot get the expected help.
        - label: 3. The bug has not been fixed in the latest version.
  - type: textarea
    attributes:
      label: Describe the bug
      description: A clear and concise description of what the bug is.
    validations:
      required: true
  - type: textarea
    attributes:
      label: Reproduction
      description: |
        1. What command or script did you run?

        2. Did you make any modifications on the code or config? Did you understand what you have modified?
      placeholder: |
        A placeholder for the command.
    validations:
      required: true
  - type: textarea
    attributes:
      label: Environment
      description: |
        1. Please run `python tools/check_env.py` to collect necessary environment information and paste it here.
        2. You may add addition that may be helpful for locating the problem, such as
           - How you installed PyTorch \[e.g., pip, conda, source\]
           - Other environment variables that may be related (such as `$PATH`, `$LD_LIBRARY_PATH`, `$PYTHONPATH`, etc.)
      placeholder: Environment here.
      render: Shell
    validations:
      required: true
  - type: textarea
    attributes:
      label: Error traceback
      description: |
        If applicable, paste the error trackback here.
      placeholder: Logs and traceback here.
      render: Shell
  - type: markdown
    attributes:
      value: >
        If you have already identified the reason, you can provide the information here. If you are willing to create a PR to fix it, please also leave a comment here and that would be much appreciated!

        Thanks for your bug report. We appreciate it a lot.

labels: ['Bug']
@@ -1,23 +0,0 @@
---
name: Feature request
about: Suggest an idea for this project
title: ''
labels: ''
assignees: ''
---

**Describe the feature**

**Motivation**

A clear and concise description of the motivation of the feature.
Ex1. It is inconvenient when \[....\].

**Related resources**

If there is an official code release or third-party implementations, please also provide the information here, which would be very helpful.

**Additional context**

Add any other context or screenshots about the feature request here.
If you would like to implement the feature and create a PR, please leave a comment here and that would be much appreciated.

@@ -0,0 +1,27 @@
name: Feature request
description: Suggest an idea for this project

body:
  - type: markdown
    attributes:
      value: >
        ## Describe the feature
  - type: textarea
    attributes:
      label: Motivation
      description: |
        A clear and concise description of the motivation of the feature.
        Ex1. It is inconvenient when \[....\].
    validations:
      required: true
  - type: textarea
    attributes:
      label: Related resources
      description: |
        If there is an official code release or third-party implementations, please also provide the information here, which would be very helpful.
  - type: textarea
    attributes:
      label: Additional context
      description: |
        Add any other context or screenshots about the feature request here.
        If you would like to implement the feature and create a PR, please leave a comment here and that would be much appreciated.
@@ -24,21 +24,29 @@ pattern = re.compile(r'\[.*?\]\(.*?\)')
def analyze_doc(home, path):
    print('analyze {}'.format(path))
    problem_list = []
    code_block = False
    code_block = 0
    with open(path) as f:
        lines = f.readlines()
        for line in lines:
            line = line.strip()
            if line.startswith('```'):
                code_block = not code_block
                continue
                code_block = 1 - code_block

            if code_block is True:
            if code_block > 0:
                continue

            if '[' in line and ']' in line and '(' in line and ')' in line:
                all = pattern.findall(line)
                for item in all:
                    # skip ![]()
                    if item.find('[') == item.find(']') - 1:
                        continue

                    # process the case [text()]()
                    offset = item.find('](')
                    if offset == -1:
                        continue
                    item = item[offset:]
                    start = item.find('(')
                    end = item.find(')')
                    ref = item[start + 1:end]

@@ -62,7 +70,7 @@ def analyze_doc(home, path):

def traverse(target):
    if os.path.isfile(target):
        analyze_doc('./', target)
        analyze_doc(os.path.dirname(target), target)
        return
    for home, dirs, files in os.walk(target):
        for filename in files:
@@ -0,0 +1,70 @@
#!/bin/sh

set -e
# print env
python3 tools/check_env.py

deploy_cfg=configs/mmcls/classification_onnxruntime_dynamic.py
device=cpu
model_cfg=../mmclassification/configs/resnet/resnet18_8xb32_in1k.py
checkpoint=https://download.openmmlab.com/mmclassification/v0/resnet/resnet18_8xb32_in1k_20210831-fbbb1da6.pth
sdk_cfg=configs/mmcls/classification_sdk_dynamic.py
input_img=../mmclassification/demo/demo.JPEG
work_dir=work_dir

echo "------------------------------------------------------------------------------------------------------------"
echo "deploy_cfg=$deploy_cfg"
echo "model_cfg=$model_cfg"
echo "checkpoint=$checkpoint"
echo "device=$device"
echo "------------------------------------------------------------------------------------------------------------"

mkdir -p $work_dir

python3 tools/deploy.py \
    $deploy_cfg \
    $model_cfg \
    $checkpoint \
    $input_img \
    --device $device \
    --work-dir $work_dir \
    --dump-info

# prepare dataset
wget -P data/ https://github.com/open-mmlab/mmdeploy/files/9401216/imagenet-val100.zip
unzip data/imagenet-val100.zip -d data/

echo "Running test with ort"

python3 tools/test.py \
    $deploy_cfg \
    $model_cfg \
    --model $work_dir/end2end.onnx \
    --device $device \
    --out $work_dir/ort_out.pkl \
    --metrics accuracy \
    --device $device \
    --log2file $work_dir/test_ort.log \
    --speed-test \
    --log-interval 50 \
    --warmup 20 \
    --batch-size 32

echo "Running test with sdk"

# change topk for test
sed -i 's/"topk": 5/"topk": 1000/g' work_dir/pipeline.json

python3 tools/test.py \
    $sdk_cfg \
    $model_cfg \
    --model $work_dir \
    --device $device \
    --out $work_dir/sdk_out.pkl \
    --metrics accuracy \
    --device $device \
    --log2file $work_dir/test_sdk.log \
    --speed-test \
    --log-interval 50 \
    --warmup 20 \
    --batch-size 1
@@ -0,0 +1,54 @@
name: backend-ascend

on:
  push:
    paths-ignore:
      - "demo/**"
      - "tools/**"

  pull_request:
    paths-ignore:
      - "demo/**"
      - "tools/**"
      - "docs/**"

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  build_sdk_demo:
    runs-on: ubuntu-18.04
    strategy:
      matrix:
        python-version: [3.7]
    steps:
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          submodules: 'recursive'
      - name: update
        run: sudo apt update
      - name: Install dependencies
        run: |
          sudo apt update
          sudo apt install -y ffmpeg libsm6 libxext6 git ninja-build libglib2.0-0 libxrender-dev libc++1-9 libc++abi1-9
          sudo add-apt-repository ppa:ignaciovizzo/opencv3-nonfree
          sudo apt install libopencv-dev
          pkg-config --libs opencv
      - name: Install Ascend Toolkit
        run: |
          mkdir -p $GITHUB_WORKSPACE/Ascend
          wget https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN%205.1.RC2/Ascend-cann-toolkit_5.1.RC2_linux-x86_64.run
          sh Ascend-cann-toolkit_5.1.RC2_linux-x86_64.run --install --install-path=$GITHUB_WORKSPACE/Ascend --quiet --chip=Ascend310 --blacklist=devtools
      - name: Build SDK Demo with Ascend backend
        run: |
          mkdir -p build && pushd build
          source $GITHUB_WORKSPACE/Ascend/ascend-toolkit/set_env.sh
          export LD_LIBRARY_PATH=$GITHUB_WORKSPACE/Ascend/ascend-toolkit/latest/runtime/lib64/stub:$LD_LIBRARY_PATH
          cmake .. -DCMAKE_CXX_COMPILER=g++-7 -DMMDEPLOY_SHARED_LIBS=ON -DMMDEPLOY_BUILD_SDK=ON -DMMDEPLOY_BUILD_SDK_PYTHON_API=OFF -DMMDEPLOY_TARGET_DEVICES=cpu -DMMDEPLOY_BUILD_EXAMPLES=ON -DMMDEPLOY_TARGET_BACKENDS=acl -DMMDEPLOY_CODEBASES=all
          make install -j4
@@ -0,0 +1,71 @@
name: backend-coreml

on:
  push:
    paths:
      - "csrc/**"
      - "demo/csrc/**"
      - "CMakeLists.txt"

  pull_request:
    paths:
      - "csrc/**"
      - "demo/csrc/**"
      - "CMakeLists.txt"

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true
env:
  DEVELOPER_DIR: /Applications/Xcode_13.4.1.app/Contents/Developer
permissions:
  contents: read

jobs:
  build_macos_arm64:
    runs-on: macos-12
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          submodules: 'recursive'
      - name: install opencv
        run: |
          wget https://github.com/irexyc/mmdeploy-ci-resource/releases/download/opencv/opencv-osx-arm64-4.6.0.tar.gz
          mkdir $GITHUB_WORKSPACE/opencv-install
          tar xf opencv-osx-arm64-4.6.0.tar.gz -C $GITHUB_WORKSPACE/opencv-install
      - name: install libtorch
        run: |
          wget https://github.com/irexyc/mmdeploy-ci-resource/releases/download/libtorch/libtorch-osx-arm64-1.8.0.tar.gz
          mkdir $GITHUB_WORKSPACE/libtorch-install
          tar xf libtorch-osx-arm64-1.8.0.tar.gz -C $GITHUB_WORKSPACE/libtorch-install
      - name: build
        run: |
          mkdir build && cd build
          cmake .. -DCMAKE_OSX_ARCHITECTURES="arm64" \
            -DCMAKE_SYSTEM_PROCESSOR="arm64" \
            -DMMDEPLOY_BUILD_SDK=ON \
            -DMMDEPLOY_TARGET_DEVICES="cpu" \
            -DMMDEPLOY_CODEBASES=all \
            -DOpenCV_DIR=$GITHUB_WORKSPACE/opencv-install/lib/cmake/opencv4 \
            -DTorch_DIR=$GITHUB_WORKSPACE/libtorch-install/share/cmake/Torch \
            -DMMDEPLOY_TARGET_BACKENDS="coreml" \
            -DMMDEPLOY_BUILD_EXAMPLES=ON \
            -DMMDEPLOY_SHARED_LIBS=OFF
          cmake --build . -j 3
          cmake --build . --target install
      - name: build-shared
        run: |
          mkdir build-shared && cd build-shared
          cmake .. -DCMAKE_OSX_ARCHITECTURES="arm64" \
            -DCMAKE_SYSTEM_PROCESSOR="arm64" \
            -DMMDEPLOY_BUILD_SDK=ON \
            -DMMDEPLOY_TARGET_DEVICES="cpu" \
            -DMMDEPLOY_CODEBASES=all \
            -DOpenCV_DIR=$GITHUB_WORKSPACE/opencv-install/lib/cmake/opencv4 \
            -DTorch_DIR=$GITHUB_WORKSPACE/libtorch-install/share/cmake/Torch \
            -DMMDEPLOY_TARGET_BACKENDS="coreml" \
            -DMMDEPLOY_BUILD_EXAMPLES=ON \
            -DMMDEPLOY_SHARED_LIBS=ON
          cmake --build . -j 3
          cmake --build . --target install
@@ -22,11 +22,6 @@ jobs:
    strategy:
      matrix:
        python-version: [3.7]
        torch: [1.9.0]
        include:
          - torch: 1.9.0
            torch_version: torch1.9
            torchvision: 0.10.0
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3

@@ -65,3 +60,23 @@ jobs:
          echo $(pwd)
          ln -s build/bin/mmdeploy_onnx2ncnn ./
          python3 .github/scripts/test_onnx2ncnn.py --run 1
  script_install:
    runs-on: ubuntu-20.04
    strategy:
      matrix:
        python-version: [3.7]
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          submodules: 'recursive'
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install mmdeploy
        run: |
          python3 tools/scripts/build_ubuntu_x64_ncnn.py
          python3 -m pip install torch==1.8.2 torchvision==0.9.2 --extra-index-url https://download.pytorch.org/whl/lts/1.8/cpu
          python3 -m pip install mmcv-full==1.5.1 -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.8.0/index.html
          python3 -c 'import mmdeploy.apis.ncnn as ncnn_api; assert ncnn_api.is_available() and ncnn_api.is_custom_ops_available()'
@@ -0,0 +1,48 @@
name: backend-ort

on:
  push:
    paths-ignore:
      - "demo/**"
      - "tools/**"

  pull_request:
    paths-ignore:
      - "demo/**"
      - "tools/**"
      - "docs/**"

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  script_install:
    runs-on: ubuntu-20.04
    strategy:
      matrix:
        python-version: [3.7]
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          submodules: 'recursive'
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install mmdeploy
        run: |
          python3 tools/scripts/build_ubuntu_x64_ort.py
          python3 -m pip install torch==1.8.2 torchvision==0.9.2 --extra-index-url https://download.pytorch.org/whl/lts/1.8/cpu
          python3 -m pip install mmcv-full==1.5.1 -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.8.0/index.html
          python3 -c 'import mmdeploy.apis.onnxruntime as ort_api; assert ort_api.is_available() and ort_api.is_custom_ops_available()'
      - name: test mmcls full pipeline
        run: |
          pip install openmim
          mim install mmcls
          git clone --depth 1 --single-branch --branch master https://github.com/open-mmlab/mmclassification.git ../mmclassification
          export MMDEPLOY_DIR=$(pwd)
          export ONNXRUNTIME_DIR=$MMDEPLOY_DIR/../mmdeploy-dep/onnxruntime-linux-x64-1.8.1
          export LD_LIBRARY_PATH=$ONNXRUNTIME_DIR/lib:$MMDEPLOY_DIR/build/install/lib:$LD_LIBRARY_PATH
          bash .github/scripts/test_mmcls_full_pipeline.sh
@@ -0,0 +1,39 @@
name: backend-pplnn

on:
  push:
    paths-ignore:
      - "demo/**"
      - "tools/**"

  pull_request:
    paths-ignore:
      - "demo/**"
      - "tools/**"
      - "docs/**"

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  script_install:
    runs-on: ubuntu-18.04
    strategy:
      matrix:
        python-version: [3.7]
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          submodules: 'recursive'
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install mmdeploy
        run: |
          python3 tools/scripts/build_ubuntu_x64_pplnn.py
          python3 -m pip install torch==1.8.2 torchvision==0.9.2 --extra-index-url https://download.pytorch.org/whl/lts/1.8/cpu
          python3 -m pip install mmcv-full==1.5.1 -f https://download.openmmlab.com/mmcv/dist/cpu/torch1.8.0/index.html
          python3 -c 'import mmdeploy.apis.pplnn as pplnn_api; assert pplnn_api.is_available()'
@@ -0,0 +1,36 @@
name: backend-ort

on:
  push:
    paths-ignore:
      - "demo/**"
      - "tools/**"

  pull_request:
    paths-ignore:
      - "demo/**"
      - "tools/**"
      - "docs/**"

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  script_install:
    runs-on: ubuntu-18.04
    strategy:
      matrix:
        python-version: [3.7]
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          submodules: 'recursive'
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install mmdeploy
        run: |
          python3 tools/scripts/build_ubuntu_x64_torchscript.py
@@ -121,7 +121,7 @@ jobs:
        run: |
          python -V
          python -m pip install mmcv-full==${{matrix.mmcv}} -f https://download.openmmlab.com/mmcv/dist/cu102/${{matrix.torch_version}}/index.html
          python -m pip install -r requirements.txt
          CFLAGS=`python -c 'import sysconfig;print("-I"+sysconfig.get_paths()["include"])'` python -m pip install -r requirements.txt
          pip install -U pycuda
          python -m pip install -U numpy
      - name: Build and install

@@ -167,7 +167,7 @@ jobs:
        run: |
          python -V
          python -m pip install mmcv-full==${{matrix.mmcv}} -f https://download.openmmlab.com/mmcv/dist/cu111/${{matrix.torch_version}}/index.html
          python -m pip install -r requirements.txt
          CFLAGS=`python -c 'import sysconfig;print("-I"+sysconfig.get_paths()["include"])'` python -m pip install -r requirements.txt
          pip install -U pycuda
          python -m pip install -U numpy
      - name: Build and install
@@ -0,0 +1,56 @@
name: build_riscv64_gcc

on:
  push:
    paths:
      - "csrc/**"
      - "demo/csrc/**"
      - "CMakeLists.txt"

  pull_request:
    paths-ignore:
      - "csrc/**"
      - "demo/csrc/**"
      - "CMakeLists.txt"

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  build_riscv64_gcc:
    runs-on: ubuntu-20.04
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
        with:
          submodules: 'recursive'
      - name: riscv64-gnu-toolchain
        run: |
          sudo apt-get update
          sudo apt-get install g++-riscv64-linux-gnu
      - name: install opencv
        run: |
          mkdir $GITHUB_WORKSPACE/opencv-install
          wget https://github.com/irexyc/mmdeploy-ci-resource/raw/opencv/opencv_4.6.0_linux_riscv64.tar.gz
          tar xf opencv_4.6.0_linux_riscv64.tar.gz -C $GITHUB_WORKSPACE/opencv-install
      - name: install ncnn
        run: |
          mkdir $GITHUB_WORKSPACE/ncnn-install
          wget https://github.com/irexyc/mmdeploy-ci-resource/raw/ncnn/ncnn_20220729_linux_riscv64.tar.gz
          tar xf ncnn_20220729_linux_riscv64.tar.gz -C $GITHUB_WORKSPACE/ncnn-install
      - name: build
        run: |
          mkdir build && cd build
          cmake .. \
            -DCMAKE_TOOLCHAIN_FILE=../cmake/toolchains/riscv64-linux-gnu.cmake \
            -DMMDEPLOY_BUILD_SDK=ON \
            -DMMDEPLOY_SHARED_LIBS=ON \
            -DMMDEPLOY_BUILD_EXAMPLES=ON \
            -DMMDEPLOY_TARGET_DEVICES="cpu" \
            -DMMDEPLOY_TARGET_BACKENDS="ncnn" \
            -Dncnn_DIR=$GITHUB_WORKSPACE/ncnn-install/lib/cmake/ncnn/ \
            -DMMDEPLOY_CODEBASES=all \
            -DOpenCV_DIR=$GITHUB_WORKSPACE/opencv-install/lib/cmake/opencv4
          make -j$(nproc)
          make install
@@ -50,7 +50,7 @@ jobs:
        run: |
          python -V
          python -m pip install mmcv-full==${{matrix.mmcv}} -f https://download.openmmlab.com/mmcv/dist/cu111/${{matrix.torch_version}}/index.html
          python -m pip install -r requirements.txt
          CFLAGS=`python -c 'import sysconfig;print("-I"+sysconfig.get_paths()["include"])'` python -m pip install -r requirements.txt
          python -m pip install -U numpy

      - name: Install mmcls
@@ -155,6 +155,14 @@ mmdeploy/backend/ncnn/onnx2ncnn

# OCR dicts
dicts
# ascend
fusion_result.json

# snpe
grpc-cpp-plugin
service/snpe/grpc_cpp_plugin

# elena-code
csrc/mmdeploy/preprocess/elena/json
csrc/mmdeploy/preprocess/elena/cpu_kernel/*
csrc/mmdeploy/preprocess/elena/cuda_kernel/*
@@ -5,7 +5,7 @@ endif ()
message(STATUS "CMAKE_INSTALL_PREFIX: ${CMAKE_INSTALL_PREFIX}")

cmake_minimum_required(VERSION 3.14)
project(MMDeploy VERSION 0.7.0)
project(MMDeploy VERSION 0.8.0)

set(CMAKE_CXX_STANDARD 17)

@@ -34,6 +34,7 @@ option(MMDEPLOY_BUILD_EXAMPLES "build examples" OFF)
option(MMDEPLOY_SPDLOG_EXTERNAL "use external spdlog" OFF)
option(MMDEPLOY_ZIP_MODEL "support SDK model in zip format" OFF)
option(MMDEPLOY_COVERAGE "build SDK for coverage" OFF)
option(MMDEPLOY_ELENA_FUSION "use elena to fuse preprocess" OFF)

set(MMDEPLOY_TARGET_DEVICES "cpu" CACHE STRING "target devices to support")
set(MMDEPLOY_TARGET_BACKENDS "" CACHE STRING "target inference engines to support")

@@ -77,6 +78,10 @@ if (MSVC)
    add_compile_options($<$<COMPILE_LANGUAGE:CXX>:/wd4251>)
endif ()

if(APPLE)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fobjc-arc")
endif()

add_library(MMDeployStaticModules INTERFACE)
add_library(MMDeployDynamicModules INTERFACE)
add_library(MMDeployLibs INTERFACE)
@@ -53,9 +53,9 @@ MMDeploy is the [OpenMMLab](https://openmmlab.com/) model deployment toolbox, **providing

### Multiple inference backends are supported

| ONNX Runtime | TensorRT | ppl.nn | ncnn | OpenVINO | LibTorch | snpe | Ascend | Core ML | RKNN | more |
| ------------ | -------- | ------ | ---- | -------- | -------- | ---- | ------ | ------- | ---- | ------------------------------------------------- |
| ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | [benchmark](docs/zh_cn/03-benchmark/benchmark.md) |
| ONNX Runtime | TensorRT | ppl.nn | ncnn | OpenVINO | LibTorch | snpe | Ascend | Core ML | RKNN | more |
| ------------ | -------- | ------ | ---- | -------- | -------- | ---- | ------ | ------- | ---- | ---------------------------------------------- |
| ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | [benchmark](docs/en/03-benchmark/benchmark.md) |

### Highly customizable SDK

@@ -16,19 +16,19 @@ find_package(CUDA REQUIRED)

if (MSVC)
    set(CMAKE_CUDA_COMPILER ${CUDA_TOOLKIT_ROOT_DIR}/bin/nvcc.exe)
    set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -Xcompiler=/wd4819,/wd4828")
    set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler=/wd4819,/wd4828")
    if (HAVE_CXX_FLAG_UTF_8)
        set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -Xcompiler=/utf-8")
        set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler=/utf-8")
    endif ()
else ()
    set(CMAKE_CUDA_COMPILER ${CUDA_TOOLKIT_ROOT_DIR}/bin/nvcc)
    # Explicitly set the cuda host compiler. Because the default host compiler #
    # selected by cmake maybe wrong.
    set(CMAKE_CUDA_HOST_COMPILER ${CMAKE_CXX_COMPILER})
    set(CUDA_NVCC_FLAGS
        "${CUDA_NVCC_FLAGS} -Xcompiler=-fPIC,-Wall,-fvisibility=hidden")
    set(CMAKE_CUDA_FLAGS
        "${CMAKE_CUDA_FLAGS} -Xcompiler=-fPIC,-Wall,-fvisibility=hidden")
    if (CMAKE_CXX_COMPILER_ID MATCHES "GNU")
        set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS} -Xcompiler=-fno-gnu-unique")
        set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler=-fno-gnu-unique")
    endif ()
endif ()

@@ -62,10 +62,12 @@ if (NOT CMAKE_CUDA_ARCHITECTURES)
    endif ()
endif ()

set(CUDA_NVCC_FLAGS_DEBUG "-g -O0")
set(CUDA_NVCC_FLAGS_RELEASE "-O3")
set(CUDA_NVCC_FLAGS "${CUDA_NVCC_FLAGS}")
set(CMAKE_CUDA_FLAGS_DEBUG "-g -O0")
set(CMAKE_CUDA_FLAGS_RELEASE "-O3")

set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMMDEPLOY_USE_CUDA=1")

if (NOT MSVC)
    set(CMAKE_CUDA_STANDARD 14)
endif ()
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${CUDA_NVCC_FLAGS} ${_NVCC_FLAGS}")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} ${_NVCC_FLAGS}")
@@ -0,0 +1,17 @@
set(CMAKE_SYSTEM_NAME Linux)
set(CMAKE_SYSTEM_PROCESSOR riscv)

set(CMAKE_C_COMPILER "riscv64-linux-gnu-gcc")
set(CMAKE_CXX_COMPILER "riscv64-linux-gnu-g++")

set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)

set(CMAKE_C_FLAGS "-march=rv64gc")
set(CMAKE_CXX_FLAGS "-march=rv64gc")

# cache flags
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}" CACHE STRING "c flags")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}" CACHE STRING "c++ flags")

@@ -0,0 +1,26 @@
set(CMAKE_SYSTEM_NAME Linux)
set(CMAKE_SYSTEM_PROCESSOR riscv)

if(DEFINED ENV{RISCV_ROOT_PATH})
    file(TO_CMAKE_PATH $ENV{RISCV_ROOT_PATH} RISCV_ROOT_PATH)
else()
    message(FATAL_ERROR "RISCV_ROOT_PATH env must be defined")
endif()

set(CMAKE_C_COMPILER ${RISCV_ROOT_PATH}/bin/riscv64-unknown-linux-gnu-gcc)
set(CMAKE_CXX_COMPILER ${RISCV_ROOT_PATH}/bin/riscv64-unknown-linux-gnu-g++)

set(CMAKE_SYSROOT "${RISCV_ROOT_PATH}/sysroot" CACHE PATH "riscv sysroot")
set(CMAKE_FIND_ROOT_PATH ${RISCV_ROOT_PATH}/riscv64-unknown-linux-gnu)

set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)

set(CMAKE_C_FLAGS "-march=rv64gc")
set(CMAKE_CXX_FLAGS "-march=rv64gc")

# cache flags
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}" CACHE STRING "c flags")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}" CACHE STRING "c++ flags")
@@ -0,0 +1 @@
backend_config = dict(type='ascend')

@@ -0,0 +1 @@
backend_config = dict(type='coreml', convert_to='mlprogram')

@@ -0,0 +1,8 @@
backend_config = dict(
    type='rknn',
    common_config=dict(
        mean_values=None,
        std_values=None,
        target_platform='rk3588',
        optimization_level=3),
    quantization_config=dict(do_quantization=False, dataset=None))

@@ -0,0 +1,9 @@
_base_ = ['./classification_dynamic.py', '../_base_/backends/ascend.py']

onnx_config = dict(input_shape=[224, 224])

backend_config = dict(model_inputs=[
    dict(
        dynamic_batch_size=[1, 2, 4, 8],
        input_shapes=dict(input=[-1, 3, 224, 224]))
])

@@ -0,0 +1,5 @@
_base_ = ['./classification_static.py', '../_base_/backends/ascend.py']

onnx_config = dict(input_shape=[224, 224])
backend_config = dict(
    model_inputs=[dict(input_shapes=dict(input=[1, 3, 224, 224]))])

@@ -0,0 +1,12 @@
_base_ = ['../_base_/torchscript_config.py', '../_base_/backends/coreml.py']

codebase_config = dict(type='mmcls', task='Classification')

backend_config = dict(model_inputs=[
    dict(
        input_shapes=dict(
            input=dict(
                min_shape=[1, 3, 224, 224],
                max_shape=[8, 3, 224, 224],
                default_shape=[1, 3, 224, 224])))
])

@@ -0,0 +1,5 @@
_base_ = ['./classification_static.py', '../_base_/backends/rknn.py']

onnx_config = dict(input_shape=[224, 224])
codebase_config = dict(model_type='rknn')
backend_config = dict(input_size_list=[[3, 224, 224]])
@@ -9,5 +9,5 @@ backend_config = dict(
        input=dict(
            min_shape=[1, 3, 224, 224],
            opt_shape=[4, 3, 224, 224],
            max_shape=[64, 3, 224, 224])))
            max_shape=[8, 3, 224, 224])))
])

@@ -9,5 +9,5 @@ backend_config = dict(
        input=dict(
            min_shape=[1, 3, 224, 224],
            opt_shape=[4, 3, 224, 224],
            max_shape=[64, 3, 224, 224])))
            max_shape=[8, 3, 224, 224])))
])

@@ -9,5 +9,5 @@ backend_config = dict(
        input=dict(
            min_shape=[1, 3, 224, 224],
            opt_shape=[4, 3, 224, 224],
            max_shape=[64, 3, 224, 224])))
            max_shape=[8, 3, 224, 224])))
])
@@ -0,0 +1,11 @@
_base_ = ['./base_torchscript.py', '../../_base_/backends/coreml.py']

ir_config = dict(input_shape=(1344, 800))
backend_config = dict(model_inputs=[
    dict(
        input_shapes=dict(
            input=dict(
                min_shape=[1, 3, 800, 1344],
                max_shape=[1, 3, 800, 1344],
                default_shape=[1, 3, 800, 1344])))
])

@@ -0,0 +1,8 @@
_base_ = ['../_base_/base_dynamic.py', '../../_base_/backends/ascend.py']

onnx_config = dict(input_shape=[1344, 800])
backend_config = dict(model_inputs=[
    dict(
        dynamic_image_size=[(800, 1344), (1344, 800)],
        input_shapes=dict(input=[1, 3, -1, -1]))
])

@@ -0,0 +1,5 @@
_base_ = ['../_base_/base_static.py', '../../_base_/backends/ascend.py']

onnx_config = dict(input_shape=[640, 640])
backend_config = dict(
    model_inputs=[dict(input_shapes=dict(input=[1, 3, 640, 640]))])

@@ -0,0 +1,5 @@
_base_ = ['../_base_/base_static.py', '../../_base_/backends/ascend.py']

onnx_config = dict(input_shape=[1344, 800])
backend_config = dict(
    model_inputs=[dict(input_shapes=dict(input=[1, 3, 800, 1344]))])

@@ -0,0 +1 @@
_base_ = ['../_base_/base_coreml_static-800x1344.py']
@@ -0,0 +1,17 @@
_base_ = ['../_base_/base_static.py', '../../_base_/backends/rknn.py']

onnx_config = dict(input_shape=[640, 640])

codebase_config = dict(model_type='rknn')

backend_config = dict(input_size_list=[[3, 640, 640]])

partition_config = dict(
    type='rknn',  # the partition policy name
    apply_marks=True,  # should always be set to True
    partition_cfg=[
        dict(
            save_file='model.onnx',  # name to save the partitioned onnx model
            start=['detector_forward:input'],  # [mark_name:input/output, ...]
            end=['yolo_head:input'])  # [mark_name:input/output, ...]
    ])

@@ -0,0 +1,9 @@
_base_ = ['./voxel-detection_dynamic.py', '../../_base_/backends/openvino.py']

onnx_config = dict(input_shape=None)

backend_config = dict(model_inputs=[
    dict(
        opt_shapes=dict(
            voxels=[20000, 64, 4], num_points=[20000], coors=[20000, 4]))
])

@@ -1,6 +1,6 @@
_base_ = ['./voxel-detection_dynamic.py', '../../_base_/backends/tensorrt.py']
backend_config = dict(
    common_config=dict(max_workspace_size=1 << 30),
    common_config=dict(max_workspace_size=1 << 32),
    model_inputs=[
        dict(
            input_shapes=dict(

@@ -0,0 +1,18 @@
_base_ = ['./voxel-detection_dynamic.py', '../../_base_/backends/tensorrt.py']
backend_config = dict(
    common_config=dict(max_workspace_size=1 << 32),
    model_inputs=[
        dict(
            input_shapes=dict(
                voxels=dict(
                    min_shape=[5000, 64, 4],
                    opt_shape=[20000, 64, 4],
                    max_shape=[30000, 64, 4]),
                num_points=dict(
                    min_shape=[5000], opt_shape=[20000], max_shape=[30000]),
                coors=dict(
                    min_shape=[5000, 4],
                    opt_shape=[20000, 4],
                    max_shape=[30000, 4]),
            ))
    ])
@@ -0,0 +1,8 @@
_base_ = ['./text-detection_dynamic.py', '../../_base_/backends/ascend.py']

onnx_config = dict(input_shape=None)
backend_config = dict(model_inputs=[
    dict(
        input_shapes=dict(input=[-1, 3, -1, -1]),
        dynamic_dims=[(1, 640, 640), (4, 640, 640), (1, 1280, 1280)])
])

@@ -0,0 +1,5 @@
_base_ = ['./text-detection_static.py', '../../_base_/backends/ascend.py']

onnx_config = dict(input_shape=[640, 640])
backend_config = dict(
    model_inputs=[dict(input_shapes=dict(input=[1, 3, 640, 640]))])

@@ -0,0 +1,3 @@
_base_ = ['./pose-detection_static.py', '../_base_/backends/ncnn-int8.py']

onnx_config = dict(input_shape=[256, 256])

@@ -0,0 +1,5 @@
_base_ = ['./segmentation_static.py', '../_base_/backends/ascend.py']

onnx_config = dict(input_shape=[2048, 1024])
backend_config = dict(
    model_inputs=[dict(input_shapes=dict(input=[1, 3, 1024, 2048]))])

@@ -0,0 +1,5 @@
_base_ = ['./segmentation_static.py', '../_base_/backends/ascend.py']

onnx_config = dict(input_shape=[1024, 512])
backend_config = dict(
    model_inputs=[dict(input_shapes=dict(input=[1, 3, 512, 1024]))])

@@ -0,0 +1,14 @@
_base_ = [
    '../_base_/torchscript_config.py', '../_base_/backends/coreml.py',
    './segmentation_static.py'
]

ir_config = dict(input_shape=[1024, 512])
backend_config = dict(model_inputs=[
    dict(
        input_shapes=dict(
            input=dict(
                min_shape=[1, 3, 512, 1024],
                max_shape=[1, 3, 512, 1024],
                default_shape=[1, 3, 512, 1024])))
])

@@ -0,0 +1,7 @@
_base_ = ['./segmentation_static.py', '../_base_/backends/rknn.py']

onnx_config = dict(input_shape=[512, 512])

codebase_config = dict(model_type='rknn')

backend_config = dict(input_size_list=[[3, 512, 512]])

@@ -0,0 +1,3 @@
_base_ = ['./text-detection_static.py', '../../_base_/backends/ncnn-int8.py']

onnx_config = dict(input_shape=None)
@@ -32,7 +32,8 @@ if ("ncnn" IN_LIST MMDEPLOY_TARGET_BACKENDS)
endif ()

# build TorchScript ops
if ("torchscript" IN_LIST MMDEPLOY_TARGET_BACKENDS)
    message(STATUS "Build torchsciprt custom ops")
if ("torchscript" IN_LIST MMDEPLOY_TARGET_BACKENDS
        OR "coreml" IN_LIST MMDEPLOY_TARGET_BACKENDS)
    message(STATUS "Build torchscript custom ops")
    add_subdirectory(torchscript)
endif ()

@@ -10,7 +10,7 @@ else ()
endif ()


if (NOT ANDROID AND NOT IOS)
if (NOT ANDROID AND NOT IOS AND NOT CMAKE_CROSSCOMPILING)
    add_subdirectory(ops)
    add_subdirectory(onnx2ncnn)
    add_subdirectory(pyncnn_ext)
@@ -0,0 +1,150 @@
// Copyright (c) OpenMMLab. All rights reserved.
#include "gather_topk.hpp"

#include <assert.h>
#include <stdio.h>

#include <chrono>

#include "NvInferVersion.h"
#include "gather_topk_kernel.hpp"
#include "trt_serialize.hpp"

namespace mmdeploy {
namespace {
static const char *PLUGIN_VERSION{"1"};
static const char *PLUGIN_NAME{"GatherTopk"};
}  // namespace

GatherTopk::GatherTopk(const std::string &name) : TRTPluginBase(name) {}

GatherTopk::GatherTopk(const std::string name, const void *data, size_t length)
    : TRTPluginBase(name) {}

nvinfer1::IPluginV2DynamicExt *GatherTopk::clone() const TRT_NOEXCEPT {
  GatherTopk *plugin = new GatherTopk(mLayerName);
  plugin->setPluginNamespace(getPluginNamespace());

  return plugin;
}

nvinfer1::DimsExprs GatherTopk::getOutputDimensions(
    int outputIndex, const nvinfer1::DimsExprs *inputs, int nbInputs,
    nvinfer1::IExprBuilder &exprBuilder) TRT_NOEXCEPT {
  assert(inputs[0].nbDims >= inputs[1].nbDims);
  nvinfer1::DimsExprs ret;
  ret.nbDims = inputs[0].nbDims;
  for (int i = 0; i < inputs[1].nbDims; ++i) {
    ret.d[i] = inputs[1].d[i];
  }
  for (int i = inputs[1].nbDims; i < inputs[0].nbDims; ++i) {
    ret.d[i] = inputs[0].d[i];
  }
  return ret;
}

bool GatherTopk::supportsFormatCombination(int pos, const nvinfer1::PluginTensorDesc *ioDesc,
                                           int nbInputs, int nbOutputs) TRT_NOEXCEPT {
  switch (pos) {
    case 0:
      // data
      return (ioDesc[pos].type == nvinfer1::DataType::kFLOAT &&
              ioDesc[pos].format == nvinfer1::TensorFormat::kLINEAR) ||
             (ioDesc[pos].type == nvinfer1::DataType::kINT32 &&
              ioDesc[pos].format == nvinfer1::TensorFormat::kLINEAR);
    case 1:
      // indices
      return ioDesc[pos].type == nvinfer1::DataType::kINT32 &&
             ioDesc[pos].format == nvinfer1::TensorFormat::kLINEAR;
    case 2:
      // output
      return ioDesc[pos].type == ioDesc[0].type && ioDesc[pos].format == ioDesc[0].format;
    default:
      return true;
  }
  return true;
}

void GatherTopk::configurePlugin(const nvinfer1::DynamicPluginTensorDesc *inputs, int nbInputs,
                                 const nvinfer1::DynamicPluginTensorDesc *outputs,
                                 int nbOutputs) TRT_NOEXCEPT {}

size_t GatherTopk::getWorkspaceSize(const nvinfer1::PluginTensorDesc *inputs, int nbInputs,
                                    const nvinfer1::PluginTensorDesc *outputs,
                                    int nbOutputs) const TRT_NOEXCEPT {
  return 0;
}

int GatherTopk::enqueue(const nvinfer1::PluginTensorDesc *inputDesc,
                        const nvinfer1::PluginTensorDesc *outputDesc, const void *const *inputs,
                        void *const *outputs, void *workSpace, cudaStream_t stream) TRT_NOEXCEPT {
  const int *dims = &(inputDesc[0].dims.d[0]);
  const int *indices_dims = &(inputDesc[1].dims.d[0]);
  int nbDims = inputDesc[0].dims.nbDims;
  int indice_nbDims = inputDesc[1].dims.nbDims;

  const void *data = inputs[0];
  const void *indices = inputs[1];
  void *output = outputs[0];

  auto data_type = inputDesc[0].type;

  switch (data_type) {
    case nvinfer1::DataType::kFLOAT:
      gather_topk_impl<float>((float *)data, (int *)indices, dims, nbDims, indices_dims,
                              indice_nbDims, (float *)output, stream);
      break;

    case nvinfer1::DataType::kINT32:
      gather_topk_impl<int>((int *)data, (int *)indices, dims, nbDims, indices_dims, indice_nbDims,
                            (int *)output, stream);
      break;
    default:
      break;
  }

  return 0;
}

nvinfer1::DataType GatherTopk::getOutputDataType(int index, const nvinfer1::DataType *inputTypes,
                                                 int nbInputs) const TRT_NOEXCEPT {
  return inputTypes[0];
}

// IPluginV2 Methods
const char *GatherTopk::getPluginType() const TRT_NOEXCEPT { return PLUGIN_NAME; }

const char *GatherTopk::getPluginVersion() const TRT_NOEXCEPT { return PLUGIN_VERSION; }

int GatherTopk::getNbOutputs() const TRT_NOEXCEPT { return 1; }

size_t GatherTopk::getSerializationSize() const TRT_NOEXCEPT { return 0; }

void GatherTopk::serialize(void *buffer) const TRT_NOEXCEPT {}

GatherTopkCreator::GatherTopkCreator() {
  mPluginAttributes.clear();
  mFC.nbFields = mPluginAttributes.size();
  mFC.fields = mPluginAttributes.data();
}

const char *GatherTopkCreator::getPluginName() const TRT_NOEXCEPT { return PLUGIN_NAME; }

const char *GatherTopkCreator::getPluginVersion() const TRT_NOEXCEPT { return PLUGIN_VERSION; }

nvinfer1::IPluginV2 *GatherTopkCreator::createPlugin(
    const char *name, const nvinfer1::PluginFieldCollection *fc) TRT_NOEXCEPT {
  auto *plugin = new GatherTopk(name);
  plugin->setPluginNamespace(getPluginNamespace());
  return plugin;
}

nvinfer1::IPluginV2 *GatherTopkCreator::deserializePlugin(const char *name, const void *serialData,
                                                          size_t serialLength) TRT_NOEXCEPT {
  auto plugin = new GatherTopk(name, serialData, serialLength);
  plugin->setPluginNamespace(getPluginNamespace());
  return plugin;
}

REGISTER_TENSORRT_PLUGIN(GatherTopkCreator);
}  // namespace mmdeploy
@@ -0,0 +1,64 @@
// Copyright (c) OpenMMLab. All rights reserved.
#ifndef TRT_SCATTERND_HPP
#define TRT_SCATTERND_HPP
#include <cublas_v2.h>

#include <memory>
#include <string>
#include <vector>

#include "trt_plugin_base.hpp"

namespace mmdeploy {
class GatherTopk : public TRTPluginBase {
 public:
  GatherTopk(const std::string &name);

  GatherTopk(const std::string name, const void *data, size_t length);

  GatherTopk() = delete;

  // IPluginV2DynamicExt Methods
  nvinfer1::IPluginV2DynamicExt *clone() const TRT_NOEXCEPT override;
  nvinfer1::DimsExprs getOutputDimensions(int outputIndex, const nvinfer1::DimsExprs *inputs,
                                          int nbInputs, nvinfer1::IExprBuilder &exprBuilder)
      TRT_NOEXCEPT override;
  bool supportsFormatCombination(int pos, const nvinfer1::PluginTensorDesc *ioDesc, int nbInputs,
                                 int nbOutputs) TRT_NOEXCEPT override;
  void configurePlugin(const nvinfer1::DynamicPluginTensorDesc *in, int nbInputs,
                       const nvinfer1::DynamicPluginTensorDesc *out,
                       int nbOutputs) TRT_NOEXCEPT override;
  size_t getWorkspaceSize(const nvinfer1::PluginTensorDesc *inputs, int nbInputs,
                          const nvinfer1::PluginTensorDesc *outputs,
                          int nbOutputs) const TRT_NOEXCEPT override;
  int enqueue(const nvinfer1::PluginTensorDesc *inputDesc,
              const nvinfer1::PluginTensorDesc *outputDesc, const void *const *inputs,
              void *const *outputs, void *workspace, cudaStream_t stream) TRT_NOEXCEPT override;

  // IPluginV2Ext Methods
  nvinfer1::DataType getOutputDataType(int index, const nvinfer1::DataType *inputTypes,
                                       int nbInputs) const TRT_NOEXCEPT override;

  // IPluginV2 Methods
  const char *getPluginType() const TRT_NOEXCEPT override;
  const char *getPluginVersion() const TRT_NOEXCEPT override;
  int getNbOutputs() const TRT_NOEXCEPT override;
  size_t getSerializationSize() const TRT_NOEXCEPT override;
  void serialize(void *buffer) const TRT_NOEXCEPT override;
};

class GatherTopkCreator : public TRTPluginCreatorBase {
 public:
  GatherTopkCreator();

  const char *getPluginName() const TRT_NOEXCEPT override;

  const char *getPluginVersion() const TRT_NOEXCEPT override;
  nvinfer1::IPluginV2 *createPlugin(const char *name, const nvinfer1::PluginFieldCollection *fc)
      TRT_NOEXCEPT override;

  nvinfer1::IPluginV2 *deserializePlugin(const char *name, const void *serialData,
                                         size_t serialLength) TRT_NOEXCEPT override;
};
}  // namespace mmdeploy
#endif  // TRT_SCATTERND_HPP
@@ -0,0 +1,46 @@
// Copyright (c) OpenMMLab. All rights reserved.

#include <functional>
#include <numeric>
#include <vector>

#include "common_cuda_helper.hpp"
#include "gather_topk_kernel.hpp"
#include "trt_plugin_helper.hpp"

template <typename scalar_t>
__global__ void gather_topk_kernel(const scalar_t* input, const int* indices, scalar_t* output,
                                   int batch, int num_input, int num_indices, int channel) {
  CUDA_1D_KERNEL_LOOP(index, batch * num_indices * channel) {
    const int b_id = index / (num_indices * channel);
    const int n_id = (index / channel) % num_indices;
    const int c_id = index % channel;

    const int input_n_id = indices[b_id * num_indices + n_id];
    const scalar_t value = input[b_id * num_input * channel + input_n_id * channel + c_id];
    output[b_id * num_indices * channel + n_id * channel + c_id] = value;
  }
}

template <typename scalar_t>
void gather_topk_impl(const scalar_t* input, const int* indices, const int* dims, int nbDims,
                      const int* indices_dims, int indice_nbDims, scalar_t* output,
                      cudaStream_t stream) {
  int batch = 1;
  for (int i = 0; i < indice_nbDims - 1; ++i) batch *= dims[i];
  int num_input = dims[indice_nbDims - 1];
  int num_indices = indices_dims[indice_nbDims - 1];
  int channel = 1;
  for (int i = indice_nbDims; i < nbDims; ++i) channel *= dims[i];
  const int col_block = DIVUP(batch * num_indices * channel, THREADS_PER_BLOCK);
  gather_topk_kernel<<<col_block, THREADS_PER_BLOCK, 0, stream>>>(input, indices, output, batch,
                                                                  num_input, num_indices, channel);
}

template void gather_topk_impl<float>(const float* input, const int* indices, const int* dims,
                                      int nbDims, const int* indices_dims, int indice_nbDims,
                                      float* output, cudaStream_t stream);

template void gather_topk_impl<int32_t>(const int32_t* input, const int* indices, const int* dims,
                                        int nbDims, const int* indices_dims, int indice_nbDims,
                                        int32_t* output, cudaStream_t stream);

@@ -0,0 +1,10 @@
// Copyright (c) OpenMMLab. All rights reserved.
#ifndef TRT_GRID_SAMPLER_KERNEL_HPP
#define TRT_GRID_SAMPLER_KERNEL_HPP
#include <cuda_runtime.h>

template <typename scalar_t>
void gather_topk_impl(const scalar_t* input, const int* indices, const int* dims, int nbDims,
                      const int* indices_dims, int indice_nbDims, scalar_t* output,
                      cudaStream_t stream);
#endif  // TRT_GRID_SAMPLER_KERNEL_HPP
@@ -0,0 +1,183 @@
// Copyright (c) OpenMMLab. All rights reserved
#include "scaled_dot_product_attention.hpp"

#include <assert.h>

#include <chrono>

#include "scaled_dot_product_attention_kernel.hpp"
#include "trt_serialize.hpp"

using namespace nvinfer1;

namespace mmdeploy {
namespace {
static const char *PLUGIN_VERSION{"1"};
static const char *PLUGIN_NAME{"ScaledDotProductAttentionTRT"};
}  // namespace

ScaledDotProductAttentionTRT::ScaledDotProductAttentionTRT(const std::string &name)
    : TRTPluginBase(name), mask_dim(0) {}

ScaledDotProductAttentionTRT::ScaledDotProductAttentionTRT(const std::string name, const void *data,
                                                           size_t length)
    : TRTPluginBase(name), mask_dim(0) {}

ScaledDotProductAttentionTRT::~ScaledDotProductAttentionTRT() {}

nvinfer1::IPluginV2DynamicExt *ScaledDotProductAttentionTRT::clone() const TRT_NOEXCEPT {
  ScaledDotProductAttentionTRT *plugin = new ScaledDotProductAttentionTRT(mLayerName);
  plugin->setPluginNamespace(getPluginNamespace());
  return plugin;
}

nvinfer1::DimsExprs ScaledDotProductAttentionTRT::getOutputDimensions(
    int outputIndex, const nvinfer1::DimsExprs *inputs, int nbInputs,
    nvinfer1::IExprBuilder &exprBuilder) TRT_NOEXCEPT {
  if (outputIndex == 0) return inputs[0];
  nvinfer1::DimsExprs ret;
  ret.nbDims = 3;
  ret.d[0] = inputs[0].d[0];
  ret.d[1] = inputs[0].d[1];
  ret.d[2] = inputs[1].d[1];

  return ret;
}

bool ScaledDotProductAttentionTRT::supportsFormatCombination(
    int pos, const nvinfer1::PluginTensorDesc *ioDesc, int nbInputs, int nbOutputs) TRT_NOEXCEPT {
  if (pos == 0) {
    return (ioDesc[pos].type == nvinfer1::DataType::kFLOAT &&
            ioDesc[pos].format == nvinfer1::TensorFormat::kLINEAR);
  } else {
    return ioDesc[pos].type == ioDesc[0].type && ioDesc[pos].format == ioDesc[0].format;
  }
}

// Attach the plugin object to an execution context and grant the plugin the
// access to some context resource.
void ScaledDotProductAttentionTRT::attachToContext(cudnnContext *cudnnContext,
                                                   cublasContext *cublasContext,
                                                   IGpuAllocator *gpuAllocator) TRT_NOEXCEPT {
  _cublas_handle = cublasContext;
  _cudnn_handle = cudnnContext;
  cudnnCreateTensorDescriptor(&_x_desc);
  cudnnCreateTensorDescriptor(&_y_desc);
  cudnnCreateTensorDescriptor(&_mask_desc);
}

// Detach the plugin object from its execution context.
void ScaledDotProductAttentionTRT::detachFromContext() TRT_NOEXCEPT {
  cudnnDestroyTensorDescriptor(_y_desc);
  cudnnDestroyTensorDescriptor(_x_desc);
  cudnnDestroyTensorDescriptor(_mask_desc);
}

void ScaledDotProductAttentionTRT::configurePlugin(const nvinfer1::DynamicPluginTensorDesc *in,
                                                   int nbInputs,
                                                   const nvinfer1::DynamicPluginTensorDesc *out,
                                                   int nbOutputs) TRT_NOEXCEPT {
  if (nbInputs != 4) {
    mask_dim = 0;
  } else {
    mask_dim = in[3].desc.dims.nbDims;
  }
}

int ScaledDotProductAttentionTRT::enqueue(const nvinfer1::PluginTensorDesc *inputDesc,
                                          const nvinfer1::PluginTensorDesc *outputDesc,
                                          const void *const *inputs, void *const *outputs,
                                          void *workSpace, cudaStream_t stream) TRT_NOEXCEPT {
  if (CUDNN_STATUS_SUCCESS != cudnnSetStream(_cudnn_handle, stream)) return 1;
  if (CUBLAS_STATUS_SUCCESS != cublasSetStream(_cublas_handle, stream)) return 1;
  int B = inputDesc[0].dims.d[0];  // batch * heads
  int Nt = inputDesc[0].dims.d[1];
  int Ns = inputDesc[1].dims.d[1];
  int E = inputDesc[0].dims.d[2];  // embeding size

  const void *query = inputs[0];
  const void *key = inputs[1];
  const void *value = inputs[2];
  const void *mask = nullptr;

  int mask_dims[3];
  mask_dims[0] = 0;
  if (mask_dim > 0) {
    mask = inputs[3];
    // check if mask need broadcast
    if (mask_dim == 2) {
      mask_dims[0] = 1;
      mask_dims[1] = inputDesc[3].dims.d[0];
      mask_dims[2] = inputDesc[3].dims.d[1];
    } else {
      mask_dims[0] = inputDesc[3].dims.d[0];
      mask_dims[1] = inputDesc[3].dims.d[1];
      mask_dims[2] = inputDesc[3].dims.d[2];
    }
  }

  void *output = outputs[0];
  void *attn = outputs[1];

  auto data_type = inputDesc[0].type;
  cudnnDataType_t cudnn_dtype{};
  convert_trt2cudnn_dtype(data_type, &cudnn_dtype);
  switch (data_type) {
    case nvinfer1::DataType::kFLOAT:
      dot_product_attention_impl<float>((float *)query, (float *)key, (float *)value, (float *)mask,
                                        (float *)attn, (float *)output, B, Nt, Ns, E, &mask_dims[0],
                                        _x_desc, _y_desc, _mask_desc, cudnn_dtype, stream,
                                        _cublas_handle, _cudnn_handle);
      break;
    default:
      return 1;
  }

  return 0;
}

nvinfer1::DataType ScaledDotProductAttentionTRT::getOutputDataType(
    int index, const nvinfer1::DataType *inputTypes, int nbInputs) const TRT_NOEXCEPT {
  return inputTypes[0];
}

// IPluginV2 Methods
const char *ScaledDotProductAttentionTRT::getPluginType() const TRT_NOEXCEPT { return PLUGIN_NAME; }

const char *ScaledDotProductAttentionTRT::getPluginVersion() const TRT_NOEXCEPT {
  return PLUGIN_VERSION;
}

int ScaledDotProductAttentionTRT::getNbOutputs() const TRT_NOEXCEPT { return 2; }

size_t ScaledDotProductAttentionTRT::getSerializationSize() const TRT_NOEXCEPT { return 0; }

void ScaledDotProductAttentionTRT::serialize(void *buffer) const TRT_NOEXCEPT {}

////////////////////// creator /////////////////////////////

ScaledDotProductAttentionTRTCreator::ScaledDotProductAttentionTRTCreator() {}

const char *ScaledDotProductAttentionTRTCreator::getPluginName() const TRT_NOEXCEPT {
  return PLUGIN_NAME;
}

const char *ScaledDotProductAttentionTRTCreator::getPluginVersion() const TRT_NOEXCEPT {
  return PLUGIN_VERSION;
}

nvinfer1::IPluginV2 *ScaledDotProductAttentionTRTCreator::createPlugin(
    const char *name, const nvinfer1::PluginFieldCollection *fc) TRT_NOEXCEPT {
  ScaledDotProductAttentionTRT *plugin = new ScaledDotProductAttentionTRT(name);
  plugin->setPluginNamespace(getPluginNamespace());
  return plugin;
}

nvinfer1::IPluginV2 *ScaledDotProductAttentionTRTCreator::deserializePlugin(
    const char *name, const void *serialData, size_t serialLength) TRT_NOEXCEPT {
  auto plugin = new ScaledDotProductAttentionTRT(name, serialData, serialLength);
  plugin->setPluginNamespace(getPluginNamespace());
  return plugin;
}
REGISTER_TENSORRT_PLUGIN(ScaledDotProductAttentionTRTCreator);
}  // namespace mmdeploy
@@ -0,0 +1,73 @@
// Copyright (c) OpenMMLab. All rights reserved.
#ifndef TRT_SCALED_DOT_PRODUCT_ATTENTION_HPP
#define TRT_SCALED_DOT_PRODUCT_ATTENTION_HPP
#include <cublas_v2.h>

#include <memory>
#include <string>
#include <vector>

#include "trt_plugin_base.hpp"

namespace mmdeploy {
class ScaledDotProductAttentionTRT : public TRTPluginBase {
 public:
  ScaledDotProductAttentionTRT(const std::string &name);

  ScaledDotProductAttentionTRT(const std::string name, const void *data, size_t length);

  ScaledDotProductAttentionTRT() = delete;

  ~ScaledDotProductAttentionTRT() TRT_NOEXCEPT override;

  virtual void configurePlugin(const nvinfer1::DynamicPluginTensorDesc *in, int nbInputs,
                               const nvinfer1::DynamicPluginTensorDesc *out,
                               int nbOutputs) TRT_NOEXCEPT override;
  // IPluginV2DynamicExt Methods
  nvinfer1::IPluginV2DynamicExt *clone() const TRT_NOEXCEPT override;
  nvinfer1::DimsExprs getOutputDimensions(int outputIndex, const nvinfer1::DimsExprs *inputs,
                                          int nbInputs, nvinfer1::IExprBuilder &exprBuilder)
      TRT_NOEXCEPT override;
  bool supportsFormatCombination(int pos, const nvinfer1::PluginTensorDesc *ioDesc, int nbInputs,
                                 int nbOutputs) TRT_NOEXCEPT override;
  int enqueue(const nvinfer1::PluginTensorDesc *inputDesc,
              const nvinfer1::PluginTensorDesc *outputDesc, const void *const *inputs,
              void *const *outputs, void *workspace, cudaStream_t stream) TRT_NOEXCEPT override;

  // IPluginV2Ext Methods
  nvinfer1::DataType getOutputDataType(int index, const nvinfer1::DataType *inputTypes,
                                       int nbInputs) const TRT_NOEXCEPT override;

  // IPluginV2 Methods
  const char *getPluginType() const TRT_NOEXCEPT override;
  const char *getPluginVersion() const TRT_NOEXCEPT override;
  int getNbOutputs() const TRT_NOEXCEPT override;
  size_t getSerializationSize() const TRT_NOEXCEPT override;
  void serialize(void *buffer) const TRT_NOEXCEPT override;
  void attachToContext(cudnnContext *cudnn, cublasContext *cublas,
                       nvinfer1::IGpuAllocator *allocator) TRT_NOEXCEPT override;
  void detachFromContext() TRT_NOEXCEPT override;

 private:
  int mask_dim;
  cublasHandle_t _cublas_handle{};
  cudnnHandle_t _cudnn_handle{};
  cudnnTensorDescriptor_t _x_desc{}, _y_desc{}, _mask_desc{};
};

class ScaledDotProductAttentionTRTCreator : public TRTPluginCreatorBase {
 public:
  ScaledDotProductAttentionTRTCreator();

  const char *getPluginName() const TRT_NOEXCEPT override;

  const char *getPluginVersion() const TRT_NOEXCEPT override;

  nvinfer1::IPluginV2 *createPlugin(const char *name, const nvinfer1::PluginFieldCollection *fc)
      TRT_NOEXCEPT override;

  nvinfer1::IPluginV2 *deserializePlugin(const char *name, const void *serialData,
                                         size_t serialLength) TRT_NOEXCEPT override;
};
}  // namespace mmdeploy
#endif  // TRT_SCALED_DOT_PRODUCT_ATTENTION_HPP
@@ -0,0 +1,103 @@
|
|||
// Copyright (c) OpenMMLab. All rights reserved
|
||||
#include <thrust/functional.h>
|
||||
#include <thrust/iterator/counting_iterator.h>
|
||||
#include <thrust/iterator/transform_iterator.h>
|
||||
#include <thrust/transform.h>
|
||||
|
||||
#include <cmath>
|
||||
#include <vector>
|
||||
|
||||
#include "common_cuda_helper.hpp"
|
||||
#include "scaled_dot_product_attention_kernel.hpp"
|
||||
#include "trt_plugin_helper.hpp"
|
||||
|
||||
template <typename scalar_t>
|
||||
cublasStatus_t cublasgemmStridedBatchedWrap(cublasHandle_t handle, cublasOperation_t transa,
|
||||
cublasOperation_t transb, int m, int n, int k,
|
||||
const scalar_t* alpha, const scalar_t* A, int lda,
|
||||
long long int strideA, const scalar_t* B, int ldb,
|
||||
long long int strideB, const scalar_t* beta,
|
||||
scalar_t* C, int ldc, long long int strideC,
|
||||
int batchCount);
|
||||
|
||||
template <>
|
||||
cublasStatus_t cublasgemmStridedBatchedWrap<float>(cublasHandle_t handle, cublasOperation_t transa,
|
||||
cublasOperation_t transb, int m, int n, int k,
|
||||
const float* alpha, const float* A, int lda,
|
||||
long long int strideA, const float* B, int ldb,
|
||||
long long int strideB, const float* beta,
|
||||
float* C, int ldc, long long int strideC,
|
||||
int batchCount) {
|
||||
return cublasSgemmStridedBatched(handle, transa, transb, m, n, k, alpha, A, lda, strideA, B, ldb,
|
||||
strideB, beta, C, ldc, strideC, batchCount);
|
||||
}
|
||||
|
||||
template <>
|
||||
cublasStatus_t cublasgemmStridedBatchedWrap<__half>(cublasHandle_t handle, cublasOperation_t transa,
|
||||
cublasOperation_t transb, int m, int n, int k,
|
||||
const __half* alpha, const __half* A, int lda,
|
||||
long long int strideA, const __half* B, int ldb,
|
||||
long long int strideB, const __half* beta,
|
||||
__half* C, int ldc, long long int strideC,
|
||||
int batchCount) {
|
||||
return cublasHgemmStridedBatched(handle, transa, transb, m, n, k, alpha, A, lda, strideA, B, ldb,
|
||||
strideB, beta, C, ldc, strideC, batchCount);
|
||||
}
|
||||
|
||||
template <typename scalar_t>
|
||||
void dot_product_attention_impl(const scalar_t* query, const scalar_t* key, const scalar_t* value,
|
||||
const scalar_t* mask, scalar_t* attn, scalar_t* output, int B,
|
||||
int Nt, int Ns, int E, const int* mask_dims,
|
||||
cudnnTensorDescriptor_t& x_desc, cudnnTensorDescriptor_t& y_desc,
|
||||
cudnnTensorDescriptor_t& mask_desc, cudnnDataType_t cudnn_dtype,
|
||||
cudaStream_t stream, cublasHandle_t cublas_handle,
|
||||
cudnnHandle_t cudnn_handle) {
|
||||
{
|
||||
// Q @ K
|
||||
const int m = Ns;
|
||||
const int n = Nt;
|
||||
const int k = E;
|
||||
const auto alpha = scalar_t(1.0f / sqrt(float(E)));
|
||||
const auto beta = scalar_t(0);
|
||||
cublasgemmStridedBatchedWrap(cublas_handle, CUBLAS_OP_T, CUBLAS_OP_N, m, n, k, &alpha, key, k,
|
||||
Ns * E, query, k, Nt * E, &beta, attn, m, Nt * Ns, B);
|
||||
}
|
||||
|
||||
if (mask_dims != nullptr && mask_dims[0] != 0) {
|
||||
const auto alpha = scalar_t(1);
|
||||
const auto beta = scalar_t(1);
|
||||
cudnnSetTensor4dDescriptor(mask_desc, CUDNN_TENSOR_NCHW, cudnn_dtype, 1, mask_dims[0],
|
||||
mask_dims[1], mask_dims[2]);
|
||||
cudnnSetTensor4dDescriptor(x_desc, CUDNN_TENSOR_NCHW, cudnn_dtype, 1, B, Nt, Ns);
|
||||
cudnnAddTensor(cudnn_handle, &alpha, mask_desc, mask, &beta, x_desc, attn);
|
||||
}
|
||||
|
||||
{
|
||||
// softmax attention
|
||||
const auto alpha = scalar_t(1);
|
||||
const auto beta = scalar_t(0);
|
||||
cudnnSetTensor4dDescriptor(x_desc, CUDNN_TENSOR_NCHW, cudnn_dtype, B * Nt, Ns, 1, 1);
|
||||
cudnnSetTensor4dDescriptor(y_desc, CUDNN_TENSOR_NCHW, cudnn_dtype, B * Nt, Ns, 1, 1);
|
||||
cudnnSoftmaxForward(cudnn_handle, CUDNN_SOFTMAX_ACCURATE, CUDNN_SOFTMAX_MODE_INSTANCE, &alpha,
|
||||
x_desc, attn, &beta, y_desc, attn);
|
||||
}
|
||||
|
||||
{
|
||||
// attn @ v
|
||||
const int m = E;
|
||||
const int n = Nt;
|
||||
const int k = Ns;
|
||||
const auto alpha = scalar_t(1);
|
||||
const auto beta = scalar_t(0);
|
||||
cublasgemmStridedBatchedWrap(cublas_handle, CUBLAS_OP_N, CUBLAS_OP_N, m, n, k, &alpha, value, m,
|
||||
Ns * E, (const scalar_t*)(attn), k, Ns * Nt, &beta, output, m,
|
||||
Nt * E, B);
|
||||
}
|
||||
}
|
||||
|
||||
template void dot_product_attention_impl<float>(
|
||||
const float* query, const float* key, const float* value, const float* mask, float* attn,
|
||||
float* output, int B, int Nt, int Ns, int E, const int* mask_dims,
|
||||
cudnnTensorDescriptor_t& x_desc, cudnnTensorDescriptor_t& y_desc,
|
||||
cudnnTensorDescriptor_t& mask_desc, cudnnDataType_t cudnn_dtype, cudaStream_t stream,
|
||||
cublasHandle_t cublas_handle, cudnnHandle_t cudnn_handle);
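// --- Illustration only, not part of this patch: a minimal host-side driver for the helper
// above. Shapes are arbitrary, error checking is omitted, and the unmasked path is used
// (passing mask_dims == nullptr skips the cudnnAddTensor step); everything else mirrors the
// signature declared in scaled_dot_product_attention_kernel.hpp.
#include <cublas_v2.h>
#include <cuda_runtime.h>
#include <cudnn.h>

#include "scaled_dot_product_attention_kernel.hpp"

void run_attention_example() {
  const int B = 2, Nt = 8, Ns = 8, E = 16;  // batch, target len, source len, embedding dim
  float *query, *key, *value, *attn, *output;
  cudaMalloc(&query, B * Nt * E * sizeof(float));
  cudaMalloc(&key, B * Ns * E * sizeof(float));
  cudaMalloc(&value, B * Ns * E * sizeof(float));
  cudaMalloc(&attn, B * Nt * Ns * sizeof(float));  // scratch holding softmax(Q K^T / sqrt(E))
  cudaMalloc(&output, B * Nt * E * sizeof(float));

  cublasHandle_t cublas{};
  cudnnHandle_t cudnn{};
  cublasCreate(&cublas);
  cudnnCreate(&cudnn);
  cudnnTensorDescriptor_t x_desc{}, y_desc{}, mask_desc{};
  cudnnCreateTensorDescriptor(&x_desc);
  cudnnCreateTensorDescriptor(&y_desc);
  cudnnCreateTensorDescriptor(&mask_desc);

  dot_product_attention_impl<float>(query, key, value, /*mask=*/nullptr, attn, output, B, Nt, Ns,
                                    E, /*mask_dims=*/nullptr, x_desc, y_desc, mask_desc,
                                    CUDNN_DATA_FLOAT, /*stream=*/nullptr, cublas, cudnn);
  cudaDeviceSynchronize();  // work was issued on the default stream of both handles
  // ... read back `output` (B x Nt x E) and release the buffers, descriptors and handles ...
}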
|
|
@@ -0,0 +1,17 @@
// Copyright (c) OpenMMLab. All rights reserved
#ifndef TRT_SCALED_DOT_PRODUCT_ATTENTION_KERNEL_HPP
#define TRT_SCALED_DOT_PRODUCT_ATTENTION_KERNEL_HPP
#include <cublas_v2.h>
#include <cuda_runtime.h>
#include <cudnn.h>

template <typename scalar_t>
void dot_product_attention_impl(const scalar_t* query, const scalar_t* key, const scalar_t* value,
                                const scalar_t* mask, scalar_t* attn, scalar_t* output, int B,
                                int Nt, int Ns, int E, const int* mask_dims,
                                cudnnTensorDescriptor_t& x_desc, cudnnTensorDescriptor_t& y_desc,
                                cudnnTensorDescriptor_t& mask_desc, cudnnDataType_t cudnn_dtype,
                                cudaStream_t stream, cublasHandle_t cublas_handle,
                                cudnnHandle_t cudnn_handle);

#endif
|
|
@@ -1,10 +0,0 @@
// Copyright (c) OpenMMLab. All rights reserved.
#include "torch/script.h"

TORCH_LIBRARY(mmdeploy, m) {
  m.def(
      "modulated_deform_conv(Tensor input, Tensor weight, Tensor bias, Tensor offset, Tensor "
      "mask, "
      "int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h, int pad_w, int "
      "dilation_h,int dilation_w, int groups, int deform_groups, bool with_bias) -> Tensor");
}

@@ -0,0 +1,13 @@
// Copyright (c) OpenMMLab. All rights reserved.
#include "torch/script.h"

TORCH_LIBRARY(mmdeploy, m) {
  m.def(
      "modulated_deform_conv(Tensor input, Tensor weight, Tensor bias, Tensor offset, Tensor "
      "mask, "
      "int kernel_h, int kernel_w, int stride_h, int stride_w, int pad_h, int pad_w, int "
      "dilation_h,int dilation_w, int groups, int deform_groups, bool with_bias) -> Tensor")
      .def(
          "coreml_nms(Tensor boxes, Tensor scores, float iou_threshold, "
          "float score_threshold, int max_boxes) -> Tensor[]");
}
|
|
@@ -0,0 +1,31 @@

#include <assert.h>

#include <vector>

#include "torch/script.h"
namespace mmdeploy {

using at::Tensor;

std::vector<Tensor> coreml_nms_cpu(Tensor boxes, Tensor scores, double iou_threshold,
                                   double score_threshold, int64_t max_boxes) {
  assert(boxes.dim() == 3);  // bboxes with shape (batch_size, num_bboxes, 4)
  assert(boxes.size(2) == 4);
  assert(boxes.size(0) == scores.size(0));  // check batch size
  assert(boxes.size(1) == scores.size(1));  // check num boxes

  auto batch_size = boxes.size(0);
  auto num_boxes = boxes.size(1);
  auto num_classes = scores.size(2);

  Tensor ret_boxes = at::zeros({batch_size, max_boxes, 4});
  Tensor ret_scores = at::zeros({batch_size, max_boxes, num_classes});
  Tensor indices = at::zeros({batch_size, max_boxes}, at::kInt);
  Tensor num_outputs = at::zeros({batch_size}, at::kInt);

  return std::vector<Tensor>({ret_boxes, ret_scores, indices, num_outputs});
}

TORCH_LIBRARY_IMPL(mmdeploy, CPU, m) { m.impl("coreml_nms", coreml_nms_cpu); }
}  // namespace mmdeploy
|
|
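// --- Illustration only, not part of this patch: exercising the CPU stub above. Note that
// coreml_nms_cpu merely allocates zero-filled outputs of the expected shapes; it exists so
// the op can be registered and traced, while the real NMS runs inside CoreML. Assumes the
// function is visible to the caller (same translation unit or a matching declaration).
#include <torch/script.h>

void coreml_nms_smoke_test() {
  auto boxes = at::rand({1, 100, 4});    // (batch_size, num_boxes, 4)
  auto scores = at::rand({1, 100, 80});  // (batch_size, num_boxes, num_classes)
  auto outs = mmdeploy::coreml_nms_cpu(boxes, scores, /*iou_threshold=*/0.5,
                                       /*score_threshold=*/0.05, /*max_boxes=*/20);
  // outs: ret_boxes (1, 20, 4), ret_scores (1, 20, 80), indices (1, 20), num_outputs (1)
}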
@@ -0,0 +1,55 @@
|
|||
// Copyright (c) OpenMMLab. All rights reserved.
|
||||
|
||||
#include <algorithm>
|
||||
#include <numeric>
|
||||
|
||||
#include "mmdeploy/codebase/mmcls/mmcls.h"
|
||||
#include "mmdeploy/core/tensor.h"
|
||||
#include "mmdeploy/core/utils/device_utils.h"
|
||||
#include "mmdeploy/core/utils/formatter.h"
|
||||
#include "mmdeploy/experimental/module_adapter.h"
|
||||
|
||||
using std::vector;
|
||||
|
||||
namespace mmdeploy::mmcls {
|
||||
|
||||
class MultiLabelLinearClsHead : public MMClassification {
|
||||
public:
|
||||
explicit MultiLabelLinearClsHead(const Value& cfg) : MMClassification(cfg) {}
|
||||
Result<Value> operator()(const Value& infer_res) {
|
||||
MMDEPLOY_DEBUG("infer_res: {}", infer_res);
|
||||
auto output = infer_res["output"].get<Tensor>();
|
||||
|
||||
if (!(output.shape().size() >= 2 && output.data_type() == DataType::kFLOAT)) {
|
||||
MMDEPLOY_ERROR("unsupported `output` tensor, shape: {}, dtype: {}", output.shape(),
|
||||
(int)output.data_type());
|
||||
return Status(eNotSupported);
|
||||
}
|
||||
|
||||
auto class_num = (int)output.shape(1);
|
||||
|
||||
OUTCOME_TRY(auto _scores, MakeAvailableOnDevice(output, kHost, stream()));
|
||||
OUTCOME_TRY(stream().Wait());
|
||||
|
||||
return GetLabels(_scores, class_num);
|
||||
}
|
||||
|
||||
private:
|
||||
Value GetLabels(const Tensor& scores, int class_num) const {
|
||||
auto scores_data = scores.data<float>();
|
||||
ClassifyOutput output;
|
||||
for (int i = 0; i < class_num; ++i) {
|
||||
auto label = ClassifyOutput::Label{i, scores_data[i]};
|
||||
MMDEPLOY_DEBUG("label_id: {}, score: {}", label.label_id, label.score);
|
||||
output.labels.push_back(label);
|
||||
}
|
||||
return to_value(std::move(output));
|
||||
}
|
||||
|
||||
private:
|
||||
static constexpr const auto kHost = Device{0};
|
||||
};
|
||||
|
||||
REGISTER_CODEBASE_COMPONENT(MMClassification, MultiLabelLinearClsHead);
|
||||
|
||||
} // namespace mmdeploy::mmcls
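// Reading note (not part of the patch): unlike a single-label head, GetLabels above emits one
// ClassifyOutput::Label per class with its raw score and applies no sigmoid/threshold, so any
// filtering by score is presumably left to the consumer of the SDK output.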
|
|
@ -4,6 +4,7 @@
|
|||
|
||||
#include "mmdeploy/core/registry.h"
|
||||
#include "mmdeploy/core/utils/device_utils.h"
|
||||
#include "mmdeploy/core/utils/formatter.h"
|
||||
#include "mmdeploy/experimental/module_adapter.h"
|
||||
|
||||
using namespace std;
|
||||
|
|
|
@ -68,7 +68,7 @@ class Device {
|
|||
constexpr explicit Device(int platform_id, int device_id = 0)
|
||||
: platform_id_(platform_id), device_id_(device_id) {}
|
||||
|
||||
MMDEPLOY_API explicit Device(const char *platform_name, int device_id = 0);
|
||||
MMDEPLOY_API explicit Device(const char* platform_name, int device_id = 0);
|
||||
|
||||
constexpr int device_id() const noexcept { return device_id_; }
|
||||
|
||||
|
@ -78,11 +78,11 @@ class Device {
|
|||
|
||||
constexpr bool is_device() const noexcept { return platform_id() > 0; }
|
||||
|
||||
constexpr bool operator==(const Device &other) const noexcept {
|
||||
constexpr bool operator==(const Device& other) const noexcept {
|
||||
return platform_id_ == other.platform_id_ && device_id_ == other.device_id_;
|
||||
}
|
||||
|
||||
constexpr bool operator!=(const Device &other) const noexcept { return !(*this == other); }
|
||||
constexpr bool operator!=(const Device& other) const noexcept { return !(*this == other); }
|
||||
|
||||
constexpr explicit operator bool() const noexcept { return platform_id_ >= 0 && device_id_ >= 0; }
|
||||
|
||||
|
@ -104,7 +104,7 @@ enum class MemcpyKind : int { HtoD, DtoH, DtoD };
|
|||
class MMDEPLOY_API Platform {
|
||||
public:
|
||||
// throws if not found
|
||||
explicit Platform(const char *platform_name);
|
||||
explicit Platform(const char* platform_name);
|
||||
|
||||
// throws if not found
|
||||
explicit Platform(int platform_id);
|
||||
|
@ -113,11 +113,11 @@ class MMDEPLOY_API Platform {
|
|||
int GetPlatformId() const;
|
||||
|
||||
// "" if invalid
|
||||
const char *GetPlatformName() const;
|
||||
const char* GetPlatformName() const;
|
||||
|
||||
bool operator==(const Platform &other) { return impl_ == other.impl_; }
|
||||
bool operator==(const Platform& other) { return impl_ == other.impl_; }
|
||||
|
||||
bool operator!=(const Platform &other) { return !(*this == other); }
|
||||
bool operator!=(const Platform& other) { return !(*this == other); }
|
||||
|
||||
explicit operator bool() const noexcept { return static_cast<bool>(impl_); }
|
||||
|
||||
|
@ -132,7 +132,7 @@ class MMDEPLOY_API Platform {
|
|||
|
||||
Platform GetPlatform(int platform_id);
|
||||
|
||||
Platform GetPlatform(const char *platform_name);
|
||||
Platform GetPlatform(const char* platform_name);
|
||||
|
||||
class MMDEPLOY_API Stream {
|
||||
public:
|
||||
|
@ -140,7 +140,7 @@ class MMDEPLOY_API Stream {
|
|||
|
||||
explicit Stream(Device device, uint64_t flags = 0);
|
||||
|
||||
explicit Stream(Device device, void *native, uint64_t flags = 0);
|
||||
explicit Stream(Device device, void* native, uint64_t flags = 0);
|
||||
|
||||
explicit Stream(Device device, std::shared_ptr<void> native, uint64_t flags = 0);
|
||||
|
||||
|
@ -150,25 +150,25 @@ class MMDEPLOY_API Stream {
|
|||
|
||||
Result<void> Wait();
|
||||
|
||||
Result<void> DependsOn(Event &event);
|
||||
Result<void> DependsOn(Event& event);
|
||||
|
||||
Result<void> Submit(Kernel &kernel);
|
||||
Result<void> Submit(Kernel& kernel);
|
||||
|
||||
void *GetNative(ErrorCode *ec = nullptr);
|
||||
void* GetNative(ErrorCode* ec = nullptr);
|
||||
|
||||
Result<void> Copy(const Buffer &src, Buffer &dst, size_t size = -1, size_t src_offset = 0,
|
||||
Result<void> Copy(const Buffer& src, Buffer& dst, size_t size = -1, size_t src_offset = 0,
|
||||
size_t dst_offset = 0);
|
||||
|
||||
Result<void> Copy(const void *host_ptr, Buffer &dst, size_t size = -1, size_t dst_offset = 0);
|
||||
Result<void> Copy(const void* host_ptr, Buffer& dst, size_t size = -1, size_t dst_offset = 0);
|
||||
|
||||
Result<void> Copy(const Buffer &src, void *host_ptr, size_t size = -1, size_t src_offset = 0);
|
||||
Result<void> Copy(const Buffer& src, void* host_ptr, size_t size = -1, size_t src_offset = 0);
|
||||
|
||||
Result<void> Fill(const Buffer &dst, void *pattern, size_t pattern_size, size_t size = -1,
|
||||
Result<void> Fill(const Buffer& dst, void* pattern, size_t pattern_size, size_t size = -1,
|
||||
size_t offset = 0);
|
||||
|
||||
bool operator==(const Stream &other) const { return impl_ == other.impl_; }
|
||||
bool operator==(const Stream& other) const { return impl_ == other.impl_; }
|
||||
|
||||
bool operator!=(const Stream &other) const { return !(*this == other); }
|
||||
bool operator!=(const Stream& other) const { return !(*this == other); }
|
||||
|
||||
explicit operator bool() const noexcept { return static_cast<bool>(impl_); }
|
||||
|
||||
|
@ -184,7 +184,7 @@ class MMDEPLOY_API Stream {
|
|||
};
|
||||
|
||||
template <typename T>
|
||||
T GetNative(Stream &stream, ErrorCode *ec = nullptr) {
|
||||
T GetNative(Stream& stream, ErrorCode* ec = nullptr) {
|
||||
return reinterpret_cast<T>(stream.GetNative(ec));
|
||||
}
|
||||
|
||||
|
@ -194,7 +194,7 @@ class MMDEPLOY_API Event {
|
|||
|
||||
explicit Event(Device device, uint64_t flags = 0);
|
||||
|
||||
explicit Event(Device device, void *native, uint64_t flags = 0);
|
||||
explicit Event(Device device, void* native, uint64_t flags = 0);
|
||||
|
||||
explicit Event(Device device, std::shared_ptr<void> native, uint64_t flags = 0);
|
||||
|
||||
|
@ -204,13 +204,13 @@ class MMDEPLOY_API Event {
|
|||
|
||||
Result<void> Wait();
|
||||
|
||||
Result<void> Record(Stream &stream);
|
||||
Result<void> Record(Stream& stream);
|
||||
|
||||
void *GetNative(ErrorCode *ec = nullptr);
|
||||
void* GetNative(ErrorCode* ec = nullptr);
|
||||
|
||||
bool operator==(const Event &other) const { return impl_ == other.impl_; }
|
||||
bool operator==(const Event& other) const { return impl_ == other.impl_; }
|
||||
|
||||
bool operator!=(const Event &other) const { return !(*this == other); }
|
||||
bool operator!=(const Event& other) const { return !(*this == other); }
|
||||
|
||||
explicit operator bool() const noexcept { return static_cast<bool>(impl_); }
|
||||
|
||||
|
@ -223,7 +223,7 @@ class MMDEPLOY_API Event {
|
|||
};
|
||||
|
||||
template <typename T>
|
||||
T GetNative(Event &event, ErrorCode *ec = nullptr) {
|
||||
T GetNative(Event& event, ErrorCode* ec = nullptr) {
|
||||
return reinterpret_cast<T>(event.GetNative(ec));
|
||||
}
|
||||
|
||||
|
@ -234,7 +234,7 @@ class MMDEPLOY_API Kernel {
|
|||
|
||||
Device GetDevice() const;
|
||||
|
||||
void *GetNative(ErrorCode *ec = nullptr);
|
||||
void* GetNative(ErrorCode* ec = nullptr);
|
||||
|
||||
explicit operator bool() const noexcept { return static_cast<bool>(impl_); }
|
||||
|
||||
|
@ -243,7 +243,7 @@ class MMDEPLOY_API Kernel {
|
|||
};
|
||||
|
||||
template <typename T>
|
||||
T GetNative(Kernel &kernel, ErrorCode *ec = nullptr) {
|
||||
T GetNative(Kernel& kernel, ErrorCode* ec = nullptr) {
|
||||
return reinterpret_cast<T>(kernel.GetNative(ec));
|
||||
}
|
||||
|
||||
|
@ -269,25 +269,25 @@ class MMDEPLOY_API Buffer {
|
|||
|
||||
Buffer(Device device, size_t size, Allocator allocator, size_t alignment = 1, uint64_t flags = 0);
|
||||
|
||||
Buffer(Device device, size_t size, void *native, uint64_t flags = 0);
|
||||
Buffer(Device device, size_t size, void* native, uint64_t flags = 0);
|
||||
|
||||
Buffer(Device device, size_t size, std::shared_ptr<void> native, uint64_t flags = 0);
|
||||
// create sub-buffer
|
||||
Buffer(Buffer &buffer, size_t offset, size_t size, uint64_t flags = 0);
|
||||
Buffer(Buffer& buffer, size_t offset, size_t size, uint64_t flags = 0);
|
||||
|
||||
size_t GetSize(ErrorCode *ec = nullptr) const;
|
||||
size_t GetSize(ErrorCode* ec = nullptr) const;
|
||||
|
||||
// bool IsSubBuffer(ErrorCode *ec = nullptr);
|
||||
// bool IsSubBuffer(ErrorCode* ec = nullptr);
|
||||
|
||||
void *GetNative(ErrorCode *ec = nullptr) const;
|
||||
void* GetNative(ErrorCode* ec = nullptr) const;
|
||||
|
||||
Device GetDevice() const;
|
||||
|
||||
Allocator GetAllocator() const;
|
||||
|
||||
bool operator==(const Buffer &other) const { return impl_ == other.impl_; }
|
||||
bool operator==(const Buffer& other) const { return impl_ == other.impl_; }
|
||||
|
||||
bool operator!=(const Buffer &other) const { return !(*this == other); }
|
||||
bool operator!=(const Buffer& other) const { return !(*this == other); }
|
||||
|
||||
explicit operator bool() const noexcept { return static_cast<bool>(impl_); }
|
||||
|
||||
|
@ -300,12 +300,12 @@ class MMDEPLOY_API Buffer {
|
|||
};
|
||||
|
||||
template <typename T>
|
||||
T GetNative(Buffer &buffer, ErrorCode *ec = nullptr) {
|
||||
T GetNative(Buffer& buffer, ErrorCode* ec = nullptr) {
|
||||
return reinterpret_cast<T>(buffer.GetNative(ec));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
T GetNative(const Buffer &buffer, ErrorCode *ec = nullptr) {
|
||||
T GetNative(const Buffer& buffer, ErrorCode* ec = nullptr) {
|
||||
return reinterpret_cast<T>(buffer.GetNative(ec));
|
||||
}
|
||||
|
||||
|
@ -315,13 +315,15 @@ class MMDEPLOY_API PlatformRegistry {
|
|||
|
||||
int Register(Creator creator);
|
||||
|
||||
int GetPlatform(const char *name, Platform *platform);
|
||||
int AddAlias(const char* name, const char* target);
|
||||
|
||||
int GetPlatform(int id, Platform *platform);
|
||||
int GetPlatform(const char* name, Platform* platform);
|
||||
|
||||
int GetPlatformId(const char *name);
|
||||
int GetPlatform(int id, Platform* platform);
|
||||
|
||||
PlatformImpl *GetPlatformImpl(PlatformId id);
|
||||
int GetPlatformId(const char* name);
|
||||
|
||||
PlatformImpl* GetPlatformImpl(PlatformId id);
|
||||
|
||||
private:
|
||||
int GetNextId();
|
||||
|
@ -335,8 +337,9 @@ class MMDEPLOY_API PlatformRegistry {
|
|||
Platform platform;
|
||||
};
|
||||
std::vector<Entry> entries_;
|
||||
std::vector<std::pair<std::string, std::string>> aliases_;
|
||||
};
|
||||
|
||||
MMDEPLOY_API PlatformRegistry &gPlatformRegistry();
|
||||
MMDEPLOY_API PlatformRegistry& gPlatformRegistry();
|
||||
|
||||
} // namespace mmdeploy
|
||||
|
|
|
@ -321,6 +321,11 @@ int PlatformRegistry::Register(Creator creator) {
|
|||
return 0;
|
||||
}
|
||||
|
||||
int PlatformRegistry::AddAlias(const char* name, const char* target) {
|
||||
aliases_.emplace_back(name, target);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int PlatformRegistry::GetNextId() {
|
||||
for (int i = 1;; ++i) {
|
||||
if (IsAvailable(i)) {
|
||||
|
@ -339,6 +344,12 @@ bool PlatformRegistry::IsAvailable(int id) {
|
|||
}
|
||||
|
||||
int PlatformRegistry::GetPlatform(const char* name, Platform* platform) {
|
||||
for (const auto& alias : aliases_) {
|
||||
if (name == alias.first) {
|
||||
name = alias.second.c_str();
|
||||
break;
|
||||
}
|
||||
}
|
||||
for (const auto& entry : entries_) {
|
||||
if (entry.name == name) {
|
||||
*platform = entry.platform;
|
||||
|
@ -357,7 +368,14 @@ int PlatformRegistry::GetPlatform(int id, Platform* platform) {
|
|||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
int PlatformRegistry::GetPlatformId(const char* name) {
|
||||
for (const auto& alias : aliases_) {
|
||||
if (name == alias.first) {
|
||||
name = alias.second.c_str();
|
||||
break;
|
||||
}
|
||||
}
|
||||
for (const auto& entry : entries_) {
|
||||
if (entry.name == name) {
|
||||
return entry.id;
|
||||
|
|
|
@ -20,6 +20,7 @@ Model::Model(const std::string& model_path) {
|
|||
Model::Model(const void* buffer, size_t size) { Init(buffer, size).value(); }
|
||||
|
||||
Result<void> Model::Init(const std::string& model_path) {
|
||||
model_path_ = model_path;
|
||||
if (!fs::exists(model_path)) {
|
||||
MMDEPLOY_ERROR("'{}' doesn't exist", model_path);
|
||||
return Status(eFileNotExist);
|
||||
|
@ -45,6 +46,8 @@ Result<void> Model::Init(const std::string& model_path) {
|
|||
return Status(eNotSupported);
|
||||
}
|
||||
|
||||
const std::string& Model::GetModelPath() const { return model_path_; }
|
||||
|
||||
Result<void> Model::Init(const void* buffer, size_t size) {
|
||||
auto registry = ModelRegistry::Get();
|
||||
auto entries = registry.ListEntries();
|
||||
|
|
|
@ -94,7 +94,14 @@ class MMDEPLOY_API Model {
|
|||
*/
|
||||
explicit operator bool() const { return impl_ != nullptr; }
|
||||
|
||||
/**
|
||||
* @brief get model_path that init with DirectoryModel
|
||||
* @return file path of an sdk model
|
||||
*/
|
||||
const std::string& GetModelPath() const;
|
||||
|
||||
private:
|
||||
std::string model_path_;
|
||||
std::shared_ptr<ModelImpl> impl_;
|
||||
deploy_meta_info_t meta_;
|
||||
};
|
||||
|
|
|
@ -94,17 +94,23 @@ class Span {
|
|||
|
||||
constexpr Span& operator=(const Span& other) noexcept = default;
|
||||
|
||||
friend bool operator==(const Span& a, const Span& b) {
|
||||
if (a.size() != b.size()) return false;
|
||||
template <typename U>
|
||||
friend bool operator!=(const Span& a, const Span<U>& b) {
|
||||
if (a.size() != b.size()) {
|
||||
return true;
|
||||
}
|
||||
for (size_type i = 0; i < a.size(); ++i) {
|
||||
if (a[i] != b[i]) {
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
friend bool operator!=(const Span& a, const Span& b) { return !(a == b); }
|
||||
template <typename U>
|
||||
friend bool operator==(const Span& a, const Span<U>& b) {
|
||||
return !(a != b);
|
||||
}
|
||||
|
||||
private:
|
||||
T* data_;
|
||||
|
|
|
@ -115,9 +115,9 @@ Result<void> Tensor::CopyFrom(const Tensor& tensor, Stream stream) {
|
|||
if (!stream) {
|
||||
auto device = desc_.device.is_device() ? desc_.device : tensor.desc().device;
|
||||
auto default_stream = Stream::GetDefault(device);
|
||||
OUTCOME_TRY(default_stream.Copy(tensor.buffer(), buffer_));
|
||||
OUTCOME_TRY(default_stream.Copy(tensor.buffer(), buffer_, tensor.byte_size()));
|
||||
} else {
|
||||
OUTCOME_TRY(stream.Copy(tensor.buffer(), buffer_));
|
||||
OUTCOME_TRY(stream.Copy(tensor.buffer(), buffer_, tensor.byte_size()));
|
||||
}
|
||||
return success();
|
||||
}
|
||||
|
@ -141,9 +141,9 @@ Result<void> Tensor::CopyTo(Tensor& tensor, Stream stream) const {
|
|||
if (!stream) {
|
||||
Device device = desc_.device.is_device() ? desc_.device : tensor.desc().device;
|
||||
Stream default_stream = Stream::GetDefault(device);
|
||||
return default_stream.Copy(buffer_, tensor.buffer());
|
||||
return default_stream.Copy(buffer_, tensor.buffer(), byte_size());
|
||||
} else {
|
||||
return stream.Copy(buffer_, tensor.buffer());
|
||||
return stream.Copy(buffer_, tensor.buffer(), byte_size());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -158,9 +158,9 @@ Result<void> Tensor::CopyFrom(void* host_ptr, Stream stream) {
|
|||
Allocate();
|
||||
if (!stream) {
|
||||
auto default_stream = Stream::GetDefault(desc_.device);
|
||||
return default_stream.Copy(host_ptr, buffer_, buffer_.GetSize());
|
||||
return default_stream.Copy(host_ptr, buffer_, byte_size());
|
||||
} else {
|
||||
return stream.Copy(host_ptr, buffer_, buffer_.GetSize());
|
||||
return stream.Copy(host_ptr, buffer_, byte_size());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -174,9 +174,9 @@ Result<void> Tensor::CopyTo(void* host_ptr, Stream stream) const {
|
|||
}
|
||||
if (!stream) {
|
||||
auto default_stream = Stream::GetDefault(desc_.device);
|
||||
return default_stream.Copy(buffer_, host_ptr, buffer_.GetSize());
|
||||
return default_stream.Copy(buffer_, host_ptr, byte_size());
|
||||
} else {
|
||||
return stream.Copy(buffer_, host_ptr, buffer_.GetSize());
|
||||
return stream.Copy(buffer_, host_ptr, byte_size());
|
||||
}
|
||||
}
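// Reading note (not part of the patch): the Copy* call sites above now pass byte_size() (the
// tensor's logical size) instead of buffer_.GetSize(); presumably because the backing Buffer can
// be larger than the tensor it currently describes (e.g. after a Reshape that reuses the buffer),
// in which case copying GetSize() bytes would touch memory beyond the tensor's own data.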
|
||||
|
||||
|
|
|
@ -5,3 +5,7 @@ add_subdirectory(cpu)
|
|||
if ("cuda" IN_LIST MMDEPLOY_TARGET_DEVICES)
|
||||
add_subdirectory(cuda)
|
||||
endif ()
|
||||
|
||||
if ("acl" IN_LIST MMDEPLOY_TARGET_BACKENDS)
|
||||
add_subdirectory(acl)
|
||||
endif ()
|
||||
|
|
|
@@ -0,0 +1,7 @@
# Copyright (c) OpenMMLab. All rights reserved.

project(mmdeploy_acl_device)

file(GLOB_RECURSE SRCS "*.cpp")

mmdeploy_add_module(${PROJECT_NAME} "${SRCS}")
|
|
@@ -0,0 +1,14 @@
// Copyright (c) OpenMMLab. All rights reserved.

#include "mmdeploy/core/device_impl.h"

namespace mmdeploy {

class AclPlatformRegisterer {
 public:
  AclPlatformRegisterer() { gPlatformRegistry().AddAlias("npu", "cpu"); }
};

AclPlatformRegisterer g_acl_platform_registerer;

}  // namespace mmdeploy
|
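// --- Illustration only, not part of this patch: what the "npu" -> "cpu" alias provides.
// Assuming Platform(const char*) resolves names through PlatformRegistry::GetPlatform (which
// consults the alias table added in this change), a lookup by either name should agree.
#include <cassert>

#include "mmdeploy/core/device.h"

void check_npu_alias() {
  mmdeploy::Platform by_alias("npu");  // resolved via the alias registered above
  mmdeploy::Platform by_name("cpu");
  assert(by_alias.GetPlatformId() == by_name.GetPlatformId());
}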
|
@ -105,7 +105,7 @@ Result<void> CpuPlatformImpl::CopyImpl(const void* src, void* dst, size_t src_si
|
|||
task();
|
||||
return success();
|
||||
}
|
||||
if (st.GetDevice() != Device(0, 0)) {
|
||||
if (st.GetDevice().platform_id() != 0) {
|
||||
return Status(eInvalidArgument);
|
||||
}
|
||||
auto cpu_stream = static_cast<CpuStreamImpl*>(st.GetNative());
|
||||
|
@ -126,6 +126,7 @@ Result<void> CpuPlatformImpl::Copy(const void* host_ptr, Buffer dst, size_t size
|
|||
}
|
||||
return CopyImpl(host_ptr, dst_ptr, size, dst.GetSize(), 0, dst_offset, size, stream);
|
||||
}
|
||||
|
||||
Result<void> CpuPlatformImpl::Copy(Buffer src, void* host_ptr, size_t size, size_t src_offset,
|
||||
Stream stream) {
|
||||
auto src_ptr = src.GetNative();
|
||||
|
@ -145,7 +146,7 @@ Result<void> CpuPlatformImpl::Copy(Buffer src, Buffer dst, size_t size, size_t s
|
|||
return Status(eInvalidArgument);
|
||||
}
|
||||
auto device = src.GetDevice();
|
||||
if (device.platform_id() != 0 || device != dst.GetDevice()) {
|
||||
if (device.platform_id() != 0 || device.platform_id() != dst.GetDevice().platform_id()) {
|
||||
return Status(eInvalidArgument);
|
||||
}
|
||||
return CopyImpl(src_ptr, dst_ptr, src.GetSize(), dst.GetSize(), src_offset, dst_offset, size,
|
||||
|
|
|
@ -26,5 +26,17 @@ if ("snpe" IN_LIST MMDEPLOY_TARGET_BACKENDS)
|
|||
add_subdirectory(snpe)
|
||||
endif ()
|
||||
|
||||
if ("acl" IN_LIST MMDEPLOY_TARGET_BACKENDS)
|
||||
add_subdirectory(acl)
|
||||
endif ()
|
||||
|
||||
if ("torchscript" IN_LIST MMDEPLOY_TARGET_BACKENDS)
|
||||
add_subdirectory(torchscript)
|
||||
endif ()
|
||||
|
||||
if ("coreml" IN_LIST MMDEPLOY_TARGET_BACKENDS)
|
||||
add_subdirectory(coreml)
|
||||
endif ()
|
||||
|
||||
mmdeploy_add_module(${PROJECT_NAME} net_module.cpp)
|
||||
add_library(mmdeploy::net_module ALIAS ${PROJECT_NAME})
|
||||
|
|
|
@@ -0,0 +1,14 @@
# Copyright (c) OpenMMLab. All rights reserved.

project(mmdeploy_acl_net)

if ("acl" IN_LIST MMDEPLOY_TARGET_BACKENDS)
    if (NOT DEFINED ASCEND_TOOLKIT_HOME)
        set(ASCEND_TOOLKIT_HOME $ENV{ASCEND_TOOLKIT_HOME})
    endif ()
    mmdeploy_add_module(${PROJECT_NAME} acl_net.cpp)
    target_include_directories(${PROJECT_NAME} PRIVATE
            $<BUILD_INTERFACE:${ASCEND_TOOLKIT_HOME}/runtime/include>)
    target_link_libraries(${PROJECT_NAME} PRIVATE
            $<BUILD_INTERFACE:${ASCEND_TOOLKIT_HOME}/runtime/lib64/stub/libascendcl.so>)
endif ()
|
|
@@ -0,0 +1,659 @@
|
|||
// Copyright (c) OpenMMLab. All rights reserved.
|
||||
|
||||
#include "mmdeploy/net/acl/acl_net.h"
|
||||
|
||||
#include "mmdeploy/core/logger.h"
|
||||
#include "mmdeploy/core/model.h"
|
||||
#include "mmdeploy/core/utils/formatter.h"
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, const aclmdlIODims& dims) {
|
||||
os << dims.name << " [";
|
||||
for (int i = 0; i < dims.dimCount; ++i) {
|
||||
os << (i ? ", " : "") << dims.dims[i];
|
||||
}
|
||||
os << "]";
|
||||
return os;
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, const aclmdlBatch& batch) {
|
||||
os << "batch [";
|
||||
for (int i = 0; i < batch.batchCount; ++i) {
|
||||
os << (i ? ", " : "") << batch.batch[i];
|
||||
}
|
||||
os << "]";
|
||||
return os;
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream& os, const aclmdlHW& hw) {
|
||||
os << "HW [";
|
||||
for (int i = 0; i < hw.hwCount; ++i) {
|
||||
os << (i ? ", " : "") << "(" << hw.hw[i][0] << ", " << hw.hw[i][1] << ")";
|
||||
}
|
||||
os << "]";
|
||||
return os;
|
||||
}
|
||||
|
||||
namespace mmdeploy {
|
||||
|
||||
namespace {
|
||||
|
||||
inline Result<void> _m(aclError ec, SourceLocation loc = SourceLocation::current()) {
|
||||
if (ec == ACL_SUCCESS) {
|
||||
return success();
|
||||
} else {
|
||||
return Status(eFail, loc);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline Result<T*> _p(T* ptr, SourceLocation loc = SourceLocation::current()) {
|
||||
if (ptr) {
|
||||
return ptr;
|
||||
} else {
|
||||
return Status(eFail, loc);
|
||||
}
|
||||
}
|
||||
|
||||
struct Context {
|
||||
Context() {
|
||||
std::lock_guard lock{mutex_};
|
||||
if (ref_count_++ != 0) {
|
||||
return;
|
||||
}
|
||||
auto ret = aclInit(nullptr);
|
||||
if (ret == ACL_SUCCESS) {
|
||||
MMDEPLOY_INFO("ACL initialized.");
|
||||
owned_acl_ = true;
|
||||
} else if (ret == ACL_ERROR_REPEAT_INITIALIZE) {
|
||||
MMDEPLOY_INFO("ACL has already been initialized.");
|
||||
} else {
|
||||
MMDEPLOY_ERROR("aclInit() failed: {}", ret);
|
||||
assert(ret == 0);
|
||||
}
|
||||
}
|
||||
~Context() {
|
||||
std::lock_guard lock{mutex_};
|
||||
if (--ref_count_ != 0) {
|
||||
return;
|
||||
}
|
||||
// skip aclFinalize if aclInit is not successfully called by us.
|
||||
if (owned_acl_) {
|
||||
auto ret = aclFinalize();
|
||||
if (ret == ACL_SUCCESS) {
|
||||
MMDEPLOY_INFO("ACL finalized.");
|
||||
owned_acl_ = false;
|
||||
} else if (ret == ACL_ERROR_REPEAT_FINALIZE) {
|
||||
MMDEPLOY_INFO("ACL has already been finalized.");
|
||||
} else {
|
||||
MMDEPLOY_ERROR("aclFinalize() failed: {}", ret);
|
||||
}
|
||||
}
|
||||
}
|
||||
static bool owned_acl_;
|
||||
static int ref_count_;
|
||||
static std::mutex mutex_;
|
||||
};
|
||||
|
||||
bool Context::owned_acl_ = false;
|
||||
int Context::ref_count_ = 0;
|
||||
std::mutex Context::mutex_{};
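// Reading note (not part of the patch): Context reference-counts aclInit/aclFinalize so that
// multiple AclNet instances share one ACL runtime; owned_acl_ makes sure aclFinalize is only
// called when initialization was actually performed here and not by the embedding application
// (the ACL_ERROR_REPEAT_INITIALIZE / ACL_ERROR_REPEAT_FINALIZE branches cover that case).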
|
||||
|
||||
} // namespace
|
||||
|
||||
AclNet::~AclNet() {
|
||||
auto dtor = [&]() -> Result<void> {
|
||||
auto n_inputs = aclmdlGetDatasetNumBuffers(input_dataset_);
|
||||
for (int i = 0; i < n_inputs; ++i) {
|
||||
auto buffer = aclmdlGetDatasetBuffer(input_dataset_, i);
|
||||
auto data = aclGetDataBufferAddr(buffer);
|
||||
OUTCOME_TRY(_m(aclrtFree(data)));
|
||||
}
|
||||
input_tensor_.clear();
|
||||
OUTCOME_TRY(_m(aclmdlDestroyDataset(input_dataset_)));
|
||||
|
||||
auto n_outputs = aclmdlGetDatasetNumBuffers(output_dataset_);
|
||||
for (int i = 0; i < n_outputs; ++i) {
|
||||
auto buffer = aclmdlGetDatasetBuffer(output_dataset_, i);
|
||||
auto data = aclGetDataBufferAddr(buffer);
|
||||
OUTCOME_TRY(_m(aclrtFree(data)));
|
||||
}
|
||||
output_tensor_.clear();
|
||||
OUTCOME_TRY(_m(aclmdlDestroyDataset(output_dataset_)));
|
||||
|
||||
OUTCOME_TRY(_m(aclmdlDestroyDesc(model_desc_)));
|
||||
OUTCOME_TRY(_m(aclmdlUnload(model_id_)));
|
||||
return success();
|
||||
};
|
||||
if (auto r = dtor(); !r) {
|
||||
MMDEPLOY_ERROR("uninit failed: {}", r.error().message().c_str());
|
||||
}
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
Result<DataType> FromAclDataType(aclDataType data_type) {
|
||||
switch (data_type) {
|
||||
case ACL_FLOAT:
|
||||
return DataType::kFLOAT;
|
||||
case ACL_FLOAT16:
|
||||
return DataType::kHALF;
|
||||
case ACL_INT8:
|
||||
return DataType::kINT8;
|
||||
case ACL_INT32:
|
||||
return DataType::kINT32;
|
||||
case ACL_INT64:
|
||||
return DataType::kINT64;
|
||||
default:
|
||||
return Status(eNotSupported);
|
||||
}
|
||||
}
|
||||
|
||||
Result<aclDataType> ToAclDataType(DataType data_type) {
|
||||
switch (data_type) {
|
||||
case DataType::kFLOAT:
|
||||
return ACL_FLOAT;
|
||||
case DataType::kHALF:
|
||||
return ACL_FLOAT16;
|
||||
case DataType::kINT8:
|
||||
return ACL_INT8;
|
||||
case DataType::kINT32:
|
||||
return ACL_INT32;
|
||||
case DataType::kINT64:
|
||||
return ACL_INT64;
|
||||
default:
|
||||
return Status(eNotSupported);
|
||||
}
|
||||
}
|
||||
|
||||
Result<TensorDesc> ToTensorDesc(const aclmdlIODims& dims, aclDataType data_type) {
|
||||
auto extract_name = [](const std::string& name) {
|
||||
if (auto pos = name.find_last_of(':'); pos != std::string::npos) {
|
||||
return name.substr(pos + 1);
|
||||
} else {
|
||||
return name;
|
||||
}
|
||||
};
|
||||
OUTCOME_TRY(auto _data_type, FromAclDataType(data_type));
|
||||
return TensorDesc{Device(0), _data_type,
|
||||
TensorShape(&dims.dims[0], &dims.dims[0] + dims.dimCount),
|
||||
extract_name(dims.name)};
|
||||
}
|
||||
|
||||
Result<size_t> GetByteSize(const aclmdlIODims& dims, aclDataType data_type) {
|
||||
size_t byte_size = aclDataTypeSize(data_type);
|
||||
for (int i = 0; i < dims.dimCount; ++i) {
|
||||
if (dims.dims[i] < 0) {
|
||||
return Status(eInvalidArgument);
|
||||
}
|
||||
byte_size *= dims.dims[i];
|
||||
}
|
||||
return byte_size;
|
||||
}
|
||||
|
||||
} // namespace
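// Reading note (not part of the patch): GetByteSize multiplies aclDataTypeSize by every dim,
// e.g. ACL_FLOAT with dims [1, 3, 224, 224] gives 4 * 1 * 3 * 224 * 224 = 602112 bytes, and it
// rejects any negative (dynamic) dim; that is why the CreateBuffersDynamic* helpers below
// substitute concrete values before sizing the buffers.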
|
||||
|
||||
// all dims must be fixed
|
||||
auto AclNet::CreateBuffers(const aclmdlIODims& dims, aclDataType data_type) -> Result<Buffers> {
|
||||
OUTCOME_TRY(auto byte_size, GetByteSize(dims, data_type));
|
||||
Buffers pair{};
|
||||
void* dev_ptr{};
|
||||
OUTCOME_TRY(_m(aclrtMalloc(&dev_ptr, byte_size, ACL_MEM_MALLOC_HUGE_FIRST)));
|
||||
OUTCOME_TRY(_m(aclrtMemset(dev_ptr, byte_size, 0, byte_size)));
|
||||
OUTCOME_TRY(pair.device_buffer, _p(aclCreateDataBuffer(dev_ptr, byte_size)));
|
||||
OUTCOME_TRY(auto desc, ToTensorDesc(dims, data_type));
|
||||
void* host_ptr{};
|
||||
OUTCOME_TRY(_m(aclrtMallocHost(&host_ptr, byte_size)));
|
||||
memset(host_ptr, 0, byte_size);
|
||||
pair.host_tensor =
|
||||
Tensor(desc, std::shared_ptr<void>(host_ptr, [](void* p) { aclrtFreeHost(p); }));
|
||||
return pair;
|
||||
}
|
||||
|
||||
auto AclNet::CreateBuffersDynamicBatchSize(aclmdlIODims dims, aclDataType data_type)
|
||||
-> Result<Buffers> {
|
||||
for (int i = 0; i < dims.dimCount; ++i) {
|
||||
if (dims.dims[i] == -1) {
|
||||
dims.dims[i] = dynamic_batch_size_.back();
|
||||
}
|
||||
}
|
||||
return CreateBuffers(dims, data_type);
|
||||
}
|
||||
|
||||
auto AclNet::CreateBuffersDynamicImageSize(int index, aclmdlIODims dims, aclDataType data_type)
|
||||
-> Result<Buffers> {
|
||||
aclmdlHW hw_desc{};
|
||||
OUTCOME_TRY(_m(aclmdlGetDynamicHW(model_desc_, index, &hw_desc)));
|
||||
if (hw_desc.hwCount > 0) {
|
||||
auto& val = *std::max_element(hw_desc.hw, hw_desc.hw + hw_desc.hwCount,
|
||||
[](auto u, auto v) { return u[0] * u[1] < v[0] * v[1]; });
|
||||
int ptr = 0;
|
||||
for (int i = 0; i < dims.dimCount; ++i) {
|
||||
if (dims.dims[i] == -1) {
|
||||
if (ptr == 2) {
|
||||
return Status(eInvalidArgument);
|
||||
}
|
||||
dims.dims[i] = val[ptr++];
|
||||
}
|
||||
}
|
||||
if (ptr != 2) {
|
||||
return Status(eInvalidArgument);
|
||||
}
|
||||
}
|
||||
return CreateBuffers(dims, data_type);
|
||||
}
|
||||
|
||||
auto AclNet::CreateBuffersDynamicDims(int index, int dim_count, const aclmdlIODims& dims,
|
||||
aclDataType data_type) -> Result<Buffers> {
|
||||
int max_index = -1;
|
||||
size_t max_value = 0;
|
||||
aclmdlIODims max_shape{};
|
||||
for (int j = 0; j < dynamic_input_dims_.size(); ++j) {
|
||||
aclmdlIODims shape{};
|
||||
strncpy(shape.name, dims.name, sizeof(shape.name));
|
||||
shape.dimCount = dims.dimCount;
|
||||
std::copy(dynamic_input_dims_[j].dims + dim_count,
|
||||
dynamic_input_dims_[j].dims + dim_count + dims.dimCount, shape.dims);
|
||||
OUTCOME_TRY(auto byte_size, GetByteSize(shape, data_type));
|
||||
if (byte_size > max_value) {
|
||||
max_index = j;
|
||||
max_value = byte_size;
|
||||
max_shape = shape;
|
||||
}
|
||||
}
|
||||
if (max_index < 0) {
|
||||
return Status(eInvalidArgument);
|
||||
}
|
||||
MMDEPLOY_INFO("max shape for input {}: {}", index, max_shape);
|
||||
return CreateBuffers(max_shape, data_type);
|
||||
}
|
||||
|
||||
Result<void> AclNet::ConfigDynamicShapes() {
|
||||
aclError status = ACL_SUCCESS;
|
||||
{
|
||||
size_t dynamic_tensor_index{};
|
||||
status = aclmdlGetInputIndexByName(model_desc_, ACL_DYNAMIC_TENSOR_NAME, &dynamic_tensor_index);
|
||||
if (status == ACL_SUCCESS) {
|
||||
dynamic_tensor_index_ = static_cast<int>(dynamic_tensor_index);
|
||||
MMDEPLOY_INFO("dynamic tensor index: {}", dynamic_tensor_index);
|
||||
}
|
||||
}
|
||||
|
||||
if (dynamic_tensor_index_ >= 0) {
|
||||
aclmdlBatch batch_desc{};
|
||||
status = aclmdlGetDynamicBatch(model_desc_, &batch_desc);
|
||||
if (status == ACL_SUCCESS && batch_desc.batchCount > 0) {
|
||||
MMDEPLOY_INFO("{}, status = {}", batch_desc, status);
|
||||
input_shape_type_ = kDynamicBatchSize;
|
||||
dynamic_batch_size_.insert(dynamic_batch_size_.end(), batch_desc.batch,
|
||||
batch_desc.batch + batch_desc.batchCount);
|
||||
std::sort(dynamic_batch_size_.begin(), dynamic_batch_size_.end());
|
||||
}
|
||||
|
||||
size_t dynamic_gear_count{0};
|
||||
if (input_shape_type_ == kStatic) {
|
||||
status = aclmdlGetInputDynamicGearCount(model_desc_, -1, &dynamic_gear_count);
|
||||
dynamic_input_dims_.resize(dynamic_gear_count);
|
||||
if (status == ACL_SUCCESS && dynamic_gear_count > 0) {
|
||||
status = aclmdlGetInputDynamicDims(model_desc_, -1, dynamic_input_dims_.data(),
|
||||
dynamic_gear_count);
|
||||
for (const auto& dims : dynamic_input_dims_) {
|
||||
MMDEPLOY_INFO("dynamic input dims: {}", dims);
|
||||
}
|
||||
input_shape_type_ = kDynamicDims;
|
||||
} else {
|
||||
input_shape_type_ = kDynamicImageSize;
|
||||
}
|
||||
}
|
||||
}
|
||||
return success();
|
||||
}
|
||||
|
||||
Result<void> AclNet::CreateInputBuffers() {
|
||||
input_dataset_ = aclmdlCreateDataset();
|
||||
auto n_inputs = aclmdlGetNumInputs(model_desc_);
|
||||
MMDEPLOY_INFO("n_inputs = {}, dynamic_tensor_index_ = {}", n_inputs, dynamic_tensor_index_);
|
||||
int dim_count = 0;
|
||||
for (int i = 0; i < n_inputs; ++i) {
|
||||
if (i == dynamic_tensor_index_) {
|
||||
void* data{};
|
||||
auto input_len = aclmdlGetInputSizeByIndex(model_desc_, i);
|
||||
OUTCOME_TRY(_m(aclrtMalloc(&data, input_len, ACL_MEM_MALLOC_HUGE_FIRST)));
|
||||
OUTCOME_TRY(auto buffer, _p(aclCreateDataBuffer(data, input_len)));
|
||||
OUTCOME_TRY(_m(aclmdlAddDatasetBuffer(input_dataset_, buffer)));
|
||||
} else {
|
||||
Buffers buffers{};
|
||||
aclmdlIODims dims{};
|
||||
OUTCOME_TRY(_m(aclmdlGetInputDims(model_desc_, i, &dims)));
|
||||
input_dims_.push_back(dims);
|
||||
auto data_type = aclmdlGetInputDataType(model_desc_, i);
|
||||
input_data_type_.push_back(data_type);
|
||||
MMDEPLOY_INFO("{}", dims);
|
||||
|
||||
switch (input_shape_type_) {
|
||||
case kStatic: {
|
||||
OUTCOME_TRY(buffers, CreateBuffers(dims, data_type));
|
||||
break;
|
||||
}
|
||||
case kDynamicBatchSize: {
|
||||
OUTCOME_TRY(buffers, CreateBuffersDynamicBatchSize(dims, data_type));
|
||||
break;
|
||||
}
|
||||
case kDynamicImageSize: {
|
||||
OUTCOME_TRY(buffers, CreateBuffersDynamicImageSize(i, dims, data_type));
|
||||
break;
|
||||
}
|
||||
case kDynamicDims: {
|
||||
OUTCOME_TRY(buffers, CreateBuffersDynamicDims(i, dim_count, dims, data_type));
|
||||
break;
|
||||
}
|
||||
default:
|
||||
return Status(eInvalidArgument);
|
||||
}
|
||||
|
||||
OUTCOME_TRY(_m(aclmdlAddDatasetBuffer(input_dataset_, buffers.device_buffer)));
|
||||
input_tensor_.push_back(std::move(buffers.host_tensor));
|
||||
dim_count += dims.dimCount;
|
||||
}
|
||||
}
|
||||
return success();
|
||||
}
|
||||
|
||||
Result<void> AclNet::CreateOutputBuffers() {
|
||||
output_dataset_ = aclmdlCreateDataset();
|
||||
auto n_outputs = aclmdlGetNumOutputs(model_desc_);
|
||||
std::vector<aclmdlIODims> output_dims;
|
||||
for (int i = 0; i < n_outputs; ++i) {
|
||||
aclmdlIODims dims{};
|
||||
OUTCOME_TRY(_m(aclmdlGetOutputDims(model_desc_, i, &dims))); // return max dims
|
||||
output_dims_.push_back(dims);
|
||||
MMDEPLOY_INFO("{}", dims);
|
||||
auto data_type = aclmdlGetOutputDataType(model_desc_, i);
|
||||
output_data_type_.push_back(data_type);
|
||||
OUTCOME_TRY(auto buffers, CreateBuffers(dims, data_type));
|
||||
OUTCOME_TRY(_m(aclmdlAddDatasetBuffer(output_dataset_, buffers.device_buffer)));
|
||||
output_tensor_.push_back(std::move(buffers.host_tensor));
|
||||
}
|
||||
return success();
|
||||
}
|
||||
|
||||
Result<void> AclNet::Init(const Value& args) {
|
||||
auto& context = args["context"];
|
||||
cpu_stream_ = context["stream"].get<Stream>();
|
||||
|
||||
auto name = args["name"].get<std::string>();
|
||||
auto model = context["model"].get<Model>();
|
||||
|
||||
device_id_ = context["device"].get<Device>().device_id();
|
||||
acl_context_ = std::make_shared<Context>();
|
||||
|
||||
OUTCOME_TRY(auto config, model.GetModelConfig(name));
|
||||
OUTCOME_TRY(auto binary, model.ReadFile(config.net));
|
||||
|
||||
OUTCOME_TRY(_m(aclrtSetDevice(device_id_)));
|
||||
|
||||
OUTCOME_TRY(_m(aclmdlLoadFromMem(binary.data(), binary.size(), &model_id_)));
|
||||
|
||||
model_desc_ = aclmdlCreateDesc();
|
||||
OUTCOME_TRY(_m(aclmdlGetDesc(model_desc_, model_id_)));
|
||||
|
||||
// dynamic_tensor_index_
|
||||
// input_shape_type_
|
||||
// dynamic_batch_size_
|
||||
// dynamic_input_dims_
|
||||
if (auto r = ConfigDynamicShapes(); !r) {
|
||||
MMDEPLOY_ERROR("Failed to config dynamic shapes");
|
||||
return r.as_failure();
|
||||
}
|
||||
|
||||
// input_dataset_
|
||||
// input_data_type_
|
||||
// input_dims_
|
||||
// input_tensor_
|
||||
if (auto r = CreateInputBuffers(); !r) {
|
||||
MMDEPLOY_ERROR("Failed to create input buffers");
|
||||
return r.as_failure();
|
||||
}
|
||||
|
||||
// output_dataset_
|
||||
// output_data_type_
|
||||
// output_dims_
|
||||
// output_tensor_
|
||||
if (auto r = CreateOutputBuffers(); !r) {
|
||||
MMDEPLOY_ERROR("Failed to create output buffers");
|
||||
return r.as_failure();
|
||||
}
|
||||
|
||||
return success();
|
||||
}
|
||||
|
||||
Result<void> AclNet::Deinit() { return success(); }
|
||||
|
||||
Result<Span<Tensor>> AclNet::GetInputTensors() { return input_tensor_; }
|
||||
|
||||
Result<Span<Tensor>> AclNet::GetOutputTensors() { return output_tensor_; }
|
||||
|
||||
Result<void> AclNet::Reshape(Span<TensorShape> input_shapes) {
|
||||
OUTCOME_TRY(_m(aclrtSetDevice(device_id_)));
|
||||
// Sanity checks
|
||||
if (input_shapes.size() != input_dims_.size()) {
|
||||
MMDEPLOY_ERROR("inconsistent num inputs");
|
||||
return Status(eInvalidArgument);
|
||||
}
|
||||
for (int i = 0; i < input_dims_.size(); ++i) {
|
||||
if (input_shapes[i].size() != input_dims_[i].dimCount) {
|
||||
MMDEPLOY_ERROR("inconsistent num of dims");
|
||||
return Status(eInvalidArgument);
|
||||
}
|
||||
}
|
||||
|
||||
switch (input_shape_type_) {
|
||||
case kStatic: {
|
||||
OUTCOME_TRY(ReshapeStatic(input_shapes));
|
||||
break;
|
||||
}
|
||||
case kDynamicBatchSize: {
|
||||
OUTCOME_TRY(ReshapeDynamicBatchSize(input_shapes));
|
||||
break;
|
||||
}
|
||||
case kDynamicImageSize: {
|
||||
OUTCOME_TRY(ReshapeDynamicImageSize(input_shapes));
|
||||
break;
|
||||
}
|
||||
case kDynamicDims: {
|
||||
OUTCOME_TRY(ReshapeDynamicDims(input_shapes));
|
||||
break;
|
||||
}
|
||||
default:
|
||||
return Status(eInvalidArgument);
|
||||
}
|
||||
|
||||
for (int i = 0; i < input_shapes.size(); ++i) {
|
||||
auto buffer = input_tensor_[i].buffer();
|
||||
auto desc = input_tensor_[i].desc();
|
||||
desc.shape = input_shapes[i];
|
||||
input_tensor_[i] = Tensor(std::move(desc), std::move(buffer));
|
||||
}
|
||||
|
||||
for (int i = 0; i < output_dims_.size(); ++i) {
|
||||
aclmdlIODims dims{};
|
||||
OUTCOME_TRY(_m(aclmdlGetCurOutputDims(model_desc_, i, &dims)));
|
||||
auto buffer = output_tensor_[i].buffer();
|
||||
auto desc = output_tensor_[i].desc();
|
||||
desc.shape = TensorShape(&dims.dims[0], &dims.dims[0] + dims.dimCount);
|
||||
output_tensor_[i] = Tensor(std::move(desc), std::move(buffer));
|
||||
}
|
||||
|
||||
return success();
|
||||
}
|
||||
|
||||
Result<void> AclNet::ReshapeStatic(Span<TensorShape> input_shapes) {
|
||||
for (int i = 0; i < input_dims_.size(); ++i) {
|
||||
Span src(input_shapes[i]);
|
||||
Span ref(input_dims_[i].dims, input_dims_[i].dimCount);
|
||||
if (src != ref) {
|
||||
MMDEPLOY_ERROR("Shape mismatch {} vs {}", src, ref);
|
||||
return Status(eInvalidArgument);
|
||||
}
|
||||
}
|
||||
return success();
|
||||
}
|
||||
|
||||
Result<void> AclNet::ReshapeDynamicBatchSize(Span<TensorShape> input_shapes) {
|
||||
int batch_size = -1;
|
||||
for (int i = 0; i < input_dims_.size(); ++i) {
|
||||
for (int j = 0; j < input_dims_[i].dimCount; ++j) {
|
||||
if (input_dims_[i].dims[j] == -1) {
|
||||
if (batch_size != -1 && batch_size != input_shapes[i][j]) {
|
||||
// inconsistent batch size
|
||||
return Status(eInvalidArgument);
|
||||
}
|
||||
batch_size = input_shapes[i][j];
|
||||
}
|
||||
}
|
||||
}
|
||||
if (batch_size < 0) {
|
||||
MMDEPLOY_ERROR("unable to determine batch size");
|
||||
return Status(eFail);
|
||||
}
|
||||
MMDEPLOY_INFO("batch size {} {}", batch_size, dynamic_tensor_index_);
|
||||
auto index =
|
||||
std::lower_bound(dynamic_batch_size_.begin(), dynamic_batch_size_.end(), batch_size) -
|
||||
dynamic_batch_size_.begin();
|
||||
  if (index == dynamic_batch_size_.size()) {
    MMDEPLOY_ERROR("Unsupported batch size: {}", batch_size);
    return Status(eNotSupported);  // avoid indexing past the end of dynamic_batch_size_ below
  }
|
||||
// TODO: memset padding memory to avoid potential extra computation
|
||||
OUTCOME_TRY(_m(aclmdlSetDynamicBatchSize(model_id_, input_dataset_, dynamic_tensor_index_,
|
||||
dynamic_batch_size_[index])));
|
||||
return success();
|
||||
}
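// Reading note (not part of the patch): dynamic_batch_size_ was sorted in ConfigDynamicShapes,
// so the lower_bound above selects the smallest profiled batch size that is >= the requested
// one; the TODO about memset-ing the padding refers to the extra rows this rounding introduces.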
|
||||
|
||||
Result<void> AclNet::ReshapeDynamicImageSize(Span<TensorShape> input_shapes) {
|
||||
uint64_t hw[2];
|
||||
bool found = false;
|
||||
for (int i = 0; i < input_dims_.size(); ++i) {
|
||||
uint64_t tmp[2];
|
||||
int ptr = 0;
|
||||
for (int j = 0; j < input_dims_[i].dimCount; ++j) {
|
||||
if (input_dims_[i].dims[j] == -1) {
|
||||
if (ptr == 2) {
|
||||
MMDEPLOY_ERROR("dynamic HW size out of bounds: {}", input_dims_[i]);
|
||||
return Status(eInvalidArgument);
|
||||
} else {
|
||||
tmp[ptr++] = input_shapes[i][j];
|
||||
}
|
||||
}
|
||||
}
|
||||
if (ptr && ptr != 2) {
|
||||
MMDEPLOY_ERROR("Partially determined dynamic HW size: {}", input_dims_[i]);
|
||||
return Status(eInvalidArgument);
|
||||
}
|
||||
if (ptr == 2) {
|
||||
if (found) {
|
||||
if (hw[0] != tmp[0] || hw[1] != tmp[1]) {
|
||||
MMDEPLOY_ERROR("Inconsistent dynamic HW size: ({}, {}) vs ({}, {})", hw[0], hw[1], tmp[0],
|
||||
tmp[1]);
|
||||
return Status(eInvalidArgument);
|
||||
}
|
||||
} else {
|
||||
found = true;
|
||||
hw[0] = tmp[0];
|
||||
hw[1] = tmp[1];
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!found) {
|
||||
MMDEPLOY_ERROR("Unable to determine image size");
|
||||
return Status(eInvalidArgument);
|
||||
}
|
||||
MMDEPLOY_INFO("dynamic HW size ({}, {})", hw[0], hw[1]);
|
||||
OUTCOME_TRY(
|
||||
_m(aclmdlSetDynamicHWSize(model_id_, input_dataset_, dynamic_tensor_index_, hw[0], hw[1])));
|
||||
return success();
|
||||
}
|
||||
|
||||
Result<void> AclNet::ReshapeDynamicDims(Span<TensorShape> input_shapes) {
|
||||
std::vector<int> match(dynamic_input_dims_.size(), 1);
|
||||
aclmdlIODims dims{};
|
||||
for (int i = 0; i < input_shapes.size(); ++i) {
|
||||
const auto& shape = input_shapes[i];
|
||||
for (int j = 0; j < shape.size(); ++j) {
|
||||
if (input_dims_[i].dims[j] == -1) {
|
||||
for (int k = 0; k < dynamic_input_dims_.size(); ++k) {
|
||||
// disable profile when dims mismatch, except for the first dim (batch size)
|
||||
if (j == 0 && shape[j] < dynamic_input_dims_[k].dims[dims.dimCount]) {
|
||||
// pass
|
||||
} else if (shape[j] != dynamic_input_dims_[k].dims[dims.dimCount]) {
|
||||
match[k] = 0;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if (input_dims_[i].dims[j] != shape[j]) {
|
||||
return Status(eNotSupported);
|
||||
}
|
||||
}
|
||||
dims.dims[dims.dimCount++] = shape[j];
|
||||
}
|
||||
}
|
||||
int dims_index = std::find(match.begin(), match.end(), 1) - match.begin();
|
||||
if (dims_index == match.size()) {
|
||||
MMDEPLOY_ERROR("Shape not supported: {}", dims);
|
||||
return Status(eNotSupported);
|
||||
}
|
||||
// TODO: memset padding memory to avoid potential extra computation
|
||||
OUTCOME_TRY(_m(aclmdlSetInputDynamicDims(model_id_, input_dataset_, dynamic_tensor_index_,
|
||||
&dynamic_input_dims_[dims_index])));
|
||||
return success();
|
||||
}
|
||||
|
||||
Result<void> AclNet::Forward() {
|
||||
OUTCOME_TRY(cpu_stream_.Wait());
|
||||
|
||||
OUTCOME_TRY(_m(aclrtSetDevice(device_id_)));
|
||||
|
||||
for (int i = 0; i < input_tensor_.size(); ++i) {
|
||||
auto buffer = aclmdlGetDatasetBuffer(input_dataset_, i);
|
||||
auto buffer_size = aclGetDataBufferSizeV2(buffer);
|
||||
auto buffer_data = aclGetDataBufferAddr(buffer);
|
||||
auto host_ptr = input_tensor_[i].data();
|
||||
OUTCOME_TRY(_m(aclrtMemcpy(buffer_data, buffer_size, host_ptr, input_tensor_[i].byte_size(),
|
||||
ACL_MEMCPY_HOST_TO_DEVICE)));
|
||||
}
|
||||
|
||||
OUTCOME_TRY(_m(aclmdlExecute(model_id_, input_dataset_, output_dataset_)));
|
||||
|
||||
for (int i = 0; i < output_tensor_.size(); ++i) {
|
||||
auto buffer = aclmdlGetDatasetBuffer(output_dataset_, i);
|
||||
auto buffer_data = aclGetDataBufferAddr(buffer);
|
||||
auto host_ptr = output_tensor_[i].data();
|
||||
OUTCOME_TRY(_m(aclrtMemcpy(host_ptr, output_tensor_[i].byte_size(), buffer_data,
|
||||
output_tensor_[i].byte_size(), ACL_MEMCPY_DEVICE_TO_HOST)));
|
||||
}
|
||||
return success();
|
||||
}
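// Reading note (not part of the patch): Forward() is fully synchronous -- it copies every input
// host tensor into its device buffer, runs aclmdlExecute, then copies the outputs back to host;
// ForwardAsync below simply reports eNotSupported.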
|
||||
|
||||
Result<void> AclNet::ForwardAsync(Event* event) { return Status(eNotSupported); }
|
||||
|
||||
class AclNetCreator : public Creator<Net> {
|
||||
public:
|
||||
const char* GetName() const override { return "ascend"; }
|
||||
int GetVersion() const override { return 0; }
|
||||
std::unique_ptr<Net> Create(const Value& args) override {
|
||||
try {
|
||||
auto p = std::make_unique<AclNet>();
|
||||
if (auto r = p->Init(args)) {
|
||||
return p;
|
||||
} else {
|
||||
MMDEPLOY_ERROR("error creating AclNet: {}", r.error().message().c_str());
|
||||
return nullptr;
|
||||
}
|
||||
} catch (const std::exception& e) {
|
||||
MMDEPLOY_ERROR("unhandled exception when creating AclNet: {}", e.what());
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
REGISTER_MODULE(Net, AclNetCreator);
|
||||
|
||||
} // namespace mmdeploy
|
|
@@ -0,0 +1,70 @@
|
|||
// Copyright (c) OpenMMLab. All rights reserved.
|
||||
|
||||
#ifndef MMDEPLOY_SRC_NET_ACL_ACL_NET_H_
|
||||
#define MMDEPLOY_SRC_NET_ACL_ACL_NET_H_
|
||||
|
||||
#include "acl/acl.h"
|
||||
#include "mmdeploy/core/net.h"
|
||||
#include "mmdeploy/core/status_code.h"
|
||||
|
||||
namespace mmdeploy {
|
||||
|
||||
class AclNet : public Net {
|
||||
public:
|
||||
~AclNet() override;
|
||||
Result<void> Init(const Value& cfg) override;
|
||||
Result<void> Deinit() override;
|
||||
Result<Span<Tensor>> GetInputTensors() override;
|
||||
Result<Span<Tensor>> GetOutputTensors() override;
|
||||
Result<void> Reshape(Span<TensorShape> input_shapes) override;
|
||||
Result<void> Forward() override;
|
||||
Result<void> ForwardAsync(Event* event) override;
|
||||
|
||||
private:
|
||||
enum InputShapeType { kStatic, kDynamicBatchSize, kDynamicImageSize, kDynamicDims };
|
||||
|
||||
Result<void> ReshapeStatic(Span<TensorShape> input_shapes);
|
||||
Result<void> ReshapeDynamicBatchSize(Span<TensorShape> input_shapes);
|
||||
Result<void> ReshapeDynamicImageSize(Span<TensorShape> input_shapes);
|
||||
Result<void> ReshapeDynamicDims(Span<TensorShape> input_shapes);
|
||||
|
||||
struct Buffers {
|
||||
aclDataBuffer* device_buffer;
|
||||
Tensor host_tensor;
|
||||
};
|
||||
|
||||
Result<Buffers> CreateBuffers(const aclmdlIODims& dims, aclDataType data_type);
|
||||
|
||||
Result<Buffers> CreateBuffersDynamicBatchSize(aclmdlIODims dims, aclDataType data_type);
|
||||
Result<Buffers> CreateBuffersDynamicImageSize(int index, aclmdlIODims dims,
|
||||
aclDataType data_type);
|
||||
Result<Buffers> CreateBuffersDynamicDims(int index, int dim_count, const aclmdlIODims& dims,
|
||||
aclDataType data_type);
|
||||
|
||||
Result<void> ConfigDynamicShapes();
|
||||
|
||||
Result<void> CreateInputBuffers();
|
||||
Result<void> CreateOutputBuffers();
|
||||
|
||||
std::shared_ptr<void> acl_context_;
|
||||
Stream cpu_stream_;
|
||||
int32_t device_id_{0};
|
||||
uint32_t model_id_{(uint32_t)-1};
|
||||
aclmdlDesc* model_desc_{nullptr};
|
||||
int dynamic_tensor_index_{-1};
|
||||
InputShapeType input_shape_type_{kStatic};
|
||||
std::vector<size_t> dynamic_batch_size_;
|
||||
std::vector<aclmdlIODims> dynamic_input_dims_;
|
||||
aclmdlDataset* input_dataset_{nullptr};
|
||||
aclmdlDataset* output_dataset_{nullptr};
|
||||
std::vector<aclmdlIODims> input_dims_;
|
||||
std::vector<aclmdlIODims> output_dims_;
|
||||
std::vector<aclDataType> input_data_type_;
|
||||
std::vector<aclDataType> output_data_type_;
|
||||
std::vector<Tensor> input_tensor_;
|
||||
std::vector<Tensor> output_tensor_;
|
||||
};
|
||||
|
||||
} // namespace mmdeploy
|
||||
|
||||
#endif // MMDEPLOY_SRC_NET_ACL_ACL_NET_H_
|
|
@@ -0,0 +1,14 @@
|
|||
# Copyright (c) OpenMMLab. All rights reserved.
|
||||
|
||||
project(mmdeploy_coreml_net)
|
||||
|
||||
if ("cpu" IN_LIST MMDEPLOY_TARGET_DEVICES)
|
||||
find_library(CORE_ML CoreML)
|
||||
find_library(FOUNDATION Foundation)
|
||||
mmdeploy_add_module(${PROJECT_NAME} coreml_net.mm)
|
||||
target_include_directories(${PROJECT_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
|
||||
target_link_libraries(${PROJECT_NAME} PRIVATE ${CORE_ML} ${FOUNDATION})
|
||||
add_library(mmdeploy::coreml_net ALIAS ${PROJECT_NAME})
|
||||
else ()
|
||||
    message(FATAL_ERROR "'coreml_net' is NOT supported in target devices: ${MMDEPLOY_TARGET_DEVICES}")
|
||||
endif ()
|
|
@ -0,0 +1,37 @@
// Copyright (c) OpenMMLab. All rights reserved.

#ifndef MMDEPLOY_SRC_NET_COREML_COREML_NET_H_
#define MMDEPLOY_SRC_NET_COREML_COREML_NET_H_

#include "mmdeploy/core/net.h"

namespace mmdeploy {

namespace coreml {
class Execution;
}  // namespace coreml

class CoreMLNet : public Net {
 public:
  ~CoreMLNet() override = default;
  Result<void> Init(const Value& cfg) override;
  Result<void> Deinit() override;
  Result<Span<Tensor>> GetInputTensors() override;
  Result<Span<Tensor>> GetOutputTensors() override;
  Result<void> Reshape(Span<TensorShape> input_shapes) override;
  Result<void> Forward() override;
  Result<void> ForwardAsync(Event* event) override;

 private:
  std::unique_ptr<coreml::Execution> execution_;
  std::vector<Tensor> input_tensors_;
  std::vector<Tensor> output_tensors_;
  Device device_;
  Stream stream_;

  friend class coreml::Execution;
};

}  // namespace mmdeploy

#endif  // MMDEPLOY_SRC_NET_COREML_COREML_NET_H_
@ -0,0 +1,326 @@
// Copyright (c) OpenMMLab. All rights reserved.

#include "coreml_net.h"
#include "mmdeploy/core/model.h"
#include "mmdeploy/core/status_code.h"
#include "mmdeploy/core/utils/filesystem.h"
#include <fstream>

#import <CoreML/CoreML.h>
#import <Foundation/Foundation.h>
#include <memory>

@interface MMBatchTensorFeatureProvider : NSObject <MLBatchProvider> {
  const std::vector<mmdeploy::Tensor> *inputs_;
}

- (instancetype)initWithInputs:(const std::vector<mmdeploy::Tensor> &)inputs;
- (NSInteger)count;
- (id<MLFeatureProvider>)featuresAtIndex:(NSInteger)index;
@end

@implementation MMBatchTensorFeatureProvider

- (instancetype)initWithInputs:(const std::vector<mmdeploy::Tensor> &)inputs {
  inputs_ = &inputs;
  return self;
}

- (NSInteger)count {
  return (*inputs_)[0].shape(0);
}

- (id<MLFeatureProvider>)featuresAtIndex:(NSInteger)index {
  MLDictionaryFeatureProvider *feature = nil;
  NSMutableDictionary<NSString *, id> *input_dict =
      [[NSMutableDictionary<NSString *, id> alloc] init];

  for (auto x : *inputs_) {
    auto in = x.Slice(index);
    NSMutableArray *shape = [[NSMutableArray alloc] init];
    for (const auto dim : in.shape()) {
      [shape addObject:[NSNumber numberWithLongLong:dim]];
    }

    NSMutableArray *strides = [[NSMutableArray alloc] init];
    int64_t stride = 1;
    for (int i = in.shape().size() - 1; i >= 0; i--) {
      [strides insertObject:[NSNumber numberWithLongLong:stride] atIndex:0];
      stride *= in.shape()[i];
    }

    MLMultiArrayDataType data_type = MLMultiArrayDataTypeFloat32;
    NSError *error = nil;
    MLMultiArray *mlArray =
        [[MLMultiArray alloc] initWithDataPointer:in.data()
                                            shape:shape
                                         dataType:data_type
                                          strides:strides
                                      deallocator:(^(void *){
                                      })error:&error];
    if (error != nil) {
      MMDEPLOY_ERROR("init MLMultiArray failed with key: {}, error message: {}",
                     in.name(), [[error localizedDescription] UTF8String]);
      return nil;
    }

    NSString *key = [NSString stringWithUTF8String:in.name()];
    input_dict[key] = mlArray;
  }

  NSError *error = nil;
  feature = [[MLDictionaryFeatureProvider alloc] initWithDictionary:input_dict
                                                              error:&error];
  if (error != nil) {
    MMDEPLOY_ERROR("init MLDictionaryFeatureProvider failed with index: {}, "
                   "error message: {}",
                   index, [[error localizedDescription] UTF8String]);
    return nil;
  }

  return feature;
}

@end

namespace mmdeploy {
namespace coreml {

static Result<void> CheckInputOutputFeatureType(MLFeatureType type) {
  if (type != MLFeatureTypeMultiArray) {
    MMDEPLOY_ERROR("unsupported feature type: {}", type);
    return Status(eInvalidArgument);
  }
  return success();
}

static TensorShape to_shape(NSArray<NSNumber *> *shape) {
  TensorShape _shape;
  for (int i = 0; i < shape.count; i++) {
    _shape.push_back(shape[i].intValue);
  }
  return _shape;
}

static Result<DataType> ConvertElementType(MLMultiArrayDataType type) {
  switch (type) {
    case MLMultiArrayDataTypeFloat32:
      return DataType::kFLOAT;
    case MLMultiArrayDataTypeFloat16:
      return DataType::kHALF;
    case MLMultiArrayDataTypeInt32:
      return DataType::kINT32;
    default:
      MMDEPLOY_ERROR("unsupported MLMultiArrayDataType: {}",
                     static_cast<int>(type));
      return Status(eNotSupported);
  }
}

static Result<Tensor> AsTensor(MLMultiArray *mlArray, const Device &device) {
  TensorDesc desc;
  desc.device = device;
  desc.shape = to_shape(mlArray.shape);
  OUTCOME_TRY(desc.data_type, ConvertElementType(mlArray.dataType));
  std::shared_ptr<void> data(const_cast<void *>(mlArray.dataPointer),
                             [](void *) {});
  return Tensor(desc, data);
}

class Execution {
 public:
  Execution(const std::string &path, CoreMLNet *net) : path_(path), net_(net) {}
  ~Execution() { RemoveModel(); }

  Result<void> Init() {
    OUTCOME_TRY(LoadModel());
    OUTCOME_TRY(SetInputOutputTensor());
    return success();
  }

  Result<void> Forward() {
    int batch_size = net_->input_tensors_[0].shape(0);

    // prepare input
    NSError *error = nil;
    MMBatchTensorFeatureProvider *input_feature =
        [[MMBatchTensorFeatureProvider alloc]
            initWithInputs:net_->input_tensors_];

    id<MLBatchProvider> output_feature =
        [model_ predictionsFromBatch:input_feature error:&error];
    if (error != nil) {
      MMDEPLOY_ERROR("coreml forward failed, error message: {}",
                     [[error localizedDescription] UTF8String]);
      return Status(eFail);
    }

    // extract output
    for (size_t i = 0; i < net_->output_tensors_.size(); ++i) {
      auto &out = net_->output_tensors_[i];

      for (int bid = 0; bid < output_feature.count; bid++) {
        NSString *name =
            [NSString stringWithCString:out.name()
                               encoding:[NSString defaultCStringEncoding]];
        if (name == nil) {
          MMDEPLOY_ERROR("output name must not be nil");
          return Status(eFail);
        }
        MLFeatureValue *output_value =
            [[output_feature featuresAtIndex:bid] featureValueForName:name];
        if (output_value == nil) {
          MMDEPLOY_ERROR("model output doesn't have a tensor named: {}",
                         out.name());
          return Status(eFail);
        }

        MLMultiArray *mlArray = [output_value multiArrayValue];
        OUTCOME_TRY(auto tmp, AsTensor(mlArray, out.device()));
        if (bid == 0) {
          TensorShape batch_shape = tmp.shape();
          batch_shape[0] = batch_size;
          out.Reshape(batch_shape);
        }

        auto slice = out.Slice(bid);
        OUTCOME_TRY(tmp.CopyTo(slice, net_->stream_));
      }
    }

    return success();
  }

  Result<void> SetInputOutputTensor() {
    // input
    auto input_desc = model_.modelDescription.inputDescriptionsByName;
    for (NSString *name in input_desc) {
      MLFeatureDescription *value = input_desc[name];
      OUTCOME_TRY(CheckInputOutputFeatureType(value.type));
      // use default shape
      auto shape = to_shape(value.multiArrayConstraint.shape);
      OUTCOME_TRY(auto data_type,
                  ConvertElementType(value.multiArrayConstraint.dataType));
      net_->input_tensors_.emplace_back(
          TensorDesc{net_->device_, data_type, shape, [name UTF8String]});
    }

    // output
    auto output_desc = model_.modelDescription.outputDescriptionsByName;
    for (NSString *name in output_desc) {
      MLFeatureDescription *value = output_desc[name];
      OUTCOME_TRY(auto data_type,
                  ConvertElementType(value.multiArrayConstraint.dataType));
      // can't get output shape
      net_->output_tensors_.emplace_back(
          TensorDesc{net_->device_, data_type, {}, [name UTF8String]});
    }

    return success();
  }

  Result<void> Reshape(Span<TensorShape> input_shapes) {
    for (size_t i = 0; i < input_shapes.size(); ++i) {
      net_->input_tensors_[i].Reshape(input_shapes[i]);
    }
    return success();
  }

  Result<void> LoadModel() {
    NSString *model_path = [NSString stringWithUTF8String:path_.c_str()];
    NSError *error = nil;
    NSURL *model_url = [NSURL URLWithString:model_path];
    compiled_model_url_ = [MLModel compileModelAtURL:model_url error:&error];
    if (error != nil) {
      MMDEPLOY_ERROR("failed to compile model, error message: {}",
                     [[error localizedDescription] UTF8String]);
      return Status(eFail);
    }

    MLModelConfiguration *config = [MLModelConfiguration alloc];
    config.computeUnits = MLComputeUnitsAll;
    model_ = [MLModel modelWithContentsOfURL:compiled_model_url_
                               configuration:config
                                       error:&error];
    if (error != nil) {
      MMDEPLOY_ERROR("failed to construct model, error message: {}",
                     [[error localizedDescription] UTF8String]);
      return Status(eFail);
    }
    return success();
  }

  void RemoveModel() {
    NSError *error = nil;
    if (compiled_model_url_ != nil) {
      [[NSFileManager defaultManager] removeItemAtURL:compiled_model_url_
                                                 error:&error];
      if (error != nil) {
        MMDEPLOY_ERROR("failed to remove compiled model, error message: {}",
                       [[error localizedDescription] UTF8String]);
      }
      compiled_model_url_ = nil;
    }
  }

  NSURL *compiled_model_url_{nil};
  MLModel *model_{nil};

  std::string path_;
  CoreMLNet *net_{nullptr};
};

}  // namespace coreml

Result<void> CoreMLNet::Init(const Value &cfg) {
  auto &context = cfg["context"];
  device_ = context["device"].get<Device>();
  stream_ = context["stream"].get<Stream>();

  auto name = cfg["name"].get<std::string>();
  auto model = context["model"].get<Model>();
  OUTCOME_TRY(auto config, model.GetModelConfig(name));

  std::string coreml_tmp_path =
      (fs::path(model.GetModelPath()) / config.net).string();
  execution_ = std::make_unique<coreml::Execution>(coreml_tmp_path, this);
  OUTCOME_TRY(execution_->Init());

  return success();
}

Result<void> CoreMLNet::Deinit() { return success(); }

Result<Span<Tensor>> CoreMLNet::GetInputTensors() { return input_tensors_; }

Result<Span<Tensor>> CoreMLNet::GetOutputTensors() { return output_tensors_; }

Result<void> CoreMLNet::Reshape(Span<TensorShape> input_shapes) {
  return execution_->Reshape(input_shapes);
}

Result<void> CoreMLNet::Forward() { return execution_->Forward(); }

Result<void> CoreMLNet::ForwardAsync(Event *event) {
  return Status(eNotSupported);
}

class CoreMLNetCreator : public Creator<Net> {
 public:
  const char *GetName() const override { return "coreml"; }
  int GetVersion() const override { return 0; }
  std::unique_ptr<Net> Create(const Value &args) override {
    auto p = std::make_unique<CoreMLNet>();
    if (auto r = p->Init(args)) {
      return p;
    } else {
      MMDEPLOY_ERROR("error creating CoreMLNet: {}",
                     r.error().message().c_str());
      return nullptr;
    }
  }
};

REGISTER_MODULE(Net, CoreMLNetCreator);

}  // namespace mmdeploy
@ -14,7 +14,11 @@ if (PPLNN_USE_CUDA AND ("cuda" IN_LIST MMDEPLOY_TARGET_DEVICES))
    target_compile_definitions(${PROJECT_NAME} PRIVATE -DPPL_NN_HAS_CUDA=1)
    target_include_directories(${PROJECT_NAME} PUBLIC ${CUDA_TOOLKIT_ROOT_DIR}/include)
    target_link_directories(${PROJECT_NAME} PUBLIC ${CUDA_TOOLKIT_ROOT_DIR}/lib64)
    target_link_libraries(${PROJECT_NAME} PRIVATE nvrtc)
endif ()
target_link_libraries(${PROJECT_NAME}
        PRIVATE ${PPLNN_LIBRARIES} nvrtc)
        PRIVATE ${PPLNN_LIBRARIES})
if (PPLNN_USE_RISCV AND ("cpu" IN_LIST MMDEPLOY_TARGET_DEVICES))
    target_compile_definitions(${PROJECT_NAME} PRIVATE -DPPL_NN_HAS_RISCV=1)
endif ()
add_library(mmdeploy::pplnn_net ALIAS ${PROJECT_NAME})
@ -18,6 +18,11 @@
#include "ppl/nn/engines/cuda/ops.h"
#define PPL_CUDA_IMPORT_FROM_BUFFER 1
#endif
#if PPL_NN_HAS_RISCV
#include "ppl/nn/engines/riscv/engine_factory.h"
#include "ppl/nn/engines/riscv/engine_options.h"
#include "ppl/nn/engines/riscv/ops.h"
#endif

namespace mmdeploy {

@ -92,6 +97,18 @@ Result<void> PPLNet::Init(const Value& args) {
    engines_.emplace_back(ppl::nn::x86::EngineFactory::Create({}));
  }
#endif
#if PPL_NN_HAS_RISCV
  if (device_.is_host()) {
    ppl::nn::riscv::RegisterBuiltinOpImpls();
    ppl::nn::riscv::EngineOptions options{};
    // TODO:
    // FP16 -> postprocess
    options.forward_precision = ppl::common::DATATYPE_FLOAT32;
    options.dynamic_tuning_level = 0;
    options.winograd_level = 1;
    engines_.emplace_back(ppl::nn::riscv::EngineFactory::Create(options));
  }
#endif

  std::vector<ppl::nn::Engine*> engines;
  for (const auto& engine : engines_) {
@ -0,0 +1,28 @@
# Copyright (c) OpenMMLab. All rights reserved.

project(mmdeploy_torch_net)

option(MMDEPLOY_TORCHSCRIPT_SDK_BACKEND "Build TorchScript SDK backend" OFF)

if (MMDEPLOY_TORCHSCRIPT_SDK_BACKEND)
    find_package(Torch REQUIRED)
    find_package(TorchVision QUIET)

    mmdeploy_add_module(${PROJECT_NAME} torch_net.cpp)

    target_link_libraries(${PROJECT_NAME} PRIVATE
            ${TORCH_LIBRARIES})

    target_link_directories(${PROJECT_NAME} INTERFACE
            $<BUILD_INTERFACE:${Torch_DIR}/../../../lib>)

    target_link_libraries(${PROJECT_NAME} PRIVATE
            mmdeploy_torchscript_ops_obj)

    if (TorchVision_FOUND)
        target_link_libraries(${PROJECT_NAME} PRIVATE TorchVision::TorchVision)
        target_compile_definitions(${PROJECT_NAME} PRIVATE -DMMDEPLOY_USE_TORCHVISION=1)
    endif ()

    add_library(mmdeploy::torch_net ALIAS ${PROJECT_NAME})
endif ()
@ -0,0 +1,237 @@
// Copyright (c) OpenMMLab. All rights reserved.

#include "mmdeploy/net/torchscript/torch_net.h"

#include "mmdeploy/core/model.h"
#include "mmdeploy/core/utils/formatter.h"
#include "torch/torch.h"

#if MMDEPLOY_USE_CUDA
#include "c10/cuda/CUDAGuard.h"
#include "c10/cuda/CUDAStream.h"
#endif

#if MMDEPLOY_USE_TORCHVISION
#include "torchvision/vision.h"
MMDEPLOY_API void _mmdeploy_force_link_torchvision() { vision::detail::_register_ops(); }
#endif

namespace mmdeploy {

namespace {

class InferenceMode {
#if TORCH_VERSION_MAJOR == 1 && TORCH_VERSION_MINOR >= 10
  c10::InferenceMode guard_;
#else
  at::AutoNonVariableTypeMode guard_;
#endif
};

class StreamGuard {
 public:
  StreamGuard(const torch::Device& device, Stream stream)
      : device_(device), stream_(std::move(stream)), device_guard_(device) {
    stream_.Wait().value();
  }

  ~StreamGuard() {
#if MMDEPLOY_USE_CUDA
    auto device = stream_.GetDevice();
    if (device.is_device()) {
      Stream stream(device, (cudaStream_t)c10::cuda::getCurrentCUDAStream(device_.index()));
      stream.Wait().value();
    }
#endif
  }

 private:
  torch::Device device_;
  Stream stream_;
  c10::DeviceGuard device_guard_;
};

Result<torch::ScalarType> FromDataType(DataType data_type) {
  switch (data_type) {
    case DataType::kFLOAT:
      return torch::ScalarType::Float;
    case DataType::kHALF:
      return torch::ScalarType::Half;
    case DataType::kINT32:
      return torch::ScalarType::Int;
    case DataType::kINT64:
      return torch::ScalarType::Long;
    case DataType::kINT8:
      return torch::ScalarType::Char;
    default:
      MMDEPLOY_ERROR("Unsupported mmdeploy::DataType: {}", to_string(data_type));
      return Status(eNotSupported);
  }
}

Result<DataType> ToDataType(torch::ScalarType scalar_type) {
  switch (scalar_type) {
    case torch::ScalarType::Float:
      return DataType::kFLOAT;
    case torch::ScalarType::Half:
      return DataType::kHALF;
    case torch::ScalarType::Int:
      return DataType::kINT32;
    case torch::ScalarType::Long:
      return DataType::kINT64;
    case torch::ScalarType::Char:
      return DataType::kINT8;
    default:
      MMDEPLOY_ERROR("Unsupported torch::ScalarType: {}", toString(scalar_type));
      return Status(eNotSupported);
  }
}

}  // namespace

TorchNet::~TorchNet() = default;

Result<void> TorchNet::Init(const Value& cfg) {
  auto& context = cfg["context"];
  device_ = context["device"].get<Device>();
  stream_ = context["stream"].get<Stream>();

  auto name = cfg["name"].get<std::string>();
  auto model = context["model"].get<Model>();

  OUTCOME_TRY(auto config, model.GetModelConfig(name));
  OUTCOME_TRY(auto bytes, model.ReadFile(config.net));

  auto platform = Platform(device_.platform_id());
  auto device_name = platform.GetPlatformName();

  try {
    {
      using namespace std::string_literals;
      if (device_name == "cpu"s) {
        torch_device_ = torch::Device(device_name);
      } else {
        torch_device_ = torch::Device(device_name + ":"s + std::to_string(device_.device_id()));
      }
    }
    std::istringstream iss(bytes);
    InferenceMode guard;
    module_ = torch::jit::load(iss);
    module_.eval();
    module_.to(*torch_device_);
    auto forward = module_.get_method("forward");

    auto ToDesc = [&](torch::jit::Value* value, const char* type, int index) {
      MMDEPLOY_INFO("Found {}: {}", type, value->debugNameBase());
      return TensorDesc{device_, DataType::kFLOAT, {}, "#" + std::to_string(index)};
    };

    auto inputs = forward.graph()->inputs();
    int input_count = 0;
    for (int i = 1; i < inputs.size(); ++i) {
      if (inputs[i]->type()->kind() == c10::TypeKind::TensorType) {
        input_tensor_.emplace_back(ToDesc(inputs[i], "input", input_count++));
      } else {
        MMDEPLOY_ERROR("Unsupported input type: {}", typeKindToString(inputs[i]->type()->kind()));
        return Status(eNotSupported);
      }
    }

    auto outputs = forward.graph()->outputs();
    int output_count = 0;
    for (const auto& output : outputs) {
      auto kind = output->type()->kind();
      if (kind == c10::TypeKind::TensorType) {
        output_tensor_.emplace_back(ToDesc(output, "output", output_count++));
      } else if (output->type()->kind() == c10::TypeKind::TupleType) {
        for (const auto& v : output->node()->inputs()) {
          if (v->type()->kind() == c10::TypeKind::TensorType) {
            output_tensor_.emplace_back(ToDesc(v, "output", output_count++));
          } else {
            MMDEPLOY_ERROR("Unsupported output type: {}", typeKindToString(v->type()->kind()));
            return Status(eNotSupported);
          }
        }
      } else {
        MMDEPLOY_ERROR("Unsupported output type: {}", typeKindToString(kind));
      }
    }
    return success();
  } catch (const std::exception& e) {
    MMDEPLOY_ERROR("unhandled exception: {}", e.what());
    return Status(eFail);
  }
}

Result<void> TorchNet::Deinit() { return success(); }
Result<Span<Tensor>> TorchNet::GetInputTensors() { return input_tensor_; }
Result<Span<Tensor>> TorchNet::GetOutputTensors() { return output_tensor_; }

Result<void> TorchNet::Reshape(Span<TensorShape> input_shapes) {
  if (input_shapes.size() != input_tensor_.size()) {
    return Status(eInvalidArgument);
  }
  for (size_t i = 0; i < input_shapes.size(); ++i) {
    input_tensor_[i].Reshape(input_shapes[i]);
  }
  return success();
}

Result<void> TorchNet::Forward() {
  try {
    StreamGuard stream_guard(*torch_device_, stream_);
    InferenceMode inference_guard;
    std::vector<torch::jit::IValue> inputs;
    for (auto& v : input_tensor_) {
      OUTCOME_TRY(auto data_type, FromDataType(v.data_type()));
      auto tensor = torch::from_blob(v.data(), v.shape(),
                                     c10::TensorOptions(*torch_device_).dtype(data_type));
      inputs.emplace_back(tensor);
    }
    auto outputs = module_.forward(inputs);
    if (outputs.isTensor()) {
      OUTCOME_TRY(output_tensor_[0], FromTorchTensor(outputs.toTensor(), output_tensor_[0].name()));
    } else if (outputs.isTuple()) {
      auto tuple = outputs.toTuple();
      size_t index = 0;
      for (const auto& x : tuple->elements()) {
        OUTCOME_TRY(output_tensor_[index],
                    FromTorchTensor(x.toTensor(), output_tensor_[index].name()));
        ++index;
      }
    } else {
      MMDEPLOY_ERROR("{}", toString(outputs.type()));
      return Status(eNotSupported);
    }
  } catch (const std::exception& e) {
    MMDEPLOY_ERROR("unhandled exception: {}", e.what());
    return Status(eFail);
  }
  return success();
}
Result<void> TorchNet::ForwardAsync(Event* event) { return success(); }

Result<Tensor> TorchNet::FromTorchTensor(const torch::Tensor& tensor, const std::string& name) {
  OUTCOME_TRY(auto data_type, ToDataType(tensor.scalar_type()));
  auto shape = tensor.sizes();
  TensorDesc desc{device_, data_type, {shape.begin(), shape.end()}, name};
  return Tensor(desc, std::shared_ptr<void>(tensor.data_ptr(), [tensor](auto) {}));
}

class TorchNetCreator : public Creator<Net> {
 public:
  const char* GetName() const override { return "torchscript"; }
  std::unique_ptr<Net> Create(const Value& cfg) override {
    auto p = std::make_unique<TorchNet>();
    if (auto status = p->Init(cfg)) {
      return p;
    } else {
      MMDEPLOY_ERROR("Failed to create TorchNet with config: {}", cfg);
    }
    return nullptr;
  }
};

REGISTER_MODULE(Net, TorchNetCreator);

}  // namespace mmdeploy
@ -0,0 +1,35 @@
// Copyright (c) OpenMMLab. All rights reserved.

#ifndef MMDEPLOY_CSRC_MMDEPLOY_NET_TORCHSCRIPT_TORCH_NET_H_
#define MMDEPLOY_CSRC_MMDEPLOY_NET_TORCHSCRIPT_TORCH_NET_H_

#include "mmdeploy/core/net.h"
#include "torch/script.h"

namespace mmdeploy {

class TorchNet : public Net {
 public:
  ~TorchNet() override;
  Result<void> Init(const Value& cfg) override;
  Result<void> Deinit() override;
  Result<Span<Tensor>> GetInputTensors() override;
  Result<Span<Tensor>> GetOutputTensors() override;
  Result<void> Reshape(Span<TensorShape> input_shapes) override;
  Result<void> Forward() override;
  Result<void> ForwardAsync(Event* event) override;

 private:
  Result<Tensor> FromTorchTensor(const torch::Tensor& tensor, const std::string& name);

  torch::jit::script::Module module_;
  std::vector<Tensor> input_tensor_;
  std::vector<Tensor> output_tensor_;
  Device device_;
  Stream stream_;
  std::optional<torch::Device> torch_device_;
};

}  // namespace mmdeploy

#endif  // MMDEPLOY_CSRC_MMDEPLOY_NET_TORCHSCRIPT_TORCH_NET_H_
@ -4,6 +4,9 @@ project(mmdeploy_transform_module)

add_subdirectory(transform)
add_subdirectory(cpu)
if (MMDEPLOY_ELENA_FUSION)
    add_subdirectory(elena)
endif ()
if ("cuda" IN_LIST MMDEPLOY_TARGET_DEVICES)
    add_subdirectory(cuda)
endif ()

@ -5,6 +5,7 @@ project(mmdeploy_cuda_transform_impl CUDA CXX)
find_package(pplcv REQUIRED)

set(SRCS
        collect_impl.cpp
        crop_impl.cpp
        image2tensor_impl.cpp
        default_format_bundle_impl.cpp
@ -0,0 +1,28 @@
// Copyright (c) OpenMMLab. All rights reserved.

#include "mmdeploy/preprocess/transform/collect.h"

namespace mmdeploy {
namespace cuda {

class CollectImpl : public ::mmdeploy::CollectImpl {
 public:
  CollectImpl(const Value& args) : ::mmdeploy::CollectImpl(args) {}
  ~CollectImpl() = default;
};

class CollectImplCreator : public Creator<::mmdeploy::CollectImpl> {
 public:
  const char* GetName() const override { return "cuda"; }
  int GetVersion() const override { return 1; }
  std::unique_ptr<::mmdeploy::CollectImpl> Create(const Value& args) override {
    return std::make_unique<CollectImpl>(args);
  }
};

}  // namespace cuda
}  // namespace mmdeploy

using mmdeploy::CollectImpl;
using mmdeploy::cuda::CollectImplCreator;
REGISTER_MODULE(CollectImpl, CollectImplCreator);
@ -0,0 +1,31 @@
# Copyright (c) OpenMMLab. All rights reserved.

project(mmdeploy_elena_transform_impl)

set(SRCS
        crop_impl.cpp
        collect_impl.cpp
        image2tensor_impl.cpp
        default_format_bundle_impl.cpp
        load_impl.cpp
        normalize_impl.cpp
        pad_impl.cpp
        resize_impl.cpp
        elena_registry.cpp)

file(GLOB CPU_KERNEL_SRCS "cpu_kernel/*.cpp")

set(ALL_SRCS ${SRCS} ${CPU_KERNEL_SRCS})
if ("cuda" IN_LIST MMDEPLOY_TARGET_DEVICES)
    file(GLOB CUDA_KERNEL_SRCS "cuda_kernel/*.cu")
    set(ALL_SRCS ${ALL_SRCS} ${CUDA_KERNEL_SRCS})
endif ()

mmdeploy_add_module(${PROJECT_NAME} "${ALL_SRCS}")
target_include_directories(${PROJECT_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
target_link_libraries(${PROJECT_NAME}
        PRIVATE mmdeploy::transform)
if ("cuda" IN_LIST MMDEPLOY_TARGET_DEVICES)
    target_link_libraries(${PROJECT_NAME} PRIVATE cuda)
endif ()
add_library(mmdeploy::transform_impl::elena ALIAS ${PROJECT_NAME})
@ -0,0 +1,145 @@
// Copyright (c) OpenMMLab. All rights reserved.
#include <iostream>
#include <set>
#include <string>

#include "elena_registry.h"
#include "mmdeploy/archive/json_archive.h"
#include "mmdeploy/core/mat.h"
#include "mmdeploy/core/tensor.h"
#include "mmdeploy/core/utils/device_utils.h"
#include "mmdeploy/core/utils/formatter.h"
#include "mmdeploy/preprocess/transform/collect.h"
#include "mmdeploy/preprocess/transform/tracer.h"

namespace mmdeploy {
namespace elena {

using namespace trace;

struct ExtractTransParamVisitor {
  bool valid{true};
  std::set<std::string> st;

  std::array<float, 3> mean;
  std::array<float, 3> std;
  std::array<int, 2> resize_hw;
  std::string resize_mode;
  float pad_val;
  std::array<int, 4> pad_tlbr;
  std::array<int, 2> pad_hw;
  std::array<int, 4> crop_tlbr;
  std::array<int, 2> crop_hw;

  void CheckValid(const std::string& name) {
    if (st.count(name)) {
      valid = false;
      return;
    }
    st.insert(name);
  }

  void operator()(CvtColorParam&) {}
  void operator()(CastParam&) {}
  void operator()(HWC2CHWParam&) {}

  void operator()(ResizeParam& param) {
    CheckValid("Resize");
    resize_hw = {param.size[0], param.size[1]};
    resize_mode = param.mode;
  }
  void operator()(PadParam& param) {
    CheckValid("Pad");
    pad_val = param.pad_val;
    std::copy_n(param.tlbr.begin(), 4, pad_tlbr.begin());
    std::copy_n(param.size.begin(), 2, pad_hw.begin());
  }
  void operator()(NormParam& param) {
    CheckValid("Normalize");
    std::copy(param.mean.begin(), param.mean.end(), mean.begin());
    std::copy(param.std.begin(), param.std.end(), std.begin());
  }
  void operator()(CropParam& param) {
    CheckValid("CenterCrop");
    std::copy_n(param.tlbr.begin(), 4, crop_tlbr.begin());
    std::copy_n(param.size.begin(), 2, crop_hw.begin());
  }
};

class CollectImpl : public ::mmdeploy::CollectImpl {
 public:
  CollectImpl(const Value& args) : ::mmdeploy::CollectImpl(args) {
    Platform platform(device_.platform_id());
    device_name_ = platform.GetPlatformName();
    sha256_ = args["context"].value("sha256", std::string(""));
  }

  ~CollectImpl() = default;

  Result<Value> Process(const Value& input) override {
    auto tracer = input["__tracer__"].get<Tracer>();
    Mat _src_mat = input["ori_img"].get<Mat>();
    OUTCOME_TRY(auto src_mat, MakeAvailableOnDevice(_src_mat, device_, stream_));
    OUTCOME_TRY(stream_.Wait());

    ExtractTransParamVisitor visitor{};
    for (auto&& trans : tracer.trans_) {
      std::visit(visitor, trans);
    }
    std::string tag = sha256_ + "_" + device_name_;
    FuseFunc func = FuseKernel::Get().GetFunc(tag);

    if (!visitor.valid) {
      MMDEPLOY_ERROR("unsupported fuse transform");
      throw std::invalid_argument("");
    }
    if (src_mat.type() != DataType::kINT8) {
      MMDEPLOY_ERROR("unsupported data type in fuse transform");
      throw std::invalid_argument("");
    }
    if (!func) {
      MMDEPLOY_ERROR("can't find fuse function with tag: {}", tag);
      throw std::invalid_argument("");
    }

    Value output = input;
    auto img_fields = GetImageFields(input);
    for (auto& key : img_fields) {
      assert(input.contains(key));
      auto src_tensor = input[key].get<Tensor>();
      auto desc = src_tensor.desc();
      desc.device = device_;
      Tensor dst_tensor{desc};

      func(stream_.GetNative(), src_mat.data<uint8_t>(), src_mat.height(), src_mat.width(),
           to_string(src_mat.pixel_format()).c_str(), visitor.resize_hw[0], visitor.resize_hw[1],
           visitor.resize_mode.c_str(), visitor.crop_tlbr[0], visitor.crop_tlbr[1],
           visitor.crop_hw[0], visitor.crop_hw[1], visitor.mean[0], visitor.mean[1],
           visitor.mean[2], visitor.std[0], visitor.std[1], visitor.std[2], visitor.pad_tlbr[0],
           visitor.pad_tlbr[1], visitor.pad_tlbr[2], visitor.pad_tlbr[3], visitor.pad_hw[0],
           visitor.pad_hw[1], visitor.pad_val, dst_tensor.data<float>(), dst_tensor.shape(2),
           dst_tensor.shape(3));
      output[key] = std::move(dst_tensor);
    }
    return ::mmdeploy::CollectImpl::Process(output);
  }

  std::string sha256_;
  std::string device_name_;
};

class CollectImplCreator : public Creator<::mmdeploy::CollectImpl> {
 public:
  const char* GetName() const override { return "elena"; }
  int GetVersion() const override { return 1; }
  std::unique_ptr<::mmdeploy::CollectImpl> Create(const Value& args) override {
    return std::make_unique<CollectImpl>(args);
  }
};

}  // namespace elena
}  // namespace mmdeploy

using mmdeploy::CollectImpl;
using mmdeploy::elena::CollectImplCreator;
REGISTER_MODULE(CollectImpl, CollectImplCreator);
@ -0,0 +1,44 @@
// Copyright (c) OpenMMLab. All rights reserved.

#include "mmdeploy/preprocess/transform/crop.h"

using namespace std;

namespace mmdeploy {
namespace elena {

class CenterCropImpl : public ::mmdeploy::CenterCropImpl {
 public:
  explicit CenterCropImpl(const Value& args) : ::mmdeploy::CenterCropImpl(args) {}

 protected:
  Result<Tensor> CropImage(const Tensor& tensor, int top, int left, int bottom,
                           int right) override {
    auto& src_desc = tensor.desc();
    auto data_type = src_desc.data_type;
    auto shape = src_desc.shape;
    shape[1] = bottom - top + 1;  // h
    shape[2] = right - left + 1;  // w

    TensorDesc dummy_desc = {Device{"cpu"}, data_type, shape};
    Tensor dummy(dummy_desc, dummy_buffer_);

    return dummy;
  }
  Buffer dummy_buffer_{Device{"cpu"}, 0, nullptr};
};

class CenterCropImplCreator : public Creator<::mmdeploy::CenterCropImpl> {
 public:
  const char* GetName() const override { return "elena"; }
  int GetVersion() const override { return 1; }
  ReturnType Create(const Value& args) override { return make_unique<CenterCropImpl>(args); }
};

}  // namespace elena
}  // namespace mmdeploy

using ::mmdeploy::CenterCropImpl;
using ::mmdeploy::elena::CenterCropImplCreator;

REGISTER_MODULE(CenterCropImpl, CenterCropImplCreator);
@ -0,0 +1,56 @@
// Copyright (c) OpenMMLab. All rights reserved.

#include "mmdeploy/preprocess/transform/default_format_bundle.h"

namespace mmdeploy {
namespace elena {

class DefaultFormatBundleImpl : public ::mmdeploy::DefaultFormatBundleImpl {
 public:
  explicit DefaultFormatBundleImpl(const Value& args) : ::mmdeploy::DefaultFormatBundleImpl(args) {}

 protected:
  Result<Tensor> ToFloat32(const Tensor& tensor, const bool& img_to_float) override {
    auto& src_desc = tensor.desc();
    auto data_type = src_desc.data_type;
    auto shape = src_desc.shape;

    if (img_to_float && data_type == DataType::kINT8) {
      data_type = DataType::kFLOAT;
    }

    TensorDesc dummy_desc = {Device{"cpu"}, data_type, shape};
    Tensor dummy(dummy_desc, dummy_buffer_);

    return dummy;
  }

  Result<Tensor> HWC2CHW(const Tensor& tensor) override {
    auto& src_desc = tensor.desc();
    auto data_type = src_desc.data_type;
    auto shape = src_desc.shape;
    shape = {shape[0], shape[3], shape[1], shape[2]};

    TensorDesc dummy_desc = {Device{"cpu"}, data_type, shape};
    Tensor dummy(dummy_desc, dummy_buffer_);

    return dummy;
  }
  Buffer dummy_buffer_{Device{"cpu"}, 0, nullptr};
};

class DefaultFormatBundleImplCreator : public Creator<::mmdeploy::DefaultFormatBundleImpl> {
 public:
  const char* GetName() const override { return "elena"; }
  int GetVersion() const override { return 1; }
  ReturnType Create(const Value& args) override {
    return std::make_unique<DefaultFormatBundleImpl>(args);
  }
};

}  // namespace elena
}  // namespace mmdeploy

using mmdeploy::DefaultFormatBundleImpl;
using mmdeploy::elena::DefaultFormatBundleImplCreator;
REGISTER_MODULE(DefaultFormatBundleImpl, DefaultFormatBundleImplCreator);
@ -0,0 +1,32 @@
// Copyright (c) OpenMMLab. All rights reserved.

#include "elena_registry.h"

#include "mmdeploy/core/logger.h"

namespace mmdeploy {
namespace elena {

FuseKernel& FuseKernel::Get() {
  static FuseKernel fuse_kernel;
  return fuse_kernel;
}

FuseFunc FuseKernel::GetFunc(const std::string& name) {
  if (entries_.count(name)) {
    return entries_[name];
  }
  return nullptr;
}

int FuseKernel::Register(const std::string& name, FuseFunc func) {
  if (entries_.count(name)) {
    return -1;
  }
  MMDEPLOY_DEBUG("Register fuse kernel: '{}'", name);
  entries_.emplace(name, func);
  return 0;
}

}  // namespace elena
}  // namespace mmdeploy
@ -0,0 +1,45 @@
// Copyright (c) OpenMMLab. All rights reserved.

#ifndef MMDEPLOY_ELENA_REGISTRY_H_
#define MMDEPLOY_ELENA_REGISTRY_H_

#include <map>
#include <string>

#include "mmdeploy/core/macro.h"

namespace mmdeploy {
namespace elena {

using FuseFunc = void (*)(void* stream, uint8_t* data_in, int src_h, int src_w, const char* format,
                          int resize_h, int resize_w, const char* interpolation, int crop_top,
                          int crop_left, int crop_h, int crop_w, float mean0, float mean1,
                          float mean2, float std0, float std1, float std2, int pad_top,
                          int pad_left, int pad_bottom, int pad_right, int pad_h, int pad_w,
                          float pad_value, float* data_out, int dst_h, int dst_w);

class MMDEPLOY_API FuseKernel {
 public:
  static FuseKernel& Get();
  int Register(const std::string& name, FuseFunc func);
  FuseFunc GetFunc(const std::string& name);

 private:
  FuseKernel() = default;
  std::map<std::string, FuseFunc> entries_;
};

class MMDEPLOY_API FuseKernelRegister {
 public:
  FuseKernelRegister(const std::string& name, FuseFunc func) {
    FuseKernel::Get().Register(name, func);
  }
};

}  // namespace elena
}  // namespace mmdeploy

#define REGISTER_FUSE_KERNEL(name, module_name, func) \
  static ::mmdeploy::elena::FuseKernelRegister g_register_##name##_##func(module_name, func);

#endif
@ -0,0 +1,41 @@
// Copyright (c) OpenMMLab. All rights reserved.

#include "mmdeploy/preprocess/transform/image2tensor.h"

namespace mmdeploy {
namespace elena {

class ImageToTensorImpl : public ::mmdeploy::ImageToTensorImpl {
 public:
  explicit ImageToTensorImpl(const Value& args) : ::mmdeploy::ImageToTensorImpl(args) {}

 protected:
  Result<Tensor> HWC2CHW(const Tensor& tensor) override {
    auto& src_desc = tensor.desc();
    auto data_type = src_desc.data_type;
    auto shape = src_desc.shape;
    shape = {shape[0], shape[3], shape[1], shape[2]};

    TensorDesc dummy_desc = {Device{"cpu"}, data_type, shape};
    Tensor dummy(dummy_desc, dummy_buffer_);

    return dummy;
  }
  Buffer dummy_buffer_{Device{"cpu"}, 0, nullptr};
};

class ImageToTensorImplCreator : public Creator<::mmdeploy::ImageToTensorImpl> {
 public:
  const char* GetName() const override { return "elena"; }
  int GetVersion() const override { return 1; }
  ReturnType Create(const Value& args) override {
    return std::make_unique<ImageToTensorImpl>(args);
  }
};

}  // namespace elena
}  // namespace mmdeploy

using mmdeploy::ImageToTensorImpl;
using mmdeploy::elena::ImageToTensorImplCreator;
REGISTER_MODULE(ImageToTensorImpl, ImageToTensorImplCreator);
Some files were not shown because too many files have changed in this diff.