diff --git a/.circleci/scripts/linux/build.sh b/.circleci/scripts/linux/build.sh
index fdd55ca60..77342f9dd 100644
--- a/.circleci/scripts/linux/build.sh
+++ b/.circleci/scripts/linux/build.sh
@@ -6,11 +6,7 @@ cd mmdeploy
 MMDEPLOY_DIR=$(pwd)
 mkdir -p build && cd build
 cmake .. -DMMDEPLOY_BUILD_SDK=ON -DMMDEPLOY_BUILD_TEST=ON -DMMDEPLOY_BUILD_SDK_PYTHON_API=ON \
-    -DMMDEPLOY_BUILD_SDK_CXX_API=ON -DMMDEPLOY_BUILD_SDK_CSHARP_API=ON \
+    -DMMDEPLOY_BUILD_EXAMPLES=ON -DMMDEPLOY_BUILD_SDK_CXX_API=ON -DMMDEPLOY_BUILD_SDK_CSHARP_API=ON \
     -DMMDEPLOY_TARGET_DEVICES="$1" -DMMDEPLOY_TARGET_BACKENDS="$2" "${ARGS[@]:2}"
 make -j$(nproc) && make install
-cd install/example
-mkdir -p build
-cd build
-cmake ../cpp -DMMDeploy_DIR="$MMDEPLOY_DIR"/build/install/lib/cmake/MMDeploy "${ARGS[@]:2}" && make -j$(nproc)
diff --git a/.circleci/scripts/windows/install_opencv.ps1 b/.circleci/scripts/windows/install_opencv.ps1
index 2e5cae5f7..4aaabd6e5 100644
--- a/.circleci/scripts/windows/install_opencv.ps1
+++ b/.circleci/scripts/windows/install_opencv.ps1
@@ -1,3 +1,3 @@
-Invoke-WebRequest -Uri https://download.openmmlab.com/mmdeploy/library/opencv-4.5.5.zip -OutFile opencv.zip
+Invoke-WebRequest -Uri https://github.com/irexyc/mmdeploy-ci-resource/releases/download/opencv/opencv-win-amd64-4.5.5-vc16.zip -OutFile opencv.zip
 Expand-Archive opencv.zip .
 Move-Item opencv-4.5.5 opencv
diff --git a/.circleci/test.yml b/.circleci/test.yml
index cca08a194..8725ed94b 100644
--- a/.circleci/test.yml
+++ b/.circleci/test.yml
@@ -192,19 +192,13 @@ jobs:
             -DMMDEPLOY_BUILD_SDK=ON `
             -DMMDEPLOY_BUILD_TEST=ON `
             -DMMDEPLOY_BUILD_SDK_PYTHON_API=ON `
+            -DMMDEPLOY_BUILD_EXAMPLES=ON `
             -DMMDEPLOY_BUILD_SDK_CXX_API=ON `
             -DMMDEPLOY_BUILD_SDK_CSHARP_API=ON `
             -DMMDEPLOY_TARGET_BACKENDS="ort" `
             -DOpenCV_DIR="$env:OPENCV_PACKAGE_DIR"
           cmake --build . --config Release -- /m
           cmake --install . --config Release
-          cd install/example
-          mkdir build -ErrorAction SilentlyContinue
-          cd build
-          cmake ../cpp -G "Visual Studio 16 2019" -A x64 -T v142 `
-            -DMMDeploy_DIR="$env:MMDEPLOY_DIR/build/install/lib/cmake/MMDeploy" `
-            -DOpenCV_DIR="$env:OPENCV_PACKAGE_DIR"
-          cmake --build . --config Release -- /m
       - install_mmdeploy
       - install_model_converter_req
      - perform_model_converter_ut
@@ -256,7 +250,7 @@ jobs:
       - run:
           name: Inference model by SDK
           command: |
-            mmdeploy/build/install/example/build/image_classification cpu mmdeploy-models/mmcls/onnxruntime mmdeploy/tests/data/tiger.jpeg
+            ./mmdeploy/build/bin/image_classification cpu mmdeploy-models/mmcls/onnxruntime mmdeploy/demo/resources/cityscapes.png
 
 # See: https://circleci.com/docs/2.0/configuration-reference/#workflows
diff --git a/.github/ISSUE_TEMPLATE/bug-report.yml b/.github/ISSUE_TEMPLATE/1-bug-report.yml
similarity index 94%
rename from .github/ISSUE_TEMPLATE/bug-report.yml
rename to .github/ISSUE_TEMPLATE/1-bug-report.yml
index 79bc39f42..bf6924789 100644
--- a/.github/ISSUE_TEMPLATE/bug-report.yml
+++ b/.github/ISSUE_TEMPLATE/1-bug-report.yml
@@ -1,6 +1,7 @@
-name: Bug report
-description: Create a report to help us improve
-
+name: 🐞 Bug report
+description: Create a report to help us reproduce and fix the bug
+title: "[Bug] "
+labels: ['Bug']
 body:
 
 - type: checkboxes
@@ -52,5 +53,3 @@ body:
       If you have already identified the reason, you can provide the information here. If you are willing to create a PR to fix it, please also leave a comment here and that would be much appreciated!
 
       Thanks for your bug report. We appreciate it a lot.
-
-labels: ['Bug']
diff --git a/.github/ISSUE_TEMPLATE/feature_request.yaml b/.github/ISSUE_TEMPLATE/2-feature_request.yaml
similarity index 69%
rename from .github/ISSUE_TEMPLATE/feature_request.yaml
rename to .github/ISSUE_TEMPLATE/2-feature_request.yaml
index 92e9a914d..df6345c02 100644
--- a/.github/ISSUE_TEMPLATE/feature_request.yaml
+++ b/.github/ISSUE_TEMPLATE/2-feature_request.yaml
@@ -1,11 +1,15 @@
-name: Feature request
+name: 🚀 Feature request
 description: Suggest an idea for this project
+title: "[Feature] "
 
 body:
 - type: markdown
   attributes:
-    value: >
-      ## Describe the feature
+    value: |
+      We strongly appreciate you creating a PR to implement this feature [here](https://github.com/open-mmlab/mmdeploy/pulls)!
+      If you need our help, please fill in as much of the following form as you're able to.
+
+      **The less clear the description, the longer it will take to solve it.**
 
 - type: textarea
   attributes:
     label: Motivation
diff --git a/.github/ISSUE_TEMPLATE/3-documentation.yml b/.github/ISSUE_TEMPLATE/3-documentation.yml
new file mode 100644
index 000000000..b112c2aea
--- /dev/null
+++ b/.github/ISSUE_TEMPLATE/3-documentation.yml
@@ -0,0 +1,23 @@
+name: 📚 Documentation
+description: Report an issue related to the documentation.
+labels: "kind/doc,status/unconfirmed"
+title: "[Docs] "
+
+body:
+- type: textarea
+  attributes:
+    label: 📚 The doc issue
+    description: >
+      A clear and concise description of the issue.
+  validations:
+    required: true
+
+- type: textarea
+  attributes:
+    label: Suggest a potential alternative/fix
+    description: >
+      Tell us how we could improve the documentation in this regard.
+- type: markdown
+  attributes:
+    value: >
+      Thanks for contributing 🎉!
diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml
index 1f68f5166..bcf07914a 100644
--- a/.github/ISSUE_TEMPLATE/config.yml
+++ b/.github/ISSUE_TEMPLATE/config.yml
@@ -1,6 +1,12 @@
 blank_issues_enabled: false
 contact_links:
-  - name: Common Issues
+  - name: 💥 FAQ
     url: https://github.com/open-mmlab/mmdeploy/blob/master/docs/en/faq.md
     about: Check if your issue already has solutions
+  - name: 💬 Forum
+    url: https://github.com/open-mmlab/mmdeploy/discussions
+    about: Ask general usage questions and discuss with other MMDeploy community members
+  - name: 🌐 Explore OpenMMLab
+    url: https://openmmlab.com/
+    about: Get to know more about OpenMMLab
diff --git a/.github/ISSUE_TEMPLATE/general_questions.md b/.github/ISSUE_TEMPLATE/general_questions.md
deleted file mode 100644
index f02dd63a8..000000000
--- a/.github/ISSUE_TEMPLATE/general_questions.md
+++ /dev/null
@@ -1,7 +0,0 @@
----
-name: General questions
-about: Ask general questions to get help
-title: ''
-labels: ''
-assignees: ''
----
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index dfdf7e59c..f70301e1c 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -89,6 +89,23 @@ jobs:
           ls -lah coverage.info
           cp coverage.info ../
 
+  cross_build_aarch64:
+    runs-on: ubuntu-20.04
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v3
+        with:
+          submodules: 'recursive'
+      - name: update
+        run: sudo apt update
+      - name: Set up Python
+        uses: actions/setup-python@v2
+        with:
+          python-version: 3.8
+      - name: gcc-multilib
+        run: |
+          sh -x tools/scripts/ubuntu_cross_build_aarch64.sh
+
   build_cuda102:
     runs-on: ubuntu-18.04
     container:
diff --git a/.github/workflows/linux-rknpu2-gcc.yml b/.github/workflows/linux-rknpu2-gcc.yml
new file mode 100644
index 000000000..470944e7b
--- /dev/null
+++ b/.github/workflows/linux-rknpu2-gcc.yml
@@ -0,0 +1,54 @@
+name: build_rknpu2_gcc
+
+on:
+  push:
+    paths:
+      - "csrc/**"
+      - "demo/csrc/**"
+      - "CMakeLists.txt"
+
+  pull_request:
+    paths-ignore:
+      - "csrc/**"
+      - "demo/csrc/**"
+      - "CMakeLists.txt"
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  build_rknpu2_gcc:
+    runs-on: ubuntu-18.04
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v3
+        with:
+          submodules: 'recursive'
+      - name: rknpu2-gnu-toolchain
+        run: |
+          mkdir $GITHUB_WORKSPACE/rknpu2-gnu-toolchain
+          cd $GITHUB_WORKSPACE/rknpu2-gnu-toolchain
+          git clone https://github.com/Caesar-github/gcc-buildroot-9.3.0-2020.03-x86_64_aarch64-rockchip-linux-gnu.git
+      - name: rknpu2
+        run: |
+          mkdir $GITHUB_WORKSPACE/rknpu2
+          cd $GITHUB_WORKSPACE/rknpu2
+          git clone https://github.com/rockchip-linux/rknpu2.git
+      - name: build
+        run: |
+          export RKNN_TOOL_CHAIN=$GITHUB_WORKSPACE/rknpu2-gnu-toolchain/gcc-buildroot-9.3.0-2020.03-x86_64_aarch64-rockchip-linux-gnu/usr
+          export LD_LIBRARY_PATH=$RKNN_TOOL_CHAIN/lib64:$LD_LIBRARY_PATH
+          export RKNPU2_DEVICE_DIR=$GITHUB_WORKSPACE/rknpu2/rknpu2/runtime/RK3588
+          mkdir build && cd build
+          cmake .. \
+            -DCMAKE_TOOLCHAIN_FILE=$(pwd)/../cmake/toolchains/rknpu2-linux-gnu.cmake \
+            -DMMDEPLOY_BUILD_SDK=ON \
+            -DMMDEPLOY_SHARED_LIBS=ON \
+            -DMMDEPLOY_BUILD_EXAMPLES=ON \
+            -DMMDEPLOY_TARGET_DEVICES="cpu" \
+            -DMMDEPLOY_TARGET_BACKENDS="rknn" \
+            -DMMDEPLOY_CODEBASES=all \
+            -DOpenCV_DIR=$RKNPU2_DEVICE_DIR/../../examples/3rdparty/opencv/opencv-linux-aarch64/share/OpenCV
+          make -j$(nproc)
+          make install
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 81a3d7527..8de3f84f6 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -5,7 +5,7 @@ endif ()
 message(STATUS "CMAKE_INSTALL_PREFIX: ${CMAKE_INSTALL_PREFIX}")
 
 cmake_minimum_required(VERSION 3.14)
-project(MMDeploy VERSION 0.9.0)
+project(MMDeploy VERSION 0.10.0)
 
 set(CMAKE_CXX_STANDARD 17)
 
@@ -128,6 +128,7 @@ if (MMDEPLOY_BUILD_SDK)
         mmdeploy_add_deps(pplnn BACKENDS ${MMDEPLOY_TARGET_BACKENDS} DEPS pplnn)
     endif ()
     mmdeploy_add_deps(snpe BACKENDS ${MMDEPLOY_TARGET_BACKENDS} DEPS snpe)
+    mmdeploy_add_deps(rknn BACKENDS ${MMDEPLOY_TARGET_BACKENDS} DEPS rknn)
 
     include(CMakePackageConfigHelpers)
     # generate the config file that is includes the exports
diff --git a/README.md b/README.md
index a5d177f1f..f74baf6d5 100644
--- a/README.md
+++ b/README.md
@@ -87,6 +87,7 @@ Please read [getting_started](docs/en/get_started.md) for the basic usage of MMD
   - [Build for Android](docs/en/01-how-to-build/android.md)
   - [Build for Jetson](docs/en/01-how-to-build/jetsons.md)
   - [Build for SNPE](docs/en/01-how-to-build/snpe.md)
+  - [Cross Build for aarch64](docs/en/01-how-to-build/cross_build_ncnn_aarch64.md)
 - User Guide
   - [How to convert model](docs/en/02-how-to-run/convert_model.md)
   - [How to write config](docs/en/02-how-to-run/write_config.md)
@@ -148,6 +149,7 @@ This project is released under the [Apache 2.0 license](LICENSE).
 - [MMClassification](https://github.com/open-mmlab/mmclassification): OpenMMLab image classification toolbox and benchmark.
 - [MMDetection](https://github.com/open-mmlab/mmdetection): OpenMMLab detection toolbox and benchmark.
 - [MMDetection3D](https://github.com/open-mmlab/mmdetection3d): OpenMMLab's next-generation platform for general 3D object detection.
+- [MMYOLO](https://github.com/open-mmlab/mmyolo): OpenMMLab YOLO series toolbox and benchmark
 - [MMRotate](https://github.com/open-mmlab/mmrotate): OpenMMLab rotated object detection toolbox and benchmark.
 - [MMSegmentation](https://github.com/open-mmlab/mmsegmentation): OpenMMLab semantic segmentation toolbox and benchmark.
 - [MMOCR](https://github.com/open-mmlab/mmocr): OpenMMLab text detection, recognition, and understanding toolbox.
diff --git a/README_zh-CN.md b/README_zh-CN.md
index d530eef33..d44ab802f 100644
--- a/README_zh-CN.md
+++ b/README_zh-CN.md
@@ -86,6 +86,7 @@ MMDeploy 是 [OpenMMLab](https://openmmlab.com/) 模型部署工具箱,**为
   - [Build for Android](docs/zh_cn/01-how-to-build/android.md)
   - [Build for Jetson](docs/zh_cn/01-how-to-build/jetsons.md)
   - [Build for SNPE](docs/zh_cn/01-how-to-build/snpe.md)
+  - [Cross Build for aarch64](docs/zh_cn/01-how-to-build/cross_build_ncnn_aarch64.md)
 - 使用
   - [把模型转换到推理 Backend](docs/zh_cn/02-how-to-run/convert_model.md)
   - [配置转换参数](docs/zh_cn/02-how-to-run/write_config.md)
@@ -153,6 +154,7 @@ MMDeploy 是 [OpenMMLab](https://openmmlab.com/) 模型部署工具箱,**为
 - [MMClassification](https://github.com/open-mmlab/mmclassification): OpenMMLab 图像分类工具箱
 - [MMDetection](https://github.com/open-mmlab/mmdetection): OpenMMLab 目标检测工具箱
 - [MMDetection3D](https://github.com/open-mmlab/mmdetection3d): OpenMMLab 新一代通用 3D 目标检测平台
+- [MMYOLO](https://github.com/open-mmlab/mmyolo): OpenMMLab YOLO 系列工具箱和基准测试
 - [MMRotate](https://github.com/open-mmlab/mmrotate): OpenMMLab 旋转框检测工具箱与测试基准
 - [MMSegmentation](https://github.com/open-mmlab/mmsegmentation): OpenMMLab 语义分割工具箱
 - [MMOCR](https://github.com/open-mmlab/mmocr): OpenMMLab 全流程文字检测识别理解工具包
diff --git a/cmake/cuda.cmake b/cmake/cuda.cmake
index 5d105de9f..c2f2bdb78 100644
--- a/cmake/cuda.cmake
+++ b/cmake/cuda.cmake
@@ -42,6 +42,7 @@ if (NOT CMAKE_CUDA_ARCHITECTURES)
         if (CUDA_VERSION_MAJOR VERSION_GREATER_EQUAL "8")
             set(_NVCC_FLAGS "${_NVCC_FLAGS} -gencode arch=compute_60,code=sm_60")
             set(_NVCC_FLAGS "${_NVCC_FLAGS} -gencode arch=compute_61,code=sm_61")
+            set(_NVCC_FLAGS "${_NVCC_FLAGS} -gencode arch=compute_62,code=sm_62")
         endif ()
         if (CUDA_VERSION_MAJOR VERSION_GREATER_EQUAL "9")
             set(_NVCC_FLAGS "${_NVCC_FLAGS} -gencode arch=compute_70,code=sm_70")
diff --git a/cmake/toolchains/aarch64-linux-gnu.cmake b/cmake/toolchains/aarch64-linux-gnu.cmake
new file mode 100644
index 000000000..f95911efd
--- /dev/null
+++ b/cmake/toolchains/aarch64-linux-gnu.cmake
@@ -0,0 +1,17 @@
+set(CMAKE_SYSTEM_NAME Linux)
+set(CMAKE_SYSTEM_PROCESSOR aarch64)
+
+set(CMAKE_C_COMPILER "aarch64-linux-gnu-gcc")
+set(CMAKE_CXX_COMPILER "aarch64-linux-gnu-g++")
+set(CMAKE_LINKER "aarch64-linux-gnu-ld")
+
+set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
+
+set(CMAKE_C_FLAGS "-march=armv8-a")
+set(CMAKE_CXX_FLAGS "-march=armv8-a")
+
+# cache flags
+set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}" CACHE STRING "c flags")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}" CACHE STRING "c++ flags")
diff --git a/cmake/toolchains/rknpu2-linux-gnu.cmake b/cmake/toolchains/rknpu2-linux-gnu.cmake
new file mode 100644
index 000000000..2bb683543
--- /dev/null
+++ b/cmake/toolchains/rknpu2-linux-gnu.cmake
@@ -0,0 +1,23 @@
+set(CMAKE_SYSTEM_NAME Linux)
+set(CMAKE_SYSTEM_PROCESSOR rockchip)
+
+if(DEFINED ENV{RKNN_TOOL_CHAIN})
+  file(TO_CMAKE_PATH $ENV{RKNN_TOOL_CHAIN} RKNN_TOOL_CHAIN)
+else()
+  message(FATAL_ERROR "RKNN_TOOL_CHAIN env must be defined")
+endif()
+
+set(CMAKE_C_COMPILER ${RKNN_TOOL_CHAIN}/bin/aarch64-rockchip-linux-gnu-gcc)
+set(CMAKE_CXX_COMPILER ${RKNN_TOOL_CHAIN}/bin/aarch64-rockchip-linux-gnu-g++)
+
+set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
+set(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)
+
+set(CMAKE_C_FLAGS "-Wl,--allow-shlib-undefined")
+set(CMAKE_CXX_FLAGS "-Wl,--allow-shlib-undefined")
+
+# cache flags
+set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS}" CACHE STRING "c flags")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}" CACHE STRING "c++ flags")
diff --git a/csrc/mmdeploy/archive/value_archive.h b/csrc/mmdeploy/archive/value_archive.h
index 13cc95cca..2f559c1a1 100644
--- a/csrc/mmdeploy/archive/value_archive.h
+++ b/csrc/mmdeploy/archive/value_archive.h
@@ -53,6 +53,10 @@ inline Value to_value(T&& val) {
   return value;
 }
 
+// fast path
+inline Value to_value(const Value& v) { return v; }
+inline Value to_value(Value&& v) { return std::move(v); }
+
 template <typename T>
 void from_value(const Value& value, T&& x);
 
@@ -107,6 +111,9 @@ void from_value(const Value& value, T&& x) {
   archive(std::forward<T>(x));
 }
 
+// Required to avoid Value::Pointer being unwrapped by Value::get_to()
+inline void from_value(const Value& value, Value& x) { x = value; }
+
 template <typename T>
 inline T from_value(const Value& value) {
   T x{};
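The identity overloads added above are easy to misread, so here is a minimal sketch (illustration only, not part of the patch) of the behavior they pin down: round-tripping a `Value` through `to_value`/`from_value` becomes a plain copy/assignment, which is what keeps a pointer-backed `Value` (`Value::Pointer`) from being unwrapped by `Value::get_to()`.

```cpp
// Sketch only, assuming the mmdeploy::Value API declared in value_archive.h.
#include "mmdeploy/archive/value_archive.h"

void value_identity_sketch() {
  mmdeploy::Value state = mmdeploy::Value::Object{{"frame_id", 0}};
  // fast path: copies the Value directly instead of walking an archive
  mmdeploy::Value copy = mmdeploy::to_value(state);
  mmdeploy::Value restored;
  // identity overload: plain assignment, so a Value::Pointer is not unwrapped
  mmdeploy::from_value(copy, restored);
}
```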
diff --git a/csrc/mmdeploy/backend_ops/torchscript/optimizer/ir/subgraph_matcher.cpp b/csrc/mmdeploy/backend_ops/torchscript/optimizer/ir/subgraph_matcher.cpp
index 6f188c568..10ce9829d 100644
--- a/csrc/mmdeploy/backend_ops/torchscript/optimizer/ir/subgraph_matcher.cpp
+++ b/csrc/mmdeploy/backend_ops/torchscript/optimizer/ir/subgraph_matcher.cpp
@@ -156,7 +156,7 @@ bool SubgraphMatcher::SubgraphMatcherImpl::matchAttributes(const Node* n1, Node*
         n1is = n1->is(attr_name);
         n2is = n2->is(attr_name);
         if (n1is.size() != n2is.size()) return false;
-        for (int i = 0; i < n1is.size(); ++i) {
+        for (size_t i = 0; i < n1is.size(); ++i) {
           if (n1is[i] != n2is[i]) return false;
         }
         break;
@@ -164,7 +164,7 @@ bool SubgraphMatcher::SubgraphMatcherImpl::matchAttributes(const Node* n1, Node*
         n1fs = n1->fs(attr_name);
         n2fs = n2->fs(attr_name);
         if (n1fs.size() != n2fs.size()) return false;
-        for (int i = 0; i < n1fs.size(); ++i) {
+        for (size_t i = 0; i < n1fs.size(); ++i) {
           if (n1fs[i] != n2fs[i]) return false;
         }
         break;
diff --git a/csrc/mmdeploy/graph/CMakeLists.txt b/csrc/mmdeploy/graph/CMakeLists.txt
index a8f10a168..b5b6d6422 100644
--- a/csrc/mmdeploy/graph/CMakeLists.txt
+++ b/csrc/mmdeploy/graph/CMakeLists.txt
@@ -6,6 +6,7 @@ set(SRCS
         task.cpp
         static_router.cpp
         inference.cpp
-        pipeline.cpp)
+        pipeline.cpp
+        cond.cpp)
 
 mmdeploy_add_module(${PROJECT_NAME} LIBRARY "${SRCS}")
 add_library(mmdeploy::graph ALIAS ${PROJECT_NAME})
diff --git a/csrc/mmdeploy/graph/cond.cpp b/csrc/mmdeploy/graph/cond.cpp
new file mode 100644
index 000000000..3ef574888
--- /dev/null
+++ b/csrc/mmdeploy/graph/cond.cpp
@@ -0,0 +1,124 @@
+// Copyright (c) OpenMMLab. All rights reserved.
+
+#include "mmdeploy/graph/cond.h"
+
+#include <algorithm>
+
+namespace mmdeploy::graph {
+
+namespace {
+
+std::vector<int> get_predicates(const Value::Array& xs) {
+  std::vector<int> ps;
+  ps.reserve(xs.size());
+  std::transform(std::begin(xs), std::end(xs), std::back_inserter(ps),
+                 [](const Value& x) { return static_cast<int>(x.get<bool>()); });
+  return ps;
+}
+
+std::pair<bool, bool> choice(const std::vector<int>& xs) {
+  auto count = std::count(std::begin(xs), std::end(xs), 1);
+  if (count == 0 || count == xs.size()) {
+    return std::make_pair(true, count == xs.size());
+  }
+  return std::make_pair(false, false);
+}
+
+Value get_divergent_input(Value::Array& as, const std::vector<int>& ps) {
+  Value::Array ts(as.size(), Value::kArray);
+  for (size_t i = 0; i < ts.size(); ++i) {
+    auto& t = ts[i].array();
+    auto& a = as[i].array();
+    for (size_t j = 0; j < ps.size(); ++j) {
+      if (ps[j]) {
+        t.push_back(std::move(a[j]));
+      }
+    }
+  }
+  return ts;
+}
+
+Value get_divergent_output(Value::Array& rs, const std::vector<int>& ps) {
+  Value::Array ys(rs.size(), Value::kArray);
+  for (size_t i = 0; i < ys.size(); ++i) {
+    auto& y = ys[i].array();
+    auto& r = rs[i].array();
+    size_t k = 0;
+    for (const auto& p : ps) {
+      y.push_back(p ? std::move(r[k++]) : nullptr);
+    }
+  }
+  return ys;
+}
+
+}  // namespace
+
+Sender<Value> Cond::Process(Sender<Value> input) {
+  return LetValue(std::move(input), [this](Value& _input) -> Sender<Value> {
+    assert(_input.is_array());
+    auto& as = _input.array();
+    auto ps = get_predicates(as.front().array());
+    as.erase(as.begin());
+    auto [coherent, branch] = choice(ps);
+    if (coherent) {
+      if (branch) {
+        return node_->Process(Just(std::move(_input)));
+      } else {
+        Value::Array output(n_output_, Value::Array(ps.size(), nullptr));
+        return Just(Value(std::move(output)));
+      }
+    } else {
+      auto ts = get_divergent_input(as, ps);
+      return node_->Process(Just(Value(std::move(ts)))) |
+             Then([ps = std::move(ps)](Value rs) -> Value {
+               return get_divergent_output(rs.array(), ps);
+             });
+    }
+  });
+}
+
+CondBuilder::CondBuilder(Value config) : Builder(std::move(config)) {}
+
+Result<unique_ptr<Node>> CondBuilder::BuildImpl() {
+  try {
+    auto cond = std::make_unique<Cond>();
+    cond->n_output_ = static_cast<int>(config_["output"].size());
+
+    auto& body_config = config_["body"];
+    auto inputs = config_["input"].array();
+    inputs.erase(inputs.begin());
+
+    body_config["input"] = std::move(inputs);
+    body_config["output"] = config_["output"];
+
+    // propagate context
+    if (!body_config.contains("context")) {
+      body_config["context"] = Value::Object();
+    }
+    if (config_.contains("context")) {
+      update(body_config["context"].object(), config_["context"].object(), 2);
+    }
+
+    if (auto builder = Builder::CreateFromConfig(body_config).value()) {
+      if (auto node = builder->Build().value()) {
+        cond->node_ = std::move(node);
+        return std::move(cond);
+      }
+    }
+  } catch (const std::exception& e) {
+    MMDEPLOY_ERROR("error parsing config: {}", config_);
+  }
+  return Status(eFail);
+}
+
+class CondCreator : public Creator<Builder> {
+ public:
+  const char* GetName() const override { return "Cond"; }
+  unique_ptr<Builder> Create(const Value& config) override {
+    return std::make_unique<CondBuilder>(config);
+  }
+};
+
+REGISTER_MODULE(Builder, CondCreator);
+
+}  // namespace mmdeploy::graph
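For context, a `Cond` node is declared like any other pipeline task: its first input carries one predicate per sample, and `body` runs only on the samples whose predicate is true, while the rest receive `null` outputs. A configuration sketch, with the shape borrowed from the `pose_tracker.cpp` demo later in this patch (model names are placeholders):

```cpp
// Sketch of a "Cond" task entry inside a pipeline config, held in a raw JSON
// string the way the pose_tracker demo does; "use_det" is one boolean per sample.
static const char* kCondTaskJson = R"({
  "type": "Cond",
  "input": ["use_det", "data"],
  "output": "dets",
  "body": {
    "name": "detection",
    "type": "Inference",
    "params": { "model": "detection" }
  }
})";
```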
diff --git a/csrc/mmdeploy/graph/cond.h b/csrc/mmdeploy/graph/cond.h
new file mode 100644
index 000000000..645358608
--- /dev/null
+++ b/csrc/mmdeploy/graph/cond.h
@@ -0,0 +1,31 @@
+// Copyright (c) OpenMMLab. All rights reserved.
+
+#ifndef MMDEPLOY_CSRC_MMDEPLOY_GRAPH_COND_H_
+#define MMDEPLOY_CSRC_MMDEPLOY_GRAPH_COND_H_
+
+#include "mmdeploy/core/graph.h"
+
+namespace mmdeploy::graph {
+
+class Cond : public Node {
+  friend class CondBuilder;
+
+ public:
+  Sender<Value> Process(Sender<Value> input) override;
+
+ private:
+  std::unique_ptr<Node> node_;
+  int n_output_{0};
+};
+
+class CondBuilder : public Builder {
+ public:
+  explicit CondBuilder(Value config);
+
+ protected:
+  Result<unique_ptr<Node>> BuildImpl() override;
+};
+
+}  // namespace mmdeploy::graph
+
+#endif  // MMDEPLOY_CSRC_MMDEPLOY_GRAPH_COND_H_
diff --git a/csrc/mmdeploy/net/CMakeLists.txt b/csrc/mmdeploy/net/CMakeLists.txt
index fe4e9e292..33a582a20 100644
--- a/csrc/mmdeploy/net/CMakeLists.txt
+++ b/csrc/mmdeploy/net/CMakeLists.txt
@@ -38,5 +38,9 @@ if ("coreml" IN_LIST MMDEPLOY_TARGET_BACKENDS)
     add_subdirectory(coreml)
 endif ()
 
+if ("rknn" IN_LIST MMDEPLOY_TARGET_BACKENDS)
+    add_subdirectory(rknn)
+endif ()
+
 mmdeploy_add_module(${PROJECT_NAME} net_module.cpp)
 add_library(mmdeploy::net_module ALIAS ${PROJECT_NAME})
diff --git a/csrc/mmdeploy/net/ort/CMakeLists.txt b/csrc/mmdeploy/net/ort/CMakeLists.txt
index 598e40477..00a941911 100644
--- a/csrc/mmdeploy/net/ort/CMakeLists.txt
+++ b/csrc/mmdeploy/net/ort/CMakeLists.txt
@@ -4,12 +4,8 @@ project(mmdeploy_ort_net)
 
 include(${CMAKE_SOURCE_DIR}/cmake/modules/FindONNXRUNTIME.cmake)
 
-if ("cpu" IN_LIST MMDEPLOY_TARGET_DEVICES)
-    mmdeploy_add_module(${PROJECT_NAME} ort_net.cpp)
-    target_include_directories(${PROJECT_NAME} PRIVATE ${ONNXRUNTIME_DIR}/include)
-    target_link_libraries(${PROJECT_NAME} PRIVATE mmdeploy_onnxruntime_ops_obj)
-    target_link_libraries(${PROJECT_NAME} PUBLIC onnxruntime)
-    add_library(mmdeploy::ort_net ALIAS ${PROJECT_NAME})
-else ()
-    message(ERROR "'ort_net' is NOT supported in target devices: ${MMDEPLOY_TARGET_DEVICES}")
-endif ()
+mmdeploy_add_module(${PROJECT_NAME} ort_net.cpp)
+target_include_directories(${PROJECT_NAME} PRIVATE ${ONNXRUNTIME_DIR}/include)
+target_link_libraries(${PROJECT_NAME} PRIVATE mmdeploy_onnxruntime_ops_obj)
+target_link_libraries(${PROJECT_NAME} PUBLIC onnxruntime)
+add_library(mmdeploy::ort_net ALIAS ${PROJECT_NAME})
diff --git a/csrc/mmdeploy/net/rknn/CMakeLists.txt b/csrc/mmdeploy/net/rknn/CMakeLists.txt
new file mode 100644
index 000000000..269df4dc9
--- /dev/null
+++ b/csrc/mmdeploy/net/rknn/CMakeLists.txt
@@ -0,0 +1,20 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+
+project(mmdeploy_rknn_net)
+
+add_library(rknn SHARED IMPORTED)
+
+if(DEFINED ENV{RKNPU2_DEVICE_DIR})
+  file(TO_CMAKE_PATH $ENV{RKNPU2_DEVICE_DIR} RKNPU2_DEVICE_DIR)
+else()
+  message(FATAL_ERROR "RKNPU2_DEVICE_DIR env must be defined")
+endif()
+
+set_target_properties(rknn PROPERTIES
+  IMPORTED_LOCATION "${RKNPU2_DEVICE_DIR}/Linux/librknn_api/aarch64/librknn_api.so"
+  INTERFACE_INCLUDE_DIRECTORIES "${RKNPU2_DEVICE_DIR}/Linux/librknn_api/include"
+)
+
+mmdeploy_add_module(${PROJECT_NAME} rknn_net.cpp)
+target_link_libraries(${PROJECT_NAME} PRIVATE rknn)
+add_library(mmdeploy::rknn_net ALIAS ${PROJECT_NAME})
diff --git a/csrc/mmdeploy/net/rknn/rknn_net.cpp b/csrc/mmdeploy/net/rknn/rknn_net.cpp
new file mode 100644
index 000000000..00ebcf675
--- /dev/null
+++ b/csrc/mmdeploy/net/rknn/rknn_net.cpp
@@ -0,0 +1,216 @@
+// Copyright (c) OpenMMLab. All rights reserved.
+#include "rknn_net.h"
+
+#include <stdio.h>
+
+#include <fstream>
+
+#include "mmdeploy/core/logger.h"
+#include "mmdeploy/core/model.h"
+#include "mmdeploy/core/utils/filesystem.h"
+#include "mmdeploy/core/utils/formatter.h"
+
+namespace mmdeploy::framework {
+
+Result<rknn_tensor_type> GetRKNNDataType(DataType data_type) {
+  switch (data_type) {
+    case DataType::kFLOAT:
+      return RKNN_TENSOR_FLOAT32;
+    case DataType::kHALF:
+      return RKNN_TENSOR_FLOAT16;
+    case DataType::kINT8:
+      return RKNN_TENSOR_INT8;
+    case DataType::kINT32:
+      return RKNN_TENSOR_INT32;
+    case DataType::kINT64:
+      return RKNN_TENSOR_INT64;
+    default:
+      return Status(eNotSupported);
+  }
+}
+
+Result<DataType> GetMMDeployDataType(rknn_tensor_type data_type) {
+  switch (data_type) {
+    case RKNN_TENSOR_FLOAT32:
+      return DataType::kFLOAT;
+    case RKNN_TENSOR_FLOAT16:
+      return DataType::kHALF;
+    case RKNN_TENSOR_INT8:
+      return DataType::kINT8;
+    case RKNN_TENSOR_INT32:
+      return DataType::kINT32;
+    case RKNN_TENSOR_INT64:
+      return DataType::kINT64;
+    default:
+      return Status(eNotSupported);
+  }
+}
+
+RKNNNet::~RKNNNet() { rknn_destroy(ctx_); }
+
+void RKNNNet::dump_tensor_attr(rknn_tensor_attr* attr) {
+  MMDEPLOY_INFO(
+      "  index={}, name={}, n_dims={}, dims=[{}, {}, {}, {}], n_elems={}, size={}, fmt={}, "
+      "type={}, qnt_type={}, "
+      "zp={}, scale={}",
+      attr->index, attr->name, attr->n_dims, attr->dims[0], attr->dims[1], attr->dims[2],
+      attr->dims[3], attr->n_elems, attr->size, get_format_string(attr->fmt),
+      get_type_string(attr->type), get_qnt_type_string(attr->qnt_type), attr->zp, attr->scale);
+}
+
+Result<void> RKNNNet::Init(const Value& args) {
+  auto& context = args["context"];
+  device_ = context["device"].get<Device>();
+  stream_ = context["stream"].get<Stream>();
+  if (!device_.is_host()) {
+    return Status(eNotSupported);
+  }
+
+  auto name = args["name"].get<std::string>();
+  auto model = context["model"].get<Model>();
+  OUTCOME_TRY(auto config, model.GetModelConfig(name));
+
+  std::string content;
+  OUTCOME_TRY(content, model.ReadFile(config.net));
+  char* model_ptr = const_cast<char*>(content.data());
+  int ret = rknn_init(&ctx_, model_ptr, content.size(), 0, NULL);
+  if (ret != RKNN_SUCC) {
+    MMDEPLOY_ERROR("Load .rknn failed! ret= {}", ret);
+    return Status(eInvalidArgument);
+  }
+
+  // Get Model Input Output Info
+  rknn_input_output_num io_num;
+  ret = rknn_query(ctx_, RKNN_QUERY_IN_OUT_NUM, &io_num, sizeof(io_num));
+  if (ret != RKNN_SUCC) {
+    MMDEPLOY_INFO("model input num: {}, output num: {}\n", io_num.n_input, io_num.n_output);
+    MMDEPLOY_ERROR("rknn_query fail! ret= {}", ret);
+    return Status(eFail);
+  }
+
+  for (int i = 0; i < io_num.n_input; i++) {
+    rknn_tensor_attr input_attr;
+    input_attr.index = i;
+    ret = rknn_query(ctx_, RKNN_QUERY_INPUT_ATTR, &(input_attr), sizeof(rknn_tensor_attr));
+    if (ret != RKNN_SUCC) {
+      MMDEPLOY_INFO("input tensors:\n");
+      dump_tensor_attr(&(input_attr));
+      MMDEPLOY_ERROR("rknn_query fail! ret= {}", ret);
+      return Status(eFail);
+    }
+    input_attrs_.push_back(input_attr);
+    OUTCOME_TRY(auto data_type, GetMMDeployDataType(input_attr.type));
+    input_tensors_.emplace_back(TensorDesc{device_, data_type, {}, input_attr.name});
+  }
+
+  for (int i = 0; i < io_num.n_output; i++) {
+    rknn_tensor_attr output_attr;
+    output_attr.index = i;
+    ret = rknn_query(ctx_, RKNN_QUERY_OUTPUT_ATTR, &(output_attr), sizeof(rknn_tensor_attr));
+    if (ret != RKNN_SUCC) {
+      MMDEPLOY_INFO("output tensors:\n");
+      dump_tensor_attr(&(output_attr));
+      MMDEPLOY_ERROR("rknn_query fail! ret= {}", ret);
+      return Status(eFail);
+    }
+    output_attrs_.push_back(output_attr);
+    OUTCOME_TRY(auto data_type, GetMMDeployDataType(output_attr.type));
+    output_tensors_.emplace_back(TensorDesc{device_, data_type, {}, output_attr.name});
+  }
+
+  return success();
+}
+
+Result<void> RKNNNet::ForwardAsync(Event* event) { return Status(eNotSupported); }
+
+Result<void> RKNNNet::Deinit() { return success(); }
+
+Result<Span<Tensor>> RKNNNet::GetInputTensors() { return input_tensors_; }
+
+Result<Span<Tensor>> RKNNNet::GetOutputTensors() { return output_tensors_; }
+
+Result<void> RKNNNet::Reshape(Span<TensorShape> input_shapes) {
+  for (size_t i = 0; i < input_shapes.size(); ++i) {
+    input_tensors_[i].Reshape(input_shapes[i]);
+  }
+  return success();
+}
+
+Result<void> RKNNNet::Forward() {
+  OUTCOME_TRY(stream_.Wait());
+
+  std::vector<rknn_input> inputs;
+  for (int i = 0; i < input_tensors_.size(); i++) {
+    rknn_input input;
+    input.index = i;
+    input.pass_through = 0;
+    input.type = input_attrs_[i].type;
+    input.fmt = input_attrs_[i].fmt;
+    input.buf = input_tensors_[i].data();
+    input.size = input_attrs_[i].size;
+    inputs.push_back(input);
+  }
+
+  // Set input
+  int ret = rknn_inputs_set(ctx_, input_tensors_.size(), inputs.data());
+  if (ret < 0) {
+    MMDEPLOY_ERROR("rknn_input_set fail! ret= {}", ret);
+    return Status(eFail);
+  }
+
+  // Get output
+  std::vector<rknn_output> outputs;
+  for (uint32_t i = 0; i < output_tensors_.size(); ++i) {
+    rknn_output output;
+    output.want_float = 1;
+    output.index = i;
+    output.is_prealloc = 0;
+    outputs.push_back(output);
+  }
+
+  ret = rknn_run(ctx_, NULL);
+  if (ret < 0) {
+    MMDEPLOY_ERROR("rknn_run fail! ret={}", ret);
+    return Status(eFail);
+  }
+
+  ret = rknn_outputs_get(ctx_, output_tensors_.size(), outputs.data(), NULL);
+  if (ret < 0) {
+    MMDEPLOY_ERROR("rknn_outputs_get fail! ret= {}", ret);
+    return Status(eFail);
+  }
+  for (int i = 0; i < output_tensors_.size(); i++) {
+    TensorShape tensor_shape;
+    for (int j = 0; j < output_attrs_[i].n_dims; ++j) {
+      tensor_shape.push_back(output_attrs_[i].dims[j]);
+    }
+    output_tensors_[i].Reshape(tensor_shape);
+    memcpy(output_tensors_[i].data(), (float*)outputs[i].buf, output_attrs_[i].size);
+  }
+  OUTCOME_TRY(stream_.Wait());
+  return success();
+}
+
+class RKNNNetCreator : public Creator<Net> {
+ public:
+  const char* GetName() const override { return "rknn"; }
+  int GetVersion() const override { return 0; }
+  std::unique_ptr<Net> Create(const Value& args) override {
+    try {
+      auto p = std::make_unique<RKNNNet>();
+      if (auto r = p->Init(args)) {
+        return p;
+      } else {
+        MMDEPLOY_ERROR("error creating RKNNNet: {}", r.error().message().c_str());
+        return nullptr;
+      }
+    } catch (const std::exception& e) {
+      MMDEPLOY_ERROR("unhandled exception when creating RKNNNet: {}", e.what());
+      return nullptr;
+    }
+  }
+};
+
+REGISTER_MODULE(Net, RKNNNetCreator);
+
+}  // namespace mmdeploy::framework
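For orientation, every backend implements the same narrow `Net` interface declared in `rknn_net.h` below. A hypothetical sketch of the order in which the SDK exercises it (illustration only, not part of the patch; the real driver lives in `net_module.cpp`):

```cpp
// Hypothetical driver sketch: how a Net backend such as RKNNNet is exercised.
// Assumes the mmdeploy::framework types used elsewhere in this patch.
Result<void> run_net_once(Net& net, Span<TensorShape> shapes) {
  OUTCOME_TRY(net.Reshape(shapes));                 // declare input shapes
  OUTCOME_TRY(auto inputs, net.GetInputTensors());  // host-visible input buffers
  // ... fill inputs[i] with preprocessed data ...
  OUTCOME_TRY(net.Forward());  // RKNN: rknn_inputs_set -> rknn_run -> rknn_outputs_get
  OUTCOME_TRY(auto outputs, net.GetOutputTensors());
  // ... read outputs[i]; RKNNNet returns float data since want_float = 1 ...
  return success();
}
```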
diff --git a/csrc/mmdeploy/net/rknn/rknn_net.h b/csrc/mmdeploy/net/rknn/rknn_net.h
new file mode 100644
index 000000000..6afce5b0b
--- /dev/null
+++ b/csrc/mmdeploy/net/rknn/rknn_net.h
@@ -0,0 +1,45 @@
+// Copyright (c) OpenMMLab. All rights reserved.
+
+#ifndef MMDEPLOY_SRC_NET_RKNN_RKNN_NET_H_
+#define MMDEPLOY_SRC_NET_RKNN_RKNN_NET_H_
+
+#include "mmdeploy/core/mpl/span.h"
+#include "mmdeploy/core/net.h"
+#include "rknn_api.h"
+
+namespace mmdeploy::framework {
+
+class RKNNNet : public Net {
+ public:
+  ~RKNNNet() override;
+
+  Result<void> Init(const Value& args) override;
+
+  Result<void> Deinit() override;
+
+  Result<void> Reshape(Span<TensorShape> input_shapes) override;
+
+  Result<Span<Tensor>> GetInputTensors() override;
+
+  Result<Span<Tensor>> GetOutputTensors() override;
+
+  Result<void> Forward() override;
+
+  Result<void> ForwardAsync(Event* event) override;
+
+ private:
+  void dump_tensor_attr(rknn_tensor_attr* attr);
+
+  Device device_;
+  Stream stream_;
+  rknn_context ctx_;
+  std::vector<Tensor> input_tensors_;
+  std::vector<Tensor> output_tensors_;
+  std::vector<rknn_tensor_attr> input_attrs_;
+  std::vector<rknn_tensor_attr> output_attrs_;
+  static constexpr const auto kHost = Device(0);
+};
+
+}  // namespace mmdeploy::framework
+
+#endif  // MMDEPLOY_SRC_NET_RKNN_RKNN_NET_H_
diff --git a/demo/csrc/CMakeLists.txt b/demo/csrc/CMakeLists.txt
index 7daed5f89..d2bcd1a0b 100644
--- a/demo/csrc/CMakeLists.txt
+++ b/demo/csrc/CMakeLists.txt
@@ -30,7 +30,9 @@ function(add_example dep folder name)
 endfunction()
 
 add_example(classifier c image_classification)
+add_example(classifier c batch_image_classification)
 add_example(detector c object_detection)
+add_example(detector c batch_object_detection)
 add_example(segmentor c image_segmentation)
 add_example(restorer c image_restorer)
 add_example(text_detector c ocr)
@@ -46,7 +48,8 @@ if (MMDEPLOY_BUILD_SDK_CXX_API)
     add_example(segmentor cpp segmentor)
     add_example(restorer cpp restorer)
     add_example(text_detector cpp text_ocr)
-    add_example("" cpp text_det_recog)
+    add_example(text_detector cpp text_det_recog)
     add_example(pose_detector cpp pose_detector)
     add_example(rotated_detector cpp rotated_detector)
+    add_example(pose_detector cpp pose_tracker)
 endif ()
diff --git a/demo/csrc/c/batch_image_classification.cpp b/demo/csrc/c/batch_image_classification.cpp
new file mode 100644
index 000000000..a9529f9ba
--- /dev/null
+++ b/demo/csrc/c/batch_image_classification.cpp
@@ -0,0 +1,100 @@
+#include <fstream>
+#include <opencv2/opencv.hpp>
+#include <vector>
+
+#include "mmdeploy/classifier.h"
+
+static int batch_inference(mmdeploy_classifier_t classifier,
+                           const std::vector<int>& image_ids,
+                           const std::vector<mmdeploy_mat_t>& mats);
+
+int main(int argc, char* argv[]) {
+  if (argc < 5) {
+    fprintf(stderr, "usage:\n  image_classification device_name dump_model_directory "
+            "imagelist.txt batch_size\n");
+    return 1;
+  }
+  auto device_name = argv[1];
+  auto model_path = argv[2];
+
+  mmdeploy_classifier_t classifier{};
+  int status{};
+  status = mmdeploy_classifier_create_by_path(model_path, device_name, 0, &classifier);
+  if (status != MMDEPLOY_SUCCESS) {
+    fprintf(stderr, "failed to create classifier, code: %d\n", (int)status);
+    return 1;
+  }
+
+  // `file_path` is the path of an image list file
+  std::string file_path = argv[3];
+  const int batch = std::stoi(argv[argc - 1]);
+
+  // read image paths from the file
+  std::ifstream ifs(file_path);
+  std::string img_path;
+  std::vector<std::string> img_paths;
+  while (ifs >> img_path) {
+    img_paths.emplace_back(std::move(img_path));
+  }
+
+  // read images and process batch inference
+  std::vector<cv::Mat> images;
+  std::vector<int> image_ids;
+  std::vector<mmdeploy_mat_t> mats;
+  for (int i = 0; i < (int)img_paths.size(); ++i) {
+    auto img = cv::imread(img_paths[i]);
+    if (!img.data) {
+      fprintf(stderr, "failed to load image: %s\n", img_paths[i].c_str());
+      continue;
+    }
+    images.push_back(img);
+    image_ids.push_back(i);
+    mmdeploy_mat_t mat{
+        img.data, img.rows, img.cols, 3, MMDEPLOY_PIXEL_FORMAT_BGR, MMDEPLOY_DATA_TYPE_UINT8};
+    mats.push_back(mat);
+
+    // process batch inference
+    if ((int)mats.size() == batch) {
+      if (batch_inference(classifier, image_ids, mats) != 0) {
+        continue;
+      }
+      // clear buffer for next batch
+      mats.clear();
+      image_ids.clear();
+      images.clear();
+    }
+  }
+  // process batch inference if there are still unhandled images
+  if (!mats.empty()) {
+    (void)batch_inference(classifier, image_ids, mats);
+  }
+
+  mmdeploy_classifier_destroy(classifier);
+
+  return 0;
+}
+
+int batch_inference(mmdeploy_classifier_t classifier, const std::vector<int>& image_ids,
+                    const std::vector<mmdeploy_mat_t>& mats) {
+  mmdeploy_classification_t* res{};
+  int* res_count{};
+  auto status = mmdeploy_classifier_apply(classifier, mats.data(), (int)mats.size(),
+                                          &res, &res_count);
+  if (status != MMDEPLOY_SUCCESS) {
+    fprintf(stderr, "failed to apply classifier to batch images %d, code: %d\n",
+            (int)mats.size(), (int)status);
+    return 1;
+  }
+  // print the inference results
+  auto res_ptr = res;
+  for (int j = 0; j < (int)mats.size(); ++j) {
+    fprintf(stderr, "results in the %d-th image:\n", image_ids[j]);
+    for (int k = 0; k < res_count[j]; ++k, ++res_ptr) {
+      fprintf(stderr, "  label: %d, score: %.4f\n", res_ptr->label_id, res_ptr->score);
+    }
+  }
+  // release results buffer
+  mmdeploy_classifier_release_result(res, res_count, (int)mats.size());
+  return 0;
+}
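Both batch demos rely on the same result layout of the C API: one flat array of results for the whole batch plus a per-image count, traversed with a moving cursor. A condensed sketch (illustration only, using the classifier types from the demo above):

```cpp
// Illustration of the batched result layout used by mmdeploy_classifier_apply
// (and, analogously, mmdeploy_detector_apply): a flat array plus per-image counts.
void walk_batch_results(const mmdeploy_classification_t* res, const int* res_count,
                        int n_images) {
  const mmdeploy_classification_t* cursor = res;
  for (int i = 0; i < n_images; ++i) {
    for (int k = 0; k < res_count[i]; ++k) {
      // cursor[k] is the k-th (label, score) result for image i
    }
    cursor += res_count[i];  // advance to the next image's results
  }
}
```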
diff --git a/demo/csrc/c/batch_object_detection.cpp b/demo/csrc/c/batch_object_detection.cpp
new file mode 100644
index 000000000..04cfe90f9
--- /dev/null
+++ b/demo/csrc/c/batch_object_detection.cpp
@@ -0,0 +1,147 @@
+#include <fstream>
+#include <opencv2/opencv.hpp>
+#include <string>
+#include <vector>
+
+#include "mmdeploy/detector.h"
+
+static int batch_inference(mmdeploy_detector_t detector, std::vector<cv::Mat>& images,
+                           const std::vector<int>& image_ids,
+                           const std::vector<mmdeploy_mat_t>& mats);
+
+static void visualize_detection(const std::string& output_name, cv::Mat& image,
+                                const mmdeploy_detection_t* bboxes_ptr, int bboxes_num);
+
+int main(int argc, char* argv[]) {
+  if (argc < 5) {
+    fprintf(stderr, "usage:\n  object_detection device_name sdk_model_path "
+            "file_path batch_size\n");
+    return 1;
+  }
+  auto device_name = argv[1];
+  auto model_path = argv[2];
+
+  mmdeploy_detector_t detector{};
+  int status{};
+  status = mmdeploy_detector_create_by_path(model_path, device_name, 0, &detector);
+  if (status != MMDEPLOY_SUCCESS) {
+    fprintf(stderr, "failed to create detector, code: %d\n", (int)status);
+    return 1;
+  }
+
+  // file_path is the path of an image list file
+  std::string file_path = argv[3];
+  const int batch = std::stoi(argv[argc - 1]);
+
+  // read image paths from the file
+  std::ifstream ifs(file_path);
+  std::string img_path;
+  std::vector<std::string> img_paths;
+  while (ifs >> img_path) {
+    img_paths.emplace_back(std::move(img_path));
+  }
+
+  // read images and process batch inference
+  std::vector<cv::Mat> images;
+  std::vector<int> image_ids;
+  std::vector<mmdeploy_mat_t> mats;
+  for (int i = 0; i < (int)img_paths.size(); ++i) {
+    auto img = cv::imread(img_paths[i]);
+    if (!img.data) {
+      fprintf(stderr, "failed to load image: %s\n", img_paths[i].c_str());
+      continue;
+    }
+    images.push_back(img);
+    image_ids.push_back(i);
+    mmdeploy_mat_t mat{
+        img.data, img.rows, img.cols, 3, MMDEPLOY_PIXEL_FORMAT_BGR, MMDEPLOY_DATA_TYPE_UINT8};
+    mats.push_back(mat);
+
+    // process batch inference
+    if ((int)mats.size() == batch) {
+      if (batch_inference(detector, images, image_ids, mats) != 0) {
+        continue;
+      }
+      // clear buffer for next batch
+      mats.clear();
+      image_ids.clear();
+      images.clear();
+    }
+  }
+  // process batch inference if there are still unhandled images
+  if (!mats.empty()) {
+    (void)batch_inference(detector, images, image_ids, mats);
+  }
+
+  mmdeploy_detector_destroy(detector);
+  return 0;
+}
+
+int batch_inference(mmdeploy_detector_t detector, std::vector<cv::Mat>& images,
+                    const std::vector<int>& image_ids,
+                    const std::vector<mmdeploy_mat_t>& mats) {
+  mmdeploy_detection_t* bboxes{};
+  int* res_count{};
+  auto status = mmdeploy_detector_apply(detector, mats.data(), mats.size(), &bboxes, &res_count);
+  if (status != MMDEPLOY_SUCCESS) {
+    fprintf(stderr, "failed to apply detector, code: %d\n", (int)status);
+    return 1;
+  }
+
+  mmdeploy_detection_t* bboxes_ptr = bboxes;
+  for (int i = 0; i < (int)mats.size(); ++i) {
+    fprintf(stdout, "results in the %d-th image:\n  bbox_count=%d\n", image_ids[i], res_count[i]);
+    const std::string output_name = "output_detection_" + std::to_string(image_ids[i]) + ".png";
+    visualize_detection(output_name, images[i], bboxes_ptr, res_count[i]);
+    bboxes_ptr = bboxes_ptr + res_count[i];
+  }
+
+  mmdeploy_detector_release_result(bboxes, res_count, mats.size());
+  return 0;
+}
+
+void visualize_detection(const std::string& output_name, cv::Mat& image,
+                         const mmdeploy_detection_t* bboxes_ptr, int bbox_num) {
+  for (int i = 0; i < bbox_num; ++i, ++bboxes_ptr) {
+    const auto& box = bboxes_ptr->bbox;
+    const auto& mask = bboxes_ptr->mask;
+
+    fprintf(stdout,
+            "  box %d, left=%.2f, top=%.2f, right=%.2f, bottom=%.2f, "
+            "label=%d, score=%.4f\n",
+            i, box.left, box.top, box.right, box.bottom, bboxes_ptr->label_id, bboxes_ptr->score);
+
+    // skip detections with invalid bbox size (bbox height or width < 1)
+    if ((box.right - box.left) < 1 || (box.bottom - box.top) < 1) {
+      continue;
+    }
+
+    // skip detections less than specified score threshold
+    if (bboxes_ptr->score < 0.3) {
+      continue;
+    }
+
+    // generate mask overlay if model exports masks
+    if (mask != nullptr) {
+      fprintf(stdout, "mask %d, height=%d, width=%d\n", i, mask->height, mask->width);
+
+      cv::Mat imgMask(mask->height, mask->width, CV_8UC1, &mask->data[0]);
+      auto x0 = std::max(std::floor(box.left) - 1, 0.f);
+      auto y0 = std::max(std::floor(box.top) - 1, 0.f);
+      cv::Rect roi((int)x0, (int)y0, mask->width, mask->height);
+
+      // split the RGB channels, overlay mask to a specific color channel
+      cv::Mat ch[3];
+      split(image, ch);
+      int col = 0;
+      cv::bitwise_or(imgMask, ch[col](roi), ch[col](roi));
+      merge(ch, 3, image);
+    }
+
+    cv::rectangle(image, cv::Point{(int)box.left, (int)box.top},
+                  cv::Point{(int)box.right, (int)box.bottom}, cv::Scalar{0, 255, 0});
+  }
+  cv::imwrite(output_name, image);
+}
diff --git a/demo/csrc/cpp/pose_tracker.cpp b/demo/csrc/cpp/pose_tracker.cpp
new file mode 100644
index 000000000..c3193351e
--- /dev/null
+++ b/demo/csrc/cpp/pose_tracker.cpp
@@ -0,0 +1,427 @@
+
+
+#include "mmdeploy/archive/json_archive.h"
+#include "mmdeploy/archive/value_archive.h"
+#include "mmdeploy/common.hpp"
+#include "mmdeploy/core/mat.h"
+#include "mmdeploy/core/module.h"
+#include "mmdeploy/core/utils/formatter.h"
+#include "mmdeploy/experimental/module_adapter.h"
+#include "mmdeploy/pipeline.hpp"
+#include "opencv2/highgui.hpp"
+#include "opencv2/imgproc.hpp"
+#include "opencv2/videoio.hpp"
+
+const auto config_json = R"(
+{
+  "type": "Pipeline",
+  "input": ["data", "use_det", "state"],
+  "output": "targets",
+  "tasks": [
+    {
+      "type": "Cond",
+      "input": ["use_det", "data"],
+      "output": "dets",
+      "body": {
+        "name": "detection",
+        "type": "Inference",
+        "params": { "model": "detection" }
+      }
+    },
+    {
+      "type": "Task",
+      "module": "ProcessBboxes",
+      "input": ["dets", "data", "state"],
+      "output": "rois"
+    },
+    {
+      "input": "*rois",
+      "output": "*keypoints",
+      "name": "pose",
+      "type": "Inference",
+      "params": { "model": "pose" }
+    },
+    {
+      "type": "Task",
+      "module": "TrackPose",
+      "scheduler": "pool",
+      "input": ["keypoints", "state"],
+      "output": "targets"
+    }
+  ]
+}
+)"_json;
+
+namespace mmdeploy {
+
+#define REGISTER_SIMPLE_MODULE(name, fn)                                                       \
+  class name##_Creator : public ::mmdeploy::Creator<Module> {                                  \
+    const char* GetName() const override { return #name; }                                     \
+    std::unique_ptr<Module> Create(const Value&) override { return CreateTask(fn); }           \
+  };                                                                                           \
+  REGISTER_MODULE(Module, name##_Creator)
+
+std::optional<std::array<float, 4>> keypoints_to_bbox(const std::vector<cv::Point2f>& keypoints,
+                                                      const std::vector<float>& scores,
+                                                      float img_h, float img_w, float scale = 1.5,
+                                                      float kpt_thr = 0.3) {
+  auto valid = false;
+  auto x1 = static_cast<float>(img_w);
+  auto y1 = static_cast<float>(img_h);
+  auto x2 = 0.f;
+  auto y2 = 0.f;
+  for (size_t i = 0; i < keypoints.size(); ++i) {
+    auto& kpt = keypoints[i];
+    if (scores[i] > kpt_thr) {
+      x1 = std::min(x1, kpt.x);
+      y1 = std::min(y1, kpt.y);
+      x2 = std::max(x2, kpt.x);
+      y2 = std::max(y2, kpt.y);
+      valid = true;
+    }
+  }
+  if (!valid) {
+    return std::nullopt;
+  }
+  auto xc = .5f * (x1 + x2);
+  auto yc = .5f * (y1 + y2);
+  auto w = (x2 - x1) * scale;
+  auto h = (y2 - y1) * scale;
+
+  return std::array<float, 4>{
+      std::max(0.f, std::min(img_w, xc - .5f * w)),
+      std::max(0.f, std::min(img_h, yc - .5f * h)),
+      std::max(0.f, std::min(img_w, xc + .5f * w)),
+      std::max(0.f, std::min(img_h, yc + .5f * h)),
+  };
+}
+
+struct Track {
+  std::vector<std::vector<cv::Point2f>> keypoints;
+  std::vector<std::vector<float>> scores;
+  std::vector<std::array<float, 4>> bboxes;
+  int64_t track_id{-1};
+};
+
+struct TrackInfo {
+  std::vector<Track> tracks;
+  int64_t next_id{0};
+};
+
+MMDEPLOY_REGISTER_TYPE_ID(TrackInfo, 0xcfe87980aa895d3a);  // randomly generated type id
+
+Value::Array GetObjectsByTracking(Value& state, int img_h, int img_w) {
+  Value::Array objs;
+  auto& track_info = state["track_info"].get_ref<TrackInfo&>();
+  for (auto& track : track_info.tracks) {
+    auto bbox = keypoints_to_bbox(track.keypoints.back(), track.scores.back(),
+                                  static_cast<float>(img_h), static_cast<float>(img_w));
+    if (bbox) {
+      objs.push_back({{"bbox", to_value(*bbox)}});
+    }
+  }
+  return objs;
+}
+
+Value ProcessBboxes(const Value& detections, const Value& data, Value state) {
+  assert(state.is_pointer());
+  Value::Array bboxes;
+  if (detections.is_array()) {  // has detections
+    auto& dets = detections.array();
+    for (const auto& det : dets) {
+      if (det["label_id"].get<int>() == 0 && det["score"].get<float>() >= .3f) {
+        bboxes.push_back(det);
+      }
+    }
+    MMDEPLOY_INFO("bboxes by detection: {}", bboxes.size());
+    state["bboxes"] = bboxes;
+  } else {  // no detections, use tracked results
+    auto img_h = state["img_shape"][0].get<int>();
+    auto img_w = state["img_shape"][1].get<int>();
+    bboxes = GetObjectsByTracking(state, img_h, img_w);
+    MMDEPLOY_INFO("GetObjectsByTracking: {}", bboxes.size());
+  }
+  // attach bboxes to image data
+  for (auto& bbox : bboxes) {
+    auto img = data["ori_img"].get<framework::Mat>();
+    auto box = from_value<std::array<float, 4>>(bbox["bbox"]);
+    cv::Rect rect(cv::Rect2f(cv::Point2f(box[0], box[1]), cv::Point2f(box[2], box[3])));
+    bbox = Value::Object{
+        {"ori_img", img}, {"bbox", {rect.x, rect.y, rect.width, rect.height}}, {"rotation", 0.f}};
+  };
+  return bboxes;
+}
+REGISTER_SIMPLE_MODULE(ProcessBboxes, ProcessBboxes);
+
+// xyxy format
+float ComputeIoU(const std::array<float, 4>& a, const std::array<float, 4>& b) {
+  auto x1 = std::max(a[0], b[0]);
+  auto y1 = std::max(a[1], b[1]);
+  auto x2 = std::min(a[2], b[2]);
+  auto y2 = std::min(a[3], b[3]);
+
+  auto inter_area = std::max(0.f, x2 - x1) * std::max(0.f, y2 - y1);
+
+  auto a_area = (a[2] - a[0]) * (a[3] - a[1]);
+  auto b_area = (b[2] - b[0]) * (b[3] - b[1]);
+  auto union_area = a_area + b_area - inter_area;
+
+  if (union_area == 0.f) {
+    return 0;
+  }
+
+  return inter_area / union_area;
+}
+
+void UpdateTrack(Track& track, std::vector<cv::Point2f>& keypoints, std::vector<float>& score,
+                 const std::array<float, 4>& bbox, int n_history) {
+  if (track.scores.size() == n_history) {
+    std::rotate(track.keypoints.begin(), track.keypoints.begin() + 1, track.keypoints.end());
+    std::rotate(track.scores.begin(), track.scores.begin() + 1, track.scores.end());
+    std::rotate(track.bboxes.begin(), track.bboxes.begin() + 1, track.bboxes.end());
+    track.keypoints.back() = std::move(keypoints);
+    track.scores.back() = std::move(score);
+    track.bboxes.back() = bbox;
+  } else {
+    track.keypoints.push_back(std::move(keypoints));
+    track.scores.push_back(std::move(score));
+    track.bboxes.push_back(bbox);
+  }
+}
+
+std::vector<std::tuple<int, int, float>> GreedyAssignment(const std::vector<float>& scores,
+                                                          int n_rows, int n_cols, float thr) {
+  std::vector<int> used_rows(n_rows);
+  std::vector<int> used_cols(n_cols);
+  std::vector<std::tuple<int, int, float>> assignment;
+  assignment.reserve(std::max(n_rows, n_cols));
+  while (true) {
+    auto max_score = 0.f;
+    int max_row = -1;
+    int max_col = -1;
+    for (int i = 0; i < n_rows; ++i) {
+      if (!used_rows[i]) {
+        for (int j = 0; j < n_cols; ++j) {
+          if (!used_cols[j]) {
+            if (scores[i * n_cols + j] > max_score) {
+              max_score = scores[i * n_cols + j];
+              max_row = i;
+              max_col = j;
+            }
+          }
+        }
+      }
+    }
+    if (max_score < thr) {
+      break;
+    }
+    used_rows[max_row] = 1;
+    used_cols[max_col] = 1;
+    assignment.emplace_back(max_row, max_col, max_score);
+  }
+  return assignment;
+}
+
+void TrackStep(std::vector<std::vector<cv::Point2f>>& keypoints,
+               std::vector<std::vector<float>>& scores, TrackInfo& track_info, int img_h,
+               int img_w, float iou_thr, int min_keypoints, int n_history) {
+  auto& tracks = track_info.tracks;
+
+  std::vector<Track> new_tracks;
+  new_tracks.reserve(tracks.size());
+
+  std::vector<std::array<float, 4>> bboxes;
+  bboxes.reserve(keypoints.size());
+
+  std::vector<int> indices;
+  indices.reserve(keypoints.size());
+
+  for (size_t i = 0; i < keypoints.size(); ++i) {
+    if (auto bbox = keypoints_to_bbox(keypoints[i], scores[i], img_h, img_w, 1.f, 0.f)) {
+      bboxes.push_back(*bbox);
+      indices.push_back(i);
+    }
+  }
+
+  const auto n_rows = static_cast<int>(bboxes.size());
+  const auto n_cols = static_cast<int>(tracks.size());
+
+  std::vector<float> similarities(n_rows * n_cols);
+  for (size_t i = 0; i < n_rows; ++i) {
+    for (size_t j = 0; j < n_cols; ++j) {
+      similarities[i * n_cols + j] = ComputeIoU(bboxes[i], tracks[j].bboxes.back());
+    }
+  }
+
+  const auto assignment = GreedyAssignment(similarities, n_rows, n_cols, iou_thr);
+
+  std::vector<int> used(n_rows);
+  for (auto [i, j, _] : assignment) {
+    auto k = indices[i];
+    UpdateTrack(tracks[j], keypoints[k], scores[k], bboxes[i], n_history);
+    new_tracks.push_back(std::move(tracks[j]));
+    used[i] = true;
+  }
+
+  for (size_t i = 0; i < used.size(); ++i) {
+    if (used[i] == 0) {
+      auto k = indices[i];
+      auto count = std::count_if(scores[k].begin(), scores[k].end(), [](auto x) { return x > 0; });
+      if (count >= min_keypoints) {
+        auto& track = new_tracks.emplace_back();
+        track.track_id = track_info.next_id++;
+        UpdateTrack(track, keypoints[k], scores[k], bboxes[i], n_history);
+      }
+    }
+  }
+
+  tracks = std::move(new_tracks);
+}
+
+Value TrackPose(const Value& result, Value state) {
+  assert(state.is_pointer());
+  assert(result.is_array());
+  std::vector<std::vector<cv::Point2f>> keypoints;
+  std::vector<std::vector<float>> scores;
+  for (auto& output : result.array()) {
+    auto& k = keypoints.emplace_back();
+    auto& s = scores.emplace_back();
+    for (auto& kpt : output["key_points"].array()) {
+      k.push_back(cv::Point2f{kpt["bbox"][0].get<float>(), kpt["bbox"][1].get<float>()});
+      s.push_back(kpt["score"].get<float>());
+    }
+  }
+  auto& track_info = state["track_info"].get_ref<TrackInfo&>();
+  auto img_h = state["img_shape"][0].get<int>();
+  auto img_w = state["img_shape"][1].get<int>();
+  auto iou_thr = state["iou_thr"].get<float>();
+  auto min_keypoints = state["min_keypoints"].get<int>();
+  auto n_history = state["n_history"].get<int>();
+  TrackStep(keypoints, scores, track_info, img_h, img_w, iou_thr, min_keypoints, n_history);
+
+  Value::Array targets;
+  for (const auto& track : track_info.tracks) {
+    if (auto bbox = keypoints_to_bbox(track.keypoints.back(), track.scores.back(), img_h, img_w)) {
+      Value::Array kpts;
+      kpts.reserve(track.keypoints.back().size());
+      for (const auto& kpt : track.keypoints.back()) {
+        kpts.push_back(kpt.x);
+        kpts.push_back(kpt.y);
+      }
+      targets.push_back({{"bbox", to_value(*bbox)}, {"keypoints", std::move(kpts)}});
+    }
+  }
+  return targets;
+}
+REGISTER_SIMPLE_MODULE(TrackPose, TrackPose);
+
+class PoseTracker {
+ public:
+  using State = Value;
+
+ public:
+  PoseTracker(const Model& det_model, const Model& pose_model, Context context)
+      : pipeline_([&] {
+          context.Add("detection", det_model);
+          context.Add("pose", pose_model);
+          auto config = from_json<Value>(config_json);
+          return Pipeline{config, context};
+        }()) {}
+
+  State CreateState() {  // NOLINT
+    return make_pointer({{"frame_id", 0},
+                         {"n_history", 10},
+                         {"iou_thr", .3f},
+                         {"min_keypoints", 3},
+                         {"track_info", TrackInfo{}}});
+  }
+
+  Value Track(const Mat& img, State& state, int use_detector = -1) {
+    assert(state.is_pointer());
+    framework::Mat mat(img.desc().height, img.desc().width,
+                       static_cast<PixelFormat>(img.desc().format),
+                       static_cast<DataType>(img.desc().type), {img.desc().data, [](void*) {}});
+    // TODO: get_ref is not working
+    auto frame_id = state["frame_id"].get<int>();
+    if (use_detector < 0) {
+      use_detector = frame_id % 10 == 0;
+      if (use_detector) {
+        MMDEPLOY_WARN("use detector");
+      }
+    }
+    state["frame_id"] = frame_id + 1;
+    state["img_shape"] = {mat.height(), mat.width()};
+    Value::Object data{{"ori_img", mat}};
+    Value input{{data}, {use_detector}, {state}};
+    return pipeline_.Apply(input)[0][0];
+  }
+
+ private:
+  Pipeline pipeline_;
+};
+
+}  // namespace mmdeploy
+
+using namespace mmdeploy;
+
+void Visualize(cv::Mat& frame, const Value& result) {
+  static std::vector<std::pair<int, int>> skeleton{
+      {15, 13}, {13, 11}, {16, 14}, {14, 12}, {11, 12}, {5, 11}, {6, 12}, {5, 6}, {5, 7}, {6, 8},
+      {7, 9},   {8, 10},  {1, 2},   {0, 1},   {0, 2},   {1, 3},  {2, 4},  {3, 5}, {4, 6}};
+  const auto& targets = result.array();
+  for (const auto& target : targets) {
+    auto bbox = from_value<std::array<float, 4>>(target["bbox"]);
+    auto kpts = from_value<std::vector<float>>(target["keypoints"]);
+    cv::Point p1(bbox[0], bbox[1]);
+    cv::Point p2(bbox[2], bbox[3]);
+    cv::rectangle(frame, p1, p2, cv::Scalar(0, 255, 0));
+    for (int i = 0; i < kpts.size(); i += 2) {
+      cv::Point p(kpts[i], kpts[i + 1]);
+      cv::circle(frame, p, 1, cv::Scalar(0, 255, 255), 2, cv::LINE_AA);
+    }
+    for (int i = 0; i < skeleton.size(); ++i) {
+      auto [u, v] = skeleton[i];
+      cv::Point p_u(kpts[u * 2], kpts[u * 2 + 1]);
+      cv::Point p_v(kpts[v * 2], kpts[v * 2 + 1]);
+      cv::line(frame, p_u, p_v, cv::Scalar(0, 255, 255), 1, cv::LINE_AA);
+    }
+  }
+  cv::imshow("", frame);
+  cv::waitKey(10);
+}
+
+int main(int argc, char* argv[]) {
+  const auto device_name = argv[1];
+  const auto det_model_path = argv[2];
+  const auto pose_model_path = argv[3];
+  const auto video_path = argv[4];
+  Device device(device_name);
+  Context context(device);
+  auto pool = Scheduler::ThreadPool(4);
+  auto infer = Scheduler::Thread();
+  context.Add("pool", pool);
+  context.Add("infer", infer);
+  PoseTracker tracker(Model(det_model_path), Model(pose_model_path), context);
+  auto state = tracker.CreateState();
+
+  cv::Mat frame;
+  std::chrono::duration<double, std::milli> dt{};
+
+  int frame_id{};
+
+  cv::VideoCapture video(video_path);
+  while (true) {
+    video >> frame;
+    if (!frame.data) {
+      break;
+    }
+    auto t0 = std::chrono::high_resolution_clock::now();
+    auto result = tracker.Track(frame, state);
+    auto t1 = std::chrono::high_resolution_clock::now();
+    dt += t1 - t0;
+    ++frame_id;
+    Visualize(frame, result);
+  }
+
+  MMDEPLOY_INFO("frames: {}, time {} ms", frame_id, dt.count());
+}
diff --git a/docs/en/01-how-to-build/build_from_script.md b/docs/en/01-how-to-build/build_from_script.md
index 6c13b6969..33144e705 100644
--- a/docs/en/01-how-to-build/build_from_script.md
+++ b/docs/en/01-how-to-build/build_from_script.md
@@ -2,7 +2,13 @@
 
 Through user investigation, we know that most users are already familiar with python and torch before using mmdeploy. Therefore we provide scripts to simplify mmdeploy installation.
 
-Assuming you have a python ready (whether `conda` or `pyenv`), run this script to install mmdeploy + ncnn backend, `nproc` is not compulsory.
+Assuming you already have
+
+- python3 -m pip (`conda` or `pyenv`)
+- nvcc (depends on inference backend)
+- torch (not compulsory)
+
+run this script to install mmdeploy + ncnn backend, `nproc` is not compulsory.
 
 ```bash
 $ cd /path/to/mmdeploy
diff --git a/docs/en/01-how-to-build/build_from_source.md b/docs/en/01-how-to-build/build_from_source.md
index 3a8b14174..d9b456ed3 100644
--- a/docs/en/01-how-to-build/build_from_source.md
+++ b/docs/en/01-how-to-build/build_from_source.md
@@ -39,3 +39,4 @@ Please visit the following links to find out how to build MMDeploy according to
 - [NVIDIA Jetson](jetsons.md)
 - [SNPE](snpe.md)
 - [RISC-V](riscv.md)
+- [Rockchip](rockchip.md)
diff --git a/docs/en/01-how-to-build/cross_build_ncnn_aarch64.md b/docs/en/01-how-to-build/cross_build_ncnn_aarch64.md
new file mode 100644
index 000000000..c63b898ec
--- /dev/null
+++ b/docs/en/01-how-to-build/cross_build_ncnn_aarch64.md
@@ -0,0 +1,108 @@
+# Ubuntu Cross Build aarch64
+
+MMDeploy chose ncnn as the inference backend for aarch64 embedded Linux devices. There are two parts:
+
+Host
+
+- model conversion
+- cross build SDK and demo for embedded devices
+
+Device
+
+- run the converted model
+
+## 1. Model Convert on Host
+
+Refer to the docs to install [mmdeploy](../01-how-to-build/) and [mmcls](https://github.com/open-mmlab/mmclassification), and convert resnet18 into a model package
+
+```bash
+export MODEL_CONFIG=/path/to/mmclassification/configs/resnet/resnet18_8xb32_in1k.py
+export MODEL_PATH=https://download.openmmlab.com/mmclassification/v0/resnet/resnet18_8xb32_in1k_20210831-fbbb1da6.pth
+
+# Convert resnet18
+cd /path/to/mmdeploy
+python tools/deploy.py \
+  configs/mmcls/classification_ncnn_static.py \
+  $MODEL_CONFIG \
+  $MODEL_PATH \
+  tests/data/tiger.jpeg \
+  --work-dir resnet18 \
+  --device cpu \
+  --dump-info
+```
+## 2. Cross Build on Host
+
+It is recommended to compile directly with the script:
+
+```bash
+sh -x tools/scripts/ubuntu_cross_build_aarch64.sh
+```
+
+The following is the manual process corresponding to the script:
+
+a) Install aarch64 build tools
+
+```bash
+sudo apt install -y gcc-aarch64-linux-gnu g++-aarch64-linux-gnu
+```
+
+b) Cross build opencv and install to /tmp/ocv-aarch64
+
+```bash
+git clone https://github.com/opencv/opencv --depth=1 --branch=4.x --recursive
+cd opencv/platforms/linux/
+mkdir build && cd build
+cmake ../../.. \
+  -DCMAKE_INSTALL_PREFIX=/tmp/ocv-aarch64 \
+  -DCMAKE_TOOLCHAIN_FILE=../aarch64-gnu.toolchain.cmake
+make -j && make install
+ls -alh /tmp/ocv-aarch64
+..
+```
+
+c) Cross build ncnn and install to /tmp/ncnn-aarch64
+
+```bash
+git clone https://github.com/tencent/ncnn --branch 20220729 --depth=1
+cd ncnn
+mkdir build && cd build
+cmake .. \
+  -DCMAKE_TOOLCHAIN_FILE=../toolchains/aarch64-linux-gnu.toolchain.cmake \
+  -DCMAKE_INSTALL_PREFIX=/tmp/ncnn-aarch64
+make -j && make install
+ls -alh /tmp/ncnn-aarch64
+..
+```
+
+d) Cross build mmdeploy
+
+```bash
+git submodule init
+git submodule update
+mkdir build && cd build
+cmake .. \
+  -DCMAKE_TOOLCHAIN_FILE=../cmake/toolchains/aarch64-linux-gnu.cmake \
+  -DMMDEPLOY_TARGET_DEVICES="cpu" \
+  -DMMDEPLOY_TARGET_BACKENDS="ncnn" \
+  -Dncnn_DIR=/tmp/ncnn-aarch64/lib/cmake/ncnn \
+  -DOpenCV_DIR=/tmp/ocv-aarch64/lib/cmake/opencv4
+make install
+ls -lah install/bin/*
+..
+```
+
+## 3. Execute on Device
+
+Make sure that `--dump-info` is used during model conversion, so that the `resnet18` directory contains the files required by the SDK such as `pipeline.json`.
+
+Copy the model folder (resnet18), the executable (image_classification), the test image (tests/data/tiger.jpeg) and the prebuilt OpenCV (/tmp/ocv-aarch64) to the device.
+
+```bash
+./image_classification cpu ./resnet18 tiger.jpeg
+..
+label: 292, score: 0.9261
+label: 282, score: 0.0726
+label: 290, score: 0.0008
+label: 281, score: 0.0002
+label: 340, score: 0.0001
+```
diff --git a/docs/en/01-how-to-build/rockchip.md b/docs/en/01-how-to-build/rockchip.md
new file mode 100644
index 000000000..d099914ba
--- /dev/null
+++ b/docs/en/01-how-to-build/rockchip.md
@@ -0,0 +1,147 @@
+# Build for RKNN
+
+This tutorial is based on Linux systems like Ubuntu-18.04 and Rockchip NPUs like the `rk3588`.
+
+## Installation
+
+It is recommended to create a virtual environment for the project.
+
+1. get RKNN-Toolkit2 through:
+
+   ```
+   git clone git@github.com:rockchip-linux/rknn-toolkit2.git
+   ```
+
+2. install the RKNN python package following the [official doc](https://github.com/rockchip-linux/rknn-toolkit2/tree/master/doc). In our testing, we used rknn-toolkit2 1.2.0 with commit id `834ba0b0a1ab8ee27024443d77b02b5ba48b67fc`. When installing rknn-toolkit2, it is better to append `--no-deps` to the commands to avoid dependency conflicts. For example:
+
+   ```
+   pip install packages/rknn_toolkit2-1.2.0_f7bb160f-cp36-cp36m-linux_x86_64.whl --no-deps
+   ```
+
+3. Install onnx==1.8.0 before reinstalling MMDeploy from source following the [instructions](../01-how-to-build/build_from_source.md). Note that there are conflicts between the pip dependencies of MMDeploy and RKNN. Here are the suggested package versions for python 3.6:
+
+   ```
+   protobuf==3.19.4
+   onnx==1.8.0
+   onnxruntime==1.8.0
+   torch==1.8.0
+   torchvision==0.9.0
+   ```
+
+4. Install torch and torchvision using conda. For example:
For example:
+
+```
+conda install pytorch==1.8.0 torchvision==0.9.0 cudatoolkit=11.1 -c pytorch -c conda-forge
+```
+
+To work with models from [MMClassification](https://mmclassification.readthedocs.io/en/latest/getting_started.html), you may need to install it additionally.
+
+## Usage
+
+Example:
+
+```bash
+python tools/deploy.py \
+    configs/mmcls/classification_rknn_static.py \
+    /mmclassification_dir/configs/resnet/resnet50_8xb32_in1k.py \
+    https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_batch256_imagenet_20200708-cfb998bf.pth \
+    /mmclassification_dir/demo/demo.JPEG \
+    --work-dir ../resnet50 \
+    --device cpu
+```
+
+## Deployment config
+
+With the deployment config, you can modify the `backend_config` for your preference. An example `backend_config` of mmclassification is shown below:
+
+```python
+backend_config = dict(
+    type='rknn',
+    common_config=dict(
+        mean_values=None,
+        std_values=None,
+        target_platform='rk3588',
+        optimization_level=3),
+    quantization_config=dict(do_quantization=False, dataset=None),
+    input_size_list=[[3, 224, 224]])
+
+```
+
+The contents of `common_config` are for `rknn.config()`. The contents of `quantization_config` are used to control `rknn.build()`.
+
+## Build SDK with Rockchip NPU
+
+1. Get rknpu2 through:
+
+   ```
+   git clone git@github.com:rockchip-linux/rknpu2.git
+   ```
+
+2. For Linux, download the gcc cross compiler. The download link from the official `rknpu2` user guide has been deprecated, so you may use another verified [link](https://github.com/Caesar-github/gcc-buildroot-9.3.0-2020.03-x86_64_aarch64-rockchip-linux-gnu). After downloading and unzipping the compiler, open a terminal and set `RKNN_TOOL_CHAIN` and `RKNPU2_DEVICE_DIR` by `export RKNN_TOOL_CHAIN=/path/to/gcc/usr; export RKNPU2_DEVICE_DIR=/path/to/rknpu2/runtime/RK3588`.
+
+3. After the above preparation, run the following commands:
+
+```shell
+cd /path/to/mmdeploy
+mkdir -p build && rm -rf build/CM* && cd build
+export LD_LIBRARY_PATH=$RKNN_TOOL_CHAIN/lib64:$LD_LIBRARY_PATH
+cmake \
+    -DCMAKE_TOOLCHAIN_FILE=/path/to/mmdeploy/cmake/toolchains/rknpu2-linux-gnu.cmake \
+    -DMMDEPLOY_BUILD_SDK=ON \
+    -DCMAKE_BUILD_TYPE=Debug \
+    -DOpenCV_DIR=${RKNPU2_DEVICE_DIR}/../../examples/3rdparty/opencv/opencv-linux-aarch64/share/OpenCV \
+    -DMMDEPLOY_BUILD_SDK_PYTHON_API=ON \
+    -DMMDEPLOY_TARGET_DEVICES="cpu" \
+    -DMMDEPLOY_TARGET_BACKENDS="rknn" \
+    -DMMDEPLOY_CODEBASES=all \
+    -DMMDEPLOY_BUILD_TEST=ON \
+    -DMMDEPLOY_BUILD_EXAMPLES=ON \
+    ..
+make && make install
+```
+
+## Run the demo with SDK
+
+First, make sure that `--dump-info` was used during model conversion, so that the working directory contains the files required by the SDK such as `pipeline.json`.
+
+`adb push` the model directory, the executable file and the `.so` libraries to the device.
+
+```bash
+cd /path/to/mmdeploy
+adb push resnet50 /data/local/tmp/resnet50
+adb push /mmclassification_dir/demo/demo.JPEG /data/local/tmp/resnet50/demo.JPEG
+cd build
+adb push lib /data/local/tmp/lib
+adb push bin/image_classification /data/local/tmp/image_classification
+```
+
+Set up the environment variables and run the sample.
+
+```bash
+adb shell
+cd /data/local/tmp
+export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/data/local/tmp/lib
+./image_classification cpu ./resnet50 ./resnet50/demo.JPEG
+..
+label: 65, score: 0.95
+```
+
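+Since the build above also enables `MMDEPLOY_BUILD_SDK_PYTHON_API`, the same inference can be expressed through the SDK Python API. A minimal sketch, assuming a Python environment on the device with the built `mmdeploy_python` module importable (argument names follow the SDK demo scripts):
+
+```python
+import cv2
+from mmdeploy_python import Classifier
+
+# model_path is the converted model directory pushed to the device above.
+classifier = Classifier(model_path='./resnet50', device_name='cpu', device_id=0)
+img = cv2.imread('./resnet50/demo.JPEG')
+# The result is a list of (label_id, score) pairs, matching the C++ demo output.
+for label_id, score in classifier(img):
+    print(f'label: {label_id}, score: {score:.4f}')
+```
+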
+## Troubleshooting
+
+- Quantization fails.
+
+  Empirically, RKNN requires unnormalized inputs if `do_quantization` is set to `True`. Please modify the settings of `Normalize` in the `model_cfg` from
+
+  ```python
+  img_norm_cfg = dict(
+      mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
+  ```
+
+  to
+
+  ```python
+  img_norm_cfg = dict(
+      mean=[0, 0, 0], std=[1, 1, 1], to_rgb=True)
+  ```
+
+  Besides, the `mean_values` and `std_values` of `deploy_cfg` should be set to the original normalization settings of `model_cfg`, i.e. `mean_values=[123.675, 116.28, 103.53]` and `std_values=[58.395, 57.12, 57.375]`.
diff --git a/docs/en/02-how-to-run/prebuilt_package_windows.md b/docs/en/02-how-to-run/prebuilt_package_windows.md
index b1ae037e9..9fa456476 100644
--- a/docs/en/02-how-to-run/prebuilt_package_windows.md
+++ b/docs/en/02-how-to-run/prebuilt_package_windows.md
@@ -21,7 +21,7 @@ ______________________________________________________________________

-This tutorial takes `mmdeploy-0.9.0-windows-amd64-onnxruntime1.8.1.zip` and `mmdeploy-0.9.0-windows-amd64-cuda11.1-tensorrt8.2.3.0.zip` as examples to show how to use the prebuilt packages.
+This tutorial takes `mmdeploy-0.10.0-windows-amd64-onnxruntime1.8.1.zip` and `mmdeploy-0.10.0-windows-amd64-cuda11.1-tensorrt8.2.3.0.zip` as examples to show how to use the prebuilt packages.

 The directory structure of the prebuilt package is as follows, where the `dist` folder is about model converter, and the `sdk` folder is related to model inference.

@@ -80,9 +80,9 @@ In order to use `ONNX Runtime` backend, you should also do the following steps.

 5. Install `mmdeploy` (Model Converter) and `mmdeploy_python` (SDK Python API).

    ```bash
-   # download mmdeploy-0.9.0-windows-amd64-onnxruntime1.8.1.zip
-   pip install .\mmdeploy-0.9.0-windows-amd64-onnxruntime1.8.1\dist\mmdeploy-0.9.0-py38-none-win_amd64.whl
-   pip install .\mmdeploy-0.9.0-windows-amd64-onnxruntime1.8.1\sdk\python\mmdeploy_python-0.9.0-cp38-none-win_amd64.whl
+   # download mmdeploy-0.10.0-windows-amd64-onnxruntime1.8.1.zip
+   pip install .\mmdeploy-0.10.0-windows-amd64-onnxruntime1.8.1\dist\mmdeploy-0.10.0-py38-none-win_amd64.whl
+   pip install .\mmdeploy-0.10.0-windows-amd64-onnxruntime1.8.1\sdk\python\mmdeploy_python-0.10.0-cp38-none-win_amd64.whl
    ```

    :point_right: If you have installed it before, please uninstall it first.

@@ -107,9 +107,9 @@ In order to use `TensorRT` backend, you should also do the following steps.

 5. Install `mmdeploy` (Model Converter) and `mmdeploy_python` (SDK Python API).

    ```bash
-   # download mmdeploy-0.9.0-windows-amd64-cuda11.1-tensorrt8.2.3.0.zip
-   pip install .\mmdeploy-0.9.0-windows-amd64-cuda11.1-tensorrt8.2.3.0\dist\mmdeploy-0.9.0-py38-none-win_amd64.whl
-   pip install .\mmdeploy-0.9.0-windows-amd64-cuda11.1-tensorrt8.2.3.0\sdk\python\mmdeploy_python-0.9.0-cp38-none-win_amd64.whl
+   # download mmdeploy-0.10.0-windows-amd64-cuda11.1-tensorrt8.2.3.0.zip
+   pip install .\mmdeploy-0.10.0-windows-amd64-cuda11.1-tensorrt8.2.3.0\dist\mmdeploy-0.10.0-py38-none-win_amd64.whl
+   pip install .\mmdeploy-0.10.0-windows-amd64-cuda11.1-tensorrt8.2.3.0\sdk\python\mmdeploy_python-0.10.0-cp38-none-win_amd64.whl
    ```

    :point_right: If you have installed it before, please uninstall it first.

@@ -138,7 +138,7 @@ After preparation work, the structure of the current working directory should be

 ```
 ..
-|-- mmdeploy-0.9.0-windows-amd64-onnxruntime1.8.1
+|-- mmdeploy-0.10.0-windows-amd64-onnxruntime1.8.1
 |-- mmclassification
 |-- mmdeploy
 `-- resnet18_8xb32_in1k_20210831-fbbb1da6.pth
@@ -186,7 +186,7 @@ After installation of mmdeploy-tensorrt prebuilt package, the structure of the c

 ```
 ..
-|-- mmdeploy-0.9.0-windows-amd64-cuda11.1-tensorrt8.2.3.0
+|-- mmdeploy-0.10.0-windows-amd64-cuda11.1-tensorrt8.2.3.0
 |-- mmclassification
 |-- mmdeploy
 `-- resnet18_8xb32_in1k_20210831-fbbb1da6.pth
@@ -249,8 +249,8 @@ The structure of current working directory:

 ```
 .
-|-- mmdeploy-0.9.0-windows-amd64-cuda11.1-tensorrt8.2.3.0
-|-- mmdeploy-0.9.0-windows-amd64-onnxruntime1.8.1
+|-- mmdeploy-0.10.0-windows-amd64-cuda11.1-tensorrt8.2.3.0
+|-- mmdeploy-0.10.0-windows-amd64-onnxruntime1.8.1
 |-- mmclassification
 |-- mmdeploy
 |-- resnet18_8xb32_in1k_20210831-fbbb1da6.pth
@@ -311,7 +311,7 @@ The following describes how to use the SDK's C API for inference

 1. Build examples

-   Under `mmdeploy-0.9.0-windows-amd64-onnxruntime1.8.1\sdk\example` directory
+   Under `mmdeploy-0.10.0-windows-amd64-onnxruntime1.8.1\sdk\example` directory

    ```
    // Path should be modified according to the actual location
@@ -319,7 +319,7 @@ The following describes how to use the SDK's C API for inference
    cd build
    cmake ..\cpp -A x64 -T v142 `
      -DOpenCV_DIR=C:\Deps\opencv\build\x64\vc15\lib `
-     -DMMDeploy_DIR=C:\workspace\mmdeploy-0.9.0-windows-amd64-onnxruntime1.8.1\sdk\lib\cmake\MMDeploy `
+     -DMMDeploy_DIR=C:\workspace\mmdeploy-0.10.0-windows-amd64-onnxruntime1.8.1\sdk\lib\cmake\MMDeploy `
      -DONNXRUNTIME_DIR=C:\Deps\onnxruntime\onnxruntime-win-gpu-x64-1.8.1
    cmake --build . --config Release

@@ -329,7 +329,7 @@ The following describes how to use the SDK's C API for inference

    :point_right: The purpose is to make the exe find the relevant dll

-   If choose to add environment variables, add the runtime libraries path of `mmdeploy` (`mmdeploy-0.9.0-windows-amd64-onnxruntime1.8.1\sdk\bin`) to the `PATH`.
+   If you choose to add environment variables, add the runtime library path of `mmdeploy` (`mmdeploy-0.10.0-windows-amd64-onnxruntime1.8.1\sdk\bin`) to the `PATH`.

    If choose to copy the dynamic libraries, copy the dll in the bin directory to the same level directory of the just compiled exe (build/Release).

@@ -337,7 +337,7 @@ The following describes how to use the SDK's C API for inference

    It is recommended to use `CMD` here.

-   Under `mmdeploy-0.9.0-windows-amd64-onnxruntime1.8.1\\sdk\\example\\build\\Release` directory:
+   Under `mmdeploy-0.10.0-windows-amd64-onnxruntime1.8.1\\sdk\\example\\build\\Release` directory:

    ```
    .\image_classification.exe cpu C:\workspace\work_dir\onnx\resnet\ C:\workspace\mmclassification\demo\demo.JPEG
@@ -347,7 +347,7 @@ The following describes how to use the SDK's C API for inference

 1. Build examples

-   Under `mmdeploy-0.9.0-windows-amd64-cuda11.1-tensorrt8.2.3.0\\sdk\\example` directory
+   Under `mmdeploy-0.10.0-windows-amd64-cuda11.1-tensorrt8.2.3.0\\sdk\\example` directory

    ```
    // Path should be modified according to the actual location
@@ -355,7 +355,7 @@ The following describes how to use the SDK's C API for inference
    cd build
    cmake ..\cpp -A x64 -T v142 `
      -DOpenCV_DIR=C:\Deps\opencv\build\x64\vc15\lib `
-     -DMMDeploy_DIR=C:\workspace\mmdeploy-0.9.0-windows-amd64-cuda11.1-tensorrt8 2.3.0\sdk\lib\cmake\MMDeploy `
+     -DMMDeploy_DIR=C:\workspace\mmdeploy-0.10.0-windows-amd64-cuda11.1-tensorrt8.2.3.0\sdk\lib\cmake\MMDeploy `
      -DTENSORRT_DIR=C:\Deps\tensorrt\TensorRT-8.2.3.0 `
      -DCUDNN_DIR=C:\Deps\cudnn\8.2.1
    cmake --build . --config Release

@@ -365,7 +365,7 @@ The following describes how to use the SDK's C API for inference

    :point_right: The purpose is to make the exe find the relevant dll

-   If choose to add environment variables, add the runtime libraries path of `mmdeploy` (`mmdeploy-0.9.0-windows-amd64-cuda11.1-tensorrt8.2.3.0\sdk\bin`) to the `PATH`.
+   If you choose to add environment variables, add the runtime library path of `mmdeploy` (`mmdeploy-0.10.0-windows-amd64-cuda11.1-tensorrt8.2.3.0\sdk\bin`) to the `PATH`.

    If choose to copy the dynamic libraries, copy the dll in the bin directory to the same level directory of the just compiled exe (build/Release).

@@ -373,7 +373,7 @@ The following describes how to use the SDK's C API for inference

    It is recommended to use `CMD` here.

-   Under `mmdeploy-0.9.0-windows-amd64-cuda11.1-tensorrt8.2.3.0\\sdk\\example\\build\\Release` directory
+   Under `mmdeploy-0.10.0-windows-amd64-cuda11.1-tensorrt8.2.3.0\\sdk\\example\\build\\Release` directory

    ```
    .\image_classification.exe cuda C:\workspace\work_dir\trt\resnet C:\workspace\mmclassification\demo\demo.JPEG
diff --git a/docs/en/05-supported-backends/rknn.md b/docs/en/05-supported-backends/rknn.md
index 450b7e0d3..4c6c50f9e 100644
--- a/docs/en/05-supported-backends/rknn.md
+++ b/docs/en/05-supported-backends/rknn.md
@@ -1,80 +1,9 @@
-# RKNN support
+# Supported RKNN features

-This tutorial is based on Linux systems like Ubuntu-18.04 and Rockchip NPU like `rk3588`.
+Currently, MMDeploy has only been tested on rk3588 with the Linux platform.

-## Installation
+The following features cannot be enabled automatically by MMDeploy; you need to modify the configuration manually, as in [here](https://github.com/open-mmlab/mmdeploy/blob/master/configs/_base_/backends/rknn.py). A sketch of such an override follows the list.

-It is recommended to create a virtual environment for the project.
-
-1. get RKNN-Toolkit2 through:
-
-   ```
-   git clone https://github.com/rockchip-linux/rknn-toolkit2
-   ```
-
-2. install RKNN python package following [official doc](https://github.com/rockchip-linux/rknn-toolkit2/tree/master/doc). In our testing, we used the rknn-toolkit 1.2.0 with commit id `834ba0b0a1ab8ee27024443d77b02b5ba48b67fc`.
-
-3. reinstall MMDeploy from source following the [instructions](../01-how-to-build/build_from_source.md). Note that there are conflicts between the pip dependencies of MMDeploy and RKNN. Here is the suggested packages versions for python 3.6:
-
-   ```
-   protobuf==3.19.4
-   onnx==1.8.0
-   onnxruntime==1.8.0
-   torch==1.8.0
-   torchvision==0.9.0
-   ```
-
-To work with models from [MMDetection](https://github.com/open-mmlab/mmdetection/blob/master/docs/get_started.md), you may need to install it additionally.
-
-## Usage
-
-Example:
-
-```bash
-python tools/deploy.py \
-    configs/mmdet/detection/detection_rknn_static.py \
-    /mmdetection_dir/mmdetection/configs/yolo/yolov3_d53_mstrain-608_273e_coco.py \
-    /tmp/snapshots/yolov3_d53_mstrain-608_273e_coco_20210518_115020-a2c3acb8.pth \
-    tests/data/tiger.jpeg \
-    --work-dir ../deploy_result \
-    --device cpu
-```
-
-## Deployment config
-
-With the deployment config, you can modify the `backend_config` for your preference. An example `backend_config` of mmclassification is shown as below:
-
-```python
-backend_config = dict(
-    type='rknn',
-    common_config=dict(
-        mean_values=None,
-        std_values=None,
-        target_platform='rk3588',
-        optimization_level=3),
-    quantization_config=dict(do_quantization=False, dataset=None),
-    input_size_list=[[3, 224, 224]])
-
-```
-
-The contents of `common_config` are for `rknn.config()`. The contents of `quantization_config` are used to control `rknn.build()`.
-
-## Troubleshooting
-
-- Quantization fails.
-
-  Empirically, RKNN require the inputs not normalized if `do_quantization` is set to `False`. Please modify the settings of `Normalize` in the `model_cfg` from
-
-  ```python
-  img_norm_cfg = dict(
-      mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
-  ```
-
-  to
-
-  ```python
-  img_norm_cfg = dict(
-      mean=[0, 0, 0], std=[1, 1, 1], to_rgb=True)
-  ```
-
-  Besides, the `mean_values` and `std_values` of deploy_cfg should be replaced with original normalization settings of `model_cfg`. Let `mean_values=[123.675, 116.28, 103.53]` and `std_values=[58.395, 57.12, 57.375]`.
+- target_platform other than `3588`
+- quantization settings
+- optimization level other than 3
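+
+A minimal sketch of such a manual override (the `rk3566` platform string and the calibration dataset path are hypothetical placeholders; the field layout follows the `backend_config` example in the build docs):
+
+```python
+backend_config = dict(
+    type='rknn',
+    common_config=dict(
+        mean_values=None,
+        std_values=None,
+        target_platform='rk3566',  # assumption: a platform other than rk3588
+        optimization_level=1),  # any level other than the default 3
+    quantization_config=dict(
+        do_quantization=True,  # turn quantization on manually
+        dataset='/path/to/calibration_dataset.txt'),  # hypothetical path
+    input_size_list=[[3, 224, 224]])
+```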
diff --git a/docs/en/get_started.md b/docs/en/get_started.md
index 12a9dfac0..5190b45fa 100644
--- a/docs/en/get_started.md
+++ b/docs/en/get_started.md
@@ -118,11 +118,11 @@ Take the latest precompiled package as example, you can install it as follows:

 ```shell
 # install MMDeploy
-wget https://github.com/open-mmlab/mmdeploy/releases/download/v0.9.0/mmdeploy-0.9.0-linux-x86_64-onnxruntime1.8.1.tar.gz
-tar -zxvf mmdeploy-0.9.0-linux-x86_64-onnxruntime1.8.1.tar.gz
-cd mmdeploy-0.9.0-linux-x86_64-onnxruntime1.8.1
-pip install dist/mmdeploy-0.9.0-py3-none-linux_x86_64.whl
-pip install sdk/python/mmdeploy_python-0.9.0-cp38-none-linux_x86_64.whl
+wget https://github.com/open-mmlab/mmdeploy/releases/download/v0.10.0/mmdeploy-0.10.0-linux-x86_64-onnxruntime1.8.1.tar.gz
+tar -zxvf mmdeploy-0.10.0-linux-x86_64-onnxruntime1.8.1.tar.gz
+cd mmdeploy-0.10.0-linux-x86_64-onnxruntime1.8.1
+pip install dist/mmdeploy-0.10.0-py3-none-linux_x86_64.whl
+pip install sdk/python/mmdeploy_python-0.10.0-cp38-none-linux_x86_64.whl
 cd ..
 # install inference engine: ONNX Runtime
 pip install onnxruntime==1.8.1
@@ -139,11 +139,11 @@ export LD_LIBRARY_PATH=$ONNXRUNTIME_DIR/lib:$LD_LIBRARY_PATH

 ```shell
 # install MMDeploy
-wget https://github.com/open-mmlab/mmdeploy/releases/download/v0.9.0/mmdeploy-0.9.0-linux-x86_64-cuda11.1-tensorrt8.2.3.0.tar.gz
-tar -zxvf mmdeploy-0.9.0-linux-x86_64-cuda11.1-tensorrt8.2.3.0.tar.gz
-cd mmdeploy-0.9.0-linux-x86_64-cuda11.1-tensorrt8.2.3.0
-pip install dist/mmdeploy-0.9.0-py3-none-linux_x86_64.whl
-pip install sdk/python/mmdeploy_python-0.9.0-cp38-none-linux_x86_64.whl
+wget https://github.com/open-mmlab/mmdeploy/releases/download/v0.10.0/mmdeploy-0.10.0-linux-x86_64-cuda11.1-tensorrt8.2.3.0.tar.gz
+tar -zxvf mmdeploy-0.10.0-linux-x86_64-cuda11.1-tensorrt8.2.3.0.tar.gz
+cd mmdeploy-0.10.0-linux-x86_64-cuda11.1-tensorrt8.2.3.0
+pip install dist/mmdeploy-0.10.0-py3-none-linux_x86_64.whl
+pip install sdk/python/mmdeploy_python-0.10.0-cp38-none-linux_x86_64.whl
 cd ..
 # install inference engine: TensorRT
 # !!! Download TensorRT-8.2.3.0 CUDA 11.x tar package from NVIDIA, and extract it to the current directory
@@ -232,7 +232,7 @@ result = inference_model(

 You can directly run MMDeploy demo programs in the precompiled package to get inference results.

 ```shell
-cd mmdeploy-0.9.0-linux-x86_64-cuda11.1-tensorrt8.2.3.0
+cd mmdeploy-0.10.0-linux-x86_64-cuda11.1-tensorrt8.2.3.0
 # run python demo
 python sdk/example/python/object_detection.py cuda ../mmdeploy_model/faster-rcnn ../mmdetection/demo/demo.jpg
 # run C/C++ demo
diff --git a/docs/en/index.rst b/docs/en/index.rst
index 3fd890351..2f6a91b40 100644
--- a/docs/en/index.rst
+++ b/docs/en/index.rst
@@ -61,6 +61,7 @@ You can switch between Chinese and English documents in the lower-left corner of
    05-supported-backends/snpe.md
    05-supported-backends/tensorrt.md
    05-supported-backends/torchscript.md
+   05-supported-backends/rknn.md

 .. toctree::
    :maxdepth: 1
diff --git a/docs/zh_cn/01-how-to-build/build_from_script.md b/docs/zh_cn/01-how-to-build/build_from_script.md
index 2c73d1f8a..848828917 100644
--- a/docs/zh_cn/01-how-to-build/build_from_script.md
+++ b/docs/zh_cn/01-how-to-build/build_from_script.md
@@ -2,7 +2,13 @@

 通过用户调研,我们得知多数使用者在了解 mmdeploy 前,已经熟知 python 和 torch 用法。因此我们提供脚本简化 mmdeploy 安装。

-假设您已经准备好 Python3.6 pip 以上环境(无论 conda 或 pyenv),运行这个脚本来安装 mmdeploy + ncnn backend,`nproc` 可以不指定。
+假设您已经准备好
+
+- python3 -m pip(必须,conda 或 pyenv 皆可)
+- nvcc(取决于推理后端)
+- torch(非必须,可延后安装)
+
+运行这个脚本来安装 mmdeploy + ncnn backend,`nproc` 可以不指定。

 ```bash
 $ cd /path/to/mmdeploy
diff --git a/docs/zh_cn/01-how-to-build/build_from_source.md b/docs/zh_cn/01-how-to-build/build_from_source.md
index 0f8045a66..eb2f14a17 100644
--- a/docs/zh_cn/01-how-to-build/build_from_source.md
+++ b/docs/zh_cn/01-how-to-build/build_from_source.md
@@ -42,3 +42,4 @@ git clone -b master git@github.com:open-mmlab/mmdeploy.git --recursive
 - [NVIDIA Jetson](jetsons.md)
 - [Qcom SNPE](snpe.md)
 - [RISC-V](riscv.md)
+- [Rockchip](rockchip.md)
diff --git a/docs/zh_cn/01-how-to-build/cross_build_ncnn_aarch64.md b/docs/zh_cn/01-how-to-build/cross_build_ncnn_aarch64.md
new file mode 100644
index 000000000..a84ef54b6
--- /dev/null
+++ b/docs/zh_cn/01-how-to-build/cross_build_ncnn_aarch64.md
@@ -0,0 +1,108 @@
+# ubuntu 交叉编译 aarch64
+
+mmdeploy 选 ncnn 作为 aarch64 嵌入式 linux 设备的推理后端。 完整的部署分为两部分:
+
+Host
+
+- 模型转换
+- 交叉编译嵌入式设备所需 SDK 和 bin
+
+Device
+
+- 运行编译结果
+
+## 1. Host 模型转换
+
+参照文档安装 [mmdeploy](../01-how-to-build/) 和 [mmcls](https://github.com/open-mmlab/mmclassification),转换 resnet18 对应模型包
+
+```bash
+export MODEL_CONFIG=/path/to/mmclassification/configs/resnet/resnet18_8xb32_in1k.py
+export MODEL_PATH=https://download.openmmlab.com/mmclassification/v0/resnet/resnet18_8xb32_in1k_20210831-fbbb1da6.pth
+
+# 模型转换
+cd /path/to/mmdeploy
+python tools/deploy.py \
+  configs/mmcls/classification_ncnn_static.py \
+  $MODEL_CONFIG \
+  $MODEL_PATH \
+  tests/data/tiger.jpeg \
+  --work-dir resnet18 \
+  --device cpu \
+  --dump-info
+```
+
+## 2. Host 交叉编译
+
+建议直接用脚本编译
+
+```bash
+sh -x tools/scripts/ubuntu_cross_build_aarch64.sh
+```
+
+以下是脚本对应的手动过程
+
+a) 安装 aarch64 交叉编译工具
+
+```bash
+sudo apt install -y gcc-aarch64-linux-gnu g++-aarch64-linux-gnu
+```
+
+b) 交叉编译 opencv 安装到 tmp 目录
+
+```bash
+git clone https://github.com/opencv/opencv --depth=1 --branch=4.x --recursive
+cd opencv/platforms/linux/
+mkdir build && cd build
+cmake ../../.. \
+  -DCMAKE_INSTALL_PREFIX=/tmp/ocv-aarch64 \
+  -DCMAKE_TOOLCHAIN_FILE=../aarch64-gnu.toolchain.cmake
+make -j && make install
+ls -alh /tmp/ocv-aarch64
+..
+```
+
+c) 交叉编译 ncnn 安装到 tmp 目录
+
+```bash
+git clone https://github.com/tencent/ncnn --branch 20220729 --depth=1
+cd ncnn
+mkdir build && cd build
+cmake ..
\ + -DCMAKE_TOOLCHAIN_FILE=../toolchains/aarch64-linux-gnu.toolchain.cmake \ + -DCMAKE_INSTALL_PREFIX=/tmp/ncnn-aarch64 +make -j && make install +ls -alh /tmp/ncnn-aarch64 +.. +``` + +d) 交叉编译 mmdeploy,install/bin 目录是可执行文件 + +```bash +git submodule init +git submodule update +mkdir build && cd build +cmake .. \ + -DCMAKE_TOOLCHAIN_FILE=../cmake/toolchains/aarch64-linux-gnu.cmake \ + -DMMDEPLOY_TARGET_DEVICES="cpu" \ + -DMMDEPLOY_TARGET_BACKENDS="ncnn" \ + -Dncnn_DIR=/tmp/ncnn-aarch64/lib/cmake/ncnn \ + -DOpenCV_DIR=/tmp/ocv-aarch64/lib/cmake/opencv4 +make install +ls -lah install/bin/* +.. +``` + +## 3. Device 执行 + +确认转换模型用了 `--dump-info`,这样 `resnet18` 目录才有 `pipeline.json` 等 SDK 所需文件。 + +把 dump 好的模型目录(resnet18)、可执行文件(image_classification)、测试图片(tests/data/tiger.jpeg)、交叉编译的 OpenCV(/tmp/ocv-aarch64) 拷贝到设备中 + +```bash +./image_classification cpu ./resnet18 tiger.jpeg +.. +label: 292, score: 0.9261 +label: 282, score: 0.0726 +label: 290, score: 0.0008 +label: 281, score: 0.0002 +label: 340, score: 0.0001 +``` diff --git a/docs/zh_cn/01-how-to-build/rockchip.md b/docs/zh_cn/01-how-to-build/rockchip.md new file mode 100644 index 000000000..0161a972a --- /dev/null +++ b/docs/zh_cn/01-how-to-build/rockchip.md @@ -0,0 +1,147 @@ +# 支持 RKNN + +本教程基于 Ubuntu-18.04 和 Rockchip `rk3588` NPU。 + +## 安装 + +建议为项目创建一个虚拟环境。 + +1. 获取 RKNN-Toolkit2: + + ``` + git clone git@github.com:rockchip-linux/rknn-toolkit2.git + ``` + +2. 通过 [官方文档](https://github.com/rockchip-linux/rknn-toolkit2/tree/master/doc),安装 RKNN python 安装包. 在我们的测试中, 使用的 rknn-toolkit 版本是 1.2.0,commit id `834ba0b0a1ab8ee27024443d77b02b5ba48b67fc`。安装 rknn-toolkit2 时,最好在安装命令后添加`--no-deps`,以避免依赖包的冲突。比如: + + ``` + pip install packages/rknn_toolkit2-1.2.0_f7bb160f-cp36-cp36m-linux_x86_64.whl --no-deps + ``` + +3. 先安装onnx==1.8.0,跟着 [instructions](../01-how-to-build/build_from_source.md),源码安装 MMDeploy。 需要注意的是, MMDeploy 和 RKNN 依赖的安装包间有冲突的内容. 这里提供建议在 python 3.6 环境中使用的安装包版本: + + ``` + protobuf==3.19.4 + onnx==1.8.0 + onnxruntime==1.8.0 + torch==1.8.0 + torchvision==0.9.0 + ``` + +4. 使用 conda 安装 torch and torchvision,比如: + +``` +conda install pytorch==1.8.0 torchvision==0.9.0 cudatoolkit=11.1 -c pytorch -c conda-forge +``` + +如要使用 [MMClassification](https://mmclassification.readthedocs.io/en/latest/getting_started.html), 需要用户自己安装使用。 + +## 使用 + +例子: + +```bash +python tools/deploy.py \ + configs/mmcls/classification_rknn_static.py \ + /mmclassification_dir/configs/resnet/resnet50_8xb32_in1k.py \ + https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_batch256_imagenet_20200708-cfb998bf.pth \ + /mmclassification_dir/demo/demo.JPEG \ + --work-dir ../resnet50 \ + --device cpu +``` + +## 部署 config + +部署 config,你可以根据需要修改 `backend_config` 字段. 一个 mmclassification 的 `backend_config`例子如下: + +```python +backend_config = dict( + type='rknn', + common_config=dict( + mean_values=None, + std_values=None, + target_platform='rk3588', + optimization_level=3), + quantization_config=dict(do_quantization=False, dataset=None), + input_size_list=[[3, 224, 224]]) + +``` + +`common_config` 的内容服务于 `rknn.config()`. `quantization_config` 的内容服务于 `rknn.build()`。 + +## 安装 SDK + +1. 获取 rknpu2: + + ``` + git clone git@github.com:rockchip-linux/rknpu2.git + ``` + +2. 在 linux 系统, 下载 gcc 交叉编译器. `rknpu2` 的官方提供的下载链接无法使用了. 用户可以使用另一个 [链接](https://github.com/Caesar-github/gcc-buildroot-9.3.0-2020.03-x86_64_aarch64-rockchip-linux-gnu). 
下载并解压完编译器, 打开终端, 设置 `RKNN_TOOL_CHAIN` 和 `RKNPU2_DEVICE_DIR` 为 `export RKNN_TOOL_CHAIN=/path/to/gcc/usr;export RKNPU2_DEVICE_DIR=/path/to/rknpu2/runtime/RK3588`。 + +3. 上述准备工作完成后, 运行如下指令安装: + +```shell +cd /path/to/mmdeploy +mkdir -p build && rm -rf build/CM* && cd build +export LD_LIBRARY_PATH=$RKNN_TOOL_CHAIN/lib64:$LD_LIBRARY_PATH +cmake \ + -DCMAKE_TOOLCHAIN_FILE=/path/to/mmdeploy/cmake/toolchains/rknpu2-linux-gnu.cmake \ + -DMMDEPLOY_BUILD_SDK=ON \ + -DCMAKE_BUILD_TYPE=Debug \ + -DOpenCV_DIR=${RKNPU2_DEVICE_DIR}/../../examples/3rdparty/opencv/opencv-linux-aarch64/share/OpenCV \ + -DMMDEPLOY_BUILD_SDK_PYTHON_API=ON \ + -DMMDEPLOY_TARGET_DEVICES="cpu" \ + -DMMDEPLOY_TARGET_BACKENDS="rknn" \ + -DMMDEPLOY_CODEBASES=all \ + -DMMDEPLOY_BUILD_TEST=ON \ + -DMMDEPLOY_BUILD_EXAMPLES=ON \ + .. +make && make install +``` + +## 运行 SDK 的 demo + +首先,确保`--dump-info`在转模型的时候调用了, 这样工作目录下包含 SDK 需要的配置文件 `pipeline.json`。 + +使用 `adb push` 将模型路径,执行文件和.so 文件传到板子上。 + +```bash +cd /path/to/mmdeploy +adb push resnet50 /data/local/tmp/resnet50 +adb push /mmclassification_dir/demo/demo.JPEG /data/local/tmp/resnet50/demo.JPEG +cd build +adb push lib /data/local/tmp/lib +adb push bin/image_classification /data/local/tmp/image_classification +``` + +设置环境变量,运行例子。 + +```bash +adb shell +cd /data/local/tmp +export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/data/local/tmp/lib +./image_classification cpu ./resnet50 ./resnet50/demo.JPEG +.. +label: 65, score: 0.95 +``` + +## 问题点 + +- 量化失败. + + 经验来说, 如果 `do_quantization` 被设置为 `True`,RKNN 需要的输入没有被归一化过。请修改 `Normalize` 在 `model_cfg` 的设置,如将 + + ```python + img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) + ``` + + 改为 + + ```python + img_norm_cfg = dict( + mean=[0, 0, 0], std=[1, 1, 1], to_rgb=True) + ``` + + 此外, deploy_cfg 的 `mean_values` 和 `std_values` 应该被设置为 `model_cfg` 中归一化的设置. 使 `mean_values=[123.675, 116.28, 103.53]`, `std_values=[58.395, 57.12, 57.375]`。 diff --git a/docs/zh_cn/02-how-to-run/prebuilt_package_windows.md b/docs/zh_cn/02-how-to-run/prebuilt_package_windows.md index 36cd4f0cd..c27f928e7 100644 --- a/docs/zh_cn/02-how-to-run/prebuilt_package_windows.md +++ b/docs/zh_cn/02-how-to-run/prebuilt_package_windows.md @@ -23,7 +23,7 @@ ______________________________________________________________________ 目前,`MMDeploy`在`Windows`平台下提供`TensorRT`以及`ONNX Runtime`两种预编译包,可以从[Releases](https://github.com/open-mmlab/mmdeploy/releases)获取。 -本篇教程以`mmdeploy-0.9.0-windows-amd64-onnxruntime1.8.1.zip`和`mmdeploy-0.9.0-windows-amd64-cuda11.1-tensorrt8.2.3.0.zip`为例,展示预编译包的使用方法。 +本篇教程以`mmdeploy-0.10.0-windows-amd64-onnxruntime1.8.1.zip`和`mmdeploy-0.10.0-windows-amd64-cuda11.1-tensorrt8.2.3.0.zip`为例,展示预编译包的使用方法。 为了方便使用者快速上手,本教程以分类模型(mmclassification)为例,展示两种预编译包的使用方法。 @@ -88,9 +88,9 @@ ______________________________________________________________________ 5. 
安装`mmdeploy`(模型转换)以及`mmdeploy_python`(模型推理Python API)的预编译包 ```bash - # 先下载 mmdeploy-0.9.0-windows-amd64-onnxruntime1.8.1.zip - pip install .\mmdeploy-0.9.0-windows-amd64-onnxruntime1.8.1\dist\mmdeploy-0.9.0-py38-none-win_amd64.whl - pip install .\mmdeploy-0.9.0-windows-amd64-onnxruntime1.8.1\sdk\python\mmdeploy_python-0.9.0-cp38-none-win_amd64.whl + # 先下载 mmdeploy-0.10.0-windows-amd64-onnxruntime1.8.1.zip + pip install .\mmdeploy-0.10.0-windows-amd64-onnxruntime1.8.1\dist\mmdeploy-0.10.0-py38-none-win_amd64.whl + pip install .\mmdeploy-0.10.0-windows-amd64-onnxruntime1.8.1\sdk\python\mmdeploy_python-0.10.0-cp38-none-win_amd64.whl ``` :point_right: 如果之前安装过,需要先卸载后再安装。 @@ -115,9 +115,9 @@ ______________________________________________________________________ 5. 安装`mmdeploy`(模型转换)以及`mmdeploy_python`(模型推理Python API)的预编译包 ```bash - # 先下载 mmdeploy-0.9.0-windows-amd64-cuda11.1-tensorrt8.2.3.0.zip - pip install .\mmdeploy-0.9.0-windows-amd64-cuda11.1-tensorrt8.2.3.0\dist\mmdeploy-0.9.0-py38-none-win_amd64.whl - pip install .\mmdeploy-0.9.0-windows-amd64-cuda11.1-tensorrt8.2.3.0\sdk\python\mmdeploy_python-0.9.0-cp38-none-win_amd64.whl + # 先下载 mmdeploy-0.10.0-windows-amd64-cuda11.1-tensorrt8.2.3.0.zip + pip install .\mmdeploy-0.10.0-windows-amd64-cuda11.1-tensorrt8.2.3.0\dist\mmdeploy-0.10.0-py38-none-win_amd64.whl + pip install .\mmdeploy-0.10.0-windows-amd64-cuda11.1-tensorrt8.2.3.0\sdk\python\mmdeploy_python-0.10.0-cp38-none-win_amd64.whl ``` :point_right: 如果之前安装过,需要先卸载后再安装 @@ -146,7 +146,7 @@ ______________________________________________________________________ ``` .. -|-- mmdeploy-0.9.0-windows-amd64-onnxruntime1.8.1 +|-- mmdeploy-0.10.0-windows-amd64-onnxruntime1.8.1 |-- mmclassification |-- mmdeploy `-- resnet18_8xb32_in1k_20210831-fbbb1da6.pth @@ -194,7 +194,7 @@ export2SDK(deploy_cfg, model_cfg, work_dir, pth=model_checkpoint, device=device) ``` .. -|-- mmdeploy-0.9.0-windows-amd64-cuda11.1-tensorrt8.2.3.0 +|-- mmdeploy-0.10.0-windows-amd64-cuda11.1-tensorrt8.2.3.0 |-- mmclassification |-- mmdeploy `-- resnet18_8xb32_in1k_20210831-fbbb1da6.pth @@ -257,8 +257,8 @@ export2SDK(deploy_cfg, model_cfg, work_dir, pth=model_checkpoint, device=device) ``` . -|-- mmdeploy-0.9.0-windows-amd64-cuda11.1-tensorrt8.2.3.0 -|-- mmdeploy-0.9.0-windows-amd64-onnxruntime1.8.1 +|-- mmdeploy-0.10.0-windows-amd64-cuda11.1-tensorrt8.2.3.0 +|-- mmdeploy-0.10.0-windows-amd64-onnxruntime1.8.1 |-- mmclassification |-- mmdeploy |-- resnet18_8xb32_in1k_20210831-fbbb1da6.pth @@ -327,7 +327,7 @@ python .\mmdeploy\demo\python\image_classification.py cpu .\work_dir\onnx\resnet 1. 编译 examples - 在`mmdeploy-0.9.0-windows-amd64-onnxruntime1.8.1\sdk\example`目录下 + 在`mmdeploy-0.10.0-windows-amd64-onnxruntime1.8.1\sdk\example`目录下 ``` // 部分路径根据实际位置进行修改 @@ -335,7 +335,7 @@ python .\mmdeploy\demo\python\image_classification.py cpu .\work_dir\onnx\resnet cd build cmake ..\cpp -A x64 -T v142 ` -DOpenCV_DIR=C:\Deps\opencv\build\x64\vc15\lib ` - -DMMDeploy_DIR=C:\workspace\mmdeploy-0.9.0-windows-amd64-onnxruntime1.8.1\sdk\lib\cmake\MMDeploy ` + -DMMDeploy_DIR=C:\workspace\mmdeploy-0.10.0-windows-amd64-onnxruntime1.8.1\sdk\lib\cmake\MMDeploy ` -DONNXRUNTIME_DIR=C:\Deps\onnxruntime\onnxruntime-win-gpu-x64-1.8.1 cmake --build . 
--config Release

@@ -345,7 +345,7 @@ python .\mmdeploy\demo\python\image_classification.py cpu .\work_dir\onnx\resnet

    :point_right: 目的是使exe运行时可以正确找到相关dll

-   若选择添加环境变量,则将`mmdeploy`的运行时库路径(`mmdeploy-0.9.0-windows-amd64-onnxruntime1.8.1\sdk\bin`)添加到PATH,可参考onnxruntime的添加过程。
+   若选择添加环境变量,则将`mmdeploy`的运行时库路径(`mmdeploy-0.10.0-windows-amd64-onnxruntime1.8.1\sdk\bin`)添加到PATH,可参考onnxruntime的添加过程。

    若选择拷贝动态库,而将bin目录中的dll拷贝到刚才编译出的exe(build/Release)的同级目录下。

@@ -353,7 +353,7 @@ python .\mmdeploy\demo\python\image_classification.py cpu .\work_dir\onnx\resnet

    这里建议使用cmd,这样如果exe运行时如果找不到相关的dll的话会有弹窗

-   在mmdeploy-0.9.0-windows-amd64-onnxruntime1.8.1\\sdk\\example\\build\\Release目录下:
+   在mmdeploy-0.10.0-windows-amd64-onnxruntime1.8.1\\sdk\\example\\build\\Release目录下:

    ```
    .\image_classification.exe cpu C:\workspace\work_dir\onnx\resnet\ C:\workspace\mmclassification\demo\demo.JPEG
@@ -363,7 +363,7 @@ python .\mmdeploy\demo\python\image_classification.py cpu .\work_dir\onnx\resnet

 1. 编译 examples

-   在mmdeploy-0.9.0-windows-amd64-cuda11.1-tensorrt8.2.3.0\\sdk\\example目录下
+   在mmdeploy-0.10.0-windows-amd64-cuda11.1-tensorrt8.2.3.0\\sdk\\example目录下

    ```
    // 部分路径根据所在硬盘的位置进行修改
@@ -371,7 +371,7 @@ python .\mmdeploy\demo\python\image_classification.py cpu .\work_dir\onnx\resnet
    cd build
    cmake ..\cpp -A x64 -T v142 `
      -DOpenCV_DIR=C:\Deps\opencv\build\x64\vc15\lib `
-     -DMMDeploy_DIR=C:\workspace\mmdeploy-0.9.0-windows-amd64-cuda11.1-tensorrt8 2.3.0\sdk\lib\cmake\MMDeploy `
+     -DMMDeploy_DIR=C:\workspace\mmdeploy-0.10.0-windows-amd64-cuda11.1-tensorrt8.2.3.0\sdk\lib\cmake\MMDeploy `
      -DTENSORRT_DIR=C:\Deps\tensorrt\TensorRT-8.2.3.0 `
      -DCUDNN_DIR=C:\Deps\cudnn\8.2.1
    cmake --build . --config Release
@@ -381,7 +381,7 @@ python .\mmdeploy\demo\python\image_classification.py cpu .\work_dir\onnx\resnet

    :point_right: 目的是使exe运行时可以正确找到相关dll

-   若选择添加环境变量,则将`mmdeploy`的运行时库路径(`mmdeploy-0.9.0-windows-amd64-cuda11.1-tensorrt8.2.3.0\sdk\bin`)添加到PATH,可参考onnxruntime的添加过程。
+   若选择添加环境变量,则将`mmdeploy`的运行时库路径(`mmdeploy-0.10.0-windows-amd64-cuda11.1-tensorrt8.2.3.0\sdk\bin`)添加到PATH,可参考onnxruntime的添加过程。

    若选择拷贝动态库,而将bin目录中的dll拷贝到刚才编译出的exe(build/Release)的同级目录下。

@@ -389,7 +389,7 @@ python .\mmdeploy\demo\python\image_classification.py cpu .\work_dir\onnx\resnet

    这里建议使用cmd,这样如果exe运行时如果找不到相关的dll的话会有弹窗

-   在mmdeploy-0.9.0-windows-amd64-cuda11.1-tensorrt8.2.3.0\\sdk\\example\\build\\Release目录下:
+   在mmdeploy-0.10.0-windows-amd64-cuda11.1-tensorrt8.2.3.0\\sdk\\example\\build\\Release目录下:

    ```
    .\image_classification.exe cuda C:\workspace\work_dir\trt\resnet C:\workspace\mmclassification\demo\demo.JPEG
diff --git a/docs/zh_cn/05-supported-backends/rknn.md b/docs/zh_cn/05-supported-backends/rknn.md
new file mode 100644
index 000000000..baf41e540
--- /dev/null
+++ b/docs/zh_cn/05-supported-backends/rknn.md
@@ -0,0 +1,9 @@
+# 支持的 RKNN 特征
+
+目前, MMDeploy 只在 rk3588 的 linux 平台上测试过.
+
+以下特性需要手动在 MMDeploy 自行配置,如[这里](https://github.com/open-mmlab/mmdeploy/blob/master/configs/_base_/backends/rknn.py).
+ +- target_platform != `3588` +- quantization settings +- optimization level != 3 diff --git a/docs/zh_cn/get_started.md b/docs/zh_cn/get_started.md index cb0e8e7cb..e190c0b9f 100644 --- a/docs/zh_cn/get_started.md +++ b/docs/zh_cn/get_started.md @@ -113,11 +113,11 @@ mim install mmcv-full ```shell # 安装 MMDeploy ONNX Runtime 自定义算子库和推理 SDK -wget https://github.com/open-mmlab/mmdeploy/releases/download/v0.9.0/mmdeploy-0.9.0-linux-x86_64-onnxruntime1.8.1.tar.gz -tar -zxvf mmdeploy-0.9.0-linux-x86_64-onnxruntime1.8.1.tar.gz -cd mmdeploy-0.9.0-linux-x86_64-onnxruntime1.8.1 -pip install dist/mmdeploy-0.9.0-py3-none-linux_x86_64.whl -pip install sdk/python/mmdeploy_python-0.9.0-cp38-none-linux_x86_64.whl +wget https://github.com/open-mmlab/mmdeploy/releases/download/v0.10.0/mmdeploy-0.10.0-linux-x86_64-onnxruntime1.8.1.tar.gz +tar -zxvf mmdeploy-0.10.0-linux-x86_64-onnxruntime1.8.1.tar.gz +cd mmdeploy-0.10.0-linux-x86_64-onnxruntime1.8.1 +pip install dist/mmdeploy-0.10.0-py3-none-linux_x86_64.whl +pip install sdk/python/mmdeploy_python-0.10.0-cp38-none-linux_x86_64.whl cd .. # 安装推理引擎 ONNX Runtime pip install onnxruntime==1.8.1 @@ -134,11 +134,11 @@ export LD_LIBRARY_PATH=$ONNXRUNTIME_DIR/lib:$LD_LIBRARY_PATH ```shell # 安装 MMDeploy TensorRT 自定义算子库和推理 SDK -wget https://github.com/open-mmlab/mmdeploy/releases/download/v0.9.0/mmdeploy-0.9.0-linux-x86_64-cuda11.1-tensorrt8.2.3.0.tar.gz -tar -zxvf mmdeploy-0.9.0-linux-x86_64-cuda11.1-tensorrt8.2.3.0.tar.gz -cd mmdeploy-0.9.0-linux-x86_64-cuda11.1-tensorrt8.2.3.0 -pip install dist/mmdeploy-0.9.0-py3-none-linux_x86_64.whl -pip install sdk/python/mmdeploy_python-0.9.0-cp38-none-linux_x86_64.whl +wget https://github.com/open-mmlab/mmdeploy/releases/download/v0.10.0/mmdeploy-0.10.0-linux-x86_64-cuda11.1-tensorrt8.2.3.0.tar.gz +tar -zxvf mmdeploy-0.10.0-linux-x86_64-cuda11.1-tensorrt8.2.3.0.tar.gz +cd mmdeploy-0.10.0-linux-x86_64-cuda11.1-tensorrt8.2.3.0 +pip install dist/mmdeploy-0.10.0-py3-none-linux_x86_64.whl +pip install sdk/python/mmdeploy_python-0.10.0-cp38-none-linux_x86_64.whl cd .. # 安装推理引擎 TensorRT # !!! 
从 NVIDIA 官网下载 TensorRT-8.2.3.0 CUDA 11.x 安装包并解压到当前目录
@@ -226,7 +226,7 @@ result = inference_model(

 你可以直接运行预编译包中的 demo 程序,输入 SDK Model 和图像,进行推理,并查看推理结果。

 ```shell
-cd mmdeploy-0.9.0-linux-x86_64-cuda11.1-tensorrt8.2.3.0
+cd mmdeploy-0.10.0-linux-x86_64-cuda11.1-tensorrt8.2.3.0
 # 运行 python demo
 python sdk/example/python/object_detection.py cuda ../mmdeploy_model/faster-rcnn ../mmdetection/demo/demo.jpg
 # 运行 C/C++ demo
diff --git a/docs/zh_cn/index.rst b/docs/zh_cn/index.rst
index 2b8ba20d5..1d08a9fa1 100644
--- a/docs/zh_cn/index.rst
+++ b/docs/zh_cn/index.rst
@@ -58,6 +58,7 @@
    05-supported-backends/onnxruntime.md
    05-supported-backends/openvino.md
    05-supported-backends/pplnn.md
+   05-supported-backends/rknn.md
    05-supported-backends/snpe.md
    05-supported-backends/tensorrt.md
    05-supported-backends/torchscript.md
diff --git a/mmdeploy/backend/tensorrt/utils.py b/mmdeploy/backend/tensorrt/utils.py
index 9fe4e54aa..fcd307a4d 100644
--- a/mmdeploy/backend/tensorrt/utils.py
+++ b/mmdeploy/backend/tensorrt/utils.py
@@ -169,7 +169,12 @@ def from_onnx(onnx_model: Union[str, onnx.ModelProto],
         builder.max_workspace_size = max_workspace_size

     config = builder.create_builder_config()
-    config.max_workspace_size = max_workspace_size
+
+    if hasattr(config, 'set_memory_pool_limit'):
+        config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE,
+                                     max_workspace_size)
+    else:
+        config.max_workspace_size = max_workspace_size

     cuda_version = search_cuda_version()
     if cuda_version is not None:
@@ -187,14 +192,19 @@ def from_onnx(onnx_model: Union[str, onnx.ModelProto],
         opt_shape = param['opt_shape']
         max_shape = param['max_shape']
         profile.set_shape(input_name, min_shape, opt_shape, max_shape)
-    config.add_optimization_profile(profile)
+    if config.add_optimization_profile(profile) < 0:
+        logger.warning(f'Invalid optimization profile {profile}.')

     if fp16_mode:
+        if not getattr(builder, 'platform_has_fast_fp16', True):
+            logger.warning('Platform does not have fast native fp16.')
         if version.parse(trt.__version__) < version.parse('8'):
             builder.fp16_mode = fp16_mode
         config.set_flag(trt.BuilderFlag.FP16)

     if int8_mode:
+        if not getattr(builder, 'platform_has_fast_int8', True):
+            logger.warning('Platform does not have fast native int8.')
         from .calib_utils import HDF5Calibrator
         config.set_flag(trt.BuilderFlag.INT8)
         assert int8_param is not None
diff --git a/mmdeploy/mmcv/cnn/__init__.py b/mmdeploy/mmcv/cnn/__init__.py
index f9dafc2d9..987f190a6 100644
--- a/mmdeploy/mmcv/cnn/__init__.py
+++ b/mmdeploy/mmcv/cnn/__init__.py
@@ -1,5 +1,5 @@
 # Copyright (c) OpenMMLab. All rights reserved.
-from .transformer import (MultiHeadAttentionop,
-                          multiheadattention__forward__ncnn)
+from . import conv2d_adaptive_padding  # noqa: F401,F403
+from .transformer import MultiHeadAttentionop

-__all__ = ['multiheadattention__forward__ncnn', 'MultiHeadAttentionop']
+__all__ = ['conv2d_adaptive_padding', 'MultiHeadAttentionop']
diff --git a/mmdeploy/mmcv/cnn/conv2d_adaptive_padding.py b/mmdeploy/mmcv/cnn/conv2d_adaptive_padding.py
new file mode 100644
index 000000000..d00184c8e
--- /dev/null
+++ b/mmdeploy/mmcv/cnn/conv2d_adaptive_padding.py
@@ -0,0 +1,86 @@
+# Copyright (c) OpenMMLab. All rights reserved.
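+# Note: this module rewrites mmcv's Conv2dAdaptivePadding for TensorRT.
+# The run-time F.pad whose amounts depend on the input is replaced by a Pad
+# op with padding precomputed from the static input shape at export time,
+# so it can be serialized as a plain ONNX Pad node.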
+import math
+
+import torch
+import torch.nn.functional as F
+
+from mmdeploy.core import FUNCTION_REWRITER
+from mmdeploy.utils import Backend, is_dynamic_batch, is_dynamic_shape
+
+
+def compute_padding(input_size, kernel_size, stride, dilation):
+    """Compute padding."""
+
+    input_h, input_w = input_size
+    kernel_h, kernel_w = kernel_size
+    stride_h, stride_w = stride
+    dilation_h, dilation_w = dilation
+    output_h = math.ceil(input_h / stride_h)
+    output_w = math.ceil(input_w / stride_w)
+    pad_h = max(
+        (output_h - 1) * stride_h + (kernel_h - 1) * dilation_h + 1 - input_h,
+        0)
+    pad_w = max(
+        (output_w - 1) * stride_w + (kernel_w - 1) * dilation_w + 1 - input_w,
+        0)
+    if pad_w > 0 or pad_h > 0:
+        padded = [
+            pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2
+        ]
+    else:
+        padded = None
+    return padded
+
+
+class AdaptivePadOp(torch.autograd.Function):
+    """Dummy adaptive pad op."""
+
+    @staticmethod
+    def forward(ctx, x, padded):
+        if padded is not None:
+            x = F.pad(x, padded)
+        return x
+
+    @staticmethod
+    def symbolic(g, x, padded):
+        if padded is None:
+            return g.op('Identity', x)
+        padded = g.op(
+            'Constant', value_t=torch.tensor(padded, dtype=torch.int64))
+        constant_value = g.op(
+            'Constant', value_t=torch.tensor(0, dtype=torch.int64))
+        return g.op(
+            'Pad', x, padded, constant_value, mode_s='constant', outputs=1)
+
+
+@FUNCTION_REWRITER.register_rewriter(
+    func_name='mmcv.cnn.bricks.conv2d_adaptive_padding. \
+    Conv2dAdaptivePadding.forward',
+    backend=Backend.TENSORRT.value)
+def conv2d_adaptive_padding__forward__tensorrt(ctx, self, x):
+    """Rewrite `forward` of Conv2dAdaptivePadding used in EfficientNet for the
+    TensorRT backend. The main change of this rewritten function is to
+    separate the computation of padding and encapsulate it into another
+    `torch.autograd.Function` so that the adaptive padding could be parsed as
+    `Pad` ops in ONNX with the padding information computed in advance (only
+    for static shape configuration).
+
+    Args:
+        x (Tensor): Input tensor of Conv2dAdaptivePadding ops
+    Returns:
+        Tensor: forward result of 2D convolution after padding
+    """
+
+    deploy_cfg = ctx.cfg
+    is_dynamic_flag = is_dynamic_shape(deploy_cfg)
+    if (not is_dynamic_flag) or is_dynamic_batch(deploy_cfg):
+        # static spatial shape: padding can be computed ahead of export
+        padded = compute_padding(x.shape[2:], self.weight.shape[2:],
+                                 self.stride, self.dilation)
+        if padded is not None:
+            padded = [int(_) for _ in padded]
+        x = AdaptivePadOp.apply(x, padded)
+        return F.conv2d(x, self.weight, self.bias, self.stride, self.padding,
+                        self.dilation, self.groups)
+    else:
+        # fully dynamic spatial shape: fall back to the original forward
+        x = ctx.origin_func(x)
+        return x
diff --git a/mmdeploy/version.py b/mmdeploy/version.py
index 220b9c9f1..3fb10ac9e 100644
--- a/mmdeploy/version.py
+++ b/mmdeploy/version.py
@@ -1,7 +1,7 @@
 # Copyright (c) OpenMMLab. All rights reserved.
from typing import Tuple -__version__ = '0.9.0' +__version__ = '0.10.0' short_version = __version__ diff --git a/resources/introduction.png b/resources/introduction.png index 7690dd8fd..be9f72e38 100644 Binary files a/resources/introduction.png and b/resources/introduction.png differ diff --git a/tests/regression/mmcls.yml b/tests/regression/mmcls.yml index 7bcce6b70..edca9f156 100644 --- a/tests/regression/mmcls.yml +++ b/tests/regression/mmcls.yml @@ -210,4 +210,13 @@ models: pipelines: - *pipeline_ts_fp32 - *pipeline_ort_dynamic_fp32 - - *pipeline_trt_dynamic_fp32 + - *pipeline_trt_static_fp16 + + - name: EfficientNet + metafile: configs/efficientnet/metafile.yml + model_configs: + - configs/efficientnet/efficientnet-b0_8xb32_in1k.py + pipelines: + - *pipeline_ort_static_fp32 + - convert_image: *convert_image + deploy_config: configs/mmcls/classification_tensorrt_dynamic-224x224-224x224.py diff --git a/tests/test_csrc/CMakeLists.txt b/tests/test_csrc/CMakeLists.txt index ad7934e0d..fa0d06a81 100644 --- a/tests/test_csrc/CMakeLists.txt +++ b/tests/test_csrc/CMakeLists.txt @@ -9,6 +9,7 @@ aux_source_directory(${CMAKE_CURRENT_SOURCE_DIR}/core CORE_TC) aux_source_directory(${CMAKE_CURRENT_SOURCE_DIR}/preprocess TRANSFORM_TC) aux_source_directory(${CMAKE_CURRENT_SOURCE_DIR}/net NET_TC) aux_source_directory(${CMAKE_CURRENT_SOURCE_DIR}/model MODEL_TC) +aux_source_directory(${CMAKE_CURRENT_SOURCE_DIR}/graph GRAPH_TC) set(DEVICE_TC) foreach (DEVICE IN LISTS MMDEPLOY_TARGET_DEVICES) @@ -58,7 +59,8 @@ set(TC_SRCS ${MODEL_TC} ${NET_TC} ${DEVICE_TC} - ${CAPI_TC}) + ${CAPI_TC} + ${GRAPH_TC}) add_executable(mmdeploy_tests ${TC_SRCS}) target_include_directories(mmdeploy_tests diff --git a/tests/test_csrc/graph/test_cond.cpp b/tests/test_csrc/graph/test_cond.cpp new file mode 100644 index 000000000..f542bc204 --- /dev/null +++ b/tests/test_csrc/graph/test_cond.cpp @@ -0,0 +1,65 @@ +// Copyright (c) OpenMMLab. All rights reserved. 
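+// Builds a Cond node from a JSON graph config and drives it with scalar and
+// batched predicates; execution of the "Plus" task body is gated by "pred".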
+
+#include "catch.hpp"
+#include "mmdeploy/archive/json_archive.h"
+#include "mmdeploy/core/graph.h"
+#include "mmdeploy/core/registry.h"
+#include "mmdeploy/experimental/module_adapter.h"
+
+using namespace mmdeploy;
+
+namespace {
+
+class PlusCreator : public Creator<Module> {
+ public:
+  const char* GetName() const override { return "Plus"; }
+  std::unique_ptr<Module> Create(const Value&) override {
+    return CreateTask([](int a, int b) { return a + b; });
+  }
+};
+REGISTER_MODULE(Module, PlusCreator);
+
+const auto json_config1 = R"(
+{
+  "type": "Cond",
+  "input": ["pred", "a", "b"],
+  "output": "c",
+  "body": {
+    "type": "Task",
+    "module": "Plus"
+  }
+}
+)"_json;
+
+}  // namespace
+
+TEST_CASE("test Cond node", "[graph]") {
+  auto config = from_json(json_config1);
+  auto builder = graph::Builder::CreateFromConfig(config).value();
+  REQUIRE(builder);
+  auto node = builder->Build().value();
+  REQUIRE(node);
+  {
+    auto result = SyncWait(node->Process(Just(Value({{false}, {1}, {1}}))));
+    MMDEPLOY_INFO("{}", result);
+  }
+  {
+    auto result = SyncWait(node->Process(Just(Value({{true}, {1}, {1}}))));
+    MMDEPLOY_INFO("{}", result);
+  }
+  {
+    auto result = SyncWait(
+        node->Process(Just(Value({{false, false, false, false}, {1, 2, 3, 4}, {1, 3, 5, 7}}))));
+    MMDEPLOY_INFO("{}", result);
+  }
+  {
+    auto result = SyncWait(
+        node->Process(Just(Value({{true, true, true, true}, {1, 2, 3, 4}, {1, 3, 5, 7}}))));
+    MMDEPLOY_INFO("{}", result);
+  }
+  {
+    auto result = SyncWait(
+        node->Process(Just(Value({{true, false, false, true}, {1, 2, 3, 4}, {1, 3, 5, 7}}))));
+    MMDEPLOY_INFO("{}", result);
+  }
+}
diff --git a/tests/test_csrc/graph/test_crnn.cpp b/tests/test_csrc/graph/test_crnn.cpp
deleted file mode 100644
index e69de29bb..000000000
diff --git a/tests/test_mmcv/test_mmcv_cnn.py b/tests/test_mmcv/test_mmcv_cnn.py
index 496197952..4ff02438b 100644
--- a/tests/test_mmcv/test_mmcv_cnn.py
+++ b/tests/test_mmcv/test_mmcv_cnn.py
@@ -30,3 +30,30 @@ def test_multiheadattention_ncnn():
     else:
         assert torch.allclose(
             model_outputs, rewrite_outputs[0], rtol=1e-03, atol=1e-05)
+
+
+def test_conv2d_adaptive_padding_tensorrt():
+    check_backend(Backend.TENSORRT)
+    from mmcv.cnn.bricks.conv2d_adaptive_padding import Conv2dAdaptivePadding
+    in_channels, out_channels = 3, 64
+    kernel_sz = 3
+    model = Conv2dAdaptivePadding(in_channels, out_channels, kernel_sz)
+    dummy_input = torch.rand(1, 3, 256, 256)
+
+    deploy_cfg = Config(
+        dict(
+            onnx_config=dict(input_shape=None),
+            backend_config=dict(type=Backend.TENSORRT.value),
+        ))
+    model_outputs = model(dummy_input)
+    rewrite_inputs = dict(x=dummy_input)
+    rewrite_outputs, is_backend_output = get_rewrite_outputs(
+        wrapped_model=model,
+        model_inputs=rewrite_inputs,
+        deploy_cfg=deploy_cfg,
+        run_with_backend=True)
+    if is_backend_output is None:
+        assert rewrite_outputs is not None
+    else:
+        assert torch.allclose(
+            model_outputs, rewrite_outputs[0], rtol=1e-03, atol=1e-05)
diff --git a/tools/package_tools/packaging/mmdeploy_python/version.py b/tools/package_tools/packaging/mmdeploy_python/version.py
index 6816ee1a7..44c0eb148 100644
--- a/tools/package_tools/packaging/mmdeploy_python/version.py
+++ b/tools/package_tools/packaging/mmdeploy_python/version.py
@@ -1,2 +1,2 @@
 # Copyright (c) OpenMMLab. All rights reserved.
-__version__ = '0.9.0'
+__version__ = '0.10.0'
diff --git a/tools/scripts/build_ubuntu_x64_ncnn.py b/tools/scripts/build_ubuntu_x64_ncnn.py
index feaec8557..8fff3dea2 100644
--- a/tools/scripts/build_ubuntu_x64_ncnn.py
+++ b/tools/scripts/build_ubuntu_x64_ncnn.py
@@ -9,7 +9,8 @@ g_jobs = 2

 def install_protobuf(dep_dir) -> int:
-    """build and install protobuf.
+    """build and install protobuf. protobuf does not seem to support repeated
+    installs, so clean the build first.

     Args:
         wor_dir (_type_): _description_
@@ -29,11 +30,22 @@ def install_protobuf(dep_dir) -> int:
     os.chdir(os.path.join(dep_dir, 'protobuf-3.20.0'))

     install_dir = os.path.join(dep_dir, 'pbinstall')
+    if os.path.exists(install_dir):
+        os.system('rm -rf {}'.format(install_dir))
+
+    os.system('make clean')
     os.system('./configure --prefix={}'.format(install_dir))
     os.system('make -j {} && make install'.format(g_jobs))
-    protoc = os.path.join(dep_dir, 'pbinstall', 'bin', 'protoc')
+    protoc = os.path.join(install_dir, 'bin', 'protoc')
     print('protoc \t:{}'.format(cmd_result('{} --version'.format(protoc))))
+
+    os.system(""" echo 'export PATH={}:$PATH' >> ~/mmdeploy.env """.format(
+        os.path.join(install_dir, 'bin')))
+    os.system(
+        """ echo 'export LD_LIBRARY_PATH={}:$LD_LIBRARY_PATH' >> ~/mmdeploy.env """  # noqa: E501
+        .format(os.path.join(install_dir, 'lib')))
+
     return 0

@@ -60,6 +72,7 @@ def install_pyncnn(dep_dir):
         os.system('mkdir build')

     os.chdir(os.path.join(ncnn_dir, 'build'))
+    os.system('rm -rf CMakeCache.txt')
     pb_install = os.path.join(dep_dir, 'pbinstall')
     pb_bin = os.path.join(pb_install, 'bin', 'protoc')
     pb_lib = os.path.join(pb_install, 'lib', 'libprotobuf.so')
@@ -101,9 +114,9 @@ def install_mmdeploy(work_dir, dep_dir, ncnn_cmake_dir):
     pb_lib = os.path.join(pb_install, 'lib', 'libprotobuf.so')
     pb_include = os.path.join(pb_install, 'include')

+    os.system('rm -rf build/CMakeCache.txt')
+
     cmd = 'cd build && cmake ..'
-    cmd += ' -DCMAKE_C_COMPILER=gcc-7 '
-    cmd += ' -DCMAKE_CXX_COMPILER=g++-7 '
     cmd += ' -DMMDEPLOY_BUILD_SDK=ON '
     cmd += ' -DMMDEPLOY_BUILD_EXAMPLES=ON '
     cmd += ' -DMMDEPLOY_BUILD_SDK_PYTHON_API=ON '
@@ -117,7 +130,14 @@ def install_mmdeploy(work_dir, dep_dir, ncnn_cmake_dir):
     os.system('cd build && make -j {} && make install'.format(g_jobs))
     os.system('python3 -m pip install -v -e .')
-    os.system('python3 tools/check_env.py')
+    os.system(""" echo 'export PATH={}:$PATH' >> ~/mmdeploy.env """.format(
+        os.path.join(work_dir, 'mmdeploy', 'backend', 'ncnn')))
+    try:
+        import mmcv
+        print(mmcv.__version__)
+        os.system('python3 tools/check_env.py')
+    except Exception:
+        print('Please install torch & mmcv later.. ╮(╯▽╰)╭')

     return 0

@@ -143,7 +163,7 @@ def main():
         return -1
     os.mkdir(dep_dir)

-    success, envs = ensure_base_env(work_dir, dep_dir)
+    success = ensure_base_env(work_dir, dep_dir)
     if success != 0:
         return -1

@@ -155,12 +175,9 @@ def main():
     if install_mmdeploy(work_dir, dep_dir, ncnn_cmake_dir) != 0:
         return -1

-    if len(envs) > 0:
-        print(
-            'We recommend that you set the following environment variables:\n')
-        for env in envs:
-            print(env)
-        print('\n')
+    if os.path.exists(os.path.expanduser('~/mmdeploy.env')):
+        print('Please source ~/mmdeploy.env to set up your env!')
+        os.system('cat ~/mmdeploy.env')


 if __name__ == '__main__':
diff --git a/tools/scripts/build_ubuntu_x64_ort.py b/tools/scripts/build_ubuntu_x64_ort.py
index e32e393ef..f3a9c3b99 100644
--- a/tools/scripts/build_ubuntu_x64_ort.py
+++ b/tools/scripts/build_ubuntu_x64_ort.py
@@ -20,7 +20,7 @@ def install_ort(dep_dir):
     # git clone
     if not os.path.exists('onnxruntime-linux-x64-1.8.1'):
         os.system(
-            'wget https://github.com/microsoft/onnxruntime/releases/download/v1.8.1/onnxruntime-linux-x64-1.8.1.tgz'  # noqa: E501
+            'wget -q --show-progress https://github.com/microsoft/onnxruntime/releases/download/v1.8.1/onnxruntime-linux-x64-1.8.1.tgz'  # noqa: E501
         )
         os.system('tar xvf onnxruntime-linux-x64-1.8.1.tgz')
@@ -41,9 +41,9 @@ def install_mmdeploy(work_dir, ort_dir):
     if not os.path.exists('build'):
         os.system('mkdir build')

+    os.system('rm -rf build/CMakeCache.txt')
+
     cmd = 'cd build && cmake ..'
-    cmd += ' -DCMAKE_C_COMPILER=gcc-7 '
-    cmd += ' -DCMAKE_CXX_COMPILER=g++-7 '
     cmd += ' -DMMDEPLOY_BUILD_SDK=ON '
     cmd += ' -DMMDEPLOY_BUILD_EXAMPLES=ON '
     cmd += ' -DMMDEPLOY_BUILD_SDK_PYTHON_API=ON '
@@ -54,7 +54,12 @@ def install_mmdeploy(work_dir, ort_dir):
     os.system('cd build && make -j {} && make install'.format(g_jobs))
     os.system('python3 -m pip install -e .')
-    os.system('python3 tools/check_env.py')
+    try:
+        import mmcv
+        print(mmcv.__version__)
+        os.system('python3 tools/check_env.py')
+    except Exception:
+        print('Please install torch & mmcv later.. ⊙▽⊙')
     return 0

@@ -80,7 +85,7 @@ def main():
         return -1
     os.mkdir(dep_dir)

-    success, envs = ensure_base_env(work_dir, dep_dir)
+    success = ensure_base_env(work_dir, dep_dir)
     if success != 0:
         return -1

@@ -89,12 +94,9 @@ def main():
     if install_mmdeploy(work_dir, ort_dir) != 0:
         return -1

-    if len(envs) > 0:
-        print(
-            'We recommend that you set the following environment variables:\n')
-        for env in envs:
-            print(env)
-        print('\n')
+    if os.path.exists(os.path.expanduser('~/mmdeploy.env')):
+        print('Please source ~/mmdeploy.env to set up your env!')
+        os.system('cat ~/mmdeploy.env')


 if __name__ == '__main__':
diff --git a/tools/scripts/build_ubuntu_x64_pplnn.py b/tools/scripts/build_ubuntu_x64_pplnn.py
index 45eae19b9..327b96187 100644
--- a/tools/scripts/build_ubuntu_x64_pplnn.py
+++ b/tools/scripts/build_ubuntu_x64_pplnn.py
@@ -85,9 +85,9 @@ def install_mmdeploy(work_dir, pplnn_cmake_dir, pplcv_cmake_dir, build_cuda):
     if not os.path.exists('build'):
         os.system('mkdir build')

+    os.system('rm -rf build/CMakeCache.txt')
+
     cmd = 'cd build && cmake ..'
-    cmd += ' -DCMAKE_C_COMPILER=gcc-7 '
-    cmd += ' -DCMAKE_CXX_COMPILER=g++-7 '
     cmd += ' -DMMDEPLOY_BUILD_SDK=ON '
     cmd += ' -DMMDEPLOY_BUILD_EXAMPLES=ON '
     cmd += ' -DMMDEPLOY_BUILD_SDK_PYTHON_API=ON '
@@ -104,7 +104,12 @@ def install_mmdeploy(work_dir, pplnn_cmake_dir, pplcv_cmake_dir, build_cuda):
     os.system('cd build && make -j {} && make install'.format(g_jobs))
     os.system('python3 -m pip install -e .')
-    os.system('python3 tools/check_env.py')
+    try:
+        import mmcv
+        print(mmcv.__version__)
+        os.system('python3 tools/check_env.py')
+    except Exception:
+        print('Please install torch & mmcv later.. ∩▽∩')
     return 0

@@ -130,23 +135,10 @@ def main():
         return -1
     os.mkdir(dep_dir)

-    success, envs = ensure_base_env(work_dir, dep_dir)
+    success = ensure_base_env(work_dir, dep_dir)
     if success != 0:
         return -1

-    # enable g++ and gcc
-    gplus = cmd_result('which g++')
-    if gplus is None or len(gplus) < 1:
-        sudo = 'sudo'
-        if 'root' in cmd_result('whoami'):
-            sudo = ''
-        os.system(
-            '{} update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-7 200'  # noqa: E501
-            .format(sudo))
-        os.system(
-            '{} update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-7 200'  # noqa: E501
-            .format(sudo))
-
     # install pplcv and pplnn
     nvcc = cmd_result('which nvcc')
     build_cuda = False
@@ -158,12 +150,9 @@ def main():
                            build_cuda) != 0:
         return -1

-    if len(envs) > 0:
-        print(
-            'We recommend that you set the following environment variables:\n')
-        for env in envs:
-            print(env)
-        print('\n')
+    if os.path.exists(os.path.expanduser('~/mmdeploy.env')):
+        print('Please source ~/mmdeploy.env to set up your env!')
+        os.system('cat ~/mmdeploy.env')


 if __name__ == '__main__':
diff --git a/tools/scripts/build_ubuntu_x64_torchscript.py b/tools/scripts/build_ubuntu_x64_torchscript.py
index c8c58f04e..5575eec71 100644
--- a/tools/scripts/build_ubuntu_x64_torchscript.py
+++ b/tools/scripts/build_ubuntu_x64_torchscript.py
@@ -3,7 +3,8 @@ import os
 import sys
 import time

-from ubuntu_utils import cmd_result, cu_version_name, ensure_base_env, get_job
+from ubuntu_utils import (cmd_result, cu_version_name, ensure_base_env,
+                          get_job, pytorch_version)

 g_jobs = 2
@@ -17,15 +18,9 @@ def install_libtorch(dep_dir):
     if os.path.exists(unzipped_name):
         return os.path.join(dep_dir, unzipped_name)

-    torch_version = None
-    try:
-        import torch
-        torch_version = torch.__version__
-    except Exception:
-        pass
-
+    torch_version = pytorch_version()
     if torch_version is None:
-        print('torch version is None, use 1.11.0')
+        print('torch version is None, try 1.11.0')
         torch_version = '1.11.0'

     version_name = None
@@ -46,7 +41,7 @@ def install_libtorch(dep_dir):
         torch_version, version_name)
     url = 'https://download.pytorch.org/libtorch/{}/{}'.format(
         version_name, filename)
-    os.system('wget {} -O libtorch.zip'.format(url))
+    os.system('wget -q --show-progress {} -O libtorch.zip'.format(url))
     os.system('unzip libtorch.zip')
     if not os.path.exists(unzipped_name):
         print(
@@ -67,9 +62,9 @@ def install_mmdeploy(work_dir, libtorch_dir):
     if not os.path.exists('build'):
         os.system('mkdir build')

+    os.system('rm -rf build/CMakeCache.txt')
+
     cmd = 'cd build && Torch_DIR={} cmake ..'.format(libtorch_dir)
-    cmd += ' -DCMAKE_C_COMPILER=gcc-7 '
-    cmd += ' -DCMAKE_CXX_COMPILER=g++-7 '
     cmd += ' -DMMDEPLOY_BUILD_SDK=ON '
     cmd += ' -DMMDEPLOY_BUILD_EXAMPLES=ON '
     cmd += ' -DMMDEPLOY_BUILD_SDK_PYTHON_API=ON '
@@ -80,7 +75,12 @@ def install_mmdeploy(work_dir, libtorch_dir):
     os.system('cd build && make -j {} && make install'.format(g_jobs))
     os.system('python3 -m pip install -e .')
-    os.system('python3 tools/check_env.py')
+    try:
+        import mmcv
+        print(mmcv.__version__)
+        os.system('python3 tools/check_env.py')
+    except Exception:
+        print('Please install torch & mmcv later.. ≥▽≤')
     return 0

@@ -106,7 +106,7 @@ def main():
         return -1
     os.mkdir(dep_dir)

-    success, envs = ensure_base_env(work_dir, dep_dir)
+    success = ensure_base_env(work_dir, dep_dir)
     if success != 0:
         return -1

@@ -118,12 +118,9 @@ def main():
     if install_mmdeploy(work_dir, libtorch_dir) != 0:
         return -1

-    if len(envs) > 0:
-        print(
-            'We recommend that you set the following environment variables:\n')
-        for env in envs:
-            print(env)
-        print('\n')
+    if os.path.exists(os.path.expanduser('~/mmdeploy.env')):
+        print('Please source ~/mmdeploy.env to set up your env!')
+        os.system('cat ~/mmdeploy.env')


 if __name__ == '__main__':
diff --git a/tools/scripts/ubuntu_cross_build_aarch64.sh b/tools/scripts/ubuntu_cross_build_aarch64.sh
new file mode 100755
index 000000000..faedb794f
--- /dev/null
+++ b/tools/scripts/ubuntu_cross_build_aarch64.sh
@@ -0,0 +1,105 @@
+#!/bin/bash
+# set -ex
+# get appropriate proc number: max(1, nproc-3)
+good_nproc() {
+  num=`nproc`
+  num=`expr $num - 3`
+  if [ $num -lt 1 ];then
+    return 1
+  fi
+  return ${num}
+}
+
+install_tools() {
+  sudo apt install -y gcc-aarch64-linux-gnu g++-aarch64-linux-gnu
+  aarch64-linux-gnu-g++ --version
+  aarch64-linux-gnu-gcc --version
+  aarch64-linux-gnu-ld --version
+
+  sudo apt install wget git git-lfs
+
+  python3 -m pip install cmake==3.22.0
+
+  echo 'export PATH=~/.local/bin:${PATH}' >> ~/mmdeploy.env
+  export PATH=~/.local/bin:${PATH}
+}
+
+build_ocv() {
+  if [ ! -e "opencv" ];then
+    git clone https://github.com/opencv/opencv --depth=1 --branch=4.6.0 --recursive
+  fi
+  if [ ! -e "opencv/platforms/linux/cross_build_aarch64" ];then
+    mkdir opencv/platforms/linux/cross_build_aarch64
+  fi
+  cd opencv/platforms/linux/cross_build_aarch64
+  rm -rf CMakeCache.txt
+  cmake ../../.. -DCMAKE_INSTALL_PREFIX=/tmp/ocv-aarch64 -DCMAKE_TOOLCHAIN_FILE=../aarch64-gnu.toolchain.cmake
+  good_nproc
+  jobs=$?
+  make -j${jobs}
+  make install
+  cd -
+}
+
+build_ncnn() {
+  if [ ! -e "ncnn" ];then
+    git clone https://github.com/tencent/ncnn --branch 20220729 --depth=1
+  fi
+  if [ ! -e "ncnn/build_aarch64" ];then
+    mkdir -p ncnn/build_aarch64
+  fi
+  cd ncnn/build_aarch64
+  rm -rf CMakeCache.txt
+  cmake .. \
+    -DCMAKE_TOOLCHAIN_FILE=../toolchains/aarch64-linux-gnu.toolchain.cmake \
+    -DCMAKE_INSTALL_PREFIX=/tmp/ncnn-aarch64
+  good_nproc
+  jobs=$?
+  make -j${jobs}
+  make install
+  cd -
+}
+
+build_mmdeploy() {
+  git submodule init
+  git submodule update
+
+  if [ ! -e "build_aarch64" ];then
+    mkdir build_aarch64
+  fi
+  cd build_aarch64
+
+  rm -rf CMakeCache.txt
+  cmake .. \
+    -DCMAKE_TOOLCHAIN_FILE=../cmake/toolchains/aarch64-linux-gnu.cmake \
+    -DMMDEPLOY_TARGET_DEVICES="cpu" \
+    -DMMDEPLOY_TARGET_BACKENDS="ncnn" \
+    -Dncnn_DIR=/tmp/ncnn-aarch64/lib/cmake/ncnn \
+    -DOpenCV_DIR=/tmp/ocv-aarch64/lib/cmake/opencv4
+
+  good_nproc
+  jobs=$?
+  make -j${jobs}
+  make install
+
+  ls -lah install/bin/*
+}
+
+print_success() {
+  echo "----------------------------------------------------------------------"
+  echo "Cross build finished, PLS copy bin/model/test_data to the device.. QVQ"
+  echo "----------------------------------------------------------------------"
+}
+
+if [ !
diff --git a/tools/scripts/ubuntu_cross_build_aarch64.sh b/tools/scripts/ubuntu_cross_build_aarch64.sh
new file mode 100755
index 000000000..faedb794f
--- /dev/null
+++ b/tools/scripts/ubuntu_cross_build_aarch64.sh
@@ -0,0 +1,105 @@
+#!/bin/bash
+# set -ex
+# get appropriate proc number: max(1, nproc-3)
+good_nproc() {
+  num=`nproc`
+  num=`expr $num - 3`
+  if [ $num -lt 1 ];then
+    return 1
+  fi
+  return ${num}
+}
+
+install_tools() {
+  sudo apt install -y gcc-aarch64-linux-gnu g++-aarch64-linux-gnu
+  aarch64-linux-gnu-g++ --version
+  aarch64-linux-gnu-gcc --version
+  aarch64-linux-gnu-ld --version
+
+  sudo apt install wget git git-lfs
+
+  python3 -m pip install cmake==3.22.0
+
+  echo 'export PATH=~/.local/bin:${PATH}' >> ~/mmdeploy.env
+  export PATH=~/.local/bin:${PATH}
+}
+
+build_ocv() {
+  if [ ! -e "opencv" ];then
+    git clone https://github.com/opencv/opencv --depth=1 --branch=4.6.0 --recursive
+  fi
+  if [ ! -e "opencv/platforms/linux/cross_build_aarch64" ];then
+    mkdir opencv/platforms/linux/cross_build_aarch64
+  fi
+  cd opencv/platforms/linux/cross_build_aarch64
+  rm -rf CMakeCache.txt
+  cmake ../../.. -DCMAKE_INSTALL_PREFIX=/tmp/ocv-aarch64 -DCMAKE_TOOLCHAIN_FILE=../aarch64-gnu.toolchain.cmake
+  good_nproc
+  jobs=$?
+  make -j${jobs}
+  make install
+  cd -
+}
+
+build_ncnn() {
+  if [ ! -e "ncnn" ];then
+    git clone https://github.com/tencent/ncnn --branch 20220729 --depth=1
+  fi
+  if [ ! -e "ncnn/build_aarch64" ];then
+    mkdir -p ncnn/build_aarch64
+  fi
+  cd ncnn/build_aarch64
+  rm -rf CMakeCache.txt
+  cmake .. \
+    -DCMAKE_TOOLCHAIN_FILE=../toolchains/aarch64-linux-gnu.toolchain.cmake \
+    -DCMAKE_INSTALL_PREFIX=/tmp/ncnn-aarch64
+  good_nproc
+  jobs=$?
+  make -j${jobs}
+  make install
+  cd -
+}
+
+build_mmdeploy() {
+  git submodule init
+  git submodule update
+
+  if [ ! -e "build_aarch64" ];then
+    mkdir build_aarch64
+  fi
+  cd build_aarch64
+
+  rm -rf CMakeCache.txt
+  cmake .. \
+    -DCMAKE_TOOLCHAIN_FILE=../cmake/toolchains/aarch64-linux-gnu.cmake \
+    -DMMDEPLOY_TARGET_DEVICES="cpu" \
+    -DMMDEPLOY_TARGET_BACKENDS="ncnn" \
+    -Dncnn_DIR=/tmp/ncnn-aarch64/lib/cmake/ncnn \
+    -DOpenCV_DIR=/tmp/ocv-aarch64/lib/cmake/opencv4
+
+  good_nproc
+  jobs=$?
+  make -j${jobs}
+  make install
+
+  ls -lah install/bin/*
+}
+
+print_success() {
+  echo "----------------------------------------------------------------------"
+  echo "Cross build finished, PLS copy bin/model/test_data to the device.. QVQ"
+  echo "----------------------------------------------------------------------"
+}
+
+if [ ! -e "../mmdeploy-dep" ];then
+  mkdir ../mmdeploy-dep
+fi
+cd ../mmdeploy-dep
+
+install_tools
+build_ocv
+build_ncnn
+
+cd ../mmdeploy
+build_mmdeploy
+print_success
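Aside: `good_nproc` passes its result back through the shell's exit status, which the callers read as `jobs=$?`; exit statuses are 8-bit, so the idiom only holds below 256 cores, which is fine for CI runners. The policy itself is simply max(1, nproc - 3), e.g. in Python (a sketch; `build_jobs` is an illustrative name):

    import os

    def build_jobs() -> int:
        """max(1, nproc - 3): leave a few cores free, never fewer than one job."""
        return max(1, (os.cpu_count() or 1) - 3)

    print('make -j{}'.format(build_jobs()))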
diff --git a/tools/scripts/ubuntu_utils.py b/tools/scripts/ubuntu_utils.py
index 9b82869dd..742e06962 100644
--- a/tools/scripts/ubuntu_utils.py
+++ b/tools/scripts/ubuntu_utils.py
@@ -4,21 +4,33 @@ import re
 import time


+def pytorch_version():
+    version = None
+    try:
+        import torch
+        raw = torch.__version__
+        pattern = re.compile(r'[0-9]+\.[0-9]+\.[0-9]+')
+        version = pattern.findall(raw)[0]
+    except Exception:
+        pass
+    return version
+
+
 def cmd_result(txt: str):
     cmd = os.popen(txt)
     return cmd.read().rstrip().lstrip()


 def get_job(argv) -> int:
-    # get nprocs, if user not specified, use max(2, nproc-1)
+    # get nprocs, if user not specified, use max(1, nproc-2)
     job = 2
     if len(argv) <= 1:
         print('your can use `python3 {} N` to set make -j [N]'.format(argv[0]))
         nproc = cmd_result('nproc')
         if nproc is not None and len(nproc) > 0:
-            job = max(int(nproc) - 1, 2)
+            job = max(int(nproc) - 2, 1)
         else:
-            job = 2
+            job = 1
     else:
         job = int(argv[1])
     return job

@@ -55,7 +67,7 @@ def ensure_base_env(work_dir, dep_dir):
     check python, root, pytorch version, auto install these binary:

     * make
-    * g++-7
+    * g++
     * git
     * wget
     * unzip
@@ -63,7 +75,6 @@
     * mmcv (not compulsory)
     """

-    envs = []
     print('-' * 10 + 'ensure base env' + '-' * 10)
     print(description)

@@ -83,18 +94,18 @@
     cmake = cmd_result('which cmake')
     if cmake is None or len(cmake) < 1:
         print('cmake not found, try install cmake ..', end='')
-        os.system('python3 -m pip install cmake>=3.14.0')
+        os.system('python3 -m pip install cmake')

         cmake = cmd_result('which cmake')
         if cmake is None or len(cmake) < 1:
             env = 'export PATH=${PATH}:~/.local/bin'
             os.system(env)
-            envs.append(env)
+            os.system(""" echo '{}' >> ~/mmdeploy.env """.format(env))

             cmake = cmd_result('which cmake')
             if cmake is None or len(cmake) < 1:
                 print('Check cmake failed.')
-                return -1, envs
+                return -1
     print('success')

     # check make
@@ -109,14 +120,14 @@
         make = cmd_result('which make')
         if make is None or len(make) < 1:
             print('Check make failed.')
-            return -1, envs
+            return -1
     print('success')

     # check g++ version
-    gplus = cmd_result('which g++-7')
+    gplus = cmd_result('which g++')
     if gplus is None or len(gplus) < 1:
         # install g++
-        print('g++-7 not found, try install g++-7 ..', end='')
+        print('g++ not found, try install g++ ..', end='')
         os.system(
             '{} DEBIAN_FRONTEND="noninteractive" apt install software-properties-common -y'  # noqa: E501
             .format(sudo))  # noqa: E501
@@ -125,18 +136,12 @@
         os.system(
             '{} add-apt-repository ppa:ubuntu-toolchain-r/test -y'.format(
                 sudo))
-        os.system('{} apt install gcc-7 g++-7 -y'.format(sudo))
+        os.system('{} apt install gcc g++ -y'.format(sudo))

-        gplus = cmd_result('which g++-7')
+        gplus = cmd_result('which g++')
         if gplus is None or len(gplus) < 1:
-            print('Check g++-7 failed.')
-            return -1, envs
-        os.system(
-            '{} update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-7 200'  # noqa: E501
-            .format(sudo))
-        os.system(
-            '{} update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-7 200'  # noqa: E501
-            .format(sudo))
+            print('Check g++ failed.')
+            return -1
     print('success')

     # wget
@@ -197,7 +202,7 @@ def ensure_base_env(work_dir, dep_dir):
     ocv = cmd_result('which opencv_version')
     if ocv is None or len(ocv) < 1:
         print('Check ocv failed.')
-        return -1, envs
+        return -1
     print('success')

     # print all
@@ -217,11 +222,11 @@
         cmd_result(" make --version | head -n 1 | awk '{print $3}' ")))

     print('wget bin\t:{}'.format(wget))
-    print('g++-7 bin\t:{}'.format(gplus))
+    print('g++ bin\t:{}'.format(gplus))
     print('mmcv version\t:{}'.format(mmcv_version))
     if mmcv_version is None:
-        print('\t please install an mm serials algorithm later.')
+        print('\t please install mmcv later.')
         time.sleep(2)

     print('torch version\t:{}'.format(torch_version))
@@ -241,4 +246,4 @@
     print('dep dir \t:{}'.format(dep_dir))
     print('\n')

-    return 0, envs
+    return 0
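Aside: with `ensure_base_env()` reduced to a single status code, environment hints are no longer returned to the caller; each needed `export` line is instead appended to `~/mmdeploy.env`, and the caller asks the user to source it. Note that Python's `os.path.exists` does not expand `~`, hence the `os.path.expanduser` in the callers above. A minimal sketch of that contract (file name as in the scripts; `remember_env` is an illustrative name):

    import os

    ENV_FILE = os.path.expanduser('~/mmdeploy.env')

    def remember_env(line: str) -> None:
        """Persist an export line for the user to source later."""
        with open(ENV_FILE, 'a') as f:
            f.write(line + '\n')

    remember_env('export PATH=${PATH}:~/.local/bin')
    if os.path.exists(ENV_FILE):
        # The user applies it with: source ~/mmdeploy.env
        print('Please source ~/mmdeploy.env to set up your environment.')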