diff --git a/.codespell_ignore.txt b/.codespell_ignore.txt new file mode 100644 index 000000000..f8287736f --- /dev/null +++ b/.codespell_ignore.txt @@ -0,0 +1,2 @@ +cann +CANN diff --git a/.github/workflows/backend-ascend.yml b/.github/workflows/backend-ascend.yml new file mode 100644 index 000000000..d83ec7a4d --- /dev/null +++ b/.github/workflows/backend-ascend.yml @@ -0,0 +1,54 @@ +name: backend-ascend + +on: + push: + paths-ignore: + - "demo/**" + - "tools/**" + + pull_request: + paths-ignore: + - "demo/**" + - "tools/**" + - "docs/**" + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + build_sdk_demo: + runs-on: ubuntu-18.04 + strategy: + matrix: + python-version: [3.7] + steps: + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v2 + with: + python-version: ${{ matrix.python-version }} + - name: Checkout repository + uses: actions/checkout@v3 + with: + submodules: 'recursive' + - name: update + run: sudo apt update + - name: Install dependencies + run: | + sudo apt update + sudo apt install -y ffmpeg libsm6 libxext6 git ninja-build libglib2.0-0 libxrender-dev libc++1-9 libc++abi1-9 + sudo add-apt-repository ppa:ignaciovizzo/opencv3-nonfree + sudo apt install libopencv-dev + pkg-config --libs opencv + - name: Install Ascend Toolkit + run: | + mkdir -p $GITHUB_WORKSPACE/Ascend + wget https://ascend-repo.obs.cn-east-2.myhuaweicloud.com/CANN/CANN%205.1.RC2/Ascend-cann-toolkit_5.1.RC2_linux-x86_64.run + sh Ascend-cann-toolkit_5.1.RC2_linux-x86_64.run --install --install-path=$GITHUB_WORKSPACE/Ascend --quiet --chip=Ascend310 --blacklist=devtools + - name: Build SDK Demo with Ascend backend + run: | + mkdir -p build && pushd build + source $GITHUB_WORKSPACE/Ascend/ascend-toolkit/set_env.sh + export LD_LIBRARY_PATH=$GITHUB_WORKSPACE/Ascend/ascend-toolkit/latest/runtime/lib64/stub:$LD_LIBRARY_PATH + cmake .. 
-DCMAKE_CXX_COMPILER=g++-7 -DMMDEPLOY_SHARED_LIBS=ON -DMMDEPLOY_BUILD_SDK=ON -DMMDEPLOY_BUILD_SDK_PYTHON_API=OFF -DMMDEPLOY_TARGET_DEVICES=cpu -DMMDEPLOY_BUILD_EXAMPLES=ON -DMMDEPLOY_TARGET_BACKENDS=acl -DMMDEPLOY_CODEBASES=all + make install -j4 diff --git a/.gitignore b/.gitignore index 7dea9d9ea..25dedac46 100644 --- a/.gitignore +++ b/.gitignore @@ -153,6 +153,9 @@ mmdeploy/backend/ncnn/onnx2ncnn /mmdeploy-* +# ascend +fusion_result.json + # snpe grpc-cpp-plugin service/snpe/grpc_cpp_plugin diff --git a/README.md b/README.md index e4eac34af..312bb05b0 100644 --- a/README.md +++ b/README.md @@ -55,9 +55,9 @@ The currently supported codebases and models are as follows, and more will be in Models can be exported and run in the following backends, and more will be compatible -| ONNX Runtime | TensorRT | ppl.nn | ncnn | OpenVINO | LibTorch | snpe | more | -| ------------ | -------- | ------ | ---- | -------- | -------- | ---- | ---------------------------------------------- | -| ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | [benchmark](docs/en/03-benchmark/benchmark.md) | +| ONNX Runtime | TensorRT | ppl.nn | ncnn | OpenVINO | LibTorch | snpe | Ascend | more | +| ------------ | -------- | ------ | ---- | -------- | -------- | ---- | ------ | ---------------------------------------------- | +| ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | [benchmark](docs/en/03-benchmark/benchmark.md) | ### Efficient and scalable C/C++ SDK Framework diff --git a/README_zh-CN.md b/README_zh-CN.md index 7d8ac08dd..eaee395fd 100644 --- a/README_zh-CN.md +++ b/README_zh-CN.md @@ -53,9 +53,9 @@ MMDeploy 是 [OpenMMLab](https://openmmlab.com/) 模型部署工具箱,**为 ### 支持多种推理后端 -| ONNX Runtime | TensorRT | ppl.nn | ncnn | OpenVINO | LibTorch | snpe | more | -| ------------ | -------- | ------ | ---- | -------- | -------- | ---- | ------------------------------------------------- | -| ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | [benchmark](docs/zh_cn/03-benchmark/benchmark.md) | +| ONNX Runtime | TensorRT | ppl.nn | ncnn | 
OpenVINO | LibTorch | snpe | Ascend | more | +| ------------ | -------- | ------ | ---- | -------- | -------- | ---- | ------ | ------------------------------------------------- | +| ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | ✔️ | [benchmark](docs/zh_cn/03-benchmark/benchmark.md) | ### SDK 可高度定制化 diff --git a/configs/_base_/backends/ascend.py b/configs/_base_/backends/ascend.py new file mode 100644 index 000000000..dd78aa14e --- /dev/null +++ b/configs/_base_/backends/ascend.py @@ -0,0 +1 @@ +backend_config = dict(type='ascend') diff --git a/configs/mmcls/classification_ascend_dynamic-224x224-224x224.py b/configs/mmcls/classification_ascend_dynamic-224x224-224x224.py new file mode 100644 index 000000000..786365d05 --- /dev/null +++ b/configs/mmcls/classification_ascend_dynamic-224x224-224x224.py @@ -0,0 +1,9 @@ +_base_ = ['./classification_dynamic.py', '../_base_/backends/ascend.py'] + +onnx_config = dict(input_shape=[224, 224]) + +backend_config = dict(model_inputs=[ + dict( + dynamic_batch_size=[1, 2, 4, 8], + input_shapes=dict(input=[-1, 3, 224, 224])) +]) diff --git a/configs/mmcls/classification_ascend_static-224x224.py b/configs/mmcls/classification_ascend_static-224x224.py new file mode 100644 index 000000000..245883e81 --- /dev/null +++ b/configs/mmcls/classification_ascend_static-224x224.py @@ -0,0 +1,5 @@ +_base_ = ['./classification_static.py', '../_base_/backends/ascend.py'] + +onnx_config = dict(input_shape=[224, 224]) +backend_config = dict( + model_inputs=[dict(input_shapes=dict(input=[1, 3, 224, 224]))]) diff --git a/configs/mmdet/detection/detection_ascend_dynamic-800x1344.py b/configs/mmdet/detection/detection_ascend_dynamic-800x1344.py new file mode 100644 index 000000000..0331a4ea6 --- /dev/null +++ b/configs/mmdet/detection/detection_ascend_dynamic-800x1344.py @@ -0,0 +1,8 @@ +_base_ = ['../_base_/base_dynamic.py', '../../_base_/backends/ascend.py'] + +onnx_config = dict(input_shape=[1344, 800]) +backend_config = dict(model_inputs=[ + dict( + 
dynamic_image_size=[(800, 1344), (1344, 800)], + input_shapes=dict(input=[1, 3, -1, -1])) +]) diff --git a/configs/mmdet/detection/detection_ascend_static-640x640.py b/configs/mmdet/detection/detection_ascend_static-640x640.py new file mode 100644 index 000000000..a71cf3038 --- /dev/null +++ b/configs/mmdet/detection/detection_ascend_static-640x640.py @@ -0,0 +1,5 @@ +_base_ = ['../_base_/base_static.py', '../../_base_/backends/ascend.py'] + +onnx_config = dict(input_shape=[640, 640]) +backend_config = dict( + model_inputs=[dict(input_shapes=dict(input=[1, 3, 640, 640]))]) diff --git a/configs/mmdet/detection/detection_ascend_static-800x1344.py b/configs/mmdet/detection/detection_ascend_static-800x1344.py new file mode 100644 index 000000000..81495e8d4 --- /dev/null +++ b/configs/mmdet/detection/detection_ascend_static-800x1344.py @@ -0,0 +1,5 @@ +_base_ = ['../_base_/base_static.py', '../../_base_/backends/ascend.py'] + +onnx_config = dict(input_shape=[1344, 800]) +backend_config = dict( + model_inputs=[dict(input_shapes=dict(input=[1, 3, 800, 1344]))]) diff --git a/configs/mmocr/text-detection/text-detection_ascend_dynamic-640x640-1280x1280.py b/configs/mmocr/text-detection/text-detection_ascend_dynamic-640x640-1280x1280.py new file mode 100644 index 000000000..b1da50783 --- /dev/null +++ b/configs/mmocr/text-detection/text-detection_ascend_dynamic-640x640-1280x1280.py @@ -0,0 +1,8 @@ +_base_ = ['./text-detection_dynamic.py', '../../_base_/backends/ascend.py'] + +onnx_config = dict(input_shape=None) +backend_config = dict(model_inputs=[ + dict( + input_shapes=dict(input=[-1, 3, -1, -1]), + dynamic_dims=[(1, 640, 640), (4, 640, 640), (1, 1280, 1280)]) +]) diff --git a/configs/mmocr/text-detection/text-detection_ascend_static-640x640.py b/configs/mmocr/text-detection/text-detection_ascend_static-640x640.py new file mode 100644 index 000000000..7f7ff9f02 --- /dev/null +++ b/configs/mmocr/text-detection/text-detection_ascend_static-640x640.py @@ -0,0 +1,5 @@ +_base_ 
= ['./text-detection_static.py', '../../_base_/backends/ascend.py'] + +onnx_config = dict(input_shape=[640, 640]) +backend_config = dict( + model_inputs=[dict(input_shapes=dict(input=[1, 3, 640, 640]))]) diff --git a/configs/mmseg/segmentation_ascend_static-1024x2048.py b/configs/mmseg/segmentation_ascend_static-1024x2048.py new file mode 100644 index 000000000..eef4cbd9d --- /dev/null +++ b/configs/mmseg/segmentation_ascend_static-1024x2048.py @@ -0,0 +1,5 @@ +_base_ = ['./segmentation_static.py', '../_base_/backends/ascend.py'] + +onnx_config = dict(input_shape=[2048, 1024]) +backend_config = dict( + model_inputs=[dict(input_shapes=dict(input=[1, 3, 1024, 2048]))]) diff --git a/configs/mmseg/segmentation_ascend_static-512x1024.py b/configs/mmseg/segmentation_ascend_static-512x1024.py new file mode 100644 index 000000000..678d154ff --- /dev/null +++ b/configs/mmseg/segmentation_ascend_static-512x1024.py @@ -0,0 +1,5 @@ +_base_ = ['./segmentation_static.py', '../_base_/backends/ascend.py'] + +onnx_config = dict(input_shape=[1024, 512]) +backend_config = dict( + model_inputs=[dict(input_shapes=dict(input=[1, 3, 512, 1024]))]) diff --git a/csrc/mmdeploy/codebase/mmdet/object_detection.cpp b/csrc/mmdeploy/codebase/mmdet/object_detection.cpp index 486faff43..ade17bd3c 100644 --- a/csrc/mmdeploy/codebase/mmdet/object_detection.cpp +++ b/csrc/mmdeploy/codebase/mmdet/object_detection.cpp @@ -4,6 +4,7 @@ #include "mmdeploy/core/registry.h" #include "mmdeploy/core/utils/device_utils.h" +#include "mmdeploy/core/utils/formatter.h" #include "mmdeploy/experimental/module_adapter.h" using namespace std; diff --git a/csrc/mmdeploy/core/device.h b/csrc/mmdeploy/core/device.h index f301b14ba..f0e8adeab 100644 --- a/csrc/mmdeploy/core/device.h +++ b/csrc/mmdeploy/core/device.h @@ -68,7 +68,7 @@ class Device { constexpr explicit Device(int platform_id, int device_id = 0) : platform_id_(platform_id), device_id_(device_id) {} - MMDEPLOY_API explicit Device(const char *platform_name, 
int device_id = 0); + MMDEPLOY_API explicit Device(const char* platform_name, int device_id = 0); constexpr int device_id() const noexcept { return device_id_; } @@ -78,11 +78,11 @@ class Device { constexpr bool is_device() const noexcept { return platform_id() > 0; } - constexpr bool operator==(const Device &other) const noexcept { + constexpr bool operator==(const Device& other) const noexcept { return platform_id_ == other.platform_id_ && device_id_ == other.device_id_; } - constexpr bool operator!=(const Device &other) const noexcept { return !(*this == other); } + constexpr bool operator!=(const Device& other) const noexcept { return !(*this == other); } constexpr explicit operator bool() const noexcept { return platform_id_ >= 0 && device_id_ >= 0; } @@ -104,7 +104,7 @@ enum class MemcpyKind : int { HtoD, DtoH, DtoD }; class MMDEPLOY_API Platform { public: // throws if not found - explicit Platform(const char *platform_name); + explicit Platform(const char* platform_name); // throws if not found explicit Platform(int platform_id); @@ -113,11 +113,11 @@ class MMDEPLOY_API Platform { int GetPlatformId() const; // "" if invalid - const char *GetPlatformName() const; + const char* GetPlatformName() const; - bool operator==(const Platform &other) { return impl_ == other.impl_; } + bool operator==(const Platform& other) { return impl_ == other.impl_; } - bool operator!=(const Platform &other) { return !(*this == other); } + bool operator!=(const Platform& other) { return !(*this == other); } explicit operator bool() const noexcept { return static_cast(impl_); } @@ -132,7 +132,7 @@ class MMDEPLOY_API Platform { Platform GetPlatform(int platform_id); -Platform GetPlatform(const char *platform_name); +Platform GetPlatform(const char* platform_name); class MMDEPLOY_API Stream { public: @@ -140,7 +140,7 @@ class MMDEPLOY_API Stream { explicit Stream(Device device, uint64_t flags = 0); - explicit Stream(Device device, void *native, uint64_t flags = 0); + explicit 
Stream(Device device, void* native, uint64_t flags = 0); explicit Stream(Device device, std::shared_ptr native, uint64_t flags = 0); @@ -150,25 +150,25 @@ class MMDEPLOY_API Stream { Result Wait(); - Result DependsOn(Event &event); + Result DependsOn(Event& event); - Result Submit(Kernel &kernel); + Result Submit(Kernel& kernel); - void *GetNative(ErrorCode *ec = nullptr); + void* GetNative(ErrorCode* ec = nullptr); - Result Copy(const Buffer &src, Buffer &dst, size_t size = -1, size_t src_offset = 0, + Result Copy(const Buffer& src, Buffer& dst, size_t size = -1, size_t src_offset = 0, size_t dst_offset = 0); - Result Copy(const void *host_ptr, Buffer &dst, size_t size = -1, size_t dst_offset = 0); + Result Copy(const void* host_ptr, Buffer& dst, size_t size = -1, size_t dst_offset = 0); - Result Copy(const Buffer &src, void *host_ptr, size_t size = -1, size_t src_offset = 0); + Result Copy(const Buffer& src, void* host_ptr, size_t size = -1, size_t src_offset = 0); - Result Fill(const Buffer &dst, void *pattern, size_t pattern_size, size_t size = -1, + Result Fill(const Buffer& dst, void* pattern, size_t pattern_size, size_t size = -1, size_t offset = 0); - bool operator==(const Stream &other) const { return impl_ == other.impl_; } + bool operator==(const Stream& other) const { return impl_ == other.impl_; } - bool operator!=(const Stream &other) const { return !(*this == other); } + bool operator!=(const Stream& other) const { return !(*this == other); } explicit operator bool() const noexcept { return static_cast(impl_); } @@ -184,7 +184,7 @@ class MMDEPLOY_API Stream { }; template -T GetNative(Stream &stream, ErrorCode *ec = nullptr) { +T GetNative(Stream& stream, ErrorCode* ec = nullptr) { return reinterpret_cast(stream.GetNative(ec)); } @@ -194,7 +194,7 @@ class MMDEPLOY_API Event { explicit Event(Device device, uint64_t flags = 0); - explicit Event(Device device, void *native, uint64_t flags = 0); + explicit Event(Device device, void* native, uint64_t flags 
= 0); explicit Event(Device device, std::shared_ptr native, uint64_t flags = 0); @@ -204,13 +204,13 @@ class MMDEPLOY_API Event { Result Wait(); - Result Record(Stream &stream); + Result Record(Stream& stream); - void *GetNative(ErrorCode *ec = nullptr); + void* GetNative(ErrorCode* ec = nullptr); - bool operator==(const Event &other) const { return impl_ == other.impl_; } + bool operator==(const Event& other) const { return impl_ == other.impl_; } - bool operator!=(const Event &other) const { return !(*this == other); } + bool operator!=(const Event& other) const { return !(*this == other); } explicit operator bool() const noexcept { return static_cast(impl_); } @@ -223,7 +223,7 @@ class MMDEPLOY_API Event { }; template -T GetNative(Event &event, ErrorCode *ec = nullptr) { +T GetNative(Event& event, ErrorCode* ec = nullptr) { return reinterpret_cast(event.GetNative(ec)); } @@ -234,7 +234,7 @@ class MMDEPLOY_API Kernel { Device GetDevice() const; - void *GetNative(ErrorCode *ec = nullptr); + void* GetNative(ErrorCode* ec = nullptr); explicit operator bool() const noexcept { return static_cast(impl_); } @@ -243,7 +243,7 @@ class MMDEPLOY_API Kernel { }; template -T GetNative(Kernel &kernel, ErrorCode *ec = nullptr) { +T GetNative(Kernel& kernel, ErrorCode* ec = nullptr) { return reinterpret_cast(kernel.GetNative(ec)); } @@ -269,25 +269,25 @@ class MMDEPLOY_API Buffer { Buffer(Device device, size_t size, Allocator allocator, size_t alignment = 1, uint64_t flags = 0); - Buffer(Device device, size_t size, void *native, uint64_t flags = 0); + Buffer(Device device, size_t size, void* native, uint64_t flags = 0); Buffer(Device device, size_t size, std::shared_ptr native, uint64_t flags = 0); // create sub-buffer - Buffer(Buffer &buffer, size_t offset, size_t size, uint64_t flags = 0); + Buffer(Buffer& buffer, size_t offset, size_t size, uint64_t flags = 0); - size_t GetSize(ErrorCode *ec = nullptr) const; + size_t GetSize(ErrorCode* ec = nullptr) const; - // bool 
IsSubBuffer(ErrorCode *ec = nullptr); + // bool IsSubBuffer(ErrorCode* ec = nullptr); - void *GetNative(ErrorCode *ec = nullptr) const; + void* GetNative(ErrorCode* ec = nullptr) const; Device GetDevice() const; Allocator GetAllocator() const; - bool operator==(const Buffer &other) const { return impl_ == other.impl_; } + bool operator==(const Buffer& other) const { return impl_ == other.impl_; } - bool operator!=(const Buffer &other) const { return !(*this == other); } + bool operator!=(const Buffer& other) const { return !(*this == other); } explicit operator bool() const noexcept { return static_cast(impl_); } @@ -300,12 +300,12 @@ class MMDEPLOY_API Buffer { }; template -T GetNative(Buffer &buffer, ErrorCode *ec = nullptr) { +T GetNative(Buffer& buffer, ErrorCode* ec = nullptr) { return reinterpret_cast(buffer.GetNative(ec)); } template -T GetNative(const Buffer &buffer, ErrorCode *ec = nullptr) { +T GetNative(const Buffer& buffer, ErrorCode* ec = nullptr) { return reinterpret_cast(buffer.GetNative(ec)); } @@ -315,13 +315,15 @@ class MMDEPLOY_API PlatformRegistry { int Register(Creator creator); - int GetPlatform(const char *name, Platform *platform); + int AddAlias(const char* name, const char* target); - int GetPlatform(int id, Platform *platform); + int GetPlatform(const char* name, Platform* platform); - int GetPlatformId(const char *name); + int GetPlatform(int id, Platform* platform); - PlatformImpl *GetPlatformImpl(PlatformId id); + int GetPlatformId(const char* name); + + PlatformImpl* GetPlatformImpl(PlatformId id); private: int GetNextId(); @@ -335,8 +337,9 @@ class MMDEPLOY_API PlatformRegistry { Platform platform; }; std::vector entries_; + std::vector> aliases_; }; -MMDEPLOY_API PlatformRegistry &gPlatformRegistry(); +MMDEPLOY_API PlatformRegistry& gPlatformRegistry(); } // namespace mmdeploy diff --git a/csrc/mmdeploy/core/device_impl.cpp b/csrc/mmdeploy/core/device_impl.cpp index ae708d933..ef72f1e33 100644 --- 
a/csrc/mmdeploy/core/device_impl.cpp +++ b/csrc/mmdeploy/core/device_impl.cpp @@ -321,6 +321,11 @@ int PlatformRegistry::Register(Creator creator) { return 0; } +int PlatformRegistry::AddAlias(const char* name, const char* target) { + aliases_.emplace_back(name, target); + return 0; +} + int PlatformRegistry::GetNextId() { for (int i = 1;; ++i) { if (IsAvailable(i)) { @@ -339,6 +344,12 @@ bool PlatformRegistry::IsAvailable(int id) { } int PlatformRegistry::GetPlatform(const char* name, Platform* platform) { + for (const auto& alias : aliases_) { + if (name == alias.first) { + name = alias.second.c_str(); + break; + } + } for (const auto& entry : entries_) { if (entry.name == name) { *platform = entry.platform; @@ -357,7 +368,14 @@ int PlatformRegistry::GetPlatform(int id, Platform* platform) { } return -1; } + int PlatformRegistry::GetPlatformId(const char* name) { + for (const auto& alias : aliases_) { + if (name == alias.first) { + name = alias.second.c_str(); + break; + } + } for (const auto& entry : entries_) { if (entry.name == name) { return entry.id; diff --git a/csrc/mmdeploy/core/mpl/span.h b/csrc/mmdeploy/core/mpl/span.h index 7aa630bf0..29d49f6a7 100644 --- a/csrc/mmdeploy/core/mpl/span.h +++ b/csrc/mmdeploy/core/mpl/span.h @@ -94,17 +94,23 @@ class Span { constexpr Span& operator=(const Span& other) noexcept = default; - friend bool operator==(const Span& a, const Span& b) { - if (a.size() != b.size()) return false; + template + friend bool operator!=(const Span& a, const Span& b) { + if (a.size() != b.size()) { + return true; + } for (size_type i = 0; i < a.size(); ++i) { if (a[i] != b[i]) { - return false; + return true; } } - return true; + return false; } - friend bool operator!=(const Span& a, const Span& b) { return !(a == b); } + template + friend bool operator==(const Span& a, const Span& b) { + return !(a != b); + } private: T* data_; diff --git a/csrc/mmdeploy/core/tensor.cpp b/csrc/mmdeploy/core/tensor.cpp index 07fac1ae7..a715da7c4 100644 
--- a/csrc/mmdeploy/core/tensor.cpp +++ b/csrc/mmdeploy/core/tensor.cpp @@ -115,9 +115,9 @@ Result Tensor::CopyFrom(const Tensor& tensor, Stream stream) { if (!stream) { auto device = desc_.device.is_device() ? desc_.device : tensor.desc().device; auto default_stream = Stream::GetDefault(device); - OUTCOME_TRY(default_stream.Copy(tensor.buffer(), buffer_)); + OUTCOME_TRY(default_stream.Copy(tensor.buffer(), buffer_, tensor.byte_size())); } else { - OUTCOME_TRY(stream.Copy(tensor.buffer(), buffer_)); + OUTCOME_TRY(stream.Copy(tensor.buffer(), buffer_, tensor.byte_size())); } return success(); } @@ -141,9 +141,9 @@ Result Tensor::CopyTo(Tensor& tensor, Stream stream) const { if (!stream) { Device device = desc_.device.is_device() ? desc_.device : tensor.desc().device; Stream default_stream = Stream::GetDefault(device); - return default_stream.Copy(buffer_, tensor.buffer()); + return default_stream.Copy(buffer_, tensor.buffer(), byte_size()); } else { - return stream.Copy(buffer_, tensor.buffer()); + return stream.Copy(buffer_, tensor.buffer(), byte_size()); } } @@ -158,9 +158,9 @@ Result Tensor::CopyFrom(void* host_ptr, Stream stream) { Allocate(); if (!stream) { auto default_stream = Stream::GetDefault(desc_.device); - return default_stream.Copy(host_ptr, buffer_, buffer_.GetSize()); + return default_stream.Copy(host_ptr, buffer_, byte_size()); } else { - return stream.Copy(host_ptr, buffer_, buffer_.GetSize()); + return stream.Copy(host_ptr, buffer_, byte_size()); } } @@ -174,9 +174,9 @@ Result Tensor::CopyTo(void* host_ptr, Stream stream) const { } if (!stream) { auto default_stream = Stream::GetDefault(desc_.device); - return default_stream.Copy(buffer_, host_ptr, buffer_.GetSize()); + return default_stream.Copy(buffer_, host_ptr, byte_size()); } else { - return stream.Copy(buffer_, host_ptr, buffer_.GetSize()); + return stream.Copy(buffer_, host_ptr, byte_size()); } } diff --git a/csrc/mmdeploy/device/CMakeLists.txt b/csrc/mmdeploy/device/CMakeLists.txt index 
358460609..6243e8e6a 100644 --- a/csrc/mmdeploy/device/CMakeLists.txt +++ b/csrc/mmdeploy/device/CMakeLists.txt @@ -5,3 +5,7 @@ add_subdirectory(cpu) if ("cuda" IN_LIST MMDEPLOY_TARGET_DEVICES) add_subdirectory(cuda) endif () + +if ("acl" IN_LIST MMDEPLOY_TARGET_BACKENDS) + add_subdirectory(acl) +endif () diff --git a/csrc/mmdeploy/device/acl/CMakeLists.txt b/csrc/mmdeploy/device/acl/CMakeLists.txt new file mode 100644 index 000000000..be7e504c3 --- /dev/null +++ b/csrc/mmdeploy/device/acl/CMakeLists.txt @@ -0,0 +1,7 @@ +# Copyright (c) OpenMMLab. All rights reserved. + +project(mmdeploy_acl_device) + +file(GLOB_RECURSE SRCS "*.cpp") + +mmdeploy_add_module(${PROJECT_NAME} "${SRCS}") diff --git a/csrc/mmdeploy/device/acl/acl_device.cpp b/csrc/mmdeploy/device/acl/acl_device.cpp new file mode 100644 index 000000000..fffc9f7ba --- /dev/null +++ b/csrc/mmdeploy/device/acl/acl_device.cpp @@ -0,0 +1,14 @@ +// Copyright (c) OpenMMLab. All rights reserved. + +#include "mmdeploy/core/device_impl.h" + +namespace mmdeploy { + +class AclPlatformRegisterer { + public: + AclPlatformRegisterer() { gPlatformRegistry().AddAlias("npu", "cpu"); } +}; + +AclPlatformRegisterer g_acl_platform_registerer; + +} // namespace mmdeploy diff --git a/csrc/mmdeploy/device/cpu/cpu_device.cpp b/csrc/mmdeploy/device/cpu/cpu_device.cpp index 1fd21ccd8..8c9d319ec 100644 --- a/csrc/mmdeploy/device/cpu/cpu_device.cpp +++ b/csrc/mmdeploy/device/cpu/cpu_device.cpp @@ -105,7 +105,7 @@ Result CpuPlatformImpl::CopyImpl(const void* src, void* dst, size_t src_si task(); return success(); } - if (st.GetDevice() != Device(0, 0)) { + if (st.GetDevice().platform_id() != 0) { return Status(eInvalidArgument); } auto cpu_stream = static_cast(st.GetNative()); @@ -126,6 +126,7 @@ Result CpuPlatformImpl::Copy(const void* host_ptr, Buffer dst, size_t size } return CopyImpl(host_ptr, dst_ptr, size, dst.GetSize(), 0, dst_offset, size, stream); } + Result CpuPlatformImpl::Copy(Buffer src, void* host_ptr, size_t size, 
size_t src_offset, Stream stream) { auto src_ptr = src.GetNative(); @@ -145,7 +146,7 @@ Result CpuPlatformImpl::Copy(Buffer src, Buffer dst, size_t size, size_t s return Status(eInvalidArgument); } auto device = src.GetDevice(); - if (device.platform_id() != 0 || device != dst.GetDevice()) { + if (device.platform_id() != 0 || device.platform_id() != dst.GetDevice().platform_id()) { return Status(eInvalidArgument); } return CopyImpl(src_ptr, dst_ptr, src.GetSize(), dst.GetSize(), src_offset, dst_offset, size, diff --git a/csrc/mmdeploy/net/CMakeLists.txt b/csrc/mmdeploy/net/CMakeLists.txt index 09dca3cd7..cd3d2711c 100644 --- a/csrc/mmdeploy/net/CMakeLists.txt +++ b/csrc/mmdeploy/net/CMakeLists.txt @@ -26,6 +26,10 @@ if ("snpe" IN_LIST MMDEPLOY_TARGET_BACKENDS) add_subdirectory(snpe) endif () +if ("acl" IN_LIST MMDEPLOY_TARGET_BACKENDS) + add_subdirectory(acl) +endif () + if ("torchscript" IN_LIST MMDEPLOY_TARGET_BACKENDS) add_subdirectory(torchscript) endif () diff --git a/csrc/mmdeploy/net/acl/CMakeLists.txt b/csrc/mmdeploy/net/acl/CMakeLists.txt new file mode 100644 index 000000000..2056b7350 --- /dev/null +++ b/csrc/mmdeploy/net/acl/CMakeLists.txt @@ -0,0 +1,14 @@ +# Copyright (c) OpenMMLab. All rights reserved. + +project(mmdeploy_acl_net) + +if ("acl" IN_LIST MMDEPLOY_TARGET_BACKENDS) + if (NOT DEFINED ASCEND_TOOLKIT_HOME) + set(ASCEND_TOOLKIT_HOME $ENV{ASCEND_TOOLKIT_HOME}) + endif () + mmdeploy_add_module(${PROJECT_NAME} acl_net.cpp) + target_include_directories(${PROJECT_NAME} PRIVATE + $) + target_link_libraries(${PROJECT_NAME} PRIVATE + $) +endif () diff --git a/csrc/mmdeploy/net/acl/acl_net.cpp b/csrc/mmdeploy/net/acl/acl_net.cpp new file mode 100644 index 000000000..f1b37a1e8 --- /dev/null +++ b/csrc/mmdeploy/net/acl/acl_net.cpp @@ -0,0 +1,659 @@ +// Copyright (c) OpenMMLab. All rights reserved. 
+ +#include "mmdeploy/net/acl/acl_net.h" + +#include "mmdeploy/core/logger.h" +#include "mmdeploy/core/model.h" +#include "mmdeploy/core/utils/formatter.h" + +std::ostream& operator<<(std::ostream& os, const aclmdlIODims& dims) { + os << dims.name << " ["; + for (int i = 0; i < dims.dimCount; ++i) { + os << (i ? ", " : "") << dims.dims[i]; + } + os << "]"; + return os; +} + +std::ostream& operator<<(std::ostream& os, const aclmdlBatch& batch) { + os << "batch ["; + for (int i = 0; i < batch.batchCount; ++i) { + os << (i ? ", " : "") << batch.batch[i]; + } + os << "]"; + return os; +} + +std::ostream& operator<<(std::ostream& os, const aclmdlHW& hw) { + os << "HW ["; + for (int i = 0; i < hw.hwCount; ++i) { + os << (i ? ", " : "") << "(" << hw.hw[i][0] << ", " << hw.hw[i][1] << ")"; + } + os << "]"; + return os; +} + +namespace mmdeploy { + +namespace { + +inline Result _m(aclError ec, SourceLocation loc = SourceLocation::current()) { + if (ec == ACL_SUCCESS) { + return success(); + } else { + return Status(eFail, loc); + } +} + +template +inline Result _p(T* ptr, SourceLocation loc = SourceLocation::current()) { + if (ptr) { + return ptr; + } else { + return Status(eFail, loc); + } +} + +struct Context { + Context() { + std::lock_guard lock{mutex_}; + if (ref_count_++ != 0) { + return; + } + auto ret = aclInit(nullptr); + if (ret == ACL_SUCCESS) { + MMDEPLOY_INFO("ACL initialized."); + owned_acl_ = true; + } else if (ret == ACL_ERROR_REPEAT_INITIALIZE) { + MMDEPLOY_INFO("ACL has already been initialized."); + } else { + MMDEPLOY_ERROR("aclInit() failed: {}", ret); + assert(ret == 0); + } + } + ~Context() { + std::lock_guard lock{mutex_}; + if (--ref_count_ != 0) { + return; + } + // skip aclFinalize if aclInit is not successfully called by us. 
+ if (owned_acl_) { + auto ret = aclFinalize(); + if (ret == ACL_SUCCESS) { + MMDEPLOY_INFO("ACL finalized."); + owned_acl_ = false; + } else if (ret == ACL_ERROR_REPEAT_FINALIZE) { + MMDEPLOY_INFO("ACL has already been finalized."); + } else { + MMDEPLOY_ERROR("aclFinalize() failed: {}", ret); + } + } + } + static bool owned_acl_; + static int ref_count_; + static std::mutex mutex_; +}; + +bool Context::owned_acl_ = false; +int Context::ref_count_ = 0; +std::mutex Context::mutex_{}; + +} // namespace + +AclNet::~AclNet() { + auto dtor = [&]() -> Result { + auto n_inputs = aclmdlGetDatasetNumBuffers(input_dataset_); + for (int i = 0; i < n_inputs; ++i) { + auto buffer = aclmdlGetDatasetBuffer(input_dataset_, i); + auto data = aclGetDataBufferAddr(buffer); + OUTCOME_TRY(_m(aclrtFree(data))); + } + input_tensor_.clear(); + OUTCOME_TRY(_m(aclmdlDestroyDataset(input_dataset_))); + + auto n_outputs = aclmdlGetDatasetNumBuffers(output_dataset_); + for (int i = 0; i < n_outputs; ++i) { + auto buffer = aclmdlGetDatasetBuffer(output_dataset_, i); + auto data = aclGetDataBufferAddr(buffer); + OUTCOME_TRY(_m(aclrtFree(data))); + } + output_tensor_.clear(); + OUTCOME_TRY(_m(aclmdlDestroyDataset(output_dataset_))); + + OUTCOME_TRY(_m(aclmdlDestroyDesc(model_desc_))); + OUTCOME_TRY(_m(aclmdlUnload(model_id_))); + return success(); + }; + if (auto r = dtor(); !r) { + MMDEPLOY_ERROR("uninit failed: {}", r.error().message().c_str()); + } +} + +namespace { + +Result FromAclDataType(aclDataType data_type) { + switch (data_type) { + case ACL_FLOAT: + return DataType::kFLOAT; + case ACL_FLOAT16: + return DataType::kHALF; + case ACL_INT8: + return DataType::kINT8; + case ACL_INT32: + return DataType::kINT32; + case ACL_INT64: + return DataType::kINT64; + default: + return Status(eNotSupported); + } +} + +Result ToAclDataType(DataType data_type) { + switch (data_type) { + case DataType::kFLOAT: + return ACL_FLOAT; + case DataType::kHALF: + return ACL_FLOAT16; + case DataType::kINT8: + 
return ACL_INT8; + case DataType::kINT32: + return ACL_INT32; + case DataType::kINT64: + return ACL_INT64; + default: + return Status(eNotSupported); + } +} + +Result ToTensorDesc(const aclmdlIODims& dims, aclDataType data_type) { + auto extract_name = [](const std::string& name) { + if (auto pos = name.find_last_of(':'); pos != std::string::npos) { + return name.substr(pos + 1); + } else { + return name; + } + }; + OUTCOME_TRY(auto _data_type, FromAclDataType(data_type)); + return TensorDesc{Device(0), _data_type, + TensorShape(&dims.dims[0], &dims.dims[0] + dims.dimCount), + extract_name(dims.name)}; +} + +Result GetByteSize(const aclmdlIODims& dims, aclDataType data_type) { + size_t byte_size = aclDataTypeSize(data_type); + for (int i = 0; i < dims.dimCount; ++i) { + if (dims.dims[i] < 0) { + return Status(eInvalidArgument); + } + byte_size *= dims.dims[i]; + } + return byte_size; +} + +} // namespace + +// all dims must be fixed +auto AclNet::CreateBuffers(const aclmdlIODims& dims, aclDataType data_type) -> Result { + OUTCOME_TRY(auto byte_size, GetByteSize(dims, data_type)); + Buffers pair{}; + void* dev_ptr{}; + OUTCOME_TRY(_m(aclrtMalloc(&dev_ptr, byte_size, ACL_MEM_MALLOC_HUGE_FIRST))); + OUTCOME_TRY(_m(aclrtMemset(dev_ptr, byte_size, 0, byte_size))); + OUTCOME_TRY(pair.device_buffer, _p(aclCreateDataBuffer(dev_ptr, byte_size))); + OUTCOME_TRY(auto desc, ToTensorDesc(dims, data_type)); + void* host_ptr{}; + OUTCOME_TRY(_m(aclrtMallocHost(&host_ptr, byte_size))); + memset(host_ptr, 0, byte_size); + pair.host_tensor = + Tensor(desc, std::shared_ptr(host_ptr, [](void* p) { aclrtFreeHost(p); })); + return pair; +} + +auto AclNet::CreateBuffersDynamicBatchSize(aclmdlIODims dims, aclDataType data_type) + -> Result { + for (int i = 0; i < dims.dimCount; ++i) { + if (dims.dims[i] == -1) { + dims.dims[i] = dynamic_batch_size_.back(); + } + } + return CreateBuffers(dims, data_type); +} + +auto AclNet::CreateBuffersDynamicImageSize(int index, aclmdlIODims dims, 
aclDataType data_type) + -> Result { + aclmdlHW hw_desc{}; + OUTCOME_TRY(_m(aclmdlGetDynamicHW(model_desc_, index, &hw_desc))); + if (hw_desc.hwCount > 0) { + auto& val = *std::max_element(hw_desc.hw, hw_desc.hw + hw_desc.hwCount, + [](auto u, auto v) { return u[0] * u[1] < v[0] * v[1]; }); + int ptr = 0; + for (int i = 0; i < dims.dimCount; ++i) { + if (dims.dims[i] == -1) { + if (ptr == 2) { + return Status(eInvalidArgument); + } + dims.dims[i] = val[ptr++]; + } + } + if (ptr != 2) { + return Status(eInvalidArgument); + } + } + return CreateBuffers(dims, data_type); +} + +auto AclNet::CreateBuffersDynamicDims(int index, int dim_count, const aclmdlIODims& dims, + aclDataType data_type) -> Result { + int max_index = -1; + size_t max_value = 0; + aclmdlIODims max_shape{}; + for (int j = 0; j < dynamic_input_dims_.size(); ++j) { + aclmdlIODims shape{}; + strncpy(shape.name, dims.name, sizeof(shape.name)); + shape.dimCount = dims.dimCount; + std::copy(dynamic_input_dims_[j].dims + dim_count, + dynamic_input_dims_[j].dims + dim_count + dims.dimCount, shape.dims); + OUTCOME_TRY(auto byte_size, GetByteSize(shape, data_type)); + if (byte_size > max_value) { + max_index = j; + max_value = byte_size; + max_shape = shape; + } + } + if (max_index < 0) { + return Status(eInvalidArgument); + } + MMDEPLOY_INFO("max shape for input {}: {}", index, max_shape); + return CreateBuffers(max_shape, data_type); +} + +Result AclNet::ConfigDynamicShapes() { + aclError status = ACL_SUCCESS; + { + size_t dynamic_tensor_index{}; + status = aclmdlGetInputIndexByName(model_desc_, ACL_DYNAMIC_TENSOR_NAME, &dynamic_tensor_index); + if (status == ACL_SUCCESS) { + dynamic_tensor_index_ = static_cast(dynamic_tensor_index); + MMDEPLOY_INFO("dynamic tensor index: {}", dynamic_tensor_index); + } + } + + if (dynamic_tensor_index_ >= 0) { + aclmdlBatch batch_desc{}; + status = aclmdlGetDynamicBatch(model_desc_, &batch_desc); + if (status == ACL_SUCCESS && batch_desc.batchCount > 0) { + 
MMDEPLOY_INFO("{}, status = {}", batch_desc, status); + input_shape_type_ = kDynamicBatchSize; + dynamic_batch_size_.insert(dynamic_batch_size_.end(), batch_desc.batch, + batch_desc.batch + batch_desc.batchCount); + std::sort(dynamic_batch_size_.begin(), dynamic_batch_size_.end()); + } + + size_t dynamic_gear_count{0}; + if (input_shape_type_ == kStatic) { + status = aclmdlGetInputDynamicGearCount(model_desc_, -1, &dynamic_gear_count); + dynamic_input_dims_.resize(dynamic_gear_count); + if (status == ACL_SUCCESS && dynamic_gear_count > 0) { + status = aclmdlGetInputDynamicDims(model_desc_, -1, dynamic_input_dims_.data(), + dynamic_gear_count); + for (const auto& dims : dynamic_input_dims_) { + MMDEPLOY_INFO("dynamic input dims: {}", dims); + } + input_shape_type_ = kDynamicDims; + } else { + input_shape_type_ = kDynamicImageSize; + } + } + } + return success(); +} + +Result AclNet::CreateInputBuffers() { + input_dataset_ = aclmdlCreateDataset(); + auto n_inputs = aclmdlGetNumInputs(model_desc_); + MMDEPLOY_INFO("n_inputs = {}, dynamic_tensor_index_ = {}", n_inputs, dynamic_tensor_index_); + int dim_count = 0; + for (int i = 0; i < n_inputs; ++i) { + if (i == dynamic_tensor_index_) { + void* data{}; + auto input_len = aclmdlGetInputSizeByIndex(model_desc_, i); + OUTCOME_TRY(_m(aclrtMalloc(&data, input_len, ACL_MEM_MALLOC_HUGE_FIRST))); + OUTCOME_TRY(auto buffer, _p(aclCreateDataBuffer(data, input_len))); + OUTCOME_TRY(_m(aclmdlAddDatasetBuffer(input_dataset_, buffer))); + } else { + Buffers buffers{}; + aclmdlIODims dims{}; + OUTCOME_TRY(_m(aclmdlGetInputDims(model_desc_, i, &dims))); + input_dims_.push_back(dims); + auto data_type = aclmdlGetInputDataType(model_desc_, i); + input_data_type_.push_back(data_type); + MMDEPLOY_INFO("{}", dims); + + switch (input_shape_type_) { + case kStatic: { + OUTCOME_TRY(buffers, CreateBuffers(dims, data_type)); + break; + } + case kDynamicBatchSize: { + OUTCOME_TRY(buffers, CreateBuffersDynamicBatchSize(dims, data_type)); + break; 
+ } + case kDynamicImageSize: { + OUTCOME_TRY(buffers, CreateBuffersDynamicImageSize(i, dims, data_type)); + break; + } + case kDynamicDims: { + OUTCOME_TRY(buffers, CreateBuffersDynamicDims(i, dim_count, dims, data_type)); + break; + } + default: + return Status(eInvalidArgument); + } + + OUTCOME_TRY(_m(aclmdlAddDatasetBuffer(input_dataset_, buffers.device_buffer))); + input_tensor_.push_back(std::move(buffers.host_tensor)); + dim_count += dims.dimCount; + } + } + return success(); +} + +Result AclNet::CreateOutputBuffers() { + output_dataset_ = aclmdlCreateDataset(); + auto n_outputs = aclmdlGetNumOutputs(model_desc_); + std::vector output_dims; + for (int i = 0; i < n_outputs; ++i) { + aclmdlIODims dims{}; + OUTCOME_TRY(_m(aclmdlGetOutputDims(model_desc_, i, &dims))); // return max dims + output_dims_.push_back(dims); + MMDEPLOY_INFO("{}", dims); + auto data_type = aclmdlGetOutputDataType(model_desc_, i); + output_data_type_.push_back(data_type); + OUTCOME_TRY(auto buffers, CreateBuffers(dims, data_type)); + OUTCOME_TRY(_m(aclmdlAddDatasetBuffer(output_dataset_, buffers.device_buffer))); + output_tensor_.push_back(std::move(buffers.host_tensor)); + } + return success(); +} + +Result AclNet::Init(const Value& args) { + auto& context = args["context"]; + cpu_stream_ = context["stream"].get(); + + auto name = args["name"].get(); + auto model = context["model"].get(); + + device_id_ = context["device"].get().device_id(); + acl_context_ = std::make_shared(); + + OUTCOME_TRY(auto config, model.GetModelConfig(name)); + OUTCOME_TRY(auto binary, model.ReadFile(config.net)); + + OUTCOME_TRY(_m(aclrtSetDevice(device_id_))); + + OUTCOME_TRY(_m(aclmdlLoadFromMem(binary.data(), binary.size(), &model_id_))); + + model_desc_ = aclmdlCreateDesc(); + OUTCOME_TRY(_m(aclmdlGetDesc(model_desc_, model_id_))); + + // dynamic_tensor_index_ + // input_shape_type_ + // dynamic_batch_size_ + // dynamic_input_dims_ + if (auto r = ConfigDynamicShapes(); !r) { + MMDEPLOY_ERROR("Failed to 
config dynamic shapes"); + return r.as_failure(); + } + + // input_dataset_ + // input_data_type_ + // input_dims_ + // input_tensor_ + if (auto r = CreateInputBuffers(); !r) { + MMDEPLOY_ERROR("Failed to create input buffers"); + return r.as_failure(); + } + + // output_dataset_ + // output_data_type_ + // output_dims_ + // output_tensor_ + if (auto r = CreateOutputBuffers(); !r) { + MMDEPLOY_ERROR("Failed to create output buffers"); + return r.as_failure(); + } + + return success(); +} + +Result AclNet::Deinit() { return success(); } + +Result> AclNet::GetInputTensors() { return input_tensor_; } + +Result> AclNet::GetOutputTensors() { return output_tensor_; } + +Result AclNet::Reshape(Span input_shapes) { + OUTCOME_TRY(_m(aclrtSetDevice(device_id_))); + // Sanity checks + if (input_shapes.size() != input_dims_.size()) { + MMDEPLOY_ERROR("inconsistent num inputs"); + return Status(eInvalidArgument); + } + for (int i = 0; i < input_dims_.size(); ++i) { + if (input_shapes[i].size() != input_dims_[i].dimCount) { + MMDEPLOY_ERROR("inconsistent num of dims"); + return Status(eInvalidArgument); + } + } + + switch (input_shape_type_) { + case kStatic: { + OUTCOME_TRY(ReshapeStatic(input_shapes)); + break; + } + case kDynamicBatchSize: { + OUTCOME_TRY(ReshapeDynamicBatchSize(input_shapes)); + break; + } + case kDynamicImageSize: { + OUTCOME_TRY(ReshapeDynamicImageSize(input_shapes)); + break; + } + case kDynamicDims: { + OUTCOME_TRY(ReshapeDynamicDims(input_shapes)); + break; + } + default: + return Status(eInvalidArgument); + } + + for (int i = 0; i < input_shapes.size(); ++i) { + auto buffer = input_tensor_[i].buffer(); + auto desc = input_tensor_[i].desc(); + desc.shape = input_shapes[i]; + input_tensor_[i] = Tensor(std::move(desc), std::move(buffer)); + } + + for (int i = 0; i < output_dims_.size(); ++i) { + aclmdlIODims dims{}; + OUTCOME_TRY(_m(aclmdlGetCurOutputDims(model_desc_, i, &dims))); + auto buffer = output_tensor_[i].buffer(); + auto desc = 
output_tensor_[i].desc(); + desc.shape = TensorShape(&dims.dims[0], &dims.dims[0] + dims.dimCount); + output_tensor_[i] = Tensor(std::move(desc), std::move(buffer)); + } + + return success(); +} + +Result AclNet::ReshapeStatic(Span input_shapes) { + for (int i = 0; i < input_dims_.size(); ++i) { + Span src(input_shapes[i]); + Span ref(input_dims_[i].dims, input_dims_[i].dimCount); + if (src != ref) { + MMDEPLOY_ERROR("Shape mismatch {} vs {}", src, ref); + return Status(eInvalidArgument); + } + } + return success(); +} + +Result AclNet::ReshapeDynamicBatchSize(Span input_shapes) { + int batch_size = -1; + for (int i = 0; i < input_dims_.size(); ++i) { + for (int j = 0; j < input_dims_[i].dimCount; ++j) { + if (input_dims_[i].dims[j] == -1) { + if (batch_size != -1 && batch_size != input_shapes[i][j]) { + // inconsistent batch size + return Status(eInvalidArgument); + } + batch_size = input_shapes[i][j]; + } + } + } + if (batch_size < 0) { + MMDEPLOY_ERROR("unable to determine batch size"); + return Status(eFail); + } + MMDEPLOY_INFO("batch size {} {}", batch_size, dynamic_tensor_index_); + auto index = + std::lower_bound(dynamic_batch_size_.begin(), dynamic_batch_size_.end(), batch_size) - + dynamic_batch_size_.begin(); + if (index == dynamic_batch_size_.size()) { + MMDEPLOY_ERROR("Unsupported batch size: {}", batch_size); + } + // TODO: memset padding memory to avoid potential extra computation + OUTCOME_TRY(_m(aclmdlSetDynamicBatchSize(model_id_, input_dataset_, dynamic_tensor_index_, + dynamic_batch_size_[index]))); + return success(); +} + +Result AclNet::ReshapeDynamicImageSize(Span input_shapes) { + uint64_t hw[2]; + bool found = false; + for (int i = 0; i < input_dims_.size(); ++i) { + uint64_t tmp[2]; + int ptr = 0; + for (int j = 0; j < input_dims_[i].dimCount; ++j) { + if (input_dims_[i].dims[j] == -1) { + if (ptr == 2) { + MMDEPLOY_ERROR("dynamic HW size out of bounds: {}", input_dims_[i]); + return Status(eInvalidArgument); + } else { + tmp[ptr++] = 
input_shapes[i][j]; + } + } + } + if (ptr && ptr != 2) { + MMDEPLOY_ERROR("Partially determined dynamic HW size: {}", input_dims_[i]); + return Status(eInvalidArgument); + } + if (ptr == 2) { + if (found) { + if (hw[0] != tmp[0] || hw[1] != tmp[1]) { + MMDEPLOY_ERROR("Inconsistent dynamic HW size: ({}, {}) vs ({}, {})", hw[0], hw[1], tmp[0], + tmp[1]); + return Status(eInvalidArgument); + } + } else { + found = true; + hw[0] = tmp[0]; + hw[1] = tmp[1]; + } + } + } + if (!found) { + MMDEPLOY_ERROR("Unable to determine image size"); + return Status(eInvalidArgument); + } + MMDEPLOY_INFO("dynamic HW size ({}, {})", hw[0], hw[1]); + OUTCOME_TRY( + _m(aclmdlSetDynamicHWSize(model_id_, input_dataset_, dynamic_tensor_index_, hw[0], hw[1]))); + return success(); +} + +Result AclNet::ReshapeDynamicDims(Span input_shapes) { + std::vector match(dynamic_input_dims_.size(), 1); + aclmdlIODims dims{}; + for (int i = 0; i < input_shapes.size(); ++i) { + const auto& shape = input_shapes[i]; + for (int j = 0; j < shape.size(); ++j) { + if (input_dims_[i].dims[j] == -1) { + for (int k = 0; k < dynamic_input_dims_.size(); ++k) { + // disable profile when dims mismatch, except for the first dim (batch size) + if (j == 0 && shape[j] < dynamic_input_dims_[k].dims[dims.dimCount]) { + // pass + } else if (shape[j] != dynamic_input_dims_[k].dims[dims.dimCount]) { + match[k] = 0; + } + } + } else { + if (input_dims_[i].dims[j] != shape[j]) { + return Status(eNotSupported); + } + } + dims.dims[dims.dimCount++] = shape[j]; + } + } + int dims_index = std::find(match.begin(), match.end(), 1) - match.begin(); + if (dims_index == match.size()) { + MMDEPLOY_ERROR("Shape not supported: {}", dims); + return Status(eNotSupported); + } + // TODO: memset padding memory to avoid potential extra computation + OUTCOME_TRY(_m(aclmdlSetInputDynamicDims(model_id_, input_dataset_, dynamic_tensor_index_, + &dynamic_input_dims_[dims_index]))); + return success(); +} + +Result AclNet::Forward() { + 
OUTCOME_TRY(cpu_stream_.Wait()); + + OUTCOME_TRY(_m(aclrtSetDevice(device_id_))); + + for (int i = 0; i < input_tensor_.size(); ++i) { + auto buffer = aclmdlGetDatasetBuffer(input_dataset_, i); + auto buffer_size = aclGetDataBufferSizeV2(buffer); + auto buffer_data = aclGetDataBufferAddr(buffer); + auto host_ptr = input_tensor_[i].data(); + OUTCOME_TRY(_m(aclrtMemcpy(buffer_data, buffer_size, host_ptr, input_tensor_[i].byte_size(), + ACL_MEMCPY_HOST_TO_DEVICE))); + } + + OUTCOME_TRY(_m(aclmdlExecute(model_id_, input_dataset_, output_dataset_))); + + for (int i = 0; i < output_tensor_.size(); ++i) { + auto buffer = aclmdlGetDatasetBuffer(output_dataset_, i); + auto buffer_data = aclGetDataBufferAddr(buffer); + auto host_ptr = output_tensor_[i].data(); + OUTCOME_TRY(_m(aclrtMemcpy(host_ptr, output_tensor_[i].byte_size(), buffer_data, + output_tensor_[i].byte_size(), ACL_MEMCPY_DEVICE_TO_HOST))); + } + return success(); +} + +Result AclNet::ForwardAsync(Event* event) { return Status(eNotSupported); } + +class AclNetCreator : public Creator { + public: + const char* GetName() const override { return "ascend"; } + int GetVersion() const override { return 0; } + std::unique_ptr Create(const Value& args) override { + try { + auto p = std::make_unique(); + if (auto r = p->Init(args)) { + return p; + } else { + MMDEPLOY_ERROR("error creating AclNet: {}", r.error().message().c_str()); + return nullptr; + } + } catch (const std::exception& e) { + MMDEPLOY_ERROR("unhandled exception when creating AclNet: {}", e.what()); + return nullptr; + } + } +}; + +REGISTER_MODULE(Net, AclNetCreator); + +} // namespace mmdeploy diff --git a/csrc/mmdeploy/net/acl/acl_net.h b/csrc/mmdeploy/net/acl/acl_net.h new file mode 100644 index 000000000..525d0554d --- /dev/null +++ b/csrc/mmdeploy/net/acl/acl_net.h @@ -0,0 +1,70 @@ +// Copyright (c) OpenMMLab. All rights reserved. 
+ +#ifndef MMDEPLOY_SRC_NET_ACL_ACL_NET_H_ +#define MMDEPLOY_SRC_NET_ACL_ACL_NET_H_ + +#include "acl/acl.h" +#include "mmdeploy/core/net.h" +#include "mmdeploy/core/status_code.h" + +namespace mmdeploy { + +class AclNet : public Net { + public: + ~AclNet() override; + Result Init(const Value& cfg) override; + Result Deinit() override; + Result> GetInputTensors() override; + Result> GetOutputTensors() override; + Result Reshape(Span input_shapes) override; + Result Forward() override; + Result ForwardAsync(Event* event) override; + + private: + enum InputShapeType { kStatic, kDynamicBatchSize, kDynamicImageSize, kDynamicDims }; + + Result ReshapeStatic(Span input_shapes); + Result ReshapeDynamicBatchSize(Span input_shapes); + Result ReshapeDynamicImageSize(Span input_shapes); + Result ReshapeDynamicDims(Span input_shapes); + + struct Buffers { + aclDataBuffer* device_buffer; + Tensor host_tensor; + }; + + Result CreateBuffers(const aclmdlIODims& dims, aclDataType data_type); + + Result CreateBuffersDynamicBatchSize(aclmdlIODims dims, aclDataType data_type); + Result CreateBuffersDynamicImageSize(int index, aclmdlIODims dims, + aclDataType data_type); + Result CreateBuffersDynamicDims(int index, int dim_count, const aclmdlIODims& dims, + aclDataType data_type); + + Result ConfigDynamicShapes(); + + Result CreateInputBuffers(); + Result CreateOutputBuffers(); + + std::shared_ptr acl_context_; + Stream cpu_stream_; + int32_t device_id_{0}; + uint32_t model_id_{(uint32_t)-1}; + aclmdlDesc* model_desc_{nullptr}; + int dynamic_tensor_index_{-1}; + InputShapeType input_shape_type_{kStatic}; + std::vector dynamic_batch_size_; + std::vector dynamic_input_dims_; + aclmdlDataset* input_dataset_{nullptr}; + aclmdlDataset* output_dataset_{nullptr}; + std::vector input_dims_; + std::vector output_dims_; + std::vector input_data_type_; + std::vector output_data_type_; + std::vector input_tensor_; + std::vector output_tensor_; +}; + +} // namespace mmdeploy + +#endif // 
MMDEPLOY_SRC_NET_ACL_ACL_NET_H_ diff --git a/csrc/mmdeploy/preprocess/transform/pad.cpp b/csrc/mmdeploy/preprocess/transform/pad.cpp index c5cb85d58..8e3ceabb7 100644 --- a/csrc/mmdeploy/preprocess/transform/pad.cpp +++ b/csrc/mmdeploy/preprocess/transform/pad.cpp @@ -35,6 +35,7 @@ PadImpl::PadImpl(const Value& args) : TransformImpl(args) { } arg_.pad_to_square = args.value("pad_to_square", false); arg_.padding_mode = args.value("padding_mode", std::string("constant")); + arg_.orientation_agnostic = args.value("orientation_agnostic", false); } Result PadImpl::Process(const Value& input) { @@ -58,9 +59,19 @@ Result PadImpl::Process(const Value& input) { output["pad_fixed_size"].push_back(max_size); output["pad_fixed_size"].push_back(max_size); } else if (arg_.size[0] != 0 && arg_.size[1] != 0) { - padding = {0, 0, arg_.size[1] - width, arg_.size[0] - height}; - output["pad_fixed_size"].push_back(arg_.size[0]); - output["pad_fixed_size"].push_back(arg_.size[1]); + if (arg_.orientation_agnostic) { + auto size_min = min(arg_.size[0], arg_.size[1]); + auto size_max = max(arg_.size[0], arg_.size[1]); + auto pad_h = width < height ? size_max : size_min; + auto pad_w = width < height ? 
size_min : size_max; + padding = {0, 0, pad_w - width, pad_h - height}; + output["pad_fixed_size"].push_back(pad_h); + output["pad_fixed_size"].push_back(pad_w); + } else { + padding = {0, 0, arg_.size[1] - width, arg_.size[0] - height}; + output["pad_fixed_size"].push_back(arg_.size[0]); + output["pad_fixed_size"].push_back(arg_.size[1]); + } } else if (arg_.size_divisor != 1) { auto pad_h = (height + arg_.size_divisor - 1) / arg_.size_divisor * arg_.size_divisor; auto pad_w = (width + arg_.size_divisor - 1) / arg_.size_divisor * arg_.size_divisor; diff --git a/csrc/mmdeploy/preprocess/transform/pad.h b/csrc/mmdeploy/preprocess/transform/pad.h index 92246f940..073a208fa 100644 --- a/csrc/mmdeploy/preprocess/transform/pad.h +++ b/csrc/mmdeploy/preprocess/transform/pad.h @@ -29,6 +29,7 @@ class MMDEPLOY_API PadImpl : public TransformImpl { int size_divisor; float pad_val; bool pad_to_square; + bool orientation_agnostic; std::string padding_mode; }; using ArgType = struct pad_arg_t; diff --git a/docs/en/01-how-to-build/linux-x86_64.md b/docs/en/01-how-to-build/linux-x86_64.md index 9aa73c807..af26cb1dc 100644 --- a/docs/en/01-how-to-build/linux-x86_64.md +++ b/docs/en/01-how-to-build/linux-x86_64.md @@ -238,6 +238,17 @@ export LD_LIBRARY_PATH=$Torch_DIR/lib:$LD_LIBRARY_PATH + + Ascend + CANN + + 1. Install CANN follow official guide.
+ 2. Setup environment +

+export ASCEND_TOOLKIT_HOME="/usr/local/Ascend/ascend-toolkit/latest"
+   
+ + diff --git a/docs/en/03-benchmark/benchmark.md b/docs/en/03-benchmark/benchmark.md index 059d53982..0174d4b55 100644 --- a/docs/en/03-benchmark/benchmark.md +++ b/docs/en/03-benchmark/benchmark.md @@ -36,6 +36,7 @@ Users can directly test the speed through [model profiling](../02-how-to-run/pro TensorRT(ms) PPLNN(ms) ncnn(ms) + Ascend(ms) @@ -48,6 +49,7 @@ Users can directly test the speed through [model profiling](../02-how-to-run/pro T4 SnapDragon888 Adreno660 + Ascend310 fp32 @@ -59,6 +61,7 @@ Users can directly test the speed through [model profiling](../02-how-to-run/pro fp16 fp32 fp32 + fp32 ResNet @@ -72,6 +75,7 @@ Users can directly test the speed through [model profiling](../02-how-to-run/pro 1.30 33.91 25.93 + 2.49 ResNeXt @@ -85,6 +89,7 @@ Users can directly test the speed through [model profiling](../02-how-to-run/pro 1.36 133.44 69.38 + - SE-ResNet @@ -98,6 +103,7 @@ Users can directly test the speed through [model profiling](../02-how-to-run/pro 1.91 107.84 80.85 + - ShuffleNetV2 @@ -111,6 +117,7 @@ Users can directly test the speed through [model profiling](../02-how-to-run/pro 4.69 9.55 10.66 + - @@ -419,6 +426,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](../ ONNX Runtime TensorRT PPLNN + Ascend @@ -432,6 +440,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](../ fp16 int8 fp16 + fp32 ResNet-18 @@ -443,6 +452,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](../ 69.86 69.86 69.86 + 69.91 top-5 @@ -453,6 +463,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](../ 89.33 89.38 89.34 + 89.43 ResNeXt-50 @@ -464,6 +475,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](../ - 77.78 77.89 + - top-5 @@ -474,6 +486,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](../ - 93.64 93.65 + - SE-ResNet-50 @@ -485,6 +498,7 @@ Users can directly test the performance 
through [how_to_evaluate_a_model.md](../ 77.75 77.63 77.73 + - top-5 @@ -495,6 +509,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](../ 93.83 93.72 93.84 + - ShuffleNetV1 1.0x @@ -506,6 +521,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](../ 68.13 67.71 68.11 + - top-5 @@ -516,6 +532,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](../ 87.81 87.58 87.80 + - ShuffleNetV2 1.0x @@ -527,6 +544,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](../ 69.54 69.10 69.54 + - top-5 @@ -537,6 +555,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](../ 88.91 88.58 88.92 + - MobileNet V2 @@ -548,6 +567,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](../ 71.87 70.91 71.84 + 71.87 top-5 @@ -558,6 +578,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](../ 90.40 89.85 90.41 + 90.42 Vision Transformer @@ -569,6 +590,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](../ 85.42 - - + 85.43 top-5 @@ -579,6 +601,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](../ 97.76 - - + 97.77 Swin Transformer @@ -614,6 +637,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](../ ONNXRuntime TensorRT PPLNN + Ascend @@ -629,6 +653,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](../ fp16 int8 fp16 + fp32 YOLOV3 @@ -642,6 +667,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](../ 33.5 33.5 - + - SSD @@ -655,6 +681,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](../ 25.5 - - + - RetinaNet @@ -668,6 +695,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](../ 36.4 36.3 36.5 + 36.4 FCOS @@ -681,6 +709,7 @@ Users can directly test the performance through 
[how_to_evaluate_a_model.md](../ 36.5 - - + - FSAF @@ -694,6 +723,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](../ 37.4 37.2 37.4 + - YOLOX @@ -707,6 +737,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](../ 40.3 29.3 - + - Faster R-CNN @@ -720,6 +751,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](../ 37.3 37.1 37.3 + 37.2 ATSS @@ -733,6 +765,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](../ 39.4 - - + - Cascade R-CNN @@ -746,6 +779,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](../ 40.4 - 40.4 + - GFL @@ -759,6 +793,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](../ 40.0 - - + - RepPoints @@ -772,6 +807,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](../ - - - + - DETR @@ -798,6 +834,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](../ 38.1 - 38.0 + - mask AP @@ -808,6 +845,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](../ 33.7 - - + - Swin-Transformer @@ -821,6 +859,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](../ 37.7 - - + - mask AP @@ -831,6 +870,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](../ 35.4 - - + - @@ -1216,6 +1256,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](../ ONNXRuntime TensorRT PPLNN + Ascend @@ -1230,6 +1271,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](../ fp16 int8 fp16 + fp32 FCN @@ -1242,6 +1284,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](../ 72.35 74.19 72.35 + 72.35 PSPNet @@ -1254,6 +1297,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](../ 78.24 77.97 78.09 + 78.67 deeplabv3 @@ -1266,6 +1310,7 @@ Users can directly 
test the performance through [how_to_evaluate_a_model.md](../ 79.12 78.96 79.12 + 79.06 deeplabv3+ @@ -1278,6 +1323,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](../ 79.60 79.43 79.60 + 79.51 Fast-SCNN @@ -1290,6 +1336,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](../ 70.92 66.00 70.92 + - UNet @@ -1302,6 +1349,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](../ 69.10 68.95 - + - ANN @@ -1314,6 +1362,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](../ 77.32 - - + - APCNet @@ -1326,6 +1375,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](../ 77.32 - - + - BiSeNetV1 @@ -1338,6 +1388,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](../ 74.43 - - + - BiSeNetV2 @@ -1350,6 +1401,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](../ 73.21 - - + - CGNet @@ -1362,6 +1414,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](../ 68.27 - - + - EMANet @@ -1374,6 +1427,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](../ 77.6 - - + - EncNet @@ -1386,6 +1440,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](../ 75.66 - - + - ERFNet @@ -1398,6 +1453,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](../ 71.07 - - + - FastFCN @@ -1410,6 +1466,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](../ 79.12 - - + - GCNet @@ -1422,6 +1479,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](../ 77.69 - - + - ICNet @@ -1434,6 +1492,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](../ 76.36 - - + - ISANet @@ -1446,6 +1505,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](../ 78.49 - - + - OCRNet @@ -1458,6 
+1518,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](../ 73.67 - - + - PointRend @@ -1470,6 +1531,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](../ 76.42 - - + - Semantic FPN @@ -1482,6 +1544,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](../ 74.52 - - + - STDC @@ -1494,6 +1557,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](../ 75.10 - - + - STDC @@ -1506,6 +1570,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](../ 77.17 - - + - UPerNet @@ -1518,6 +1583,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](../ 77.18 - - + - Segmenter @@ -1530,6 +1596,7 @@ Users can directly test the performance through [how_to_evaluate_a_model.md](../ 43.34 43.35 - + - diff --git a/docs/en/03-benchmark/supported_models.md b/docs/en/03-benchmark/supported_models.md index 3e060e298..415b015fd 100644 --- a/docs/en/03-benchmark/supported_models.md +++ b/docs/en/03-benchmark/supported_models.md @@ -2,82 +2,82 @@ The table below lists the models that are guaranteed to be exportable to other backends. 
-| Model | Codebase | TorchScript | OnnxRuntime | TensorRT | ncnn | PPLNN | OpenVINO | Model config | -| :-------------------------- | :--------------- | :---------: | :---------: | :------: | :--: | :---: | :------: | :---------------------------------------------------------------------------------------------: | -| RetinaNet | MMDetection | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/retinanet) | -| Faster R-CNN | MMDetection | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn) | -| YOLOv3 | MMDetection | Y | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/yolo) | -| YOLOX | MMDetection | Y | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/yolox) | -| FCOS | MMDetection | Y | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fcos) | -| FSAF | MMDetection | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fsaf) | -| Mask R-CNN | MMDetection | Y | Y | Y | N | N | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn) | -| SSD[\*](#note) | MMDetection | Y | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/ssd) | -| FoveaBox | MMDetection | Y | Y | N | N | N | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/foveabox) | -| ATSS | MMDetection | N | Y | Y | N | N | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/atss) | -| GFL | MMDetection | N | Y | Y | N | ? 
| Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gfl) | -| Cascade R-CNN | MMDetection | N | Y | Y | N | Y | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn) | -| Cascade Mask R-CNN | MMDetection | N | Y | Y | N | N | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn) | -| Swin Transformer[\*](#note) | MMDetection | N | Y | Y | N | N | N | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/swin) | -| VFNet | MMDetection | N | N | N | N | N | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/vfnet) | -| RepPoints | MMDetection | N | N | Y | N | ? | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/reppoints) | -| DETR | MMDetection | N | Y | Y | N | ? | N | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/detr) | -| ResNet | MMClassification | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/resnet) | -| ResNeXt | MMClassification | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/resnext) | -| SE-ResNet | MMClassification | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/seresnet) | -| MobileNetV2 | MMClassification | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/mobilenet_v2) | -| ShuffleNetV1 | MMClassification | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/shufflenet_v1) | -| ShuffleNetV2 | MMClassification | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/shufflenet_v2) | -| VisionTransformer | MMClassification | Y | Y | Y | Y | ? 
| Y | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/vision_transformer) | -| SwinTransformer | MMClassification | Y | Y | Y | N | ? | N | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/swin_transformer) | -| FCN | MMSegmentation | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/fcn) | -| PSPNet[\*static](#note) | MMSegmentation | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/pspnet) | -| DeepLabV3 | MMSegmentation | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/deeplabv3) | -| DeepLabV3+ | MMSegmentation | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/deeplabv3plus) | -| Fast-SCNN[\*static](#note) | MMSegmentation | Y | Y | Y | N | Y | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/fastscnn) | -| UNet | MMSegmentation | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/unet) | -| ANN[\*](#note) | MMSegmentation | Y | Y | Y | N | N | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/ann) | -| APCNet | MMSegmentation | Y | Y | Y | Y | N | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/apcnet) | -| BiSeNetV1 | MMSegmentation | Y | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/bisenetv1) | -| BiSeNetV2 | MMSegmentation | Y | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/bisenetv2) | -| CGNet | MMSegmentation | Y | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/cgnet) | -| DMNet | MMSegmentation | ? | Y | N | N | N | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/dmnet) | -| DNLNet | MMSegmentation | ? 
| Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/dnlnet) | -| EMANet | MMSegmentation | Y | Y | Y | N | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/emanet) | -| EncNet | MMSegmentation | Y | Y | Y | N | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/encnet) | -| ERFNet | MMSegmentation | Y | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/erfnet) | -| FastFCN | MMSegmentation | Y | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/fastfcn) | -| GCNet | MMSegmentation | Y | Y | Y | N | N | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/gcnet) | -| ICNet[\*](#note) | MMSegmentation | Y | Y | Y | N | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/icnet) | -| ISANet[\*static](#note) | MMSegmentation | N | Y | Y | N | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/isanet) | -| NonLocal Net | MMSegmentation | ? | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/nonlocal_net) | -| OCRNet | MMSegmentation | ? | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/ocrnet) | -| PointRend | MMSegmentation | Y | Y | Y | N | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/point_rend) | -| Semantic FPN | MMSegmentation | Y | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/sem_fpn) | -| STDC | MMSegmentation | Y | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/stdc) | -| UPerNet[\*](#note) | MMSegmentation | ? | Y | Y | N | N | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/upernet) | -| DANet | MMSegmentation | ? 
| Y | Y | N | N | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/danet) | -| Segmenter[\*static](#note) | MMSegmentation | Y | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/segmenter) | -| SRCNN | MMEditing | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/restorers/srcnn) | -| ESRGAN | MMEditing | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/restorers/esrgan) | -| SRGAN | MMEditing | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/restorers/srresnet_srgan) | -| SRResNet | MMEditing | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/restorers/srresnet_srgan) | -| Real-ESRGAN | MMEditing | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/restorers/real_esrgan) | -| EDSR | MMEditing | Y | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/restorers/edsr) | -| RDN | MMEditing | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/restorers/rdn) | -| DBNet | MMOCR | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmocr/tree/main/configs/textdet/dbnet) | -| PANet | MMOCR | Y | Y | Y | Y | ? | Y | [config](https://github.com/open-mmlab/mmocr/tree/main/configs/textdet/panet) | -| DBNet | MMOCR | Y | Y | Y | Y | ? 
| Y | [config](https://github.com/open-mmlab/mmocr/tree/main/configs/textdet/psenet) | -| CRNN | MMOCR | Y | Y | Y | Y | Y | N | [config](https://github.com/open-mmlab/mmocr/tree/main/configs/textrecog/crnn) | -| SAR | MMOCR | N | Y | N | N | N | N | [config](https://github.com/open-mmlab/mmocr/tree/main/configs/textrecog/sar) | -| SATRN | MMOCR | Y | Y | Y | N | N | N | [config](https://github.com/open-mmlab/mmocr/tree/main/configs/textrecog/satrn) | -| HRNet | MMPose | N | Y | Y | Y | N | Y | [config](https://mmpose.readthedocs.io/en/latest/papers/backbones.html#hrnet-cvpr-2019) | -| MSPN | MMPose | N | Y | Y | Y | N | Y | [config](https://mmpose.readthedocs.io/en/latest/papers/backbones.html#mspn-arxiv-2019) | -| LiteHRNet | MMPose | N | Y | Y | N | N | Y | [config](https://mmpose.readthedocs.io/en/latest/papers/backbones.html#litehrnet-cvpr-2021) | -| PointPillars | MMDetection3d | ? | Y | Y | N | N | Y | [config](https://github.com/open-mmlab/mmdetection3d/blob/master/configs/pointpillars) | -| CenterPoint (pillar) | MMDetection3d | ? 
| Y | Y | N | N | Y | [config](https://github.com/open-mmlab/mmdetection3d/blob/master/configs/centerpoint) | -| RotatedRetinaNet | RotatedDetection | N | Y | Y | N | N | N | [config](https://github.com/open-mmlab/mmrotate/blob/main/configs/rotated_retinanet/README.md) | -| Oriented RCNN | RotatedDetection | N | Y | Y | N | N | N | [config](https://github.com/open-mmlab/mmrotate/blob/main/configs/oriented_rcnn/README.md) | -| Gliding Vertex | RotatedDetection | N | N | Y | N | N | N | [config](https://github.com/open-mmlab/mmrotate/blob/main/configs/gliding_vertex/README.md) | +| Model | Codebase | TorchScript | OnnxRuntime | TensorRT | ncnn | PPLNN | OpenVINO | Ascend | Model config | +| :-------------------------- | :--------------- | :---------: | :---------: | :------: | :--: | :---: | :------: | :----: | :---------------------------------------------------------------------------------------------: | +| RetinaNet | MMDetection | Y | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/retinanet) | +| Faster R-CNN | MMDetection | Y | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn) | +| YOLOv3 | MMDetection | Y | Y | Y | Y | N | Y | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/yolo) | +| YOLOX | MMDetection | Y | Y | Y | Y | N | Y | N | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/yolox) | +| FCOS | MMDetection | Y | Y | Y | Y | N | Y | N | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fcos) | +| FSAF | MMDetection | Y | Y | Y | Y | Y | Y | N | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fsaf) | +| Mask R-CNN | MMDetection | Y | Y | Y | N | N | Y | N | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn) | +| SSD[\*](#note) | MMDetection | Y | Y | Y | Y | N | Y | N | 
[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/ssd) | +| FoveaBox | MMDetection | Y | Y | N | N | N | Y | N | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/foveabox) | +| ATSS | MMDetection | N | Y | Y | N | N | Y | N | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/atss) | +| GFL | MMDetection | N | Y | Y | N | ? | Y | N | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gfl) | +| Cascade R-CNN | MMDetection | N | Y | Y | N | Y | Y | N | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn) | +| Cascade Mask R-CNN | MMDetection | N | Y | Y | N | N | Y | N | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn) | +| Swin Transformer[\*](#note) | MMDetection | N | Y | Y | N | N | N | N | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/swin) | +| VFNet | MMDetection | N | N | N | N | N | Y | N | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/vfnet) | +| RepPoints | MMDetection | N | N | Y | N | ? | Y | N | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/reppoints) | +| DETR | MMDetection | N | Y | Y | N | ? 
| N | N | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/detr) | +| ResNet | MMClassification | Y | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/resnet) | +| ResNeXt | MMClassification | Y | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/resnext) | +| SE-ResNet | MMClassification | Y | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/seresnet) | +| MobileNetV2 | MMClassification | Y | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/mobilenet_v2) | +| ShuffleNetV1 | MMClassification | Y | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/shufflenet_v1) | +| ShuffleNetV2 | MMClassification | Y | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/shufflenet_v2) | +| VisionTransformer | MMClassification | Y | Y | Y | Y | ? | Y | Y | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/vision_transformer) | +| SwinTransformer | MMClassification | Y | Y | Y | N | ? 
| N | N | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/swin_transformer) | +| FCN | MMSegmentation | Y | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/fcn) | +| PSPNet[\*static](#note) | MMSegmentation | Y | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/pspnet) | +| DeepLabV3 | MMSegmentation | Y | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/deeplabv3) | +| DeepLabV3+ | MMSegmentation | Y | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/deeplabv3plus) | +| Fast-SCNN[\*static](#note) | MMSegmentation | Y | Y | Y | N | Y | Y | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/fastscnn) | +| UNet | MMSegmentation | Y | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/unet) | +| ANN[\*](#note) | MMSegmentation | Y | Y | Y | N | N | N | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/ann) | +| APCNet | MMSegmentation | Y | Y | Y | Y | N | N | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/apcnet) | +| BiSeNetV1 | MMSegmentation | Y | Y | Y | Y | N | Y | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/bisenetv1) | +| BiSeNetV2 | MMSegmentation | Y | Y | Y | Y | N | Y | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/bisenetv2) | +| CGNet | MMSegmentation | Y | Y | Y | Y | N | Y | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/cgnet) | +| DMNet | MMSegmentation | ? | Y | N | N | N | N | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/dmnet) | +| DNLNet | MMSegmentation | ? 
| Y | Y | Y | N | Y | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/dnlnet) | +| EMANet | MMSegmentation | Y | Y | Y | N | N | Y | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/emanet) | +| EncNet | MMSegmentation | Y | Y | Y | N | N | Y | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/encnet) | +| ERFNet | MMSegmentation | Y | Y | Y | Y | N | Y | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/erfnet) | +| FastFCN | MMSegmentation | Y | Y | Y | Y | N | Y | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/fastfcn) | +| GCNet | MMSegmentation | Y | Y | Y | N | N | N | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/gcnet) | +| ICNet[\*](#note) | MMSegmentation | Y | Y | Y | N | N | Y | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/icnet) | +| ISANet | MMSegmentation | ? | Y | Y | N | N | Y | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/isanet) | +| NonLocal Net | MMSegmentation | ? | Y | Y | Y | N | Y | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/nonlocal_net) | +| OCRNet | MMSegmentation | ? | Y | Y | Y | N | Y | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/ocrnet) | +| PointRend | MMSegmentation | Y | Y | Y | N | N | Y | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/point_rend) | +| Semantic FPN | MMSegmentation | Y | Y | Y | Y | N | Y | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/sem_fpn) | +| STDC | MMSegmentation | Y | Y | Y | Y | N | Y | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/stdc) | +| UPerNet[\*](#note) | MMSegmentation | ? 
| Y | Y | N | N | N | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/upernet) | +| DANet | MMSegmentation | ? | Y | Y | N | N | N | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/danet) | +| Segmenter | MMSegmentation | Y | Y | Y | Y | N | Y | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/segmenter) | +| SRCNN | MMEditing | Y | Y | Y | Y | Y | Y | N | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/restorers/srcnn) | +| ESRGAN | MMEditing | Y | Y | Y | Y | Y | Y | N | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/restorers/esrgan) | +| SRGAN | MMEditing | Y | Y | Y | Y | Y | Y | N | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/restorers/srresnet_srgan) | +| SRResNet | MMEditing | Y | Y | Y | Y | Y | Y | N | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/restorers/srresnet_srgan) | +| Real-ESRGAN | MMEditing | Y | Y | Y | Y | Y | Y | N | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/restorers/real_esrgan) | +| EDSR | MMEditing | Y | Y | Y | Y | N | Y | N | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/restorers/edsr) | +| RDN | MMEditing | Y | Y | Y | Y | Y | Y | N | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/restorers/rdn) | +| DBNet | MMOCR | Y | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmocr/tree/main/configs/textdet/dbnet) | +| PANet | MMOCR | Y | Y | Y | Y | ? | Y | Y | [config](https://github.com/open-mmlab/mmocr/tree/main/configs/textdet/panet) | +| PSENet | MMOCR | Y | Y | Y | Y | ? 
| Y | Y | [config](https://github.com/open-mmlab/mmocr/tree/main/configs/textdet/psenet) | +| CRNN | MMOCR | Y | Y | Y | Y | Y | N | N | [config](https://github.com/open-mmlab/mmocr/tree/main/configs/textrecog/crnn) | +| SAR[\*](#note) | MMOCR | N | Y | N | N | N | N | N | [config](https://github.com/open-mmlab/mmocr/tree/main/configs/textrecog/sar) | +| SATRN | MMOCR | Y | Y | Y | N | N | N | N | [config](https://github.com/open-mmlab/mmocr/tree/main/configs/textrecog/satrn) | +| HRNet | MMPose | N | Y | Y | Y | N | Y | N | [config](https://mmpose.readthedocs.io/en/latest/papers/backbones.html#hrnet-cvpr-2019) | +| MSPN | MMPose | N | Y | Y | Y | N | Y | N | [config](https://mmpose.readthedocs.io/en/latest/papers/backbones.html#mspn-arxiv-2019) | +| LiteHRNet | MMPose | N | Y | Y | N | N | Y | N | [config](https://mmpose.readthedocs.io/en/latest/papers/backbones.html#litehrnet-cvpr-2021) | +| PointPillars | MMDetection3d | ? | Y | Y | N | N | Y | N | [config](https://github.com/open-mmlab/mmdetection3d/blob/master/configs/pointpillars) | +| CenterPoint (pillar) | MMDetection3d | ? | Y | Y | N | N | Y | N | [config](https://github.com/open-mmlab/mmdetection3d/blob/master/configs/centerpoint) | +| RotatedRetinaNet | RotatedDetection | N | Y | Y | N | N | N | N | [config](https://github.com/open-mmlab/mmrotate/blob/main/configs/rotated_retinanet/README.md) | +| Oriented RCNN | RotatedDetection | N | Y | Y | N | N | N | N | [config](https://github.com/open-mmlab/mmrotate/blob/main/configs/oriented_rcnn/README.md) | +| Gliding Vertex | RotatedDetection | N | N | Y | N | N | N | N | [config](https://github.com/open-mmlab/mmrotate/blob/main/configs/gliding_vertex/README.md) | ### Note @@ -85,4 +85,5 @@ The table below lists the models that are guaranteed to be exportable to other b - static: This model only support static export. Please use `static` deploy config, just like $MMDEPLOY_DIR/configs/mmseg/segmentation_tensorrt_static-1024x2048.py. 
- SSD: When you convert SSD model, you need to use min shape deploy config just like 300x300-512x512 rather than 320x320-1344x1344, for example $MMDEPLOY_DIR/configs/mmdet/detection/detection_tensorrt_dynamic-300x300-512x512.py. - YOLOX: YOLOX with ncnn only supports static shape. +- Swin Transformer: For TensorRT, only version 8.4+ is supported. - SAR: Chinese text recognition model is not supported as the protobuf size of ONNX is limited. diff --git a/docs/zh_cn/01-how-to-build/linux-x86_64.md b/docs/zh_cn/01-how-to-build/linux-x86_64.md index 95333afa5..565fec337 100644 --- a/docs/zh_cn/01-how-to-build/linux-x86_64.md +++ b/docs/zh_cn/01-how-to-build/linux-x86_64.md @@ -235,6 +235,17 @@ export LD_LIBRARY_PATH=$Torch_DIR/lib:$LD_LIBRARY_PATH + + Ascend + CANN + + 1. 按照 官方指引 安装 CANN 工具集.
+ 2. 配置环境 +

+export ASCEND_TOOLKIT_HOME="/usr/local/Ascend/ascend-toolkit/latest"
+   
+ + diff --git a/docs/zh_cn/03-benchmark/benchmark.md b/docs/zh_cn/03-benchmark/benchmark.md index c665c1c41..20192d6c6 100644 --- a/docs/zh_cn/03-benchmark/benchmark.md +++ b/docs/zh_cn/03-benchmark/benchmark.md @@ -33,6 +33,7 @@ GPU: ncnn, TensorRT, PPLNN TensorRT(ms) PPLNN(ms) ncnn(ms) + Ascend(ms) @@ -45,6 +46,7 @@ GPU: ncnn, TensorRT, PPLNN T4 SnapDragon888 Adreno660 + Ascend310 fp32 @@ -56,6 +58,7 @@ GPU: ncnn, TensorRT, PPLNN fp16 fp32 fp32 + fp32 ResNet @@ -69,6 +72,7 @@ GPU: ncnn, TensorRT, PPLNN 1.30 33.91 25.93 + 2.49 ResNeXt @@ -82,6 +86,7 @@ GPU: ncnn, TensorRT, PPLNN 1.36 133.44 69.38 + - SE-ResNet @@ -95,6 +100,7 @@ GPU: ncnn, TensorRT, PPLNN 1.91 107.84 80.85 + - ShuffleNetV2 @@ -108,6 +114,7 @@ GPU: ncnn, TensorRT, PPLNN 4.69 9.55 10.66 + - @@ -416,6 +423,7 @@ GPU: ncnn, TensorRT, PPLNN ONNX Runtime TensorRT PPLNN + Ascend @@ -429,6 +437,7 @@ GPU: ncnn, TensorRT, PPLNN fp16 int8 fp16 + fp32 ResNet-18 @@ -440,6 +449,7 @@ GPU: ncnn, TensorRT, PPLNN 69.86 69.86 69.86 + 69.91 top-5 @@ -450,6 +460,7 @@ GPU: ncnn, TensorRT, PPLNN 89.33 89.38 89.34 + 89.43 ResNeXt-50 @@ -461,6 +472,7 @@ GPU: ncnn, TensorRT, PPLNN - 77.78 77.89 + - top-5 @@ -471,6 +483,7 @@ GPU: ncnn, TensorRT, PPLNN - 93.64 93.65 + - SE-ResNet-50 @@ -482,6 +495,7 @@ GPU: ncnn, TensorRT, PPLNN 77.75 77.63 77.73 + - top-5 @@ -492,6 +506,7 @@ GPU: ncnn, TensorRT, PPLNN 93.83 93.72 93.84 + - ShuffleNetV1 1.0x @@ -503,6 +518,7 @@ GPU: ncnn, TensorRT, PPLNN 68.13 67.71 68.11 + - top-5 @@ -513,6 +529,7 @@ GPU: ncnn, TensorRT, PPLNN 87.81 87.58 87.80 + - ShuffleNetV2 1.0x @@ -524,6 +541,7 @@ GPU: ncnn, TensorRT, PPLNN 69.54 69.10 69.54 + - top-5 @@ -534,6 +552,7 @@ GPU: ncnn, TensorRT, PPLNN 88.91 88.58 88.92 + - MobileNet V2 @@ -545,6 +564,7 @@ GPU: ncnn, TensorRT, PPLNN 71.87 70.91 71.84 + 71.87 top-5 @@ -555,6 +575,7 @@ GPU: ncnn, TensorRT, PPLNN 90.40 89.85 90.41 + 90.42 Vision Transformer @@ -566,6 +587,7 @@ GPU: ncnn, TensorRT, PPLNN 85.42 - - + 85.43 top-5 @@ -576,6 +598,7 @@ GPU: ncnn, 
TensorRT, PPLNN 97.76 - - + 97.77 @@ -590,6 +613,7 @@ GPU: ncnn, TensorRT, PPLNN ONNXRuntime TensorRT PPLNN + Ascend @@ -605,6 +629,7 @@ GPU: ncnn, TensorRT, PPLNN fp16 int8 fp16 + fp32 YOLOV3 @@ -618,6 +643,7 @@ GPU: ncnn, TensorRT, PPLNN 33.5 33.5 - + - SSD @@ -631,6 +657,7 @@ GPU: ncnn, TensorRT, PPLNN 25.5 - - + - RetinaNet @@ -644,6 +671,7 @@ GPU: ncnn, TensorRT, PPLNN 36.4 36.3 36.5 + 36.4 FCOS @@ -657,6 +685,7 @@ GPU: ncnn, TensorRT, PPLNN 36.5 - - + - FSAF @@ -670,6 +699,7 @@ GPU: ncnn, TensorRT, PPLNN 37.4 37.2 37.4 + - YOLOX @@ -683,6 +713,7 @@ GPU: ncnn, TensorRT, PPLNN 40.3 29.3 - + - Faster R-CNN @@ -696,6 +727,7 @@ GPU: ncnn, TensorRT, PPLNN 37.3 37.1 37.3 + - ATSS @@ -709,6 +741,7 @@ GPU: ncnn, TensorRT, PPLNN 39.4 - - + - Cascade R-CNN @@ -722,6 +755,7 @@ GPU: ncnn, TensorRT, PPLNN 40.4 - 40.4 + - GFL @@ -735,6 +769,7 @@ GPU: ncnn, TensorRT, PPLNN 40.0 - - + - RepPoints @@ -748,6 +783,7 @@ GPU: ncnn, TensorRT, PPLNN - - - + - DETR @@ -774,6 +810,7 @@ GPU: ncnn, TensorRT, PPLNN 38.1 - 38.0 + - mask AP @@ -784,6 +821,7 @@ GPU: ncnn, TensorRT, PPLNN 33.7 - - + - Swin-Transformer @@ -797,6 +835,7 @@ GPU: ncnn, TensorRT, PPLNN 37.7 - - + - mask AP @@ -807,6 +846,7 @@ GPU: ncnn, TensorRT, PPLNN 35.4 - - + - @@ -1192,6 +1232,7 @@ GPU: ncnn, TensorRT, PPLNN ONNXRuntime TensorRT PPLNN + Ascend @@ -1206,6 +1247,7 @@ GPU: ncnn, TensorRT, PPLNN fp16 int8 fp16 + fp32 FCN @@ -1218,6 +1260,7 @@ GPU: ncnn, TensorRT, PPLNN 72.35 74.19 72.35 + 72.35 PSPNet @@ -1230,6 +1273,7 @@ GPU: ncnn, TensorRT, PPLNN 78.24 77.97 78.09 + 78.67 deeplabv3 @@ -1242,6 +1286,7 @@ GPU: ncnn, TensorRT, PPLNN 79.12 78.96 79.12 + 79.06 deeplabv3+ @@ -1254,6 +1299,7 @@ GPU: ncnn, TensorRT, PPLNN 79.60 79.43 79.60 + 79.51 Fast-SCNN @@ -1266,6 +1312,7 @@ GPU: ncnn, TensorRT, PPLNN 70.92 66.00 70.92 + - UNet @@ -1278,6 +1325,7 @@ GPU: ncnn, TensorRT, PPLNN 69.10 68.95 - + - ANN @@ -1290,6 +1338,7 @@ GPU: ncnn, TensorRT, PPLNN 77.32 - - + - APCNet @@ -1302,6 +1351,7 @@ GPU: ncnn, TensorRT, 
PPLNN 77.32 - - + - BiSeNetV1 @@ -1314,6 +1364,7 @@ GPU: ncnn, TensorRT, PPLNN 74.43 - - + - BiSeNetV2 @@ -1326,6 +1377,7 @@ GPU: ncnn, TensorRT, PPLNN 73.21 - - + - CGNet @@ -1338,6 +1390,7 @@ GPU: ncnn, TensorRT, PPLNN 68.27 - - + - EMANet @@ -1350,6 +1403,7 @@ GPU: ncnn, TensorRT, PPLNN 77.6 - - + - EncNet @@ -1362,6 +1416,7 @@ GPU: ncnn, TensorRT, PPLNN 75.66 - - + - ERFNet @@ -1374,6 +1429,7 @@ GPU: ncnn, TensorRT, PPLNN 71.07 - - + - FastFCN @@ -1386,6 +1442,7 @@ GPU: ncnn, TensorRT, PPLNN 79.12 - - + - GCNet @@ -1398,6 +1455,7 @@ GPU: ncnn, TensorRT, PPLNN 77.69 - - + - ICNet @@ -1410,6 +1468,7 @@ GPU: ncnn, TensorRT, PPLNN 76.36 - - + - ISANet @@ -1422,6 +1481,7 @@ GPU: ncnn, TensorRT, PPLNN 78.49 - - + - OCRNet @@ -1434,6 +1494,7 @@ GPU: ncnn, TensorRT, PPLNN 73.67 - - + - PointRend @@ -1446,6 +1507,7 @@ GPU: ncnn, TensorRT, PPLNN 76.42 - - + - Semantic FPN @@ -1458,6 +1520,7 @@ GPU: ncnn, TensorRT, PPLNN 74.52 - - + - STDC @@ -1470,6 +1533,7 @@ GPU: ncnn, TensorRT, PPLNN 75.10 - - + - STDC @@ -1482,6 +1546,7 @@ GPU: ncnn, TensorRT, PPLNN 77.17 - - + - UPerNet @@ -1494,6 +1559,7 @@ GPU: ncnn, TensorRT, PPLNN 77.18 - - + - Segmenter @@ -1506,6 +1572,7 @@ GPU: ncnn, TensorRT, PPLNN 43.34 43.35 - + - diff --git a/docs/zh_cn/03-benchmark/supported_models.md b/docs/zh_cn/03-benchmark/supported_models.md index 9cb3aa874..8f6b3a8f2 100644 --- a/docs/zh_cn/03-benchmark/supported_models.md +++ b/docs/zh_cn/03-benchmark/supported_models.md @@ -2,79 +2,82 @@ 自测完成的 model-backend 组合: -| Model | Codebase | TorchScript | OnnxRuntime | TensorRT | ncnn | PPLNN | OpenVINO | Model config | -| :-------------------------- | :--------------- | :---------: | :---------: | :------: | :--: | :---: | :------: | :---------------------------------------------------------------------------------------------: | -| RetinaNet | MMDetection | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/retinanet) | -| Faster R-CNN | MMDetection | Y | Y | 
Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn) | -| YOLOv3 | MMDetection | Y | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/yolo) | -| YOLOX | MMDetection | Y | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/yolox) | -| FCOS | MMDetection | Y | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fcos) | -| FSAF | MMDetection | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fsaf) | -| Mask R-CNN | MMDetection | Y | Y | Y | N | N | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn) | -| SSD[\*](#note) | MMDetection | Y | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/ssd) | -| FoveaBox | MMDetection | Y | Y | N | N | N | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/foveabox) | -| ATSS | MMDetection | N | Y | Y | N | N | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/atss) | -| GFL | MMDetection | N | Y | Y | N | ? | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gfl) | -| Cascade R-CNN | MMDetection | N | Y | Y | N | Y | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn) | -| Cascade Mask R-CNN | MMDetection | N | Y | Y | N | N | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn) | -| Swin Transformer[\*](#note) | MMDetection | N | Y | Y | N | N | N | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/swin) | -| VFNet | MMDetection | N | N | N | N | N | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/vfnet) | -| RepPoints | MMDetection | N | N | Y | N | ? 
| Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/reppoints) | -| DETR | MMDetection | N | Y | Y | N | ? | N | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/detr) | -| ResNet | MMClassification | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/resnet) | -| ResNeXt | MMClassification | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/resnext) | -| SE-ResNet | MMClassification | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/seresnet) | -| MobileNetV2 | MMClassification | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/mobilenet_v2) | -| ShuffleNetV1 | MMClassification | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/shufflenet_v1) | -| ShuffleNetV2 | MMClassification | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/shufflenet_v2) | -| VisionTransformer | MMClassification | Y | Y | Y | Y | ? | Y | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/vision_transformer) | -| SwinTransformer | MMClassification | Y | Y | Y | N | ? 
| N | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/swin_transformer) | -| FCN | MMSegmentation | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/fcn) | -| PSPNet[\*static](#note) | MMSegmentation | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/pspnet) | -| DeepLabV3 | MMSegmentation | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/deeplabv3) | -| DeepLabV3+ | MMSegmentation | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/deeplabv3plus) | -| Fast-SCNN[\*static](#note) | MMSegmentation | Y | Y | Y | N | Y | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/fastscnn) | -| UNet | MMSegmentation | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/unet) | -| ANN[\*](#note) | MMSegmentation | Y | Y | Y | N | N | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/ann) | -| APCNet | MMSegmentation | Y | Y | Y | Y | N | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/apcnet) | -| BiSeNetV1 | MMSegmentation | Y | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/bisenetv1) | -| BiSeNetV2 | MMSegmentation | Y | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/bisenetv2) | -| CGNet | MMSegmentation | Y | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/cgnet) | -| DMNet | MMSegmentation | ? | Y | N | N | N | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/dmnet) | -| DNLNet | MMSegmentation | ? 
| Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/dnlnet) | -| EMANet | MMSegmentation | Y | Y | Y | N | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/emanet) | -| EncNet | MMSegmentation | Y | Y | Y | N | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/encnet) | -| ERFNet | MMSegmentation | Y | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/erfnet) | -| FastFCN | MMSegmentation | Y | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/fastfcn) | -| GCNet | MMSegmentation | Y | Y | Y | N | N | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/gcnet) | -| ICNet[\*](#note) | MMSegmentation | Y | Y | Y | N | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/icnet) | -| ISANet[\*static](#note) | MMSegmentation | N | Y | Y | N | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/isanet) | -| NonLocal Net | MMSegmentation | ? | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/nonlocal_net) | -| OCRNet | MMSegmentation | ? | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/ocrnet) | -| PointRend | MMSegmentation | Y | Y | Y | N | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/point_rend) | -| Semantic FPN | MMSegmentation | Y | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/sem_fpn) | -| STDC | MMSegmentation | Y | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/stdc) | -| UPerNet[\*](#note) | MMSegmentation | ? | Y | Y | N | N | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/upernet) | -| DANet | MMSegmentation | ? 
| Y | Y | N | N | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/danet) | -| Segmenter[\*static](#note) | MMSegmentation | Y | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/segmenter) | -| SRCNN | MMEditing | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/restorers/srcnn) | -| ESRGAN | MMEditing | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/restorers/esrgan) | -| SRGAN | MMEditing | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/restorers/srresnet_srgan) | -| SRResNet | MMEditing | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/restorers/srresnet_srgan) | -| Real-ESRGAN | MMEditing | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/restorers/real_esrgan) | -| EDSR | MMEditing | Y | Y | Y | Y | N | Y | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/restorers/edsr) | -| RDN | MMEditing | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/restorers/rdn) | -| DBNet | MMOCR | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmocr/tree/main/configs/textdet/dbnet) | -| CRNN | MMOCR | Y | Y | Y | Y | Y | N | [config](https://github.com/open-mmlab/mmocr/tree/main/configs/textrecog/crnn) | -| SAR | MMOCR | N | Y | N | N | N | N | [config](https://github.com/open-mmlab/mmocr/tree/main/configs/textrecog/sar) | -| HRNet | MMPose | N | Y | Y | Y | N | Y | [config](https://mmpose.readthedocs.io/en/latest/papers/backbones.html#hrnet-cvpr-2019) | -| MSPN | MMPose | N | Y | Y | Y | N | Y | [config](https://mmpose.readthedocs.io/en/latest/papers/backbones.html#mspn-arxiv-2019) | -| LiteHRNet | MMPose | N | Y | Y | N | N | Y | 
[config](https://mmpose.readthedocs.io/en/latest/papers/backbones.html#litehrnet-cvpr-2021) | -| PointPillars | MMDetection3d | ? | Y | Y | N | N | Y | [config](https://github.com/open-mmlab/mmdetection3d/blob/master/configs/pointpillars) | -| CenterPoint (pillar) | MMDetection3d | ? | Y | Y | N | N | Y | [config](https://github.com/open-mmlab/mmdetection3d/blob/master/configs/centerpoint) | -| RotatedRetinaNet | RotatedDetection | N | Y | Y | N | N | N | [config](https://github.com/open-mmlab/mmrotate/blob/main/configs/rotated_retinanet/README.md) | -| Oriented RCNN | RotatedDetection | N | Y | Y | N | N | N | [config](https://github.com/open-mmlab/mmrotate/blob/main/configs/oriented_rcnn/README.md) | -| Gliding Vertex | RotatedDetection | N | N | Y | N | N | N | [config](https://github.com/open-mmlab/mmrotate/blob/main/configs/gliding_vertex/README.md) | +| Model | Codebase | TorchScript | OnnxRuntime | TensorRT | ncnn | PPLNN | OpenVINO | Ascend | Model config | +| :-------------------------- | :--------------- | :---------: | :---------: | :------: | :--: | :---: | :------: | :----: | :---------------------------------------------------------------------------------------------: | +| RetinaNet | MMDetection | Y | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/retinanet) | +| Faster R-CNN | MMDetection | Y | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn) | +| YOLOv3 | MMDetection | Y | Y | Y | Y | N | Y | Y | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/yolo) | +| YOLOX | MMDetection | Y | Y | Y | Y | N | Y | N | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/yolox) | +| FCOS | MMDetection | Y | Y | Y | Y | N | Y | N | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fcos) | +| FSAF | MMDetection | Y | Y | Y | Y | Y | Y | N | 
[config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fsaf) | +| Mask R-CNN | MMDetection | Y | Y | Y | N | N | Y | N | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn) | +| SSD[\*](#note) | MMDetection | Y | Y | Y | Y | N | Y | N | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/ssd) | +| FoveaBox | MMDetection | Y | Y | N | N | N | Y | N | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/foveabox) | +| ATSS | MMDetection | N | Y | Y | N | N | Y | N | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/atss) | +| GFL | MMDetection | N | Y | Y | N | ? | Y | N | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gfl) | +| Cascade R-CNN | MMDetection | N | Y | Y | N | Y | Y | N | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn) | +| Cascade Mask R-CNN | MMDetection | N | Y | Y | N | N | Y | N | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/cascade_rcnn) | +| Swin Transformer[\*](#note) | MMDetection | N | Y | Y | N | N | N | N | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/swin) | +| VFNet | MMDetection | N | N | N | N | N | Y | N | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/vfnet) | +| RepPoints | MMDetection | N | N | Y | N | ? | Y | N | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/reppoints) | +| DETR | MMDetection | N | Y | Y | N | ? 
| N | N | [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/detr) | +| ResNet | MMClassification | Y | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/resnet) | +| ResNeXt | MMClassification | Y | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/resnext) | +| SE-ResNet | MMClassification | Y | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/seresnet) | +| MobileNetV2 | MMClassification | Y | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/mobilenet_v2) | +| ShuffleNetV1 | MMClassification | Y | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/shufflenet_v1) | +| ShuffleNetV2 | MMClassification | Y | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/shufflenet_v2) | +| VisionTransformer | MMClassification | Y | Y | Y | Y | ? | Y | Y | [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/vision_transformer) | +| SwinTransformer | MMClassification | Y | Y | Y | N | ? | N | ? 
| [config](https://github.com/open-mmlab/mmclassification/tree/master/configs/swin_transformer) | +| FCN | MMSegmentation | Y | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/fcn) | +| PSPNet[\*static](#note) | MMSegmentation | Y | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/pspnet) | +| DeepLabV3 | MMSegmentation | Y | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/deeplabv3) | +| DeepLabV3+ | MMSegmentation | Y | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/deeplabv3plus) | +| Fast-SCNN[\*static](#note) | MMSegmentation | Y | Y | Y | N | Y | Y | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/fastscnn) | +| UNet | MMSegmentation | Y | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/unet) | +| ANN[\*](#note) | MMSegmentation | Y | Y | Y | N | N | N | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/ann) | +| APCNet | MMSegmentation | Y | Y | Y | Y | N | N | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/apcnet) | +| BiSeNetV1 | MMSegmentation | Y | Y | Y | Y | N | Y | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/bisenetv1) | +| BiSeNetV2 | MMSegmentation | Y | Y | Y | Y | N | Y | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/bisenetv2) | +| CGNet | MMSegmentation | Y | Y | Y | Y | N | Y | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/cgnet) | +| DMNet | MMSegmentation | ? | Y | N | N | N | N | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/dmnet) | +| DNLNet | MMSegmentation | ? 
| Y | Y | Y | N | Y | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/dnlnet) | +| EMANet | MMSegmentation | Y | Y | Y | N | N | Y | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/emanet) | +| EncNet | MMSegmentation | Y | Y | Y | N | N | Y | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/encnet) | +| ERFNet | MMSegmentation | Y | Y | Y | Y | N | Y | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/erfnet) | +| FastFCN | MMSegmentation | Y | Y | Y | Y | N | Y | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/fastfcn) | +| GCNet | MMSegmentation | Y | Y | Y | N | N | N | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/gcnet) | +| ICNet[\*](#note) | MMSegmentation | Y | Y | Y | N | N | Y | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/icnet) | +| ISANet | MMSegmentation | ? | Y | Y | N | N | Y | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/isanet) | +| NonLocal Net | MMSegmentation | ? | Y | Y | Y | N | Y | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/nonlocal_net) | +| OCRNet | MMSegmentation | ? | Y | Y | Y | N | Y | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/ocrnet) | +| PointRend | MMSegmentation | Y | Y | Y | N | N | Y | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/point_rend) | +| Semantic FPN | MMSegmentation | Y | Y | Y | Y | N | Y | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/sem_fpn) | +| STDC | MMSegmentation | Y | Y | Y | Y | N | Y | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/stdc) | +| UPerNet[\*](#note) | MMSegmentation | ? 
| Y | Y | N | N | N | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/upernet) | +| DANet | MMSegmentation | ? | Y | Y | N | N | N | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/danet) | +| Segmenter | MMSegmentation | Y | Y | Y | Y | N | Y | N | [config](https://github.com/open-mmlab/mmsegmentation/tree/master/configs/segmenter) | +| SRCNN | MMEditing | Y | Y | Y | Y | Y | Y | N | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/restorers/srcnn) | +| ESRGAN | MMEditing | Y | Y | Y | Y | Y | Y | N | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/restorers/esrgan) | +| SRGAN | MMEditing | Y | Y | Y | Y | Y | Y | N | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/restorers/srresnet_srgan) | +| SRResNet | MMEditing | Y | Y | Y | Y | Y | Y | N | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/restorers/srresnet_srgan) | +| Real-ESRGAN | MMEditing | Y | Y | Y | Y | Y | Y | N | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/restorers/real_esrgan) | +| EDSR | MMEditing | Y | Y | Y | Y | N | Y | N | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/restorers/edsr) | +| RDN | MMEditing | Y | Y | Y | Y | Y | Y | N | [config](https://github.com/open-mmlab/mmediting/tree/master/configs/restorers/rdn) | +| DBNet | MMOCR | Y | Y | Y | Y | Y | Y | Y | [config](https://github.com/open-mmlab/mmocr/tree/main/configs/textdet/dbnet) | +| PANet | MMOCR | Y | Y | Y | Y | ? | Y | Y | [config](https://github.com/open-mmlab/mmocr/tree/main/configs/textdet/panet) | +| PSENet | MMOCR | Y | Y | Y | Y | ? 
| Y | Y | [config](https://github.com/open-mmlab/mmocr/tree/main/configs/textdet/psenet) | +| CRNN | MMOCR | Y | Y | Y | Y | Y | N | N | [config](https://github.com/open-mmlab/mmocr/tree/main/configs/textrecog/crnn) | +| SAR | MMOCR | N | Y | N | N | N | N | N | [config](https://github.com/open-mmlab/mmocr/tree/main/configs/textrecog/sar) | +| SATRN | MMOCR | Y | Y | Y | N | N | N | N | [config](https://github.com/open-mmlab/mmocr/tree/main/configs/textrecog/satrn) | +| HRNet | MMPose | N | Y | Y | Y | N | Y | N | [config](https://mmpose.readthedocs.io/en/latest/papers/backbones.html#hrnet-cvpr-2019) | +| MSPN | MMPose | N | Y | Y | Y | N | Y | N | [config](https://mmpose.readthedocs.io/en/latest/papers/backbones.html#mspn-arxiv-2019) | +| LiteHRNet | MMPose | N | Y | Y | N | N | Y | N | [config](https://mmpose.readthedocs.io/en/latest/papers/backbones.html#litehrnet-cvpr-2021) | +| PointPillars | MMDetection3d | ? | Y | Y | N | N | Y | N | [config](https://github.com/open-mmlab/mmdetection3d/blob/master/configs/pointpillars) | +| CenterPoint (pillar) | MMDetection3d | ? | Y | Y | N | N | Y | N | [config](https://github.com/open-mmlab/mmdetection3d/blob/master/configs/centerpoint) | +| RotatedRetinaNet | RotatedDetection | N | Y | Y | N | N | N | N | [config](https://github.com/open-mmlab/mmrotate/blob/main/configs/rotated_retinanet/README.md) | +| Oriented RCNN | RotatedDetection | N | Y | Y | N | N | N | N | [config](https://github.com/open-mmlab/mmrotate/blob/main/configs/oriented_rcnn/README.md) | +| Gliding Vertex | RotatedDetection | N | N | Y | N | N | N | N | [config](https://github.com/open-mmlab/mmrotate/blob/main/configs/gliding_vertex/README.md) | ## Note @@ -82,4 +85,5 @@ - static: This model only support static export. Please use `static` deploy config, just like $MMDEPLOY_DIR/configs/mmseg/segmentation_tensorrt_static-1024x2048.py. 
def is_available() -> bool:
    """Check whether the Huawei ACL python package is installed.

    Returns:
        bool: True if the ``acl`` package can be located.
    """
    # ``import importlib`` alone does not guarantee that the ``importlib.util``
    # submodule is loaded as an attribute; import it explicitly so the lookup
    # below cannot fail depending on interpreter state.
    import importlib.util
    return importlib.util.find_spec('acl') is not None
def from_onnx(onnx_model: Union[onnx.ModelProto, str], work_dir: str,
              model_inputs: Dict):
    """Convert an ONNX model to an Ascend offline model via the ``atc`` tool.

    Example:
        >>> from mmdeploy.apis.ascend import from_onnx
        >>> onnx_path = 'work_dir/end2end.onnx'
        >>> model_inputs = mmcv.Config(
        >>>     dict(input_shapes=dict(input=[1, 3, 224, 224])))
        >>> from_onnx(onnx_path, work_dir, model_inputs)

    Args:
        onnx_model (onnx.ModelProto|str): The ONNX model or the path to it.
        work_dir (str): Directory to load the onnx from / save the model to.
        model_inputs (Dict): The input args to the atc tool. Must contain
            ``input_shapes``; may contain ``dynamic_batch_size``,
            ``dynamic_image_size`` or ``dynamic_dims``.

    Raises:
        AssertionError: If ``atc`` exits with a non-zero code.
    """
    logger = get_root_logger()
    if not isinstance(onnx_model, str):
        # Persist the in-memory model so atc can read it. ``delete=False``
        # keeps the file on disk after the handle closes (grabbing ``.name``
        # of a default NamedTemporaryFile is racy and fails on Windows,
        # where an open temporary file cannot be reopened).
        with tempfile.NamedTemporaryFile(suffix='.onnx', delete=False) as f:
            onnx_path = f.name
        onnx.save(onnx_model, onnx_path)
    else:
        onnx_path = onnx_model

    onnx_model = onnx.load(onnx_path)
    # atc rejects custom operator domains; strip them from every node.
    for n in onnx_model.graph.node:
        if n.domain != '':
            n.domain = ''
    # Keep only the first opset import. The previous
    # ``for i in range(1, len(...)): pop(i)`` both skipped entries and could
    # raise IndexError because the container shrinks while ``i`` advances.
    while len(onnx_model.opset_import) > 1:
        onnx_model.opset_import.pop()
    onnx.save(onnx_model, onnx_path)

    output_path = osp.join(work_dir, osp.splitext(osp.split(onnx_path)[1])[0])

    input_shapes = ';'.join(
        make_shape_string(name, dims)
        for name, dims in model_inputs['input_shapes'].items())

    # Dynamic-dims models need the shape-agnostic 'ND' layout.
    input_format = 'ND' if 'dynamic_dims' in model_inputs else 'NCHW'

    args = [
        f'--model={onnx_path}', '--framework=5', f'--output={output_path}',
        '--soc_version=Ascend310', f'--input_format={input_format}',
        f'--input_shape={input_shapes}'
    ]

    # At most one dynamic-shape mode may be passed to atc.
    if 'dynamic_batch_size' in model_inputs:
        dynamic_batch_size = ','.join(
            map(str, model_inputs['dynamic_batch_size']))
        args.append(f'--dynamic_batch_size={dynamic_batch_size}')
    elif 'dynamic_image_size' in model_inputs:
        dynamic_image_size = _concat(model_inputs['dynamic_image_size'])
        args.append(f'--dynamic_image_size={dynamic_image_size}')
    elif 'dynamic_dims' in model_inputs:
        dynamic_dims = _concat(model_inputs['dynamic_dims'])
        args.append(f'--dynamic_dims={dynamic_dims}')

    logger.info(' '.join(('atc', *args)))

    ret_code = call(['atc', *args])
    assert ret_code == 0, f'atc failed with exit code {ret_code}'
class DataBuffer:
    """The acl data buffer.

    Owns a device memory allocation and the acl data-buffer handle that
    refers to it.

    Args:
        size (int): Buffer size in bytes.
    """

    def __init__(self, size: int):
        # Second argument is the acl allocation policy (0 presumably selects
        # the default policy — TODO confirm against the CANN acl.rt.malloc
        # documentation).
        data, ret = acl.rt.malloc(size, 0)
        _check(ret, 'acl.rt.malloc')
        self.data = data  # device address returned by acl.rt.malloc
        self.size = size  # allocation size in bytes
        self.handle = acl.create_data_buffer(data, size)

    def destroy(self):
        """Release the buffer handle and its device memory (idempotent)."""
        # ``handle`` doubles as the "already destroyed" sentinel so that a
        # second destroy() (or one triggered from __del__) is a no-op.
        if self.handle is not None:
            acl.destroy_data_buffer(self.handle)
            acl.rt.free(self.data)
            self.handle = None

    def __del__(self):
        # Best-effort cleanup if the owner forgot to call destroy().
        self.destroy()
+ """ + self.buffers.append(buffer) + _, ret = acl.mdl.add_dataset_buffer(self.handle, buffer.handle) + _check(ret, 'acl.mdl.add_dataset_buffer') + + +class Binding(NamedTuple): + index: int + name: str + dims: List[int] + data_type: np.dtype + size: int + + +class ModelDesc: + """The model description wrapper. + + Args: + model_id (int): The id of the model, created by acl tools. + """ + + def __init__(self, model_id): + self._desc = acl.mdl.create_desc() + ret = acl.mdl.get_desc(self._desc, model_id) + _check(ret, 'acl.mdl.get_desc') + + self.inputs = [] + self.dynamic_tensor = None + num_inputs = acl.mdl.get_num_inputs(self._desc) + for index in range(num_inputs): + dims = self._get_input_dims(index) + data_type = acl.mdl.get_input_data_type(self._desc, index) + data_type = _from_acl_data_type[data_type] + size = acl.mdl.get_input_size_by_index(self._desc, index) + binding = Binding(index, dims['name'], dims['dims'], data_type, + size) + if dims['name'] == 'ascend_mbatch_shape_data': + self.dynamic_tensor = binding + else: + self.inputs.append(binding) + + self.outputs = [] + num_outputs = acl.mdl.get_num_outputs(self._desc) + for index in range(num_outputs): + dims = self._get_output_dims(index) + data_type = acl.mdl.get_output_data_type(self._desc, index) + data_type = _from_acl_data_type[data_type] + size = acl.mdl.get_output_size_by_index(self._desc, index) + self.outputs.append( + Binding(index, dims['name'], dims['dims'], data_type, size)) + + def destroy(self): + if self._desc is not None: + acl.mdl.destroy_desc(self._desc) + self._desc = None + + def __del__(self): + self.destroy() + + def _get_input_dims(self, index: int): + """Get the dimension of the input by index. + + Args: + index (int): The index of the input. + """ + dims, ret = acl.mdl.get_input_dims(self._desc, index) + _check(ret, 'acl.mdl.get_input_dims') + return dims + + def _get_output_dims(self, index: int): + """Get the dimension of the output by index. 
+ + Args: + index (int): The index of the output. + """ + dims, ret = acl.mdl.get_output_dims(self._desc, index) + _check(ret, 'acl.mdl.get_output_dims') + dims['name'] = dims['name'].split(':')[-1] + return dims + + def _get_current_output_dims(self, index: int): + """Get the dimension of current output implementation. + + Args: + index (int): The index of the output. + """ + dims, ret = acl.mdl.get_cur_output_dims(self._desc, index) + _check(ret, 'acl.mdl.get_cur_output_dims') + return dims + + def get_current_ouptut_dims(self): + """Get the dimension of current output.""" + dimses = [] + for output in self.outputs: + dims = self._get_current_output_dims(output.index) + dimses.append(dims['dims']) + return dimses + + def _get_input_index(self, name: str) -> int: + """Get input index by name. + + Args: + name (str): The name of the input. + + Returns: + (int): The input index. + """ + index, ret = acl.mdl.get_input_index_by_name(self._desc, name) + return index if ret == 0 else -1 + + def get_dynamic_batch(self) -> Sequence: + """Get dynamic batch size list. + + Returns: + (Sequence): The dynamic batch list. + """ + batch, ret = acl.mdl.get_dynamic_batch(self._desc) + _check(ret, 'acl.mdl.get_dynamic_batch') + batch = batch['batch'] + return sorted(batch) + + def get_dynamic_hw(self) -> Sequence: + """Get dynamic height and width size list. + + Returns: + (Sequence): The dynamic height and width + """ + hw_info, ret = acl.mdl.get_dynamic_hw(self._desc, -1) + _check(ret, 'acl.mdl.get_dynamic_hw') + return hw_info['hw'] + + def get_input_dynamic_dims(self) -> Sequence: + """Get dynamic dims. 
class Context:
    """Process-wide acl lifetime management.

    Reference-counts acl.init()/acl.finalize() so several wrapper instances
    can share one acl runtime. When torch_npu is in use, the runtime is
    managed elsewhere and instances of this class are inert.
    """

    # Number of live Context instances participating in acl lifetime.
    ref_count = 0
    # True when this process called acl.init() itself and therefore must
    # also call acl.finalize().
    owned_acl = False

    def __init__(self):
        # NOTE: _is_torch_npu_available is defined later in this module; the
        # name is resolved at call time, after the module is fully imported.
        if not _is_torch_npu_available:
            self._active = True
            if Context.ref_count == 0:
                ret = acl.init()
                if ret == 0:
                    Context.owned_acl = True
                elif ret == 100002:  # ACL_ERROR_REPEAT_INITIALIZE
                    # acl was already initialized by someone else; do not
                    # claim ownership (and so never finalize it ourselves).
                    pass
                else:
                    _check(ret, 'acl.init')
            Context.ref_count += 1
        else:
            self._active = False

    def __del__(self):
        self.destroy()

    def destroy(self):
        """Drop this instance's reference; finalize acl on the last one."""
        if not self._active:
            return
        Context.ref_count -= 1
        if Context.ref_count == 0 and Context.owned_acl:
            ret = acl.finalize()
            if ret == 0:
                Context.owned_acl = False
            elif ret == 100037:  # ACL_ERROR_REPEAT_FINALIZE
                pass
            else:
                _check(ret, 'acl.finalize')
        # Guard against double release (explicit destroy() then __del__).
        self._active = False
+ + Args: + model (str): Path of the model file. + + Examples: + >>> from mmdeploy.backend.ascend import AscendWrapper + >>> import torch + >>> + >>> model_file = 'model.om' + >>> model = AscendWrapper(model_file) + >>> inputs = dict(input=torch.randn(1, 3, 224, 224)) + >>> outputs = model(inputs) + """ + + def __init__(self, model: str, device: str = 'npu'): + + self._context = Context() + self._device = Device(device) + + with self._device(): + + self._model_id, ret = acl.mdl.load_from_file(model) + _check(ret, 'acl.mdl.load_from_file') + + self._model_desc = ModelDesc(self._model_id) + + self._config_dynamic_shapes() + self._create_input_buffers() + self._create_output_buffers() + + output_names = [output.name for output in self._model_desc.outputs] + + super().__init__(output_names) + + def destroy(self): + if self._model_id is None: + return + with self._device(): + self._input.destroy() + self._output.destroy() + self._model_desc.destroy() + acl.mdl.unload(self._model_id) + self._model_id = None + self._context.destroy() + + def __del__(self): + self.destroy() + + def forward(self, inputs: Dict[str, + torch.Tensor]) -> Dict[str, torch.Tensor]: + """Run forward inference. + + Args: + inputs (Dict[str, torch.Tensor]): Key-value pairs of model inputs. + + Returns: + Dict[str, torch.Tensor]: Key-value pairs of model outputs. 
+ """ + + with self._device(): + input_shapes = [ + inputs[x.name].shape for x in self._model_desc.inputs + ] + + output_shapes = self._reshape(input_shapes) + + self._synchronize_torch_stream() + + torch_device = self._device._torch_device + + for binding in self._model_desc.inputs: + tensor = inputs[binding.name].to( + torch_device, dtype=binding.data_type).contiguous() + self._copy_tensor_to_buffer(tensor, + self._input.buffers[binding.index]) + + outputs = {} + for binding in self._model_desc.outputs: + shape = output_shapes[binding.index] + tensor = torch.empty( + shape, dtype=binding.data_type, device=torch_device) + if torch_device.type == 'npu': + ret = acl.update_data_buffer( + self._output.buffers[binding.index].handle, + tensor.data_ptr(), + tensor.element_size() * tensor.numel()) + _check(ret, 'acl.update_data_buffer') + outputs[binding.name] = tensor + + self.__ascend_execute() + + for binding in self._model_desc.outputs: + self._copy_buffer_to_tensor( + self._output.buffers[binding.index], tensor) + + return outputs + + def _copy_tensor_to_buffer(self, tensor: torch.Tensor, buffer: DataBuffer): + if tensor.device.type == 'cpu': + kind = ACL_MEMCPY_HOST_TO_DEVICE + ret = acl.rt.memcpy(buffer.data, buffer.size, tensor.data_ptr(), + tensor.element_size() * tensor.numel(), kind) + _check(ret, 'acl.rt.memcpy') + else: + ret = acl.update_data_buffer( + buffer.handle, tensor.data_ptr(), + tensor.element_size() * tensor.numel()) + _check(ret, 'acl.update_data_buffer') + + def _copy_buffer_to_tensor(self, buffer: DataBuffer, tensor: torch.Tensor): + if tensor.device.type == 'cpu': + kind = ACL_MEMCPY_DEVICE_TO_HOST + size = tensor.element_size() * tensor.numel() + ret = acl.rt.memcpy(tensor.data_ptr(), size, buffer.data, size, + kind) + _check(ret, 'acl.rt.memcpy') + + def _verify_dims(self, src: Sequence[int], ref: Sequence[int]): + """Check if src match ref.""" + if len(src) != len(ref): + raise RuntimeError(f'Shape mismatch {src} vs {ref}') + for src_dim, 
ref_dim in zip(src, ref): + if ref_dim != -1 and src_dim != ref_dim: + raise RuntimeError(f'Shape mismatch {src} vs {ref}') + + def _reshape(self, input_shapes: Sequence[Sequence[int]]): + """Reshape the inputs. + + Args: + input_shapes (Sequence[Sequence[int]]): The shapes used to + do reshape + """ + + if len(input_shapes) != len(self._model_desc.inputs): + raise RuntimeError('#inputs mismatch') + + for src, ref in zip(input_shapes, self._model_desc.inputs): + self._verify_dims(src, ref.dims) + + self._reshape_fn(input_shapes) + + dimses = self._model_desc.get_current_ouptut_dims() + return dimses + + def _reshape_static(self, input_shapes): + """Do nothing. + + Args: + input_shapes (Sequence[Sequence[int]]): Not used. + """ + pass + + def _reshape_dynamic_batch_size(self, + input_shapes: Sequence[Sequence[int]]): + """Reshape for dynamic batch size. + + Args: + input_shapes (Sequence[Sequence[int]]): The shapes used to + do reshape + """ + batch_size = None + for src, ref in zip(input_shapes, self._model_desc.inputs): + if ref.dims[0] == -1: + if batch_size is None: + batch_size = src[0] + elif batch_size != src[0]: + raise RuntimeError( + f'Inconsistent batch size {batch_size} vs {src[0]}') + + if batch_size is None: + raise RuntimeError('Can\'t determine batch size') + + candidates = list( + filter(lambda x: x >= batch_size, self._dynamic_batch_size)) + if not candidates: + raise RuntimeError(f'Batch size {batch_size} is not supported.' + f' ({self._dynamic_batch_size})') + + ret = acl.mdl.set_dynamic_batch_size( + self._model_id, self._input.handle, + self._model_desc.dynamic_tensor.index, candidates[0]) + _check(ret, 'acl.mdl.set_dynamic_batch_size') + + def _reshape_dynamic_image_size(self, + input_shapes: Sequence[Sequence[int]]): + """Reshape for dynamic image size. 
    def _reshape_dynamic_dims(self, input_shapes: Sequence[Sequence[int]]):
        """Select and apply a dynamic-dims profile matching the inputs.

        Args:
            input_shapes (Sequence[Sequence[int]]): The shapes used to
                do reshape
        """
        match = [True] * len(self._dynamic_dims)
        # ``ptr`` walks the flattened (input, axis) positions; each profile
        # presumably stores its reference dims in that same flattened
        # order — TODO confirm against acl.mdl.get_input_dynamic_dims.
        ptr = 0
        for src in input_shapes:
            for axis, src_dim in enumerate(src):
                for index, dims in enumerate(self._dynamic_dims):
                    ref_dim = dims['dims'][ptr]
                    # allow batch dimension to vary
                    if axis == 0 and src_dim < ref_dim:
                        pass
                    elif src_dim != ref_dim:
                        match[index] = False
                # NOTE(review): ptr advances once per axis, outside the
                # profile loop — confirm indentation against upstream.
                ptr += 1

        # Use the first profile that matched every dimension.
        indices = [i for i, v in enumerate(match) if v]
        if not indices:
            raise RuntimeError('No matching profile found')
        index = indices[0]

        ret = acl.mdl.set_input_dynamic_dims(
            self._model_id, self._input.handle,
            self._model_desc.dynamic_tensor.index, self._dynamic_dims[index])
        _check(ret, 'acl.mdl.set_input_dynamic_dims')
self._model_desc.get_input_dynamic_dims() + if self._dynamic_dims: + self._reshape_fn = self._reshape_dynamic_dims + return + + self._dynamic_hw = self._model_desc.get_dynamic_hw() + if self._dynamic_hw: + self._reshape_fn = self._reshape_dynamic_image_size + return + + raise RuntimeError('Can\'t infer input shape type') + + def _create_input_buffers(self): + """Create buffers for inputs.""" + self._input = Dataset() + for binding in self._model_desc.inputs: + self._input.add_buffer(DataBuffer(binding.size)) + if self._model_desc.dynamic_tensor: + self._input.add_buffer( + DataBuffer(self._model_desc.dynamic_tensor.size)) + + def _create_output_buffers(self): + """Create buffers for outputs.""" + self._output = Dataset() + for binding in self._model_desc.outputs: + self._output.add_buffer(DataBuffer(binding.size)) + + def _synchronize_torch_stream(self): + if _is_torch_npu_available: + torch.npu.current_stream(self._device._torch_device).synchronize() + + @TimeCounter.count_time('ascend') + def __ascend_execute(self): + """Run inference on Ascend.""" + ret = acl.mdl.execute(self._model_id, self._input.handle, + self._output.handle) + _check(ret, 'acl.mdl.execute') diff --git a/mmdeploy/backend/sdk/export_info.py b/mmdeploy/backend/sdk/export_info.py index 5be46328d..90cbdd20b 100644 --- a/mmdeploy/backend/sdk/export_info.py +++ b/mmdeploy/backend/sdk/export_info.py @@ -130,6 +130,8 @@ def get_models(deploy_cfg: Union[str, mmcv.Config], weights = replace_suffix(ir_name, '.bin') if 'precision' in deploy_cfg['backend_config']: precision = deploy_cfg['backend_config']['precision'] + elif backend == Backend.ASCEND: + net = replace_suffix(ir_name, '.om') elif backend == Backend.SNPE: net = replace_suffix(ir_name, '.dlc') elif backend in [Backend.ONNXRUNTIME, Backend.TORCHSCRIPT]: diff --git a/mmdeploy/codebase/base/backend_model.py b/mmdeploy/codebase/base/backend_model.py index 3a3ae3faf..44abd2351 100644 --- a/mmdeploy/codebase/base/backend_model.py +++ 
b/mmdeploy/codebase/base/backend_model.py @@ -106,6 +106,9 @@ class BaseBackendModel(torch.nn.Module, metaclass=ABCMeta): model=backend_files[0], input_names=input_names, output_names=output_names) + elif backend == Backend.ASCEND: + from mmdeploy.backend.ascend import AscendWrapper + return AscendWrapper(model=backend_files[0], device=device) elif backend == Backend.SNPE: from mmdeploy.backend.snpe import SNPEWrapper uri = None @@ -116,6 +119,10 @@ class BaseBackendModel(torch.nn.Module, metaclass=ABCMeta): else: raise NotImplementedError(f'Unknown backend type: {backend.value}') + def destroy(self): + if hasattr(self, 'wrapper') and hasattr(self.wrapper, 'destroy'): + self.wrapper.destroy() + @abstractmethod def forward(self, *args, **kwargs): """The forward interface that must be implemented. diff --git a/mmdeploy/codebase/mmdet/core/post_processing/bbox_nms.py b/mmdeploy/codebase/mmdet/core/post_processing/bbox_nms.py index f04d55891..1e425daa5 100644 --- a/mmdeploy/codebase/mmdet/core/post_processing/bbox_nms.py +++ b/mmdeploy/codebase/mmdet/core/post_processing/bbox_nms.py @@ -324,3 +324,90 @@ def multiclass_nms__torchscript(ctx, scores, boxes, keeps, batch_size, keep_top_k=keep_top_k) return dets, labels + + +class AscendBatchNMSOp(torch.autograd.Function): + + @staticmethod + def forward(ctx, bboxes: torch.Tensor, scores: torch.Tensor, + score_threshold: float, iou_threshold: float, + max_size_per_class: int, max_total_size: int): + """Dummy nms forward + Args: + boxes (torch.Tensor): boxes in shape (batch, N, C, 4). + scores (torch.Tensor): scores in shape (batch, N, C). + score_threshold (float): the score threshold. + iou_threshold (float): the iou threshold. + max_size_per_class (int): max size per class. + max_total_size (int): max total size. 
+ + Returns: + (torch.Tensor): boxes,(1, N, 4) + (torch.Tensor): scores,(1, N) + (torch.Tensor): classes,(1, N) + (torch.Tensor): num_dets,(1,) + """ + + # Python implementation for onnx export + nmsed_boxes = bboxes[:, :max_total_size, 0, :] + nmsed_scores = scores[:, :max_total_size, 0] + nmsed_classes = torch.arange(max_total_size, dtype=torch.long) + nmsed_num = torch.Tensor([max_total_size]) + + return nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_num + + @staticmethod + def symbolic(g, bboxes, scores, score_thr, iou_thr, max_size_p_class, + max_t_size): + nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_num = g.op( + 'mmdeploy::BatchMultiClassNMS', + bboxes, + scores, + score_threshold_f=score_thr, + iou_threshold_f=iou_thr, + max_size_per_class_i=max_size_p_class, + max_total_size_i=max_t_size, + outputs=4) + return nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_num + + +@FUNCTION_REWRITER.register_rewriter( + func_name='mmdeploy.codebase.mmdet.core.post_processing._multiclass_nms', + backend='ascend') +def multiclass_nms__ascend(ctx, + boxes: Tensor, + scores: Tensor, + max_output_boxes_per_class: int = 1000, + iou_threshold: float = 0.5, + score_threshold: float = 0.05, + pre_top_k: int = -1, + keep_top_k: int = -1): + """Wrapper for `multiclass_nms` with Ascend. + + Args: + ctx (ContextCaller): The context with additional information. + boxes (Tensor): The bounding boxes of shape [N, num_boxes, 4]. + scores (Tensor): The detection scores of shape + [N, num_boxes, num_classes]. + max_output_boxes_per_class (int): Maximum number of output + boxes per class of nms. Defaults to 1000. + iou_threshold (float): IOU threshold of nms. Defaults to 0.5. + score_threshold (float): score threshold of nms. + Defaults to 0.05. + pre_top_k (int): Number of top K boxes to keep before nms. + Defaults to -1. + keep_top_k (int): Number of top K boxes to keep after nms. + Defaults to -1. 
+ + Returns: + tuple[Tensor, Tensor]: (dets, labels), `dets` of shape [N, num_det, 5] + and `labels` of shape [N, num_det]. + """ + boxes = boxes if boxes.dim() == 4 else boxes.unsqueeze(2) + keep_top_k = max_output_boxes_per_class if keep_top_k < 0 else min( + max_output_boxes_per_class, keep_top_k) + nmsed_boxes, nmsed_scores, nmsed_classes, _ = AscendBatchNMSOp.apply( + boxes, scores, score_threshold, iou_threshold, keep_top_k, keep_top_k) + + dets = torch.cat([nmsed_boxes, nmsed_scores.unsqueeze(2)], dim=-1) + return dets, nmsed_classes diff --git a/mmdeploy/codebase/mmdet/models/dense_heads/yolo_head.py b/mmdeploy/codebase/mmdet/models/dense_heads/yolo_head.py index e6125470d..2a00966eb 100644 --- a/mmdeploy/codebase/mmdet/models/dense_heads/yolo_head.py +++ b/mmdeploy/codebase/mmdet/models/dense_heads/yolo_head.py @@ -108,7 +108,8 @@ def yolov3_head__get_bboxes(ctx, batch_inds = torch.arange( batch_size, device=device).unsqueeze(-1).long() # Avoid onnx2tensorrt issue in https://github.com/NVIDIA/TensorRT/issues/1134 # noqa: E501 - transformed_inds = (bbox_pred.shape[1] * batch_inds + topk_inds) + transformed_inds = ( + bbox_pred.shape[1] * batch_inds + topk_inds.long()) bbox_pred = bbox_pred.reshape(-1, 4)[transformed_inds, :].reshape( batch_size, -1, 4) cls_pred = cls_pred.reshape( diff --git a/mmdeploy/codebase/mmdet/models/roi_heads/single_level_roi_extractor.py b/mmdeploy/codebase/mmdet/models/roi_heads/single_level_roi_extractor.py index 321522e7c..4bbc055b4 100644 --- a/mmdeploy/codebase/mmdet/models/roi_heads/single_level_roi_extractor.py +++ b/mmdeploy/codebase/mmdet/models/roi_heads/single_level_roi_extractor.py @@ -100,6 +100,94 @@ def single_roi_extractor__forward__tensorrt(ctx, finest_scale, featmap_strides, aligned) +class AscendRoiExtractor(Function): + """Create AscendRoiExtractor op. + + This class is used to create a AscendRoiExtractor in ONNX for the Ascend + backend. 
+ """ + + @staticmethod + def symbolic(g, *args): + """Symbolic function for creating onnx op.""" + aligned = args[-1] + featmap_strides = [1 / stride for stride in args[-2]] + finest_scale = args[-3] + roi_scale_factor = args[-4] + sampling_ratio = args[-5] + pool_mode = args[-6] + output_size = args[-7] + inputs = args[:len(featmap_strides)] + rois = args[len(featmap_strides)] + + return g.op( + 'mmdeploy::RoiExtractor', + *inputs, + rois, + pooled_height_i=output_size[1], + pooled_width_i=output_size[0], + pool_mode_s=pool_mode, + sample_num_i=sampling_ratio, + roi_scale_factor_f=roi_scale_factor, + finest_scale_i=finest_scale, + spatial_scale_f=featmap_strides, + aligned_i=aligned, + outputs=1) + + @staticmethod + def forward(ctx, *args): + """Run forward.""" + # aligned = args[-1] + featmap_strides = args[-2] + # finest_scale = args[-3] + # roi_scale_factor = args[-4] + # sampling_ratio = args[-5] + output_size = args[-7] + inputs = args[:len(featmap_strides)] + rois = args[len(featmap_strides)] + + num_proposals = rois.shape[0] + channel = inputs[0].shape[1] + + return rois.new_zeros( + (num_proposals, channel, output_size[1], output_size[0])) + + +@FUNCTION_REWRITER.register_rewriter( + 'mmdet.models.roi_heads.roi_extractors.' + 'single_level_roi_extractor.SingleRoIExtractor.forward', + backend='ascend') +def single_roi_extractor__forward__ascend(ctx, + self, + feats, + rois, + roi_scale_factor=None): + """Rewrite `forward` of `SingleRoIExtractor` for Ascend backend. + + This function uses RoiExtractor op for Ascend deployment. + """ + featmap_strides = self.featmap_strides + finest_scale = self.finest_scale + + for roi_layer in self.roi_layers: + assert isinstance( + roi_layer, + RoIAlign), f'{type(roi_layer)} is not supported in Ascend.' 
+ + roi_layer = self.roi_layers[0] + out_size = roi_layer.output_size + sampling_ratio = roi_layer.sampling_ratio + pool_mode = roi_layer.pool_mode + aligned = roi_layer.aligned + if roi_scale_factor is None: + roi_scale_factor = 1.0 + + featmap_strides = [float(s) for s in featmap_strides] + return AscendRoiExtractor.apply(*feats, rois, out_size, pool_mode, + sampling_ratio, roi_scale_factor, + finest_scale, featmap_strides, aligned) + + @FUNCTION_REWRITER.register_rewriter( func_name='mmdet.models.roi_heads.SingleRoIExtractor.forward') @mark('roi_extractor', inputs=['feats', 'rois'], outputs=['bbox_feats']) diff --git a/mmdeploy/mmcv/ops/roi_align.py b/mmdeploy/mmcv/ops/roi_align.py index 33cd7342d..034629c38 100644 --- a/mmdeploy/mmcv/ops/roi_align.py +++ b/mmdeploy/mmcv/ops/roi_align.py @@ -44,7 +44,17 @@ def roi_align_default(ctx, g, input: Tensor, rois: Tensor, backend = get_backend(ctx.cfg) if backend == Backend.PPLNN: domain = 'mmcv' - elif backend == Backend.ONNXRUNTIME: + return g.op( + f'{domain}::MMCVRoiAlign', + input, + rois, + output_height_i=output_size[0], + output_width_i=output_size[1], + spatial_scale_f=spatial_scale, + sampling_ratio_i=sampling_ratio, + mode_s=pool_mode, + aligned_i=aligned) + else: from torch.onnx.symbolic_opset9 import _cast_Long from torch.onnx.symbolic_opset11 import add, select, squeeze batch_indices = _cast_Long( @@ -96,15 +106,3 @@ def roi_align_default(ctx, g, input: Tensor, rois: Tensor, sampling_ratio_i=sampling_ratio, mode_s=pool_mode, aligned_i=aligned) - else: - domain = 'mmdeploy' - return g.op( - f'{domain}::MMCVRoiAlign', - input, - rois, - output_height_i=output_size[0], - output_width_i=output_size[1], - spatial_scale_f=spatial_scale, - sampling_ratio_i=sampling_ratio, - mode_s=pool_mode, - aligned_i=aligned) diff --git a/mmdeploy/pytorch/functions/__init__.py b/mmdeploy/pytorch/functions/__init__.py index 296c873c2..a4b161844 100644 --- a/mmdeploy/pytorch/functions/__init__.py +++ 
b/mmdeploy/pytorch/functions/__init__.py @@ -13,6 +13,7 @@ from .masked_fill import masked_fill__onnxruntime from .normalize import normalize__ncnn from .repeat import tensor__repeat__tensorrt from .size import tensor__size__ncnn +from .tensor_getitem import tensor__getitem__ascend from .tensor_setitem import tensor__setitem__default from .topk import topk__dynamic, topk__tensorrt from .triu import triu__default @@ -23,6 +24,7 @@ __all__ = [ 'tensor__size__ncnn', 'topk__dynamic', 'topk__tensorrt', 'chunk__ncnn', 'triu__default', 'atan2__default', 'normalize__ncnn', 'expand__ncnn', 'chunk__torchscript', 'masked_fill__onnxruntime', - 'tensor__setitem__default', 'adaptive_avg_pool2d__default', - 'adaptive_avg_pool2d__ncnn', 'multi_head_attention_forward' + 'tensor__setitem__default', 'tensor__getitem__ascend', + 'adaptive_avg_pool2d__default', 'adaptive_avg_pool2d__ncnn', + 'multi_head_attention_forward' ] diff --git a/mmdeploy/pytorch/functions/size.py b/mmdeploy/pytorch/functions/size.py index 4e7ec3561..30ead981a 100644 --- a/mmdeploy/pytorch/functions/size.py +++ b/mmdeploy/pytorch/functions/size.py @@ -22,3 +22,20 @@ def tensor__size__ncnn(ctx, self, *args): ret = [int(r) for r in ret] ret = tuple(ret) return ret + + +@FUNCTION_REWRITER.register_rewriter( + func_name='torch.Tensor.size', backend='ascend') +def tensor__size__ascend(ctx, self, *args): + """Rewrite `size` for ascend backend. + + Support negative index. + """ + + if len(args) != 0: + index = args[0] + if index < 0: + index = self.dim() + index + args = (index, ) + + return ctx.origin_func(self, *args) diff --git a/mmdeploy/pytorch/functions/tensor_getitem.py b/mmdeploy/pytorch/functions/tensor_getitem.py new file mode 100644 index 000000000..7454a5a6d --- /dev/null +++ b/mmdeploy/pytorch/functions/tensor_getitem.py @@ -0,0 +1,41 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+from typing import Iterable + +import torch + +from mmdeploy.core import FUNCTION_REWRITER + + +@FUNCTION_REWRITER.register_rewriter( + func_name='torch.Tensor.__getitem__', backend='ascend') +def tensor__getitem__ascend(ctx, self, key) -> torch.Tensor: + """Rewrite `getitem` for ascend backend. + + Ascend does not support negative select + """ + if not isinstance(key, (tuple, list)): + if isinstance(key, int) and key < 0: + key = self.dim() + key + return ctx.origin_func(self, key) + + def _num_slice_types(slices): + num_slice = 0 + for s in slices: + if isinstance(s, slice) or isinstance(s, int) or isinstance( + s, Iterable): + num_slice += 1 + return num_slice + + shape = self.shape + new_key = list(key) + num_ellipsis = len(shape) - _num_slice_types(new_key) + dim_count = 0 + for i, k in enumerate(new_key): + if isinstance(k, int): + if k < 0: + new_key[i] = shape[dim_count] + k + if k == Ellipsis: + dim_count = dim_count + num_ellipsis + elif k is not None: + dim_count += 1 + return ctx.origin_func(self, new_key) diff --git a/mmdeploy/utils/constants.py b/mmdeploy/utils/constants.py index 8501ab2f6..21b283ae8 100644 --- a/mmdeploy/utils/constants.py +++ b/mmdeploy/utils/constants.py @@ -59,6 +59,7 @@ class Backend(AdvancedEnum): OPENVINO = 'openvino' SDK = 'sdk' TORCHSCRIPT = 'torchscript' + ASCEND = 'ascend' DEFAULT = 'default' diff --git a/mmdeploy/utils/test.py b/mmdeploy/utils/test.py index 62c7fce94..0e45ee229 100644 --- a/mmdeploy/utils/test.py +++ b/mmdeploy/utils/test.py @@ -46,6 +46,8 @@ def backend_checker(backend: Backend, require_plugin: bool = False): from mmdeploy.apis.ncnn import is_custom_ops_available elif backend == Backend.OPENVINO: from mmdeploy.apis.openvino import is_available + elif backend == Backend.ASCEND: + from mmdeploy.apis.ascend import is_available else: warnings.warn('The backend checker is not available') return @@ -96,6 +98,8 @@ def check_backend(backend: Backend, require_plugin: bool = False): from mmdeploy.apis.openvino 
import is_available elif backend == Backend.TORCHSCRIPT: from mmdeploy.backend.torchscript import ops_available as is_available + elif backend == Backend.ASCEND: + from mmdeploy.backend.ascend import is_available else: warnings.warn('The backend checker is not available') return @@ -537,6 +541,20 @@ def get_backend_outputs(ir_file_path: str, backend_files = [ir_file_path] device = 'cpu' backend_feats = [v for _, v in model_inputs.items()] + elif backend == Backend.ASCEND: + # Ascend model conversion + import mmdeploy.apis.ascend as ascend_apis + from mmdeploy.utils import get_model_inputs + if not ascend_apis.is_available(): + return None + work_dir = osp.split(ir_file_path)[0] + # convert model + convert_args = get_model_inputs(deploy_cfg) + ascend_apis.from_onnx(ir_file_path, work_dir, convert_args[0]) + om_file_name = osp.splitext(osp.split(ir_file_path)[1])[0] + backend_files = [osp.join(work_dir, om_file_name + '.om')] + backend_feats = flatten_model_inputs + device = 'cpu' else: raise NotImplementedError( f'Unimplemented backend type: {backend.value}') diff --git a/setup.cfg b/setup.cfg index b02db3a5c..a7c03c88b 100644 --- a/setup.cfg +++ b/setup.cfg @@ -15,3 +15,6 @@ known_third_party = h5py,m2r,mmcls,mmcv,mmdeploy_python,mmdet,mmedit,mmocr,mmseg no_lines_before = STDLIB,LOCALFOLDER default_section = THIRDPARTY skip = service/snpe/client/inference_pb2.py,service/snpe/client/inference_pb2_grpc.py + +[codespell] +ignore-words=.codespell_ignore.txt diff --git a/tests/test_apis/test_onnx2ascend.py b/tests/test_apis/test_onnx2ascend.py new file mode 100644 index 000000000..30024a03d --- /dev/null +++ b/tests/test_apis/test_onnx2ascend.py @@ -0,0 +1,71 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+import os.path as osp +import tempfile + +import mmcv +import pytest +import torch +import torch.nn as nn + +from mmdeploy.utils import Backend +from mmdeploy.utils.test import backend_checker + +onnx_file = tempfile.NamedTemporaryFile(suffix='.onnx').name +test_img = torch.rand([1, 3, 8, 8]) + + +@pytest.mark.skip(reason='This a not test class but a utility class.') +class TestModel(nn.Module): + + def __init__(self): + super().__init__() + + def forward(self, x): + return x * 0.5 + + +test_model = TestModel().eval() + + +def generate_onnx_file(model): + with torch.no_grad(): + dynamic_axes = { + 'input': { + 0: 'batch', + 2: 'width', + 3: 'height' + }, + 'output': { + 0: 'batch' + } + } + torch.onnx.export( + model, + test_img, + onnx_file, + output_names=['output'], + input_names=['input'], + keep_initializers_as_inputs=True, + do_constant_folding=True, + verbose=False, + opset_version=11, + dynamic_axes=dynamic_axes) + assert osp.exists(onnx_file) + + +@backend_checker(Backend.ASCEND) +def test_onnx2ascend(): + from mmdeploy.apis.ascend import from_onnx + model = test_model + generate_onnx_file(model) + + work_dir, _ = osp.split(onnx_file) + file_name = osp.splitext(onnx_file)[0] + om_path = osp.join(work_dir, file_name + '.om') + model_inputs = mmcv.Config( + dict( + dynamic_batch_size=[1, 2, 4], + input_shapes=dict(input=[-1, 3, 224, 224]))) + from_onnx(onnx_file, work_dir, model_inputs) + assert osp.exists(work_dir) + assert osp.exists(om_path) diff --git a/tests/test_backend/test_wrapper.py b/tests/test_backend/test_wrapper.py index 33ecb9ef9..d089665d4 100644 --- a/tests/test_backend/test_wrapper.py +++ b/tests/test_backend/test_wrapper.py @@ -103,6 +103,18 @@ def onnx2backend(backend, onnx_file): work_dir = backend_dir from_onnx(onnx_file, work_dir, input_info, output_names) return backend_file + elif backend == Backend.ASCEND: + import mmcv + + from mmdeploy.apis.ascend import from_onnx + backend_dir = tempfile.TemporaryDirectory().name + work_dir = 
backend_dir + file_name = osp.splitext(osp.split(onnx_file)[1])[0] + backend_file = osp.join(work_dir, file_name + '.om') + model_inputs = mmcv.Config( + dict(input_shapes=dict(input=test_img.shape))) + from_onnx(onnx_file, work_dir, model_inputs) + return backend_file def create_wrapper(backend, model_files): @@ -133,6 +145,10 @@ def create_wrapper(backend, model_files): torchscript_model = TorchscriptWrapper( model_files, input_names=input_names, output_names=output_names) return torchscript_model + elif backend == Backend.ASCEND: + from mmdeploy.backend.ascend import AscendWrapper + ascend_model = AscendWrapper(model_files) + return ascend_model else: raise NotImplementedError(f'Unknown backend type: {backend.value}') @@ -163,13 +179,16 @@ def run_wrapper(backend, wrapper, input): elif backend == Backend.TORCHSCRIPT: results = wrapper({'input': input})['output'] return results + elif backend == Backend.ASCEND: + results = wrapper({'input': input})['output'] + return results else: raise NotImplementedError(f'Unknown backend type: {backend.value}') ALL_BACKEND = [ Backend.TENSORRT, Backend.ONNXRUNTIME, Backend.PPLNN, Backend.NCNN, - Backend.OPENVINO, Backend.TORCHSCRIPT + Backend.OPENVINO, Backend.TORCHSCRIPT, Backend.ASCEND ] diff --git a/tests/test_codebase/test_mmdet/test_mmdet_core.py b/tests/test_codebase/test_mmdet/test_mmdet_core.py index f822ea333..6469aa790 100644 --- a/tests/test_codebase/test_mmdet/test_mmdet_core.py +++ b/tests/test_codebase/test_mmdet/test_mmdet_core.py @@ -343,3 +343,49 @@ def test__anchorgenerator__single_level_grid_priors(): find_trt_grid_priors = True assert find_trt_grid_priors + + +@backend_checker(Backend.ASCEND) +def test_multiclass_nms__ascend(): + from mmdeploy.codebase.mmdet.core import multiclass_nms + deploy_cfg = mmcv.Config( + dict( + onnx_config=dict( + input_names=['boxes', 'scores'], + output_names=['dets', 'labels'], + input_shape=None), + backend_config=dict( + type='ascend', + model_inputs=[ + 
dict(input_shapes=dict(boxes=[1, 5, 4], scores=[1, 5, 8])) + ]), + codebase_config=dict( + type='mmdet', + task='ObjectDetection', + post_processing=dict( + score_threshold=0.05, + iou_threshold=0.5, + max_output_boxes_per_class=20, + pre_top_k=-1, + keep_top_k=10, + background_label_id=-1, + )))) + + boxes = torch.rand(1, 5, 4) + scores = torch.rand(1, 5, 8) + max_output_boxes_per_class = 20 + keep_top_k = 10 + wrapped_func = WrapFunction( + multiclass_nms, + max_output_boxes_per_class=max_output_boxes_per_class, + keep_top_k=keep_top_k) + rewrite_outputs, _ = get_rewrite_outputs( + wrapped_func, + model_inputs={ + 'boxes': boxes, + 'scores': scores + }, + deploy_cfg=deploy_cfg) + + assert rewrite_outputs is not None, 'Got unexpected rewrite '\ + 'outputs: {}'.format(rewrite_outputs) diff --git a/tests/test_codebase/test_mmdet/test_mmdet_models.py b/tests/test_codebase/test_mmdet/test_mmdet_models.py index c855d4156..9aa7a9d52 100644 --- a/tests/test_codebase/test_mmdet/test_mmdet_models.py +++ b/tests/test_codebase/test_mmdet/test_mmdet_models.py @@ -638,6 +638,73 @@ def test_single_roi_extractor(backend_type: Backend): model_output, backend_output, rtol=1e-03, atol=1e-05) +def test_single_roi_extractor__ascend(): + check_backend(Backend.ASCEND) + + # create wrap function + from mmdeploy.utils.test import WrapFunction + single_roi_extractor = get_single_roi_extractor() + out_channels = single_roi_extractor.out_channels + + def single_roi_extractor_func(feat0, feat1, feat2, feat3, rois): + return single_roi_extractor([feat0, feat1, feat2, feat3], rois) + + single_roi_extractor_wrapper = WrapFunction(single_roi_extractor_func) + + # generate data + seed_everything(1234) + feats = [ + torch.rand((1, out_channels, 200, 336)), + torch.rand((1, out_channels, 100, 168)), + torch.rand((1, out_channels, 50, 84)), + torch.rand((1, out_channels, 25, 42)), + ] + seed_everything(5678) + rois = torch.tensor([[0.0000, 587.8285, 52.1405, 886.2484, 341.5644]]) + + # create config 
+ input_names = ['feat0', 'feat1', 'feat2', 'feat3', 'rois'] + output_names = ['roi_feat'] + model_inputs = dict(zip(input_names, feats + [rois])) + deploy_cfg = mmcv.Config( + dict( + backend_config=dict( + type=Backend.ASCEND.value, + model_inputs=[ + dict( + input_shapes=dict( + feat0=feats[0].shape, + feat1=feats[1].shape, + feat2=feats[2].shape, + feat3=feats[3].shape, + rois=rois.shape)) + ]), + onnx_config=dict( + input_names=input_names, + output_names=output_names, + input_shape=None), + codebase_config=dict( + type='mmdet', + task='ObjectDetection', + ))) + + # get torch output + model_outputs = get_model_outputs(single_roi_extractor_wrapper, 'forward', + model_inputs) + + # get backend output + backend_outputs, _ = get_rewrite_outputs( + wrapped_model=single_roi_extractor_wrapper, + model_inputs=model_inputs, + deploy_cfg=deploy_cfg) + if isinstance(backend_outputs, dict): + backend_outputs = backend_outputs.values() + for model_output, backend_output in zip(model_outputs[0], backend_outputs): + model_output = model_output.squeeze().cpu().numpy() + backend_output = backend_output.squeeze() + assert model_output.shape == backend_output.shape + + def get_cascade_roi_head(is_instance_seg=False): """CascadeRoIHead Config.""" num_stages = 3 diff --git a/tests/test_pytorch/test_pytorch_functions.py b/tests/test_pytorch/test_pytorch_functions.py index f2aa31136..22f40475c 100644 --- a/tests/test_pytorch/test_pytorch_functions.py +++ b/tests/test_pytorch/test_pytorch_functions.py @@ -184,6 +184,32 @@ def test_size_of_tensor_static(): 'outputs: {}'.format(rewrite_outputs) +@backend_checker(Backend.ASCEND) +def test_size__ascend(): + + def model_func(input): + x = torch.Tensor.size(input, -1) + return torch.tensor(x) + + input = torch.zeros([1, 2, 3, 4]) + deploy_cfg_ascend = mmcv.Config( + dict( + onnx_config=dict(input_shape=None), + backend_config=dict( + type='ascend', + model_inputs=[dict(input_shapes=dict(input=input.shape))]), + 
codebase_config=dict(type='mmdet', task='ObjectDetection'))) + wrapped_func = WrapFunction(model_func) + rewrite_outputs, _ = get_rewrite_outputs( + wrapped_func, + model_inputs={'input': input}, + deploy_cfg=deploy_cfg_ascend, + run_with_backend=True) + + assert rewrite_outputs is not None, 'Got unexpected rewrite ' + 'outputs: {}'.format(rewrite_outputs) + + class TestTopk: input = torch.rand(1, 5, 5, 5) @@ -286,6 +312,32 @@ def test_normalize_ncnn(input, dim): assert osp.exists(bin_path) +@backend_checker(Backend.ASCEND) +def test_getitem__ascend(): + + input = torch.rand(1, 2, 3) + + def tensor_getitem(x): + return x[..., -1] + + # create wrapped model + wrapped_func = WrapFunction(tensor_getitem) + import tempfile + + import onnx + + from mmdeploy.core import RewriterContext + onnx_file = tempfile.NamedTemporaryFile(suffix='onnx').name + + # convert model + with RewriterContext( + cfg={}, backend=Backend.ASCEND.value, opset=11), torch.no_grad(): + torch.onnx.export(wrapped_func, input, onnx_file, opset_version=11) + onnx_model = onnx.load(onnx_file) + nodes = onnx_model.graph.node + assert nodes is not None + + @backend_checker(Backend.ONNXRUNTIME) @pytest.mark.parametrize( 'input', diff --git a/tools/check_env.py b/tools/check_env.py index e25806fa5..848582b15 100644 --- a/tools/check_env.py +++ b/tools/check_env.py @@ -44,6 +44,9 @@ def check_backend(): import mmdeploy.apis.snpe as snpe_apis logger.info(f'snpe_is_available: {snpe_apis.is_available()}') + import mmdeploy.apis.ascend as ascend_apis + logger.info(f'ascend_is_available: {ascend_apis.is_available()}') + def check_codebase(): codebase_versions = get_codebase_version() diff --git a/tools/deploy.py b/tools/deploy.py index 3dc2f2bf9..d6360c405 100644 --- a/tools/deploy.py +++ b/tools/deploy.py @@ -204,7 +204,7 @@ def main(): from mmdeploy.apis.tensorrt import onnx2tensorrt PIPELINE_MANAGER.enable_multiprocess(True, [onnx2tensorrt]) - PIPELINE_MANAGER.set_log_level(logging.INFO, [onnx2tensorrt]) + 
PIPELINE_MANAGER.set_log_level(log_level, [onnx2tensorrt]) backend_files = [] for model_id, model_param, onnx_path in zip( @@ -331,7 +331,7 @@ def main(): from mmdeploy.apis.pplnn import from_onnx pplnn_pipeline_funcs = [from_onnx] - PIPELINE_MANAGER.set_log_level(logging.INFO, pplnn_pipeline_funcs) + PIPELINE_MANAGER.set_log_level(log_level, pplnn_pipeline_funcs) pplnn_files = [] for onnx_path in ir_files: @@ -351,13 +351,32 @@ def main(): pplnn_files += [onnx_path, algo_file] backend_files = pplnn_files + elif backend == Backend.ASCEND: + from mmdeploy.apis.ascend import from_onnx + + ascend_pipeline_funcs = [from_onnx] + PIPELINE_MANAGER.set_log_level(log_level, ascend_pipeline_funcs) + + model_inputs = get_model_inputs(deploy_cfg) + + om_files = [] + for model_id, onnx_path in enumerate(ir_files): + om_path = osp.splitext(onnx_path)[0] + '.om' + from_onnx(onnx_path, args.work_dir, model_inputs[model_id]) + om_files.append(om_path) + backend_files = om_files + + if args.dump_info: + from mmdeploy.backend.ascend import update_sdk_pipeline + update_sdk_pipeline(args.work_dir) + if args.test_img is None: args.test_img = args.img headless = False # check headless or not for all platforms. - import tkinter try: + import tkinter tkinter.Tk() except Exception: headless = True diff --git a/tools/test.py b/tools/test.py index 07783eaae..324e02bc9 100644 --- a/tools/test.py +++ b/tools/test.py @@ -119,6 +119,7 @@ def main(): is_device_cpu = (args.device == 'cpu') device_id = None if is_device_cpu else parse_device_id(args.device) + destroy_model = model.destroy model = MMDataParallel(model, device_ids=[device_id]) # The whole dataset test wrapped a MMDataParallel class outside the module. 
# As mmcls.apis.test.py single_gpu_test defined, the MMDataParallel needs @@ -142,6 +143,8 @@ def main(): task_processor.evaluate_outputs(model_cfg, outputs, dataset, args.metrics, args.out, args.metric_options, args.format_only, args.log2file) + # only effective when the backend requires explicit clean-up (e.g. Ascend) + destroy_model() if __name__ == '__main__':