Merge branch 'dev-1.x' of github.com:open-mmlab/mmdeploy into triplemu/dockerfile

2025-01-14 08:09:43 +08:00 · 2022-11-08 13:24:41 +08:00 · 2022-11-08 13:24:41 +08:00 · df962aa2a5
commit df962aa2a5
parent 9263c7f604 d330e17af3
132 changed files with 2222 additions and 1588 deletions
--- a/configs/_base_/backends/rknn.py
+++ b/configs/_base_/backends/rknn.py
@ -1,8 +1,8 @@
 backend_config = dict(
    type='rknn',
    common_config=dict(
-        mean_values=None,
-        std_values=None,
-        target_platform='rk3588',
-        optimization_level=3),
+        mean_values=None,  # [[103.53, 116.28, 123.675]],
+        std_values=None,  # [[57.375, 57.12, 58.395]],
+        target_platform='rv1126',  # 'rk3588'
+        optimization_level=1),
    quantization_config=dict(do_quantization=False, dataset=None))
--- a/configs/mmcls/classification_rknn_static-224x224.py
+++ b/configs/mmcls/classification_rknn_static-224x224.py
--- a/configs/mmdet/detection/detection_rknn_static-320x320.py
+++ b/configs/mmdet/detection/detection_rknn_static-320x320.py
@ -0,0 +1,29 @@
+_base_ = ['../_base_/base_static.py', '../../_base_/backends/rknn.py']
+
+onnx_config = dict(input_shape=[320, 320])
+
+codebase_config = dict(model_type='rknn')
+
+backend_config = dict(input_size_list=[[3, 320, 320]])
+
+# yolov3, yolox
+# partition_config = dict(
+#     type='rknn',  # the partition policy name
+#     apply_marks=True,  # should always be set to True
+#     partition_cfg=[
+#         dict(
+#             save_file='model.onnx',  # name to save the partitioned onnx
+#             start=['detector_forward:input'],  # [mark_name:input, ...]
+#             end=['yolo_head:input'])  # [mark_name:output, ...]
+#     ])
+
+# # retinanet, ssd, fsaf
+# partition_config = dict(
+#     type='rknn',  # the partition policy name
+#     apply_marks=True,
+#     partition_cfg=[
+#         dict(
+#             save_file='model.onnx',
+#             start='detector_forward:input',
+#             end=['BaseDenseHead:output'])
+#     ])
--- a/configs/mmdet/detection/detection_rknn_static.py
+++ b/configs/mmdet/detection/detection_rknn_static.py
@ -1,17 +0,0 @@
-_base_ = ['../_base_/base_static.py', '../../_base_/backends/rknn.py']
-
-onnx_config = dict(input_shape=[640, 640])
-
-codebase_config = dict(model_type='rknn')
-
-backend_config = dict(input_size_list=[[3, 640, 640]])
-
-partition_config = dict(
-    type='rknn',  # the partition policy name
-    apply_marks=True,  # should always be set to True
-    partition_cfg=[
-        dict(
-            save_file='model.onnx',  # name to save the partitioned onnx model
-            start=['detector_forward:input'],  # [mark_name:input/output, ...]
-            end=['yolo_head:input'])  # [mark_name:input/output, ...]
-    ])
--- a/configs/mmdet3d/voxel-detection/voxel-detection_static.py
+++ b/configs/mmdet3d/voxel-detection/voxel-detection_static.py
@ -3,4 +3,4 @@ codebase_config = dict(
    type='mmdet3d', task='VoxelDetection', model_type='end2end')
 onnx_config = dict(
    input_names=['voxels', 'num_points', 'coors'],
-    output_names=['scores', 'bbox_preds', 'dir_scores'])
+    output_names=['cls_score', 'bbox_pred', 'dir_cls_pred'])
--- a/configs/mmseg/segmentation_rknn_static-320x320.py
+++ b/configs/mmseg/segmentation_rknn_static-320x320.py
@ -1,7 +1,7 @@
 _base_ = ['./segmentation_static.py', '../_base_/backends/rknn.py']

-onnx_config = dict(input_shape=[512, 512])
+onnx_config = dict(input_shape=[320, 320])

 codebase_config = dict(model_type='rknn')

-backend_config = dict(input_size_list=[[3, 512, 512]])
+backend_config = dict(input_size_list=[[3, 320, 320]])
--- a/docs/en/01-how-to-build/rockchip.md
+++ b/docs/en/01-how-to-build/rockchip.md
@ -1,18 +1,26 @@
 # Build for RKNN

-This tutorial is based on Linux systems like Ubuntu-18.04 and Rockchip NPU like `rk3588`.
+This tutorial is based on Ubuntu-18.04 and Rockchip NPU `rk3588`. For different NPU devices, you may have to use different rknn packages.
+Below is a table describing the relationship:
+
+| Device               | Python Package                                                   | c/c++ SDK                                          |
+| -------------------- | ---------------------------------------------------------------- | -------------------------------------------------- |
+| RK1808/RK1806        | [rknn-toolkit](https://github.com/rockchip-linux/rknn-toolkit)   | [rknpu](https://github.com/rockchip-linux/rknpu)   |
+| RV1109/RV1126        | [rknn-toolkit](https://github.com/rockchip-linux/rknn-toolkit)   | [rknpu](https://github.com/rockchip-linux/rknpu)   |
+| RK3566/RK3568/RK3588 | [rknn-toolkit2](https://github.com/rockchip-linux/rknn-toolkit2) | [rknpu2](https://github.com/rockchip-linux/rknpu2) |
+| RV1103/RV1106        | [rknn-toolkit2](https://github.com/rockchip-linux/rknn-toolkit2) | [rknpu2](https://github.com/rockchip-linux/rknpu2) |

 ## Installation

 It is recommended to create a virtual environment for the project.

-1. get RKNN-Toolkit2 through:
+1. Get RKNN-Toolkit2 or RKNN-Toolkit through git. RKNN-Toolkit2 for example:

   ```
   git clone git@github.com:rockchip-linux/rknn-toolkit2.git
   ```

-2. install RKNN python package following [official doc](https://github.com/rockchip-linux/rknn-toolkit2/tree/master/doc). In our testing, we used the rknn-toolkit2 1.2.0 with commit id `834ba0b0a1ab8ee27024443d77b02b5ba48b67fc`. When installing rknn-toolkit2, it is better to append `--no-deps` after the commands to avoid dependency conflicts. For example:
+2. Install the RKNN python package following the [rknn-toolkit2 doc](https://github.com/rockchip-linux/rknn-toolkit2/tree/master/doc) or the [rknn-toolkit doc](https://github.com/rockchip-linux/rknn-toolkit/tree/master/doc). When installing the RKNN python package, it is better to append `--no-deps` to the command to avoid dependency conflicts. Take the RKNN-Toolkit2 package for example:

   ```
   pip install packages/rknn_toolkit2-1.2.0_f7bb160f-cp36-cp36m-linux_x86_64.whl --no-deps
@ -67,17 +75,19 @@ backend_config = dict(

 ```

-The contents of `common_config` are for `rknn.config()`. The contents of `quantization_config` are used to control `rknn.build()`.
+The contents of `common_config` are for `rknn.config()`. The contents of `quantization_config` are used to control `rknn.build()`. You may have to modify `target_platform` for your own preference.

 ## Build SDK with Rockchip NPU

-1. get rknpu2 through:
+### Build SDK with RKNPU2
+
+1. Get rknpu2 through git:

   ```
   git clone git@github.com:rockchip-linux/rknpu2.git
   ```

-2. for linux, download gcc cross compiler. The download link of the compiler from the official user guide of `rknpu2` was deprecated. You may use another verified [link](https://github.com/Caesar-github/gcc-buildroot-9.3.0-2020.03-x86_64_aarch64-rockchip-linux-gnu). After download and unzip the compiler, you may open the terminal, set `RKNN_TOOL_CHAIN` and `RKNPU2_DEVICE_DIR` by `export RKNN_TOOL_CHAIN=/path/to/gcc/usr;export RKNPU2_DEVICE_DIR=/path/to/rknpu2/runtime/RK3588`.
+2. For Linux, download the gcc cross compiler. The download link of the compiler in the official user guide of `rknpu2` is deprecated. You may use another verified [link](https://github.com/Caesar-github/gcc-buildroot-9.3.0-2020.03-x86_64_aarch64-rockchip-linux-gnu). After downloading and unzipping the compiler, open a terminal and set `RKNN_TOOL_CHAIN` and `RKNPU2_DEVICE_DIR` by `export RKNN_TOOL_CHAIN=/path/to/gcc/usr;export RKNPU2_DEVICE_DIR=/path/to/rknpu2/runtime/RK3588`.

 3. After the above preparation, run the following commands:

@ -144,4 +154,38 @@ label: 65, score: 0.95
    mean=[0, 0, 0], std=[1, 1, 1], to_rgb=True)
  ```

-  Besides, the `mean_values` and `std_values` of deploy_cfg should be replaced with original normalization settings of `model_cfg`. Let `mean_values=[123.675, 116.28, 103.53]` and `std_values=[58.395, 57.12, 57.375]`.
+  Besides, the `mean_values` and `std_values` of deploy_cfg should be replaced with original normalization settings of `model_cfg`. Let `mean_values=[[103.53, 116.28, 123.675]]` and `std_values=[[57.375, 57.12, 58.395]]`.
+
+- MMDet models.
+
+  YOLOV3 & YOLOX: you may paste the following partition configuration into [detection_rknn_static.py](https://github.com/open-mmlab/mmdeploy/blob/master/configs/mmdet/detection/detection_rknn_static.py):
+
+  ```python
+  # yolov3, yolox
+  partition_config = dict(
+      type='rknn',  # the partition policy name
+      apply_marks=True,  # should always be set to True
+      partition_cfg=[
+          dict(
+              save_file='model.onnx',  # name to save the partitioned onnx
+              start=['detector_forward:input'],  # [mark_name:input, ...]
+              end=['yolo_head:input'])  # [mark_name:output, ...]
+      ])
+  ```
+
+  RetinaNet & SSD & FSAF with rknn-toolkit2: you may paste the following partition configuration into [detection_rknn_static.py](https://github.com/open-mmlab/mmdeploy/blob/master/configs/mmdet/detection/detection_rknn_static.py). Users of rknn-toolkit can use the default config directly.
+
+  ```python
+  # retinanet, ssd, fsaf
+  partition_config = dict(
+      type='rknn',  # the partition policy name
+      apply_marks=True,
+      partition_cfg=[
+          dict(
+              save_file='model.onnx',
+              start='detector_forward:input',
+              end=['BaseDenseHead:output'])
+      ])
+  ```
+
+- The SDK only supports int8 rknn models, which requires setting `do_quantization=True` when converting models.
--- a/docs/en/03-benchmark/supported_models.md
+++ b/docs/en/03-benchmark/supported_models.md
@ -4,14 +4,14 @@ The table below lists the models that are guaranteed to be exportable to other b

 | Model                       | Codebase         | TorchScript | OnnxRuntime | TensorRT | ncnn | PPLNN | OpenVINO | Ascend | RKNN |                                          Model config                                           |
 | :-------------------------- | :--------------- | :---------: | :---------: | :------: | :--: | :---: | :------: | :----: | :--: | :---------------------------------------------------------------------------------------------: |
-| RetinaNet                   | MMDetection      |      Y      |      Y      |    Y     |  Y   |   Y   |    Y     |   Y    |  N   |        [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/retinanet)        |
+| RetinaNet                   | MMDetection      |      Y      |      Y      |    Y     |  Y   |   Y   |    Y     |   Y    |  Y   |        [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/retinanet)        |
 | Faster R-CNN                | MMDetection      |      Y      |      Y      |    Y     |  Y   |   Y   |    Y     |   Y    |  N   |       [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn)       |
 | YOLOv3                      | MMDetection      |      Y      |      Y      |    Y     |  Y   |   N   |    Y     |   Y    |  Y   |          [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/yolo)           |
 | YOLOX                       | MMDetection      |      Y      |      Y      |    Y     |  Y   |   N   |    Y     |   N    |  Y   |          [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/yolox)          |
 | FCOS                        | MMDetection      |      Y      |      Y      |    Y     |  Y   |   N   |    Y     |   N    |  N   |          [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fcos)           |
-| FSAF                        | MMDetection      |      Y      |      Y      |    Y     |  Y   |   Y   |    Y     |   N    |  N   |          [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fsaf)           |
+| FSAF                        | MMDetection      |      Y      |      Y      |    Y     |  Y   |   Y   |    Y     |   N    |  Y   |          [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fsaf)           |
 | Mask R-CNN                  | MMDetection      |      Y      |      Y      |    Y     |  N   |   N   |    Y     |   N    |  N   |        [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn)        |
-| SSD[\*](#note)              | MMDetection      |      Y      |      Y      |    Y     |  Y   |   N   |    Y     |   N    |  N   |           [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/ssd)           |
+| SSD[\*](#note)              | MMDetection      |      Y      |      Y      |    Y     |  Y   |   N   |    Y     |   N    |  Y   |           [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/ssd)           |
 | FoveaBox                    | MMDetection      |      Y      |      Y      |    N     |  N   |   N   |    Y     |   N    |  N   |        [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/foveabox)         |
 | ATSS                        | MMDetection      |      N      |      Y      |    Y     |  N   |   N   |    Y     |   N    |  N   |          [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/atss)           |
 | GFL                         | MMDetection      |      N      |      Y      |    Y     |  N   |   ?   |    Y     |   N    |  N   |           [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gfl)           |
--- a/docs/en/05-supported-backends/rknn.md
+++ b/docs/en/05-supported-backends/rknn.md
@ -1,9 +1,9 @@
 # Supported RKNN feature

-Currently, MMDeploy only tests rk3588 with linux platform.
+Currently, MMDeploy has only been tested on rk3588 and rv1126 with the Linux platform.

 The following features cannot be automatically enabled by mmdeploy and you need to manually modify the configuration in MMDeploy like [here](https://github.com/open-mmlab/mmdeploy/blob/master/configs/_base_/backends/rknn.py).

- target_platform other than `3588`
+- target_platform other than default
 - quantization settings
- optimization level other than 3
+- optimization level other than 1
--- a/docs/zh_cn/01-how-to-build/rockchip.md
+++ b/docs/zh_cn/01-how-to-build/rockchip.md
@ -1,18 +1,26 @@
 # 支持 RKNN

-本教程基于 Ubuntu-18.04 和 Rockchip `rk3588` NPU。
+本教程基于 Ubuntu-18.04 和 Rockchip `rk3588` NPU。对于不同的 NPU 设备，您需要使用不同的 rknn 包。
+这是设备和安装包的关系表：
+
+| Device               | Python Package                                                   | c/c++ SDK                                          |
+| -------------------- | ---------------------------------------------------------------- | -------------------------------------------------- |
+| RK1808/RK1806        | [rknn-toolkit](https://github.com/rockchip-linux/rknn-toolkit)   | [rknpu](https://github.com/rockchip-linux/rknpu)   |
+| RV1109/RV1126        | [rknn-toolkit](https://github.com/rockchip-linux/rknn-toolkit)   | [rknpu](https://github.com/rockchip-linux/rknpu)   |
+| RK3566/RK3568/RK3588 | [rknn-toolkit2](https://github.com/rockchip-linux/rknn-toolkit2) | [rknpu2](https://github.com/rockchip-linux/rknpu2) |
+| RV1103/RV1106        | [rknn-toolkit2](https://github.com/rockchip-linux/rknn-toolkit2) | [rknpu2](https://github.com/rockchip-linux/rknpu2) |

 ## 安装

 建议为项目创建一个虚拟环境。

-1. 获取 RKNN-Toolkit2:
+1. 使用 git 获取 RKNN-Toolkit2 或者 RKNN-Toolkit。以 RKNN-Toolkit2 为例:

   ```
   git clone git@github.com:rockchip-linux/rknn-toolkit2.git
   ```

-2. 通过 [官方文档](https://github.com/rockchip-linux/rknn-toolkit2/tree/master/doc)，安装 RKNN python 安装包. 在我们的测试中, 使用的 rknn-toolkit 版本是 1.2.0，commit id `834ba0b0a1ab8ee27024443d77b02b5ba48b67fc`。安装 rknn-toolkit2 时，最好在安装命令后添加`--no-deps`，以避免依赖包的冲突。比如:
+2. 通过 [rknn-toolkit2 文档](https://github.com/rockchip-linux/rknn-toolkit2/tree/master/doc) 或者 [rknn-toolkit 文档](https://github.com/rockchip-linux/rknn-toolkit/tree/master/doc)安装 RKNN python 安装包。安装 rknn python 包时，最好在安装命令后添加`--no-deps`，以避免依赖包的冲突。以rknn-toolkit2为例:

   ```
   pip install packages/rknn_toolkit2-1.2.0_f7bb160f-cp36-cp36m-linux_x86_64.whl --no-deps
@ -71,6 +79,8 @@ backend_config = dict(

 ## 安装 SDK

+### RKNPU2 编译 MMDeploy SDK
+
 1. 获取 rknpu2:

   ```
@ -144,4 +154,38 @@ label: 65, score: 0.95
    mean=[0, 0, 0], std=[1, 1, 1], to_rgb=True)
  ```

-  此外, deploy_cfg 的 `mean_values` 和 `std_values` 应该被设置为 `model_cfg` 中归一化的设置. 使 `mean_values=[123.675, 116.28, 103.53]`， `std_values=[58.395, 57.12, 57.375]`。
+  此外, deploy_cfg 的 `mean_values` 和 `std_values` 应该被设置为 `model_cfg` 中归一化的设置. 使 `mean_values=[[103.53, 116.28, 123.675]]`, `std_values=[[57.375, 57.12, 58.395]]`。
+
+- MMDet 模型.
+
+  YOLOV3 & YOLOX: 将下面的模型拆分配置写入到 [detection_rknn_static.py](https://github.com/open-mmlab/mmdeploy/blob/master/configs/mmdet/detection/detection_rknn_static.py):
+
+  ```python
+  # yolov3, yolox
+  partition_config = dict(
+      type='rknn',  # the partition policy name
+      apply_marks=True,  # should always be set to True
+      partition_cfg=[
+          dict(
+              save_file='model.onnx',  # name to save the partitioned onnx
+              start=['detector_forward:input'],  # [mark_name:input, ...]
+              end=['yolo_head:input'])  # [mark_name:output, ...]
+      ])
+  ```
+
+  RetinaNet & SSD & FSAF with rknn-toolkit2, 将下面的模型拆分配置写入到 [detection_rknn_static.py](https://github.com/open-mmlab/mmdeploy/blob/master/configs/mmdet/detection/detection_rknn_static.py)。使用 rknn-toolkit 的用户则不用。
+
+  ```python
+  # retinanet, ssd, fsaf
+  partition_config = dict(
+      type='rknn',  # the partition policy name
+      apply_marks=True,
+      partition_cfg=[
+          dict(
+              save_file='model.onnx',
+              start='detector_forward:input',
+              end=['BaseDenseHead:output'])
+      ])
+  ```
+
+- SDK 只支持 int8 的 rknn 模型，这需要在转换模型时设置 `do_quantization=True`。
--- a/docs/zh_cn/03-benchmark/supported_models.md
+++ b/docs/zh_cn/03-benchmark/supported_models.md
@ -4,14 +4,14 @@

 | Model                       | Codebase         | TorchScript | OnnxRuntime | TensorRT | ncnn | PPLNN | OpenVINO | Ascend | RKNN |                                          Model config                                           |
 | :-------------------------- | :--------------- | :---------: | :---------: | :------: | :--: | :---: | :------: | :----: | :--: | :---------------------------------------------------------------------------------------------: |
-| RetinaNet                   | MMDetection      |      Y      |      Y      |    Y     |  Y   |   Y   |    Y     |   Y    |  N   |        [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/retinanet)        |
+| RetinaNet                   | MMDetection      |      Y      |      Y      |    Y     |  Y   |   Y   |    Y     |   Y    |  Y   |        [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/retinanet)        |
 | Faster R-CNN                | MMDetection      |      Y      |      Y      |    Y     |  Y   |   Y   |    Y     |   Y    |  N   |       [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/faster_rcnn)       |
 | YOLOv3                      | MMDetection      |      Y      |      Y      |    Y     |  Y   |   N   |    Y     |   Y    |  Y   |          [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/yolo)           |
 | YOLOX                       | MMDetection      |      Y      |      Y      |    Y     |  Y   |   N   |    Y     |   N    |  Y   |          [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/yolox)          |
 | FCOS                        | MMDetection      |      Y      |      Y      |    Y     |  Y   |   N   |    Y     |   N    |  N   |          [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fcos)           |
-| FSAF                        | MMDetection      |      Y      |      Y      |    Y     |  Y   |   Y   |    Y     |   N    |  N   |          [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fsaf)           |
+| FSAF                        | MMDetection      |      Y      |      Y      |    Y     |  Y   |   Y   |    Y     |   N    |  Y   |          [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/fsaf)           |
 | Mask R-CNN                  | MMDetection      |      Y      |      Y      |    Y     |  N   |   N   |    Y     |   N    |  N   |        [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/mask_rcnn)        |
-| SSD[\*](#note)              | MMDetection      |      Y      |      Y      |    Y     |  Y   |   N   |    Y     |   N    |  N   |           [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/ssd)           |
+| SSD[\*](#note)              | MMDetection      |      Y      |      Y      |    Y     |  Y   |   N   |    Y     |   N    |  Y   |           [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/ssd)           |
 | FoveaBox                    | MMDetection      |      Y      |      Y      |    N     |  N   |   N   |    Y     |   N    |  N   |        [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/foveabox)         |
 | ATSS                        | MMDetection      |      N      |      Y      |    Y     |  N   |   N   |    Y     |   N    |  N   |          [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/atss)           |
 | GFL                         | MMDetection      |      N      |      Y      |    Y     |  N   |   ?   |    Y     |   N    |  N   |           [config](https://github.com/open-mmlab/mmdetection/tree/master/configs/gfl)           |
--- a/docs/zh_cn/05-supported-backends/rknn.md
+++ b/docs/zh_cn/05-supported-backends/rknn.md
@ -1,9 +1,9 @@
 # 支持的 RKNN 特征

-目前, MMDeploy 只在 rk3588 的 linux 平台上测试过.
+目前, MMDeploy 只在 rk3588 和 rv1126 的 linux 平台上测试过.

 以下特性需要手动在 MMDeploy 自行配置，如[这里](https://github.com/open-mmlab/mmdeploy/blob/master/configs/_base_/backends/rknn.py).

- target_platform ！= `3588`
+- target_platform ！= default
 - quantization settings
- optimization level ！= 3
+- optimization level ！= 1
--- a/mmdeploy/apis/onnx/partition.py
+++ b/mmdeploy/apis/onnx/partition.py
@ -8,7 +8,8 @@ import onnx.utils
 from mmdeploy.apis.core import PIPELINE_MANAGER
 from mmdeploy.core.optimizers import (attribute_to_dict, create_extractor,
                                      get_new_name, parse_extractor_io_string,
-                                      remove_identity, rename_value)
+                                      remove_identity, remove_imports,
+                                      rename_value)
 from mmdeploy.utils import get_root_logger


@ -198,6 +199,9 @@ def extract_partition(model: Union[str, onnx.ModelProto],
                dim.dim_value = 0
                dim.dim_param = f'dim_{idx}'

+    # remove mmdeploy domain if useless
+    remove_imports(extracted_model)
+
    # save extract_model if save_file is given
    if save_file is not None:
        onnx.save(extracted_model, save_file)
--- a/mmdeploy/apis/pytorch2onnx.py
+++ b/mmdeploy/apis/pytorch2onnx.py
@ -3,7 +3,6 @@ import os.path as osp
 from typing import Any, Optional, Union

 import mmengine
-import torch

 from mmdeploy.apis.core.pipeline_manager import no_mp
 from mmdeploy.utils import (Backend, get_backend, get_dynamic_axes,
@ -64,7 +63,8 @@ def torch2onnx(img: Any,
        img,
        input_shape,
        data_preprocessor=getattr(torch_model, 'data_preprocessor', None))
-    if not isinstance(model_inputs, torch.Tensor) and len(model_inputs) == 1:
+
+    if isinstance(model_inputs, list) and len(model_inputs) == 1:
        model_inputs = model_inputs[0]
    data_samples = data['data_samples']
    patch_metas = {'data_samples': data_samples}
--- a/mmdeploy/apis/visualize.py
+++ b/mmdeploy/apis/visualize.py
@ -71,11 +71,20 @@ def visualize_model(model_cfg: Union[str, mmengine.Config],
    with torch.no_grad():
        result = model.test_step(model_inputs)[0]

+    visualize = True
    try:
        # check headless
        import tkinter
        tkinter.Tk()
+    except Exception as e:
+        from mmdeploy.utils import get_root_logger
+        logger = get_root_logger()
+        logger.warning(
+            f'render and display result skipped for headless device, exception {e}'  # noqa: E501
+        )
+        visualize = False

+    if visualize is True:
        task_processor.visualize(
            image=img,
            model=model,
@ -83,9 +92,3 @@ def visualize_model(model_cfg: Union[str, mmengine.Config],
            output_file=output_file,
            window_name=backend.value,
            show_result=show_result)
-    except Exception as e:
-        from mmdeploy.utils import get_root_logger
-        logger = get_root_logger()
-        logger.warn(
-            f'render and display result skipped for headless device, exception {e}'  # noqa: E501
-        )
--- a/mmdeploy/backend/sdk/export_info.py
+++ b/mmdeploy/backend/sdk/export_info.py
@ -132,7 +132,7 @@ def get_inference_info(deploy_cfg: mmengine.Config, model_cfg: mmengine.Config,
    name, _ = get_model_name_customs(deploy_cfg, model_cfg, work_dir, device)
    ir_config = get_ir_config(deploy_cfg)
    backend = get_backend(deploy_cfg=deploy_cfg)
-    if backend == Backend.TORCHSCRIPT:
+    if backend in (Backend.TORCHSCRIPT, Backend.RKNN):
        output_names = ir_config.get('output_names', None)
        input_map = dict(img='#0')
        output_map = {name: f'#{i}' for i, name in enumerate(output_names)}
@ -159,6 +159,11 @@ def get_preprocess(deploy_cfg: mmengine.Config, model_cfg: mmengine.Config,
    task_processor = build_task_processor(
        model_cfg=model_cfg, deploy_cfg=deploy_cfg, device=device)
    transforms = task_processor.get_preprocess()
+    if get_backend(deploy_cfg) == Backend.RKNN:
+        del transforms[-2]
+        for transform in transforms:
+            if transform['type'] == 'Normalize':
+                transform['to_float'] = False
    assert transforms[0]['type'] == 'LoadImageFromFile', 'The first item'\
        ' type of pipeline should be LoadImageFromFile'
    return dict(
--- a/mmdeploy/backend/tensorrt/utils.py
+++ b/mmdeploy/backend/tensorrt/utils.py
@ -43,7 +43,6 @@ def load(path: str) -> trt.ICudaEngine:

 def search_cuda_version() -> str:
    """try cmd to get cuda version, then try `torch.cuda`
-
    Returns:
        str: cuda version, for example 10.2
    """
--- a/mmdeploy/codebase/base/mmcodebase.py
+++ b/mmdeploy/codebase/base/mmcodebase.py
@ -49,9 +49,15 @@ class MMCodebase(metaclass=ABCMeta):
                deploy_cfg=deploy_cfg,
                device=device))

+    @classmethod
+    def register_deploy_modules(cls):
+        """register deploy module."""
+        raise NotImplementedError('register_deploy_modules not implemented.')
+
    @classmethod
    def register_all_modules(cls):
-        pass
+        """register codebase module."""
+        raise NotImplementedError('register_all_modules not implemented.')


 # Note that the build function returns the class instead of its instance.
--- a/mmdeploy/codebase/base/task.py
+++ b/mmdeploy/codebase/base/task.py
@ -75,6 +75,7 @@ class BaseTask(metaclass=ABCMeta):

        from mmengine.registry import MODELS
        data_preprocessor = MODELS.build(preprocess_cfg)
+        data_preprocessor.to(self.device)

        return data_preprocessor

--- a/mmdeploy/codebase/mmaction/init.py
+++ b/mmdeploy/codebase/mmaction/init.py
@ -1,4 +1,3 @@
 # Copyright (c) OpenMMLab. All rights reserved.

 from .deploy import *  # noqa: F401,F403
-from .models import *  # noqa: F401,F403
--- a/mmdeploy/codebase/mmaction/deploy/mmaction.py
+++ b/mmdeploy/codebase/mmaction/deploy/mmaction.py
@ -13,7 +13,12 @@ class MMACTION(MMCodebase):

    task_registry = MMACTION_TASK

+    @classmethod
+    def register_deploy_modules(cls):
+        import mmdeploy.codebase.mmaction.models  # noqa: F401
+
    @classmethod
    def register_all_modules(cls):
        from mmaction.utils.setup_env import register_all_modules
+        cls.register_deploy_modules()
        register_all_modules(True)
--- a/mmdeploy/codebase/mmaction/models/init.py
+++ b/mmdeploy/codebase/mmaction/models/init.py
@ -1,3 +1,3 @@
 # Copyright (c) OpenMMLab. All rights reserved.

-from .recognizers import *  # noqa: F401,F403
+from . import recognizers  # noqa: F401,F403
--- a/mmdeploy/codebase/mmaction/models/recognizers/init.py
+++ b/mmdeploy/codebase/mmaction/models/recognizers/init.py
@ -1,5 +1,3 @@
 # Copyright (c) OpenMMLab. All rights reserved.

-from .base import base_recognizer__forward
-
-__all__ = ['base_recognizer__forward']
+from . import base  # noqa: F401,F403
--- a/mmdeploy/codebase/mmcls/init.py
+++ b/mmdeploy/codebase/mmcls/init.py
@ -1,3 +1,2 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 from .deploy import *  # noqa: F401,F403
-from .models import *  # noqa: F401,F403
--- a/mmdeploy/codebase/mmcls/models/init.py
+++ b/mmdeploy/codebase/mmcls/models/init.py
@ -1,5 +1,5 @@
 # Copyright (c) OpenMMLab. All rights reserved.
-from .backbones import *  # noqa: F401,F403
-from .classifiers import *  # noqa: F401,F403
-from .necks import *  # noqa: F401,F403
-from .utils import *  # noqa: F401,F403
+from . import backbones  # noqa: F401,F403
+from . import classifiers  # noqa: F401,F403
+from . import necks  # noqa: F401,F403
+from . import utils  # noqa: F401,F403
--- a/mmdeploy/codebase/mmcls/models/backbones/init.py
+++ b/mmdeploy/codebase/mmcls/models/backbones/init.py
@ -1,8 +1,3 @@
 # Copyright (c) OpenMMLab. All rights reserved.
-from .shufflenet_v2 import shufflenetv2_backbone__forward__default
-from .vision_transformer import visiontransformer__forward__ncnn
-
-__all__ = [
-    'shufflenetv2_backbone__forward__default',
-    'visiontransformer__forward__ncnn'
-]
+from . import shufflenet_v2  # noqa: F401,F403
+from . import vision_transformer  # noqa: F401,F403
--- a/mmdeploy/codebase/mmcls/models/classifiers/init.py
+++ b/mmdeploy/codebase/mmcls/models/classifiers/init.py
@ -1,4 +1,2 @@
 # Copyright (c) OpenMMLab. All rights reserved.
-from .base import base_classifier__forward
-
-__all__ = ['base_classifier__forward']
+from . import base  # noqa: F401,F403
--- a/mmdeploy/codebase/mmcls/models/necks/init.py
+++ b/mmdeploy/codebase/mmcls/models/necks/init.py
@ -1,5 +1,2 @@
 # Copyright (c) OpenMMLab. All rights reserved.
-
-from .gap import gap__forward
-
-__all__ = ['gap__forward']
+from . import gap  # noqa: F401,F403
--- a/mmdeploy/codebase/mmcls/models/utils/init.py
+++ b/mmdeploy/codebase/mmcls/models/utils/init.py
@ -1,10 +1,2 @@
 # Copyright (c) OpenMMLab. All rights reserved.
-from .attention import (multiheadattention__forward__ncnn,
-                        shift_window_msa__forward__default,
-                        shift_window_msa__get_attn_mask__default)
-
-__all__ = [
-    'multiheadattention__forward__ncnn',
-    'shift_window_msa__get_attn_mask__default',
-    'shift_window_msa__forward__default'
-]
+from . import attention  # noqa: F401,F403
--- a/mmdeploy/codebase/mmdet/init.py
+++ b/mmdeploy/codebase/mmdet/init.py
@ -2,9 +2,6 @@
 from .deploy import (ObjectDetection, clip_bboxes, gather_topk,
                     get_post_processing_params, pad_with_value,
                     pad_with_value_if_necessary)
-from .models import *  # noqa: F401,F403
-from .ops import *  # noqa: F401,F403
-from .structures import *  # noqa: F401, F403

 __all__ = [
    'get_post_processing_params', 'clip_bboxes', 'pad_with_value',
--- a/mmdeploy/codebase/mmdet/deploy/object_detection.py
+++ b/mmdeploy/codebase/mmdet/deploy/object_detection.py
@ -10,8 +10,9 @@ from mmengine.model import BaseDataPreprocessor
 from mmengine.registry import Registry

 from mmdeploy.codebase.base import CODEBASE, BaseTask, MMCodebase
-from mmdeploy.utils import Codebase, Task
-from mmdeploy.utils.config_utils import get_input_shape, is_dynamic_shape
+from mmdeploy.utils import Backend, Codebase, Task
+from mmdeploy.utils.config_utils import (get_backend, get_input_shape,
+                                         is_dynamic_shape)

 MMDET_TASK = Registry('mmdet_tasks')

@ -278,6 +279,14 @@ class ObjectDetection(BaseTask):
            if 'mask_thr_binary' in params['rcnn']:
                params['mask_thr_binary'] = params['rcnn']['mask_thr_binary']
                type = 'ResizeInstanceMask'  # for instance-seg
+        if get_backend(self.deploy_cfg) == Backend.RKNN:
+            if 'YOLO' in self.model_cfg.model.type:
+                bbox_head = self.model_cfg.model.bbox_head
+                type = bbox_head.type
+                params['anchor_generator'] = bbox_head.get(
+                    'anchor_generator', None)
+            else:  # default using base_dense_head
+                type = 'BaseDenseHead'
        return dict(type=type, params=params)

    def get_model_name(self, *args, **kwargs) -> str:
--- a/mmdeploy/codebase/mmdet/deploy/object_detection_model.py
+++ b/mmdeploy/codebase/mmdet/deploy/object_detection_model.py
@ -13,7 +13,8 @@ from torch import Tensor, nn

 from mmdeploy.backend.base import get_backend_file_count
 from mmdeploy.codebase.base import BaseBackendModel
-from mmdeploy.codebase.mmdet import get_post_processing_params, multiclass_nms
+from mmdeploy.codebase.mmdet.deploy import get_post_processing_params
+from mmdeploy.codebase.mmdet.models.layers import multiclass_nms
 from mmdeploy.utils import (Backend, get_backend, get_codebase_config,
                            get_partition_config, load_config)

@ -657,10 +658,11 @@ class RKNNModel(End2EndModel):
        head_cfg = self.model_cfg._cfg_dict.model.bbox_head
        head = build_head(head_cfg)
        if head_cfg.type == 'YOLOXHead':
+            divisor = round(len(outputs) / 3)
            ret = head.predict_by_feat(
-                outputs[:3],
-                outputs[3:6],
-                outputs[6:9],
+                outputs[:divisor],
+                outputs[divisor:2 * divisor],
+                outputs[2 * divisor:],
                metainfos,
                cfg=self.model_cfg._cfg_dict.model.test_cfg,
                rescale=True)
@ -670,6 +672,31 @@ class RKNNModel(End2EndModel):
                metainfos,
                cfg=self.model_cfg._cfg_dict.model.test_cfg,
                rescale=True)
+        elif head_cfg.type in ('RetinaHead', 'SSDHead', 'FSAFHead'):
+            partition_cfgs = get_partition_config(self.deploy_cfg)
+            if partition_cfgs is None:  # bbox decoding done in rknn model
+                from mmdet.structures.bbox import scale_boxes
+
+                from ..models.layers.bbox_nms import _multiclass_nms
+                dets, labels = _multiclass_nms(outputs[0], outputs[1])
+                ret = [InstanceData() for i in range(dets.shape[0])]
+                for i, instance_data in enumerate(ret):
+                    instance_data.bboxes = dets[i, :, :4]
+                    instance_data.scores = dets[i, :, 4]
+                    instance_data.labels = labels[i]
+                    scale_factor = [
+                        1 / s for s in metainfos[i]['scale_factor']
+                    ]
+                    instance_data.bboxes = scale_boxes(instance_data.bboxes,
+                                                       scale_factor)
+                return ret
+            divisor = round(len(outputs) / 2)
+            ret = head.predict_by_feat(
+                outputs[:divisor],
+                outputs[divisor:],
+                batch_img_metas=metainfos,
+                rescale=True,
+                cfg=self.model_cfg._cfg_dict.model.test_cfg)
        else:
            raise NotImplementedError(f'{head_cfg.type} not supported yet.')
        return ret
--- a/mmdeploy/codebase/mmdet/models/__init__.py
+++ b/mmdeploy/codebase/mmdet/models/__init__.py
@ -1,9 +1,9 @@
 # Copyright (c) OpenMMLab. All rights reserved.
-from .backbones import *  # noqa: F401, F403
-from .dense_heads import *  # noqa: F401,F403
-from .detectors import *  # noqa: F401,F403
-from .layers import *  # noqa: F401,F403
-from .necks import *  # noqa: F401,F403
-from .roi_heads import *  # noqa: F401,F403
-from .task_modules import *  # noqa: F401,F403
-from .transformer import *  # noqa: F401,F403
+from . import backbones  # noqa: F401, F403
+from . import dense_heads  # noqa: F401,F403
+from . import detectors  # noqa: F401,F403
+from . import layers  # noqa: F401,F403
+from . import necks  # noqa: F401,F403
+from . import roi_heads  # noqa: F401,F403
+from . import task_modules  # noqa: F401,F403
+from . import transformer  # noqa: F401,F403
--- a/mmdeploy/codebase/mmdet/models/dense_heads/__init__.py
+++ b/mmdeploy/codebase/mmdet/models/dense_heads/__init__.py
@ -1,23 +1,10 @@
 # Copyright (c) OpenMMLab. All rights reserved.
-from . import detr_head
-from .base_dense_head import (base_dense_head__predict_by_feat,
-                              base_dense_head__predict_by_feat__ncnn)
-from .fovea_head import fovea_head__predict_by_feat
-from .gfl_head import gfl_head__predict_by_feat
-from .reppoints_head import reppoints_head__predict_by_feat
-from .rpn_head import rpn_head__get_bboxes__ncnn, rpn_head__predict_by_feat
-from .rtmdet_head import rtmdet_head__predict_by_feat
-from .yolo_head import (yolov3_head__predict_by_feat,
-                        yolov3_head__predict_by_feat__ncnn)
-from .yolox_head import (yolox_head__predict_by_feat,
-                         yolox_head__predict_by_feat__ncnn)
-
-__all__ = [
-    'rpn_head__predict_by_feat', 'rpn_head__get_bboxes__ncnn',
-    'yolov3_head__predict_by_feat', 'yolov3_head__predict_by_feat__ncnn',
-    'yolox_head__predict_by_feat', 'base_dense_head__predict_by_feat',
-    'fovea_head__predict_by_feat', 'base_dense_head__predict_by_feat__ncnn',
-    'yolox_head__predict_by_feat__ncnn', 'gfl_head__predict_by_feat',
-    'reppoints_head__predict_by_feat', 'detr_head',
-    'rtmdet_head__predict_by_feat'
-]
+from . import base_dense_head  # noqa: F401,F403
+from . import detr_head  # noqa: F401,F403
+from . import fovea_head  # noqa: F401,F403
+from . import gfl_head  # noqa: F401,F403
+from . import reppoints_head  # noqa: F401,F403
+from . import rpn_head  # noqa: F401,F403
+from . import rtmdet_head  # noqa: F401,F403
+from . import yolo_head  # noqa: F401,F403
+from . import yolox_head  # noqa: F401,F403
--- a/mmdeploy/codebase/mmdet/models/dense_heads/base_dense_head.py
+++ b/mmdeploy/codebase/mmdet/models/dense_heads/base_dense_head.py
@ -10,11 +10,12 @@ from mmdet.structures.bbox.transforms import distance2bbox
 from mmengine import ConfigDict
 from torch import Tensor

-from mmdeploy.codebase.mmdet import (gather_topk, get_post_processing_params,
-                                     pad_with_value_if_necessary)
+from mmdeploy.codebase.mmdet.deploy import (gather_topk,
+                                            get_post_processing_params,
+                                            pad_with_value_if_necessary)
 from mmdeploy.codebase.mmdet.models.layers import multiclass_nms
 from mmdeploy.codebase.mmdet.ops import ncnn_detection_output_forward
-from mmdeploy.core import FUNCTION_REWRITER
+from mmdeploy.core import FUNCTION_REWRITER, mark
 from mmdeploy.utils import Backend, is_dynamic_shape


@ -192,6 +193,132 @@ def base_dense_head__predict_by_feat(
@FUNCTION_REWRITER.register_rewriter(
    func_name='mmdet.models.dense_heads.base_dense_head.'
    'BaseDenseHead.predict_by_feat',
+    backend=Backend.RKNN.value)
+def base_dense_head__predict_by_feat__rknn(
+        ctx,
+        self,
+        cls_scores: List[Tensor],
+        bbox_preds: List[Tensor],
+        score_factors: Optional[List[Tensor]] = None,
+        batch_img_metas: Optional[List[dict]] = None,
+        cfg: Optional[ConfigDict] = None,
+        rescale: bool = False,
+        with_nms: bool = True,
+        **kwargs):
+    """Rewrite `predict_by_feat` of `BaseDenseHead` for the RKNN backend.
+    Marks the head inputs for partition, then decodes the boxes and
+    activates the scores of all levels. No nms is applied here.
+    Args:
+        ctx (ContextCaller): The context with additional information.
+        cls_scores (list[Tensor]): Classification scores for all
+            scale levels, each is a 4D-tensor, has shape
+            (batch_size, num_priors * num_classes, H, W).
+        bbox_preds (list[Tensor]): Box energies / deltas for all
+            scale levels, each is a 4D-tensor, has shape
+            (batch_size, num_priors * 4, H, W).
+        score_factors (list[Tensor], optional): Score factor for
+            all scale levels, each is a 4D-tensor, has shape
+            (batch_size, num_priors * 1, H, W). Defaults to None.
+        batch_img_metas (list[dict], Optional): Batch image meta info.
+            Must not be None; `img_shape` of the first item is read.
+        cfg (ConfigDict, optional): Test / postprocessing
+            configuration. Not read by this rewrite.
+            Defaults to None.
+        rescale (bool): Not read by this rewrite.
+            Defaults to False.
+        with_nms (bool): Not read by this rewrite.
+            Defaults to True.
+    Returns:
+        tuple[Tensor, Tensor]: (batch_bboxes, batch_scores).
+            `batch_bboxes` is the output of `self.bbox_coder.decode` on
+            the priors and bbox predictions concatenated over all
+            levels. `batch_scores` holds the sigmoid / softmax scores
+            concatenated the same way, multiplied by the sigmoid of
+            `score_factors` when they are given (and square-rooted
+            for `PAAHead`).
+    """
+    # mark nodes for partition
+    @mark('BaseDenseHead', outputs=['BaseDenseHead.cls', 'BaseDenseHead.loc'])
+    def __mark_dense_head(cls_scores, bbox_preds):
+        return cls_scores, bbox_preds
+
+    cls_scores, bbox_preds = __mark_dense_head(cls_scores, bbox_preds)
+
+    deploy_cfg = ctx.cfg
+    is_dynamic_flag = is_dynamic_shape(deploy_cfg)
+    num_levels = len(cls_scores)
+
+    featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores]
+    mlvl_priors = self.prior_generator.grid_priors(
+        featmap_sizes, dtype=bbox_preds[0].dtype, device=bbox_preds[0].device)
+    mlvl_priors = [priors.unsqueeze(0) for priors in mlvl_priors]
+
+    mlvl_cls_scores = [cls_scores[i].detach() for i in range(num_levels)]
+    mlvl_bbox_preds = [bbox_preds[i].detach() for i in range(num_levels)]
+    if score_factors is None:
+        with_score_factors = False
+        mlvl_score_factor = [None for _ in range(num_levels)]
+    else:
+        with_score_factors = True
+        mlvl_score_factor = [
+            score_factors[i].detach() for i in range(num_levels)
+        ]
+        mlvl_score_factors = []
+    assert batch_img_metas is not None
+    img_shape = batch_img_metas[0]['img_shape']
+
+    assert len(cls_scores) == len(bbox_preds) == len(mlvl_priors)
+    batch_size = cls_scores[0].shape[0]
+
+    mlvl_valid_bboxes = []
+    mlvl_valid_scores = []
+    mlvl_valid_priors = []
+
+    for cls_score, bbox_pred, score_factors, priors in zip(
+            mlvl_cls_scores, mlvl_bbox_preds, mlvl_score_factor, mlvl_priors):
+        assert cls_score.size()[-2:] == bbox_pred.size()[-2:]
+
+        scores = cls_score.permute(0, 2, 3, 1).reshape(batch_size, -1,
+                                                       self.cls_out_channels)
+        if self.use_sigmoid_cls:
+            scores = scores.sigmoid()
+        else:
+            scores = scores.softmax(-1)[:, :, :-1]
+        if with_score_factors:
+            score_factors = score_factors.permute(0, 2, 3,
+                                                  1).reshape(batch_size,
+                                                             -1).sigmoid()
+            score_factors = score_factors.unsqueeze(2)
+        bbox_pred = bbox_pred.permute(0, 2, 3, 1).reshape(batch_size, -1, 4)
+        if not is_dynamic_flag:
+            priors = priors.data
+
+        mlvl_valid_bboxes.append(bbox_pred)
+        mlvl_valid_scores.append(scores)
+        mlvl_valid_priors.append(priors)
+        if with_score_factors:
+            mlvl_score_factors.append(score_factors)
+
+    batch_mlvl_bboxes_pred = torch.cat(mlvl_valid_bboxes, dim=1)
+    batch_scores = torch.cat(mlvl_valid_scores, dim=1)
+    batch_priors = torch.cat(mlvl_valid_priors, dim=1)
+    batch_bboxes = self.bbox_coder.decode(
+        batch_priors, batch_mlvl_bboxes_pred, max_shape=img_shape)
+    if with_score_factors:
+        batch_score_factors = torch.cat(mlvl_score_factors, dim=1)
+    if not self.use_sigmoid_cls:
+        batch_scores = batch_scores[..., :self.num_classes]
+
+    if with_score_factors:
+        batch_scores = batch_scores * batch_score_factors
+        if isinstance(self, PAAHead):
+            batch_scores = batch_scores.sqrt()
+    return batch_bboxes, batch_scores
+
+
+@FUNCTION_REWRITER.register_rewriter(
+    func_name='mmdet.models.dense_heads.base_dense_head.BaseDenseHead'
+    '.predict_by_feat',
    backend=Backend.NCNN.value)
 def base_dense_head__predict_by_feat__ncnn(
        ctx,
--- a/mmdeploy/codebase/mmdet/models/dense_heads/fovea_head.py
+++ b/mmdeploy/codebase/mmdet/models/dense_heads/fovea_head.py
@ -6,7 +6,7 @@ from mmengine.config import ConfigDict
 from mmengine.structures import InstanceData
 from torch import Tensor

-from mmdeploy.codebase.mmdet import get_post_processing_params
+from mmdeploy.codebase.mmdet.deploy import get_post_processing_params
 from mmdeploy.codebase.mmdet.models.layers import multiclass_nms
 from mmdeploy.core import FUNCTION_REWRITER

--- a/mmdeploy/codebase/mmdet/models/dense_heads/gfl_head.py
+++ b/mmdeploy/codebase/mmdet/models/dense_heads/gfl_head.py
@ -7,8 +7,9 @@ from mmengine.config import ConfigDict
 from mmengine.structures import InstanceData
 from torch import Tensor

-from mmdeploy.codebase.mmdet import (gather_topk, get_post_processing_params,
-                                     pad_with_value)
+from mmdeploy.codebase.mmdet.deploy import (gather_topk,
+                                            get_post_processing_params,
+                                            pad_with_value)
 from mmdeploy.codebase.mmdet.models.layers import multiclass_nms
 from mmdeploy.core import FUNCTION_REWRITER
 from mmdeploy.utils import Backend, get_backend, is_dynamic_shape
--- a/mmdeploy/codebase/mmdet/models/dense_heads/reppoints_head.py
+++ b/mmdeploy/codebase/mmdet/models/dense_heads/reppoints_head.py
@ -6,8 +6,9 @@ from mmengine.config import ConfigDict
 from mmengine.structures import InstanceData
 from torch import Tensor

-from mmdeploy.codebase.mmdet import (gather_topk, get_post_processing_params,
-                                     pad_with_value_if_necessary)
+from mmdeploy.codebase.mmdet.deploy import (gather_topk,
+                                            get_post_processing_params,
+                                            pad_with_value_if_necessary)
 from mmdeploy.codebase.mmdet.models.layers import multiclass_nms
 from mmdeploy.core import FUNCTION_REWRITER
 from mmdeploy.utils import is_dynamic_shape
--- a/mmdeploy/codebase/mmdet/models/dense_heads/rpn_head.py
+++ b/mmdeploy/codebase/mmdet/models/dense_heads/rpn_head.py
@ -5,8 +5,9 @@ import torch
 from mmengine import ConfigDict
 from torch import Tensor

-from mmdeploy.codebase.mmdet import (gather_topk, get_post_processing_params,
-                                     pad_with_value_if_necessary)
+from mmdeploy.codebase.mmdet.deploy import (gather_topk,
+                                            get_post_processing_params,
+                                            pad_with_value_if_necessary)
 from mmdeploy.codebase.mmdet.models.layers import multiclass_nms
 from mmdeploy.core import FUNCTION_REWRITER
 from mmdeploy.utils import Backend, is_dynamic_shape
--- a/mmdeploy/codebase/mmdet/models/dense_heads/ssd_head.py
+++ b/mmdeploy/codebase/mmdet/models/dense_heads/ssd_head.py
@ -1,7 +1,7 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 import torch

-from mmdeploy.codebase.mmdet import get_post_processing_params
+from mmdeploy.codebase.mmdet.deploy import get_post_processing_params
 from mmdeploy.codebase.mmdet.ops import (ncnn_detection_output_forward,
                                         ncnn_prior_box_forward)
 from mmdeploy.core import FUNCTION_REWRITER
--- a/mmdeploy/codebase/mmdet/models/dense_heads/yolo_head.py
+++ b/mmdeploy/codebase/mmdet/models/dense_heads/yolo_head.py
@ -6,8 +6,8 @@ import torch
 from mmdet.utils.typing import OptConfigType
 from torch import Tensor

-from mmdeploy.codebase.mmdet import (get_post_processing_params,
-                                     pad_with_value_if_necessary)
+from mmdeploy.codebase.mmdet.deploy import (get_post_processing_params,
+                                            pad_with_value_if_necessary)
 from mmdeploy.codebase.mmdet.models.layers import multiclass_nms
 from mmdeploy.core import FUNCTION_REWRITER, mark
 from mmdeploy.utils import Backend, is_dynamic_shape
--- a/mmdeploy/codebase/mmdet/models/dense_heads/yolox_head.py
+++ b/mmdeploy/codebase/mmdet/models/dense_heads/yolox_head.py
@ -6,7 +6,7 @@ from mmengine.config import ConfigDict
 from mmengine.structures import InstanceData
 from torch import Tensor

-from mmdeploy.codebase.mmdet import get_post_processing_params
+from mmdeploy.codebase.mmdet.deploy import get_post_processing_params
 from mmdeploy.codebase.mmdet.models.layers import multiclass_nms
 from mmdeploy.core import FUNCTION_REWRITER, mark
 from mmdeploy.utils import Backend
--- a/mmdeploy/codebase/mmdet/models/layers/__init__.py
+++ b/mmdeploy/codebase/mmdet/models/layers/__init__.py
@ -1,4 +1,4 @@
 # Copyright (c) OpenMMLab. All rights reserved.
-from .bbox_nms import _multiclass_nms, multiclass_nms
+from .bbox_nms import multiclass_nms

-__all__ = ['multiclass_nms', '_multiclass_nms']
+__all__ = ['multiclass_nms']
--- a/mmdeploy/codebase/mmdet/models/layers/bbox_nms.py
+++ b/mmdeploy/codebase/mmdet/models/layers/bbox_nms.py
@ -2,7 +2,6 @@
 import torch
 from torch import Tensor

-import mmdeploy
 from mmdeploy.core import FUNCTION_REWRITER, mark
 from mmdeploy.mmcv.ops import ONNXNMSop, TRTBatchedNMSop
 from mmdeploy.utils import IR, is_dynamic_batch
@ -166,7 +165,7 @@ def _multiclass_nms_single(boxes: Tensor,


@FUNCTION_REWRITER.register_rewriter(
-    func_name='mmdeploy.codebase.mmdet.models.layers._multiclass_nms')
+    func_name='mmdeploy.codebase.mmdet.models.layers.bbox_nms._multiclass_nms')
 def multiclass_nms__default(ctx,
                            boxes: Tensor,
                            scores: Tensor,
@ -223,7 +222,7 @@ def multiclass_nms__default(ctx,


@FUNCTION_REWRITER.register_rewriter(
-    func_name='mmdeploy.codebase.mmdet.models.layers._multiclass_nms',
+    func_name='mmdeploy.codebase.mmdet.models.layers.bbox_nms._multiclass_nms',
    backend='tensorrt')
 def multiclass_nms_static(ctx,
                          boxes: Tensor,
@ -274,12 +273,11 @@ def multiclass_nms_static(ctx,
@mark('multiclass_nms', inputs=['boxes', 'scores'], outputs=['dets', 'labels'])
 def multiclass_nms(*args, **kwargs):
    """Wrapper function for `_multiclass_nms`."""
-    return mmdeploy.codebase.mmdet.models.layers._multiclass_nms(
-        *args, **kwargs)
+    return _multiclass_nms(*args, **kwargs)


@FUNCTION_REWRITER.register_rewriter(
-    func_name='mmdeploy.codebase.mmdet.models.layers._multiclass_nms',
+    func_name='mmdeploy.codebase.mmdet.models.layers.bbox_nms._multiclass_nms',
    backend=Backend.COREML.value)
 def multiclass_nms__coreml(ctx,
                           boxes: Tensor,
@ -340,7 +338,7 @@ def multiclass_nms__coreml(ctx,


@FUNCTION_REWRITER.register_rewriter(
-    func_name='mmdeploy.codebase.mmdet.models.layers._multiclass_nms',
+    func_name='mmdeploy.codebase.mmdet.models.layers.bbox_nms._multiclass_nms',
    ir=IR.TORCHSCRIPT)
 def multiclass_nms__torchscript(ctx,
                                boxes: Tensor,
--- a/mmdeploy/codebase/mmdet/models/roi_heads/__init__.py
+++ b/mmdeploy/codebase/mmdet/models/roi_heads/__init__.py
@ -1,19 +1,6 @@
 # Copyright (c) OpenMMLab. All rights reserved.
-from .bbox_head import bbox_head__forward, bbox_head__predict_by_feat
-from .cascade_roi_head import (cascade_roi_head__predict_bbox,
-                               cascade_roi_head__predict_mask)
-from .fcn_mask_head import fcn_mask_head__predict_by_feat
-from .single_level_roi_extractor import (
-    single_roi_extractor__forward, single_roi_extractor__forward__openvino,
-    single_roi_extractor__forward__tensorrt)
-from .standard_roi_head import (standard_roi_head__predict_bbox,
-                                standard_roi_head__predict_mask)
-
-__all__ = [
-    'bbox_head__predict_by_feat', 'bbox_head__forward',
-    'cascade_roi_head__predict_bbox', 'cascade_roi_head__predict_mask',
-    'fcn_mask_head__predict_by_feat', 'single_roi_extractor__forward',
-    'single_roi_extractor__forward__openvino',
-    'single_roi_extractor__forward__tensorrt',
-    'standard_roi_head__predict_bbox', 'standard_roi_head__predict_mask'
-]
+from . import bbox_head  # noqa: F401,F403
+from . import cascade_roi_head  # noqa: F401,F403
+from . import fcn_mask_head  # noqa: F401,F403
+from . import single_level_roi_extractor  # noqa: F401,F403
+from . import standard_roi_head  # noqa: F401,F403
--- a/mmdeploy/codebase/mmdet/models/roi_heads/bbox_head.py
+++ b/mmdeploy/codebase/mmdet/models/roi_heads/bbox_head.py
@ -6,7 +6,7 @@ import torch.nn.functional as F
 from mmengine import ConfigDict
 from torch import Tensor

-from mmdeploy.codebase.mmdet import get_post_processing_params
+from mmdeploy.codebase.mmdet.deploy import get_post_processing_params
 from mmdeploy.codebase.mmdet.models.layers import multiclass_nms
 from mmdeploy.core import FUNCTION_REWRITER, mark

--- a/mmdeploy/codebase/mmdet/models/roi_heads/fcn_mask_head.py
+++ b/mmdeploy/codebase/mmdet/models/roi_heads/fcn_mask_head.py
@ -6,7 +6,7 @@ import torch.nn.functional as F
 from mmengine import ConfigDict
 from torch import Tensor

-from mmdeploy.codebase.mmdet import get_post_processing_params
+from mmdeploy.codebase.mmdet.deploy import get_post_processing_params
 from mmdeploy.core import FUNCTION_REWRITER
 from mmdeploy.utils import Backend, get_backend

--- a/mmdeploy/codebase/mmdet/models/task_modules/__init__.py
+++ b/mmdeploy/codebase/mmdet/models/task_modules/__init__.py
@ -1,3 +1,3 @@
 # Copyright (c) OpenMMLab. All rights reserved.
-from .coders import *  # noqa: F401,F403
-from .prior_generators import *  # noqa: F401,F403
+from . import coders  # noqa: F401,F403
+from . import prior_generators  # noqa: F401,F403
--- a/mmdeploy/codebase/mmdet/models/task_modules/coders/__init__.py
+++ b/mmdeploy/codebase/mmdet/models/task_modules/coders/__init__.py
@ -1,4 +1,4 @@
 # Copyright (c) OpenMMLab. All rights reserved.
-from .delta_xywh_bbox_coder import *  # noqa: F401,F403
-from .distance_point_bbox_coder import *  # noqa: F401,F403
-from .tblr_bbox_coder import *  # noqa: F401,F403
+from . import delta_xywh_bbox_coder  # noqa: F401,F403
+from . import distance_point_bbox_coder  # noqa: F401,F403
+from . import tblr_bbox_coder  # noqa: F401,F403
--- a/mmdeploy/codebase/mmdet/models/task_modules/prior_generators/__init__.py
+++ b/mmdeploy/codebase/mmdet/models/task_modules/prior_generators/__init__.py
@ -1,3 +1,3 @@
 # Copyright (c) OpenMMLab. All rights reserved.
-from .anchor import *  # noqa: F401,F403
-from .point_generator import *  # noqa: F401,F403
+from . import anchor  # noqa: F401,F403
+from . import point_generator  # noqa: F401,F403
--- a/mmdeploy/codebase/mmdet/structures/__init__.py
+++ b/mmdeploy/codebase/mmdet/structures/__init__.py
@ -1,2 +1,2 @@
 # Copyright (c) OpenMMLab. All rights reserved.
-from .bbox import *  # noqa: F401,F403
+from . import bbox  # noqa: F401,F403
--- a/mmdeploy/codebase/mmdet/structures/bbox/__init__.py
+++ b/mmdeploy/codebase/mmdet/structures/bbox/__init__.py
@ -1,2 +1,2 @@
 # Copyright (c) OpenMMLab. All rights reserved.
-from .transforms import *  # noqa: F401,F403
+from . import transforms  # noqa: F401,F403
--- a/mmdeploy/codebase/mmdet3d/__init__.py
+++ b/mmdeploy/codebase/mmdet3d/__init__.py
@ -1,5 +1,4 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 from .deploy import MMDetection3d, VoxelDetection
-from .models import *  # noqa: F401,F403

 __all__ = ['MMDetection3d', 'VoxelDetection']
--- a/mmdeploy/codebase/mmdet3d/deploy/__init__.py
+++ b/mmdeploy/codebase/mmdet3d/deploy/__init__.py
@ -1,6 +1,5 @@
 # Copyright (c) OpenMMLab. All rights reserved.
-from .mmdetection3d import MMDetection3d
-from .voxel_detection import VoxelDetection
+from .voxel_detection import MMDetection3d, VoxelDetection
 from .voxel_detection_model import VoxelDetectionModel

 __all__ = ['MMDetection3d', 'VoxelDetection', 'VoxelDetectionModel']
--- a/mmdeploy/codebase/mmdet3d/deploy/mmdetection3d.py
+++ b/mmdeploy/codebase/mmdet3d/deploy/mmdetection3d.py
@ -1,128 +0,0 @@
-# Copyright (c) OpenMMLab. All rights reserved.
-from typing import Optional, Union
-
-import mmengine
-from mmcv.utils import Registry
-from torch.utils.data import DataLoader, Dataset
-
-from mmdeploy.codebase.base import CODEBASE, BaseTask, MMCodebase
-from mmdeploy.utils import Codebase, get_task_type
-
-
-def __build_mmdet3d_task(model_cfg: mmengine.Config,
-                         deploy_cfg: mmengine.Config, device: str,
-                         registry: Registry) -> BaseTask:
-    task = get_task_type(deploy_cfg)
-    return registry.module_dict[task.value](model_cfg, deploy_cfg, device)
-
-
-MMDET3D_TASK = Registry('mmdet3d_tasks', build_func=__build_mmdet3d_task)
-
-
-@CODEBASE.register_module(Codebase.MMDET3D.value)
-class MMDetection3d(MMCodebase):
-
-    task_registry = MMDET3D_TASK
-
-    def __init__(self):
-        super().__init__()
-
-    @staticmethod
-    def build_task_processor(model_cfg: mmengine.Config,
-                             deploy_cfg: mmengine.Config,
-                             device: str) -> BaseTask:
-        """The interface to build the task processors of mmdet3d.
-
-        Args:
-            model_cfg (str | mmengine.Config): Model config file.
-            deploy_cfg (str | mmengine.Config): Deployment config file.
-            device (str): A string specifying device type.
-
-        Returns:
-            BaseTask: A task processor.
-        """
-
-        return MMDET3D_TASK.build(model_cfg, deploy_cfg, device)
-
-    @classmethod
-    def register_deploy_modules(cls):
-        import mmdeploy.codebase.mmdet3d.models  # noqa: F401
-
-    @classmethod
-    def register_all_modules(cls):
-        from mmdet3d.utils.setup_env import register_all_modules
-
-        cls.register_deploy_modules()
-        register_all_modules(True)
-
-    @staticmethod
-    def build_dataset(dataset_cfg: Union[str, mmengine.Config], *args,
-                      **kwargs) -> Dataset:
-        """Build dataset for detection3d.
-
-        Args:
-            dataset_cfg (str | mmengine.Config): The input dataset config.
-
-        Returns:
-            Dataset: A PyTorch dataset.
-        """
-        from mmdet3d.datasets import build_dataset as build_dataset_mmdet3d
-
-        from mmdeploy.utils import load_config
-        dataset_cfg = load_config(dataset_cfg)[0]
-        data = dataset_cfg.data
-
-        dataset = build_dataset_mmdet3d(data.test)
-        return dataset
-
-    @staticmethod
-    def build_dataloader(dataset: Dataset,
-                         samples_per_gpu: int,
-                         workers_per_gpu: int,
-                         num_gpus: int = 1,
-                         dist: bool = False,
-                         shuffle: bool = False,
-                         seed: Optional[int] = None,
-                         runner_type: str = 'EpochBasedRunner',
-                         persistent_workers: bool = True,
-                         **kwargs) -> DataLoader:
-        """Build dataloader for detection3d.
-
-        Args:
-            dataset (Dataset): Input dataset.
-            samples_per_gpu (int): Number of training samples on each GPU, i.e.
-                ,batch size of each GPU.
-            workers_per_gpu (int): How many subprocesses to use for data
-                loading for each GPU.
-            num_gpus (int): Number of GPUs. Only used in non-distributed
-                training.
-            dist (bool): Distributed training/test or not.
-                Defaults  to `False`.
-            shuffle (bool): Whether to shuffle the data at every epoch.
-                Defaults to `False`.
-            seed (int): An integer set to be seed. Default is `None`.
-            runner_type (str): Type of runner. Default: `EpochBasedRunner`.
-            persistent_workers (bool): If True, the data loader will not
-                shutdown the worker processes after a dataset has been consumed
-                once. This allows to maintain the workers `Dataset` instances
-                alive. This argument is only valid when PyTorch>=1.7.0.
-                Default: False.
-            kwargs: Any other keyword argument to be used to initialize
-                DataLoader.
-
-        Returns:
-            DataLoader: A PyTorch dataloader.
-        """
-        from mmdet3d.datasets import \
-            build_dataloader as build_dataloader_mmdet3d
-        return build_dataloader_mmdet3d(
-            dataset,
-            samples_per_gpu,
-            workers_per_gpu,
-            num_gpus=num_gpus,
-            dist=dist,
-            shuffle=shuffle,
-            seed=seed,
-            runner_type=runner_type,
-            persistent_workers=persistent_workers,
-            **kwargs)
--- a/mmdeploy/codebase/mmdet3d/deploy/voxel_detection.py
+++ b/mmdeploy/codebase/mmdet3d/deploy/voxel_detection.py
@ -1,20 +1,60 @@
 # Copyright (c) OpenMMLab. All rights reserved.
-from typing import Any, Dict, List, Optional, Sequence, Tuple, Union
+from copy import deepcopy
+from typing import Dict, Optional, Sequence, Tuple, Union

-import mmcv
 import mmengine
 import numpy as np
 import torch
-import torch.nn as nn
-from mmcv.parallel import collate, scatter
-from mmdet3d.core.bbox import get_box_type
-from mmdet3d.datasets.pipelines import Compose
-from torch.utils.data import DataLoader, Dataset
+from mmdet3d.structures import get_box_type
+from mmengine import Config
+from mmengine.dataset import Compose, pseudo_collate
+from mmengine.model import BaseDataPreprocessor
+from mmengine.registry import Registry

-from mmdeploy.codebase.base import BaseTask
-from mmdeploy.codebase.mmdet3d.deploy.mmdetection3d import MMDET3D_TASK
-from mmdeploy.utils import Task, get_root_logger, load_config
-from .voxel_detection_model import VoxelDetectionModel
+from mmdeploy.codebase.base import CODEBASE, BaseTask, MMCodebase
+from mmdeploy.utils import Codebase, Task
+
+MMDET3D_TASK = Registry('mmdet3d_tasks')
+
+
+@CODEBASE.register_module(Codebase.MMDET3D.value)
+class MMDetection3d(MMCodebase):
+    """MMDetection3d codebase class, registered in `CODEBASE`."""
+
+    task_registry = MMDET3D_TASK  # registry holding the mmdet3d tasks
+
+    @classmethod
+    def register_deploy_modules(cls):
+        import mmdeploy.codebase.mmdet3d.models  # noqa: F401
+
+    @classmethod
+    def register_all_modules(cls):
+        from mmdet3d.utils.setup_env import register_all_modules
+
+        cls.register_deploy_modules()  # deploy modules are imported first
+        register_all_modules(True)
+
+
+def _get_dataset_metainfo(model_cfg: Config):
+    """Get `metainfo` from the test, val or train dataset, in that order.
+
+    Args:
+        model_cfg (Config): Input model Config object.
+
+    Returns:
+        The `metainfo` of the first dataset config that has one, else None.
+    """
+
+    for dataloader_name in [
+            'test_dataloader', 'val_dataloader', 'train_dataloader'
+    ]:
+        if dataloader_name not in model_cfg:
+            continue
+        dataloader_cfg = model_cfg[dataloader_name]
+        dataset_cfg = dataloader_cfg.dataset
+        if 'metainfo' in dataset_cfg:
+            return dataset_cfg.metainfo
+    return None


@MMDET3D_TASK.register_module(Task.VOXEL_DETECTION.value)
@ -36,168 +76,106 @@ class VoxelDetection(BaseTask):
            nn.Module: An initialized backend model.
        """
        from .voxel_detection_model import build_voxel_detection_model
+
+        data_preprocessor = deepcopy(
+            self.model_cfg.model.get('data_preprocessor', {}))
+        data_preprocessor.setdefault('type', 'mmdet3D.Det3DDataPreprocessor')
+
        model = build_voxel_detection_model(
-            model_files, self.model_cfg, self.deploy_cfg, device=self.device)
+            model_files,
+            self.model_cfg,
+            self.deploy_cfg,
+            device=self.device,
+            data_preprocessor=data_preprocessor)
+        model = model.to(self.device)
        return model

-    def build_pytorch_model(self,
-                            model_checkpoint: Optional[str] = None,
-                            cfg_options: Optional[Dict] = None,
-                            **kwargs) -> torch.nn.Module:
-        """Initialize torch model.
-
-        Args:
-            model_checkpoint (str): The checkpoint file of torch model,
-                defaults to `None`.
-            cfg_options (dict): Optional config key-pair parameters.
-        Returns:
-            nn.Module: An initialized torch model generated by other OpenMMLab
-                codebases.
-        """
-        from mmdet3d.apis import init_model
-        device = self.device
-        model = init_model(self.model_cfg, model_checkpoint, device)
-        return model.eval()
-
-    def create_input(self, pcd: str, *args) -> Tuple[Dict, torch.Tensor]:
+    def create_input(
+        self,
+        pcd: str,
+        input_shape: Sequence[int] = None,
+        data_preprocessor: Optional[BaseDataPreprocessor] = None
+    ) -> Tuple[Dict, torch.Tensor]:
        """Create input for detector.

        Args:
            pcd (str): Input pcd file path.
+            input_shape (Sequence[int], optional): model input shape.
+                Defaults to None.
+            data_preprocessor (Optional[BaseDataPreprocessor], optional):
+                model input preprocess. Defaults to None.

        Returns:
            tuple: (data, input), meta information for the input pcd
                and model input.
        """
-        data = VoxelDetection.read_pcd_file(pcd, self.model_cfg, self.device)
-        voxels, num_points, coors = VoxelDetectionModel.voxelize(
-            self.model_cfg, data['points'][0])
-        return data, (voxels, num_points, coors)
+
+        cfg = self.model_cfg
+        test_pipeline = deepcopy(cfg.test_dataloader.dataset.pipeline)
+        test_pipeline = Compose(test_pipeline)
+        box_type_3d, box_mode_3d = \
+            get_box_type(cfg.test_dataloader.dataset.box_type_3d)
+
+        data = []
+        data_ = dict(
+            lidar_points=dict(lidar_path=pcd),
+            timestamp=1,
+            # for ScanNet demo we need axis_align_matrix
+            axis_align_matrix=np.eye(4),
+            box_type_3d=box_type_3d,
+            box_mode_3d=box_mode_3d)
+        data_ = test_pipeline(data_)
+        data.append(data_)
+
+        collate_data = pseudo_collate(data)
+        data[0]['inputs']['points'] = data[0]['inputs']['points'].to(
+            self.device)
+
+        if data_preprocessor is not None:
+            collate_data = data_preprocessor(collate_data, False)
+            voxels = collate_data['inputs']['voxels']
+            inputs = [voxels['voxels'], voxels['num_points'], voxels['coors']]
+        else:
+            inputs = collate_data['inputs']
+        return collate_data, inputs

    def visualize(self,
+                  image: Union[str, np.ndarray],
                  model: torch.nn.Module,
-                  image: str,
                  result: list,
                  output_file: str,
-                  window_name: str,
+                  window_name: str = '',
                  show_result: bool = False,
-                  score_thr: float = 0.3):
-        """Visualize predictions of a model.
+                  draw_gt: bool = False,
+                  **kwargs):
+        """visualize backend output.

        Args:
-            model (nn.Module): Input model.
-            image (str): Pcd file to draw predictions on.
-            result (list): A list of predictions.
-            output_file (str): Output file to save result.
-            window_name (str): The name of visualization window. Defaults to
-                an empty string.
-            show_result (bool): Whether to show result in windows, defaults
-                to `False`.
-            score_thr (float): The score threshold to display the bbox.
-                Defaults to 0.3.
+            image (Union[str, np.ndarray]): pcd file path
+            model (torch.nn.Module): input model; not used by this method
+            result (list): output bbox, score and type
+            output_file (str): the directory to save output
+            window_name (str, optional): display window name
+            show_result (bool, optional): show result or not.
+                Defaults to False.
+            draw_gt (bool, optional): show gt or not. Defaults to False.
        """
-        from mmdet3d.apis import show_result_meshlab
-        data = VoxelDetection.read_pcd_file(image, self.model_cfg, self.device)
-        show_result_meshlab(
-            data,
-            result,
-            output_file,
-            score_thr,
+        cfg = self.model_cfg
+        visualizer = super().get_visualizer(window_name, output_file)
+        visualizer.dataset_meta = _get_dataset_metainfo(cfg)
+
+        # show the results
+        collate_data, _ = self.create_input(pcd=image)
+
+        visualizer.add_datasample(
+            window_name,
+            dict(points=collate_data['inputs']['points'][0]),
+            data_sample=result,
+            draw_gt=draw_gt,
            show=show_result,
-            snapshot=1 - show_result,
-            task='det')
-
-    @staticmethod
-    def read_pcd_file(pcd: str, model_cfg: Union[str, mmengine.Config],
-                      device: str) -> Dict:
-        """Read data from pcd file and run test pipeline.
-
-        Args:
-            pcd (str): Pcd file path.
-            model_cfg (str | mmengine.Config): The model config.
-            device (str): A string specifying device type.
-
-        Returns:
-            dict: meta information for the input pcd.
-        """
-        if isinstance(pcd, (list, tuple)):
-            pcd = pcd[0]
-        model_cfg = load_config(model_cfg)[0]
-        test_pipeline = Compose(model_cfg.data.test.pipeline)
-        box_type_3d, box_mode_3d = get_box_type(
-            model_cfg.data.test.box_type_3d)
-        data = dict(
-            pts_filename=pcd,
-            box_type_3d=box_type_3d,
-            box_mode_3d=box_mode_3d,
-            # for ScanNet demo we need axis_align_matrix
-            ann_info=dict(axis_align_matrix=np.eye(4)),
-            sweeps=[],
-            # set timestamp = 0
-            timestamp=[0],
-            img_fields=[],
-            bbox3d_fields=[],
-            pts_mask_fields=[],
-            pts_seg_fields=[],
-            bbox_fields=[],
-            mask_fields=[],
-            seg_fields=[])
-        data = test_pipeline(data)
-        data = collate([data], samples_per_gpu=1)
-        data['img_metas'] = [
-            img_metas.data[0] for img_metas in data['img_metas']
-        ]
-        data['points'] = [point.data[0] for point in data['points']]
-        if device != 'cpu':
-            data = scatter(data, [device])[0]
-        return data
-
-    @staticmethod
-    def run_inference(model: nn.Module,
-                      model_inputs: Dict[str, torch.Tensor]) -> List:
-        """Run inference once for a object detection model of mmdet3d.
-
-        Args:
-            model (nn.Module): Input model.
-            model_inputs (dict): A dict containing model inputs tensor and
-                meta info.
-
-        Returns:
-            list: The predictions of model inference.
-        """
-        result = model(
-            return_loss=False,
-            points=model_inputs['points'],
-            img_metas=model_inputs['img_metas'])
-        return [result]
-
-    @staticmethod
-    def evaluate_outputs(model_cfg,
-                         outputs: Sequence,
-                         dataset: Dataset,
-                         metrics: Optional[str] = None,
-                         out: Optional[str] = None,
-                         metric_options: Optional[dict] = None,
-                         format_only: bool = False,
-                         log_file: Optional[str] = None):
-        if out:
-            logger = get_root_logger()
-            logger.info(f'\nwriting results to {out}')
-            mmcv.dump(outputs, out)
-        kwargs = {} if metric_options is None else metric_options
-        if format_only:
-            dataset.format_results(outputs, **kwargs)
-        if metrics:
-            eval_kwargs = model_cfg.get('evaluation', {}).copy()
-            # hard-code way to remove EvalHook args
-            for key in [
-                    'interval', 'tmpdir', 'start', 'gpu_collect', 'save_best',
-                    'rule'
-            ]:
-                eval_kwargs.pop(key, None)
-                eval_kwargs.pop(key, None)
-            eval_kwargs.update(dict(metric=metrics, **kwargs))
-            dataset.evaluate(outputs, **eval_kwargs)
+            wait_time=0,
+            out_file=output_file,
+            pred_score_thr=0.0,
+            vis_task='lidar_det')

    def get_model_name(self, *args, **kwargs) -> str:
        """Get the model name.
@ -207,18 +185,6 @@ class VoxelDetection(BaseTask):
        """
        raise NotImplementedError

-    def get_tensor_from_input(self, input_data: Dict[str, Any],
-                              **kwargs) -> torch.Tensor:
-        """Get input tensor from input data.
-
-        Args:
-            input_data (dict): Input data containing meta info and image
-                tensor.
-        Returns:
-            torch.Tensor: An image in `Tensor`.
-        """
-        raise NotImplementedError
-
    def get_partition_cfg(partition_type: str, **kwargs) -> Dict:
        """Get a certain partition config for mmdet.

@ -245,58 +211,3 @@ class VoxelDetection(BaseTask):
            dict: Composed of the preprocess information.
        """
        raise NotImplementedError
-
-    def single_gpu_test(self,
-                        model: nn.Module,
-                        data_loader: DataLoader,
-                        show: bool = False,
-                        out_dir: Optional[str] = None,
-                        **kwargs) -> List:
-        """Run test with single gpu.
-
-        Args:
-            model (nn.Module): Input model from nn.Module.
-            data_loader (DataLoader): PyTorch data loader.
-            show (bool): Specifying whether to show plotted results. Defaults
-                to `False`.
-            out_dir (str): A directory to save results, defaults to `None`.
-
-        Returns:
-            list: The prediction results.
-        """
-        model.eval()
-        results = []
-        dataset = data_loader.dataset
-
-        prog_bar = mmcv.ProgressBar(len(dataset))
-        for i, data in enumerate(data_loader):
-            with torch.no_grad():
-                result = model(data['points'][0].data,
-                               data['img_metas'][0].data, False)
-            if show:
-                # Visualize the results of MMDetection3D model
-                # 'show_results' is MMdetection3D visualization API
-                if out_dir is None:
-                    model.module.show_result(
-                        data,
-                        result,
-                        out_dir='',
-                        file_name='',
-                        show=show,
-                        snapshot=False,
-                        score_thr=0.3)
-                else:
-                    model.module.show_result(
-                        data,
-                        result,
-                        out_dir=out_dir,
-                        file_name=f'model_output{i}',
-                        show=show,
-                        snapshot=True,
-                        score_thr=0.3)
-            results.extend(result)
-
-            batch_size = len(result)
-            for _ in range(batch_size):
-                prog_bar.update()
-        return results
--- a/mmdeploy/codebase/mmdet3d/deploy/voxel_detection_model.py
+++ b/mmdeploy/codebase/mmdet3d/deploy/voxel_detection_model.py
@ -1,25 +1,19 @@
 # Copyright (c) OpenMMLab. All rights reserved.
-from typing import Dict, List, Sequence, Union
+from typing import Any, Dict, List, Optional, Sequence, Union

 import mmcv
-import mmengine
 import torch
-from mmcv.utils import Registry
-from torch.nn import functional as F
+from mmdet3d.structures.det3d_data_sample import SampleList
+from mmengine import Config
+from mmengine.model.base_model.data_preprocessor import BaseDataPreprocessor
+from mmengine.registry import Registry
+from mmengine.structures import BaseDataElement, InstanceData

 from mmdeploy.codebase.base import BaseBackendModel
-from mmdeploy.core import RewriterContext
 from mmdeploy.utils import (Backend, get_backend, get_codebase_config,
-                            get_root_logger, load_config)
+                            load_config)

-
-def __build_backend_voxel_model(cls_name: str, registry: Registry, *args,
-                                **kwargs):
-    return registry.module_dict[cls_name](*args, **kwargs)
-
-
-__BACKEND_MODEL = mmcv.utils.Registry(
-    'backend_voxel_detectors', build_func=__build_backend_voxel_model)
+__BACKEND_MODEL = Registry('backend_voxel_detectors')


@__BACKEND_MODEL.register_module('end2end')
@ -31,8 +25,8 @@ class VoxelDetectionModel(BaseBackendModel):
        backend_files (Sequence[str]): Paths to all required backend files
                (e.g. '.onnx' for ONNX Runtime, '.param' and '.bin' for ncnn).
        device (str): A string specifying device type.
-        model_cfg (str | mmengine.Config): The model config.
-        deploy_cfg (str|mmengine.Config): Deployment config file or loaded
+        model_cfg (str | Config): The model config.
+        deploy_cfg (str|Config): Deployment config file or loaded
            Config object.
    """

@ -40,11 +34,15 @@ class VoxelDetectionModel(BaseBackendModel):
                 backend: Backend,
                 backend_files: Sequence[str],
                 device: str,
-                 model_cfg: mmengine.Config,
-                 deploy_cfg: Union[str, mmengine.Config] = None):
-        super().__init__(deploy_cfg=deploy_cfg)
-        self.deploy_cfg = deploy_cfg
+                 model_cfg: Union[str, Config],
+                 deploy_cfg: Union[str, Config],
+                 data_preprocessor: Optional[Union[dict,
+                                                   torch.nn.Module]] = None,
+                 **kwargs):
+        super().__init__(
+            deploy_cfg=deploy_cfg, data_preprocessor=data_preprocessor)
        self.model_cfg = model_cfg
+        self.deploy_cfg = deploy_cfg
        self.device = device
        self._init_wrapper(
            backend=backend, backend_files=backend_files, device=device)
@ -64,13 +62,14 @@ class VoxelDetectionModel(BaseBackendModel):
            backend=backend,
            backend_files=backend_files,
            device=device,
+            input_names=[self.input_name],
            output_names=output_names,
            deploy_cfg=self.deploy_cfg)

    def forward(self,
-                points: Sequence[torch.Tensor],
-                img_metas: Sequence[dict],
-                return_loss=False):
+                inputs: dict,
+                data_samples: Optional[List[BaseDataElement]] = None,
+                **kwargs) -> Any:
        """Run forward inference.

        Args:
@ -84,22 +83,25 @@ class VoxelDetectionModel(BaseBackendModel):
        Returns:
            list: A list contains predictions.
        """
-        result_list = []
-        for i in range(len(img_metas)):
-            voxels, num_points, coors = VoxelDetectionModel.voxelize(
-                self.model_cfg, points[i])
-            input_dict = {
-                'voxels': voxels,
-                'num_points': num_points,
-                'coors': coors
-            }
-            outputs = self.wrapper(input_dict)
-            result = VoxelDetectionModel.post_process(self.model_cfg,
-                                                      self.deploy_cfg, outputs,
-                                                      img_metas[i],
-                                                      self.device)[0]
-            result_list.append(result)
-        return result_list
+        preprocessed = inputs['voxels']
+        input_dict = {
+            'voxels': preprocessed['voxels'].to(self.device),
+            'num_points': preprocessed['num_points'].to(self.device),
+            'coors': preprocessed['coors'].to(self.device)
+        }
+
+        outputs = self.wrapper(input_dict)
+
+        if data_samples is None:
+            return outputs
+
+        prediction = VoxelDetectionModel.postprocess(
+            model_cfg=self.model_cfg,
+            deploy_cfg=self.deploy_cfg,
+            outs=outputs,
+            metas=data_samples)
+
+        return prediction

    def show_result(self,
                    data: Dict,
@ -132,120 +134,259 @@ class VoxelDetectionModel(BaseBackendModel):
            pred_labels=pred_labels)

    @staticmethod
-    def voxelize(model_cfg: Union[str, mmengine.Config], points: torch.Tensor):
-        """convert kitti points(N, >=3) to voxels.
+    def convert_to_datasample(
+        data_samples: SampleList,
+        data_instances_3d: Optional[List[InstanceData]] = None,
+        data_instances_2d: Optional[List[InstanceData]] = None,
+    ) -> SampleList:
+        """Convert results list to `Det3DDataSample`.
+
+        Subclasses could override it to be compatible for some multi-modality
+        3D detectors.

        Args:
-            model_cfg (str | mmengine.Config): The model config.
-            points (torch.Tensor): [N, ndim] float tensor. points[:, :3]
-                contain xyz points and points[:, 3:] contain other information
-                like reflectivity.
+            data_samples (list[:obj:`Det3DDataSample`]): The input data.
+            data_instances_3d (list[:obj:`InstanceData`], optional): 3D
+                Detection results of each sample.
+            data_instances_2d (list[:obj:`InstanceData`], optional): 2D
+                Detection results of each sample.

        Returns:
-            voxels: [M, max_points, ndim] float tensor. only contain points
-                and returned when max_points != -1.
-            coordinates: [M, 3] int32 tensor, always returned.
-            num_points_per_voxel: [M] int32 tensor. Only returned when
-                max_points != -1.
+            list[:obj:`Det3DDataSample`]: Detection results of the
+            input. Each Det3DDataSample usually contains
+            ``pred_instances_3d``, which normally contains the
+            following keys.
+
+            - scores_3d (Tensor): Classification scores, has a shape
+              (num_instances, )
+            - labels_3d (Tensor): Labels of 3D bboxes, has a shape
+              (num_instances, ).
+            - bboxes_3d (Tensor): Contains a tensor with shape
+              (num_instances, C) where C >=7.
+
+            When a model also produces image predictions, the sample should
+            contain ``pred_instances``, and ``pred_instances`` normally
+            contains the following keys.
+
+            - scores (Tensor): Classification scores of image, has a shape
+              (num_instances, )
+            - labels (Tensor): Predict Labels of 2D bboxes, has a shape
+              (num_instances, ).
+            - bboxes (Tensor): Contains a tensor with shape
+              (num_instances, 4).
        """
-        from mmcv.ops import Voxelization
-        model_cfg = load_config(model_cfg)[0]
-        if 'voxel_layer' in model_cfg.model.keys():
-            voxel_layer = model_cfg.model['voxel_layer']
-        elif 'pts_voxel_layer' in model_cfg.model.keys():
-            voxel_layer = model_cfg.model['pts_voxel_layer']
-        else:
-            raise
-        voxel_layer = Voxelization(**voxel_layer)
-        voxels, coors, num_points = [], [], []
-        for res in points:
-            res_voxels, res_coors, res_num_points = voxel_layer(res)
-            voxels.append(res_voxels)
-            coors.append(res_coors)
-            num_points.append(res_num_points)
-        voxels = torch.cat(voxels, dim=0)
-        num_points = torch.cat(num_points, dim=0)
-        coors_batch = []
-        for i, coor in enumerate(coors):
-            coor_pad = F.pad(coor, (1, 0), mode='constant', value=i)
-            coors_batch.append(coor_pad)
-        coors_batch = torch.cat(coors_batch, dim=0)
-        return voxels, num_points, coors_batch
+
+        assert (data_instances_2d is not None) or \
+               (data_instances_3d is not None),\
+               'please pass at least one type of data_samples'
+
+        if data_instances_2d is None:
+            data_instances_2d = [
+                InstanceData() for _ in range(len(data_instances_3d))
+            ]
+        if data_instances_3d is None:
+            data_instances_3d = [
+                InstanceData() for _ in range(len(data_instances_2d))
+            ]
+
+        for i, data_sample in enumerate(data_samples):
+            data_sample.pred_instances_3d = data_instances_3d[i]
+            data_sample.pred_instances = data_instances_2d[i]
+        return data_samples

    @staticmethod
-    def post_process(model_cfg: Union[str, mmengine.Config],
-                     deploy_cfg: Union[str, mmengine.Config],
-                     outs: Dict,
-                     img_metas: Dict,
-                     device: str,
-                     rescale=False):
-        """model post process.
+    def postprocess(model_cfg: Union[str, Config],
+                    deploy_cfg: Union[str, Config], outs: Dict, metas: Dict):
+        """postprocess outputs to datasamples.

        Args:
-            model_cfg (str | mmengine.Config): The model config.
-            deploy_cfg (str|mmengine.Config): Deployment config file or loaded
-            Config object.
-            outs (Dict): Output of model's head.
-            img_metas(Dict): Meta info for pcd.
-            device (str): A string specifying device type.
-            rescale (list[torch.Tensor]): whether th rescale bbox.
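+            model_cfg (Union[str, Config]): The model config from
+                training repo
+            deploy_cfg (Union[str, Config]): The deploy config to specify
+                backend and input shape
+            outs (Dict): backend outputs; must contain `cls_score`,
+                `bbox_pred` and `dir_cls_pred`
+            metas (Dict): data samples, one per input; each sample's
+                `metainfo` is read and the predictions are attached to it
+
+        Raises:
+            RuntimeError: If an expected output tensor or `test_cfg` is
+                missing
+            NotImplementedError: If the model config has neither
+                `bbox_head` nor `pts_bbox_head`
+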
        Returns:
-            list: A list contains predictions, include bboxes, scores, labels.
+            DataSample3D: datatype for render
        """
-        from mmdet3d.core import bbox3d2result
-        from mmdet3d.models.builder import build_head
-        model_cfg = load_config(model_cfg)[0]
-        deploy_cfg = load_config(deploy_cfg)[0]
-        if 'bbox_head' in model_cfg.model.keys():
-            head_cfg = dict(**model_cfg.model['bbox_head'])
-        elif 'pts_bbox_head' in model_cfg.model.keys():
-            head_cfg = dict(**model_cfg.model['pts_bbox_head'])
+        if 'cls_score' not in outs or 'bbox_pred' not in outs or 'dir_cls_pred' not in outs:  # noqa: E501
+            raise RuntimeError('output tensor not found')
+
+        if 'test_cfg' not in model_cfg.model:
+            raise RuntimeError('test_cfg not found')
+
+        from mmengine.registry import MODELS
+        cls_score = outs['cls_score']
+        bbox_pred = outs['bbox_pred']
+        dir_cls_pred = outs['dir_cls_pred']
+        batch_input_metas = [data_samples.metainfo for data_samples in metas]
+
+        head = None
+        cfg = None
+        if 'bbox_head' in model_cfg.model:
+            # pointpillars postprocess
+            head = MODELS.build(model_cfg.model['bbox_head'])
+            cfg = model_cfg.model.test_cfg
+        elif 'pts_bbox_head' in model_cfg.model:
+            # centerpoint postprocess
+            head = MODELS.build(model_cfg.model['pts_bbox_head'])
+            cfg = model_cfg.model.test_cfg.pts
        else:
-            raise NotImplementedError('Not supported model.')
-        head_cfg['train_cfg'] = None
-        head_cfg['test_cfg'] = model_cfg.model['test_cfg']\
-            if 'pts' not in model_cfg.model['test_cfg'].keys()\
-            else model_cfg.model['test_cfg']['pts']
-        head = build_head(head_cfg)
-        if device == 'cpu':
-            logger = get_root_logger()
-            logger.warning(
-                'Don\'t suggest using CPU device. Post process can\'t support.'
-            )
-            if torch.cuda.is_available():
-                device = 'cuda'
-            else:
-                raise NotImplementedError(
-                    'Post process don\'t support device=cpu')
-        cls_scores = [outs['scores'].to(device)]
-        bbox_preds = [outs['bbox_preds'].to(device)]
-        dir_scores = [outs['dir_scores'].to(device)]
-        with RewriterContext(
-                cfg=deploy_cfg,
-                backend=deploy_cfg.backend_config.type,
-                opset=deploy_cfg.onnx_config.opset_version):
-            bbox_list = head.get_bboxes(
-                cls_scores, bbox_preds, dir_scores, img_metas, rescale=False)
-            bbox_results = [
-                bbox3d2result(bboxes, scores, labels)
-                for bboxes, scores, labels in bbox_list
-            ]
-        return bbox_results
+            raise NotImplementedError('mmdet3d model bbox_head not found')
+
+        if not hasattr(head, 'task_heads'):
+            data_instances_3d = head.predict_by_feat(
+                cls_scores=[cls_score],
+                bbox_preds=[bbox_pred],
+                dir_cls_preds=[dir_cls_pred],
+                batch_input_metas=batch_input_metas,
+                cfg=cfg)
+
+            data_samples = VoxelDetectionModel.convert_to_datasample(
+                data_samples=metas, data_instances_3d=data_instances_3d)
+
+        else:
+            pts = model_cfg.model.test_cfg.pts
+
+            rets = []
+            scores_range = [0]
+            bbox_range = [0]
+            dir_range = [0]
+            for i, _ in enumerate(head.task_heads):
+                scores_range.append(scores_range[i] + head.num_classes[i])
+                bbox_range.append(bbox_range[i] + 8)
+                dir_range.append(dir_range[i] + 2)
+
+            for task_id in range(len(head.num_classes)):
+                num_class_with_bg = head.num_classes[task_id]
+
+                batch_heatmap = cls_score[:,
+                                          scores_range[task_id]:scores_range[
+                                              task_id + 1], ...].sigmoid()
+
+                batch_reg = bbox_pred[:,
+                                      bbox_range[task_id]:bbox_range[task_id] +
+                                      2, ...]
+                batch_hei = bbox_pred[:, bbox_range[task_id] +
+                                      2:bbox_range[task_id] + 3, ...]
+
+                if head.norm_bbox:
+                    batch_dim = torch.exp(bbox_pred[:, bbox_range[task_id] +
+                                                    3:bbox_range[task_id] + 6,
+                                                    ...])
+                else:
+                    batch_dim = bbox_pred[:, bbox_range[task_id] +
+                                          3:bbox_range[task_id] + 6, ...]
+
+                batch_vel = bbox_pred[:, bbox_range[task_id] +
+                                      6:bbox_range[task_id + 1], ...]
+
+                batch_rots = dir_cls_pred[:,
+                                          dir_range[task_id]:dir_range[task_id
+                                                                       + 1],
+                                          ...][:, 0].unsqueeze(1)
+                batch_rotc = dir_cls_pred[:,
+                                          dir_range[task_id]:dir_range[task_id
+                                                                       + 1],
+                                          ...][:, 1].unsqueeze(1)
+
+                temp = head.bbox_coder.decode(
+                    batch_heatmap,
+                    batch_rots,
+                    batch_rotc,
+                    batch_hei,
+                    batch_dim,
+                    batch_vel,
+                    reg=batch_reg,
+                    task_id=task_id)
+
+                assert pts['nms_type'] in ['circle', 'rotate']
+                batch_reg_preds = [box['bboxes'] for box in temp]
+                batch_cls_preds = [box['scores'] for box in temp]
+                batch_cls_labels = [box['labels'] for box in temp]
+                if pts['nms_type'] == 'circle':
+                    boxes3d = temp[0]['bboxes']
+                    scores = temp[0]['scores']
+                    labels = temp[0]['labels']
+                    centers = boxes3d[:, [0, 1]]
+                    boxes = torch.cat([centers, scores.view(-1, 1)], dim=1)
+                    from mmdet3d.models.layers import circle_nms
+                    keep = torch.tensor(
+                        circle_nms(
+                            boxes.detach().cpu().numpy(),
+                            pts['min_radius'][task_id],
+                            post_max_size=pts['post_max_size']),
+                        dtype=torch.long,
+                        device=boxes.device)
+
+                    boxes3d = boxes3d[keep]
+                    scores = scores[keep]
+                    labels = labels[keep]
+                    ret = dict(bboxes=boxes3d, scores=scores, labels=labels)
+                    ret_task = [ret]
+                    rets.append(ret_task)
+                else:
+                    rets.append(
+                        head.get_task_detections(num_class_with_bg,
+                                                 batch_cls_preds,
+                                                 batch_reg_preds,
+                                                 batch_cls_labels,
+                                                 batch_input_metas))
+
+            # Merge branches results
+            num_samples = len(rets[0])
+
+            ret_list = []
+            for i in range(num_samples):
+                temp_instances = InstanceData()
+                for k in rets[0][i].keys():
+                    if k == 'bboxes':
+                        bboxes = torch.cat([ret[i][k] for ret in rets])
+                        bboxes[:, 2] = bboxes[:, 2] - bboxes[:, 5] * 0.5
+                        bboxes = batch_input_metas[i]['box_type_3d'](
+                            bboxes, head.bbox_coder.code_size)
+                    elif k == 'scores':
+                        scores = torch.cat([ret[i][k] for ret in rets])
+                    elif k == 'labels':
+                        flag = 0
+                        for j, num_class in enumerate(head.num_classes):
+                            rets[j][i][k] += flag
+                            flag += num_class
+                        labels = torch.cat([ret[i][k].int() for ret in rets])
+                temp_instances.bboxes_3d = bboxes
+                temp_instances.scores_3d = scores
+                temp_instances.labels_3d = labels
+                ret_list.append(temp_instances)
+
+            data_samples = VoxelDetectionModel.convert_to_datasample(
+                metas, data_instances_3d=ret_list)
+
+        return data_samples


-def build_voxel_detection_model(model_files: Sequence[str],
-                                model_cfg: Union[str, mmengine.Config],
-                                deploy_cfg: Union[str, mmengine.Config],
-                                device: str):
+def build_voxel_detection_model(
+        model_files: Sequence[str],
+        model_cfg: Union[str, Config],
+        deploy_cfg: Union[str, Config],
+        device: str,
+        data_preprocessor: Optional[Union[Config,
+                                          BaseDataPreprocessor]] = None,
+        **kwargs):
    """Build 3d voxel object detection model for different backends.

    Args:
        model_files (Sequence[str]): Input model file(s).
-        model_cfg (str | mmengine.Config): Input model config file or Config
+        model_cfg (str | Config): Input model config file or Config
            object.
-        deploy_cfg (str | mmengine.Config): Input deployment config file or
+        deploy_cfg (str | Config): Input deployment config file or
            Config object.
        device (str):  Device to input model
+        data_preprocessor (BaseDataPreprocessor | Config): The data
+            preprocessor of the model.

    Returns:
        VoxelDetectionModel: Detector for a configured backend.
@ -256,11 +397,14 @@ def build_voxel_detection_model(model_files: Sequence[str],
    model_type = get_codebase_config(deploy_cfg).get('model_type', 'end2end')

    backend_detector = __BACKEND_MODEL.build(
-        model_type,
-        backend=backend,
-        backend_files=model_files,
-        device=device,
-        model_cfg=model_cfg,
-        deploy_cfg=deploy_cfg)
+        dict(
+            type=model_type,
+            backend=backend,
+            backend_files=model_files,
+            device=device,
+            model_cfg=model_cfg,
+            deploy_cfg=deploy_cfg,
+            data_preprocessor=data_preprocessor,
+            **kwargs))

    return backend_detector
--- a/mmdeploy/codebase/mmdet3d/models/init.py
+++ b/mmdeploy/codebase/mmdet3d/models/init.py
@ -1,7 +1,5 @@
 # Copyright (c) OpenMMLab. All rights reserved.
-from .base import *  # noqa: F401,F403
-from .centerpoint import *  # noqa: F401,F403
-from .mvx_two_stage import *  # noqa: F401,F403
-from .pillar_encode import *  # noqa: F401,F403
-from .pillar_scatter import *  # noqa: F401,F403
-from .voxelnet import *  # noqa: F401,F403
+from . import base  # noqa: F401,F403
+from . import mvx_two_stage  # noqa: F401,F403
+from . import pillar_encode  # noqa: F401,F403
+from . import pillar_scatter  # noqa: F401,F403
--- a/mmdeploy/codebase/mmdet3d/models/base.py
+++ b/mmdeploy/codebase/mmdet3d/models/base.py
@ -1,23 +1,26 @@
 # Copyright (c) OpenMMLab. All rights reserved.
+from typing import List, Tuple
+
+import torch
+
 from mmdeploy.core import FUNCTION_REWRITER


@FUNCTION_REWRITER.register_rewriter(
-    'mmdet3d.models.detectors.base.Base3DDetector.forward_test')
-def base3ddetector__forward_test(ctx,
-                                 self,
-                                 voxels,
-                                 num_points,
-                                 coors,
-                                 img_metas=None,
-                                 img=None,
-                                 rescale=False):
-    """Rewrite this function to run simple_test directly."""
-    return self.simple_test(voxels, num_points, coors, img_metas, img)
+    'mmdet3d.models.detectors.Base3DDetector.forward'  # noqa: E501
+)
+def basedetector__forward(ctx,
+                          self,
+                          inputs: list,
+                          data_samples=None,
+                          **kwargs) -> Tuple[List[torch.Tensor]]:
+    """Rewrite `forward` of `Base3DDetector` to accept voxelized inputs.
+
+    The three entries of ``inputs`` are packed into the nested
+    ``{'voxels': {...}}`` dict and handed to ``self._forward``.
+
+    Args:
+        ctx: The rewriter context (not used in this function).
+        self: The detector instance; must provide ``_forward``.
+        inputs (list): Indexed as ``inputs[0]`` (stored under 'voxels'),
+            ``inputs[1]`` (stored under 'num_points') and ``inputs[2]``
+            (stored under 'coors').
+        data_samples: Forwarded unchanged to ``self._forward``.
+            Defaults to None.
+        **kwargs: Forwarded unchanged to ``self._forward``.
+
+    Returns:
+        Whatever ``self._forward`` returns for the packed inputs.
+    """

-
-@FUNCTION_REWRITER.register_rewriter(
-    'mmdet3d.models.detectors.base.Base3DDetector.forward')
-def base3ddetector__forward(ctx, self, *args, **kwargs):
-    """Rewrite this function to run the model directly."""
-    return self.forward_test(*args)
+    batch_inputs_dict = {
+        'voxels': {
+            'voxels': inputs[0],
+            'num_points': inputs[1],
+            'coors': inputs[2]
+        }
+    }
+    return self._forward(batch_inputs_dict, data_samples, **kwargs)
--- a/mmdeploy/codebase/mmdet3d/models/centerpoint.py
+++ b/mmdeploy/codebase/mmdet3d/models/centerpoint.py
@ -1,190 +0,0 @@
-# Copyright (c) OpenMMLab. All rights reserved.
-import torch
-from mmdet3d.core import circle_nms
-
-from mmdeploy.core import FUNCTION_REWRITER
-
-
-@FUNCTION_REWRITER.register_rewriter(
-    'mmdet3d.models.detectors.centerpoint.CenterPoint.extract_pts_feat')
-def centerpoint__extract_pts_feat(ctx, self, voxels, num_points, coors,
-                                  img_feats, img_metas):
-    """Extract features from points. Rewrite this func to remove voxelize op.
-
-    Args:
-        voxels (torch.Tensor): Point features or raw points in shape (N, M, C).
-        num_points (torch.Tensor): Number of points in each voxel.
-        coors (torch.Tensor): Coordinates of each voxel.
-        img_feats (list[torch.Tensor], optional): Image features used for
-            multi-modality fusion. Defaults to None.
-        img_metas (list[dict]): Meta information of samples.
-
-    Returns:
-        torch.Tensor: Points feature.
-    """
-    if not self.with_pts_bbox:
-        return None
-
-    voxel_features = self.pts_voxel_encoder(voxels, num_points, coors)
-    batch_size = coors[-1, 0] + 1
-    x = self.pts_middle_encoder(voxel_features, coors, batch_size)
-    x = self.pts_backbone(x)
-    if self.with_pts_neck:
-        x = self.pts_neck(x)
-    return x
-
-
-@FUNCTION_REWRITER.register_rewriter(
-    'mmdet3d.models.detectors.centerpoint.CenterPoint.simple_test_pts')
-def centerpoint__simple_test_pts(ctx, self, x, img_metas, rescale=False):
-    """Rewrite this func to format model outputs.
-
-    Args:
-        x (torch.Tensor): Input points feature.
-        img_metas (list[dict]): Meta information of samples.
-        rescale (bool): Whether need rescale.
-
-    Returns:
-        List: Result of model.
-    """
-    outs = self.pts_bbox_head(x)
-    bbox_preds, scores, dir_scores = [], [], []
-    for task_res in outs:
-        bbox_preds.append(task_res[0]['reg'])
-        bbox_preds.append(task_res[0]['height'])
-        bbox_preds.append(task_res[0]['dim'])
-        if 'vel' in task_res[0].keys():
-            bbox_preds.append(task_res[0]['vel'])
-        scores.append(task_res[0]['heatmap'])
-        dir_scores.append(task_res[0]['rot'])
-    bbox_preds = torch.cat(bbox_preds, dim=1)
-    scores = torch.cat(scores, dim=1)
-    dir_scores = torch.cat(dir_scores, dim=1)
-    return scores, bbox_preds, dir_scores
-
-
-@FUNCTION_REWRITER.register_rewriter(
-    'mmdet3d.models.dense_heads.centerpoint_head.CenterHead.get_bboxes')
-def centerpoint__get_bbox(ctx,
-                          self,
-                          cls_scores,
-                          bbox_preds,
-                          dir_scores,
-                          img_metas,
-                          img=None,
-                          rescale=False):
-    """Rewrite this func to format func inputs.
-
-    Args
-        cls_scores (list[torch.Tensor]): Classification predicts results.
-        bbox_preds (list[torch.Tensor]): Bbox predicts results.
-        dir_scores (list[torch.Tensor]): Dir predicts results.
-        img_metas (list[dict]): Point cloud and image's meta info.
-        img (torch.Tensor): Input image.
-        rescale (Bool): Whether need rescale.
-
-    Returns:
-        list[dict]: Decoded bbox, scores and labels after nms.
-    """
-    rets = []
-    scores_range = [0]
-    bbox_range = [0]
-    dir_range = [0]
-    for i, task_head in enumerate(self.task_heads):
-        scores_range.append(scores_range[i] + self.num_classes[i])
-        bbox_range.append(bbox_range[i] + 8)
-        dir_range.append(dir_range[i] + 2)
-    for task_id in range(len(self.num_classes)):
-        num_class_with_bg = self.num_classes[task_id]
-
-        batch_heatmap = cls_scores[
-            0][:, scores_range[task_id]:scores_range[task_id + 1],
-               ...].sigmoid()
-
-        batch_reg = bbox_preds[0][:,
-                                  bbox_range[task_id]:bbox_range[task_id] + 2,
-                                  ...]
-        batch_hei = bbox_preds[0][:, bbox_range[task_id] +
-                                  2:bbox_range[task_id] + 3, ...]
-
-        if self.norm_bbox:
-            batch_dim = torch.exp(bbox_preds[0][:, bbox_range[task_id] +
-                                                3:bbox_range[task_id] + 6,
-                                                ...])
-        else:
-            batch_dim = bbox_preds[0][:, bbox_range[task_id] +
-                                      3:bbox_range[task_id] + 6, ...]
-
-        batch_vel = bbox_preds[0][:, bbox_range[task_id] +
-                                  6:bbox_range[task_id + 1], ...]
-
-        batch_rots = dir_scores[0][:,
-                                   dir_range[task_id]:dir_range[task_id + 1],
-                                   ...][:, 0].unsqueeze(1)
-        batch_rotc = dir_scores[0][:,
-                                   dir_range[task_id]:dir_range[task_id + 1],
-                                   ...][:, 1].unsqueeze(1)
-
-        temp = self.bbox_coder.decode(
-            batch_heatmap,
-            batch_rots,
-            batch_rotc,
-            batch_hei,
-            batch_dim,
-            batch_vel,
-            reg=batch_reg,
-            task_id=task_id)
-        if 'pts' in self.test_cfg.keys():
-            self.test_cfg = self.test_cfg.pts
-        assert self.test_cfg['nms_type'] in ['circle', 'rotate']
-        batch_reg_preds = [box['bboxes'] for box in temp]
-        batch_cls_preds = [box['scores'] for box in temp]
-        batch_cls_labels = [box['labels'] for box in temp]
-        if self.test_cfg['nms_type'] == 'circle':
-
-            boxes3d = temp[0]['bboxes']
-            scores = temp[0]['scores']
-            labels = temp[0]['labels']
-            centers = boxes3d[:, [0, 1]]
-            boxes = torch.cat([centers, scores.view(-1, 1)], dim=1)
-            keep = torch.tensor(
-                circle_nms(
-                    boxes.detach().cpu().numpy(),
-                    self.test_cfg['min_radius'][task_id],
-                    post_max_size=self.test_cfg['post_max_size']),
-                dtype=torch.long,
-                device=boxes.device)
-
-            boxes3d = boxes3d[keep]
-            scores = scores[keep]
-            labels = labels[keep]
-            ret = dict(bboxes=boxes3d, scores=scores, labels=labels)
-            ret_task = [ret]
-            rets.append(ret_task)
-        else:
-            rets.append(
-                self.get_task_detections(num_class_with_bg, batch_cls_preds,
-                                         batch_reg_preds, batch_cls_labels,
-                                         img_metas))
-
-    # Merge branches results
-    num_samples = len(rets[0])
-
-    ret_list = []
-    for i in range(num_samples):
-        for k in rets[0][i].keys():
-            if k == 'bboxes':
-                bboxes = torch.cat([ret[i][k] for ret in rets])
-                bboxes[:, 2] = bboxes[:, 2] - bboxes[:, 5] * 0.5
-                bboxes = img_metas[i]['box_type_3d'](bboxes,
-                                                     self.bbox_coder.code_size)
-            elif k == 'scores':
-                scores = torch.cat([ret[i][k] for ret in rets])
-            elif k == 'labels':
-                flag = 0
-                for j, num_class in enumerate(self.num_classes):
-                    rets[j][i][k] += flag
-                    flag += num_class
-                labels = torch.cat([ret[i][k].int() for ret in rets])
-        ret_list.append([bboxes, scores, labels])
-    return ret_list
--- a/mmdeploy/codebase/mmdet3d/models/mvx_two_stage.py
+++ b/mmdeploy/codebase/mmdet3d/models/mvx_two_stage.py
@ -1,41 +1,33 @@
 # Copyright (c) OpenMMLab. All rights reserved.
+import torch
+
 from mmdeploy.core import FUNCTION_REWRITER


@FUNCTION_REWRITER.register_rewriter(
-    'mmdet3d.models.detectors.mvx_two_stage.MVXTwoStageDetector.simple_test')
-def mvxtwostagedetector__simple_test(ctx,
-                                     self,
-                                     voxels,
-                                     num_points,
-                                     coors,
-                                     img_metas,
-                                     img=None,
-                                     rescale=False):
-    """Rewrite this func to remove voxelize op.
-
-    Args:
-        voxels (torch.Tensor): Point features or raw points in shape (N, M, C).
-        num_points (torch.Tensor): Number of points in each voxel.
-        coors (torch.Tensor): Coordinates of each voxel.
-        img_metas (list[dict]): Meta information of samples.
-        img (torch.Tensor): Input image.
-        rescale (Bool): Whether need rescale.
-
-    Returns:
-        list[dict]: Decoded bbox, scores and labels after nms.
-    """
-    _, pts_feats = self.extract_feat(
-        voxels, num_points, coors, img=img, img_metas=img_metas)
-    if pts_feats and self.with_pts_bbox:
-        bbox_pts = self.simple_test_pts(pts_feats, img_metas, rescale=rescale)
-    return bbox_pts
+    'mmdet3d.models.detectors.mvx_two_stage.MVXTwoStageDetector.extract_img_feat'  # noqa: E501
+)
+def mvxtwostagedetector__extract_img_feat(ctx, self,
+                                          img: torch.Tensor) -> dict:
+    """Run the image branch (backbone and optional neck) on ``img``.
+
+    Args:
+        ctx: The rewriter context (not used in this function).
+        self: The detector instance; must provide ``with_img_backbone``,
+            ``img_backbone``, ``with_img_neck`` and ``img_neck``.
+        img (torch.Tensor): Input image tensor. A 5-dim input is reduced
+            to 4 dims by merging its two leading dimensions before the
+            backbone is called.
+
+    Returns:
+        The output of ``self.img_backbone`` (passed through
+        ``self.img_neck`` when ``self.with_img_neck`` is set), or None when
+        the detector has no image backbone or ``img`` is None.
+    """
+    if not self.with_img_backbone or img is None:
+        return None
+    if img.dim() == 5 and img.size(0) == 1:
+        # Drop only the leading dim, out of place: a bare in-place
+        # `squeeze_()` would also remove any other singleton dimension
+        # and would mutate the tensor owned by the caller.
+        img = img.squeeze(0)
+    elif img.dim() == 5 and img.size(0) > 1:
+        B, N, C, H, W = img.size()
+        img = img.view(B * N, C, H, W)
+    img_feats = self.img_backbone(img)
+    if self.with_img_neck:
+        img_feats = self.img_neck(img_feats)
+    return img_feats


@FUNCTION_REWRITER.register_rewriter(
    'mmdet3d.models.detectors.mvx_two_stage.MVXTwoStageDetector.extract_feat')
-def mvxtwostagedetector__extract_feat(ctx, self, voxels, num_points, coors,
-                                      img, img_metas):
+def mvxtwostagedetector__extract_feat(ctx, self,
+                                      batch_inputs_dict: dict) -> tuple:
    """Rewrite this func to remove voxelize op.

    Args:
@ -44,63 +36,58 @@ def mvxtwostagedetector__extract_feat(ctx, self, voxels, num_points, coors,
        coors (torch.Tensor): Coordinates of each voxel.
        img (torch.Tensor): Input image.
        img_metas (list[dict]): Meta information of samples.
-
    Returns:
        tuple(torch.Tensor) : image feature and points feature.
    """
-    img_feats = self.extract_img_feat(img, img_metas)
-    pts_feats = self.extract_pts_feat(voxels, num_points, coors, img_feats,
-                                      img_metas)
+    voxel_dict = batch_inputs_dict.get('voxels', None)
+    imgs = batch_inputs_dict.get('imgs', None)
+    points = batch_inputs_dict.get('points', None)
+    img_feats = self.extract_img_feat(imgs)
+    pts_feats = self.extract_pts_feat(
+        voxel_dict, points=points, img_feats=img_feats)
    return (img_feats, pts_feats)


@FUNCTION_REWRITER.register_rewriter(
-    'mmdet3d.models.detectors.mvx_two_stage.MVXTwoStageDetector.'
-    'extract_pts_feat')
-def mvxtwostagedetector__extract_pts_feat(ctx, self, voxels, num_points, coors,
-                                          img_feats, img_metas):
-    """Extract features from points. Rewrite this func to remove voxelize op.
+    'mmdet3d.models.detectors.mvx_two_stage.MVXTwoStageDetector.forward')
+def mvxtwostagedetector__forward(ctx, self, inputs: list, **kwargs):
+    """Rewrite `forward` to take voxelized inputs and return head outputs.
+
+    The voxelize op is skipped: the already voxelized tensors are packed
+    into ``batch_inputs_dict`` and passed to ``self.extract_feat``, then
+    the point features go through ``self.pts_bbox_head``.

    Args:
-        voxels (torch.Tensor): Point features or raw points in shape (N, M, C).
-        num_points (torch.Tensor): Number of points in each voxel.
-        coors (torch.Tensor): Coordinates of each voxel.
-        img_feats (list[torch.Tensor], optional): Image features used for
-            multi-modality fusion. Defaults to None.
-        img_metas (list[dict]): Meta information of samples.
+        ctx: The rewriter context (not used in this function).
+        self: The detector instance; must provide ``extract_feat`` and
+            ``pts_bbox_head``.
+        inputs (list): Indexed as ``inputs[0]`` (voxels), ``inputs[1]``
+            (num_points) and ``inputs[2]`` (coors).
+        **kwargs: Accepted so callers may pass extra arguments such as
+            ``data_samples``; not used in this function.

    Returns:
-        torch.Tensor: Points feature.
+        tuple: ``(scores, bbox_preds, dir_scores)``. When the head returns
+        per-task dicts, the 'heatmap' entries form ``scores``, the 'reg',
+        'height', 'dim' and (if present) 'vel' entries form ``bbox_preds``
+        and the 'rot' entries form ``dir_scores``, each concatenated along
+        dim 1. Otherwise the first element of each of the first three
+        head outputs is returned.
    """
-    if not self.with_pts_bbox:
-        return None
-    voxel_features = self.pts_voxel_encoder(voxels, num_points, coors,
-                                            img_feats, img_metas)
-    batch_size = coors[-1, 0] + 1
-    x = self.pts_middle_encoder(voxel_features, coors, batch_size)
-    x = self.pts_backbone(x)
-    if self.with_pts_neck:
-        x = self.pts_neck(x)
-    return x
+    batch_inputs_dict = {
+        'voxels': {
+            'voxels': inputs[0],
+            'num_points': inputs[1],
+            'coors': inputs[2]
+        }
+    }

+    _, pts_feats = self.extract_feat(batch_inputs_dict=batch_inputs_dict)
+    outs = self.pts_bbox_head(pts_feats)

-@FUNCTION_REWRITER.register_rewriter(
-    'mmdet3d.models.detectors.mvx_two_stage.MVXTwoStageDetector.'
-    'simple_test_pts')
-def mvxtwostagedetector__simple_test_pts(ctx,
-                                         self,
-                                         x,
-                                         img_metas,
-                                         rescale=False):
-    """Rewrite this func to format model outputs.
-
-    Args:
-        x (torch.Tensor): Input points feature.
-        img_metas (list[dict]): Meta information of samples.
-        rescale (bool): Whether need rescale.
-
-    Returns:
-        List: Result of model.
-    """
-    bbox_preds, scores, dir_scores = self.pts_bbox_head(x)
-    return bbox_preds, scores, dir_scores
+    # `isinstance` also accepts dict subclasses, which an exact `type(...)`
+    # comparison would wrongly send to the tensor branch below.
+    if isinstance(outs[0][0], dict):
+        # Multi-task head output: one list of dicts per task
+        # (presumably a CenterHead-style head -- TODO confirm).
+        bbox_preds, scores, dir_scores = [], [], []
+        for task_res in outs:
+            bbox_preds.append(task_res[0]['reg'])
+            bbox_preds.append(task_res[0]['height'])
+            bbox_preds.append(task_res[0]['dim'])
+            if 'vel' in task_res[0].keys():
+                bbox_preds.append(task_res[0]['vel'])
+            scores.append(task_res[0]['heatmap'])
+            dir_scores.append(task_res[0]['rot'])
+        bbox_preds = torch.cat(bbox_preds, dim=1)
+        scores = torch.cat(scores, dim=1)
+        dir_scores = torch.cat(dir_scores, dim=1)
+        return scores, bbox_preds, dir_scores
+    else:
+        cls_score, bbox_pred, dir_cls_pred = outs[0][0], outs[1][0], outs[2][0]
+        return cls_score, bbox_pred, dir_cls_pred
--- a/mmdeploy/codebase/mmdet3d/models/pillar_encode.py
+++ b/mmdeploy/codebase/mmdet3d/models/pillar_encode.py
@ -7,7 +7,8 @@ from mmdeploy.core import FUNCTION_REWRITER

@FUNCTION_REWRITER.register_rewriter(
    'mmdet3d.models.voxel_encoders.pillar_encoder.PillarFeatureNet.forward')
-def pillar_encoder__forward(ctx, self, features, num_points, coors):
+def pillar_encoder__forward(ctx, self, features, num_points, coors, *args,
+                            **kwargs):
    """Rewrite this func to optimize node. Modify the code at
    _with_voxel_center and use slice instead of the original operation.

--- a/mmdeploy/codebase/mmdet3d/models/pillar_scatter.py
+++ b/mmdeploy/codebase/mmdet3d/models/pillar_scatter.py
@ -30,6 +30,7 @@ def pointpillarsscatter__forward(ctx,
    indices = indices.long()
    voxels = voxel_features.t()
    # Now scatter the blob back to the canvas.
+
    canvas.scatter_(
        dim=1, index=indices.expand(canvas.shape[0], -1), src=voxels)
    # Undo the column stacking to final 4-dim tensor
--- a/mmdeploy/codebase/mmdet3d/models/voxelnet.py
+++ b/mmdeploy/codebase/mmdet3d/models/voxelnet.py
@ -1,58 +0,0 @@
-# Copyright (c) OpenMMLab. All rights reserved.
-from mmdeploy.core import FUNCTION_REWRITER
-
-
-@FUNCTION_REWRITER.register_rewriter(
-    'mmdet3d.models.detectors.voxelnet.VoxelNet.simple_test')
-def voxelnet__simple_test(ctx,
-                          self,
-                          voxels,
-                          num_points,
-                          coors,
-                          img_metas=None,
-                          imgs=None,
-                          rescale=False):
-    """Test function without augmentaiton. Rewrite this func to remove model
-    post process.
-
-    Args:
-        voxels (torch.Tensor): Point features or raw points in shape (N, M, C).
-        num_points (torch.Tensor): Number of points in each pillar.
-        coors (torch.Tensor): Coordinates of each voxel.
-        input_metas (list[dict]): Contain pcd meta info.
-
-    Returns:
-        List: Result of model.
-    """
-    x = self.extract_feat(voxels, num_points, coors, img_metas)
-    bbox_preds, scores, dir_scores = self.bbox_head(x)
-    return bbox_preds, scores, dir_scores
-
-
-@FUNCTION_REWRITER.register_rewriter(
-    'mmdet3d.models.detectors.voxelnet.VoxelNet.extract_feat')
-def voxelnet__extract_feat(ctx,
-                           self,
-                           voxels,
-                           num_points,
-                           coors,
-                           img_metas=None):
-    """Extract features from points. Rewrite this func to remove voxelize op.
-
-    Args:
-        voxels (torch.Tensor): Point features or raw points in shape (N, M, C).
-        num_points (torch.Tensor): Number of points in each pillar.
-        coors (torch.Tensor): Coordinates of each voxel.
-        input_metas (list[dict]): Contain pcd meta info.
-
-    Returns:
-        torch.Tensor: Features from points.
-    """
-    voxel_features = self.voxel_encoder(voxels, num_points, coors)
-    batch_size = coors[-1, 0] + 1  # refactor
-    assert batch_size == 1
-    x = self.middle_encoder(voxel_features, coors, batch_size)
-    x = self.backbone(x)
-    if self.with_neck:
-        x = self.neck(x)
-    return x
--- a/mmdeploy/codebase/mmedit/init.py
+++ b/mmdeploy/codebase/mmedit/init.py
@ -1,5 +1,4 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 from .deploy import MMEditing, SuperResolution
-from .models import base_edit_model__forward

-__all__ = ['MMEditing', 'SuperResolution', 'base_edit_model__forward']
+__all__ = ['MMEditing', 'SuperResolution']
--- a/mmdeploy/codebase/mmedit/models/init.py
+++ b/mmdeploy/codebase/mmedit/models/init.py
@ -1,2 +1,2 @@
 # Copyright (c) OpenMMLab. All rights reserved.
-from .base_models import *  # noqa F401, F403
+from . import base_models  # noqa F401, F403
--- a/mmdeploy/codebase/mmedit/models/base_models/init.py
+++ b/mmdeploy/codebase/mmedit/models/base_models/init.py
@ -1,4 +1,2 @@
 # Copyright (c) OpenMMLab. All rights reserved.
-from .base_edit_model import base_edit_model__forward
-
-__all__ = ['base_edit_model__forward']
+from . import base_edit_model  # noqa: F401,F403
--- a/mmdeploy/codebase/mmocr/init.py
+++ b/mmdeploy/codebase/mmocr/init.py
@ -1,3 +1,2 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 from .deploy import *  # noqa: F401,F403
-from .models import *  # noqa: F401,F403
--- a/mmdeploy/codebase/mmocr/models/init.py
+++ b/mmdeploy/codebase/mmocr/models/init.py
@ -1,3 +1,3 @@
 # Copyright (c) OpenMMLab. All rights reserved.
-from .text_detection import *  # noqa: F401,F403
-from .text_recognition import *  # noqa: F401,F403
+from . import text_detection  # noqa: F401,F403
+from . import text_recognition  # noqa: F401,F403
--- a/mmdeploy/codebase/mmocr/models/text_detection/init.py
+++ b/mmdeploy/codebase/mmocr/models/text_detection/init.py
@ -1,9 +1,4 @@
 # Copyright (c) OpenMMLab. All rights reserved.
-from .fpn_cat import fpnc__forward__tensorrt
-from .heads import base_text_det_head__predict, db_head__predict
-from .single_stage_text_detector import single_stage_text_detector__forward
-
-__all__ = [
-    'fpnc__forward__tensorrt', 'base_text_det_head__predict',
-    'single_stage_text_detector__forward', 'db_head__predict'
-]
+from . import fpn_cat  # noqa: F401,F403
+from . import heads  # noqa: F401,F403
+from . import single_stage_text_detector  # noqa: F401,F403
--- a/mmdeploy/codebase/mmocr/models/text_detection/single_stage_text_detector.py
+++ b/mmdeploy/codebase/mmocr/models/text_detection/single_stage_text_detector.py
@ -20,7 +20,7 @@ def single_stage_text_detector__forward(

    Args:
        batch_inputs (torch.Tensor): Images of shape (N, C, H, W).
-        batch_data_samples (list[TextDetDataSample]): A list of N
+        data_samples (list[TextDetDataSample]): A list of N
            datasamples, containing meta information and gold annotations
            for each of the images.

--- a/mmdeploy/codebase/mmocr/models/text_recognition/init.py
+++ b/mmdeploy/codebase/mmocr/models/text_recognition/init.py
@ -1,14 +1,7 @@
 # Copyright (c) OpenMMLab. All rights reserved.
-# from .base import base_recognizer__forward
-from .base_decoder import base_decoder__forward
-from .crnn_decoder import crnndecoder__forward_train__ncnn
-from .encoder_decoder_recognizer import encoder_decoder_recognizer__forward
-from .lstm_layer import bidirectionallstm__forward__ncnn
-from .sar_decoder import *  # noqa: F401,F403
-from .sar_encoder import sar_encoder__forward
-
-__all__ = [
-    'base_decoder__forward', 'crnndecoder__forward_train__ncnn',
-    'encoder_decoder_recognizer__forward', 'bidirectionallstm__forward__ncnn',
-    'sar_encoder__forward'
-]
+from . import base_decoder  # noqa: F401,F403
+from . import crnn_decoder  # noqa: F401,F403
+from . import encoder_decoder_recognizer  # noqa: F401,F403
+from . import lstm_layer  # noqa: F401,F403
+from . import sar_decoder  # noqa: F401,F403
+from . import sar_encoder  # noqa: F401,F403
--- a/mmdeploy/codebase/mmocr/models/text_recognition/base_decoder.py
+++ b/mmdeploy/codebase/mmocr/models/text_recognition/base_decoder.py
@ -16,7 +16,7 @@ def base_decoder__forward(
    out_enc: Optional[torch.Tensor] = None,
    data_samples: Optional[Sequence[TextRecogDataSample]] = None
 ) -> Sequence[TextRecogDataSample]:
-    """Perform forward propagation of the decoder and postprocessor.
+    """Rewrite `predict` of `BaseDecoder` to skip post-process.

    Args:
        feat (Tensor, optional): Features from the backbone. Defaults
--- a/mmdeploy/codebase/mmocr/models/text_recognition/encoder_decoder_recognizer.py
+++ b/mmdeploy/codebase/mmocr/models/text_recognition/encoder_decoder_recognizer.py
@ -20,13 +20,10 @@ def encoder_decoder_recognizer__forward(ctx, self, batch_inputs: torch.Tensor,
        ctx (ContextCaller): The context with additional information.
        self: The instance of the class
            EncoderDecoderRecognizer.
-        img (Tensor): Input images of shape (N, C, H, W).
+        batch_inputs (Tensor): Input images of shape (N, C, H, W).
            Typically these should be mean centered and std scaled.
-        img_metas (list[dict]): A list of image info dict where each dict
-            has: 'img_shape', 'scale_factor', 'flip', and may also contain
-            'filename', 'ori_shape', 'pad_shape', and 'img_norm_cfg'.
-            For details on the values of these keys, see
-            :class:`mmdet.datasets.pipelines.Collect`.
+        data_samples (TextRecogDataSample): Containing meta information
+            and gold annotations for each of the images. Defaults to None.

    Returns:
        out_dec (Tensor): A feature map output from a decoder. The tensor shape
--- a/mmdeploy/codebase/mmpose/init.py
+++ b/mmdeploy/codebase/mmpose/init.py
@ -1,5 +1,4 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 from .deploy import PoseDetection
-from .models import *  # noqa: F401,F403

 __all__ = ['PoseDetection']
--- a/mmdeploy/codebase/mmpose/models/init.py
+++ b/mmdeploy/codebase/mmpose/models/init.py
@ -1,4 +1,4 @@
 # Copyright (c) OpenMMLab. All rights reserved.

-from .heads import *  # noqa: F401,F403
-from .pose_estimators import *  # noqa: F401,F403
+from . import heads  # noqa: F401,F403
+from . import pose_estimators  # noqa: F401,F403
--- a/mmdeploy/codebase/mmrotate/models/dense_heads/oriented_rpn_head.py
+++ b/mmdeploy/codebase/mmrotate/models/dense_heads/oriented_rpn_head.py
@ -1,8 +1,8 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 import torch

-from mmdeploy.codebase.mmdet import (get_post_processing_params,
-                                     pad_with_value_if_necessary)
+from mmdeploy.codebase.mmdet.deploy import (get_post_processing_params,
+                                            pad_with_value_if_necessary)
 from mmdeploy.codebase.mmrotate.core.post_processing import \
    fake_multiclass_nms_rotated
 from mmdeploy.core import FUNCTION_REWRITER
--- a/mmdeploy/codebase/mmrotate/models/dense_heads/rotated_anchor_head.py
+++ b/mmdeploy/codebase/mmrotate/models/dense_heads/rotated_anchor_head.py
@ -1,8 +1,8 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 import torch

-from mmdeploy.codebase.mmdet import (get_post_processing_params,
-                                     pad_with_value_if_necessary)
+from mmdeploy.codebase.mmdet.deploy import (get_post_processing_params,
+                                            pad_with_value_if_necessary)
 from mmdeploy.codebase.mmrotate.core.post_processing import \
    multiclass_nms_rotated
 from mmdeploy.core import FUNCTION_REWRITER
--- a/mmdeploy/codebase/mmrotate/models/dense_heads/rotated_rpn_head.py
+++ b/mmdeploy/codebase/mmrotate/models/dense_heads/rotated_rpn_head.py
@ -1,9 +1,9 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 import torch

-from mmdeploy.codebase.mmdet import (get_post_processing_params,
-                                     pad_with_value_if_necessary)
-from mmdeploy.codebase.mmdet.core.post_processing import multiclass_nms
+from mmdeploy.codebase.mmdet.deploy import (get_post_processing_params,
+                                            pad_with_value_if_necessary)
+from mmdeploy.codebase.mmdet.models.layers import multiclass_nms
 from mmdeploy.core import FUNCTION_REWRITER
 from mmdeploy.utils import is_dynamic_shape

--- a/mmdeploy/codebase/mmrotate/models/roi_heads/gv_bbox_head.py
+++ b/mmdeploy/codebase/mmrotate/models/roi_heads/gv_bbox_head.py
@ -1,7 +1,7 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 import torch.nn.functional as F

-from mmdeploy.codebase.mmdet import get_post_processing_params
+from mmdeploy.codebase.mmdet.deploy import get_post_processing_params
 from mmdeploy.codebase.mmrotate.core.post_processing import \
    multiclass_nms_rotated
 from mmdeploy.core import FUNCTION_REWRITER
--- a/mmdeploy/codebase/mmrotate/models/roi_heads/rotated_bbox_head.py
+++ b/mmdeploy/codebase/mmrotate/models/roi_heads/rotated_bbox_head.py
@ -2,7 +2,7 @@
 import torch
 import torch.nn.functional as F

-from mmdeploy.codebase.mmdet import get_post_processing_params
+from mmdeploy.codebase.mmdet.deploy import get_post_processing_params
 from mmdeploy.codebase.mmrotate.core.post_processing import \
    multiclass_nms_rotated
 from mmdeploy.core import FUNCTION_REWRITER
--- a/mmdeploy/codebase/mmseg/init.py
+++ b/mmdeploy/codebase/mmseg/init.py
@ -1,3 +1,2 @@
 # Copyright (c) OpenMMLab. All rights reserved.
 from .deploy import *  # noqa: F401,F403
-from .models import *  # noqa: F401,F403
--- a/mmdeploy/codebase/mmseg/models/init.py
+++ b/mmdeploy/codebase/mmseg/models/init.py
@ -1,4 +1,4 @@
 # Copyright (c) OpenMMLab. All rights reserved.
-from .decode_heads import *  # noqa: F401,F403
-from .segmentors import *  # noqa: F401,F403
-from .utils import *  # noqa: F401,F403
+from . import decode_heads  # noqa: F401,F403
+from . import segmentors  # noqa: F401,F403
+from . import utils  # noqa: F401,F403
--- a/mmdeploy/codebase/mmseg/models/utils/init.py
+++ b/mmdeploy/codebase/mmseg/models/utils/init.py
@ -1,4 +1,2 @@
 # Copyright (c) OpenMMLab. All rights reserved.
-from .up_conv_block import up_conv_block__forward
-
-__all__ = ['up_conv_block__forward']
+from . import up_conv_block  # noqa: F401,F403
--- a/mmdeploy/core/optimizers/init.py
+++ b/mmdeploy/core/optimizers/init.py
@ -2,10 +2,10 @@
 from .extractor import create_extractor, parse_extractor_io_string
 from .function_marker import mark, reset_mark_function_count
 from .optimize import (attribute_to_dict, get_new_name, remove_identity,
-                       rename_value)
+                       remove_imports, rename_value)

 __all__ = [
    'mark', 'reset_mark_function_count', 'create_extractor',
    'parse_extractor_io_string', 'remove_identity', 'attribute_to_dict',
-    'rename_value', 'get_new_name'
+    'rename_value', 'get_new_name', 'remove_imports'
 ]
--- a/mmdeploy/core/optimizers/optimize.py
+++ b/mmdeploy/core/optimizers/optimize.py
@ -206,3 +206,24 @@ def remove_identity(model: onnx.ModelProto):
        pass

    remove_nodes(model, is_identity)
+
+
+def remove_imports(model: onnx.ModelProto):
+    """Remove useless imports from an ONNX model.
+
+    The domain like `mmdeploy` might influence model conversion for
+    some backends. `model.opset_import` is modified in place.
+
+    Args:
+        model (onnx.ModelProto): Input onnx model.
+    """
+    logger = get_root_logger()
+    dst_domain = ['']
+    for node in model.graph.node:
+        if hasattr(node, 'module') and (node.module not in dst_domain):
+            dst_domain.append(node.module)
+    # Walk backwards so each pop leaves the still-unvisited indices valid.
+    for i in reversed(range(len(model.opset_import))):
+        if model.opset_import[i].domain not in dst_domain:
+            logger.info(f'remove opset_import {model.opset_import[i].domain}')
+            model.opset_import.pop(i)
--- a/mmdeploy/mmcv/__init__.py
+++ b/mmdeploy/mmcv/__init__.py
@ -1,3 +1,3 @@
 # Copyright (c) OpenMMLab. All rights reserved.
-from .cnn import *  # noqa: F401,F403
-from .ops import *  # noqa: F401,F403
+from . import cnn  # noqa: F401,F403
+from . import ops  # noqa: F401,F403
--- a/mmdeploy/mmcv/cnn/__init__.py
+++ b/mmdeploy/mmcv/cnn/__init__.py
@ -2,4 +2,4 @@
 from . import conv2d_adaptive_padding  # noqa: F401,F403
 from .transformer import MultiHeadAttentionop

-__all__ = ['conv2d_adaptive_padding', 'MultiHeadAttentionop']
+__all__ = ['MultiHeadAttentionop']
--- a/mmdeploy/mmcv/ops/__init__.py
+++ b/mmdeploy/mmcv/ops/__init__.py
@ -1,15 +1,14 @@
 # Copyright (c) OpenMMLab. All rights reserved.
-from .deform_conv import deform_conv_openvino
-from .modulated_deform_conv import modulated_deform_conv_default
-from .nms import *  # noqa: F401,F403
-from .nms_rotated import *  # noqa: F401,F403
-from .point_sample import *  # noqa: F401,F403
-from .roi_align import roi_align_default
-from .roi_align_rotated import roi_align_rotated_default
-from .transformer import patch_embed__forward__ncnn
+from . import deform_conv  # noqa: F401,F403
+from . import modulated_deform_conv  # noqa: F401,F403
+from . import point_sample  # noqa: F401,F403
+from . import roi_align  # noqa: F401,F403
+from . import roi_align_rotated  # noqa: F401,F403
+from . import transformer  # noqa: F401,F403
+from .nms import ONNXNMSop, TRTBatchedNMSop
+from .nms_rotated import ONNXNMSRotatedOp, TRTBatchedRotatedNMSop

 __all__ = [
-    'roi_align_default', 'modulated_deform_conv_default',
-    'deform_conv_openvino', 'roi_align_rotated_default',
-    'patch_embed__forward__ncnn'
+    'ONNXNMSop', 'TRTBatchedNMSop', 'TRTBatchedRotatedNMSop',
+    'ONNXNMSRotatedOp'
 ]
--- a/mmdeploy/pytorch/__init__.py
+++ b/mmdeploy/pytorch/__init__.py
@ -1,3 +1,3 @@
 # Copyright (c) OpenMMLab. All rights reserved.
-from .functions import *  # noqa: F401,F403
-from .ops import *  # noqa: F401,F403
+from . import functions  # noqa: F401,F403
+from . import symbolics  # noqa: F401,F403
--- a/mmdeploy/pytorch/functions/__init__.py
+++ b/mmdeploy/pytorch/functions/__init__.py
@ -1,35 +1,22 @@
 # Copyright (c) OpenMMLab. All rights reserved.
-from . import multi_head_attention_forward
-from .adaptive_pool import (adaptive_avg_pool2d__default,
-                            adaptive_avg_pool2d__ncnn)
-from .atan2 import atan2__default
-from .chunk import chunk__ncnn, chunk__torchscript
-from .clip import clip__coreml
-from .expand import expand__ncnn
-from .flatten import flatten__coreml
-from .getattribute import tensor__getattribute__ncnn
-from .group_norm import group_norm__ncnn
-from .interpolate import interpolate__ncnn, interpolate__tensorrt
-from .linear import linear__ncnn
-from .masked_fill import masked_fill__onnxruntime
-from .mod import mod__tensorrt
-from .normalize import normalize__ncnn
-from .pad import _prepare_onnx_paddings__tensorrt
-from .repeat import tensor__repeat__tensorrt
-from .size import tensor__size__ncnn
-from .tensor_getitem import tensor__getitem__ascend
-from .tensor_setitem import tensor__setitem__default
-from .topk import topk__dynamic, topk__tensorrt
-from .triu import triu__default
-
-__all__ = [
-    'tensor__getattribute__ncnn', 'group_norm__ncnn', 'interpolate__ncnn',
-    'interpolate__tensorrt', 'linear__ncnn', 'tensor__repeat__tensorrt',
-    'tensor__size__ncnn', 'topk__dynamic', 'topk__tensorrt', 'chunk__ncnn',
-    'triu__default', 'atan2__default', 'normalize__ncnn', 'expand__ncnn',
-    'chunk__torchscript', 'masked_fill__onnxruntime',
-    'tensor__setitem__default', 'tensor__getitem__ascend',
-    'adaptive_avg_pool2d__default', 'adaptive_avg_pool2d__ncnn',
-    'multi_head_attention_forward', 'flatten__coreml', 'clip__coreml',
-    'mod__tensorrt', '_prepare_onnx_paddings__tensorrt'
-]
+from . import adaptive_pool  # noqa: F401,F403
+from . import atan2  # noqa: F401,F403
+from . import chunk  # noqa: F401,F403
+from . import clip  # noqa: F401,F403
+from . import expand  # noqa: F401,F403
+from . import flatten  # noqa: F401,F403
+from . import getattribute  # noqa: F401,F403
+from . import group_norm  # noqa: F401,F403
+from . import interpolate  # noqa: F401,F403
+from . import linear  # noqa: F401,F403
+from . import masked_fill  # noqa: F401,F403
+from . import mod  # noqa: F401,F403
+from . import multi_head_attention_forward  # noqa: F401,F403
+from . import normalize  # noqa: F401,F403
+from . import pad  # noqa: F401,F403
+from . import repeat  # noqa: F401,F403
+from . import size  # noqa: F401,F403
+from . import tensor_getitem  # noqa: F401,F403
+from . import tensor_setitem  # noqa: F401,F403
+from . import topk  # noqa: F401,F403
+from . import triu  # noqa: F401,F403
--- a/mmdeploy/pytorch/functions/interpolate.py
+++ b/mmdeploy/pytorch/functions/interpolate.py
@ -40,6 +40,38 @@ def interpolate__ncnn(ctx,
        recompute_scale_factor=recompute_scale_factor)


+@FUNCTION_REWRITER.register_rewriter(
+    func_name='torch.nn.functional.interpolate', backend='rknn')
+def interpolate__rknn(ctx,
+                      input: torch.Tensor,
+                      size: Optional[Union[int, Tuple[int], Tuple[int, int],
+                                           Tuple[int, int, int]]] = None,
+                      scale_factor: Optional[Union[float,
+                                                   Tuple[float]]] = None,
+                      mode: str = 'nearest',
+                      align_corners: Optional[bool] = None,
+                      recompute_scale_factor: Optional[bool] = None):
+    """Rewrite `interpolate` for rknn backend.
+
+    rknn requires `size` to be constant in the ONNX node, so `scale_factor`
+    is used instead of `size` to avoid dynamic size: when `scale_factor` is
+    None it is computed as `size / input.shape[2:]` per dimension (an int
+    `size` is applied to every such dimension) and `size=None` is passed on.
+    """
+    if scale_factor is None:
+        spatial_size = input.shape[2:]
+        # `size` may be a bare int; expand it to one entry per dimension.
+        if isinstance(size, int):
+            size = [size] * len(spatial_size)
+        scale_factor = [o / i for o, i in zip(size, spatial_size)]
+        if isinstance(scale_factor[0], torch.Tensor):
+            scale_factor = [s.item() for s in scale_factor]
+
+    return ctx.origin_func(
+        input, None, scale_factor, mode=mode, align_corners=align_corners,
+        recompute_scale_factor=recompute_scale_factor)
+
+
@FUNCTION_REWRITER.register_rewriter(
    'torch.nn.functional.interpolate',
    is_pytorch=True,
--- a/mmdeploy/pytorch/ops/__init__.py
+++ b/mmdeploy/pytorch/ops/__init__.py
@ -1,17 +0,0 @@
-# Copyright (c) OpenMMLab. All rights reserved.
-from .adaptive_pool import adaptive_avg_pool2d__ncnn
-from .gelu import gelu__ncnn
-from .grid_sampler import grid_sampler__default
-from .hardsigmoid import hardsigmoid__default
-from .instance_norm import instance_norm__tensorrt
-from .layer_norm import layer_norm__ncnn
-from .linear import linear__ncnn
-from .lstm import generic_rnn__ncnn
-from .roll import roll_default
-from .squeeze import squeeze__default
-
-__all__ = [
-    'grid_sampler__default', 'hardsigmoid__default', 'instance_norm__tensorrt',
-    'generic_rnn__ncnn', 'squeeze__default', 'adaptive_avg_pool2d__ncnn',
-    'gelu__ncnn', 'layer_norm__ncnn', 'linear__ncnn', 'roll_default'
-]
--- a/mmdeploy/pytorch/symbolics/__init__.py
+++ b/mmdeploy/pytorch/symbolics/__init__.py
@ -0,0 +1,11 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+from . import adaptive_pool  # noqa: F401,F403
+from . import gelu  # noqa: F401,F403
+from . import grid_sampler  # noqa: F401,F403
+from . import hardsigmoid  # noqa: F401,F403
+from . import instance_norm  # noqa: F401,F403
+from . import layer_norm  # noqa: F401,F403
+from . import linear  # noqa: F401,F403
+from . import lstm  # noqa: F401,F403
+from . import roll  # noqa: F401,F403
+from . import squeeze  # noqa: F401,F403
--- a/mmdeploy/pytorch/symbolics/adaptive_pool.py
+++ b/mmdeploy/pytorch/symbolics/adaptive_pool.py
--- a/mmdeploy/pytorch/symbolics/gelu.py
+++ b/mmdeploy/pytorch/symbolics/gelu.py
--- a/mmdeploy/pytorch/symbolics/grid_sampler.py
+++ b/mmdeploy/pytorch/symbolics/grid_sampler.py
--- a/mmdeploy/pytorch/symbolics/hardsigmoid.py
+++ b/mmdeploy/pytorch/symbolics/hardsigmoid.py
--- a/mmdeploy/pytorch/symbolics/instance_norm.py
+++ b/mmdeploy/pytorch/symbolics/instance_norm.py
--- a/mmdeploy/pytorch/symbolics/layer_norm.py
+++ b/mmdeploy/pytorch/symbolics/layer_norm.py
--- a/Show More
+++ b/Show More