From 77d9ad0c6b750974a8faafc58088f99c91d3066e Mon Sep 17 00:00:00 2001 From: tripleMu Date: Sun, 29 Jan 2023 17:48:54 +0800 Subject: [PATCH 01/64] [CI]: fix isort version issue in lint (#492) * fix lint * fix-lint --- .pre-commit-config-zh-cn.yaml | 4 ++-- .pre-commit-config.yaml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.pre-commit-config-zh-cn.yaml b/.pre-commit-config-zh-cn.yaml index 9b98d009..a7a68afb 100644 --- a/.pre-commit-config-zh-cn.yaml +++ b/.pre-commit-config-zh-cn.yaml @@ -4,8 +4,8 @@ repos: rev: 5.0.4 hooks: - id: flake8 - - repo: https://gitee.com/openmmlab/mirrors-isort - rev: 5.10.1 + - repo: https://gitee.com/zhouzaida/mirrors-isort + rev: 5.12.1 hooks: - id: isort - repo: https://gitee.com/openmmlab/mirrors-yapf diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 7ccdff38..626d6492 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -4,8 +4,8 @@ repos: rev: 5.0.4 hooks: - id: flake8 - - repo: https://github.com/PyCQA/isort - rev: 5.10.1 + - repo: https://github.com/zhouzaida/isort + rev: 5.12.1 hooks: - id: isort - repo: https://github.com/pre-commit/mirrors-yapf From 74558aa2f7dd76c2f97991611d8b6e391875cc0d Mon Sep 17 00:00:00 2001 From: tripleMu Date: Tue, 31 Jan 2023 14:50:10 +0800 Subject: [PATCH 02/64] Use official isort --- .pre-commit-config-zh-cn.yaml | 4 ++-- .pre-commit-config.yaml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.pre-commit-config-zh-cn.yaml b/.pre-commit-config-zh-cn.yaml index a7a68afb..52bb607e 100644 --- a/.pre-commit-config-zh-cn.yaml +++ b/.pre-commit-config-zh-cn.yaml @@ -4,8 +4,8 @@ repos: rev: 5.0.4 hooks: - id: flake8 - - repo: https://gitee.com/zhouzaida/mirrors-isort - rev: 5.12.1 + - repo: https://gitee.com/openmmlab/mirrors-isort + rev: 5.11.5 hooks: - id: isort - repo: https://gitee.com/openmmlab/mirrors-yapf diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 626d6492..ffae20d2 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -4,8 +4,8 @@ repos: rev: 5.0.4 hooks: - id: flake8 - - repo: https://github.com/zhouzaida/isort - rev: 5.12.1 + - repo: https://github.com/PyCQA/isort + rev: 5.11.5 hooks: - id: isort - repo: https://github.com/pre-commit/mirrors-yapf From 79f0aae55516a9085342cc4d5b99521453a393d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Haian=20Huang=28=E6=B7=B1=E5=BA=A6=E7=9C=B8=29?= <1286304229@qq.com> Date: Fri, 3 Feb 2023 14:28:35 +0800 Subject: [PATCH 03/64] Beautify the YOLOv5 configuration (#501) * refactor_config --- ...v5_m-p6-v62_syncbn_fast_8xb16-300e_coco.py | 14 +- ...olov5_m-v61_syncbn_fast_8xb16-300e_coco.py | 14 +- ...v5_s-p6-v62_syncbn_fast_8xb16-300e_coco.py | 33 ++-- ...ov5_s-v61_syncbn-detect_8xb16-300e_coco.py | 4 +- .../yolov5_s-v61_syncbn_8xb16-300e_coco.py | 145 ++++++++++++------ ...ov5_s-v61_syncbn_fast_1xb4-300e_balloon.py | 37 +++-- 6 files changed, 157 insertions(+), 90 deletions(-) diff --git a/configs/yolov5/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco.py b/configs/yolov5/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco.py index f2ccf787..f593e378 100644 --- a/configs/yolov5/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco.py +++ b/configs/yolov5/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco.py @@ -1,10 +1,15 @@ _base_ = './yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco.py' +# ========================modified parameters====================== deepen_factor = 0.67 widen_factor = 0.75 -lr_factor = 0.1 # lrf=0.1 +lr_factor = 0.1 affine_scale = 0.9 +loss_cls_weight = 0.3 +loss_obj_weight = 
0.7 +mixup_prob = 0.1 +# =======================Unmodified in most cases================== num_classes = _base_.num_classes num_det_layers = _base_.num_det_layers img_scale = _base_.img_scale @@ -20,9 +25,9 @@ model = dict( ), bbox_head=dict( head_module=dict(widen_factor=widen_factor), - loss_cls=dict(loss_weight=0.3 * + loss_cls=dict(loss_weight=loss_cls_weight * (num_classes / 80 * 3 / num_det_layers)), - loss_obj=dict(loss_weight=0.7 * + loss_obj=dict(loss_weight=loss_obj_weight * ((img_scale[0] / 640)**2 * 3 / num_det_layers)))) pre_transform = _base_.pre_transform @@ -49,7 +54,7 @@ train_pipeline = [ *pre_transform, *mosaic_affine_pipeline, dict( type='YOLOv5MixUp', - prob=0.1, + prob=mixup_prob, pre_transform=[*pre_transform, *mosaic_affine_pipeline]), dict( type='mmdet.Albu', @@ -71,5 +76,4 @@ train_pipeline = [ ] train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) - default_hooks = dict(param_scheduler=dict(lr_factor=lr_factor)) diff --git a/configs/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco.py b/configs/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco.py index fdce9603..d2ef324e 100644 --- a/configs/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco.py +++ b/configs/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco.py @@ -1,10 +1,15 @@ _base_ = './yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py' +# ========================modified parameters====================== deepen_factor = 0.67 widen_factor = 0.75 -lr_factor = 0.1 # lrf=0.1 +lr_factor = 0.1 affine_scale = 0.9 +loss_cls_weight = 0.3 +loss_obj_weight = 0.7 +mixup_prob = 0.1 +# =======================Unmodified in most cases================== num_classes = _base_.num_classes num_det_layers = _base_.num_det_layers img_scale = _base_.img_scale @@ -20,9 +25,9 @@ model = dict( ), bbox_head=dict( head_module=dict(widen_factor=widen_factor), - loss_cls=dict(loss_weight=0.3 * + loss_cls=dict(loss_weight=loss_cls_weight * (num_classes / 80 * 3 / num_det_layers)), - loss_obj=dict(loss_weight=0.7 * + loss_obj=dict(loss_weight=loss_obj_weight * ((img_scale[0] / 640)**2 * 3 / num_det_layers)))) pre_transform = _base_.pre_transform @@ -49,7 +54,7 @@ train_pipeline = [ *pre_transform, *mosaic_affine_pipeline, dict( type='YOLOv5MixUp', - prob=0.1, + prob=mixup_prob, pre_transform=[*pre_transform, *mosaic_affine_pipeline]), dict( type='mmdet.Albu', @@ -71,5 +76,4 @@ train_pipeline = [ ] train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) - default_hooks = dict(param_scheduler=dict(lr_factor=lr_factor)) diff --git a/configs/yolov5/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco.py b/configs/yolov5/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco.py index d7cb0925..3d14484f 100644 --- a/configs/yolov5/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco.py +++ b/configs/yolov5/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco.py @@ -1,19 +1,32 @@ _base_ = 'yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py' +# ========================modified parameters====================== img_scale = (1280, 1280) # width, height -num_classes = 80 -# only on Val -batch_shapes_cfg = dict(img_size=img_scale[0], size_divisor=64) - +num_classes = 80 # Number of classes for classification +# Config of batch shapes. Only on val. +# It means not used if batch_shapes_cfg is None. 
+batch_shapes_cfg = dict( + img_size=img_scale[0], + # The image scale of padding should be divided by pad_size_divisor + size_divisor=64) +# Basic size of multi-scale prior box anchors = [ [(19, 27), (44, 40), (38, 94)], # P3/8 [(96, 68), (86, 152), (180, 137)], # P4/16 [(140, 301), (303, 264), (238, 542)], # P5/32 [(436, 615), (739, 380), (925, 792)] # P6/64 ] +# Strides of multi-scale prior box strides = [8, 16, 32, 64] -num_det_layers = 4 +num_det_layers = 4 # The number of model output scales +loss_cls_weight = 0.5 +loss_bbox_weight = 0.05 +loss_obj_weight = 1.0 +# The obj loss weights of the three output layers +obj_level_weights = [4.0, 1.0, 0.25, 0.06] +affine_scale = 0.5 # YOLOv5RandomAffine scaling ratio +# =======================Unmodified in most cases================== model = dict( backbone=dict(arch='P6', out_indices=(2, 3, 4, 5)), neck=dict( @@ -23,12 +36,12 @@ model = dict( in_channels=[256, 512, 768, 1024], featmap_strides=strides), prior_generator=dict(base_sizes=anchors, strides=strides), # scaled based on number of detection layers - loss_cls=dict(loss_weight=0.5 * + loss_cls=dict(loss_weight=loss_cls_weight * (num_classes / 80 * 3 / num_det_layers)), - loss_bbox=dict(loss_weight=0.05 * (3 / num_det_layers)), - loss_obj=dict(loss_weight=1.0 * + loss_bbox=dict(loss_weight=loss_bbox_weight * (3 / num_det_layers)), + loss_obj=dict(loss_weight=loss_obj_weight * ((img_scale[0] / 640)**2 * 3 / num_det_layers)), - obj_level_weights=[4.0, 1.0, 0.25, 0.06])) + obj_level_weights=obj_level_weights)) pre_transform = _base_.pre_transform albu_train_transforms = _base_.albu_train_transforms @@ -44,7 +57,7 @@ train_pipeline = [ type='YOLOv5RandomAffine', max_rotate_degree=0.0, max_shear_degree=0.0, - scaling_ratio_range=(0.5, 1.5), + scaling_ratio_range=(1 - affine_scale, 1 + affine_scale), # img_scale is (width, height) border=(-img_scale[0] // 2, -img_scale[1] // 2), border_val=(114, 114, 114)), diff --git a/configs/yolov5/yolov5_s-v61_syncbn-detect_8xb16-300e_coco.py b/configs/yolov5/yolov5_s-v61_syncbn-detect_8xb16-300e_coco.py index 2789c959..627f9859 100644 --- a/configs/yolov5/yolov5_s-v61_syncbn-detect_8xb16-300e_coco.py +++ b/configs/yolov5/yolov5_s-v61_syncbn-detect_8xb16-300e_coco.py @@ -1,9 +1,7 @@ _base_ = 'yolov5_s-v61_syncbn_8xb16-300e_coco.py' test_pipeline = [ - dict( - type='LoadImageFromFile', - file_client_args={{_base_.file_client_args}}), + dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args), dict( type='LetterResize', scale=_base_.img_scale, diff --git a/configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py b/configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py index d06f75c4..b90f6785 100644 --- a/configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py +++ b/configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py @@ -1,47 +1,95 @@ _base_ = '../_base_/default_runtime.py' -# dataset settings -data_root = 'data/coco/' -dataset_type = 'YOLOv5CocoDataset' +# ========================Frequently modified parameters====================== +# -----data related----- +data_root = 'data/coco/' # Root path of data +# Path of train annotation file +train_ann_file = 'annotations/instances_train2017.json' +train_data_prefix = 'train2017/' # Prefix of train image path +# Path of val annotation file +val_ann_file = 'annotations/instances_val2017.json' +val_data_prefix = 'val2017/' # Prefix of val image path -# parameters that often need to be modified -num_classes = 80 -img_scale = (640, 640) # width, height -deepen_factor = 0.33 -widen_factor = 0.5 -max_epochs 
= 300 -save_epoch_intervals = 10 +num_classes = 80 # Number of classes for classification +# Batch size of a single GPU during training train_batch_size_per_gpu = 16 +# Worker to pre-fetch data for each single GPU during training train_num_workers = 8 -val_batch_size_per_gpu = 1 -val_num_workers = 2 - -# persistent_workers must be False if num_workers is 0. +# persistent_workers must be False if num_workers is 0 persistent_workers = True -# Base learning rate for optim_wrapper -base_lr = 0.01 - -# only on Val -batch_shapes_cfg = dict( - type='BatchShapePolicy', - batch_size=val_batch_size_per_gpu, - img_size=img_scale[0], - size_divisor=32, - extra_pad_ratio=0.5) - +# -----model related----- +# Basic size of multi-scale prior box anchors = [ [(10, 13), (16, 30), (33, 23)], # P3/8 [(30, 61), (62, 45), (59, 119)], # P4/16 [(116, 90), (156, 198), (373, 326)] # P5/32 ] -strides = [8, 16, 32] -num_det_layers = 3 -# single-scale training is recommended to +# -----train val related----- +# Base learning rate for optim_wrapper. Corresponding to 8xb16=64 bs +base_lr = 0.01 +max_epochs = 300 # Maximum training epochs + +model_test_cfg = dict( + # The config of multi-label for multi-class prediction. + multi_label=True, + # The number of boxes before NMS + nms_pre=30000, + score_thr=0.001, # Threshold to filter out boxes. + nms=dict(type='nms', iou_threshold=0.65), # NMS type and threshold + max_per_img=300) # Max number of detections of each image + +# ========================Possible modified parameters======================== +# -----data related----- +img_scale = (640, 640) # width, height +# Dataset type, this will be used to define the dataset +dataset_type = 'YOLOv5CocoDataset' +# Batch size of a single GPU during validation +val_batch_size_per_gpu = 1 +# Worker to pre-fetch data for each single GPU during validation +val_num_workers = 2 + +# Config of batch shapes. Only on val. +# It means not used if batch_shapes_cfg is None. +batch_shapes_cfg = dict( + type='BatchShapePolicy', + batch_size=val_batch_size_per_gpu, + img_size=img_scale[0], + # The image scale of padding should be divided by pad_size_divisor + size_divisor=32, + # Additional paddings for pixel scale + extra_pad_ratio=0.5) + +# -----model related----- +# The scaling factor that controls the depth of the network structure +deepen_factor = 0.33 +# The scaling factor that controls the width of the network structure +widen_factor = 0.5 +# Strides of multi-scale prior box +strides = [8, 16, 32] +num_det_layers = 3 # The number of model output scales +norm_cfg = dict(type='BN', momentum=0.03, eps=0.001) + +# -----train val related----- +affine_scale = 0.5 # YOLOv5RandomAffine scaling ratio +loss_cls_weight = 0.5 +loss_bbox_weight = 0.05 +loss_obj_weight = 1.0 +prior_match_thr = 4. # Priori box matching threshold +obj_level_weights = [4., 1., + 0.4] # The obj loss weights of the three output layers +lr_factor = 0.01 # Learning rate scaling factor +weight_decay = 0.0005 +# Save model checkpoint and validation intervals +save_epoch_intervals = 10 +# The maximum checkpoints to keep. +max_keep_ckpts = 3 +# Single-scale training is recommended to # be turned on, which can speed up training. 
env_cfg = dict(cudnn_benchmark=True) +# ===============================Unmodified in most cases==================== model = dict( type='YOLODetector', data_preprocessor=dict( @@ -53,7 +101,7 @@ model = dict( type='YOLOv5CSPDarknet', deepen_factor=deepen_factor, widen_factor=widen_factor, - norm_cfg=dict(type='BN', momentum=0.03, eps=0.001), + norm_cfg=norm_cfg, act_cfg=dict(type='SiLU', inplace=True)), neck=dict( type='YOLOv5PAFPN', @@ -62,7 +110,7 @@ model = dict( in_channels=[256, 512, 1024], out_channels=[256, 512, 1024], num_csp_blocks=3, - norm_cfg=dict(type='BN', momentum=0.03, eps=0.001), + norm_cfg=norm_cfg, act_cfg=dict(type='SiLU', inplace=True)), bbox_head=dict( type='YOLOv5Head', @@ -82,28 +130,25 @@ model = dict( type='mmdet.CrossEntropyLoss', use_sigmoid=True, reduction='mean', - loss_weight=0.5 * (num_classes / 80 * 3 / num_det_layers)), + loss_weight=loss_cls_weight * + (num_classes / 80 * 3 / num_det_layers)), loss_bbox=dict( type='IoULoss', iou_mode='ciou', bbox_format='xywh', eps=1e-7, reduction='mean', - loss_weight=0.05 * (3 / num_det_layers), + loss_weight=loss_bbox_weight * (3 / num_det_layers), return_iou=True), loss_obj=dict( type='mmdet.CrossEntropyLoss', use_sigmoid=True, reduction='mean', - loss_weight=1.0 * ((img_scale[0] / 640)**2 * 3 / num_det_layers)), - prior_match_thr=4., - obj_level_weights=[4., 1., 0.4]), - test_cfg=dict( - multi_label=True, - nms_pre=30000, - score_thr=0.001, - nms=dict(type='nms', iou_threshold=0.65), - max_per_img=300)) + loss_weight=loss_obj_weight * + ((img_scale[0] / 640)**2 * 3 / num_det_layers)), + prior_match_thr=prior_match_thr, + obj_level_weights=obj_level_weights), + test_cfg=model_test_cfg) albu_train_transforms = [ dict(type='Blur', p=0.01), @@ -128,7 +173,7 @@ train_pipeline = [ type='YOLOv5RandomAffine', max_rotate_degree=0.0, max_shear_degree=0.0, - scaling_ratio_range=(0.5, 1.5), + scaling_ratio_range=(1 - affine_scale, 1 + affine_scale), # img_scale is (width, height) border=(-img_scale[0] // 2, -img_scale[1] // 2), border_val=(114, 114, 114)), @@ -160,8 +205,8 @@ train_dataloader = dict( dataset=dict( type=dataset_type, data_root=data_root, - ann_file='annotations/instances_train2017.json', - data_prefix=dict(img='train2017/'), + ann_file=train_ann_file, + data_prefix=dict(img=train_data_prefix), filter_cfg=dict(filter_empty_gt=False, min_size=32), pipeline=train_pipeline)) @@ -191,8 +236,8 @@ val_dataloader = dict( type=dataset_type, data_root=data_root, test_mode=True, - data_prefix=dict(img='val2017/'), - ann_file='annotations/instances_val2017.json', + data_prefix=dict(img=val_data_prefix), + ann_file=val_ann_file, pipeline=test_pipeline, batch_shapes_cfg=batch_shapes_cfg)) @@ -205,7 +250,7 @@ optim_wrapper = dict( type='SGD', lr=base_lr, momentum=0.937, - weight_decay=0.0005, + weight_decay=weight_decay, nesterov=True, batch_size_per_gpu=train_batch_size_per_gpu), constructor='YOLOv5OptimizerConstructor') @@ -214,13 +259,13 @@ default_hooks = dict( param_scheduler=dict( type='YOLOv5ParamSchedulerHook', scheduler_type='linear', - lr_factor=0.01, + lr_factor=lr_factor, max_epochs=max_epochs), checkpoint=dict( type='CheckpointHook', interval=save_epoch_intervals, save_best='auto', - max_keep_ckpts=3)) + max_keep_ckpts=max_keep_ckpts)) custom_hooks = [ dict( @@ -235,7 +280,7 @@ custom_hooks = [ val_evaluator = dict( type='mmdet.CocoMetric', proposal_nums=(100, 1, 10), - ann_file=data_root + 'annotations/instances_val2017.json', + ann_file=data_root + val_ann_file, metric='bbox') test_evaluator = val_evaluator diff 
--git a/configs/yolov5/yolov5_s-v61_syncbn_fast_1xb4-300e_balloon.py b/configs/yolov5/yolov5_s-v61_syncbn_fast_1xb4-300e_balloon.py index 529702e2..2c585ceb 100644 --- a/configs/yolov5/yolov5_s-v61_syncbn_fast_1xb4-300e_balloon.py +++ b/configs/yolov5/yolov5_s-v61_syncbn_fast_1xb4-300e_balloon.py @@ -1,39 +1,42 @@ _base_ = './yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py' +# ========================modified parameters====================== data_root = 'data/balloon/' - -train_batch_size_per_gpu = 4 -train_num_workers = 2 - +# Path of train annotation file +train_ann_file = 'train.json' +train_data_prefix = 'train/' # Prefix of train image path +# Path of val annotation file +val_ann_file = 'val.json' +val_data_prefix = 'val/' # Prefix of val image path metainfo = { 'classes': ('balloon', ), 'palette': [ (220, 20, 60), ] } +num_classes = 1 +train_batch_size_per_gpu = 4 +train_num_workers = 2 +log_interval = 1 + +# =======================Unmodified in most cases================== train_dataloader = dict( batch_size=train_batch_size_per_gpu, num_workers=train_num_workers, dataset=dict( data_root=data_root, metainfo=metainfo, - data_prefix=dict(img='train/'), - ann_file='train.json')) - + data_prefix=dict(img=train_data_prefix), + ann_file=train_ann_file)) val_dataloader = dict( dataset=dict( data_root=data_root, metainfo=metainfo, - data_prefix=dict(img='val/'), - ann_file='val.json')) - + data_prefix=dict(img=val_data_prefix), + ann_file=val_ann_file)) test_dataloader = val_dataloader - -val_evaluator = dict(ann_file=data_root + 'val.json') - +val_evaluator = dict(ann_file=data_root + val_ann_file) test_evaluator = val_evaluator - -model = dict(bbox_head=dict(head_module=dict(num_classes=1))) - -default_hooks = dict(logger=dict(interval=1)) +model = dict(bbox_head=dict(head_module=dict(num_classes=num_classes))) +default_hooks = dict(logger=dict(interval=log_interval)) From 5f2b08cf5ad225e41598fd3fa24e28c35f7d2b55 Mon Sep 17 00:00:00 2001 From: Audrey528 <68552295+Audrey528@users.noreply.github.com> Date: Fri, 3 Feb 2023 20:01:05 +0800 Subject: [PATCH 04/64] add model structure graph of yolov7 in readme (#504) --- configs/yolov7/README.md | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/configs/yolov7/README.md b/configs/yolov7/README.md index 2fb56681..33de9be0 100644 --- a/configs/yolov7/README.md +++ b/configs/yolov7/README.md @@ -12,6 +12,11 @@ YOLOv7 surpasses all known object detectors in both speed and accuracy in the ra +
+YOLOv7-l +YOLOv7-l-P5 model structure +
+ ## Results and models ### COCO From 8237f19bf8bd35b2b76cc7551a15f61153985b38 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=B8=83=E9=B2=81=E7=93=A6=E4=B8=9D?= <62822224+TianWen580@users.noreply.github.com> Date: Sat, 4 Feb 2023 11:49:58 +0800 Subject: [PATCH 05/64] Added the prompt to submit the pull request (#508) --- docs/en/community/contributing.md | 15 ++++++++++----- docs/zh_cn/community/contributing.md | 9 +++++++-- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/docs/en/community/contributing.md b/docs/en/community/contributing.md index 92d25161..2c73a768 100644 --- a/docs/en/community/contributing.md +++ b/docs/en/community/contributing.md @@ -37,9 +37,10 @@ Then, you can clone the repositories to local: git clone git@github.com:{username}/mmyolo.git ``` -After that, you should add official repository as the upstream repository. +After that, you should get into the project folder and add official repository as the upstream repository. ```bash +cd mmyolo git remote add upstream git@github.com:open-mmlab/mmyolo ``` @@ -88,7 +89,7 @@ If the code does not conform to the code style specification, pre-commit will ra -If we want to commit our code bypassing the pre-commit hook, we can use the `--no-verify` option(**only for temporarily commit**. +If we want to commit our code bypassing the pre-commit hook, we can use the `--no-verify` option(**only for temporarily commit**). ```shell git commit -m "xxx" --no-verify @@ -142,15 +143,19 @@ This will allow you to use the `git push` command to push code directly next tim -(2) Modify the PR description according to the guidelines so that other developers can better understand your changes +(2) Modify the PR description according to the guidelines so that other developers can better understand your changes. - +```{note} +The *base* branch should be modified to *dev* branch. +``` + + Find more details about Pull Request description in [pull request guidelines](#pr-specs). 
**note** -(a) The Pull Request description should contain the reason for the change, the content of the change, and the impact of the change, and be associated with the relevant Issue (see [documentation](https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue) +(a) The Pull Request description should contain the reason for the change, the content of the change, and the impact of the change, and be associated with the relevant Issue (see [documentation](https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue)) (b) If it is your first contribution, please sign the CLA diff --git a/docs/zh_cn/community/contributing.md b/docs/zh_cn/community/contributing.md index 4fac637a..64c463b0 100644 --- a/docs/zh_cn/community/contributing.md +++ b/docs/zh_cn/community/contributing.md @@ -39,9 +39,10 @@ git clone git@github.com:{username}/mmyolo.git ``` -添加原代码库为上游代码库 +进入项目并添加原代码库为上游代码库 ```bash +cd mmyolo git remote add upstream git@github.com:open-mmlab/mmyolo ``` @@ -146,7 +147,11 @@ git push -u origin {branch_name} (2) 根据指引修改 PR 描述,以便于其他开发者更好地理解你的修改 - +```{note} +注意在 PR branch 左侧的 base 需要修改为 dev 分支 +``` + + 描述规范详见[拉取请求规范](#拉取请求规范) From 6acde82ec8c96e377a7003e72cf01418d94596b1 Mon Sep 17 00:00:00 2001 From: yechenzhi <136920488@qq.com> Date: Sat, 4 Feb 2023 13:46:14 +0800 Subject: [PATCH 06/64] Fix typo for assigner visualization (#509) --- projects/assigner_visualization/README.md | 2 +- .../assigner_visualization/visualization/assigner_visualizer.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/projects/assigner_visualization/README.md b/projects/assigner_visualization/README.md index 579ab5c8..443a23c6 100644 --- a/projects/assigner_visualization/README.md +++ b/projects/assigner_visualization/README.md @@ -13,5 +13,5 @@ Now, the script only support `YOLOv5` . 
### Command ```shell -python projects/assigner_visualization/assigner_visualization.py projects/assigner_visualization/configs/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_assignervisualization.py ` +python projects/assigner_visualization/assigner_visualization.py projects/assigner_visualization/configs/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_assignervisualization.py ``` diff --git a/projects/assigner_visualization/visualization/assigner_visualizer.py b/projects/assigner_visualization/visualization/assigner_visualizer.py index a4e8ae50..270f82eb 100644 --- a/projects/assigner_visualization/visualization/assigner_visualizer.py +++ b/projects/assigner_visualization/visualization/assigner_visualizer.py @@ -175,7 +175,7 @@ class YOLOAssignerVisualizer(DetLocalVisualizer): # The PALETTE in the dataset_meta is required assert self.dataset_meta is not None - palette = self.dataset_meta['PALETTE'] + palette = self.dataset_meta['palette'] x = ((grid_x_inds + offset) * stride).long() y = ((grid_y_inds + offset) * stride).long() center = torch.stack((x, y), dim=-1) From 1dee9eed6ebb2b9d0487a772854b71dc966f1552 Mon Sep 17 00:00:00 2001 From: yechenzhi <136920488@qq.com> Date: Mon, 6 Feb 2023 10:12:04 +0800 Subject: [PATCH 07/64] [Docs] How to specify specific GPU training and inference (#503) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * 中文版指定GPU训练 * 删去不必要文件 * typo * rebase dev * add test example * add english version * fix format * english typo * Update docs/zh_cn/advanced_guides/how_to.md Co-authored-by: Range King * Update docs/zh_cn/advanced_guides/how_to.md Co-authored-by: Range King --------- Co-authored-by: Range King --- docs/en/advanced_guides/how_to.md | 24 ++++++++++++++++++++++++ docs/zh_cn/advanced_guides/how_to.md | 24 ++++++++++++++++++++++++ 2 files changed, 48 insertions(+) diff --git a/docs/en/advanced_guides/how_to.md b/docs/en/advanced_guides/how_to.md index 657d6ca0..37a3671f 100644 --- a/docs/en/advanced_guides/how_to.md +++ b/docs/en/advanced_guides/how_to.md @@ -546,3 +546,27 @@ python ./tools/train.py \ - `randomness.seed=2023`, set the random seed to 2023. - `randomness.diff_rank_seed=True`, set different seeds according to global rank. Defaults to False. - `randomness.deterministic=True`, set the deterministic option for cuDNN backend, i.e., set `torch.backends.cudnn.deterministic` to True and `torch.backends.cudnn.benchmark` to False. Defaults to False. See https://pytorch.org/docs/stable/notes/randomness.html for more details. + +## Specify specific GPUs during training or inference + +If you have multiple GPUs, such as 8 GPUs, numbered `0, 1, 2, 3, 4, 5, 6, 7`, GPU 0 will be used by default for training or inference. If you want to specify other GPUs for training or inference, you can use the following commands: + +```shell +CUDA_VISIBLE_DEVICES=5 python ./tools/train.py ${CONFIG} #train +CUDA_VISIBLE_DEVICES=5 python ./tools/test.py ${CONFIG} ${CHECKPOINT_FILE} #test +``` + +If you set `CUDA_VISIBLE_DEVICES` to -1 or a number greater than the maximum GPU number, such as 8, the CPU will be used for training or inference. + +If you want to use several of these GPUs to train in parallel, you can use the following command: + +```shell +CUDA_VISIBLE_DEVICES=0,1,2,3 ./tools/dist_train.sh ${CONFIG} ${GPU_NUM} +``` + +Here the `GPU_NUM` is 4. 
In addition, if multiple tasks are trained in parallel on one machine and each task requires multiple GPUs, the PORT of each task need to be set differently to avoid communication conflict, like the following commands: + +```shell +CUDA_VISIBLE_DEVICES=0,1,2,3 PORT=29500 ./tools/dist_train.sh ${CONFIG} 4 +CUDA_VISIBLE_DEVICES=4,5,6,7 PORT=29501 ./tools/dist_train.sh ${CONFIG} 4 +``` diff --git a/docs/zh_cn/advanced_guides/how_to.md b/docs/zh_cn/advanced_guides/how_to.md index f5ea3c5e..3cf7368a 100644 --- a/docs/zh_cn/advanced_guides/how_to.md +++ b/docs/zh_cn/advanced_guides/how_to.md @@ -552,3 +552,27 @@ python ./tools/train.py \ - `randomness.diff_rank_seed=True`,根据 rank 来设置不同的种子,`diff_rank_seed` 默认为 False。 - `randomness.deterministic=True`,把 cuDNN 后端确定性选项设置为 True,即把`torch.backends.cudnn.deterministic` 设为 True,把 `torch.backends.cudnn.benchmark` 设为False。`deterministic` 默认为 False。更多细节见 https://pytorch.org/docs/stable/notes/randomness.html。 + +## 指定特定 GPU 训练或推理 + +如果你有多张 GPU,比如 8 张,其编号分别为 `0, 1, 2, 3, 4, 5, 6, 7`,使用单卡训练或推理时会默认使用卡 0。如果想指定其他卡进行训练或推理,可以使用以下命令: + +```shell +CUDA_VISIBLE_DEVICES=5 python ./tools/train.py ${CONFIG} #train +CUDA_VISIBLE_DEVICES=5 python ./tools/test.py ${CONFIG} ${CHECKPOINT_FILE} #test +``` + +如果设置`CUDA_VISIBLE_DEVICES`为 -1 或者一个大于 GPU 最大编号的数,比如 8,将会使用 CPU 进行训练或者推理。 + +如果你想使用其中几张卡并行训练,可以使用如下命令: + +```shell +CUDA_VISIBLE_DEVICES=0,1,2,3 ./tools/dist_train.sh ${CONFIG} ${GPU_NUM} +``` + +这里 `GPU_NUM` 为 4。另外如果在一台机器上多个任务同时多卡训练,需要设置不同的端口,比如以下命令: + +```shell +CUDA_VISIBLE_DEVICES=0,1,2,3 PORT=29500 ./tools/dist_train.sh ${CONFIG} 4 +CUDA_VISIBLE_DEVICES=4,5,6,7 PORT=29501 ./tools/dist_train.sh ${CONFIG} 4 +``` From f54e5603fd0702c24a5a5019ee7b39e9208a752e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Haian=20Huang=28=E6=B7=B1=E5=BA=A6=E7=9C=B8=29?= <1286304229@qq.com> Date: Mon, 6 Feb 2023 17:47:22 +0800 Subject: [PATCH 08/64] [Enchance] Extending `DetDataPreprocessor` flexibility (#511) * Extending DetDataPreprocessor flexibility * update * add cast_data --- mmyolo/datasets/utils.py | 19 ++--- .../data_preprocessors/data_preprocessor.py | 81 +++++++++++++------ tests/test_datasets/test_utils.py | 19 +++-- .../test_data_preprocessor.py | 13 +-- 4 files changed, 87 insertions(+), 45 deletions(-) diff --git a/mmyolo/datasets/utils.py b/mmyolo/datasets/utils.py index 0cca341b..1c056200 100644 --- a/mmyolo/datasets/utils.py +++ b/mmyolo/datasets/utils.py @@ -22,6 +22,7 @@ def yolov5_collate(data_batch: Sequence, for i in range(len(data_batch)): datasamples = data_batch[i]['data_samples'] inputs = data_batch[i]['inputs'] + batch_imgs.append(inputs) gt_bboxes = datasamples.gt_instances.bboxes.tensor gt_labels = datasamples.gt_instances.labels @@ -30,17 +31,17 @@ def yolov5_collate(data_batch: Sequence, dim=1) batch_bboxes_labels.append(bboxes_labels) - batch_imgs.append(inputs) + collated_results = { + 'data_samples': { + 'bboxes_labels': torch.cat(batch_bboxes_labels, 0) + } + } + if use_ms_training: - return { - 'inputs': batch_imgs, - 'data_samples': torch.cat(batch_bboxes_labels, 0) - } + collated_results['inputs'] = batch_imgs else: - return { - 'inputs': torch.stack(batch_imgs, 0), - 'data_samples': torch.cat(batch_bboxes_labels, 0) - } + collated_results['inputs'] = torch.stack(batch_imgs, 0) + return collated_results @TASK_UTILS.register_module() diff --git a/mmyolo/models/data_preprocessors/data_preprocessor.py b/mmyolo/models/data_preprocessors/data_preprocessor.py index c7281fa5..f21a363a 100644 --- a/mmyolo/models/data_preprocessors/data_preprocessor.py +++ 
b/mmyolo/models/data_preprocessors/data_preprocessor.py @@ -1,16 +1,20 @@ # Copyright (c) OpenMMLab. All rights reserved. import random -from typing import List, Tuple, Union +from typing import List, Mapping, Sequence, Tuple, Union import torch import torch.nn.functional as F from mmdet.models import BatchSyncRandomResize from mmdet.models.data_preprocessors import DetDataPreprocessor from mmengine import MessageHub, is_list_of +from mmengine.structures import BaseDataElement from torch import Tensor from mmyolo.registry import MODELS +CastData = Union[tuple, dict, BaseDataElement, torch.Tensor, list, bytes, str, + None] + @MODELS.register_module() class YOLOv5DetDataPreprocessor(DetDataPreprocessor): @@ -19,6 +23,30 @@ class YOLOv5DetDataPreprocessor(DetDataPreprocessor): Note: It must be used together with `mmyolo.datasets.utils.yolov5_collate` """ + # TODO: Can be deleted after mmdet support + def cast_data(self, data: CastData) -> CastData: + """Copying data to the target device. + + Args: + data (dict): Data returned by ``DataLoader``. + + Returns: + CollatedResult: Inputs and data sample at target device. + """ + if isinstance(data, Mapping): + return {key: self.cast_data(data[key]) for key in data} + elif isinstance(data, (str, bytes)) or data is None: + return data + elif isinstance(data, tuple) and hasattr(data, '_fields'): + # namedtuple + return type(data)(*(self.cast_data(sample) for sample in data)) # type: ignore # noqa: E501 # yapf:disable + elif isinstance(data, Sequence): + return type(data)(self.cast_data(sample) for sample in data) # type: ignore # noqa: E501 # yapf:disable + elif isinstance(data, (torch.Tensor, BaseDataElement)): + return data.to(self.device, non_blocking=True) + else: + return data + def forward(self, data: dict, training: bool = False) -> dict: """Perform normalization, padding and bgr2rgb conversion based on ``DetDataPreprocessorr``. @@ -32,29 +60,26 @@ class YOLOv5DetDataPreprocessor(DetDataPreprocessor): """ if not training: return super().forward(data, training) - assert isinstance(data['data_samples'], torch.Tensor), \ - '"data_samples" should be a tensor, but got ' \ - f'{type(data["data_samples"])}. The possible reason for this ' \ - 'is that you are not using it with ' \ - '"mmyolo.datasets.utils.yolov5_collate". Please refer to ' \ - '"configs/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py".' - inputs = data['inputs'].to(self.device, non_blocking=True) + data = self.cast_data(data) + inputs, data_samples = data['inputs'], data['data_samples'] + assert isinstance(data['data_samples'], dict) + # TODO: Supports multi-scale training if self._channel_conversion and inputs.shape[1] == 3: inputs = inputs[:, [2, 1, 0], ...] - if self._enable_normalize: inputs = (inputs - self.mean) / self.std - data_samples = data['data_samples'].to(self.device, non_blocking=True) - if self.batch_augments is not None: for batch_aug in self.batch_augments: inputs, data_samples = batch_aug(inputs, data_samples) img_metas = [{'batch_input_shape': inputs.shape[2:]}] * len(inputs) - data_samples = {'bboxes_labels': data_samples, 'img_metas': img_metas} + data_samples = { + 'bboxes_labels': data_samples['bboxes_labels'], + 'img_metas': img_metas + } return {'inputs': inputs, 'data_samples': data_samples} @@ -98,18 +123,18 @@ class PPYOLOEDetDataPreprocessor(DetDataPreprocessor): data = self.cast_data(data) inputs, data_samples = data['inputs'], data['data_samples'] + assert isinstance(data['data_samples'], dict) # Process data. 
batch_inputs = [] - for _batch_input, data_sample in zip(inputs, data_samples): + for _input in inputs: # channel transform if self._channel_conversion: - _batch_input = _batch_input[[2, 1, 0], ...] + _input = _input[[2, 1, 0], ...] # Convert to float after channel conversion to ensure # efficiency - _batch_input = _batch_input.float() - - batch_inputs.append(_batch_input) + _input = _input.float() + batch_inputs.append(_input) # Batch random resize image. if self.batch_augments is not None: @@ -120,7 +145,10 @@ class PPYOLOEDetDataPreprocessor(DetDataPreprocessor): inputs = (inputs - self.mean) / self.std img_metas = [{'batch_input_shape': inputs.shape[2:]}] * len(inputs) - data_samples = {'bboxes_labels': data_samples, 'img_metas': img_metas} + data_samples = { + 'bboxes_labels': data_samples['bboxes_labels'], + 'img_metas': img_metas + } return {'inputs': inputs, 'data_samples': data_samples} @@ -179,7 +207,7 @@ class PPYOLOEBatchRandomResize(BatchSyncRandomResize): self.interp_mode = interp_mode def forward(self, inputs: list, - data_samples: Tensor) -> Tuple[Tensor, Tensor]: + data_samples: dict) -> Tuple[Tensor, Tensor]: """Resize a batch of images and bboxes to shape ``self._input_size``. The inputs and data_samples should be list, and @@ -191,6 +219,9 @@ class PPYOLOEBatchRandomResize(BatchSyncRandomResize): 'The type of inputs must be list. The possible reason for this ' \ 'is that you are not using it with `PPYOLOEDetDataPreprocessor` ' \ 'and `yolov5_collate` with use_ms_training == True.' + + bboxes_labels = data_samples['bboxes_labels'] + message_hub = MessageHub.get_current_instance() if (message_hub.get_info('iter') + 1) % self._interval == 0: # get current input size @@ -218,11 +249,13 @@ class PPYOLOEBatchRandomResize(BatchSyncRandomResize): align_corners=align_corners) # rescale boxes - indexes = data_samples[:, 0] == i - data_samples[indexes, 2] *= scale_x - data_samples[indexes, 3] *= scale_y - data_samples[indexes, 4] *= scale_x - data_samples[indexes, 5] *= scale_y + indexes = bboxes_labels[:, 0] == i + bboxes_labels[indexes, 2] *= scale_x + bboxes_labels[indexes, 3] *= scale_y + bboxes_labels[indexes, 4] *= scale_x + bboxes_labels[indexes, 5] *= scale_y + + data_samples['bboxes_labels'] = bboxes_labels else: _batch_input = _batch_input.unsqueeze(0) diff --git a/tests/test_datasets/test_utils.py b/tests/test_datasets/test_utils.py index 43c8e61f..dc7b9022 100644 --- a/tests/test_datasets/test_utils.py +++ b/tests/test_datasets/test_utils.py @@ -39,13 +39,15 @@ class TestYOLOv5Collate(unittest.TestCase): out = yolov5_collate([dict(inputs=inputs, data_samples=data_samples)]) self.assertIsInstance(out, dict) self.assertTrue(out['inputs'].shape == (1, 3, 10, 10)) - self.assertTrue(out['data_samples'].shape == (4, 6)) + self.assertTrue(out['data_samples'], dict) + self.assertTrue(out['data_samples']['bboxes_labels'].shape == (4, 6)) out = yolov5_collate([dict(inputs=inputs, data_samples=data_samples)] * 2) self.assertIsInstance(out, dict) self.assertTrue(out['inputs'].shape == (2, 3, 10, 10)) - self.assertTrue(out['data_samples'].shape == (8, 6)) + self.assertTrue(out['data_samples'], dict) + self.assertTrue(out['data_samples']['bboxes_labels'].shape == (8, 6)) def test_yolov5_collate_with_multi_scale(self): rng = np.random.RandomState(0) @@ -63,19 +65,22 @@ class TestYOLOv5Collate(unittest.TestCase): use_ms_training=True) self.assertIsInstance(out, dict) self.assertTrue(out['inputs'][0].shape == (3, 10, 10)) - print(out['data_samples'].shape) - 
self.assertTrue(out['data_samples'].shape == (4, 6)) + self.assertTrue(out['data_samples'], dict) + self.assertTrue(out['data_samples']['bboxes_labels'].shape == (4, 6)) self.assertIsInstance(out['inputs'], list) - self.assertIsInstance(out['data_samples'], torch.Tensor) + self.assertIsInstance(out['data_samples']['bboxes_labels'], + torch.Tensor) out = yolov5_collate( [dict(inputs=inputs, data_samples=data_samples)] * 2, use_ms_training=True) self.assertIsInstance(out, dict) self.assertTrue(out['inputs'][0].shape == (3, 10, 10)) - self.assertTrue(out['data_samples'].shape == (8, 6)) + self.assertTrue(out['data_samples'], dict) + self.assertTrue(out['data_samples']['bboxes_labels'].shape == (8, 6)) self.assertIsInstance(out['inputs'], list) - self.assertIsInstance(out['data_samples'], torch.Tensor) + self.assertIsInstance(out['data_samples']['bboxes_labels'], + torch.Tensor) class TestBatchShapePolicy(unittest.TestCase): diff --git a/tests/test_models/test_data_preprocessor/test_data_preprocessor.py b/tests/test_models/test_data_preprocessor/test_data_preprocessor.py index 203660ae..1e5de1a7 100644 --- a/tests/test_models/test_data_preprocessor/test_data_preprocessor.py +++ b/tests/test_models/test_data_preprocessor/test_data_preprocessor.py @@ -55,7 +55,9 @@ class TestYOLOv5DetDataPreprocessor(TestCase): # test training data = { 'inputs': torch.randint(0, 256, (2, 3, 10, 11)), - 'data_samples': torch.randint(0, 11, (18, 6)), + 'data_samples': { + 'bboxes_labels': torch.randint(0, 11, (18, 6)) + }, } out_data = processor(data, training=True) batch_inputs, batch_data_samples = out_data['inputs'], out_data[ @@ -71,7 +73,7 @@ class TestYOLOv5DetDataPreprocessor(TestCase): 'inputs': [torch.randint(0, 256, (3, 11, 10))], 'data_samples': [DetDataSample()] } - # data_samples must be tensor + # data_samples must be dict with self.assertRaises(AssertionError): processor(data, training=True) @@ -104,8 +106,9 @@ class TestPPYOLOEDetDataPreprocessor(TestCase): torch.randint(0, 256, (3, 10, 11)), torch.randint(0, 256, (3, 10, 11)) ], - 'data_samples': - torch.randint(0, 11, (18, 6)).float(), + 'data_samples': { + 'bboxes_labels': torch.randint(0, 11, (18, 6)).float() + }, } out_data = processor(data, training=True) batch_data_samples = out_data['data_samples'] @@ -120,5 +123,5 @@ class TestPPYOLOEDetDataPreprocessor(TestCase): 'data_samples': DetDataSample() } # data_samples must be list - with self.assertRaises(TypeError): + with self.assertRaises(AssertionError): processor(data, training=True) From c3acf42db46f3197277403fb68b09004c72babc9 Mon Sep 17 00:00:00 2001 From: Range King Date: Mon, 6 Feb 2023 19:39:39 +0800 Subject: [PATCH 09/64] Beautify the YOLOv8 configuration (#516) * Update yolov5_s-v61_syncbn_8xb16-300e_coco.py * Update yolov8_s_syncbn_fast_8xb16-500e_coco.py * Update yolov8_m_syncbn_fast_8xb16-500e_coco.py * Update yolov8_l_syncbn_fast_8xb16-500e_coco.py * Update yolov8_s_syncbn_fast_8xb16-500e_coco.py * Add todo * Update yolov8_s_syncbn_fast_8xb16-500e_coco.py * Update transforms.py --- .../yolov5_s-v61_syncbn_8xb16-300e_coco.py | 2 +- .../yolov8_l_syncbn_fast_8xb16-500e_coco.py | 13 +- .../yolov8_m_syncbn_fast_8xb16-500e_coco.py | 15 +- .../yolov8_s_syncbn_fast_8xb16-500e_coco.py | 178 +++++++++++------- mmyolo/datasets/transforms/transforms.py | 9 + 5 files changed, 140 insertions(+), 77 deletions(-) diff --git a/configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py b/configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py index b90f6785..b7d28168 100644 --- 
a/configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py +++ b/configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py @@ -69,7 +69,7 @@ widen_factor = 0.5 # Strides of multi-scale prior box strides = [8, 16, 32] num_det_layers = 3 # The number of model output scales -norm_cfg = dict(type='BN', momentum=0.03, eps=0.001) +norm_cfg = dict(type='BN', momentum=0.03, eps=0.001) # Normalization config # -----train val related----- affine_scale = 0.5 # YOLOv5RandomAffine scaling ratio diff --git a/configs/yolov8/yolov8_l_syncbn_fast_8xb16-500e_coco.py b/configs/yolov8/yolov8_l_syncbn_fast_8xb16-500e_coco.py index 425edaed..9c2d1ae3 100644 --- a/configs/yolov8/yolov8_l_syncbn_fast_8xb16-500e_coco.py +++ b/configs/yolov8/yolov8_l_syncbn_fast_8xb16-500e_coco.py @@ -1,10 +1,13 @@ _base_ = './yolov8_m_syncbn_fast_8xb16-500e_coco.py' +# ========================modified parameters====================== deepen_factor = 1.00 widen_factor = 1.00 last_stage_out_channels = 512 -mixup_ratio = 0.15 +mixup_prob = 0.15 + +# =======================Unmodified in most cases================== model = dict( backbone=dict( last_stage_out_channels=last_stage_out_channels, @@ -22,15 +25,15 @@ model = dict( pre_transform = _base_.pre_transform albu_train_transform = _base_.albu_train_transform -mosaic_affine_transform = _base_.mosaic_affine_transform +mosaic_affine_pipeline = _base_.mosaic_affine_pipeline last_transform = _base_.last_transform train_pipeline = [ - *pre_transform, *mosaic_affine_transform, + *pre_transform, *mosaic_affine_pipeline, dict( type='YOLOv5MixUp', - prob=mixup_ratio, - pre_transform=[*pre_transform, *mosaic_affine_transform]), + prob=mixup_prob, + pre_transform=[*pre_transform, *mosaic_affine_pipeline]), *last_transform ] diff --git a/configs/yolov8/yolov8_m_syncbn_fast_8xb16-500e_coco.py b/configs/yolov8/yolov8_m_syncbn_fast_8xb16-500e_coco.py index ed350683..23eb3823 100644 --- a/configs/yolov8/yolov8_m_syncbn_fast_8xb16-500e_coco.py +++ b/configs/yolov8/yolov8_m_syncbn_fast_8xb16-500e_coco.py @@ -1,12 +1,14 @@ _base_ = './yolov8_s_syncbn_fast_8xb16-500e_coco.py' +# ========================modified parameters====================== deepen_factor = 0.67 widen_factor = 0.75 last_stage_out_channels = 768 affine_scale = 0.9 -mixup_ratio = 0.1 +mixup_prob = 0.1 +# =======================Unmodified in most cases================== num_classes = _base_.num_classes num_det_layers = _base_.num_det_layers img_scale = _base_.img_scale @@ -30,7 +32,7 @@ pre_transform = _base_.pre_transform albu_train_transform = _base_.albu_train_transform last_transform = _base_.last_transform -mosaic_affine_transform = [ +mosaic_affine_pipeline = [ dict( type='Mosaic', img_scale=img_scale, @@ -47,12 +49,13 @@ mosaic_affine_transform = [ border_val=(114, 114, 114)) ] +# enable mixup train_pipeline = [ - *pre_transform, *mosaic_affine_transform, + *pre_transform, *mosaic_affine_pipeline, dict( type='YOLOv5MixUp', - prob=mixup_ratio, - pre_transform=[*pre_transform, *mosaic_affine_transform]), + prob=mixup_prob, + pre_transform=[*pre_transform, *mosaic_affine_pipeline]), *last_transform ] @@ -85,6 +88,6 @@ custom_hooks = [ priority=49), dict( type='mmdet.PipelineSwitchHook', - switch_epoch=_base_.max_epochs - 10, + switch_epoch=_base_.max_epochs - _base_.close_mosaic_epochs, switch_pipeline=train_pipeline_stage2) ] diff --git a/configs/yolov8/yolov8_s_syncbn_fast_8xb16-500e_coco.py b/configs/yolov8/yolov8_s_syncbn_fast_8xb16-500e_coco.py index 641862b8..d4900609 100644 --- 
a/configs/yolov8/yolov8_s_syncbn_fast_8xb16-500e_coco.py +++ b/configs/yolov8/yolov8_s_syncbn_fast_8xb16-500e_coco.py @@ -1,37 +1,100 @@ _base_ = '../_base_/default_runtime.py' -# dataset settings -data_root = 'data/coco/' -dataset_type = 'YOLOv5CocoDataset' +# ========================Frequently modified parameters====================== +# -----data related----- +data_root = 'data/coco/' # Root path of data +# Path of train annotation file +train_ann_file = 'annotations/instances_train2017.json' +train_data_prefix = 'train2017/' # Prefix of train image path +# Path of val annotation file +val_ann_file = 'annotations/instances_val2017.json' +val_data_prefix = 'val2017/' # Prefix of val image path -# parameters that often need to be modified -num_classes = 80 -img_scale = (640, 640) # height, width -deepen_factor = 0.33 -widen_factor = 0.5 -max_epochs = 500 -save_epoch_intervals = 10 +num_classes = 80 # Number of classes for classification +# Batch size of a single GPU during training train_batch_size_per_gpu = 16 +# Worker to pre-fetch data for each single GPU during training train_num_workers = 8 -val_batch_size_per_gpu = 1 -val_num_workers = 2 - -# persistent_workers must be False if num_workers is 0. +# persistent_workers must be False if num_workers is 0 persistent_workers = True -strides = [8, 16, 32] -num_det_layers = 3 - -last_stage_out_channels = 1024 - -# Base learning rate for optim_wrapper +# -----train val related----- +# Base learning rate for optim_wrapper. Corresponding to 8xb16=64 bs base_lr = 0.01 -lr_factor = 0.01 +max_epochs = 500 # Maximum training epochs +# Disable mosaic augmentation for final 10 epochs (stage 2) +close_mosaic_epochs = 10 -# single-scale training is recommended to +model_test_cfg = dict( + # The config of multi-label for multi-class prediction. + multi_label=True, + # The number of boxes before NMS + nms_pre=30000, + score_thr=0.001, # Threshold to filter out boxes. + nms=dict(type='nms', iou_threshold=0.7), # NMS type and threshold + max_per_img=300) # Max number of detections of each image + +# ========================Possible modified parameters======================== +# -----data related----- +img_scale = (640, 640) # width, height +# Dataset type, this will be used to define the dataset +dataset_type = 'YOLOv5CocoDataset' +# Batch size of a single GPU during validation +val_batch_size_per_gpu = 1 +# Worker to pre-fetch data for each single GPU during validation +val_num_workers = 2 + +# Config of batch shapes. Only on val. +# We tested YOLOv8-m will get 0.02 higher than not using it. +batch_shapes_cfg = None +# You can turn on `batch_shapes_cfg` by uncommenting the following lines. 
+# batch_shapes_cfg = dict( +# type='BatchShapePolicy', +# batch_size=val_batch_size_per_gpu, +# img_size=img_scale[0], +# # The image scale of padding should be divided by pad_size_divisor +# size_divisor=32, +# # Additional paddings for pixel scale +# extra_pad_ratio=0.5) + +# -----model related----- +# The scaling factor that controls the depth of the network structure +deepen_factor = 0.33 +# The scaling factor that controls the width of the network structure +widen_factor = 0.5 +# Strides of multi-scale prior box +strides = [8, 16, 32] +# The output channel of the last stage +last_stage_out_channels = 1024 +num_det_layers = 3 # The number of model output scales +norm_cfg = dict(type='BN', momentum=0.03, eps=0.001) # Normalization config + +# -----train val related----- +affine_scale = 0.5 # YOLOv5RandomAffine scaling ratio +# YOLOv5RandomAffine aspect ratio of width and height thres to filter bboxes +max_aspect_ratio = 100 +tal_topk = 10 # Number of bbox selected in each level +tal_alpha = 0.5 # A Hyper-parameter related to alignment_metrics +tal_beta = 6.0 # A Hyper-parameter related to alignment_metrics +# TODO: Automatically scale loss_weight based on number of detection layers +loss_cls_weight = 0.5 +loss_bbox_weight = 7.5 +# Since the dfloss is implemented differently in the official +# and mmdet, we're going to divide loss_weight by 4. +loss_dfl_weight = 1.5 / 4 +lr_factor = 0.01 # Learning rate scaling factor +weight_decay = 0.0005 +# Save model checkpoint and validation intervals in stage 1 +save_epoch_intervals = 10 +# validation intervals in stage 2 +val_interval_stage2 = 1 +# The maximum checkpoints to keep. +max_keep_ckpts = 2 +# Single-scale training is recommended to # be turned on, which can speed up training. env_cfg = dict(cudnn_benchmark=True) +# ===============================Unmodified in most cases==================== model = dict( type='YOLODetector', data_preprocessor=dict( @@ -45,7 +108,7 @@ model = dict( last_stage_out_channels=last_stage_out_channels, deepen_factor=deepen_factor, widen_factor=widen_factor, - norm_cfg=dict(type='BN', momentum=0.03, eps=0.001), + norm_cfg=norm_cfg, act_cfg=dict(type='SiLU', inplace=True)), neck=dict( type='YOLOv8PAFPN', @@ -54,7 +117,7 @@ model = dict( in_channels=[256, 512, last_stage_out_channels], out_channels=[256, 512, last_stage_out_channels], num_csp_blocks=3, - norm_cfg=dict(type='BN', momentum=0.03, eps=0.001), + norm_cfg=norm_cfg, act_cfg=dict(type='SiLU', inplace=True)), bbox_head=dict( type='YOLOv8Head', @@ -64,45 +127,39 @@ model = dict( in_channels=[256, 512, last_stage_out_channels], widen_factor=widen_factor, reg_max=16, - norm_cfg=dict(type='BN', momentum=0.03, eps=0.001), + norm_cfg=norm_cfg, act_cfg=dict(type='SiLU', inplace=True), - featmap_strides=[8, 16, 32]), + featmap_strides=strides), prior_generator=dict( - type='mmdet.MlvlPointGenerator', offset=0.5, strides=[8, 16, 32]), + type='mmdet.MlvlPointGenerator', offset=0.5, strides=strides), bbox_coder=dict(type='DistancePointBBoxCoder'), + # scaled based on number of detection layers loss_cls=dict( type='mmdet.CrossEntropyLoss', use_sigmoid=True, reduction='none', - loss_weight=0.5), + loss_weight=loss_cls_weight), loss_bbox=dict( type='IoULoss', iou_mode='ciou', bbox_format='xyxy', reduction='sum', - loss_weight=7.5, + loss_weight=loss_bbox_weight, return_iou=False), - # Since the dfloss is implemented differently in the official - # and mmdet, we're going to divide loss_weight by 4. 
loss_dfl=dict( type='mmdet.DistributionFocalLoss', reduction='mean', - loss_weight=1.5 / 4)), + loss_weight=loss_dfl_weight)), train_cfg=dict( assigner=dict( type='BatchTaskAlignedAssigner', num_classes=num_classes, use_ciou=True, - topk=10, - alpha=0.5, - beta=6.0, + topk=tal_topk, + alpha=tal_alpha, + beta=tal_beta, eps=1e-9)), - test_cfg=dict( - multi_label=True, - nms_pre=30000, - score_thr=0.001, - nms=dict(type='nms', iou_threshold=0.7), - max_per_img=300)) + test_cfg=model_test_cfg) albu_train_transform = [ dict(type='Blur', p=0.01), @@ -135,6 +192,7 @@ last_transform = [ meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip', 'flip_direction')) ] + train_pipeline = [ *pre_transform, dict( @@ -146,8 +204,8 @@ train_pipeline = [ type='YOLOv5RandomAffine', max_rotate_degree=0.0, max_shear_degree=0.0, - scaling_ratio_range=(0.5, 1.5), - max_aspect_ratio=100, + scaling_ratio_range=(1 - affine_scale, 1 + affine_scale), + max_aspect_ratio=max_aspect_ratio, # img_scale is (width, height) border=(-img_scale[0] // 2, -img_scale[1] // 2), border_val=(114, 114, 114)), @@ -166,8 +224,8 @@ train_pipeline_stage2 = [ type='YOLOv5RandomAffine', max_rotate_degree=0.0, max_shear_degree=0.0, - scaling_ratio_range=(0.5, 1.5), - max_aspect_ratio=100, + scaling_ratio_range=(1 - affine_scale, 1 + affine_scale), + max_aspect_ratio=max_aspect_ratio, border_val=(114, 114, 114)), *last_transform ] @@ -181,8 +239,8 @@ train_dataloader = dict( dataset=dict( type=dataset_type, data_root=data_root, - ann_file='annotations/instances_train2017.json', - data_prefix=dict(img='train2017/'), + ann_file=train_ann_file, + data_prefix=dict(img=train_data_prefix), filter_cfg=dict(filter_empty_gt=False, min_size=32), pipeline=train_pipeline)) @@ -201,17 +259,6 @@ test_pipeline = [ 'scale_factor', 'pad_param')) ] -# only on Val -# you can turn on `batch_shapes_cfg`, -# we tested YOLOv8-m will get 0.02 higher than not using it. 
-batch_shapes_cfg = None -# batch_shapes_cfg = dict( -# type='BatchShapePolicy', -# batch_size=val_batch_size_per_gpu, -# img_size=img_scale[0], -# size_divisor=32, -# extra_pad_ratio=0.5) - val_dataloader = dict( batch_size=val_batch_size_per_gpu, num_workers=val_num_workers, @@ -223,8 +270,8 @@ val_dataloader = dict( type=dataset_type, data_root=data_root, test_mode=True, - data_prefix=dict(img='val2017/'), - ann_file='annotations/instances_val2017.json', + data_prefix=dict(img=val_data_prefix), + ann_file=val_ann_file, pipeline=test_pipeline, batch_shapes_cfg=batch_shapes_cfg)) @@ -238,7 +285,7 @@ optim_wrapper = dict( type='SGD', lr=base_lr, momentum=0.937, - weight_decay=0.0005, + weight_decay=weight_decay, nesterov=True, batch_size_per_gpu=train_batch_size_per_gpu), constructor='YOLOv5OptimizerConstructor') @@ -253,7 +300,7 @@ default_hooks = dict( type='CheckpointHook', interval=save_epoch_intervals, save_best='auto', - max_keep_ckpts=2)) + max_keep_ckpts=max_keep_ckpts)) custom_hooks = [ dict( @@ -265,14 +312,14 @@ custom_hooks = [ priority=49), dict( type='mmdet.PipelineSwitchHook', - switch_epoch=max_epochs - 10, + switch_epoch=max_epochs - close_mosaic_epochs, switch_pipeline=train_pipeline_stage2) ] val_evaluator = dict( type='mmdet.CocoMetric', proposal_nums=(100, 1, 10), - ann_file=data_root + 'annotations/instances_val2017.json', + ann_file=data_root + val_ann_file, metric='bbox') test_evaluator = val_evaluator @@ -280,7 +327,8 @@ train_cfg = dict( type='EpochBasedTrainLoop', max_epochs=max_epochs, val_interval=save_epoch_intervals, - dynamic_intervals=[(max_epochs - 10, 1)]) + dynamic_intervals=[((max_epochs - close_mosaic_epochs), + val_interval_stage2)]) val_cfg = dict(type='ValLoop') test_cfg = dict(type='TestLoop') diff --git a/mmyolo/datasets/transforms/transforms.py b/mmyolo/datasets/transforms/transforms.py index 720f7756..ca9c9607 100644 --- a/mmyolo/datasets/transforms/transforms.py +++ b/mmyolo/datasets/transforms/transforms.py @@ -450,6 +450,15 @@ class YOLOv5RandomAffine(BaseTransform): the border of the image. In some dataset like MOT17, the gt bboxes are allowed to cross the border of images. Therefore, we don't need to clip the gt bboxes in these cases. Defaults to True. + min_bbox_size (float): Width and height threshold to filter bboxes. + If the height or width of a box is smaller than this value, it + will be removed. Defaults to 2. + min_area_ratio (float): Threshold of area ratio between + original bboxes and wrapped bboxes. If smaller than this value, + the box will be removed. Defaults to 0.1. + max_aspect_ratio (float): Aspect ratio of width and height + threshold to filter bboxes. If max(h/w, w/h) larger than this + value, the box will be removed. Defaults to 20. 
""" def __init__(self, From e90c369f9d4c6fcffe5db6e504e38bea87a3bd73 Mon Sep 17 00:00:00 2001 From: tripleMu Date: Tue, 7 Feb 2023 10:09:51 +0800 Subject: [PATCH 10/64] [Docs] Fix links in YOLOv8 README (#517) * Update link * Fix --- configs/yolov8/README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/configs/yolov8/README.md b/configs/yolov8/README.md index ebab5597..9fad3ad1 100644 --- a/configs/yolov8/README.md +++ b/configs/yolov8/README.md @@ -20,11 +20,11 @@ YOLOv8-P5 model structure ### COCO -| Backbone | Arch | size | SyncBN | AMP | Mem (GB) | box AP | Config | Download | -| :------: | :--: | :--: | :----: | :-: | :------: | :----: | :-------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | -| YOLOv8-n | P5 | 640 | Yes | Yes | 2.8 | 37.2 | [config](https://github.com/open-mmlab/mmyolo/tree/dev/configs/yolov8/yolov8_n_syncbn_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_n_syncbn_fast_8xb16-500e_coco/yolov8_n_syncbn_fast_8xb16-500e_coco_20230114_131804-88c11cdb.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_n_syncbn_fast_8xb16-500e_coco/yolov8_n_syncbn_fast_8xb16-500e_coco_20230114_131804.log.json) | -| YOLOv8-s | P5 | 640 | Yes | Yes | 4.0 | 44.2 | [config](https://github.com/open-mmlab/mmyolo/tree/dev/configs/yolov8/yolov8_s_syncbn_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_s_syncbn_fast_8xb16-500e_coco/yolov8_s_syncbn_fast_8xb16-500e_coco_20230117_180101-5aa5f0f1.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_s_syncbn_fast_8xb16-500e_coco/yolov8_s_syncbn_fast_8xb16-500e_coco_20230117_180101.log.json) | -| YOLOv8-m | P5 | 640 | Yes | Yes | 7.2 | 49.8 | [config](https://github.com/open-mmlab/mmyolo/tree/dev/configs/yolov8/yolov8_m_syncbn_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_m_syncbn_fast_8xb16-500e_coco/yolov8_m_syncbn_fast_8xb16-500e_coco_20230115_192200-c22e560a.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_m_syncbn_fast_8xb16-500e_coco/yolov8_m_syncbn_fast_8xb16-500e_coco_20230115_192200.log.json) | +| Backbone | Arch | size | SyncBN | AMP | Mem (GB) | box AP | Config | Download | +| :------: | :--: | :--: | :----: | :-: | :------: | :----: | :------------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| YOLOv8-n | P5 | 640 | Yes | Yes | 2.8 | 37.2 | [config](https://github.com/open-mmlab/mmyolo/blob/dev/configs/yolov8/yolov8_n_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_n_syncbn_fast_8xb16-500e_coco/yolov8_n_syncbn_fast_8xb16-500e_coco_20230114_131804-88c11cdb.pth) \| 
[log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_n_syncbn_fast_8xb16-500e_coco/yolov8_n_syncbn_fast_8xb16-500e_coco_20230114_131804.log.json) | +| YOLOv8-s | P5 | 640 | Yes | Yes | 4.0 | 44.2 | [config](https://github.com/open-mmlab/mmyolo/blob/dev/configs/yolov8/yolov8_s_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_s_syncbn_fast_8xb16-500e_coco/yolov8_s_syncbn_fast_8xb16-500e_coco_20230117_180101-5aa5f0f1.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_s_syncbn_fast_8xb16-500e_coco/yolov8_s_syncbn_fast_8xb16-500e_coco_20230117_180101.log.json) | +| YOLOv8-m | P5 | 640 | Yes | Yes | 7.2 | 49.8 | [config](https://github.com/open-mmlab/mmyolo/blob/dev/configs/yolov8/yolov8_m_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_m_syncbn_fast_8xb16-500e_coco/yolov8_m_syncbn_fast_8xb16-500e_coco_20230115_192200-c22e560a.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_m_syncbn_fast_8xb16-500e_coco/yolov8_m_syncbn_fast_8xb16-500e_coco_20230115_192200.log.json) | **Note** From 4a7228a5cc7db0096e5e933c45e7d45d76d60289 Mon Sep 17 00:00:00 2001 From: tripleMu Date: Tue, 7 Feb 2023 14:45:27 +0800 Subject: [PATCH 11/64] Fix rtmdet decoder in easydeploy (#519) --- projects/easydeploy/bbox_code/bbox_coder.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/projects/easydeploy/bbox_code/bbox_coder.py b/projects/easydeploy/bbox_code/bbox_coder.py index 1fef2a1f..6483cf8b 100644 --- a/projects/easydeploy/bbox_code/bbox_coder.py +++ b/projects/easydeploy/bbox_code/bbox_coder.py @@ -27,6 +27,8 @@ def yolov5_bbox_decoder(priors: Tensor, bbox_preds: Tensor, def rtmdet_bbox_decoder(priors: Tensor, bbox_preds: Tensor, stride: Optional[Tensor]) -> Tensor: + stride = stride[None, :, None] + bbox_preds *= stride tl_x = (priors[..., 0] - bbox_preds[..., 0]) tl_y = (priors[..., 1] - bbox_preds[..., 1]) br_x = (priors[..., 0] + bbox_preds[..., 2]) From 031e7450bc5b6dbf1d05cce21b53898df70ce61f Mon Sep 17 00:00:00 2001 From: tripleMu Date: Wed, 8 Feb 2023 11:07:46 +0800 Subject: [PATCH 12/64] [Happy New Year] TensorRT + DeepStream Support. (#485) * [Happy New Years] TensorRT + DeepStream Support. * Add deepstream config * pre-commit fix * Fix name * [Happy New Years] TensorRT + DeepStream Support. 
* Add deepstream config * pre-commit fix * Fix name * Add config * Add rtmdet deepstream --- projects/easydeploy/deepstream/CMakeLists.txt | 35 ++++++ .../easydeploy/deepstream/coco_labels.txt | 80 ++++++++++++ .../configs/config_infer_rtmdet.txt | 22 ++++ .../nvdsparsebbox_mmyolo.cpp | 118 ++++++++++++++++++ .../deepstream/deepstream_app_config.txt | 62 +++++++++ 5 files changed, 317 insertions(+) create mode 100644 projects/easydeploy/deepstream/CMakeLists.txt create mode 100644 projects/easydeploy/deepstream/coco_labels.txt create mode 100644 projects/easydeploy/deepstream/configs/config_infer_rtmdet.txt create mode 100644 projects/easydeploy/deepstream/custom_mmyolo_bbox_parser/nvdsparsebbox_mmyolo.cpp create mode 100644 projects/easydeploy/deepstream/deepstream_app_config.txt diff --git a/projects/easydeploy/deepstream/CMakeLists.txt b/projects/easydeploy/deepstream/CMakeLists.txt new file mode 100644 index 00000000..f640bea1 --- /dev/null +++ b/projects/easydeploy/deepstream/CMakeLists.txt @@ -0,0 +1,35 @@ +cmake_minimum_required(VERSION 2.8.12) + +set(CMAKE_CUDA_ARCHITECTURES 60 61 62 70 72 75 86) +set(CMAKE_CUDA_COMPILER /usr/local/cuda/bin/nvcc) + +project(nvdsparsebbox_mmyolo LANGUAGES CXX) + +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14 -O3 -g -Wall -Werror -shared -fPIC") +set(CMAKE_CXX_STANDARD 14) +set(CMAKE_BUILD_TYPE Release) +option(CUDA_USE_STATIC_CUDA_RUNTIME OFF) + +# CUDA +find_package(CUDA REQUIRED) + +# TensorRT +set(TensorRT_INCLUDE_DIRS "/usr/include/x86_64-linux-gnu" CACHE STRING "TensorRT headers path") +set(TensorRT_LIBRARIES "/usr/lib/x86_64-linux-gnu" CACHE STRING "TensorRT libs path") + +# DeepStream +set(DEEPSTREAM "/opt/nvidia/deepstream/deepstream" CACHE STRING "DeepStream root path") +set(DS_LIBRARIES ${DEEPSTREAM}/lib) +set(DS_INCLUDE_DIRS ${DEEPSTREAM}/sources/includes) + +include_directories( + ${CUDA_INCLUDE_DIRS} + ${TensorRT_INCLUDE_DIRS} + ${DS_INCLUDE_DIRS}) + +add_library( + ${PROJECT_NAME} + SHARED + custom_mmyolo_bbox_parser/nvdsparsebbox_mmyolo.cpp) + +target_link_libraries(${PROJECT_NAME} PRIVATE nvinfer nvinfer_plugin) diff --git a/projects/easydeploy/deepstream/coco_labels.txt b/projects/easydeploy/deepstream/coco_labels.txt new file mode 100644 index 00000000..ca76c80b --- /dev/null +++ b/projects/easydeploy/deepstream/coco_labels.txt @@ -0,0 +1,80 @@ +person +bicycle +car +motorbike +aeroplane +bus +train +truck +boat +traffic light +fire hydrant +stop sign +parking meter +bench +bird +cat +dog +horse +sheep +cow +elephant +bear +zebra +giraffe +backpack +umbrella +handbag +tie +suitcase +frisbee +skis +snowboard +sports ball +kite +baseball bat +baseball glove +skateboard +surfboard +tennis racket +bottle +wine glass +cup +fork +knife +spoon +bowl +banana +apple +sandwich +orange +broccoli +carrot +hot dog +pizza +donut +cake +chair +sofa +pottedplant +bed +diningtable +toilet +tvmonitor +laptop +mouse +remote +keyboard +cell phone +microwave +oven +toaster +sink +refrigerator +book +clock +vase +scissors +teddy bear +hair drier +toothbrush diff --git a/projects/easydeploy/deepstream/configs/config_infer_rtmdet.txt b/projects/easydeploy/deepstream/configs/config_infer_rtmdet.txt new file mode 100644 index 00000000..a1e5efd2 --- /dev/null +++ b/projects/easydeploy/deepstream/configs/config_infer_rtmdet.txt @@ -0,0 +1,22 @@ +[property] +gpu-id=0 +net-scale-factor=0.01735207357279195 +offsets=57.375;57.12;58.395 +model-color-format=1 +model-engine-file=../end2end.engine +labelfile-path=../coco_labels.txt +batch-size=1 +network-mode=0 
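+# network-mode selects inference precision: 0=FP32, 1=INT8, 2=FP16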
+num-detected-classes=80 +interval=0 +gie-unique-id=1 +process-mode=1 +network-type=0 +cluster-mode=2 +maintain-aspect-ratio=1 +parse-bbox-func-name=NvDsInferParseCustomMMYOLO +custom-lib-path=../build/libnvdsparsebbox_mmyolo.so + +[class-attrs-all] +pre-cluster-threshold=0.45 +topk=100 diff --git a/projects/easydeploy/deepstream/custom_mmyolo_bbox_parser/nvdsparsebbox_mmyolo.cpp b/projects/easydeploy/deepstream/custom_mmyolo_bbox_parser/nvdsparsebbox_mmyolo.cpp new file mode 100644 index 00000000..eb780856 --- /dev/null +++ b/projects/easydeploy/deepstream/custom_mmyolo_bbox_parser/nvdsparsebbox_mmyolo.cpp @@ -0,0 +1,118 @@ +#include "nvdsinfer_custom_impl.h" +#include +#include + +/** + * Function expected by DeepStream for decoding the MMYOLO output. + * + * C-linkage [extern "C"] was written to prevent name-mangling. This function must return true after + * adding all bounding boxes to the objectList vector. + * + * @param [outputLayersInfo] std::vector of NvDsInferLayerInfo objects with information about the output layer. + * @param [networkInfo] NvDsInferNetworkInfo object with information about the MMYOLO network. + * @param [detectionParams] NvDsInferParseDetectionParams with information about some config params. + * @param [objectList] std::vector of NvDsInferParseObjectInfo objects to which bounding box information must + * be stored. + * + * @return true + */ + +// This is just the function prototype. The definition is written at the end of the file. +extern "C" bool NvDsInferParseCustomMMYOLO( + std::vector const& outputLayersInfo, + NvDsInferNetworkInfo const& networkInfo, + NvDsInferParseDetectionParams const& detectionParams, + std::vector& objectList); + +static __inline__ float clamp(float& val, float min, float max) +{ + return val > min ? (val < max ? val : max) : min; +} + +static std::vector decodeMMYoloTensor( + const int* num_dets, + const float* bboxes, + const float* scores, + const int* labels, + const float& conf_thres, + const unsigned int& img_w, + const unsigned int& img_h +) +{ + std::vector bboxInfo; + size_t nums = num_dets[0]; + for (size_t i = 0; i < nums; i++) + { + float score = scores[i]; + if (score < conf_thres)continue; + float x0 = (bboxes[i * 4]); + float y0 = (bboxes[i * 4 + 1]); + float x1 = (bboxes[i * 4 + 2]); + float y1 = (bboxes[i * 4 + 3]); + x0 = clamp(x0, 0.f, img_w); + y0 = clamp(y0, 0.f, img_h); + x1 = clamp(x1, 0.f, img_w); + y1 = clamp(y1, 0.f, img_h); + NvDsInferParseObjectInfo obj; + obj.left = x0; + obj.top = y0; + obj.width = x1 - x0; + obj.height = y1 - y0; + obj.detectionConfidence = score; + obj.classId = labels[i]; + bboxInfo.push_back(obj); + } + + return bboxInfo; +} + +/* C-linkage to prevent name-mangling */ +extern "C" bool NvDsInferParseCustomMMYOLO( + std::vector const& outputLayersInfo, + NvDsInferNetworkInfo const& networkInfo, + NvDsInferParseDetectionParams const& detectionParams, + std::vector& objectList) +{ + +// Some assertions and error checking. + if (outputLayersInfo.empty() || outputLayersInfo.size() != 4) + { + std::cerr << "Could not find output layer in bbox parsing" << std::endl; + return false; + } + +// Score threshold of bboxes. + const float conf_thres = detectionParams.perClassThreshold[0]; + +// Obtaining the output layer. 
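+// The parser assumes the exported engine exposes exactly four output
+// layers in this fixed order: 0 = num_dets, 1 = bboxes, 2 = scores,
+// 3 = labels (types are noted below).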
+ const NvDsInferLayerInfo& num_dets = outputLayersInfo[0]; + const NvDsInferLayerInfo& bboxes = outputLayersInfo[1]; + const NvDsInferLayerInfo& scores = outputLayersInfo[2]; + const NvDsInferLayerInfo& labels = outputLayersInfo[3]; + +// num_dets(int) bboxes(float) scores(float) labels(int) + assert (num_dets.dims.numDims == 2); + assert (bboxes.dims.numDims == 3); + assert (scores.dims.numDims == 2); + assert (labels.dims.numDims == 2); + + +// Decoding the output tensor of MMYOLO to the NvDsInferParseObjectInfo format. + std::vector objects = + decodeMMYoloTensor( + (const int*)(num_dets.buffer), + (const float*)(bboxes.buffer), + (const float*)(scores.buffer), + (const int*)(labels.buffer), + conf_thres, + networkInfo.width, + networkInfo.height + ); + + objectList.clear(); + objectList = objects; + return true; +} + +/* Check that the custom function has been defined correctly */ +CHECK_CUSTOM_PARSE_FUNC_PROTOTYPE(NvDsInferParseCustomMMYOLO); diff --git a/projects/easydeploy/deepstream/deepstream_app_config.txt b/projects/easydeploy/deepstream/deepstream_app_config.txt new file mode 100644 index 00000000..33177689 --- /dev/null +++ b/projects/easydeploy/deepstream/deepstream_app_config.txt @@ -0,0 +1,62 @@ +[application] +enable-perf-measurement=1 +perf-measurement-interval-sec=5 + +[tiled-display] +enable=1 +rows=1 +columns=1 +width=1280 +height=720 +gpu-id=0 +nvbuf-memory-type=0 + +[source0] +enable=1 +type=3 +uri=file:///opt/nvidia/deepstream/deepstream/samples/streams/sample_1080p_h264.mp4 +num-sources=1 +gpu-id=0 +cudadec-memtype=0 + +[sink0] +enable=1 +type=2 +sync=0 +gpu-id=0 +nvbuf-memory-type=0 + +[osd] +enable=1 +gpu-id=0 +border-width=5 +text-size=15 +text-color=1;1;1;1; +text-bg-color=0.3;0.3;0.3;1 +font=Serif +show-clock=0 +clock-x-offset=800 +clock-y-offset=820 +clock-text-size=12 +clock-color=1;0;0;0 +nvbuf-memory-type=0 + +[streammux] +gpu-id=0 +live-source=0 +batch-size=1 +batched-push-timeout=40000 +width=1920 +height=1080 +enable-padding=0 +nvbuf-memory-type=0 + +[primary-gie] +enable=1 +gpu-id=0 +gie-unique-id=1 +nvbuf-memory-type=0 +config-file=configs/config_infer_rtmdet.txt + +[tests] +file-loop=0 From 2813e89f44a67d87fa79f7b363926b0eaabf7dd6 Mon Sep 17 00:00:00 2001 From: Youfu <71306851+lyviva@users.noreply.github.com> Date: Wed, 8 Feb 2023 20:10:03 +0800 Subject: [PATCH 13/64] [Feature] Implement fast version of YOLOX (#518) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Implement fast version of YOLOX * config change * Update yolox_head.py * Update mmyolo/models/data_preprocessors/data_preprocessor.py Co-authored-by: Haian Huang(深度眸) <1286304229@qq.com> * Update mmyolo/models/data_preprocessors/data_preprocessor.py Co-authored-by: Haian Huang(深度眸) <1286304229@qq.com> * add test and modify faults * fix lint * fix lint * modify metafile and README * modify metafile and readme * fix * fix * fix * fix * fix * fix test --------- Co-authored-by: Haian Huang(深度眸) <1286304229@qq.com> --- configs/yolox/README.md | 8 ++-- configs/yolox/metafile.yml | 8 ++-- ...coco.py => yolox_l_fast_8xb8-300e_coco.py} | 2 +- ...coco.py => yolox_m_fast_8xb8-300e_coco.py} | 2 +- ...o.py => yolox_nano_fast_8xb8-300e_coco.py} | 2 +- ...coco.py => yolox_s_fast_8xb8-300e_coco.py} | 5 ++- ...o.py => yolox_tiny_fast_8xb8-300e_coco.py} | 2 +- ...coco.py => yolox_x_fast_8xb8-300e_coco.py} | 2 +- mmyolo/models/data_preprocessors/__init__.py | 5 ++- .../data_preprocessors/data_preprocessor.py | 41 +++++++++++++++++++ 
mmyolo/models/dense_heads/yolox_head.py | 30 +++++++++++++- .../test_data_preprocessor.py | 31 +++++++++++++- .../test_dense_heads/test_yolox_head.py | 20 ++++----- .../test_detectors/test_yolo_detector.py | 7 ++-- 14 files changed, 130 insertions(+), 35 deletions(-) rename configs/yolox/{yolox_l_8xb8-300e_coco.py => yolox_l_fast_8xb8-300e_coco.py} (86%) rename configs/yolox/{yolox_m_8xb8-300e_coco.py => yolox_m_fast_8xb8-300e_coco.py} (86%) rename configs/yolox/{yolox_nano_8xb8-300e_coco.py => yolox_nano_fast_8xb8-300e_coco.py} (90%) rename configs/yolox/{yolox_s_8xb8-300e_coco.py => yolox_s_fast_8xb8-300e_coco.py} (98%) rename configs/yolox/{yolox_tiny_8xb8-300e_coco.py => yolox_tiny_fast_8xb8-300e_coco.py} (97%) rename configs/yolox/{yolox_x_8xb8-300e_coco.py => yolox_x_fast_8xb8-300e_coco.py} (86%) diff --git a/configs/yolox/README.md b/configs/yolox/README.md index eff2ef4d..4219bfb3 100644 --- a/configs/yolox/README.md +++ b/configs/yolox/README.md @@ -19,10 +19,10 @@ YOLOX-l model structure ## Results and Models -| Backbone | size | Mem (GB) | box AP | Config | Download | -| :--------: | :--: | :------: | :----: | :---------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | -| YOLOX-tiny | 416 | 2.8 | 32.7 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolox/yolox_tiny_8xb8-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_tiny_8xb8-300e_coco/yolox_tiny_8xb8-300e_coco_20220919_090908-0e40a6fc.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_tiny_8xb8-300e_coco/yolox_tiny_8xb8-300e_coco_20220919_090908.log.json) | -| YOLOX-s | 640 | 5.6 | 40.8 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolox/yolox_s_8xb8-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_s_8xb8-300e_coco/yolox_s_8xb8-300e_coco_20220917_030738-d7e60cb2.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_s_8xb8-300e_coco/yolox_s_8xb8-300e_coco_20220917_030738.log.json) | +| Backbone | size | Mem (GB) | box AP | Config | Download | +| :--------: | :--: | :------: | :----: | :--------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| YOLOX-tiny | 416 | 2.8 | 32.7 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolox/yolox_tiny_fast_8xb8-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_tiny_8xb8-300e_coco/yolox_tiny_8xb8-300e_coco_20220919_090908-0e40a6fc.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_tiny_8xb8-300e_coco/yolox_tiny_8xb8-300e_coco_20220919_090908.log.json) | +| YOLOX-s | 640 | 5.6 | 40.8 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolox/yolox_s_fast_8xb8-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_s_8xb8-300e_coco/yolox_s_8xb8-300e_coco_20220917_030738-d7e60cb2.pth) \| 
[log](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_s_8xb8-300e_coco/yolox_s_8xb8-300e_coco_20220917_030738.log.json) | **Note**: diff --git a/configs/yolox/metafile.yml b/configs/yolox/metafile.yml index a05f1c69..baf3a8f5 100644 --- a/configs/yolox/metafile.yml +++ b/configs/yolox/metafile.yml @@ -20,9 +20,9 @@ Collections: Models: - - Name: yolox_tiny_8xb8-300e_coco + - Name: yolox_tiny_fast_8xb8-300e_coco In Collection: YOLOX - Config: configs/yolox/yolox_tiny_8xb8-300e_coco.py + Config: configs/yolox/yolox_tiny_fast_8xb8-300e_coco.py Metadata: Training Memory (GB): 2.8 Epochs: 300 @@ -32,9 +32,9 @@ Models: Metrics: box AP: 32.7 Weights: https://download.openmmlab.com/mmyolo/v0/yolox/yolox_tiny_8xb8-300e_coco/yolox_tiny_8xb8-300e_coco_20220919_090908-0e40a6fc.pth - - Name: yolox_s_8xb8-300e_coco + - Name: yolox_s_fast_8xb8-300e_coco In Collection: YOLOX - Config: configs/yolox/yolox_s_8xb8-300e_coco.py + Config: configs/yolox/yolox_s_fast_8xb8-300e_coco.py Metadata: Training Memory (GB): 5.6 Epochs: 300 diff --git a/configs/yolox/yolox_l_8xb8-300e_coco.py b/configs/yolox/yolox_l_fast_8xb8-300e_coco.py similarity index 86% rename from configs/yolox/yolox_l_8xb8-300e_coco.py rename to configs/yolox/yolox_l_fast_8xb8-300e_coco.py index e5e971d9..7b3d24fb 100644 --- a/configs/yolox/yolox_l_8xb8-300e_coco.py +++ b/configs/yolox/yolox_l_fast_8xb8-300e_coco.py @@ -1,4 +1,4 @@ -_base_ = './yolox_s_8xb8-300e_coco.py' +_base_ = './yolox_s_fast_8xb8-300e_coco.py' deepen_factor = 1.0 widen_factor = 1.0 diff --git a/configs/yolox/yolox_m_8xb8-300e_coco.py b/configs/yolox/yolox_m_fast_8xb8-300e_coco.py similarity index 86% rename from configs/yolox/yolox_m_8xb8-300e_coco.py rename to configs/yolox/yolox_m_fast_8xb8-300e_coco.py index 2d869413..691b61d0 100644 --- a/configs/yolox/yolox_m_8xb8-300e_coco.py +++ b/configs/yolox/yolox_m_fast_8xb8-300e_coco.py @@ -1,4 +1,4 @@ -_base_ = './yolox_s_8xb8-300e_coco.py' +_base_ = './yolox_s_fast_8xb8-300e_coco.py' deepen_factor = 0.67 widen_factor = 0.75 diff --git a/configs/yolox/yolox_nano_8xb8-300e_coco.py b/configs/yolox/yolox_nano_fast_8xb8-300e_coco.py similarity index 90% rename from configs/yolox/yolox_nano_8xb8-300e_coco.py rename to configs/yolox/yolox_nano_fast_8xb8-300e_coco.py index 2c94a5d9..1c69b5fd 100644 --- a/configs/yolox/yolox_nano_8xb8-300e_coco.py +++ b/configs/yolox/yolox_nano_fast_8xb8-300e_coco.py @@ -1,4 +1,4 @@ -_base_ = './yolox_tiny_8xb8-300e_coco.py' +_base_ = './yolox_tiny_fast_8xb8-300e_coco.py' deepen_factor = 0.33 widen_factor = 0.25 diff --git a/configs/yolox/yolox_s_8xb8-300e_coco.py b/configs/yolox/yolox_s_fast_8xb8-300e_coco.py similarity index 98% rename from configs/yolox/yolox_s_8xb8-300e_coco.py rename to configs/yolox/yolox_s_fast_8xb8-300e_coco.py index 0cebbb0e..b4dd23af 100644 --- a/configs/yolox/yolox_s_8xb8-300e_coco.py +++ b/configs/yolox/yolox_s_fast_8xb8-300e_coco.py @@ -29,11 +29,11 @@ model = dict( # TODO: Waiting for mmengine support use_syncbn=False, data_preprocessor=dict( - type='mmdet.DetDataPreprocessor', + type='YOLOv5DetDataPreprocessor', pad_size_divisor=32, batch_augments=[ dict( - type='mmdet.BatchSyncRandomResize', + type='YOLOXBatchSyncRandomResize', random_size_range=(480, 800), size_divisor=32, interval=10) @@ -157,6 +157,7 @@ train_dataloader = dict( num_workers=train_num_workers, persistent_workers=True, pin_memory=True, + collate_fn=dict(type='yolov5_collate'), sampler=dict(type='DefaultSampler', shuffle=True), dataset=dict( type=dataset_type, diff --git 
a/configs/yolox/yolox_tiny_8xb8-300e_coco.py b/configs/yolox/yolox_tiny_fast_8xb8-300e_coco.py similarity index 97% rename from configs/yolox/yolox_tiny_8xb8-300e_coco.py rename to configs/yolox/yolox_tiny_fast_8xb8-300e_coco.py index 0fd0a15a..2288bd16 100644 --- a/configs/yolox/yolox_tiny_8xb8-300e_coco.py +++ b/configs/yolox/yolox_tiny_fast_8xb8-300e_coco.py @@ -1,4 +1,4 @@ -_base_ = './yolox_s_8xb8-300e_coco.py' +_base_ = './yolox_s_fast_8xb8-300e_coco.py' deepen_factor = 0.33 widen_factor = 0.375 diff --git a/configs/yolox/yolox_x_8xb8-300e_coco.py b/configs/yolox/yolox_x_fast_8xb8-300e_coco.py similarity index 86% rename from configs/yolox/yolox_x_8xb8-300e_coco.py rename to configs/yolox/yolox_x_fast_8xb8-300e_coco.py index 56f1280a..9b327c55 100644 --- a/configs/yolox/yolox_x_8xb8-300e_coco.py +++ b/configs/yolox/yolox_x_fast_8xb8-300e_coco.py @@ -1,4 +1,4 @@ -_base_ = './yolox_s_8xb8-300e_coco.py' +_base_ = './yolox_s_fast_8xb8-300e_coco.py' deepen_factor = 1.33 widen_factor = 1.25 diff --git a/mmyolo/models/data_preprocessors/__init__.py b/mmyolo/models/data_preprocessors/__init__.py index 4e31aa71..3ef4f6d7 100644 --- a/mmyolo/models/data_preprocessors/__init__.py +++ b/mmyolo/models/data_preprocessors/__init__.py @@ -1,9 +1,10 @@ # Copyright (c) OpenMMLab. All rights reserved. from .data_preprocessor import (PPYOLOEBatchRandomResize, PPYOLOEDetDataPreprocessor, - YOLOv5DetDataPreprocessor) + YOLOv5DetDataPreprocessor, + YOLOXBatchSyncRandomResize) __all__ = [ 'YOLOv5DetDataPreprocessor', 'PPYOLOEDetDataPreprocessor', - 'PPYOLOEBatchRandomResize' + 'PPYOLOEBatchRandomResize', 'YOLOXBatchSyncRandomResize' ] diff --git a/mmyolo/models/data_preprocessors/data_preprocessor.py b/mmyolo/models/data_preprocessors/data_preprocessor.py index f21a363a..e06c0036 100644 --- a/mmyolo/models/data_preprocessors/data_preprocessor.py +++ b/mmyolo/models/data_preprocessors/data_preprocessor.py @@ -16,6 +16,47 @@ CastData = Union[tuple, dict, BaseDataElement, torch.Tensor, list, bytes, str, None] +@MODELS.register_module() +class YOLOXBatchSyncRandomResize(BatchSyncRandomResize): + """YOLOX batch random resize. + + Args: + random_size_range (tuple): The multi-scale random range during + multi-scale training. + interval (int): The iter interval of change + image size. Defaults to 10. + size_divisor (int): Image size divisible factor. + Defaults to 32. + """ + + def forward(self, inputs: Tensor, data_samples: dict) -> Tensor and dict: + """resize a batch of images and bboxes to shape ``self._input_size``""" + h, w = inputs.shape[-2:] + inputs = inputs.float() + assert isinstance(data_samples, dict) + + if self._input_size is None: + self._input_size = (h, w) + scale_y = self._input_size[0] / h + scale_x = self._input_size[1] / w + if scale_x != 1 or scale_y != 1: + inputs = F.interpolate( + inputs, + size=self._input_size, + mode='bilinear', + align_corners=False) + + data_samples['bboxes_labels'][:, 2::2] *= scale_x + data_samples['bboxes_labels'][:, 3::2] *= scale_y + + message_hub = MessageHub.get_current_instance() + if (message_hub.get_info('iter') + 1) % self._interval == 0: + self._input_size = self._get_random_size( + aspect_ratio=float(w / h), device=inputs.device) + + return inputs, data_samples + + @MODELS.register_module() class YOLOv5DetDataPreprocessor(DetDataPreprocessor): """Rewrite collate_fn to get faster training speed. 
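The YOLOXBatchSyncRandomResize hunk above resizes the whole padded batch and rescales the flattened ground-truth tensor (one `[img_idx, label, x1, y1, x2, y2]` row per box) in a single step instead of per sample. A minimal stand-alone sketch of that scaling step, written in plain PyTorch with a hypothetical helper name and not part of the patch itself:

import torch
import torch.nn.functional as F

def sync_resize(inputs: torch.Tensor, bboxes_labels: torch.Tensor,
                target_hw: tuple):
    # inputs: NCHW image batch; bboxes_labels: [num_gts, 6] rows of
    # (img_idx, label, x1, y1, x2, y2) as used by the fast pipeline.
    h, w = inputs.shape[-2:]
    scale_y, scale_x = target_hw[0] / h, target_hw[1] / w
    inputs = F.interpolate(
        inputs.float(), size=target_hw, mode='bilinear', align_corners=False)
    bboxes_labels = bboxes_labels.clone()
    bboxes_labels[:, 2::2] *= scale_x  # x1, x2 columns
    bboxes_labels[:, 3::2] *= scale_y  # y1, y2 columns
    return inputs, bboxes_labels

imgs = torch.randint(0, 256, (4, 3, 320, 320))
gts = torch.tensor([[0., 5., 10., 20., 100., 200.]])
imgs, gts = sync_resize(imgs, gts, (640, 640))
print(imgs.shape, gts)  # torch.Size([4, 3, 640, 640]); box coords doubled

The actual hook additionally re-samples a new random target size every `interval` iterations and keeps it synchronized across GPUs, which this sketch omits.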
diff --git a/mmyolo/models/dense_heads/yolox_head.py b/mmyolo/models/dense_heads/yolox_head.py index 9ab4c269..a203298d 100644 --- a/mmyolo/models/dense_heads/yolox_head.py +++ b/mmyolo/models/dense_heads/yolox_head.py @@ -265,7 +265,7 @@ class YOLOXHead(YOLOv5Head): cls_scores: Sequence[Tensor], bbox_preds: Sequence[Tensor], objectnesses: Sequence[Tensor], - batch_gt_instances: Sequence[InstanceData], + batch_gt_instances: Tensor, batch_img_metas: Sequence[dict], batch_gt_instances_ignore: OptInstanceList = None) -> dict: """Calculate the loss based on the features extracted by the detection @@ -297,6 +297,9 @@ class YOLOXHead(YOLOv5Head): if batch_gt_instances_ignore is None: batch_gt_instances_ignore = [None] * num_imgs + batch_gt_instances = self.gt_instances_preprocess( + batch_gt_instances, len(batch_img_metas)) + featmap_sizes = [cls_score.shape[2:] for cls_score in cls_scores] mlvl_priors = self.prior_generator.grid_priors( featmap_sizes, @@ -484,3 +487,28 @@ class YOLOXHead(YOLOv5Head): bbox_aux_target[:, 2:] = torch.log(gt_cxcywh[:, 2:] / priors[:, 2:] + eps) return bbox_aux_target + + @staticmethod + def gt_instances_preprocess(batch_gt_instances: Tensor, + batch_size: int) -> List[InstanceData]: + """Split batch_gt_instances with batch size. + + Args: + batch_gt_instances (Tensor): Ground truth + a 2D-Tensor for whole batch, shape [all_gt_bboxes, 6] + batch_size (int): Batch size. + + Returns: + List: batch gt instances data, shape [batch_size, InstanceData] + """ + # faster version + batch_instance_list = [] + for i in range(batch_size): + batch_gt_instance_ = InstanceData() + single_batch_instance = \ + batch_gt_instances[batch_gt_instances[:, 0] == i, :] + batch_gt_instance_.bboxes = single_batch_instance[:, 2:] + batch_gt_instance_.labels = single_batch_instance[:, 1] + batch_instance_list.append(batch_gt_instance_) + + return batch_instance_list diff --git a/tests/test_models/test_data_preprocessor/test_data_preprocessor.py b/tests/test_models/test_data_preprocessor/test_data_preprocessor.py index 1e5de1a7..2c7e4415 100644 --- a/tests/test_models/test_data_preprocessor/test_data_preprocessor.py +++ b/tests/test_models/test_data_preprocessor/test_data_preprocessor.py @@ -6,7 +6,8 @@ from mmdet.structures import DetDataSample from mmengine import MessageHub from mmyolo.models import PPYOLOEBatchRandomResize, PPYOLOEDetDataPreprocessor -from mmyolo.models.data_preprocessors import YOLOv5DetDataPreprocessor +from mmyolo.models.data_preprocessors import (YOLOv5DetDataPreprocessor, + YOLOXBatchSyncRandomResize) from mmyolo.utils import register_all_modules register_all_modules() @@ -125,3 +126,31 @@ class TestPPYOLOEDetDataPreprocessor(TestCase): # data_samples must be list with self.assertRaises(AssertionError): processor(data, training=True) + + +class TestYOLOXDetDataPreprocessor(TestCase): + + def test_batch_sync_random_size(self): + processor = YOLOXBatchSyncRandomResize( + random_size_range=(480, 800), size_divisor=32, interval=1) + self.assertTrue(isinstance(processor, YOLOXBatchSyncRandomResize)) + message_hub = MessageHub.get_instance( + 'test_yolox_batch_sync_random_resize') + message_hub.update_info('iter', 0) + + # test training + inputs = torch.randint(0, 256, (4, 3, 10, 11)) + data_samples = {'bboxes_labels': torch.randint(0, 11, (18, 6)).float()} + + inputs, data_samples = processor(inputs, data_samples) + + self.assertIn('bboxes_labels', data_samples) + self.assertIsInstance(data_samples['bboxes_labels'], torch.Tensor) + self.assertIsInstance(inputs, 
torch.Tensor) + + inputs = torch.randint(0, 256, (4, 3, 10, 11)) + data_samples = DetDataSample() + + # data_samples must be dict + with self.assertRaises(AssertionError): + processor(inputs, data_samples) diff --git a/tests/test_models/test_dense_heads/test_yolox_head.py b/tests/test_models/test_dense_heads/test_yolox_head.py index 74467302..60e0abe9 100644 --- a/tests/test_models/test_dense_heads/test_yolox_head.py +++ b/tests/test_models/test_dense_heads/test_yolox_head.py @@ -4,7 +4,6 @@ from unittest import TestCase import torch from mmengine.config import Config from mmengine.model import bias_init_with_prob -from mmengine.structures import InstanceData from mmengine.testing import assert_allclose from mmyolo.models.dense_heads import YOLOXHead @@ -98,11 +97,10 @@ class TestYOLOXHead(TestCase): # Test that empty ground truth encourages the network to predict # background - gt_instances = InstanceData( - bboxes=torch.empty((0, 4)), labels=torch.LongTensor([])) + gt_instances = torch.empty((0, 6)) empty_gt_losses = head.loss_by_feat(cls_scores, bbox_preds, - objectnesses, [gt_instances], + objectnesses, gt_instances, img_metas) # When there is no truth, the cls loss should be nonzero but there # should be no box loss. @@ -122,12 +120,11 @@ class TestYOLOXHead(TestCase): # for random inputs head = YOLOXHead(head_module=self.head_module, train_cfg=train_cfg) head.use_bbox_aux = True - gt_instances = InstanceData( - bboxes=torch.Tensor([[23.6667, 23.8757, 238.6326, 151.8874]]), - labels=torch.LongTensor([2])) + gt_instances = torch.Tensor( + [[0, 2, 23.6667, 23.8757, 238.6326, 151.8874]]) one_gt_losses = head.loss_by_feat(cls_scores, bbox_preds, objectnesses, - [gt_instances], img_metas) + gt_instances, img_metas) onegt_cls_loss = one_gt_losses['loss_cls'].sum() onegt_box_loss = one_gt_losses['loss_bbox'].sum() onegt_obj_loss = one_gt_losses['loss_obj'].sum() @@ -142,11 +139,10 @@ class TestYOLOXHead(TestCase): 'l1 loss should be non-zero') # Test groud truth out of bound - gt_instances = InstanceData( - bboxes=torch.Tensor([[s * 4, s * 4, s * 4 + 10, s * 4 + 10]]), - labels=torch.LongTensor([2])) + gt_instances = torch.Tensor( + [[0, 2, s * 4, s * 4, s * 4 + 10, s * 4 + 10]]) empty_gt_losses = head.loss_by_feat(cls_scores, bbox_preds, - objectnesses, [gt_instances], + objectnesses, gt_instances, img_metas) # When gt_bboxes out of bound, the assign results should be empty, # so the cls and bbox loss should be zero. 
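The updated tests above feed ground truth to `loss_by_feat` as a single flattened tensor with one `[img_idx, label, x1, y1, x2, y2]` row per box, rather than a list of `InstanceData`. A small illustrative sketch (plain PyTorch, hypothetical function name) of splitting that layout back into per-image boxes and labels, which is essentially what the `gt_instances_preprocess` static method added to `YOLOXHead` earlier in this patch does:

import torch

def split_flat_gts(batch_gts: torch.Tensor, batch_size: int):
    # batch_gts: [num_gts, 6] rows of (img_idx, label, x1, y1, x2, y2).
    per_image = []
    for i in range(batch_size):
        rows = batch_gts[batch_gts[:, 0] == i]
        per_image.append(dict(labels=rows[:, 1].long(), bboxes=rows[:, 2:]))
    return per_image

gts = torch.tensor([[0., 2., 23.7, 23.9, 238.6, 151.9],
                    [1., 0., 10.0, 10.0, 50.0, 80.0]])
for idx, inst in enumerate(split_flat_gts(gts, batch_size=2)):
    print(idx, inst['labels'], inst['bboxes'])

An image with no boxes simply yields empty `labels` and `bboxes` tensors, so the empty-ground-truth test case above still works with the flattened format.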
diff --git a/tests/test_models/test_detectors/test_yolo_detector.py b/tests/test_models/test_detectors/test_yolo_detector.py index 0af0f0b3..4b295204 100644 --- a/tests/test_models/test_detectors/test_yolo_detector.py +++ b/tests/test_models/test_detectors/test_yolo_detector.py @@ -21,7 +21,7 @@ class TestSingleStageDetector(TestCase): @parameterized.expand([ 'yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco.py', 'yolov6/yolov6_s_syncbn_fast_8xb32-400e_coco.py', - 'yolox/yolox_tiny_8xb8-300e_coco.py', + 'yolox/yolox_tiny_fast_8xb8-300e_coco.py', 'rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco.py', 'yolov7/yolov7_tiny_syncbn_fast_8x16b-300e_coco.py', 'yolov8/yolov8_n_syncbn_fast_8xb16-500e_coco.py' @@ -38,7 +38,6 @@ class TestSingleStageDetector(TestCase): @parameterized.expand([ ('yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py', ('cuda', 'cpu')), - ('yolox/yolox_s_8xb8-300e_coco.py', ('cuda', 'cpu')), ('yolov7/yolov7_tiny_syncbn_fast_8x16b-300e_coco.py', ('cuda', 'cpu')), ('rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco.py', ('cuda', 'cpu')), ('yolov8/yolov8_n_syncbn_fast_8xb16-500e_coco.py', ('cuda', 'cpu')) @@ -79,7 +78,7 @@ class TestSingleStageDetector(TestCase): ('yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco.py', ('cuda', 'cpu')), ('yolov6/yolov6_s_syncbn_fast_8xb32-400e_coco.py', ('cuda', 'cpu')), - ('yolox/yolox_tiny_8xb8-300e_coco.py', ('cuda', 'cpu')), + ('yolox/yolox_tiny_fast_8xb8-300e_coco.py', ('cuda', 'cpu')), ('yolov7/yolov7_tiny_syncbn_fast_8x16b-300e_coco.py', ('cuda', 'cpu')), ('rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco.py', ('cuda', 'cpu')), ('yolov8/yolov8_n_syncbn_fast_8xb16-500e_coco.py', ('cuda', 'cpu')) @@ -112,7 +111,7 @@ class TestSingleStageDetector(TestCase): ('yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco.py', ('cuda', 'cpu')), ('yolov6/yolov6_s_syncbn_fast_8xb32-400e_coco.py', ('cuda', 'cpu')), - ('yolox/yolox_tiny_8xb8-300e_coco.py', ('cuda', 'cpu')), + ('yolox/yolox_tiny_fast_8xb8-300e_coco.py', ('cuda', 'cpu')), ('yolov7/yolov7_tiny_syncbn_fast_8x16b-300e_coco.py', ('cuda', 'cpu')), ('rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco.py', ('cuda', 'cpu')), ('yolov8/yolov8_n_syncbn_fast_8xb16-500e_coco.py', ('cuda', 'cpu')) From ccb8315ff3456452067bc17eedfc4617f4105d7a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Haian=20Huang=28=E6=B7=B1=E5=BA=A6=E7=9C=B8=29?= <1286304229@qq.com> Date: Thu, 9 Feb 2023 09:46:59 +0800 Subject: [PATCH 14/64] [Enchance] judge `metainfo` is lower (#535) * judge metainfo is lower * update * update * update --- mmyolo/utils/__init__.py | 7 +++++-- mmyolo/utils/misc.py | 17 +++++++++++++++++ tools/test.py | 5 ++++- tools/train.py | 5 ++++- 4 files changed, 30 insertions(+), 4 deletions(-) diff --git a/mmyolo/utils/__init__.py b/mmyolo/utils/__init__.py index e466c982..f4e96849 100644 --- a/mmyolo/utils/__init__.py +++ b/mmyolo/utils/__init__.py @@ -1,6 +1,9 @@ # Copyright (c) OpenMMLab. All rights reserved. 
from .collect_env import collect_env -from .misc import switch_to_deploy +from .misc import is_metainfo_lower, switch_to_deploy from .setup_env import register_all_modules -__all__ = ['register_all_modules', 'collect_env', 'switch_to_deploy'] +__all__ = [ + 'register_all_modules', 'collect_env', 'switch_to_deploy', + 'is_metainfo_lower' +] diff --git a/mmyolo/utils/misc.py b/mmyolo/utils/misc.py index 5b5dd5d2..c90f52b9 100644 --- a/mmyolo/utils/misc.py +++ b/mmyolo/utils/misc.py @@ -114,3 +114,20 @@ def show_data_classes(data_classes): # Align display data to the left data_classes_info.align['Class name'] = 'l' print(data_classes_info) + + +def is_metainfo_lower(cfg): + """Determine whether the custom metainfo fields are all lowercase.""" + + def judge_keys(dataloader_cfg): + while 'dataset' in dataloader_cfg: + dataloader_cfg = dataloader_cfg['dataset'] + if 'metainfo' in dataloader_cfg: + all_keys = dataloader_cfg['metainfo'].keys() + all_is_lower = all([str(k).islower() for k in all_keys]) + assert all_is_lower, f'The keys in dataset metainfo must be all lowercase, but got {all_keys}. ' \ + f'Please refer to https://github.com/open-mmlab/mmyolo/blob/e62c8c4593/configs/yolov5/yolov5_s-v61_syncbn_fast_1xb4-300e_balloon.py#L8' # noqa + + judge_keys(cfg.get('train_dataloader', {})) + judge_keys(cfg.get('val_dataloader', {})) + judge_keys(cfg.get('test_dataloader', {})) diff --git a/tools/test.py b/tools/test.py index 0c5b89b8..53a617fd 100644 --- a/tools/test.py +++ b/tools/test.py @@ -9,7 +9,7 @@ from mmengine.evaluator import DumpResults from mmengine.runner import Runner from mmyolo.registry import RUNNERS -from mmyolo.utils import register_all_modules +from mmyolo.utils import is_metainfo_lower, register_all_modules # TODO: support fuse_conv_bn @@ -106,6 +106,9 @@ def main(): } cfg.merge_from_dict(cfg_json) + # Determine whether the custom metainfo fields are all lowercase + is_metainfo_lower(cfg) + # build the runner from config if 'runner_type' not in cfg: # build the default runner diff --git a/tools/train.py b/tools/train.py index e6ba9ebc..816a3b35 100644 --- a/tools/train.py +++ b/tools/train.py @@ -9,7 +9,7 @@ from mmengine.logging import print_log from mmengine.runner import Runner from mmyolo.registry import RUNNERS -from mmyolo.utils import register_all_modules +from mmyolo.utils import is_metainfo_lower, register_all_modules def parse_args(): @@ -99,6 +99,9 @@ def main(): cfg.resume = True cfg.load_from = args.resume + # Determine whether the custom metainfo fields are all lowercase + is_metainfo_lower(cfg) + # build the runner from config if 'runner_type' not in cfg: # build the default runner From 82d288f5d4b92e1f526d03a9c3ca051dd5cd1c33 Mon Sep 17 00:00:00 2001 From: Nioolek <40284075+Nioolek@users.noreply.github.com> Date: Thu, 9 Feb 2023 19:05:28 +0800 Subject: [PATCH 15/64] [Improve] Beauty YOLOv6 head `gt_instances_preprocess` (#532) * beauty yolov6 head preprocess * add >0 * format * add ut * move path * format --- mmyolo/models/dense_heads/ppyoloe_head.py | 3 +- mmyolo/models/dense_heads/rtmdet_head.py | 71 +--------------------- mmyolo/models/dense_heads/yolov6_head.py | 69 +--------------------- mmyolo/models/dense_heads/yolov8_head.py | 60 +------------------ mmyolo/models/utils/__init__.py | 4 +- mmyolo/models/utils/misc.py | 72 +++++++++++++++++++++++ tests/test_models/test_utils/test_misc.py | 34 +++++++++++ 7 files changed, 117 insertions(+), 196 deletions(-) diff --git a/mmyolo/models/dense_heads/ppyoloe_head.py b/mmyolo/models/dense_heads/ppyoloe_head.py 
index 67c1160b..72d82004 100644 --- a/mmyolo/models/dense_heads/ppyoloe_head.py +++ b/mmyolo/models/dense_heads/ppyoloe_head.py @@ -14,6 +14,7 @@ from torch import Tensor from mmyolo.registry import MODELS from ..layers.yolo_bricks import PPYOLOESELayer +from ..utils import gt_instances_preprocess from .yolov6_head import YOLOv6Head @@ -269,7 +270,7 @@ class PPYOLOEHead(YOLOv6Head): self.stride_tensor = self.flatten_priors_train[..., [2]] # gt info - gt_info = self.gt_instances_preprocess(batch_gt_instances, num_imgs) + gt_info = gt_instances_preprocess(batch_gt_instances, num_imgs) gt_labels = gt_info[:, :, :1] gt_bboxes = gt_info[:, :, 1:] # xyxy pad_bbox_flag = (gt_bboxes.sum(-1, keepdim=True) > 0).float() diff --git a/mmyolo/models/dense_heads/rtmdet_head.py b/mmyolo/models/dense_heads/rtmdet_head.py index 1547f276..42b2948e 100644 --- a/mmyolo/models/dense_heads/rtmdet_head.py +++ b/mmyolo/models/dense_heads/rtmdet_head.py @@ -1,5 +1,5 @@ # Copyright (c) OpenMMLab. All rights reserved. -from typing import List, Sequence, Tuple, Union +from typing import List, Sequence, Tuple import torch import torch.nn as nn @@ -13,6 +13,7 @@ from mmengine.model import (BaseModule, bias_init_with_prob, constant_init, from torch import Tensor from mmyolo.registry import MODELS, TASK_UTILS +from ..utils import gt_instances_preprocess from .yolov5_head import YOLOv5Head @@ -307,7 +308,7 @@ class RTMDetHead(YOLOv5Head): featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores] assert len(featmap_sizes) == self.prior_generator.num_levels - gt_info = self.gt_instances_preprocess(batch_gt_instances, num_imgs) + gt_info = gt_instances_preprocess(batch_gt_instances, num_imgs) gt_labels = gt_info[:, :, :1] gt_bboxes = gt_info[:, :, 1:] # xyxy pad_bbox_flag = (gt_bboxes.sum(-1, keepdim=True) > 0).float() @@ -370,69 +371,3 @@ class RTMDetHead(YOLOv5Head): loss_bbox = bbox_preds.sum() * 0 return dict(loss_cls=loss_cls, loss_bbox=loss_bbox) - - @staticmethod - def gt_instances_preprocess(batch_gt_instances: Union[Tensor, Sequence], - batch_size: int) -> Tensor: - """Split batch_gt_instances with batch size, from [all_gt_bboxes, 6] - to. - - [batch_size, number_gt, 5]. If some shape of single batch smaller than - gt bbox len, then using [-1., 0., 0., 0., 0.] to fill. - - Args: - batch_gt_instances (Sequence[Tensor]): Ground truth - instances for whole batch, shape [all_gt_bboxes, 6] - batch_size (int): Batch size. - - Returns: - Tensor: batch gt instances data, shape [batch_size, number_gt, 5] - """ - if isinstance(batch_gt_instances, Sequence): - max_gt_bbox_len = max( - [len(gt_instances) for gt_instances in batch_gt_instances]) - # fill [-1., 0., 0., 0., 0.] if some shape of - # single batch not equal max_gt_bbox_len - batch_instance_list = [] - for index, gt_instance in enumerate(batch_gt_instances): - bboxes = gt_instance.bboxes - labels = gt_instance.labels - batch_instance_list.append( - torch.cat((labels[:, None], bboxes), dim=-1)) - - if bboxes.shape[0] >= max_gt_bbox_len: - continue - - fill_tensor = bboxes.new_full( - [max_gt_bbox_len - bboxes.shape[0], 5], 0) - fill_tensor[:, 0] = -1. 
- batch_instance_list[index] = torch.cat( - (batch_instance_list[-1], fill_tensor), dim=0) - - return torch.stack(batch_instance_list) - else: - # faster version - # sqlit batch gt instance [all_gt_bboxes, 6] -> - # [batch_size, number_gt_each_batch, 5] - batch_instance_list = [] - max_gt_bbox_len = 0 - for i in range(batch_size): - single_batch_instance = \ - batch_gt_instances[batch_gt_instances[:, 0] == i, :] - single_batch_instance = single_batch_instance[:, 1:] - batch_instance_list.append(single_batch_instance) - if len(single_batch_instance) > max_gt_bbox_len: - max_gt_bbox_len = len(single_batch_instance) - - # fill [-1., 0., 0., 0., 0.] if some shape of - # single batch not equal max_gt_bbox_len - for index, gt_instance in enumerate(batch_instance_list): - if gt_instance.shape[0] >= max_gt_bbox_len: - continue - fill_tensor = batch_gt_instances.new_full( - [max_gt_bbox_len - gt_instance.shape[0], 5], 0) - fill_tensor[:, 0] = -1. - batch_instance_list[index] = torch.cat( - (batch_instance_list[index], fill_tensor), dim=0) - - return torch.stack(batch_instance_list) diff --git a/mmyolo/models/dense_heads/yolov6_head.py b/mmyolo/models/dense_heads/yolov6_head.py index 60d39620..4b492d12 100644 --- a/mmyolo/models/dense_heads/yolov6_head.py +++ b/mmyolo/models/dense_heads/yolov6_head.py @@ -14,6 +14,7 @@ from mmengine.structures import InstanceData from torch import Tensor from mmyolo.registry import MODELS, TASK_UTILS +from ..utils import gt_instances_preprocess from .yolov5_head import YOLOv5Head @@ -290,7 +291,7 @@ class YOLOv6Head(YOLOv5Head): self.stride_tensor = self.flatten_priors_train[..., [2]] # gt info - gt_info = self.gt_instances_preprocess(batch_gt_instances, num_imgs) + gt_info = gt_instances_preprocess(batch_gt_instances, num_imgs) gt_labels = gt_info[:, :, :1] gt_bboxes = gt_info[:, :, 1:] # xyxy pad_bbox_flag = (gt_bboxes.sum(-1, keepdim=True) > 0).float() @@ -366,69 +367,3 @@ class YOLOv6Head(YOLOv5Head): _, world_size = get_dist_info() return dict( loss_cls=loss_cls * world_size, loss_bbox=loss_bbox * world_size) - - @staticmethod - def gt_instances_preprocess(batch_gt_instances: Union[Tensor, Sequence], - batch_size: int) -> Tensor: - """Split batch_gt_instances with batch size, from [all_gt_bboxes, 6] - to. - - [batch_size, number_gt, 5]. If some shape of single batch smaller than - gt bbox len, then using [-1., 0., 0., 0., 0.] to fill. - - Args: - batch_gt_instances (Sequence[Tensor]): Ground truth - instances for whole batch, shape [all_gt_bboxes, 6] - batch_size (int): Batch size. - - Returns: - Tensor: batch gt instances data, shape [batch_size, number_gt, 5] - """ - if isinstance(batch_gt_instances, Sequence): - max_gt_bbox_len = max( - [len(gt_instances) for gt_instances in batch_gt_instances]) - # fill [-1., 0., 0., 0., 0.] if some shape of - # single batch not equal max_gt_bbox_len - batch_instance_list = [] - for index, gt_instance in enumerate(batch_gt_instances): - bboxes = gt_instance.bboxes - labels = gt_instance.labels - batch_instance_list.append( - torch.cat((labels[:, None], bboxes), dim=-1)) - - if bboxes.shape[0] >= max_gt_bbox_len: - continue - - fill_tensor = bboxes.new_full( - [max_gt_bbox_len - bboxes.shape[0], 5], 0) - fill_tensor[:, 0] = -1. 
- batch_instance_list[index] = torch.cat( - (batch_instance_list[-1], fill_tensor), dim=0) - - return torch.stack(batch_instance_list) - else: - # faster version - # sqlit batch gt instance [all_gt_bboxes, 6] -> - # [batch_size, number_gt_each_batch, 5] - batch_instance_list = [] - max_gt_bbox_len = 0 - for i in range(batch_size): - single_batch_instance = \ - batch_gt_instances[batch_gt_instances[:, 0] == i, :] - single_batch_instance = single_batch_instance[:, 1:] - batch_instance_list.append(single_batch_instance) - if len(single_batch_instance) > max_gt_bbox_len: - max_gt_bbox_len = len(single_batch_instance) - - # fill [-1., 0., 0., 0., 0.] if some shape of - # single batch not equal max_gt_bbox_len - for index, gt_instance in enumerate(batch_instance_list): - if gt_instance.shape[0] >= max_gt_bbox_len: - continue - fill_tensor = batch_gt_instances.new_full( - [max_gt_bbox_len - gt_instance.shape[0], 5], 0) - fill_tensor[:, 0] = -1. - batch_instance_list[index] = torch.cat( - (batch_instance_list[index], fill_tensor), dim=0) - - return torch.stack(batch_instance_list) diff --git a/mmyolo/models/dense_heads/yolov8_head.py b/mmyolo/models/dense_heads/yolov8_head.py index 7e6bf52e..d6f36c9a 100644 --- a/mmyolo/models/dense_heads/yolov8_head.py +++ b/mmyolo/models/dense_heads/yolov8_head.py @@ -14,7 +14,7 @@ from mmengine.structures import InstanceData from torch import Tensor from mmyolo.registry import MODELS, TASK_UTILS -from ..utils import make_divisible +from ..utils import gt_instances_preprocess, make_divisible from .yolov5_head import YOLOv5Head @@ -304,7 +304,7 @@ class YOLOv8Head(YOLOv5Head): self.stride_tensor = self.flatten_priors_train[..., [2]] # gt info - gt_info = self.gt_instances_preprocess(batch_gt_instances, num_imgs) + gt_info = gt_instances_preprocess(batch_gt_instances, num_imgs) gt_labels = gt_info[:, :, :1] gt_bboxes = gt_info[:, :, 1:] # xyxy pad_bbox_flag = (gt_bboxes.sum(-1, keepdim=True) > 0).float() @@ -389,59 +389,3 @@ class YOLOv8Head(YOLOv5Head): loss_cls=loss_cls * num_imgs * world_size, loss_bbox=loss_bbox * num_imgs * world_size, loss_dfl=loss_dfl * num_imgs * world_size) - - @staticmethod - def gt_instances_preprocess(batch_gt_instances: Union[Tensor, Sequence], - batch_size: int) -> Tensor: - """Split batch_gt_instances with batch size, from [all_gt_bboxes, 6] - to. - - [batch_size, number_gt, 5]. If some shape of single batch smaller than - gt bbox len, then using [-1., 0., 0., 0., 0.] to fill. - - Args: - batch_gt_instances (Sequence[Tensor]): Ground truth - instances for whole batch, shape [all_gt_bboxes, 6] - batch_size (int): Batch size. - - Returns: - Tensor: batch gt instances data, shape [batch_size, number_gt, 5] - """ - if isinstance(batch_gt_instances, Sequence): - max_gt_bbox_len = max( - [len(gt_instances) for gt_instances in batch_gt_instances]) - # fill [-1., 0., 0., 0., 0.] if some shape of - # single batch not equal max_gt_bbox_len - batch_instance_list = [] - for index, gt_instance in enumerate(batch_gt_instances): - bboxes = gt_instance.bboxes - labels = gt_instance.labels - batch_instance_list.append( - torch.cat((labels[:, None], bboxes), dim=-1)) - - if bboxes.shape[0] >= max_gt_bbox_len: - continue - - fill_tensor = bboxes.new_full( - [max_gt_bbox_len - bboxes.shape[0], 5], 0) - fill_tensor[:, 0] = -1. 
- batch_instance_list[index] = torch.cat( - (batch_instance_list[-1], fill_tensor), dim=0) - - return torch.stack(batch_instance_list) - else: - # faster version - # sqlit batch gt instance [all_gt_bboxes, 6] -> - # [batch_size, number_gt_each_batch, 5] - assert isinstance(batch_gt_instances, Tensor) - if batch_gt_instances.shape[0] == 0: - return batch_gt_instances.new_zeros((batch_size, 0, 5)) - i = batch_gt_instances[:, 0] # image index - _, counts = i.unique(return_counts=True) - out = batch_gt_instances.new_zeros((batch_size, counts.max(), 5)) - for j in range(batch_size): - matches = i == j - n = matches.sum() - if n: - out[j, :n] = batch_gt_instances[matches, 1:] - return out diff --git a/mmyolo/models/utils/__init__.py b/mmyolo/models/utils/__init__.py index 89118283..cdfeaaf0 100644 --- a/mmyolo/models/utils/__init__.py +++ b/mmyolo/models/utils/__init__.py @@ -1,4 +1,4 @@ # Copyright (c) OpenMMLab. All rights reserved. -from .misc import make_divisible, make_round +from .misc import gt_instances_preprocess, make_divisible, make_round -__all__ = ['make_divisible', 'make_round'] +__all__ = ['make_divisible', 'make_round', 'gt_instances_preprocess'] diff --git a/mmyolo/models/utils/misc.py b/mmyolo/models/utils/misc.py index 6844ad37..150182a1 100644 --- a/mmyolo/models/utils/misc.py +++ b/mmyolo/models/utils/misc.py @@ -1,5 +1,9 @@ # Copyright (c) OpenMMLab. All rights reserved. import math +from typing import Sequence, Union + +import torch +from torch import Tensor def make_divisible(x: float, @@ -12,3 +16,71 @@ def make_divisible(x: float, def make_round(x: float, deepen_factor: float = 1.0) -> int: """Make sure that x*deepen_factor becomes an integer not less than 1.""" return max(round(x * deepen_factor), 1) if x > 1 else x + + +def gt_instances_preprocess(batch_gt_instances: Union[Tensor, Sequence], + batch_size: int) -> Tensor: + """Split batch_gt_instances with batch size, from [all_gt_bboxes, 6] to. + + [batch_size, number_gt, 5]. If some shape of single batch smaller than + gt bbox len, then using [-1., 0., 0., 0., 0.] to fill. + + Args: + batch_gt_instances (Sequence[Tensor]): Ground truth + instances for whole batch, shape [all_gt_bboxes, 6] + batch_size (int): Batch size. + + Returns: + Tensor: batch gt instances data, shape [batch_size, number_gt, 5] + """ + if isinstance(batch_gt_instances, Sequence): + max_gt_bbox_len = max( + [len(gt_instances) for gt_instances in batch_gt_instances]) + # fill [0., 0., 0., 0., 0.] if some shape of + # single batch not equal max_gt_bbox_len + batch_instance_list = [] + for index, gt_instance in enumerate(batch_gt_instances): + bboxes = gt_instance.bboxes + labels = gt_instance.labels + batch_instance_list.append( + torch.cat((labels[:, None], bboxes), dim=-1)) + + if bboxes.shape[0] >= max_gt_bbox_len: + continue + + fill_tensor = bboxes.new_full( + [max_gt_bbox_len - bboxes.shape[0], 5], 0) + batch_instance_list[index] = torch.cat( + (batch_instance_list[index], fill_tensor), dim=0) + + return torch.stack(batch_instance_list) + else: + # faster version + # format of batch_gt_instances: + # [img_ind, cls_ind, x1, y1, x2, y2] + + # sqlit batch gt instance [all_gt_bboxes, 6] -> + # [batch_size, max_gt_bbox_len, 5] + assert isinstance(batch_gt_instances, Tensor) + if len(batch_gt_instances) > 0: + gt_images_indexes = batch_gt_instances[:, 0] + max_gt_bbox_len = gt_images_indexes.unique( + return_counts=True)[1].max() + # fill [0., 0., 0., 0., 0.] 
if some shape of + # single batch not equal max_gt_bbox_len + batch_instance = torch.zeros((batch_size, max_gt_bbox_len, 5), + dtype=batch_gt_instances.dtype, + device=batch_gt_instances.device) + + for i in range(batch_size): + match_indexes = gt_images_indexes == i + gt_num = match_indexes.sum() + if gt_num: + batch_instance[i, :gt_num] = batch_gt_instances[ + match_indexes, 1:] + else: + batch_instance = torch.zeros((batch_size, 0, 5), + dtype=batch_gt_instances.dtype, + device=batch_gt_instances.device) + + return batch_instance diff --git a/tests/test_models/test_utils/test_misc.py b/tests/test_models/test_utils/test_misc.py index ef101fec..0d2fa0c7 100644 --- a/tests/test_models/test_utils/test_misc.py +++ b/tests/test_models/test_utils/test_misc.py @@ -1 +1,35 @@ # Copyright (c) OpenMMLab. All rights reserved. +from unittest import TestCase + +import numpy as np +import torch +from mmengine.structures import InstanceData +from torch import Tensor + +from mmyolo.models.utils import gt_instances_preprocess +from mmyolo.utils import register_all_modules + +register_all_modules() + + +class TestGtInstancesPreprocess(TestCase): + + def test(self): + gt_instances = InstanceData( + bboxes=torch.empty((0, 4)), labels=torch.LongTensor([])) + batch_size = 1 + batch_instance = gt_instances_preprocess([gt_instances], batch_size) + self.assertIsInstance(batch_instance, Tensor) + self.assertEqual( + len(batch_instance.shape), 3, 'the len of result must be 3.') + + def test_fast_version(self): + gt_instances = torch.from_numpy( + np.array([[0., 1., 0., 0., 0., 0.]], dtype=np.float32)) + batch_size = 1 + batch_instance = gt_instances_preprocess(gt_instances, batch_size) + self.assertIsInstance(batch_instance, Tensor) + self.assertEqual( + len(batch_instance.shape), 3, 'the len of result must be 3.') + self.assertEqual(batch_instance.shape[1], 1) + self.assertEqual(batch_instance.shape[2], 5) From e966ce4e600bebff873680547f977b5f159b5d6d Mon Sep 17 00:00:00 2001 From: Nioolek <40284075+Nioolek@users.noreply.github.com> Date: Thu, 9 Feb 2023 19:08:21 +0800 Subject: [PATCH 16/64] [Improve] Beauty RTMDet config (#531) * beauty rtmdet config * format --- .../rtmdet_l_syncbn_fast_8xb32-300e_coco.py | 143 ++++++++++++------ .../rtmdet_m_syncbn_fast_8xb32-300e_coco.py | 2 + .../rtmdet_s_syncbn_fast_8xb32-300e_coco.py | 22 ++- ...rtmdet_tiny_syncbn_fast_8xb32-300e_coco.py | 16 +- .../rtmdet_x_syncbn_fast_8xb32-300e_coco.py | 2 + .../yolov5_s-v61_syncbn_8xb16-300e_coco.py | 12 +- 6 files changed, 137 insertions(+), 60 deletions(-) diff --git a/configs/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco.py b/configs/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco.py index a5add2c5..64ccc598 100644 --- a/configs/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco.py +++ b/configs/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco.py @@ -1,30 +1,56 @@ _base_ = '../_base_/default_runtime.py' +# ========================Frequently modified parameters====================== +# -----data related----- data_root = 'data/coco/' -dataset_type = 'YOLOv5CocoDataset' - -img_scale = (640, 640) # width, height -deepen_factor = 1.0 -widen_factor = 1.0 -max_epochs = 300 -stage2_num_epochs = 20 -interval = 10 -num_classes = 80 +# Path of train annotation file +train_ann_file = 'annotations/instances_train2017.json' +train_data_prefix = 'train2017/' # Prefix of train image path +# Path of val annotation file +val_ann_file = 'annotations/instances_val2017.json' +val_data_prefix = 'val2017/' # Prefix of val image path +num_classes = 80 # Number of classes 
for classification +# Batch size of a single GPU during training train_batch_size_per_gpu = 32 +# Worker to pre-fetch data for each single GPU during training train_num_workers = 10 -val_batch_size_per_gpu = 32 -val_num_workers = 10 # persistent_workers must be False if num_workers is 0. persistent_workers = True -strides = [8, 16, 32] + +# -----train val related----- +# Base learning rate for optim_wrapper. Corresponding to 8xb16=64 bs base_lr = 0.004 +max_epochs = 300 # Maximum training epochs +# Change train_pipeline for final 20 epochs (stage 2) +num_epochs_stage2 = 20 -# single-scale training is recommended to -# be turned on, which can speed up training. -env_cfg = dict(cudnn_benchmark=True) +model_test_cfg = dict( + # The config of multi-label for multi-class prediction. + multi_label=True, + # The number of boxes before NMS + nms_pre=30000, + score_thr=0.001, # Threshold to filter out boxes. + nms=dict(type='nms', iou_threshold=0.65), # NMS type and threshold + max_per_img=300) # Max number of detections of each image -# only on Val +# ========================Possible modified parameters======================== +# -----data related----- +img_scale = (640, 640) # width, height +# ratio range for random resize +random_resize_ratio_range = (0.1, 2.0) +# Cached images number in mosaic +mosaic_max_cached_images = 40 +# Number of cached images in mixup +mixup_max_cached_images = 20 +# Dataset type, this will be used to define the dataset +dataset_type = 'YOLOv5CocoDataset' +# Batch size of a single GPU during validation +val_batch_size_per_gpu = 32 +# Worker to pre-fetch data for each single GPU during validation +val_num_workers = 10 + +# Config of batch shapes. Only on val. batch_shapes_cfg = dict( type='BatchShapePolicy', batch_size=val_batch_size_per_gpu, @@ -32,6 +58,35 @@ batch_shapes_cfg = dict( size_divisor=32, extra_pad_ratio=0.5) +# -----model related----- +# The scaling factor that controls the depth of the network structure +deepen_factor = 1.0 +# The scaling factor that controls the width of the network structure +widen_factor = 1.0 +# Strides of multi-scale prior box +strides = [8, 16, 32] + +norm_cfg = dict(type='BN') # Normalization config + +# -----train val related----- +lr_start_factor = 1.0e-5 +dsl_topk = 13 # Number of bbox selected in each level +loss_cls_weight = 1.0 +loss_bbox_weight = 2.0 +qfl_beta = 2.0 # beta of QualityFocalLoss +weight_decay = 0.05 + +# Save model checkpoint and validation intervals +save_checkpoint_intervals = 10 +# validation intervals in stage 2 +val_interval_stage2 = 1 +# The maximum checkpoints to keep. +max_keep_ckpts = 3 +# single-scale training is recommended to +# be turned on, which can speed up training. 
+env_cfg = dict(cudnn_benchmark=True) + +# ===============================Unmodified in most cases==================== model = dict( type='YOLODetector', data_preprocessor=dict( @@ -46,7 +101,7 @@ model = dict( deepen_factor=deepen_factor, widen_factor=widen_factor, channel_attention=True, - norm_cfg=dict(type='BN'), + norm_cfg=norm_cfg, act_cfg=dict(type='SiLU', inplace=True)), neck=dict( type='CSPNeXtPAFPN', @@ -56,7 +111,7 @@ model = dict( out_channels=256, num_csp_blocks=3, expand_ratio=0.5, - norm_cfg=dict(type='BN'), + norm_cfg=norm_cfg, act_cfg=dict(type='SiLU', inplace=True)), bbox_head=dict( type='RTMDetHead', @@ -66,7 +121,7 @@ model = dict( in_channels=256, stacked_convs=2, feat_channels=256, - norm_cfg=dict(type='BN'), + norm_cfg=norm_cfg, act_cfg=dict(type='SiLU', inplace=True), share_conv=True, pred_kernel_size=1, @@ -77,24 +132,19 @@ model = dict( loss_cls=dict( type='mmdet.QualityFocalLoss', use_sigmoid=True, - beta=2.0, - loss_weight=1.0), - loss_bbox=dict(type='mmdet.GIoULoss', loss_weight=2.0)), + beta=qfl_beta, + loss_weight=loss_cls_weight), + loss_bbox=dict(type='mmdet.GIoULoss', loss_weight=loss_bbox_weight)), train_cfg=dict( assigner=dict( type='BatchDynamicSoftLabelAssigner', num_classes=num_classes, - topk=13, + topk=dsl_topk, iou_calculator=dict(type='mmdet.BboxOverlaps2D')), allowed_border=-1, pos_weight=-1, debug=False), - test_cfg=dict( - multi_label=True, - nms_pre=30000, - score_thr=0.001, - nms=dict(type='nms', iou_threshold=0.65), - max_per_img=300), + test_cfg=model_test_cfg, ) train_pipeline = [ @@ -104,20 +154,23 @@ train_pipeline = [ type='Mosaic', img_scale=img_scale, use_cached=True, - max_cached_images=40, + max_cached_images=mosaic_max_cached_images, pad_val=114.0), dict( type='mmdet.RandomResize', # img_scale is (width, height) scale=(img_scale[0] * 2, img_scale[1] * 2), - ratio_range=(0.1, 2.0), + ratio_range=random_resize_ratio_range, resize_type='mmdet.Resize', keep_ratio=True), dict(type='mmdet.RandomCrop', crop_size=img_scale), dict(type='mmdet.YOLOXHSVRandomAug'), dict(type='mmdet.RandomFlip', prob=0.5), dict(type='mmdet.Pad', size=img_scale, pad_val=dict(img=(114, 114, 114))), - dict(type='YOLOv5MixUp', use_cached=True, max_cached_images=20), + dict( + type='YOLOv5MixUp', + use_cached=True, + max_cached_images=mixup_max_cached_images), dict(type='mmdet.PackDetInputs') ] @@ -127,7 +180,7 @@ train_pipeline_stage2 = [ dict( type='mmdet.RandomResize', scale=img_scale, - ratio_range=(0.1, 2.0), + ratio_range=random_resize_ratio_range, resize_type='mmdet.Resize', keep_ratio=True), dict(type='mmdet.RandomCrop', crop_size=img_scale), @@ -162,8 +215,8 @@ train_dataloader = dict( dataset=dict( type=dataset_type, data_root=data_root, - ann_file='annotations/instances_train2017.json', - data_prefix=dict(img='train2017/'), + ann_file=train_ann_file, + data_prefix=dict(img=train_data_prefix), filter_cfg=dict(filter_empty_gt=True, min_size=32), pipeline=train_pipeline)) @@ -177,8 +230,8 @@ val_dataloader = dict( dataset=dict( type=dataset_type, data_root=data_root, - ann_file='annotations/instances_val2017.json', - data_prefix=dict(img='val2017/'), + ann_file=val_ann_file, + data_prefix=dict(img=val_data_prefix), test_mode=True, batch_shapes_cfg=batch_shapes_cfg, pipeline=test_pipeline)) @@ -189,14 +242,14 @@ test_dataloader = val_dataloader val_evaluator = dict( type='mmdet.CocoMetric', proposal_nums=(100, 1, 10), - ann_file=data_root + 'annotations/instances_val2017.json', + ann_file=data_root + val_ann_file, metric='bbox') test_evaluator = 
val_evaluator # optimizer optim_wrapper = dict( type='OptimWrapper', - optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05), + optimizer=dict(type='AdamW', lr=base_lr, weight_decay=weight_decay), paramwise_cfg=dict( norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True)) @@ -204,7 +257,7 @@ optim_wrapper = dict( param_scheduler = [ dict( type='LinearLR', - start_factor=1.0e-5, + start_factor=lr_start_factor, by_epoch=False, begin=0, end=1000), @@ -223,8 +276,8 @@ param_scheduler = [ default_hooks = dict( checkpoint=dict( type='CheckpointHook', - interval=interval, - max_keep_ckpts=3 # only keep latest 3 checkpoints + interval=save_checkpoint_intervals, + max_keep_ckpts=max_keep_ckpts # only keep latest 3 checkpoints )) custom_hooks = [ @@ -237,15 +290,15 @@ custom_hooks = [ priority=49), dict( type='mmdet.PipelineSwitchHook', - switch_epoch=max_epochs - stage2_num_epochs, + switch_epoch=max_epochs - num_epochs_stage2, switch_pipeline=train_pipeline_stage2) ] train_cfg = dict( type='EpochBasedTrainLoop', max_epochs=max_epochs, - val_interval=interval, - dynamic_intervals=[(max_epochs - stage2_num_epochs, 1)]) + val_interval=save_checkpoint_intervals, + dynamic_intervals=[(max_epochs - num_epochs_stage2, val_interval_stage2)]) val_cfg = dict(type='ValLoop') test_cfg = dict(type='TestLoop') diff --git a/configs/rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco.py b/configs/rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco.py index 2e8e5a40..52576bf4 100644 --- a/configs/rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco.py +++ b/configs/rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco.py @@ -1,8 +1,10 @@ _base_ = './rtmdet_l_syncbn_fast_8xb32-300e_coco.py' +# ========================modified parameters====================== deepen_factor = 0.67 widen_factor = 0.75 +# =======================Unmodified in most cases================== model = dict( backbone=dict(deepen_factor=deepen_factor, widen_factor=widen_factor), neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor), diff --git a/configs/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco.py b/configs/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco.py index 8ea4847e..47733ae6 100644 --- a/configs/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco.py +++ b/configs/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco.py @@ -1,10 +1,19 @@ _base_ = './rtmdet_l_syncbn_fast_8xb32-300e_coco.py' checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-s_imagenet_600e.pth' # noqa +# ========================modified parameters====================== deepen_factor = 0.33 widen_factor = 0.5 img_scale = _base_.img_scale +# ratio range for random resize +random_resize_ratio_range = (0.5, 2.0) +# Number of cached images in mosaic +mosaic_max_cached_images = 40 +# Number of cached images in mixup +mixup_max_cached_images = 20 + +# =======================Unmodified in most cases================== model = dict( backbone=dict( deepen_factor=deepen_factor, @@ -30,20 +39,23 @@ train_pipeline = [ type='Mosaic', img_scale=img_scale, use_cached=True, - max_cached_images=40, + max_cached_images=mosaic_max_cached_images, pad_val=114.0), dict( type='mmdet.RandomResize', # img_scale is (width, height) scale=(img_scale[0] * 2, img_scale[1] * 2), - ratio_range=(0.5, 2.0), # note + ratio_range=random_resize_ratio_range, # note resize_type='mmdet.Resize', keep_ratio=True), dict(type='mmdet.RandomCrop', crop_size=img_scale), dict(type='mmdet.YOLOXHSVRandomAug'), dict(type='mmdet.RandomFlip', prob=0.5), dict(type='mmdet.Pad', size=img_scale, 
pad_val=dict(img=(114, 114, 114))), - dict(type='YOLOv5MixUp', use_cached=True, max_cached_images=20), + dict( + type='YOLOv5MixUp', + use_cached=True, + max_cached_images=mixup_max_cached_images), dict(type='mmdet.PackDetInputs') ] @@ -53,7 +65,7 @@ train_pipeline_stage2 = [ dict( type='mmdet.RandomResize', scale=img_scale, - ratio_range=(0.5, 2.0), # note + ratio_range=random_resize_ratio_range, # note resize_type='mmdet.Resize', keep_ratio=True), dict(type='mmdet.RandomCrop', crop_size=img_scale), @@ -75,6 +87,6 @@ custom_hooks = [ priority=49), dict( type='mmdet.PipelineSwitchHook', - switch_epoch=_base_.max_epochs - _base_.stage2_num_epochs, + switch_epoch=_base_.max_epochs - _base_.num_epochs_stage2, switch_pipeline=train_pipeline_stage2) ] diff --git a/configs/rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco.py b/configs/rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco.py index 281062c1..27d6762a 100644 --- a/configs/rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco.py +++ b/configs/rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco.py @@ -1,11 +1,19 @@ _base_ = './rtmdet_s_syncbn_fast_8xb32-300e_coco.py' - checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-tiny_imagenet_600e.pth' # noqa +# ========================modified parameters====================== deepen_factor = 0.167 widen_factor = 0.375 img_scale = _base_.img_scale +# ratio range for random resize +random_resize_ratio_range = (0.5, 2.0) +# Number of cached images in mosaic +mosaic_max_cached_images = 20 +# Number of cached images in mixup +mixup_max_cached_images = 10 + +# =======================Unmodified in most cases================== model = dict( backbone=dict( deepen_factor=deepen_factor, @@ -24,14 +32,14 @@ train_pipeline = [ type='Mosaic', img_scale=img_scale, use_cached=True, - max_cached_images=20, # note + max_cached_images=mosaic_max_cached_images, # note random_pop=False, # note pad_val=114.0), dict( type='mmdet.RandomResize', # img_scale is (width, height) scale=(img_scale[0] * 2, img_scale[1] * 2), - ratio_range=(0.5, 2.0), + ratio_range=random_resize_ratio_range, resize_type='mmdet.Resize', keep_ratio=True), dict(type='mmdet.RandomCrop', crop_size=img_scale), @@ -42,7 +50,7 @@ train_pipeline = [ type='YOLOv5MixUp', use_cached=True, random_pop=False, - max_cached_images=10, + max_cached_images=mixup_max_cached_images, prob=0.5), dict(type='mmdet.PackDetInputs') ] diff --git a/configs/rtmdet/rtmdet_x_syncbn_fast_8xb32-300e_coco.py b/configs/rtmdet/rtmdet_x_syncbn_fast_8xb32-300e_coco.py index 0978c787..7fc9001f 100644 --- a/configs/rtmdet/rtmdet_x_syncbn_fast_8xb32-300e_coco.py +++ b/configs/rtmdet/rtmdet_x_syncbn_fast_8xb32-300e_coco.py @@ -1,8 +1,10 @@ _base_ = './rtmdet_l_syncbn_fast_8xb32-300e_coco.py' +# ========================modified parameters====================== deepen_factor = 1.33 widen_factor = 1.25 +# =======================Unmodified in most cases================== model = dict( backbone=dict(deepen_factor=deepen_factor, widen_factor=widen_factor), neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor), diff --git a/configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py b/configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py index b7d28168..77070b5d 100644 --- a/configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py +++ b/configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py @@ -27,7 +27,7 @@ anchors = [ ] # -----train val related----- -# Base learning rate for optim_wrapper. Corresponding to 8xb16=64 bs +# Base learning rate for optim_wrapper. 
Corresponding to 8xb16=128 bs base_lr = 0.01 max_epochs = 300 # Maximum training epochs @@ -77,12 +77,12 @@ loss_cls_weight = 0.5 loss_bbox_weight = 0.05 loss_obj_weight = 1.0 prior_match_thr = 4. # Priori box matching threshold -obj_level_weights = [4., 1., - 0.4] # The obj loss weights of the three output layers +# The obj loss weights of the three output layers +obj_level_weights = [4., 1., 0.4] lr_factor = 0.01 # Learning rate scaling factor weight_decay = 0.0005 # Save model checkpoint and validation intervals -save_epoch_intervals = 10 +save_checkpoint_intervals = 10 # The maximum checkpoints to keep. max_keep_ckpts = 3 # Single-scale training is recommended to @@ -263,7 +263,7 @@ default_hooks = dict( max_epochs=max_epochs), checkpoint=dict( type='CheckpointHook', - interval=save_epoch_intervals, + interval=save_checkpoint_intervals, save_best='auto', max_keep_ckpts=max_keep_ckpts)) @@ -287,6 +287,6 @@ test_evaluator = val_evaluator train_cfg = dict( type='EpochBasedTrainLoop', max_epochs=max_epochs, - val_interval=save_epoch_intervals) + val_interval=save_checkpoint_intervals) val_cfg = dict(type='ValLoop') test_cfg = dict(type='TestLoop') From ff3e89809ee916356702f6c446f889fbe94d1291 Mon Sep 17 00:00:00 2001 From: HinGwenWoong Date: Fri, 10 Feb 2023 10:06:17 +0800 Subject: [PATCH 17/64] [DOC] Fix error link (#537) * Fix error link * Fix link * Fix lint --- README.md | 4 +-- README_zh-CN.md | 10 +++---- configs/yolov5/README.md | 40 +++++++++++++-------------- configs/yolov7/README.md | 14 +++++----- configs/yolox/README.md | 8 +++--- docs/en/advanced_guides/plugins.md | 2 +- docs/en/get_started.md | 4 +-- docs/en/user_guides/custom_dataset.md | 2 +- docs/zh_cn/advanced_guides/plugins.md | 2 +- docs/zh_cn/get_started.md | 4 +-- 10 files changed, 45 insertions(+), 45 deletions(-) diff --git a/README.md b/README.md index e88bd621..eda8b242 100644 --- a/README.md +++ b/README.md @@ -21,8 +21,8 @@ [![PyPI](https://img.shields.io/pypi/v/mmyolo)](https://pypi.org/project/mmyolo) [![docs](https://img.shields.io/badge/docs-latest-blue)](https://mmyolo.readthedocs.io/en/latest/) [![deploy](https://github.com/open-mmlab/mmyolo/workflows/deploy/badge.svg)](https://github.com/open-mmlab/mmyolo/actions) -[![codecov](https://codecov.io/gh/open-mmlab/mmyolo/branch/master/graph/badge.svg)](https://codecov.io/gh/open-mmlab/mmyolo) -[![license](https://img.shields.io/github/license/open-mmlab/mmyolo.svg)](https://github.com/open-mmlab/mmyolo/blob/master/LICENSE) +[![codecov](https://codecov.io/gh/open-mmlab/mmyolo/branch/main/graph/badge.svg)](https://codecov.io/gh/open-mmlab/mmyolo) +[![license](https://img.shields.io/github/license/open-mmlab/mmyolo.svg)](https://github.com/open-mmlab/mmyolo/blob/main/LICENSE) [![open issues](https://isitmaintained.com/badge/open/open-mmlab/mmyolo.svg)](https://github.com/open-mmlab/mmyolo/issues) [![issue resolution](https://isitmaintained.com/badge/resolution/open-mmlab/mmyolo.svg)](https://github.com/open-mmlab/mmyolo/issues) diff --git a/README_zh-CN.md b/README_zh-CN.md index dad7208d..bdc9dacb 100644 --- a/README_zh-CN.md +++ b/README_zh-CN.md @@ -19,17 +19,17 @@
 
[![PyPI](https://img.shields.io/pypi/v/mmyolo)](https://pypi.org/project/mmyolo) -[![docs](https://img.shields.io/badge/docs-latest-blue)](https://mmyolo.readthedocs.io/en/latest/) +[![docs](https://img.shields.io/badge/docs-latest-blue)](https://mmyolo.readthedocs.io/zh_CN/latest/) [![deploy](https://github.com/open-mmlab/mmyolo/workflows/deploy/badge.svg)](https://github.com/open-mmlab/mmyolo/actions) -[![codecov](https://codecov.io/gh/open-mmlab/mmyolo/branch/master/graph/badge.svg)](https://codecov.io/gh/open-mmlab/mmyolo) -[![license](https://img.shields.io/github/license/open-mmlab/mmyolo.svg)](https://github.com/open-mmlab/mmyolo/blob/master/LICENSE) +[![codecov](https://codecov.io/gh/open-mmlab/mmyolo/branch/main/graph/badge.svg)](https://codecov.io/gh/open-mmlab/mmyolo) +[![license](https://img.shields.io/github/license/open-mmlab/mmyolo.svg)](https://github.com/open-mmlab/mmyolo/blob/main/LICENSE) [![open issues](https://isitmaintained.com/badge/open/open-mmlab/mmyolo.svg)](https://github.com/open-mmlab/mmyolo/issues) [![issue resolution](https://isitmaintained.com/badge/resolution/open-mmlab/mmyolo.svg)](https://github.com/open-mmlab/mmyolo/issues) [📘使用文档](https://mmyolo.readthedocs.io/zh_CN/latest/) | [🛠️安装教程](https://mmyolo.readthedocs.io/zh_CN/latest/get_started.html) | [👀模型库](https://mmyolo.readthedocs.io/zh_CN/latest/model_zoo.html) | -[🆕更新日志](https://mmyolo.readthedocs.io/en/latest/notes/changelog.html) | +[🆕更新日志](https://mmyolo.readthedocs.io/zh_CN/latest/notes/changelog.html) | [🤔报告问题](https://github.com/open-mmlab/mmyolo/issues/new/choose) @@ -130,7 +130,7 @@ MMYOLO 是一个基于 PyTorch 和 MMDetection 的 YOLO 系列算法开源工具 基类-P5 图为 RangeKing@GitHub 提供,非常感谢! -P6 模型图详见 [model_design.md](docs/zh_CN/algorithm_descriptions/model_design.md)。 +P6 模型图详见 [model_design.md](docs/zh_cn/algorithm_descriptions/model_design.md)。 diff --git a/configs/yolov5/README.md b/configs/yolov5/README.md index 641813c2..399de4f2 100644 --- a/configs/yolov5/README.md +++ b/configs/yolov5/README.md @@ -20,16 +20,16 @@ YOLOv5-l-P6 model structure ### COCO -| Backbone | Arch | size | SyncBN | AMP | Mem (GB) | box AP | Config | Download | -| :------: | :--: | :--: | :----: | :-: | :------: | :----: | :----------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | -| YOLOv5-n | P5 | 640 | Yes | Yes | 1.5 | 28.0 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco/yolov5_n-v61_syncbn_fast_8xb16-300e_coco_20220919_090739-b804c1ad.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco/yolov5_n-v61_syncbn_fast_8xb16-300e_coco_20220919_090739.log.json) | -| YOLOv5-s | P5 | 640 | Yes | Yes | 2.7 | 37.7 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth) \| 
[log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700.log.json) | -| YOLOv5-m | P5 | 640 | Yes | Yes | 5.0 | 45.3 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco/yolov5_m-v61_syncbn_fast_8xb16-300e_coco_20220917_204944-516a710f.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco/yolov5_m-v61_syncbn_fast_8xb16-300e_coco_20220917_204944.log.json) | -| YOLOv5-l | P5 | 640 | Yes | Yes | 8.1 | 48.8 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolov5/yolov5_l-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-v61_syncbn_fast_8xb16-300e_coco/yolov5_l-v61_syncbn_fast_8xb16-300e_coco_20220917_031007-096ef0eb.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-v61_syncbn_fast_8xb16-300e_coco/yolov5_l-v61_syncbn_fast_8xb16-300e_coco_20220917_031007.log.json) | -| YOLOv5-n | P6 | 1280 | Yes | Yes | 5.8 | 35.9 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolov5/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_224705-d493c5f3.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_224705.log.json) | -| YOLOv5-s | P6 | 1280 | Yes | Yes | 10.5 | 44.4 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolov5/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_215044-58865c19.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_215044.log.json) | -| YOLOv5-m | P6 | 1280 | Yes | Yes | 19.1 | 51.3 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolov5/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_230453-49564d58.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_230453.log.json) | -| YOLOv5-l | P6 | 1280 | Yes | Yes | 30.5 | 53.7 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolov5/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_234308-7a2ba6bf.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_234308.log.json) | +| Backbone | Arch | size | SyncBN | AMP | Mem (GB) | box AP | Config | Download | +| :------: | :--: | :--: | :----: | :-: | :------: | :----: | :--------------------------------------------------------------------------------------------------------------------: | 
:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| YOLOv5-n | P5 | 640 | Yes | Yes | 1.5 | 28.0 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco/yolov5_n-v61_syncbn_fast_8xb16-300e_coco_20220919_090739-b804c1ad.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco/yolov5_n-v61_syncbn_fast_8xb16-300e_coco_20220919_090739.log.json) | +| YOLOv5-s | P5 | 640 | Yes | Yes | 2.7 | 37.7 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700.log.json) | +| YOLOv5-m | P5 | 640 | Yes | Yes | 5.0 | 45.3 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco/yolov5_m-v61_syncbn_fast_8xb16-300e_coco_20220917_204944-516a710f.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco/yolov5_m-v61_syncbn_fast_8xb16-300e_coco_20220917_204944.log.json) | +| YOLOv5-l | P5 | 640 | Yes | Yes | 8.1 | 48.8 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_l-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-v61_syncbn_fast_8xb16-300e_coco/yolov5_l-v61_syncbn_fast_8xb16-300e_coco_20220917_031007-096ef0eb.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-v61_syncbn_fast_8xb16-300e_coco/yolov5_l-v61_syncbn_fast_8xb16-300e_coco_20220917_031007.log.json) | +| YOLOv5-n | P6 | 1280 | Yes | Yes | 5.8 | 35.9 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_224705-d493c5f3.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_224705.log.json) | +| YOLOv5-s | P6 | 1280 | Yes | Yes | 10.5 | 44.4 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_215044-58865c19.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_215044.log.json) | +| YOLOv5-m | P6 | 1280 | Yes | Yes | 19.1 | 51.3 | 
[config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_230453-49564d58.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_230453.log.json) | +| YOLOv5-l | P6 | 1280 | Yes | Yes | 30.5 | 53.7 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_234308-7a2ba6bf.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_234308.log.json) | **Note**: In the official YOLOv5 code, the `random_perspective` data augmentation in COCO object detection task training uses mask annotation information, which leads to higher performance. Object detection should not use mask annotation, so only box annotation information is used in `MMYOLO`. We will use the mask annotation information in the instance segmentation task. See https://github.com/ultralytics/yolov5/issues/9917 for details. @@ -43,12 +43,12 @@ In the official YOLOv5 code, the `random_perspective` data augmentation in COCO ### VOC -| Backbone | size | Batchsize | AMP | Mem (GB) | box AP(COCO metric) | Config | Download | -| :------: | :--: | :-------: | :-: | :------: | :-----------------: | :--------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | -| YOLOv5-n | 512 | 64 | Yes | 3.5 | 51.2 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolov5/voc/yolov5_n-v61_fast_1xb64-50e_voc.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-v61_fast_1xb64-50e_voc/yolov5_n-v61_fast_1xb64-50e_voc_20221017_234254-f1493430.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-v61_fast_1xb64-50e_voc/yolov5_n-v61_fast_1xb64-50e_voc_20221017_234254.log.json) | -| YOLOv5-s | 512 | 64 | Yes | 6.5 | 62.7 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_fast_1xb64-50e_voc/yolov5_s-v61_fast_1xb64-50e_voc_20221017_234156-0009b33e.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_fast_1xb64-50e_voc/yolov5_s-v61_fast_1xb64-50e_voc_20221017_234156.log.json) | -| YOLOv5-m | 512 | 64 | Yes | 12.0 | 70.1 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolov5/voc/yolov5_m-v61_fast_1xb64-50e_voc.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-v61_fast_1xb64-50e_voc/yolov5_m-v61_fast_1xb64-50e_voc_20221017_114138-815c143a.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-v61_fast_1xb64-50e_voc/yolov5_m-v61_fast_1xb64-50e_voc_20221017_114138.log.json) | -| YOLOv5-l | 512 | 32 | Yes | 10.0 | 73.1 | 
[config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolov5/voc/yolov5_l-v61_fast_1xb32-50e_voc.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-v61_fast_1xb32-50e_voc/yolov5_l-v61_fast_1xb32-50e_voc_20221017_045500-edc7e0d8.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-v61_fast_1xb32-50e_voc/yolov5_l-v61_fast_1xb32-50e_voc_20221017_045500.log.json) | +| Backbone | size | Batchsize | AMP | Mem (GB) | box AP(COCO metric) | Config | Download | +| :------: | :--: | :-------: | :-: | :------: | :-----------------: | :------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| YOLOv5-n | 512 | 64 | Yes | 3.5 | 51.2 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/voc/yolov5_n-v61_fast_1xb64-50e_voc.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-v61_fast_1xb64-50e_voc/yolov5_n-v61_fast_1xb64-50e_voc_20221017_234254-f1493430.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-v61_fast_1xb64-50e_voc/yolov5_n-v61_fast_1xb64-50e_voc_20221017_234254.log.json) | +| YOLOv5-s | 512 | 64 | Yes | 6.5 | 62.7 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_fast_1xb64-50e_voc/yolov5_s-v61_fast_1xb64-50e_voc_20221017_234156-0009b33e.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_fast_1xb64-50e_voc/yolov5_s-v61_fast_1xb64-50e_voc_20221017_234156.log.json) | +| YOLOv5-m | 512 | 64 | Yes | 12.0 | 70.1 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/voc/yolov5_m-v61_fast_1xb64-50e_voc.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-v61_fast_1xb64-50e_voc/yolov5_m-v61_fast_1xb64-50e_voc_20221017_114138-815c143a.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-v61_fast_1xb64-50e_voc/yolov5_m-v61_fast_1xb64-50e_voc_20221017_114138.log.json) | +| YOLOv5-l | 512 | 32 | Yes | 10.0 | 73.1 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/voc/yolov5_l-v61_fast_1xb32-50e_voc.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-v61_fast_1xb32-50e_voc/yolov5_l-v61_fast_1xb32-50e_voc_20221017_045500-edc7e0d8.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-v61_fast_1xb32-50e_voc/yolov5_l-v61_fast_1xb32-50e_voc_20221017_045500.log.json) | **Note**: @@ -62,10 +62,10 @@ In the official YOLOv5 code, the `random_perspective` data augmentation in COCO Since the `iscrowd` annotation of the COCO dataset is not equivalent to `ignore`, we use the CrowdHuman dataset to verify that the YOLOv5 ignore logic is correct. 
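A minimal sketch of how the ignore variant in the table below could relate to the baseline through config inheritance; whether `ignore_iof_thr` sits under `bbox_head` is an assumption here and should be checked against the linked configs:

```python
# Hypothetical derived config (sketch only): start from the plain CrowdHuman
# baseline and enable the ignore logic by raising the IoF threshold from
# -1 (disabled) to 0.5, matching the second row of the table below.
_base_ = './yolov5_s-v61_fast_8xb16-300e_crowdhuman.py'

# Assumption: the head exposes `ignore_iof_thr`; verify against the real config.
model = dict(bbox_head=dict(ignore_iof_thr=0.5))
```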
-| Backbone | size | SyncBN | AMP | Mem (GB) | ignore_iof_thr | box AP50(CrowDHuman Metric) | MR | JI | Config | Download | -| :------: | :--: | :----: | :-: | :------: | :------------: | :-------------------------: | :--: | :---: | :-------------------------------------------------------------------------------------------------------------------------------: | :------: | -| YOLOv5-s | 640 | Yes | Yes | 2.6 | -1 | 85.79 | 48.7 | 75.33 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolov5/crowdhuman/yolov5_s-v61_fast_8xb16-300e_crowdhuman.py) | | -| YOLOv5-s | 640 | Yes | Yes | 2.6 | 0.5 | 86.17 | 48.8 | 75.87 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolov5/crowdhuman/yolov5_s-v61_8xb16-300e_ignore_crowdhuman.py) | | +| Backbone | size | SyncBN | AMP | Mem (GB) | ignore_iof_thr | box AP50(CrowDHuman Metric) | MR | JI | Config | Download | +| :------: | :--: | :----: | :-: | :------: | :------------: | :-------------------------: | :--: | :---: | :-----------------------------------------------------------------------------------------------------------------------------: | :------: | +| YOLOv5-s | 640 | Yes | Yes | 2.6 | -1 | 85.79 | 48.7 | 75.33 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/crowdhuman/yolov5_s-v61_fast_8xb16-300e_crowdhuman.py) | | +| YOLOv5-s | 640 | Yes | Yes | 2.6 | 0.5 | 86.17 | 48.8 | 75.87 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/crowdhuman/yolov5_s-v61_8xb16-300e_ignore_crowdhuman.py) | | **Note**: diff --git a/configs/yolov7/README.md b/configs/yolov7/README.md index 33de9be0..e4f81261 100644 --- a/configs/yolov7/README.md +++ b/configs/yolov7/README.md @@ -21,13 +21,13 @@ YOLOv7-l-P5 model structure ### COCO -| Backbone | Arch | Size | SyncBN | AMP | Mem (GB) | Box AP | Config | Download | -| :---------: | :--: | :--: | :----: | :-: | :------: | :----: | :------------------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | -| YOLOv7-tiny | P5 | 640 | Yes | Yes | 2.7 | 37.5 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolov7/yolov7_tiny_syncbn_fast_8x16b-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov7/yolov7_tiny_syncbn_fast_8x16b-300e_coco/yolov7_tiny_syncbn_fast_8x16b-300e_coco_20221126_102719-0ee5bbdf.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov7/yolov7_tiny_syncbn_fast_8x16b-300e_coco/yolov7_tiny_syncbn_fast_8x16b-300e_coco_20221126_102719.log.json) | -| YOLOv7-l | P5 | 640 | Yes | Yes | 10.3 | 50.9 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolov7/yolov7_l_syncbn_fast_8x16b-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov7/yolov7_l_syncbn_fast_8x16b-300e_coco/yolov7_l_syncbn_fast_8x16b-300e_coco_20221123_023601-8113c0eb.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov7/yolov7_l_syncbn_fast_8x16b-300e_coco/yolov7_l_syncbn_fast_8x16b-300e_coco_20221123_023601.log.json) | -| YOLOv7-x | P5 | 640 | Yes | Yes | 13.7 | 52.8 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolov7/yolov7_x_syncbn_fast_8x16b-300e_coco.py) | 
[model](https://download.openmmlab.com/mmyolo/v0/yolov7/yolov7_x_syncbn_fast_8x16b-300e_coco/yolov7_x_syncbn_fast_8x16b-300e_coco_20221124_215331-ef949a68.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov7/yolov7_x_syncbn_fast_8x16b-300e_coco/yolov7_x_syncbn_fast_8x16b-300e_coco_20221124_215331.log.json) | -| YOLOv7-w | P6 | 1280 | Yes | Yes | 27.0 | 54.1 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolov7/yolov7_w-p6_syncbn_fast_8x16b-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov7/yolov7_w-p6_syncbn_fast_8x16b-300e_coco/yolov7_w-p6_syncbn_fast_8x16b-300e_coco_20221123_053031-a68ef9d2.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov7/yolov7_w-p6_syncbn_fast_8x16b-300e_coco/yolov7_w-p6_syncbn_fast_8x16b-300e_coco_20221123_053031.log.json) | -| YOLOv7-e | P6 | 1280 | Yes | Yes | 42.5 | 55.1 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolov7/yolov7_e-p6_syncbn_fast_8x16b-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov7/yolov7_e-p6_syncbn_fast_8x16b-300e_coco/yolov7_e-p6_syncbn_fast_8x16b-300e_coco_20221126_102636-34425033.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov7/yolov7_e-p6_syncbn_fast_8x16b-300e_coco/yolov7_e-p6_syncbn_fast_8x16b-300e_coco_20221126_102636.log.json) | +| Backbone | Arch | Size | SyncBN | AMP | Mem (GB) | Box AP | Config | Download | +| :---------: | :--: | :--: | :----: | :-: | :------: | :----: | :----------------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| YOLOv7-tiny | P5 | 640 | Yes | Yes | 2.7 | 37.5 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov7/yolov7_tiny_syncbn_fast_8x16b-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov7/yolov7_tiny_syncbn_fast_8x16b-300e_coco/yolov7_tiny_syncbn_fast_8x16b-300e_coco_20221126_102719-0ee5bbdf.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov7/yolov7_tiny_syncbn_fast_8x16b-300e_coco/yolov7_tiny_syncbn_fast_8x16b-300e_coco_20221126_102719.log.json) | +| YOLOv7-l | P5 | 640 | Yes | Yes | 10.3 | 50.9 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov7/yolov7_l_syncbn_fast_8x16b-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov7/yolov7_l_syncbn_fast_8x16b-300e_coco/yolov7_l_syncbn_fast_8x16b-300e_coco_20221123_023601-8113c0eb.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov7/yolov7_l_syncbn_fast_8x16b-300e_coco/yolov7_l_syncbn_fast_8x16b-300e_coco_20221123_023601.log.json) | +| YOLOv7-x | P5 | 640 | Yes | Yes | 13.7 | 52.8 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov7/yolov7_x_syncbn_fast_8x16b-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov7/yolov7_x_syncbn_fast_8x16b-300e_coco/yolov7_x_syncbn_fast_8x16b-300e_coco_20221124_215331-ef949a68.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov7/yolov7_x_syncbn_fast_8x16b-300e_coco/yolov7_x_syncbn_fast_8x16b-300e_coco_20221124_215331.log.json) | +| YOLOv7-w | P6 | 1280 | Yes | Yes | 27.0 | 54.1 | 
[config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov7/yolov7_w-p6_syncbn_fast_8x16b-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov7/yolov7_w-p6_syncbn_fast_8x16b-300e_coco/yolov7_w-p6_syncbn_fast_8x16b-300e_coco_20221123_053031-a68ef9d2.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov7/yolov7_w-p6_syncbn_fast_8x16b-300e_coco/yolov7_w-p6_syncbn_fast_8x16b-300e_coco_20221123_053031.log.json) | +| YOLOv7-e | P6 | 1280 | Yes | Yes | 42.5 | 55.1 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov7/yolov7_e-p6_syncbn_fast_8x16b-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov7/yolov7_e-p6_syncbn_fast_8x16b-300e_coco/yolov7_e-p6_syncbn_fast_8x16b-300e_coco_20221126_102636-34425033.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov7/yolov7_e-p6_syncbn_fast_8x16b-300e_coco/yolov7_e-p6_syncbn_fast_8x16b-300e_coco_20221126_102636.log.json) | **Note**: In the official YOLOv7 code, the `random_perspective` data augmentation in COCO object detection task training uses mask annotation information, which leads to higher performance. Object detection should not use mask annotation, so only box annotation information is used in `MMYOLO`. We will use the mask annotation information in the instance segmentation task. diff --git a/configs/yolox/README.md b/configs/yolox/README.md index 4219bfb3..d0885e83 100644 --- a/configs/yolox/README.md +++ b/configs/yolox/README.md @@ -19,10 +19,10 @@ YOLOX-l model structure ## Results and Models -| Backbone | size | Mem (GB) | box AP | Config | Download | -| :--------: | :--: | :------: | :----: | :--------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | -| YOLOX-tiny | 416 | 2.8 | 32.7 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolox/yolox_tiny_fast_8xb8-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_tiny_8xb8-300e_coco/yolox_tiny_8xb8-300e_coco_20220919_090908-0e40a6fc.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_tiny_8xb8-300e_coco/yolox_tiny_8xb8-300e_coco_20220919_090908.log.json) | -| YOLOX-s | 640 | 5.6 | 40.8 | [config](https://github.com/open-mmlab/mmyolo/tree/master/configs/yolox/yolox_s_fast_8xb8-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_s_8xb8-300e_coco/yolox_s_8xb8-300e_coco_20220917_030738-d7e60cb2.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_s_8xb8-300e_coco/yolox_s_8xb8-300e_coco_20220917_030738.log.json) | +| Backbone | size | Mem (GB) | box AP | Config | Download | +| :--------: | :--: | :------: | :----: | :------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| YOLOX-tiny | 416 | 2.8 | 32.7 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolox/yolox_tiny_fast_8xb8-300e_coco.py) | 
[model](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_tiny_8xb8-300e_coco/yolox_tiny_8xb8-300e_coco_20220919_090908-0e40a6fc.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_tiny_8xb8-300e_coco/yolox_tiny_8xb8-300e_coco_20220919_090908.log.json) | +| YOLOX-s | 640 | 5.6 | 40.8 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolox/yolox_s_fast_8xb8-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_s_8xb8-300e_coco/yolox_s_8xb8-300e_coco_20220917_030738-d7e60cb2.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_s_8xb8-300e_coco/yolox_s_8xb8-300e_coco_20220917_030738.log.json) | **Note**: diff --git a/docs/en/advanced_guides/plugins.md b/docs/en/advanced_guides/plugins.md index b488ab89..5a0b3236 100644 --- a/docs/en/advanced_guides/plugins.md +++ b/docs/en/advanced_guides/plugins.md @@ -26,7 +26,7 @@ MMYOLO currently supports the following plugins:
Supported Plugins -1. [CBAM](https://github.com/open-mmlab/mmyolo/blob/dev/mmyolo/models/plugins/cbam.py#L84) +1. [CBAM](https://github.com/open-mmlab/mmyolo/blob/dev/mmyolo/models/plugins/cbam.py#L86) 2. [GeneralizedAttention](https://github.com/open-mmlab/mmcv/blob/2.x/mmcv/cnn/bricks/generalized_attention.py#L13) 3. [NonLocal2d](https://github.com/open-mmlab/mmcv/blob/2.x/mmcv/cnn/bricks/non_local.py#L250) 4. [ContextBlock](https://github.com/open-mmlab/mmcv/blob/2.x/mmcv/cnn/bricks/context_block.py#L18) diff --git a/docs/en/get_started.md b/docs/en/get_started.md index b45aca57..01c1a716 100644 --- a/docs/en/get_started.md +++ b/docs/en/get_started.md @@ -239,9 +239,9 @@ Within Jupyter, the exclamation mark `!` is used to call external executables an #### Using MMYOLO with Docker -We provide a [Dockerfile](https://github.com/open-mmlab/mmyolo/blob/master/docker/Dockerfile) to build an image. Ensure that your [docker version](https://docs.docker.com/engine/install/) >=19.03. +We provide a [Dockerfile](https://github.com/open-mmlab/mmyolo/blob/main/docker/Dockerfile) to build an image. Ensure that your [docker version](https://docs.docker.com/engine/install/) >=19.03. -Reminder: If you find out that your download speed is very slow, we suggest that you can canceling the comments in the last two lines of `Optional` in the [Dockerfile](https://github.com/open-mmlab/mmyolo/blob/master/docker/Dockerfile#L19-L20) to obtain a rocket like download speed: +Reminder: If you find out that your download speed is very slow, we suggest that you can canceling the comments in the last two lines of `Optional` in the [Dockerfile](https://github.com/open-mmlab/mmyolo/blob/main/docker/Dockerfile#L19-L20) to obtain a rocket like download speed: ```dockerfile # (Optional) diff --git a/docs/en/user_guides/custom_dataset.md b/docs/en/user_guides/custom_dataset.md index 87725b3d..bce5d53f 100644 --- a/docs/en/user_guides/custom_dataset.md +++ b/docs/en/user_guides/custom_dataset.md @@ -1165,7 +1165,7 @@ docker load < /path/to/mmyolo-deploy.tar ### 11.2 Using `projects/easydeploy` to deploy ```{SeeAlso} -See [deployment documentation](https://github.com/open-mmlab/mmyolo/blob/dev/projects/easydeploy/README_en.md) for details. +See [deployment documentation](https://github.com/open-mmlab/mmyolo/blob/dev/projects/easydeploy/README.md) for details. ``` TODO: This part will be improved in the next version... diff --git a/docs/zh_cn/advanced_guides/plugins.md b/docs/zh_cn/advanced_guides/plugins.md index ae8a17d7..82673e43 100644 --- a/docs/zh_cn/advanced_guides/plugins.md +++ b/docs/zh_cn/advanced_guides/plugins.md @@ -26,7 +26,7 @@ model = dict(
支持的插件 -1. [CBAM](https://github.com/open-mmlab/mmyolo/blob/dev/mmyolo/models/plugins/cbam.py#L84) +1. [CBAM](https://github.com/open-mmlab/mmyolo/blob/dev/mmyolo/models/plugins/cbam.py#L86) 2. [GeneralizedAttention](https://github.com/open-mmlab/mmcv/blob/2.x/mmcv/cnn/bricks/generalized_attention.py#L13) 3. [NonLocal2d](https://github.com/open-mmlab/mmcv/blob/2.x/mmcv/cnn/bricks/non_local.py#L250) 4. [ContextBlock](https://github.com/open-mmlab/mmcv/blob/2.x/mmcv/cnn/bricks/context_block.py#L18) diff --git a/docs/zh_cn/get_started.md b/docs/zh_cn/get_started.md index 0eb9eeea..1cd1d1c0 100644 --- a/docs/zh_cn/get_started.md +++ b/docs/zh_cn/get_started.md @@ -240,9 +240,9 @@ print(mmyolo.__version__) #### 通过 Docker 使用 MMYOLO -我们提供了一个 [Dockerfile](https://github.com/open-mmlab/mmyolo/blob/master/docker/Dockerfile) 来构建一个镜像。请确保你的 [docker 版本](https://docs.docker.com/engine/install/) >=`19.03`。 +我们提供了一个 [Dockerfile](https://github.com/open-mmlab/mmyolo/blob/main/docker/Dockerfile) 来构建一个镜像。请确保你的 [docker 版本](https://docs.docker.com/engine/install/) >=`19.03`。 -温馨提示;国内用户建议取消掉 [Dockerfile](https://github.com/open-mmlab/mmyolo/blob/master/docker/Dockerfile#L19-L20) 里面 `Optional` 后两行的注释,可以获得火箭一般的下载提速: +温馨提示;国内用户建议取消掉 [Dockerfile](https://github.com/open-mmlab/mmyolo/blob/main/docker/Dockerfile#L19-L20) 里面 `Optional` 后两行的注释,可以获得火箭一般的下载提速: ```dockerfile # (Optional) From d8b1353691d361ccd9726568dee5d0dcbbad6a8d Mon Sep 17 00:00:00 2001 From: HinGwenWoong Date: Fri, 10 Feb 2023 10:13:47 +0800 Subject: [PATCH 18/64] [Improve] Beautify YOLOv6 all configs (#539) * Beauty YOLOv6 config * Beauty YOLOv6 config * Beauty config * Beauty config --- .../yolov6_l_syncbn_fast_8xb32-300e_coco.py | 5 ++ .../yolov6_m_syncbn_fast_8xb32-300e_coco.py | 9 ++- .../yolov6_n_syncbn_fast_8xb32-300e_coco.py | 10 ++- .../yolov6_n_syncbn_fast_8xb32-400e_coco.py | 10 ++- .../yolov6_s_syncbn_fast_8xb32-300e_coco.py | 8 +- .../yolov6_s_syncbn_fast_8xb32-400e_coco.py | 79 ++++++++++++------- .../yolov6_t_syncbn_fast_8xb32-300e_coco.py | 5 ++ .../yolov6_t_syncbn_fast_8xb32-400e_coco.py | 5 ++ 8 files changed, 99 insertions(+), 32 deletions(-) diff --git a/configs/yolov6/yolov6_l_syncbn_fast_8xb32-300e_coco.py b/configs/yolov6/yolov6_l_syncbn_fast_8xb32-300e_coco.py index 924f1075..ad5ecf34 100644 --- a/configs/yolov6/yolov6_l_syncbn_fast_8xb32-300e_coco.py +++ b/configs/yolov6/yolov6_l_syncbn_fast_8xb32-300e_coco.py @@ -1,8 +1,13 @@ _base_ = './yolov6_m_syncbn_fast_8xb32-300e_coco.py' +# ======================= Possible modified parameters ======================= +# -----model related----- +# The scaling factor that controls the depth of the network structure deepen_factor = 1 +# The scaling factor that controls the width of the network structure widen_factor = 1 +# ============================== Unmodified in most cases =================== model = dict( backbone=dict( deepen_factor=deepen_factor, diff --git a/configs/yolov6/yolov6_m_syncbn_fast_8xb32-300e_coco.py b/configs/yolov6/yolov6_m_syncbn_fast_8xb32-300e_coco.py index 4f8e33ab..09811c8c 100644 --- a/configs/yolov6/yolov6_m_syncbn_fast_8xb32-300e_coco.py +++ b/configs/yolov6/yolov6_m_syncbn_fast_8xb32-300e_coco.py @@ -1,9 +1,16 @@ _base_ = './yolov6_s_syncbn_fast_8xb32-300e_coco.py' +# ======================= Possible modified parameters ======================= +# -----model related----- +# The scaling factor that controls the depth of the network structure deepen_factor = 0.6 +# The scaling factor that controls the width of the network structure widen_factor = 0.75 
-affine_scale = 0.9 +# -----train val related----- +affine_scale = 0.9 # YOLOv5RandomAffine scaling ratio + +# ============================== Unmodified in most cases =================== model = dict( backbone=dict( type='YOLOv6CSPBep', diff --git a/configs/yolov6/yolov6_n_syncbn_fast_8xb32-300e_coco.py b/configs/yolov6/yolov6_n_syncbn_fast_8xb32-300e_coco.py index 4b992a55..bc2db4b6 100644 --- a/configs/yolov6/yolov6_n_syncbn_fast_8xb32-300e_coco.py +++ b/configs/yolov6/yolov6_n_syncbn_fast_8xb32-300e_coco.py @@ -1,8 +1,16 @@ _base_ = './yolov6_s_syncbn_fast_8xb32-300e_coco.py' +# ======================= Possible modified parameters ======================= +# -----model related----- +# The scaling factor that controls the depth of the network structure deepen_factor = 0.33 +# The scaling factor that controls the width of the network structure widen_factor = 0.25 +# -----train val related----- +lr_factor = 0.02 # Learning rate scaling factor + +# ============================== Unmodified in most cases =================== model = dict( backbone=dict(deepen_factor=deepen_factor, widen_factor=widen_factor), neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor), @@ -10,4 +18,4 @@ model = dict( head_module=dict(widen_factor=widen_factor), loss_bbox=dict(iou_mode='siou'))) -default_hooks = dict(param_scheduler=dict(lr_factor=0.02)) +default_hooks = dict(param_scheduler=dict(lr_factor=lr_factor)) diff --git a/configs/yolov6/yolov6_n_syncbn_fast_8xb32-400e_coco.py b/configs/yolov6/yolov6_n_syncbn_fast_8xb32-400e_coco.py index 36718f19..f66aa15f 100644 --- a/configs/yolov6/yolov6_n_syncbn_fast_8xb32-400e_coco.py +++ b/configs/yolov6/yolov6_n_syncbn_fast_8xb32-400e_coco.py @@ -1,8 +1,16 @@ _base_ = './yolov6_s_syncbn_fast_8xb32-400e_coco.py' +# ======================= Possible modified parameters ======================= +# -----model related----- +# The scaling factor that controls the depth of the network structure deepen_factor = 0.33 +# The scaling factor that controls the width of the network structure widen_factor = 0.25 +# -----train val related----- +lr_factor = 0.02 # Learning rate scaling factor + +# ============================== Unmodified in most cases =================== model = dict( backbone=dict(deepen_factor=deepen_factor, widen_factor=widen_factor), neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor), @@ -10,4 +18,4 @@ model = dict( head_module=dict(widen_factor=widen_factor), loss_bbox=dict(iou_mode='siou'))) -default_hooks = dict(param_scheduler=dict(lr_factor=0.02)) +default_hooks = dict(param_scheduler=dict(lr_factor=lr_factor)) diff --git a/configs/yolov6/yolov6_s_syncbn_fast_8xb32-300e_coco.py b/configs/yolov6/yolov6_s_syncbn_fast_8xb32-300e_coco.py index a5201b32..dbffaeb3 100644 --- a/configs/yolov6/yolov6_s_syncbn_fast_8xb32-300e_coco.py +++ b/configs/yolov6/yolov6_s_syncbn_fast_8xb32-300e_coco.py @@ -1,8 +1,12 @@ _base_ = './yolov6_s_syncbn_fast_8xb32-400e_coco.py' -max_epochs = 300 -num_last_epochs = 15 +# ======================= Frequently modified parameters ===================== +# -----train val related----- +# Base learning rate for optim_wrapper +max_epochs = 300 # Maximum training epochs +num_last_epochs = 15 # Last epoch number to switch training pipeline +# ============================== Unmodified in most cases =================== default_hooks = dict( param_scheduler=dict( type='YOLOv5ParamSchedulerHook', diff --git a/configs/yolov6/yolov6_s_syncbn_fast_8xb32-400e_coco.py b/configs/yolov6/yolov6_s_syncbn_fast_8xb32-400e_coco.py index 
f76f6b8b..bda6562a 100644 --- a/configs/yolov6/yolov6_s_syncbn_fast_8xb32-400e_coco.py +++ b/configs/yolov6/yolov6_s_syncbn_fast_8xb32-400e_coco.py @@ -1,31 +1,41 @@ _base_ = '../_base_/default_runtime.py' -# dataset settings -data_root = 'data/coco/' -dataset_type = 'YOLOv5CocoDataset' +# ======================= Frequently modified parameters ===================== +# -----data related----- +data_root = 'data/coco/' # Root path of data +# Path of train annotation file +train_ann_file = 'annotations/instances_train2017.json' +train_data_prefix = 'train2017/' # Prefix of train image path +# Path of val annotation file +val_ann_file = 'annotations/instances_val2017.json' +val_data_prefix = 'val2017/' # Prefix of val image path -num_last_epochs = 15 -max_epochs = 400 -num_classes = 80 - -# parameters that often need to be modified -img_scale = (640, 640) # width, height -deepen_factor = 0.33 -widen_factor = 0.5 -affine_scale = 0.5 -save_epoch_intervals = 10 +num_classes = 80 # Number of classes for classification +# Batch size of a single GPU during training train_batch_size_per_gpu = 32 +# Worker to pre-fetch data for each single GPU during training train_num_workers = 8 -val_batch_size_per_gpu = 1 -val_num_workers = 2 - -# persistent_workers must be False if num_workers is 0. +# persistent_workers must be False if num_workers is 0 persistent_workers = True +# -----train val related----- # Base learning rate for optim_wrapper base_lr = 0.01 +max_epochs = 400 # Maximum training epochs +num_last_epochs = 15 # Last epoch number to switch training pipeline -# only on Val +# ======================= Possible modified parameters ======================= +# -----data related----- +img_scale = (640, 640) # width, height +# Dataset type, this will be used to define the dataset +dataset_type = 'YOLOv5CocoDataset' +# Batch size of a single GPU during validation +val_batch_size_per_gpu = 1 +# Worker to pre-fetch data for each single GPU during validation +val_num_workers = 2 + +# Config of batch shapes. Only on val. +# It means not used if batch_shapes_cfg is None. batch_shapes_cfg = dict( type='BatchShapePolicy', batch_size=val_batch_size_per_gpu, @@ -33,10 +43,25 @@ batch_shapes_cfg = dict( size_divisor=32, extra_pad_ratio=0.5) -# single-scale training is recommended to +# -----model related----- +# The scaling factor that controls the depth of the network structure +deepen_factor = 0.33 +# The scaling factor that controls the width of the network structure +widen_factor = 0.5 + +# -----train val related----- +affine_scale = 0.5 # YOLOv5RandomAffine scaling ratio +lr_factor = 0.01 # Learning rate scaling factor +weight_decay = 0.0005 +# Save model checkpoint and validation intervals +save_epoch_intervals = 10 +# The maximum checkpoints to keep. +max_keep_ckpts = 3 +# Single-scale training is recommended to # be turned on, which can speed up training. 
env_cfg = dict(cudnn_benchmark=True) +# ============================== Unmodified in most cases =================== model = dict( type='YOLODetector', data_preprocessor=dict( @@ -162,8 +187,8 @@ train_dataloader = dict( dataset=dict( type=dataset_type, data_root=data_root, - ann_file='annotations/instances_train2017.json', - data_prefix=dict(img='train2017/'), + ann_file=train_ann_file, + data_prefix=dict(img=train_data_prefix), filter_cfg=dict(filter_empty_gt=False, min_size=32), pipeline=train_pipeline)) @@ -193,8 +218,8 @@ val_dataloader = dict( type=dataset_type, data_root=data_root, test_mode=True, - data_prefix=dict(img='val2017/'), - ann_file='annotations/instances_val2017.json', + data_prefix=dict(img=val_data_prefix), + ann_file=val_ann_file, pipeline=test_pipeline, batch_shapes_cfg=batch_shapes_cfg)) @@ -208,7 +233,7 @@ optim_wrapper = dict( type='SGD', lr=base_lr, momentum=0.937, - weight_decay=0.0005, + weight_decay=weight_decay, nesterov=True, batch_size_per_gpu=train_batch_size_per_gpu), constructor='YOLOv5OptimizerConstructor') @@ -217,12 +242,12 @@ default_hooks = dict( param_scheduler=dict( type='YOLOv5ParamSchedulerHook', scheduler_type='cosine', - lr_factor=0.01, + lr_factor=lr_factor, max_epochs=max_epochs), checkpoint=dict( type='CheckpointHook', interval=save_epoch_intervals, - max_keep_ckpts=3, + max_keep_ckpts=max_keep_ckpts, save_best='auto')) custom_hooks = [ @@ -242,7 +267,7 @@ custom_hooks = [ val_evaluator = dict( type='mmdet.CocoMetric', proposal_nums=(100, 1, 10), - ann_file=data_root + 'annotations/instances_val2017.json', + ann_file=data_root + val_ann_file, metric='bbox') test_evaluator = val_evaluator diff --git a/configs/yolov6/yolov6_t_syncbn_fast_8xb32-300e_coco.py b/configs/yolov6/yolov6_t_syncbn_fast_8xb32-300e_coco.py index d5a19e16..aa9da63f 100644 --- a/configs/yolov6/yolov6_t_syncbn_fast_8xb32-300e_coco.py +++ b/configs/yolov6/yolov6_t_syncbn_fast_8xb32-300e_coco.py @@ -1,8 +1,13 @@ _base_ = './yolov6_s_syncbn_fast_8xb32-300e_coco.py' +# ======================= Possible modified parameters ======================= +# -----model related----- +# The scaling factor that controls the depth of the network structure deepen_factor = 0.33 +# The scaling factor that controls the width of the network structure widen_factor = 0.375 +# ============================== Unmodified in most cases =================== model = dict( backbone=dict(deepen_factor=deepen_factor, widen_factor=widen_factor), neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor), diff --git a/configs/yolov6/yolov6_t_syncbn_fast_8xb32-400e_coco.py b/configs/yolov6/yolov6_t_syncbn_fast_8xb32-400e_coco.py index e8592072..75755555 100644 --- a/configs/yolov6/yolov6_t_syncbn_fast_8xb32-400e_coco.py +++ b/configs/yolov6/yolov6_t_syncbn_fast_8xb32-400e_coco.py @@ -1,8 +1,13 @@ _base_ = './yolov6_s_syncbn_fast_8xb32-400e_coco.py' +# ======================= Possible modified parameters ======================= +# -----model related----- +# The scaling factor that controls the depth of the network structure deepen_factor = 0.33 +# The scaling factor that controls the width of the network structure widen_factor = 0.375 +# ============================== Unmodified in most cases =================== model = dict( backbone=dict(deepen_factor=deepen_factor, widen_factor=widen_factor), neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor), From 4e8bf17c90d36bcdd2c512ec7e3b6e129050d11f Mon Sep 17 00:00:00 2001 From: tianlei Date: Fri, 10 Feb 2023 10:18:57 +0800 Subject: [PATCH 19/64] 
[Improve] Beautify the YOLOv7 configuration (#506) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Beautify the YOLOv7 configuration * yolov7 config * Update configs/yolov7/yolov7_l_syncbn_fast_8x16b-300e_coco.py Co-authored-by: Haian Huang(深度眸) <1286304229@qq.com> * Update configs/yolov7/yolov7_tiny_syncbn_fast_8x16b-300e_coco.py Co-authored-by: Haian Huang(深度眸) <1286304229@qq.com> * Update configs/yolov7/yolov7_w-p6_syncbn_fast_8x16b-300e_coco.py Co-authored-by: Haian Huang(深度眸) <1286304229@qq.com> * Update configs/yolov7/yolov7_w-p6_syncbn_fast_8x16b-300e_coco.py Co-authored-by: Haian Huang(深度眸) <1286304229@qq.com> * Update configs/yolov7/yolov7_w-p6_syncbn_fast_8x16b-300e_coco.py Co-authored-by: Haian Huang(深度眸) <1286304229@qq.com> * Update configs/yolov7/yolov7_w-p6_syncbn_fast_8x16b-300e_coco.py Co-authored-by: Haian Huang(深度眸) <1286304229@qq.com> * Beautify the YOLOv7 configuration --------- Co-authored-by: Haian Huang(深度眸) <1286304229@qq.com> --- .../yolov7_l_syncbn_fast_8x16b-300e_coco.py | 197 +++++++++++------- ...yolov7_tiny_syncbn_fast_8x16b-300e_coco.py | 40 ++-- ...yolov7_w-p6_syncbn_fast_8x16b-300e_coco.py | 57 +++-- 3 files changed, 194 insertions(+), 100 deletions(-) diff --git a/configs/yolov7/yolov7_l_syncbn_fast_8x16b-300e_coco.py b/configs/yolov7/yolov7_l_syncbn_fast_8x16b-300e_coco.py index 2bf8cb7f..3fca98f0 100644 --- a/configs/yolov7/yolov7_l_syncbn_fast_8x16b-300e_coco.py +++ b/configs/yolov7/yolov7_l_syncbn_fast_8x16b-300e_coco.py @@ -1,42 +1,103 @@ _base_ = '../_base_/default_runtime.py' -# dataset settings -data_root = 'data/coco/' -dataset_type = 'YOLOv5CocoDataset' +# ========================Frequently modified parameters====================== +# -----data related----- +data_root = 'data/coco/' # Root path of data +# Path of train annotation file +train_ann_file = 'annotations/instances_train2017.json' +train_data_prefix = 'train2017/' # Prefix of train image path +# Path of val annotation file +val_ann_file = 'annotations/instances_val2017.json' +val_data_prefix = 'val2017/' # Prefix of val image path -# parameters that often need to be modified -img_scale = (640, 640) # width, height -max_epochs = 300 -save_epoch_intervals = 10 +num_classes = 80 # Number of classes for classification +# Batch size of a single GPU during training train_batch_size_per_gpu = 16 +# Worker to pre-fetch data for each single GPU during training train_num_workers = 8 -# persistent_workers must be False if num_workers is 0. +# persistent_workers must be False if num_workers is 0 persistent_workers = True -val_batch_size_per_gpu = 1 -val_num_workers = 2 -# only on Val -batch_shapes_cfg = dict( - type='BatchShapePolicy', - batch_size=val_batch_size_per_gpu, - img_size=img_scale[0], - size_divisor=32, - extra_pad_ratio=0.5) - -# different from yolov5 +# -----model related----- +# Basic size of multi-scale prior box anchors = [ [(12, 16), (19, 36), (40, 28)], # P3/8 [(36, 75), (76, 55), (72, 146)], # P4/16 [(142, 110), (192, 243), (459, 401)] # P5/32 ] -strides = [8, 16, 32] -num_det_layers = 3 -num_classes = 80 +# -----train val related----- +# Base learning rate for optim_wrapper. Corresponding to 8xb16=64 bs +base_lr = 0.01 +max_epochs = 300 # Maximum training epochs -# single-scale training is recommended to +num_epoch_stage2 = 30 # The last 30 epochs switch evaluation interval +val_interval_stage2 = 1 # Evaluation interval + +model_test_cfg = dict( + # The config of multi-label for multi-class prediction. 
+ multi_label=True, + # The number of boxes before NMS. + nms_pre=30000, + score_thr=0.001, # Threshold to filter out boxes. + nms=dict(type='nms', iou_threshold=0.65), # NMS type and threshold + max_per_img=300) # Max number of detections of each image + +# ========================Possible modified parameters======================== +# -----data related----- +img_scale = (640, 640) # width, height +# Dataset type, this will be used to define the dataset +dataset_type = 'YOLOv5CocoDataset' +# Batch size of a single GPU during validation +val_batch_size_per_gpu = 1 +# Worker to pre-fetch data for each single GPU during validation +val_num_workers = 2 + +# Config of batch shapes. Only on val. +# It means not used if batch_shapes_cfg is None. +batch_shapes_cfg = dict( + type='BatchShapePolicy', + batch_size=val_batch_size_per_gpu, + img_size=img_scale[0], + # The image scale of padding should be divided by pad_size_divisor + size_divisor=32, + # Additional paddings for pixel scale + extra_pad_ratio=0.5) + +# -----model related----- +strides = [8, 16, 32] # Strides of multi-scale prior box +num_det_layers = 3 # The number of model output scales +norm_cfg = dict(type='BN', momentum=0.03, eps=0.001) + +# Data augmentation +max_translate_ratio = 0.2 # YOLOv5RandomAffine +scaling_ratio_range = (0.1, 2.0) # YOLOv5RandomAffine +mixup_prob = 0.15 # YOLOv5MixUp +randchoice_mosaic_prob = [0.8, 0.2] +mixup_alpha = 8.0 # YOLOv5MixUp +mixup_beta = 8.0 # YOLOv5MixUp + +# -----train val related----- +loss_cls_weight = 0.3 +loss_bbox_weight = 0.05 +loss_obj_weight = 0.7 +# BatchYOLOv7Assigner params +simota_candidate_topk = 10 +simota_iou_weight = 3.0 +simota_cls_weight = 1.0 +prior_match_thr = 4. # Priori box matching threshold +obj_level_weights = [4., 1., + 0.4] # The obj loss weights of the three output layers + +lr_factor = 0.1 # Learning rate scaling factor +weight_decay = 0.0005 +save_epoch_intervals = 1 # Save model checkpoint and validation intervals +max_keep_ckpts = 3 # The maximum checkpoints to keep. + +# Single-scale training is recommended to # be turned on, which can speed up training. 
env_cfg = dict(cudnn_benchmark=True) +# ===============================Unmodified in most cases==================== model = dict( type='YOLODetector', data_preprocessor=dict( @@ -47,7 +108,7 @@ model = dict( backbone=dict( type='YOLOv7Backbone', arch='L', - norm_cfg=dict(type='BN', momentum=0.03, eps=0.001), + norm_cfg=norm_cfg, act_cfg=dict(type='SiLU', inplace=True)), neck=dict( type='YOLOv7PAFPN', @@ -61,7 +122,7 @@ model = dict( in_channels=[512, 1024, 1024], # The real output channel will be multiplied by 2 out_channels=[128, 256, 512], - norm_cfg=dict(type='BN', momentum=0.03, eps=0.001), + norm_cfg=norm_cfg, act_cfg=dict(type='SiLU', inplace=True)), bbox_head=dict( type='YOLOv7Head', @@ -80,31 +141,28 @@ model = dict( type='mmdet.CrossEntropyLoss', use_sigmoid=True, reduction='mean', - loss_weight=0.3 * (num_classes / 80 * 3 / num_det_layers)), + loss_weight=loss_cls_weight * + (num_classes / 80 * 3 / num_det_layers)), loss_bbox=dict( type='IoULoss', iou_mode='ciou', bbox_format='xywh', reduction='mean', - loss_weight=0.05 * (3 / num_det_layers), + loss_weight=loss_bbox_weight * (3 / num_det_layers), return_iou=True), loss_obj=dict( type='mmdet.CrossEntropyLoss', use_sigmoid=True, reduction='mean', - loss_weight=0.7 * ((img_scale[0] / 640)**2 * 3 / num_det_layers)), - obj_level_weights=[4., 1., 0.4], + loss_weight=loss_obj_weight * + ((img_scale[0] / 640)**2 * 3 / num_det_layers)), + prior_match_thr=prior_match_thr, + obj_level_weights=obj_level_weights, # BatchYOLOv7Assigner params - prior_match_thr=4., - simota_candidate_topk=10, - simota_iou_weight=3.0, - simota_cls_weight=1.0), - test_cfg=dict( - multi_label=True, - nms_pre=30000, - score_thr=0.001, - nms=dict(type='nms', iou_threshold=0.65), - max_per_img=300)) + simota_candidate_topk=simota_candidate_topk, + simota_iou_weight=simota_iou_weight, + simota_cls_weight=simota_cls_weight), + test_cfg=model_test_cfg) pre_transform = [ dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args), @@ -121,8 +179,8 @@ mosiac4_pipeline = [ type='YOLOv5RandomAffine', max_rotate_degree=0.0, max_shear_degree=0.0, - max_translate_ratio=0.2, # note - scaling_ratio_range=(0.1, 2.0), # note + max_translate_ratio=max_translate_ratio, # note + scaling_ratio_range=scaling_ratio_range, # note # img_scale is (width, height) border=(-img_scale[0] // 2, -img_scale[1] // 2), border_val=(114, 114, 114)), @@ -138,8 +196,8 @@ mosiac9_pipeline = [ type='YOLOv5RandomAffine', max_rotate_degree=0.0, max_shear_degree=0.0, - max_translate_ratio=0.2, # note - scaling_ratio_range=(0.1, 2.0), # note + max_translate_ratio=max_translate_ratio, # note + scaling_ratio_range=scaling_ratio_range, # note # img_scale is (width, height) border=(-img_scale[0] // 2, -img_scale[1] // 2), border_val=(114, 114, 114)), @@ -148,16 +206,16 @@ mosiac9_pipeline = [ randchoice_mosaic_pipeline = dict( type='RandomChoice', transforms=[mosiac4_pipeline, mosiac9_pipeline], - prob=[0.8, 0.2]) + prob=randchoice_mosaic_prob) train_pipeline = [ *pre_transform, randchoice_mosaic_pipeline, dict( type='YOLOv5MixUp', - alpha=8.0, # note - beta=8.0, # note - prob=0.15, + alpha=mixup_alpha, # note + beta=mixup_beta, # note + prob=mixup_prob, pre_transform=[*pre_transform, randchoice_mosaic_pipeline]), dict(type='YOLOv5HSVRandomAug'), dict(type='mmdet.RandomFlip', prob=0.5), @@ -177,8 +235,8 @@ train_dataloader = dict( dataset=dict( type=dataset_type, data_root=data_root, - ann_file='annotations/instances_train2017.json', - data_prefix=dict(img='train2017/'), + ann_file=train_ann_file, 
+ data_prefix=dict(img=train_data_prefix), filter_cfg=dict(filter_empty_gt=False, min_size=32), pipeline=train_pipeline)) @@ -208,8 +266,8 @@ val_dataloader = dict( type=dataset_type, data_root=data_root, test_mode=True, - data_prefix=dict(img='val2017/'), - ann_file='annotations/instances_val2017.json', + data_prefix=dict(img=val_data_prefix), + ann_file=val_ann_file, pipeline=test_pipeline, batch_shapes_cfg=batch_shapes_cfg)) @@ -220,9 +278,9 @@ optim_wrapper = dict( type='OptimWrapper', optimizer=dict( type='SGD', - lr=0.01, + lr=base_lr, momentum=0.937, - weight_decay=0.0005, + weight_decay=weight_decay, nesterov=True, batch_size_per_gpu=train_batch_size_per_gpu), constructor='YOLOv7OptimWrapperConstructor') @@ -231,27 +289,14 @@ default_hooks = dict( param_scheduler=dict( type='YOLOv5ParamSchedulerHook', scheduler_type='cosine', - lr_factor=0.1, # note + lr_factor=lr_factor, # note max_epochs=max_epochs), checkpoint=dict( type='CheckpointHook', save_param_scheduler=False, - interval=1, + interval=save_epoch_intervals, save_best='auto', - max_keep_ckpts=3)) - -val_evaluator = dict( - type='mmdet.CocoMetric', - proposal_nums=(100, 1, 10), # Can be accelerated - ann_file=data_root + 'annotations/instances_val2017.json', - metric='bbox') -test_evaluator = val_evaluator - -train_cfg = dict( - type='EpochBasedTrainLoop', - max_epochs=max_epochs, - val_interval=save_epoch_intervals, - dynamic_intervals=[(270, 1)]) + max_keep_ckpts=max_keep_ckpts)) custom_hooks = [ dict( @@ -263,7 +308,17 @@ custom_hooks = [ priority=49) ] +val_evaluator = dict( + type='mmdet.CocoMetric', + proposal_nums=(100, 1, 10), # Can be accelerated + ann_file=data_root + val_ann_file, + metric='bbox') +test_evaluator = val_evaluator + +train_cfg = dict( + type='EpochBasedTrainLoop', + max_epochs=max_epochs, + val_interval=save_epoch_intervals, + dynamic_intervals=[(max_epochs - num_epoch_stage2, val_interval_stage2)]) val_cfg = dict(type='ValLoop') test_cfg = dict(type='TestLoop') - -# randomness = dict(seed=1, deterministic=True) diff --git a/configs/yolov7/yolov7_tiny_syncbn_fast_8x16b-300e_coco.py b/configs/yolov7/yolov7_tiny_syncbn_fast_8x16b-300e_coco.py index afb00402..b9e9f10e 100644 --- a/configs/yolov7/yolov7_tiny_syncbn_fast_8x16b-300e_coco.py +++ b/configs/yolov7/yolov7_tiny_syncbn_fast_8x16b-300e_coco.py @@ -1,10 +1,26 @@ _base_ = './yolov7_l_syncbn_fast_8x16b-300e_coco.py' +# ========================modified parameters======================== + +# -----model related----- +# Data augmentation +max_translate_ratio = 0.1 # YOLOv5RandomAffine +scaling_ratio_range = (0.5, 1.6) # YOLOv5RandomAffine +mixup_prob = 0.05 # YOLOv5MixUp +randchoice_mosaic_prob = [0.8, 0.2] +mixup_alpha = 8.0 # YOLOv5MixUp +mixup_beta = 8.0 # YOLOv5MixUp + +# -----train val related----- +loss_cls_weight = 0.5 +loss_obj_weight = 1.0 + +lr_factor = 0.01 # Learning rate scaling factor +# ===============================Unmodified in most cases==================== num_classes = _base_.num_classes num_det_layers = _base_.num_det_layers img_scale = _base_.img_scale pre_transform = _base_.pre_transform - model = dict( backbone=dict( arch='Tiny', act_cfg=dict(type='LeakyReLU', negative_slope=0.1)), @@ -18,9 +34,9 @@ model = dict( use_repconv_outs=False), bbox_head=dict( head_module=dict(in_channels=[128, 256, 512]), - loss_cls=dict(loss_weight=0.5 * + loss_cls=dict(loss_weight=loss_cls_weight * (num_classes / 80 * 3 / num_det_layers)), - loss_obj=dict(loss_weight=1.0 * + loss_obj=dict(loss_weight=loss_obj_weight * ((img_scale[0] / 640)**2 * 3 / 
num_det_layers)))) mosiac4_pipeline = [ @@ -33,8 +49,8 @@ mosiac4_pipeline = [ type='YOLOv5RandomAffine', max_rotate_degree=0.0, max_shear_degree=0.0, - max_translate_ratio=0.1, # change - scaling_ratio_range=(0.5, 1.6), # change + max_translate_ratio=max_translate_ratio, # change + scaling_ratio_range=scaling_ratio_range, # change # img_scale is (width, height) border=(-img_scale[0] // 2, -img_scale[1] // 2), border_val=(114, 114, 114)), @@ -50,8 +66,8 @@ mosiac9_pipeline = [ type='YOLOv5RandomAffine', max_rotate_degree=0.0, max_shear_degree=0.0, - max_translate_ratio=0.1, # change - scaling_ratio_range=(0.5, 1.6), # change + max_translate_ratio=max_translate_ratio, # change + scaling_ratio_range=scaling_ratio_range, # change border=(-img_scale[0] // 2, -img_scale[1] // 2), border_val=(114, 114, 114)), ] @@ -59,16 +75,16 @@ mosiac9_pipeline = [ randchoice_mosaic_pipeline = dict( type='RandomChoice', transforms=[mosiac4_pipeline, mosiac9_pipeline], - prob=[0.8, 0.2]) + prob=randchoice_mosaic_prob) train_pipeline = [ *pre_transform, randchoice_mosaic_pipeline, dict( type='YOLOv5MixUp', - alpha=8.0, - beta=8.0, - prob=0.05, # change + alpha=mixup_alpha, + beta=mixup_beta, + prob=mixup_prob, # change pre_transform=[*pre_transform, randchoice_mosaic_pipeline]), dict(type='YOLOv5HSVRandomAug'), dict(type='mmdet.RandomFlip', prob=0.5), @@ -79,4 +95,4 @@ train_pipeline = [ ] train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) -default_hooks = dict(param_scheduler=dict(lr_factor=0.01)) +default_hooks = dict(param_scheduler=dict(lr_factor=lr_factor)) diff --git a/configs/yolov7/yolov7_w-p6_syncbn_fast_8x16b-300e_coco.py b/configs/yolov7/yolov7_w-p6_syncbn_fast_8x16b-300e_coco.py index 6536c093..17cb84da 100644 --- a/configs/yolov7/yolov7_w-p6_syncbn_fast_8x16b-300e_coco.py +++ b/configs/yolov7/yolov7_w-p6_syncbn_fast_8x16b-300e_coco.py @@ -1,19 +1,42 @@ _base_ = './yolov7_l_syncbn_fast_8x16b-300e_coco.py' +# ========================modified parameters======================== +# -----data related----- img_scale = (1280, 1280) # height, width -num_classes = 80 -# only on Val -batch_shapes_cfg = dict(img_size=img_scale[0], size_divisor=64) +num_classes = 80 # Number of classes for classification +# Config of batch shapes. Only on val +# It means not used if batch_shapes_cfg is None. 
+batch_shapes_cfg = dict( + img_size=img_scale[ + 0], # The image scale of padding should be divided by pad_size_divisor + size_divisor=64) # Additional paddings for pixel scale +# -----model related----- +# Basic size of multi-scale prior box anchors = [ [(19, 27), (44, 40), (38, 94)], # P3/8 [(96, 68), (86, 152), (180, 137)], # P4/16 [(140, 301), (303, 264), (238, 542)], # P5/32 [(436, 615), (739, 380), (925, 792)] # P6/64 ] -strides = [8, 16, 32, 64] -num_det_layers = 4 +strides = [8, 16, 32, 64] # Strides of multi-scale prior box +num_det_layers = 4 # # The number of model output scales +norm_cfg = dict(type='BN', momentum=0.03, eps=0.001) +# Data augmentation +max_translate_ratio = 0.2 # YOLOv5RandomAffine +scaling_ratio_range = (0.1, 2.0) # YOLOv5RandomAffine +mixup_prob = 0.15 # YOLOv5MixUp +randchoice_mosaic_prob = [0.8, 0.2] +mixup_alpha = 8.0 # YOLOv5MixUp +mixup_beta = 8.0 # YOLOv5MixUp + +# -----train val related----- +loss_cls_weight = 0.3 +loss_bbox_weight = 0.05 +loss_obj_weight = 0.7 + +# ===============================Unmodified in most cases==================== model = dict( backbone=dict(arch='W', out_indices=(2, 3, 4, 5)), neck=dict( @@ -26,15 +49,15 @@ model = dict( type='YOLOv7p6HeadModule', in_channels=[128, 256, 384, 512], featmap_strides=strides, - norm_cfg=dict(type='BN', momentum=0.03, eps=0.001), + norm_cfg=norm_cfg, act_cfg=dict(type='SiLU', inplace=True)), prior_generator=dict(base_sizes=anchors, strides=strides), simota_candidate_topk=20, # note # scaled based on number of detection layers - loss_cls=dict(loss_weight=0.3 * + loss_cls=dict(loss_weight=loss_cls_weight * (num_classes / 80 * 3 / num_det_layers)), - loss_bbox=dict(loss_weight=0.05 * (3 / num_det_layers)), - loss_obj=dict(loss_weight=0.7 * + loss_bbox=dict(loss_weight=loss_bbox_weight * (3 / num_det_layers)), + loss_obj=dict(loss_weight=loss_obj_weight * ((img_scale[0] / 640)**2 * 3 / num_det_layers)), obj_level_weights=[4.0, 1.0, 0.25, 0.06])) @@ -50,8 +73,8 @@ mosiac4_pipeline = [ type='YOLOv5RandomAffine', max_rotate_degree=0.0, max_shear_degree=0.0, - max_translate_ratio=0.2, # note - scaling_ratio_range=(0.1, 2.0), # note + max_translate_ratio=max_translate_ratio, # note + scaling_ratio_range=scaling_ratio_range, # note # img_scale is (width, height) border=(-img_scale[0] // 2, -img_scale[1] // 2), border_val=(114, 114, 114)), @@ -67,8 +90,8 @@ mosiac9_pipeline = [ type='YOLOv5RandomAffine', max_rotate_degree=0.0, max_shear_degree=0.0, - max_translate_ratio=0.2, # note - scaling_ratio_range=(0.1, 2.0), # note + max_translate_ratio=max_translate_ratio, # note + scaling_ratio_range=scaling_ratio_range, # note # img_scale is (width, height) border=(-img_scale[0] // 2, -img_scale[1] // 2), border_val=(114, 114, 114)), @@ -77,16 +100,16 @@ mosiac9_pipeline = [ randchoice_mosaic_pipeline = dict( type='RandomChoice', transforms=[mosiac4_pipeline, mosiac9_pipeline], - prob=[0.8, 0.2]) + prob=randchoice_mosaic_prob) train_pipeline = [ *pre_transform, randchoice_mosaic_pipeline, dict( type='YOLOv5MixUp', - alpha=8.0, # note - beta=8.0, # note - prob=0.15, + alpha=mixup_alpha, # note + beta=mixup_beta, # note + prob=mixup_prob, pre_transform=[*pre_transform, randchoice_mosaic_pipeline]), dict(type='YOLOv5HSVRandomAug'), dict(type='mmdet.RandomFlip', prob=0.5), From 3a6899e232c0b947291ab035835c5c3ed3997d5f Mon Sep 17 00:00:00 2001 From: Youfu <71306851+lyviva@users.noreply.github.com> Date: Fri, 10 Feb 2023 10:50:59 +0800 Subject: [PATCH 20/64] [Improve] Beautify the YOLOX configuration (#529) 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Beautify the YOLOX configuration * fix checks * Update configs/yolox/yolox_s_fast_8xb8-300e_coco.py Co-authored-by: HinGwenWoong * fix letter case problem * beauty yolox configs except yolox_s's config * fix lint * Update configs/yolox/yolox_s_fast_8xb8-300e_coco.py Co-authored-by: Haian Huang(深度眸) <1286304229@qq.com> * fix yolox_s yolox_tiny * fix tiny * fix tiny * simple tiny --------- Co-authored-by: HinGwenWoong Co-authored-by: Haian Huang(深度眸) <1286304229@qq.com> --- configs/yolox/yolox_l_fast_8xb8-300e_coco.py | 2 + configs/yolox/yolox_m_fast_8xb8-300e_coco.py | 2 + .../yolox/yolox_nano_fast_8xb8-300e_coco.py | 2 + configs/yolox/yolox_s_fast_8xb8-300e_coco.py | 105 ++++++++++++------ .../yolox/yolox_tiny_fast_8xb8-300e_coco.py | 15 ++- configs/yolox/yolox_x_fast_8xb8-300e_coco.py | 2 + 6 files changed, 91 insertions(+), 37 deletions(-) diff --git a/configs/yolox/yolox_l_fast_8xb8-300e_coco.py b/configs/yolox/yolox_l_fast_8xb8-300e_coco.py index 7b3d24fb..39198d2e 100644 --- a/configs/yolox/yolox_l_fast_8xb8-300e_coco.py +++ b/configs/yolox/yolox_l_fast_8xb8-300e_coco.py @@ -1,8 +1,10 @@ _base_ = './yolox_s_fast_8xb8-300e_coco.py' +# ========================modified parameters====================== deepen_factor = 1.0 widen_factor = 1.0 +# =======================Unmodified in most cases================== # model settings model = dict( backbone=dict(deepen_factor=deepen_factor, widen_factor=widen_factor), diff --git a/configs/yolox/yolox_m_fast_8xb8-300e_coco.py b/configs/yolox/yolox_m_fast_8xb8-300e_coco.py index 691b61d0..ec8fd2c8 100644 --- a/configs/yolox/yolox_m_fast_8xb8-300e_coco.py +++ b/configs/yolox/yolox_m_fast_8xb8-300e_coco.py @@ -1,8 +1,10 @@ _base_ = './yolox_s_fast_8xb8-300e_coco.py' +# ========================modified parameters====================== deepen_factor = 0.67 widen_factor = 0.75 +# =======================Unmodified in most cases================== # model settings model = dict( backbone=dict(deepen_factor=deepen_factor, widen_factor=widen_factor), diff --git a/configs/yolox/yolox_nano_fast_8xb8-300e_coco.py b/configs/yolox/yolox_nano_fast_8xb8-300e_coco.py index 1c69b5fd..a0a5d373 100644 --- a/configs/yolox/yolox_nano_fast_8xb8-300e_coco.py +++ b/configs/yolox/yolox_nano_fast_8xb8-300e_coco.py @@ -1,9 +1,11 @@ _base_ = './yolox_tiny_fast_8xb8-300e_coco.py' +# ========================modified parameters====================== deepen_factor = 0.33 widen_factor = 0.25 use_depthwise = True +# =======================Unmodified in most cases================== # model settings model = dict( backbone=dict( diff --git a/configs/yolox/yolox_s_fast_8xb8-300e_coco.py b/configs/yolox/yolox_s_fast_8xb8-300e_coco.py index b4dd23af..46ec96dc 100644 --- a/configs/yolox/yolox_s_fast_8xb8-300e_coco.py +++ b/configs/yolox/yolox_s_fast_8xb8-300e_coco.py @@ -1,21 +1,64 @@ _base_ = '../_base_/default_runtime.py' -data_root = 'data/coco/' -dataset_type = 'YOLOv5CocoDataset' +# ========================Frequently modified parameters====================== +# -----data related----- +data_root = 'data/coco/' # Root path of data +# path of train annotation file +train_ann_file = 'annotations/instances_train2017.json' +train_data_prefix = 'train2017/' # Prefix of train image path +# path of val annotation file +val_ann_file = 'annotations/instances_val2017.json' +val_data_prefix = 'val2017/' # Prefix of train image path -img_scale = (640, 640) # width, height -deepen_factor = 0.33 -widen_factor = 
0.5 - -save_epoch_intervals = 10 +num_classes = 80 # Number of classes for classification +# Batch size of a single GPU during training train_batch_size_per_gpu = 8 +# Worker to pre-fetch data for each single GPU during tarining train_num_workers = 8 +# Presistent_workers must be False if num_workers is 0 +persistent_workers = True + +# -----train val related----- +# Base learning rate for optim_wrapper. Corresponding to 8xb16=64 bs +base_lr = 0.01 +max_epochs = 300 # Maximum training epochs + +model_test_cfg = dict( + yolox_style=True, # better + # The config of multi-label for multi-class prediction + multi_label=True, # 40.5 -> 40.7 + score_thr=0.001, # Threshold to filter out boxes + max_per_img=300, # Max number of detections of each image + nms=dict(type='nms', iou_threshold=0.65)) # NMS type and threshold + +# ========================Possible modified parameters======================== +# -----data related----- +img_scale = (640, 640) # width, height +# Dataset type, this will be used to define the dataset +dataset_type = 'YOLOv5CocoDataset' +# Batch size of a single GPU during validation val_batch_size_per_gpu = 1 +# Worker to pre-fetch data for each single GPU during validation val_num_workers = 2 -max_epochs = 300 -num_last_epochs = 15 +# -----model related----- +# The scaling factor that controls the depth of the network structure +deepen_factor = 0.33 +# The scaling factor that controls the width of the network structure +widen_factor = 0.5 +norm_cfg = dict(type='BN', momentum=0.03, eps=0.001) +# -----train val related----- +weight_decay = 0.0005 +num_last_epochs = 15 +random_affine_scaling_ratio_range = (0.1, 2) +mixup_ratio_range = (0.8, 1.6) +# Save model checkpoint and validation intervals +save_epoch_intervals = 10 +# The maximum checkpoints to keep. 
+max_keep_ckpts = 3 + +# ===============================Unmodified in most cases==================== # model settings model = dict( type='YOLODetector', @@ -44,7 +87,7 @@ model = dict( widen_factor=widen_factor, out_indices=(2, 3, 4), spp_kernal_sizes=(5, 9, 13), - norm_cfg=dict(type='BN', momentum=0.03, eps=0.001), + norm_cfg=norm_cfg, act_cfg=dict(type='SiLU', inplace=True), ), neck=dict( @@ -53,20 +96,20 @@ model = dict( widen_factor=widen_factor, in_channels=[256, 512, 1024], out_channels=256, - norm_cfg=dict(type='BN', momentum=0.03, eps=0.001), + norm_cfg=norm_cfg, act_cfg=dict(type='SiLU', inplace=True)), bbox_head=dict( type='YOLOXHead', head_module=dict( type='YOLOXHeadModule', - num_classes=80, + num_classes=num_classes, in_channels=256, feat_channels=256, widen_factor=widen_factor, stacked_convs=2, featmap_strides=(8, 16, 32), use_depthwise=False, - norm_cfg=dict(type='BN', momentum=0.03, eps=0.001), + norm_cfg=norm_cfg, act_cfg=dict(type='SiLU', inplace=True), ), loss_cls=dict( @@ -92,12 +135,7 @@ model = dict( type='mmdet.SimOTAAssigner', center_radius=2.5, iou_calculator=dict(type='mmdet.BboxOverlaps2D'))), - test_cfg=dict( - yolox_style=True, # better - multi_label=True, # 40.5 -> 40.7 - score_thr=0.001, - max_per_img=300, - nms=dict(type='nms', iou_threshold=0.65))) + test_cfg=model_test_cfg) pre_transform = [ dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args), @@ -113,13 +151,13 @@ train_pipeline_stage1 = [ pre_transform=pre_transform), dict( type='mmdet.RandomAffine', - scaling_ratio_range=(0.1, 2), + scaling_ratio_range=random_affine_scaling_ratio_range, # img_scale is (width, height) border=(-img_scale[0] // 2, -img_scale[1] // 2)), dict( type='YOLOXMixUp', img_scale=img_scale, - ratio_range=(0.8, 1.6), + ratio_range=mixup_ratio_range, pad_val=114.0, pre_transform=pre_transform), dict(type='mmdet.YOLOXHSVRandomAug'), @@ -155,15 +193,15 @@ train_pipeline_stage2 = [ train_dataloader = dict( batch_size=train_batch_size_per_gpu, num_workers=train_num_workers, - persistent_workers=True, + persistent_workers=persistent_workers, pin_memory=True, collate_fn=dict(type='yolov5_collate'), sampler=dict(type='DefaultSampler', shuffle=True), dataset=dict( type=dataset_type, data_root=data_root, - ann_file='annotations/instances_train2017.json', - data_prefix=dict(img='train2017/'), + ann_file=train_ann_file, + data_prefix=dict(img=train_data_prefix), filter_cfg=dict(filter_empty_gt=False, min_size=32), pipeline=train_pipeline_stage1)) @@ -184,15 +222,15 @@ test_pipeline = [ val_dataloader = dict( batch_size=val_batch_size_per_gpu, num_workers=val_num_workers, - persistent_workers=True, + persistent_workers=persistent_workers, pin_memory=True, drop_last=False, sampler=dict(type='DefaultSampler', shuffle=False), dataset=dict( type=dataset_type, data_root=data_root, - ann_file='annotations/instances_val2017.json', - data_prefix=dict(img='val2017/'), + ann_file=val_ann_file, + data_prefix=dict(img=val_data_prefix), test_mode=True, pipeline=test_pipeline)) test_dataloader = val_dataloader @@ -201,18 +239,20 @@ test_dataloader = val_dataloader val_evaluator = dict( type='mmdet.CocoMetric', proposal_nums=(100, 1, 10), - ann_file=data_root + 'annotations/instances_val2017.json', + ann_file=data_root + val_ann_file, metric='bbox') test_evaluator = val_evaluator # optimizer # default 8 gpu -base_lr = 0.01 optim_wrapper = dict( type='OptimWrapper', optimizer=dict( - type='SGD', lr=base_lr, momentum=0.9, weight_decay=5e-4, + type='SGD', + lr=base_lr, + momentum=0.9, + 
weight_decay=weight_decay, nesterov=True), paramwise_cfg=dict(norm_decay_mult=0., bias_decay_mult=0.)) @@ -248,7 +288,10 @@ param_scheduler = [ default_hooks = dict( checkpoint=dict( - type='CheckpointHook', interval=1, max_keep_ckpts=3, save_best='auto')) + type='CheckpointHook', + interval=save_epoch_intervals, + max_keep_ckpts=max_keep_ckpts, + save_best='auto')) custom_hooks = [ dict( diff --git a/configs/yolox/yolox_tiny_fast_8xb8-300e_coco.py b/configs/yolox/yolox_tiny_fast_8xb8-300e_coco.py index 2288bd16..90e7e411 100644 --- a/configs/yolox/yolox_tiny_fast_8xb8-300e_coco.py +++ b/configs/yolox/yolox_tiny_fast_8xb8-300e_coco.py @@ -1,14 +1,20 @@ _base_ = './yolox_s_fast_8xb8-300e_coco.py' +# ========================modified parameters====================== deepen_factor = 0.33 widen_factor = 0.375 +img_scale = _base_.img_scale +pre_transform = _base_.pre_transform +scaling_ratio_range = (0.5, 1.5) + +# =======================Unmodified in most cases================== # model settings model = dict( data_preprocessor=dict(batch_augments=[ dict( - type='mmdet.BatchSyncRandomResize', - random_size_range=(320, 640), # note + type='YOLOXBatchSyncRandomResize', + random_size_range=(320, 640), size_divisor=32, interval=10) ]), @@ -16,9 +22,6 @@ model = dict( neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor), bbox_head=dict(head_module=dict(widen_factor=widen_factor))) -img_scale = _base_.img_scale -pre_transform = _base_.pre_transform - train_pipeline_stage1 = [ *pre_transform, dict( @@ -28,7 +31,7 @@ train_pipeline_stage1 = [ pre_transform=pre_transform), dict( type='mmdet.RandomAffine', - scaling_ratio_range=(0.5, 1.5), # note + scaling_ratio_range=scaling_ratio_range, # note # img_scale is (width, height) border=(-img_scale[0] // 2, -img_scale[1] // 2)), dict(type='mmdet.YOLOXHSVRandomAug'), diff --git a/configs/yolox/yolox_x_fast_8xb8-300e_coco.py b/configs/yolox/yolox_x_fast_8xb8-300e_coco.py index 9b327c55..0759d468 100644 --- a/configs/yolox/yolox_x_fast_8xb8-300e_coco.py +++ b/configs/yolox/yolox_x_fast_8xb8-300e_coco.py @@ -1,8 +1,10 @@ _base_ = './yolox_s_fast_8xb8-300e_coco.py' +# ========================modified parameters====================== deepen_factor = 1.33 widen_factor = 1.25 +# =======================Unmodified in most cases================== # model settings model = dict( backbone=dict(deepen_factor=deepen_factor, widen_factor=widen_factor), From 164c319493170c15d77e110a07b1ed0ccbdd580c Mon Sep 17 00:00:00 2001 From: Yue Sun <36404164+aptsunny@users.noreply.github.com> Date: Fri, 10 Feb 2023 16:24:47 +0800 Subject: [PATCH 21/64] [Feature] Support MMRazor searchable backbone (#453) * update subnet cfg * add docs * update model link * fix lint * mdformat * update readme * fix lint * update link * rename folder * fix readme * update readme * make lint * rename * update readme * sync mmrazor cfg * fix cfg * install issue * require mmcls * fix yolo cfg --------- Co-authored-by: aptsunny Co-authored-by: sunyue1 --- configs/razor/subnets/README.md | 79 ++++++++++++ ..._tiny_ofa_lat31_syncbn_16xb16-300e_coco.py | 118 ++++++++++++++++++ ...pos_shufflenetv2_syncbn_8xb16-300e_coco.py | 30 +++++ ...nas_a6_d12_syncbn_fast_16xb16-300e_coco.py | 35 ++++++ requirements/tests.txt | 2 + tests/test_downstream/test_mmrazor.py | 21 ++++ 6 files changed, 285 insertions(+) create mode 100644 configs/razor/subnets/README.md create mode 100644 configs/razor/subnets/rtmdet_tiny_ofa_lat31_syncbn_16xb16-300e_coco.py create mode 100644 
configs/razor/subnets/yolov5_s_spos_shufflenetv2_syncbn_8xb16-300e_coco.py create mode 100644 configs/razor/subnets/yolov6_l_attentivenas_a6_d12_syncbn_fast_16xb16-300e_coco.py create mode 100644 tests/test_downstream/test_mmrazor.py diff --git a/configs/razor/subnets/README.md b/configs/razor/subnets/README.md new file mode 100644 index 00000000..692492d3 --- /dev/null +++ b/configs/razor/subnets/README.md @@ -0,0 +1,79 @@ +# Projecs Based on MMRazor + +There are many research works and pre-trained models built on MMRazor. We list some of them as examples of how to use MMRazor slimmable models for downstream frameworks. As the page might not be completed, please feel free to contribute more efficient mmrazor-models to update this page. + +## Description + +This is an implementation of MMRazor Searchable Backbone Application, we provide detection configs and models for MMRazor in MMYOLO. + +### Backbone support + +Here are the Neural Architecture Search(NAS) Models that come from MMRazor which support YOLO Series. If you are looking for MMRazor models only for Backbone, you could refer to MMRazor [ModelZoo](https://github.com/open-mmlab/mmrazor/blob/dev-1.x/docs/en/get_started/model_zoo.md) and corresponding repository. + +- [x] [AttentiveMobileNetV3](https://github.com/open-mmlab/mmrazor/blob/dev-1.x/configs/_base_/nas_backbones/attentive_mobilenetv3_supernet.py) +- [x] [SearchableShuffleNetV2](https://github.com/open-mmlab/mmrazor/blob/dev-1.x/configs/_base_/nas_backbones/spos_shufflenet_supernet.py) +- [x] [SearchableMobileNetV2](https://github.com/open-mmlab/mmrazor/blob/dev-1.x/configs/_base_/nas_backbones/spos_mobilenet_supernet.py) + +## Usage + +### Prerequisites + +- [MMRazor v1.0.0rc2](https://github.com/open-mmlab/mmrazor/tree/v1.0.0rc2) or higher (dev-1.x) + +Install MMRazor using MIM. + +```shell +mim install mmengine +mim install "mmrazor>=1.0.0rc2" +``` + +Install MMRazor from source + +``` +git clone -b dev-1.x https://github.com/open-mmlab/mmrazor.git +cd mmrazor +# Install MMRazor +mim install -v -e . +``` + +### Training commands + +In MMYOLO's root directory, if you want to use single GPU for training, run the following command to train the model: + +```bash +CUDA_VISIBLE_DEVICES=0 PORT=29500 ./tools/dist_train.sh configs/razor/subnets/yolov5_s_spos_shufflenetv2_syncbn_8xb16-300e_coco.py +``` + +If you want to use several of these GPUs to train in parallel, you can use the following command: + +```bash +CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 PORT=29500 ./tools/dist_train.sh configs/razor/subnets/yolov5_s_spos_shufflenetv2_syncbn_8xb16-300e_coco.py +``` + +### Testing commands + +In MMYOLO's root directory, run the following command to test the model: + +```bash +CUDA_VISIBLE_DEVICES=0 PORT=29500 ./tools/dist_test.sh configs/razor/subnets/yolov5_s_spos_shufflenetv2_syncbn_8xb16-300e_coco.py ${CHECKPOINT_PATH} +``` + +## Results and Models + +Here we provide the baseline version of YOLO Series with NAS backbone. 
+ +| Model | size | box AP | Params(M) | FLOPS(G) | Config | Download | +| :------------------------: | :--: | :----: | :----------: | :------: | :----------------------------------------------------------------------------------------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| yolov5-s | 640 | 37.7 | 7.235 | 8.265 | [config](https://github.com/open-mmlab/mmyolo/blob/main/configs/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth) | +| yolov5_s_spos_shufflenetv2 | 640 | 37.9 | 7.04(-2.7%) | 7.03 | [config](https://github.com/open-mmlab/mmyolo/tree/dev/configs/razor/subnets/yolov5_s_spos_shufflenetv2_syncbn_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmrazor/v1/spos/yolov5/yolov5_s_spos_shufflenetv2_syncbn_8xb16-300e_coco_20230109_155302-777fd6f1.pth) | +| yolov6-s | 640 | 44.0 | 18.869 | 24.253 | [config](https://github.com/open-mmlab/mmyolo/blob/main/configs/yolov6/yolov6_s_syncbn_fast_8xb32-400e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov6/yolov6_s_syncbn_fast_8xb32-400e_coco/yolov6_s_syncbn_fast_8xb32-400e_coco_20221102_203035-932e1d91.pth) | +| yolov6_l_attentivenas_a6 | 640 | 44.5 | 18.38(-2.6%) | 8.49 | [config](https://github.com/open-mmlab/mmyolo/tree/dev/configs/razor/subnets/yolov6_l_attentivenas_a6_d12_syncbn_fast_16xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmrazor/v1/attentivenas/yolov6/yolov6_l_attentivenas_a6_d12_syncbn_fast_16xb16-300e_coco_20230108_174944-4970f0b7.pth) | +| RTMDet-tiny | 640 | 41.0 | 4.8 | 8.1 | [config](./rtmdet_l_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco/rtmdet_tiny_syncbn_fast_8xb32-300e_coco_20230102_140117-dbb1dc83.pth) | +| rtmdet_tiny_ofa_lat31 | 960 | 41.1 | 3.91(-18.5%) | 6.09 | [config](https://github.com/open-mmlab/mmyolo/tree/dev/configs/razor/subnets/rtmdet_tiny_ofa_lat31_syncbn_16xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmrazor/v1/ofa/rtmdet/rtmdet_tiny_ofa_lat31_syncbn_16xb16-300e_coco_20230108_222141-24ff87dex.pth) | + +**Note**: + +1. For fair comparison, the training configuration is consistent with the original configuration and results in an improvement of about 0.2-0.5% AP. +2. `yolov5_s_spos_shufflenetv2` achieves 37.9% AP with only 7.042M parameters, directly instead of the backbone, and outperforms `yolov5_s` with a similar size by more than 0.2% AP. +3. With the efficient backbone of `yolov6_l_attentivenas_a6`, the input channels of `YOLOv6RepPAFPN` are reduced. Meanwhile, modify the **deepen_factor** and the neck is made deeper to restore the AP. +4. with the `rtmdet_tiny_ofa_lat31` backbone with only 3.315M parameters and 3.634G flops, we can modify the input resolution to 960, with a similar model size compared to `rtmdet_tiny` and exceeds `rtmdet_tiny` by 0.1% AP, reducing the size of the whole model to 3.91 MB. 
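
The parameter figures quoted in the table above can be sanity-checked locally. The snippet below is a minimal sketch (not part of the patch) that reuses the same entry points as the unit test added in this commit, `mmyolo.testing.get_detector_cfg` and the MMRazor `MODELS` registry; it assumes MMYOLO plus `mmrazor>=1.0.0rc2` and `mmcls>=1.0.0rc4` are installed, and that the `fix_subnet` YAML referenced by the config can be fetched over the network.

```python
# Minimal sketch: instantiate the searched backbone from a razor subnet config
# and report its parameter count. Mirrors tests/test_downstream/test_mmrazor.py.
import copy

from mmrazor.registry import MODELS

from mmyolo.testing import get_detector_cfg
from mmyolo.utils import register_all_modules

register_all_modules()

model_cfg = get_detector_cfg(
    'razor/subnets/yolov5_s_spos_shufflenetv2_syncbn_8xb16-300e_coco.py')
backbone_cfg = copy.deepcopy(model_cfg.backbone)

# The config wraps the supernet in `mmrazor.sub_model`, so building it returns
# the fixed (searched) SearchableShuffleNetV2 backbone. Fetching the subnet
# YAML requires network access.
backbone = MODELS.build(backbone_cfg)

num_params = sum(p.numel() for p in backbone.parameters())
print(f'searched backbone parameters: {num_params / 1e6:.2f} M')
```

The same pattern applies to the OFA and AttentiveNAS subnets in this directory by swapping in the corresponding config file; note that the printed number covers the backbone only, while the Params(M) column above refers to the whole detector.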
diff --git a/configs/razor/subnets/rtmdet_tiny_ofa_lat31_syncbn_16xb16-300e_coco.py b/configs/razor/subnets/rtmdet_tiny_ofa_lat31_syncbn_16xb16-300e_coco.py new file mode 100644 index 00000000..fb7d8ea1 --- /dev/null +++ b/configs/razor/subnets/rtmdet_tiny_ofa_lat31_syncbn_16xb16-300e_coco.py @@ -0,0 +1,118 @@ +_base_ = [ + 'mmrazor::_base_/nas_backbones/ofa_mobilenetv3_supernet.py', + '../../rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco.py' +] + +checkpoint_file = 'https://download.openmmlab.com/mmrazor/v1/ofa/ofa_mobilenet_subnet_8xb256_in1k_note8_lat%4031ms_top1%4072.8_finetune%4025.py_20221214_0939-981a8b2a.pth' # noqa +fix_subnet = 'https://download.openmmlab.com/mmrazor/v1/ofa/rtmdet/OFA_SUBNET_NOTE8_LAT31.yaml' # noqa +deepen_factor = 0.167 +widen_factor = 1.0 +channels = [40, 112, 160] +train_batch_size_per_gpu = 16 +img_scale = (960, 960) + +_base_.base_lr = 0.002 +_base_.optim_wrapper.optimizer.lr = 0.002 +_base_.param_scheduler[1].eta_min = 0.002 * 0.05 + +_base_.nas_backbone.out_indices = (2, 4, 5) +_base_.nas_backbone.conv_cfg = dict(type='mmrazor.OFAConv2d') +_base_.nas_backbone.init_cfg = dict( + type='Pretrained', + checkpoint=checkpoint_file, + prefix='architecture.backbone.') +nas_backbone = dict( + type='mmrazor.sub_model', + fix_subnet=fix_subnet, + cfg=_base_.nas_backbone, + extra_prefix='backbone.') + +_base_.model.backbone = nas_backbone +_base_.model.neck.widen_factor = widen_factor +_base_.model.neck.deepen_factor = deepen_factor +_base_.model.neck.in_channels = channels +_base_.model.neck.out_channels = channels[0] +_base_.model.bbox_head.head_module.in_channels = channels[0] +_base_.model.bbox_head.head_module.feat_channels = channels[0] +_base_.model.bbox_head.head_module.widen_factor = widen_factor + +train_pipeline = [ + dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='Mosaic', + img_scale=img_scale, + use_cached=True, + max_cached_images=40, + pad_val=114.0), + dict( + type='mmdet.RandomResize', + # img_scale is (width, height) + scale=(img_scale[0] * 2, img_scale[1] * 2), + ratio_range=(0.5, 2.0), # note + resize_type='mmdet.Resize', + keep_ratio=True), + dict(type='mmdet.RandomCrop', crop_size=img_scale), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict(type='mmdet.RandomFlip', prob=0.5), + dict(type='mmdet.Pad', size=img_scale, pad_val=dict(img=(114, 114, 114))), + dict(type='YOLOv5MixUp', use_cached=True, max_cached_images=20), + dict(type='mmdet.PackDetInputs') +] + +train_pipeline_stage2 = [ + dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args), + dict(type='LoadAnnotations', with_bbox=True), + dict( + type='mmdet.RandomResize', + scale=img_scale, + ratio_range=(0.5, 2.0), # note + resize_type='mmdet.Resize', + keep_ratio=True), + dict(type='mmdet.RandomCrop', crop_size=img_scale), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict(type='mmdet.RandomFlip', prob=0.5), + dict(type='mmdet.Pad', size=img_scale, pad_val=dict(img=(114, 114, 114))), + dict(type='mmdet.PackDetInputs') +] + +train_dataloader = dict( + batch_size=train_batch_size_per_gpu, dataset=dict(pipeline=train_pipeline)) + +test_pipeline = [ + dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args), + dict(type='YOLOv5KeepRatioResize', scale=img_scale), + dict( + type='LetterResize', + scale=img_scale, + allow_scale_up=False, + pad_val=dict(img=114)), + dict(type='LoadAnnotations', with_bbox=True, _scope_='mmdet'), + dict( + type='mmdet.PackDetInputs', + 
meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', + 'scale_factor', 'pad_param')) +] + +batch_shapes_cfg = dict(img_size=img_scale[0]) + +val_dataloader = dict( + dataset=dict(pipeline=test_pipeline, batch_shapes_cfg=batch_shapes_cfg)) + +test_dataloader = val_dataloader + +custom_hooks = [ + dict( + type='EMAHook', + ema_type='ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + strict_load=False, + priority=49), + dict( + type='mmdet.PipelineSwitchHook', + switch_epoch=_base_.max_epochs - _base_.stage2_num_epochs, + switch_pipeline=train_pipeline_stage2) +] + +find_unused_parameters = True diff --git a/configs/razor/subnets/yolov5_s_spos_shufflenetv2_syncbn_8xb16-300e_coco.py b/configs/razor/subnets/yolov5_s_spos_shufflenetv2_syncbn_8xb16-300e_coco.py new file mode 100644 index 00000000..39884047 --- /dev/null +++ b/configs/razor/subnets/yolov5_s_spos_shufflenetv2_syncbn_8xb16-300e_coco.py @@ -0,0 +1,30 @@ +_base_ = [ + 'mmrazor::_base_/nas_backbones/spos_shufflenet_supernet.py', + '../../yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py' +] + +checkpoint_file = 'https://download.openmmlab.com/mmrazor/v1/spos/spos_shufflenetv2_subnet_8xb128_in1k_flops_0.33M_acc_73.87_20211222-1f0a0b4d_v3.pth' # noqa +fix_subnet = 'https://download.openmmlab.com/mmrazor/v1/spos/spos_shufflenetv2_subnet_8xb128_in1k_flops_0.33M_acc_73.87_20211222-1f0a0b4d_subnet_cfg_v3.yaml' # noqa + +widen_factor = 1.0 +channels = [160, 320, 640] + +_base_.nas_backbone.out_indices = (1, 2, 3) +_base_.nas_backbone.init_cfg = dict( + type='Pretrained', + checkpoint=checkpoint_file, + prefix='architecture.backbone.') +nas_backbone = dict( + type='mmrazor.sub_model', + fix_subnet=fix_subnet, + cfg=_base_.nas_backbone, + extra_prefix='architecture.backbone.') + +_base_.model.backbone = nas_backbone +_base_.model.neck.widen_factor = widen_factor +_base_.model.neck.in_channels = channels +_base_.model.neck.out_channels = channels +_base_.model.bbox_head.head_module.in_channels = channels +_base_.model.bbox_head.head_module.widen_factor = widen_factor + +find_unused_parameters = True diff --git a/configs/razor/subnets/yolov6_l_attentivenas_a6_d12_syncbn_fast_16xb16-300e_coco.py b/configs/razor/subnets/yolov6_l_attentivenas_a6_d12_syncbn_fast_16xb16-300e_coco.py new file mode 100644 index 00000000..0ab64a64 --- /dev/null +++ b/configs/razor/subnets/yolov6_l_attentivenas_a6_d12_syncbn_fast_16xb16-300e_coco.py @@ -0,0 +1,35 @@ +_base_ = [ + 'mmrazor::_base_/nas_backbones/attentive_mobilenetv3_supernet.py', + '../../yolov6/yolov6_l_syncbn_fast_8xb32-300e_coco.py' +] + +checkpoint_file = 'https://download.openmmlab.com/mmrazor/v1/bignas/attentive_mobilenet_subnet_8xb256_in1k_flops-0.93G_acc-80.81_20221229_200440-73d92cc6.pth' # noqa +fix_subnet = 'https://download.openmmlab.com/mmrazor/v1/bignas/ATTENTIVE_SUBNET_A6.yaml' # noqa +deepen_factor = 1.2 +widen_factor = 1 +channels = [40, 128, 224] +mid_channels = [40, 128, 224] + +_base_.train_dataloader.batch_size = 16 +_base_.nas_backbone.out_indices = (2, 4, 6) +_base_.nas_backbone.conv_cfg = dict(type='mmrazor.BigNasConv2d') +_base_.nas_backbone.norm_cfg = dict(type='mmrazor.DynamicBatchNorm2d') +_base_.nas_backbone.init_cfg = dict( + type='Pretrained', + checkpoint=checkpoint_file, + prefix='architecture.backbone.') +nas_backbone = dict( + type='mmrazor.sub_model', + fix_subnet=fix_subnet, + cfg=_base_.nas_backbone, + extra_prefix='backbone.') + +_base_.model.backbone = nas_backbone +_base_.model.neck.widen_factor = widen_factor +_base_.model.neck.deepen_factor = 
deepen_factor +_base_.model.neck.in_channels = channels +_base_.model.neck.out_channels = mid_channels +_base_.model.bbox_head.head_module.in_channels = mid_channels +_base_.model.bbox_head.head_module.widen_factor = widen_factor + +find_unused_parameters = True diff --git a/requirements/tests.txt b/requirements/tests.txt index cd73313e..8de70105 100644 --- a/requirements/tests.txt +++ b/requirements/tests.txt @@ -5,6 +5,8 @@ isort==4.3.21 # Note: used for kwarray.group_items, this may be ported to mmcv in the future. kwarray memory_profiler +mmcls>=1.0.0rc4 +mmrazor>=1.0.0rc2 parameterized protobuf<=3.20.1 psutil diff --git a/tests/test_downstream/test_mmrazor.py b/tests/test_downstream/test_mmrazor.py new file mode 100644 index 00000000..ebf6806e --- /dev/null +++ b/tests/test_downstream/test_mmrazor.py @@ -0,0 +1,21 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import copy + +import pytest +from mmcls.models.backbones.base_backbone import BaseBackbone + +from mmyolo.testing import get_detector_cfg + + +@pytest.mark.parametrize('cfg_file', [ + 'razor/subnets/' + 'yolov5_s_spos_shufflenetv2_syncbn_8xb16-300e_coco.py', 'razor/subnets/' + 'rtmdet_tiny_ofa_lat31_syncbn_16xb16-300e_coco.py', 'razor/subnets/' + 'yolov6_l_attentivenas_a6_d12_syncbn_fast_16xb16-300e_coco.py' +]) +def test_razor_backbone_forward(cfg_file): + model = get_detector_cfg(cfg_file) + model_cfg = copy.deepcopy(model.backbone) + from mmrazor.registry import MODELS + model = MODELS.build(model_cfg) + assert isinstance(model, BaseBackbone) From 75618020f81008f061ab009c16bbfdc1efaaa294 Mon Sep 17 00:00:00 2001 From: yechenzhi <136920488@qq.com> Date: Mon, 13 Feb 2023 11:42:11 +0800 Subject: [PATCH 22/64] RTMDet Assigner visualization (#528) * fix format * return multiple pos assigns * rewrite to get matched_gt_inds * ignore corrupted images * rm RTMDetectorAssigner * fix bug for different devices * add warnings when use rtmdet without checkpoint * add priors for rtmdet * fix format * add readme * fix format * fix readme and typo * typo * fix note --- projects/assigner_visualization/README.md | 12 +- .../assigner_visualization.py | 32 +++- ...t_8xb32-300e_coco_assignervisualization.py | 9 + .../dense_heads/__init__.py | 3 +- .../dense_heads/rtmdet_head_assigner.py | 169 ++++++++++++++++++ .../detectors/yolo_detector_assigner.py | 3 + .../visualization/assigner_visualizer.py | 16 +- 7 files changed, 231 insertions(+), 13 deletions(-) create mode 100644 projects/assigner_visualization/configs/rtmdet_s_syncbn_fast_8xb32-300e_coco_assignervisualization.py create mode 100644 projects/assigner_visualization/dense_heads/rtmdet_head_assigner.py diff --git a/projects/assigner_visualization/README.md b/projects/assigner_visualization/README.md index 443a23c6..2ae5703a 100644 --- a/projects/assigner_visualization/README.md +++ b/projects/assigner_visualization/README.md @@ -6,7 +6,7 @@ This project is developed for easily showing assigning results. The script allows users to analyze where and how many positive samples each gt is assigned in the image. -Now, the script only support `YOLOv5` . +Now, the script supports `YOLOv5` and `RTMDet`. ## Usage @@ -15,3 +15,13 @@ Now, the script only support `YOLOv5` . ```shell python projects/assigner_visualization/assigner_visualization.py projects/assigner_visualization/configs/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_assignervisualization.py ``` + +Note: `YOLOv5` does not need to load the trained weights. 
+ +```shell +python projects/assigner_visualization/assigner_visualization.py projects/assigner_visualization/configs/rtmdet_s_syncbn_fast_8xb32-300e_coco_assignervisualization.py -c ${checkpont} +``` + +${checkpont} is the checkpont file path. Dynamic label assignment is used in `RTMDet`, model weights will affect the positive sample allocation results, so it is recommended to load the trained model weights. + +If you want to know details about label assignment, you can check the [documentation](https://mmyolo.readthedocs.io/zh_CN/latest/algorithm_descriptions/rtmdet_description.html#id5). diff --git a/projects/assigner_visualization/assigner_visualization.py b/projects/assigner_visualization/assigner_visualization.py index 0086985f..139efe5b 100644 --- a/projects/assigner_visualization/assigner_visualization.py +++ b/projects/assigner_visualization/assigner_visualization.py @@ -3,6 +3,7 @@ import argparse import os import os.path as osp import sys +import warnings import mmcv import numpy as np @@ -10,11 +11,13 @@ import torch from mmengine import ProgressBar from mmengine.config import Config, DictAction from mmengine.dataset import COLLATE_FUNCTIONS +from mmengine.runner import load_checkpoint from numpy import random from mmyolo.registry import DATASETS, MODELS from mmyolo.utils import register_all_modules -from projects.assigner_visualization.dense_heads import YOLOv5HeadAssigner +from projects.assigner_visualization.dense_heads import (RTMHeadAssigner, + YOLOv5HeadAssigner) from projects.assigner_visualization.visualization import \ YOLOAssignerVisualizer @@ -24,6 +27,7 @@ def parse_args(): description='MMYOLO show the positive sample assigning' ' results.') parser.add_argument('config', help='config file path') + parser.add_argument('--checkpoint', '-c', type=str, help='checkpoint file') parser.add_argument( '--show-number', '-n', @@ -82,11 +86,20 @@ def main(): # build model model = MODELS.build(cfg.model) - assert isinstance(model.bbox_head, YOLOv5HeadAssigner),\ - 'Now, this script only support yolov5, and bbox_head must use ' \ - '`YOLOv5HeadAssigner`. Please use `' \ + if args.checkpoint is not None: + _ = load_checkpoint(model, args.checkpoint, map_location='cpu') + elif isinstance(model.bbox_head, RTMHeadAssigner): + warnings.warn( + 'if you use dynamic_assignment methods such as yolov7 or ' + 'rtmdet assigner, please load the checkpoint.') + + assert isinstance(model.bbox_head, (YOLOv5HeadAssigner, RTMHeadAssigner)),\ + 'Now, this script only support yolov5 and rtmdet, and ' \ + 'bbox_head must use ' \ + '`YOLOv5HeadAssigner or RTMHeadAssigner`. Please use `' \ 'yolov5_s-v61_syncbn_fast_8xb16-300e_coco_assignervisualization.py' \ - '` as config file.' + 'or rtmdet_s_syncbn_fast_8xb32-300e_coco_assignervisualization.py' \ + """` as config file.""" model.eval() model.to(args.device) @@ -107,7 +120,9 @@ def main(): }], name='visualizer') visualizer.dataset_meta = dataset.metainfo # need priors size to draw priors - visualizer.priors_size = model.bbox_head.prior_generator.base_anchors + + if hasattr(model.bbox_head.prior_generator, 'base_anchors'): + visualizer.priors_size = model.bbox_head.prior_generator.base_anchors # make output dir os.makedirs(args.output_dir, exist_ok=True) @@ -120,7 +135,10 @@ def main(): progress_bar = ProgressBar(display_number) for ind_img in range(display_number): data = dataset.prepare_data(ind_img) - + if data is None: + print('Unable to visualize {} due to strong data augmentations'. 
+ format(dataset[ind_img]['data_samples'].img_path)) + continue # convert data to batch format batch_data = collate_fn([data]) with torch.no_grad(): diff --git a/projects/assigner_visualization/configs/rtmdet_s_syncbn_fast_8xb32-300e_coco_assignervisualization.py b/projects/assigner_visualization/configs/rtmdet_s_syncbn_fast_8xb32-300e_coco_assignervisualization.py new file mode 100644 index 00000000..006502eb --- /dev/null +++ b/projects/assigner_visualization/configs/rtmdet_s_syncbn_fast_8xb32-300e_coco_assignervisualization.py @@ -0,0 +1,9 @@ +_base_ = ['../../../configs/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco.py'] + +custom_imports = dict(imports=[ + 'projects.assigner_visualization.detectors', + 'projects.assigner_visualization.dense_heads' +]) + +model = dict( + type='YOLODetectorAssigner', bbox_head=dict(type='RTMHeadAssigner')) diff --git a/projects/assigner_visualization/dense_heads/__init__.py b/projects/assigner_visualization/dense_heads/__init__.py index c8e368d9..fe41e5d6 100644 --- a/projects/assigner_visualization/dense_heads/__init__.py +++ b/projects/assigner_visualization/dense_heads/__init__.py @@ -1,4 +1,5 @@ # Copyright (c) OpenMMLab. All rights reserved. +from .rtmdet_head_assigner import RTMHeadAssigner from .yolov5_head_assigner import YOLOv5HeadAssigner -__all__ = ['YOLOv5HeadAssigner'] +__all__ = ['YOLOv5HeadAssigner', 'RTMHeadAssigner'] diff --git a/projects/assigner_visualization/dense_heads/rtmdet_head_assigner.py b/projects/assigner_visualization/dense_heads/rtmdet_head_assigner.py new file mode 100644 index 00000000..ae0e4651 --- /dev/null +++ b/projects/assigner_visualization/dense_heads/rtmdet_head_assigner.py @@ -0,0 +1,169 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from typing import List, Union + +import torch +from mmdet.structures.bbox import distance2bbox +from mmdet.utils import InstanceList +from torch import Tensor + +from mmyolo.models import RTMDetHead +from mmyolo.registry import MODELS + + +@MODELS.register_module() +class RTMHeadAssigner(RTMDetHead): + + def assign_by_gt_and_feat( + self, + cls_scores: List[Tensor], + bbox_preds: List[Tensor], + batch_gt_instances: InstanceList, + batch_img_metas: List[dict], + inputs_hw: Union[Tensor, tuple] = (640, 640) + ) -> dict: + """Calculate the assigning results based on the gt and features + extracted by the detection head. + + Args: + cls_scores (list[Tensor]): Box scores for each scale level + Has shape (N, num_anchors * num_classes, H, W) + bbox_preds (list[Tensor]): Decoded box for each scale + level with shape (N, num_anchors * 4, H, W) in + [tl_x, tl_y, br_x, br_y] format. + batch_gt_instances (list[:obj:`InstanceData`]): Batch of + gt_instance. It usually includes ``bboxes`` and ``labels`` + attributes. + batch_img_metas (list[dict]): Meta information of each image, e.g., + image size, scaling factor, etc. + inputs_hw (Union[Tensor, tuple]): Height and width of inputs size. + Returns: + dict[str, Tensor]: A dictionary of assigning results. 
+ """ + num_imgs = len(batch_img_metas) + featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores] + assert len(featmap_sizes) == self.prior_generator.num_levels + + gt_info = self.gt_instances_preprocess(batch_gt_instances, num_imgs) + gt_labels = gt_info[:, :, :1] + gt_bboxes = gt_info[:, :, 1:] # xyxy + pad_bbox_flag = (gt_bboxes.sum(-1, keepdim=True) > 0).float() + + device = cls_scores[0].device + + # If the shape does not equal, generate new one + if featmap_sizes != self.featmap_sizes_train: + self.featmap_sizes_train = featmap_sizes + mlvl_priors_with_stride = self.prior_generator.grid_priors( + featmap_sizes, device=device, with_stride=True) + self.flatten_priors_train = torch.cat( + mlvl_priors_with_stride, dim=0) + + flatten_cls_scores = torch.cat([ + cls_score.permute(0, 2, 3, 1).reshape(num_imgs, -1, + self.cls_out_channels) + for cls_score in cls_scores + ], 1).contiguous() + + flatten_bboxes = torch.cat([ + bbox_pred.permute(0, 2, 3, 1).reshape(num_imgs, -1, 4) + for bbox_pred in bbox_preds + ], 1) + flatten_bboxes = flatten_bboxes * self.flatten_priors_train[..., -1, + None] + flatten_bboxes = distance2bbox(self.flatten_priors_train[..., :2], + flatten_bboxes) + + assigned_result = self.assigner(flatten_bboxes.detach(), + flatten_cls_scores.detach(), + self.flatten_priors_train, gt_labels, + gt_bboxes, pad_bbox_flag) + + labels = assigned_result['assigned_labels'].reshape(-1) + bbox_targets = assigned_result['assigned_bboxes'].reshape(-1, 4) + + # FG cat_id: [0, num_classes -1], BG cat_id: num_classes + bg_class_ind = self.num_classes + pos_inds = ((labels >= 0) + & (labels < bg_class_ind)).nonzero().squeeze(1) + targets = bbox_targets[pos_inds] + gt_bboxes = gt_bboxes.squeeze(0) + matched_gt_inds = torch.tensor( + [((t == gt_bboxes).sum(dim=1) == t.shape[0]).nonzero()[0] + for t in targets], + device=device) + + level_inds = torch.zeros_like(labels) + img_inds = torch.zeros_like(labels) + level_nums = [0] + [f[0] * f[1] for f in featmap_sizes] + for i in range(len(level_nums) - 1): + level_nums[i + 1] = level_nums[i] + level_nums[i + 1] + level_inds[level_nums[i]:level_nums[i + 1]] = i + level_inds_pos = level_inds[pos_inds] + + img_inds = img_inds[pos_inds] + labels = labels[pos_inds] + + inputs_hw = batch_img_metas[0]['batch_input_shape'] + assign_results = [] + for i in range(self.num_levels): + retained_inds = level_inds_pos == i + if not retained_inds.any(): + assign_results_prior = { + 'stride': + self.featmap_strides[i], + 'grid_x_inds': + torch.zeros([0], dtype=torch.int64).to(device), + 'grid_y_inds': + torch.zeros([0], dtype=torch.int64).to(device), + 'img_inds': + torch.zeros([0], dtype=torch.int64).to(device), + 'class_inds': + torch.zeros([0], dtype=torch.int64).to(device), + 'retained_gt_inds': + torch.zeros([0], dtype=torch.int64).to(device), + 'prior_ind': + 0 + } + else: + w = inputs_hw[1] // self.featmap_strides[i] + + retained_pos_inds = pos_inds[retained_inds] - level_nums[i] + grid_y_inds = retained_pos_inds // w + grid_x_inds = retained_pos_inds - retained_pos_inds // w * w + assign_results_prior = { + 'stride': self.featmap_strides[i], + 'grid_x_inds': grid_x_inds, + 'grid_y_inds': grid_y_inds, + 'img_inds': img_inds[retained_inds], + 'class_inds': labels[retained_inds], + 'retained_gt_inds': matched_gt_inds[retained_inds], + 'prior_ind': 0 + } + assign_results.append([assign_results_prior]) + return assign_results + + def assign(self, batch_data_samples: Union[list, dict], + inputs_hw: Union[tuple, torch.Size]) -> dict: + """Calculate 
assigning results. This function is provided to the + `assigner_visualization.py` script. + + Args: + batch_data_samples (List[:obj:`DetDataSample`], dict): The Data + Samples. It usually includes information such as + `gt_instance`, `gt_panoptic_seg` and `gt_sem_seg`. + inputs_hw: Height and width of inputs size + + Returns: + dict: A dictionary of assigning components. + """ + if isinstance(batch_data_samples, list): + raise NotImplementedError( + 'assigning results_list is not implemented') + else: + # Fast version + cls_scores, bbox_preds = self(batch_data_samples['feats']) + assign_inputs = (cls_scores, bbox_preds, + batch_data_samples['bboxes_labels'], + batch_data_samples['img_metas'], inputs_hw) + assign_results = self.assign_by_gt_and_feat(*assign_inputs) + return assign_results diff --git a/projects/assigner_visualization/detectors/yolo_detector_assigner.py b/projects/assigner_visualization/detectors/yolo_detector_assigner.py index 394f8a06..edf0b828 100644 --- a/projects/assigner_visualization/detectors/yolo_detector_assigner.py +++ b/projects/assigner_visualization/detectors/yolo_detector_assigner.py @@ -3,6 +3,7 @@ from typing import Union from mmyolo.models import YOLODetector from mmyolo.registry import MODELS +from projects.assigner_visualization.dense_heads import RTMHeadAssigner @MODELS.register_module() @@ -22,6 +23,8 @@ class YOLODetectorAssigner(YOLODetector): assert isinstance(data, dict) assert len(data['inputs']) == 1, 'Only support batchsize == 1' data = self.data_preprocessor(data, True) + if isinstance(self.bbox_head, RTMHeadAssigner): + data['data_samples']['feats'] = self.extract_feat(data['inputs']) inputs_hw = data['inputs'].shape[-2:] assign_results = self.bbox_head.assign(data['data_samples'], inputs_hw) return assign_results diff --git a/projects/assigner_visualization/visualization/assigner_visualizer.py b/projects/assigner_visualization/visualization/assigner_visualizer.py index 270f82eb..73aba9f8 100644 --- a/projects/assigner_visualization/visualization/assigner_visualizer.py +++ b/projects/assigner_visualization/visualization/assigner_visualizer.py @@ -218,12 +218,17 @@ class YOLOAssignerVisualizer(DetLocalVisualizer): with corresponding stride. Defaults to 0.5. 
""" - palette = self.dataset_meta['PALETTE'] + palette = self.dataset_meta['palette'] center_x = ((grid_x_inds + offset) * stride) center_y = ((grid_y_inds + offset) * stride) xyxy = torch.stack((center_x, center_y, center_x, center_y), dim=1) - assert self.priors_size is not None - xyxy += self.priors_size[feat_ind][prior_ind] + device = xyxy.device + if self.priors_size is not None: + xyxy += self.priors_size[feat_ind][prior_ind].to(device) + else: + xyxy += torch.tensor( + [[-stride / 2, -stride / 2, stride / 2, stride / 2]], + device=device) colors = [palette[i] for i in class_inds] self.draw_bboxes( @@ -284,7 +289,10 @@ class YOLOAssignerVisualizer(DetLocalVisualizer): retained_gt_inds) # draw title - base_prior = self.priors_size[feat_ind][prior_ind] + if self.priors_size is not None: + base_prior = self.priors_size[feat_ind][prior_ind] + else: + base_prior = [stride, stride, stride * 2, stride * 2] prior_size = (base_prior[2] - base_prior[0], base_prior[3] - base_prior[1]) pos = np.array((20, 20)) From 8d2a7d1ab224acf4cdb33f04b29fbc201e053576 Mon Sep 17 00:00:00 2001 From: Range King Date: Mon, 13 Feb 2023 19:42:49 +0800 Subject: [PATCH 23/64] Update rtmdet_tiny_ofa_lat31_syncbn_16xb16-300e_coco.py (#551) --- .../subnets/rtmdet_tiny_ofa_lat31_syncbn_16xb16-300e_coco.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/razor/subnets/rtmdet_tiny_ofa_lat31_syncbn_16xb16-300e_coco.py b/configs/razor/subnets/rtmdet_tiny_ofa_lat31_syncbn_16xb16-300e_coco.py index fb7d8ea1..82d696be 100644 --- a/configs/razor/subnets/rtmdet_tiny_ofa_lat31_syncbn_16xb16-300e_coco.py +++ b/configs/razor/subnets/rtmdet_tiny_ofa_lat31_syncbn_16xb16-300e_coco.py @@ -111,7 +111,7 @@ custom_hooks = [ priority=49), dict( type='mmdet.PipelineSwitchHook', - switch_epoch=_base_.max_epochs - _base_.stage2_num_epochs, + switch_epoch=_base_.max_epochs - _base_.num_epochs_stage2, switch_pipeline=train_pipeline_stage2) ] From bf6d9e945158ac98ccadff839d1fe016e3314a1f Mon Sep 17 00:00:00 2001 From: yechenzhi <136920488@qq.com> Date: Tue, 14 Feb 2023 09:45:38 +0800 Subject: [PATCH 24/64] Fix RTMDet visualization to new code (#550) * fix bug to newer code * adjust picture format * add offset for rtmdet * rewrite axis to fix format * easier way to get axis --- .../dense_heads/rtmdet_head_assigner.py | 11 ++++++++--- .../visualization/assigner_visualizer.py | 11 ++++++++--- 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/projects/assigner_visualization/dense_heads/rtmdet_head_assigner.py b/projects/assigner_visualization/dense_heads/rtmdet_head_assigner.py index ae0e4651..b5003eef 100644 --- a/projects/assigner_visualization/dense_heads/rtmdet_head_assigner.py +++ b/projects/assigner_visualization/dense_heads/rtmdet_head_assigner.py @@ -7,6 +7,7 @@ from mmdet.utils import InstanceList from torch import Tensor from mmyolo.models import RTMDetHead +from mmyolo.models.utils import gt_instances_preprocess from mmyolo.registry import MODELS @@ -42,8 +43,9 @@ class RTMHeadAssigner(RTMDetHead): num_imgs = len(batch_img_metas) featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores] assert len(featmap_sizes) == self.prior_generator.num_levels + prior_offset = self.prior_generator.offset - gt_info = self.gt_instances_preprocess(batch_gt_instances, num_imgs) + gt_info = gt_instances_preprocess(batch_gt_instances, num_imgs) gt_labels = gt_info[:, :, :1] gt_bboxes = gt_info[:, :, 1:] # xyxy pad_bbox_flag = (gt_bboxes.sum(-1, keepdim=True) > 0).float() @@ -122,7 +124,9 @@ class 
RTMHeadAssigner(RTMDetHead): 'retained_gt_inds': torch.zeros([0], dtype=torch.int64).to(device), 'prior_ind': - 0 + 0, + 'offset': + prior_offset } else: w = inputs_hw[1] // self.featmap_strides[i] @@ -137,7 +141,8 @@ class RTMHeadAssigner(RTMDetHead): 'img_inds': img_inds[retained_inds], 'class_inds': labels[retained_inds], 'retained_gt_inds': matched_gt_inds[retained_inds], - 'prior_ind': 0 + 'prior_ind': 0, + 'offset': prior_offset } assign_results.append([assign_results_prior]) return assign_results diff --git a/projects/assigner_visualization/visualization/assigner_visualizer.py b/projects/assigner_visualization/visualization/assigner_visualizer.py index 73aba9f8..41d8f62d 100644 --- a/projects/assigner_visualization/visualization/assigner_visualizer.py +++ b/projects/assigner_visualization/visualization/assigner_visualizer.py @@ -274,9 +274,11 @@ class YOLOAssignerVisualizer(DetLocalVisualizer): grid_y_inds = assign_results_prior['grid_y_inds'] class_inds = assign_results_prior['class_inds'] prior_ind = assign_results_prior['prior_ind'] + offset = assign_results_prior.get('offset', 0.5) + if show_prior: self.draw_prior(grid_x_inds, grid_y_inds, class_inds, - stride, feat_ind, prior_ind) + stride, feat_ind, prior_ind, offset) # draw matched gt retained_gt_inds = assign_results_prior['retained_gt_inds'] @@ -286,7 +288,7 @@ class YOLOAssignerVisualizer(DetLocalVisualizer): # draw positive self.draw_positive_assign(grid_x_inds, grid_y_inds, class_inds, stride, gt_instances.bboxes, - retained_gt_inds) + retained_gt_inds, offset) # draw title if self.priors_size is not None: @@ -319,4 +321,7 @@ class YOLOAssignerVisualizer(DetLocalVisualizer): img_show_list.append(np.concatenate(img_show_list_feat, axis=1)) # Merge all images into one image - return np.concatenate(img_show_list, axis=0) + h, w = img_show.shape[:2] + num_priors_per_feat = img_show_list[0].shape[1] // w + axis = 0 if num_priors_per_feat > 1 else 1 + return np.concatenate(img_show_list, axis=axis) From 6f0a765ea87f4f4f6847a41c4ea096ec81816468 Mon Sep 17 00:00:00 2001 From: Youfu <71306851+lyviva@users.noreply.github.com> Date: Wed, 15 Feb 2023 10:57:22 +0800 Subject: [PATCH 25/64] Update README.md (#552) modify yolox_loss part --- configs/yolox/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configs/yolox/README.md b/configs/yolox/README.md index d0885e83..78550047 100644 --- a/configs/yolox/README.md +++ b/configs/yolox/README.md @@ -13,7 +13,7 @@ In this report, we present some experienced improvements to YOLO series, forming
-<!-- YOLOX-l model structure image (old) -->
+<!-- YOLOX-l model structure image (new) -->
 YOLOX-l model structure
From d3179daa4d28c83a3febfce32d6c8469bb144694 Mon Sep 17 00:00:00 2001 From: Zhaoyan Fang <52028100+satuoqaq@users.noreply.github.com> Date: Wed, 15 Feb 2023 19:05:00 +0800 Subject: [PATCH 26/64] YOLOv7 Assigner visualization (#543) * add yolov7 assigner visual * base yolov5 detector wrirte yolov7 * update readme * add yolov7 assigner visual * base yolov5 detector wrirte yolov7 * update * Update projects/assigner_visualization/README.md Co-authored-by: Nioolek <40284075+Nioolek@users.noreply.github.com> * Update projects/assigner_visualization/README.md Co-authored-by: Nioolek <40284075+Nioolek@users.noreply.github.com> * add note and typehint * update --------- Co-authored-by: Nioolek <40284075+Nioolek@users.noreply.github.com> --- projects/assigner_visualization/README.md | 16 +- .../assigner_visualization.py | 24 +-- ...t_8xb16-300e_coco_assignervisualization.py | 9 + .../dense_heads/__init__.py | 3 +- .../dense_heads/yolov7_head_assigner.py | 159 ++++++++++++++++++ .../detectors/yolo_detector_assigner.py | 5 +- 6 files changed, 200 insertions(+), 16 deletions(-) create mode 100644 projects/assigner_visualization/configs/yolov7_tiny_syncbn_fast_8xb16-300e_coco_assignervisualization.py create mode 100644 projects/assigner_visualization/dense_heads/yolov7_head_assigner.py diff --git a/projects/assigner_visualization/README.md b/projects/assigner_visualization/README.md index 2ae5703a..0bf0d8dc 100644 --- a/projects/assigner_visualization/README.md +++ b/projects/assigner_visualization/README.md @@ -6,22 +6,32 @@ This project is developed for easily showing assigning results. The script allows users to analyze where and how many positive samples each gt is assigned in the image. -Now, the script supports `YOLOv5` and `RTMDet`. +Now, the script supports `YOLOv5`, `YOLOv7` and `RTMDet`. ## Usage ### Command +YOLOv5 assigner visualization command: + ```shell python projects/assigner_visualization/assigner_visualization.py projects/assigner_visualization/configs/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_assignervisualization.py ``` Note: `YOLOv5` does not need to load the trained weights. +YOLOv7 assigner visualization command: + +```shell +python projects/assigner_visualization/assigner_visualization.py projects/assigner_visualization/configs/yolov7_tiny_syncbn_fast_8xb16-300e_coco_assignervisualization.py -c ${checkpont} +``` + +RTMdet assigner visualization command: + ```shell python projects/assigner_visualization/assigner_visualization.py projects/assigner_visualization/configs/rtmdet_s_syncbn_fast_8xb32-300e_coco_assignervisualization.py -c ${checkpont} ``` -${checkpont} is the checkpont file path. Dynamic label assignment is used in `RTMDet`, model weights will affect the positive sample allocation results, so it is recommended to load the trained model weights. +${checkpont} is the checkpont file path. Dynamic label assignment is used in `YOLOv7` and `RTMDet`, model weights will affect the positive sample allocation results, so it is recommended to load the trained model weights. -If you want to know details about label assignment, you can check the [documentation](https://mmyolo.readthedocs.io/zh_CN/latest/algorithm_descriptions/rtmdet_description.html#id5). +If you want to know details about label assignment, you can check the [RTMDet](https://mmyolo.readthedocs.io/zh_CN/latest/algorithm_descriptions/rtmdet_description.html#id5). 
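To make the output of these `assign()` methods easier to picture, here is a minimal sketch (an editorial illustration, not part of the patch): it assumes the list-of-levels / list-of-priors / dict layout built by the `assign_by_gt_and_feat` methods in the assigner heads of this series, with keys such as `stride`, `grid_x_inds`, `grid_y_inds`, `class_inds`, `retained_gt_inds` and `prior_ind`; the helper name `summarize_assign_results` is hypothetical.

```python
def summarize_assign_results(assign_results):
    """Count positive samples per (feature level, prior) pair.

    ``assign_results`` is assumed to be a list over feature levels, each
    holding one dict per prior with the index tensors named above.
    """
    for level_idx, level_results in enumerate(assign_results):
        for per_prior in level_results:
            # number of grid cells assigned as positives for this prior
            num_pos = per_prior['grid_x_inds'].numel()
            print(f"level {level_idx} "
                  f"(stride {per_prior['stride']}, prior {per_prior['prior_ind']}): "
                  f"{num_pos} positive samples for gt inds "
                  f"{per_prior['retained_gt_inds'].tolist()}")
```

Such a helper could be run on the value returned by `model.bbox_head.assign(data['data_samples'], inputs_hw)` before the results are handed to `YOLOAssignerVisualizer`.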
diff --git a/projects/assigner_visualization/assigner_visualization.py b/projects/assigner_visualization/assigner_visualization.py index 139efe5b..f6c7d4f6 100644 --- a/projects/assigner_visualization/assigner_visualization.py +++ b/projects/assigner_visualization/assigner_visualization.py @@ -11,13 +11,14 @@ import torch from mmengine import ProgressBar from mmengine.config import Config, DictAction from mmengine.dataset import COLLATE_FUNCTIONS -from mmengine.runner import load_checkpoint +from mmengine.runner.checkpoint import load_checkpoint from numpy import random from mmyolo.registry import DATASETS, MODELS from mmyolo.utils import register_all_modules from projects.assigner_visualization.dense_heads import (RTMHeadAssigner, - YOLOv5HeadAssigner) + YOLOv5HeadAssigner, + YOLOv7HeadAssigner) from projects.assigner_visualization.visualization import \ YOLOAssignerVisualizer @@ -87,17 +88,20 @@ def main(): # build model model = MODELS.build(cfg.model) if args.checkpoint is not None: - _ = load_checkpoint(model, args.checkpoint, map_location='cpu') - elif isinstance(model.bbox_head, RTMHeadAssigner): + load_checkpoint(model, args.checkpoint) + elif isinstance(model.bbox_head, (YOLOv7HeadAssigner, RTMHeadAssigner)): warnings.warn( - 'if you use dynamic_assignment methods such as yolov7 or ' - 'rtmdet assigner, please load the checkpoint.') - - assert isinstance(model.bbox_head, (YOLOv5HeadAssigner, RTMHeadAssigner)),\ - 'Now, this script only support yolov5 and rtmdet, and ' \ + 'if you use dynamic_assignment methods such as YOLOv7 or ' + 'RTMDet assigner, please load the checkpoint.') + assert isinstance(model.bbox_head, (YOLOv5HeadAssigner, + YOLOv7HeadAssigner, + RTMHeadAssigner)), \ + 'Now, this script only support YOLOv5, YOLOv7 and RTMdet, and ' \ 'bbox_head must use ' \ - '`YOLOv5HeadAssigner or RTMHeadAssigner`. Please use `' \ + '`YOLOv5HeadAssigner or YOLOv7HeadAssigner or RTMHeadAssigner`.' \ + ' Please use `' \ 'yolov5_s-v61_syncbn_fast_8xb16-300e_coco_assignervisualization.py' \ + 'or yolov7_tiny_syncbn_fast_8x16b-300e_coco_assignervisualization.py' \ 'or rtmdet_s_syncbn_fast_8xb32-300e_coco_assignervisualization.py' \ """` as config file.""" model.eval() diff --git a/projects/assigner_visualization/configs/yolov7_tiny_syncbn_fast_8xb16-300e_coco_assignervisualization.py b/projects/assigner_visualization/configs/yolov7_tiny_syncbn_fast_8xb16-300e_coco_assignervisualization.py new file mode 100644 index 00000000..626dc18b --- /dev/null +++ b/projects/assigner_visualization/configs/yolov7_tiny_syncbn_fast_8xb16-300e_coco_assignervisualization.py @@ -0,0 +1,9 @@ +_base_ = ['../../../configs/yolov7/yolov7_tiny_syncbn_fast_8x16b-300e_coco.py'] + +custom_imports = dict(imports=[ + 'projects.assigner_visualization.detectors', + 'projects.assigner_visualization.dense_heads' +]) + +model = dict( + type='YOLODetectorAssigner', bbox_head=dict(type='YOLOv7HeadAssigner')) diff --git a/projects/assigner_visualization/dense_heads/__init__.py b/projects/assigner_visualization/dense_heads/__init__.py index fe41e5d6..e985d20c 100644 --- a/projects/assigner_visualization/dense_heads/__init__.py +++ b/projects/assigner_visualization/dense_heads/__init__.py @@ -1,5 +1,6 @@ # Copyright (c) OpenMMLab. All rights reserved. 
from .rtmdet_head_assigner import RTMHeadAssigner from .yolov5_head_assigner import YOLOv5HeadAssigner +from .yolov7_head_assigner import YOLOv7HeadAssigner -__all__ = ['YOLOv5HeadAssigner', 'RTMHeadAssigner'] +__all__ = ['YOLOv5HeadAssigner', 'YOLOv7HeadAssigner', 'RTMHeadAssigner'] diff --git a/projects/assigner_visualization/dense_heads/yolov7_head_assigner.py b/projects/assigner_visualization/dense_heads/yolov7_head_assigner.py new file mode 100644 index 00000000..de2a90e3 --- /dev/null +++ b/projects/assigner_visualization/dense_heads/yolov7_head_assigner.py @@ -0,0 +1,159 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from typing import List, Union + +import torch +from mmdet.utils import InstanceList +from torch import Tensor + +from mmyolo.models import YOLOv7Head +from mmyolo.registry import MODELS + + +@MODELS.register_module() +class YOLOv7HeadAssigner(YOLOv7Head): + + def assign_by_gt_and_feat( + self, + cls_scores: List[Tensor], + bbox_preds: List[Tensor], + objectnesses: List[Tensor], + batch_gt_instances: InstanceList, + batch_img_metas: List[dict], + inputs_hw: Union[Tensor, tuple], + ) -> dict: + """Calculate the assigning results based on the gt and features + extracted by the detection head. + Args: + cls_scores (Sequence[Tensor]): Box scores for each scale level, + each is a 4D-tensor, the channel number is + num_priors * num_classes. + bbox_preds (Sequence[Tensor]): Box energies / deltas for each scale + level, each is a 4D-tensor, the channel number is + num_priors * 4. + objectnesses (Sequence[Tensor]): Score factor for + all scale level, each is a 4D-tensor, has shape + (batch_size, 1, H, W) + batch_gt_instances (list[:obj:`InstanceData`]): Batch of + gt_instance. It usually includes ``bboxes`` and ``labels`` + attributes. + batch_img_metas (list[dict]): Meta information of each image, e.g., + image size, scaling factor, etc. + inputs_hw (Union[Tensor, tuple]): Height and width of inputs size. + Returns: + dict[str, Tensor]: A dictionary of assigning results. + """ + device = cls_scores[0][0].device + + head_preds = self._merge_predict_results(bbox_preds, objectnesses, + cls_scores) + + batch_targets_normed = self._convert_gt_to_norm_format( + batch_gt_instances, batch_img_metas) + + # yolov5_assign and simota_assign + assigner_results = self.assigner( + head_preds, + batch_targets_normed, + batch_img_metas[0]['batch_input_shape'], + self.priors_base_sizes, + self.grid_offset, + near_neighbor_thr=self.near_neighbor_thr) + + # multi-level positive sample position. + mlvl_positive_infos = assigner_results['mlvl_positive_infos'] + # assigned results with label and bboxes information. 
+ mlvl_targets_normed = assigner_results['mlvl_targets_normed'] + + assign_results = [] + for i in range(self.num_levels): + assign_results_feat = [] + # no gt bbox matches anchor + if mlvl_positive_infos[i].shape[0] == 0: + for k in range(self.num_base_priors): + assign_results_feat.append({ + 'stride': + self.featmap_strides[i], + 'grid_x_inds': + torch.zeros([0], dtype=torch.int64).to(device), + 'grid_y_inds': + torch.zeros([0], dtype=torch.int64).to(device), + 'img_inds': + torch.zeros([0], dtype=torch.int64).to(device), + 'class_inds': + torch.zeros([0], dtype=torch.int64).to(device), + 'retained_gt_inds': + torch.zeros([0], dtype=torch.int64).to(device), + 'prior_ind': + k + }) + assign_results.append(assign_results_feat) + continue + + # (batch_idx, prior_idx, x_scaled, y_scaled) + positive_info = mlvl_positive_infos[i] + targets_normed = mlvl_targets_normed[i] + priors_inds = positive_info[:, 1] + grid_x_inds = positive_info[:, 2] + grid_y_inds = positive_info[:, 3] + img_inds = targets_normed[:, 0] + class_inds = targets_normed[:, 1].long() + retained_gt_inds = self.get_gt_inds( + targets_normed, batch_targets_normed[0]).long() + for k in range(self.num_base_priors): + retained_inds = priors_inds == k + assign_results_prior = { + 'stride': self.featmap_strides[i], + 'grid_x_inds': grid_x_inds[retained_inds], + 'grid_y_inds': grid_y_inds[retained_inds], + 'img_inds': img_inds[retained_inds], + 'class_inds': class_inds[retained_inds], + 'retained_gt_inds': retained_gt_inds[retained_inds], + 'prior_ind': k + } + assign_results_feat.append(assign_results_prior) + assign_results.append(assign_results_feat) + return assign_results + + def get_gt_inds(self, assigned_target, gt_instance): + """Judging which one gt_ind is assigned by comparing assign_target and + origin target. + + Args: + assigned_target (Tensor(assign_nums,7)): YOLOv7 assigning results. + gt_instance (Tensor(gt_nums,7)): Normalized gt_instance, It + usually includes ``bboxes`` and ``labels`` attributes. + Returns: + gt_inds (Tensor): the index which one gt is assigned. + """ + gt_inds = torch.zeros(assigned_target.shape[0]) + for i in range(assigned_target.shape[0]): + gt_inds[i] = ((assigned_target[i] == gt_instance).sum( + dim=1) == 7).nonzero().squeeze() + return gt_inds + + def assign(self, batch_data_samples: Union[list, dict], + inputs_hw: Union[tuple, torch.Size]) -> dict: + """Calculate assigning results. + + This function is provided to the + `assigner_visualization.py` script. + Args: + batch_data_samples (List[:obj:`DetDataSample`], dict): The Data + Samples. It usually includes information such as + `gt_instance`, `gt_panoptic_seg` and `gt_sem_seg`. + inputs_hw: Height and width of inputs size + Returns: + dict: A dictionary of assigning components. 
+ """ + if isinstance(batch_data_samples, list): + raise NotImplementedError( + 'assigning results_list is not implemented') + else: + # Fast version + cls_scores, bbox_preds, objectnesses = self( + batch_data_samples['feats']) + assign_inputs = (cls_scores, bbox_preds, objectnesses, + batch_data_samples['bboxes_labels'], + batch_data_samples['img_metas'], inputs_hw) + assign_results = self.assign_by_gt_and_feat(*assign_inputs) + return assign_results diff --git a/projects/assigner_visualization/detectors/yolo_detector_assigner.py b/projects/assigner_visualization/detectors/yolo_detector_assigner.py index edf0b828..65e6a1cf 100644 --- a/projects/assigner_visualization/detectors/yolo_detector_assigner.py +++ b/projects/assigner_visualization/detectors/yolo_detector_assigner.py @@ -3,7 +3,8 @@ from typing import Union from mmyolo.models import YOLODetector from mmyolo.registry import MODELS -from projects.assigner_visualization.dense_heads import RTMHeadAssigner +from projects.assigner_visualization.dense_heads import (RTMHeadAssigner, + YOLOv7HeadAssigner) @MODELS.register_module() @@ -23,7 +24,7 @@ class YOLODetectorAssigner(YOLODetector): assert isinstance(data, dict) assert len(data['inputs']) == 1, 'Only support batchsize == 1' data = self.data_preprocessor(data, True) - if isinstance(self.bbox_head, RTMHeadAssigner): + if isinstance(self.bbox_head, (YOLOv7HeadAssigner, RTMHeadAssigner)): data['data_samples']['feats'] = self.extract_feat(data['inputs']) inputs_hw = data['inputs'].shape[-2:] assign_results = self.bbox_head.assign(data['data_samples'], inputs_hw) From 761f9f844466d9680f3f03ba369169b7402b996c Mon Sep 17 00:00:00 2001 From: yechenzhi <136920488@qq.com> Date: Thu, 16 Feb 2023 10:56:26 +0800 Subject: [PATCH 27/64] YOLOv8 Assigner visualization (#558) * add yolov8 assigner visualization * add note about rtmdet --- .../assigner_visualization.py | 15 +- ...t_8xb16-500e_coco_assignervisualization.py | 9 + .../dense_heads/__init__.py | 6 +- .../dense_heads/rtmdet_head_assigner.py | 1 + .../dense_heads/yolov8_head_assigner.py | 180 ++++++++++++++++++ .../detectors/yolo_detector_assigner.py | 7 +- .../visualization/assigner_visualizer.py | 5 +- 7 files changed, 211 insertions(+), 12 deletions(-) create mode 100644 projects/assigner_visualization/configs/yolov8_s_syncbn_fast_8xb16-500e_coco_assignervisualization.py create mode 100644 projects/assigner_visualization/dense_heads/yolov8_head_assigner.py diff --git a/projects/assigner_visualization/assigner_visualization.py b/projects/assigner_visualization/assigner_visualization.py index f6c7d4f6..e290d26b 100644 --- a/projects/assigner_visualization/assigner_visualization.py +++ b/projects/assigner_visualization/assigner_visualization.py @@ -18,7 +18,8 @@ from mmyolo.registry import DATASETS, MODELS from mmyolo.utils import register_all_modules from projects.assigner_visualization.dense_heads import (RTMHeadAssigner, YOLOv5HeadAssigner, - YOLOv7HeadAssigner) + YOLOv7HeadAssigner, + YOLOv8HeadAssigner) from projects.assigner_visualization.visualization import \ YOLOAssignerVisualizer @@ -92,16 +93,18 @@ def main(): elif isinstance(model.bbox_head, (YOLOv7HeadAssigner, RTMHeadAssigner)): warnings.warn( 'if you use dynamic_assignment methods such as YOLOv7 or ' - 'RTMDet assigner, please load the checkpoint.') + 'YOLOv8 or RTMDet assigner, please load the checkpoint.') assert isinstance(model.bbox_head, (YOLOv5HeadAssigner, YOLOv7HeadAssigner, + YOLOv8HeadAssigner, RTMHeadAssigner)), \ - 'Now, this script only support YOLOv5, YOLOv7 and 
RTMdet, and ' \ - 'bbox_head must use ' \ - '`YOLOv5HeadAssigner or YOLOv7HeadAssigner or RTMHeadAssigner`.' \ - ' Please use `' \ + 'Now, this script only support YOLOv5, YOLOv7, YOLOv8 and RTMdet, ' \ + 'and bbox_head must use ' \ + '`YOLOv5HeadAssigner or YOLOv7HeadAssigne or YOLOv8HeadAssigner ' \ + 'or RTMHeadAssigner`. Please use `' \ 'yolov5_s-v61_syncbn_fast_8xb16-300e_coco_assignervisualization.py' \ 'or yolov7_tiny_syncbn_fast_8x16b-300e_coco_assignervisualization.py' \ + 'or yolov8_s_syncbn_fast_8xb16-500e_coco_assignervisualization.py' \ 'or rtmdet_s_syncbn_fast_8xb32-300e_coco_assignervisualization.py' \ """` as config file.""" model.eval() diff --git a/projects/assigner_visualization/configs/yolov8_s_syncbn_fast_8xb16-500e_coco_assignervisualization.py b/projects/assigner_visualization/configs/yolov8_s_syncbn_fast_8xb16-500e_coco_assignervisualization.py new file mode 100644 index 00000000..03dcae8c --- /dev/null +++ b/projects/assigner_visualization/configs/yolov8_s_syncbn_fast_8xb16-500e_coco_assignervisualization.py @@ -0,0 +1,9 @@ +_base_ = ['../../../configs/yolov8/yolov8_s_syncbn_fast_8xb16-500e_coco.py'] + +custom_imports = dict(imports=[ + 'projects.assigner_visualization.detectors', + 'projects.assigner_visualization.dense_heads' +]) + +model = dict( + type='YOLODetectorAssigner', bbox_head=dict(type='YOLOv8HeadAssigner')) diff --git a/projects/assigner_visualization/dense_heads/__init__.py b/projects/assigner_visualization/dense_heads/__init__.py index e985d20c..82adaaba 100644 --- a/projects/assigner_visualization/dense_heads/__init__.py +++ b/projects/assigner_visualization/dense_heads/__init__.py @@ -2,5 +2,9 @@ from .rtmdet_head_assigner import RTMHeadAssigner from .yolov5_head_assigner import YOLOv5HeadAssigner from .yolov7_head_assigner import YOLOv7HeadAssigner +from .yolov8_head_assigner import YOLOv8HeadAssigner -__all__ = ['YOLOv5HeadAssigner', 'YOLOv7HeadAssigner', 'RTMHeadAssigner'] +__all__ = [ + 'YOLOv5HeadAssigner', 'YOLOv7HeadAssigner', 'YOLOv8HeadAssigner', + 'RTMHeadAssigner' +] diff --git a/projects/assigner_visualization/dense_heads/rtmdet_head_assigner.py b/projects/assigner_visualization/dense_heads/rtmdet_head_assigner.py index b5003eef..d3ae1c86 100644 --- a/projects/assigner_visualization/dense_heads/rtmdet_head_assigner.py +++ b/projects/assigner_visualization/dense_heads/rtmdet_head_assigner.py @@ -43,6 +43,7 @@ class RTMHeadAssigner(RTMDetHead): num_imgs = len(batch_img_metas) featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores] assert len(featmap_sizes) == self.prior_generator.num_levels + # rtmdet's prior offset differs from others prior_offset = self.prior_generator.offset gt_info = gt_instances_preprocess(batch_gt_instances, num_imgs) diff --git a/projects/assigner_visualization/dense_heads/yolov8_head_assigner.py b/projects/assigner_visualization/dense_heads/yolov8_head_assigner.py new file mode 100644 index 00000000..49d254fd --- /dev/null +++ b/projects/assigner_visualization/dense_heads/yolov8_head_assigner.py @@ -0,0 +1,180 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+from typing import List, Union + +import torch +from mmdet.utils import InstanceList +from torch import Tensor + +from mmyolo.models import YOLOv8Head +from mmyolo.models.utils import gt_instances_preprocess +from mmyolo.registry import MODELS + + +@MODELS.register_module() +class YOLOv8HeadAssigner(YOLOv8Head): + + def assign_by_gt_and_feat( + self, + cls_scores: List[Tensor], + bbox_preds: List[Tensor], + batch_gt_instances: InstanceList, + batch_img_metas: List[dict], + inputs_hw: Union[Tensor, tuple] = (640, 640) + ) -> dict: + """Calculate the assigning results based on the gt and features + extracted by the detection head. + Args: + cls_scores (Sequence[Tensor]): Box scores for each scale level, + each is a 4D-tensor, the channel number is + num_priors * num_classes. + bbox_preds (Sequence[Tensor]): Box energies / deltas for each scale + level, each is a 4D-tensor, the channel number is + num_priors * 4. + bbox_dist_preds (Sequence[Tensor]): Box distribution logits for + each scale level with shape (bs, reg_max + 1, H*W, 4). + batch_gt_instances (list[:obj:`InstanceData`]): Batch of + gt_instance. It usually includes ``bboxes`` and ``labels`` + attributes. + batch_img_metas (list[dict]): Meta information of each image, e.g., + image size, scaling factor, etc. + inputs_hw (Union[Tensor, tuple]): Height and width of inputs size. + Returns: + dict[str, Tensor]: A dictionary of assigning results. + """ + num_imgs = len(batch_img_metas) + device = cls_scores[0].device + + current_featmap_sizes = [ + cls_score.shape[2:] for cls_score in cls_scores + ] + # If the shape does not equal, generate new one + if current_featmap_sizes != self.featmap_sizes_train: + self.featmap_sizes_train = current_featmap_sizes + + mlvl_priors_with_stride = self.prior_generator.grid_priors( + self.featmap_sizes_train, + dtype=cls_scores[0].dtype, + device=device, + with_stride=True) + + self.num_level_priors = [len(n) for n in mlvl_priors_with_stride] + self.flatten_priors_train = torch.cat( + mlvl_priors_with_stride, dim=0) + self.stride_tensor = self.flatten_priors_train[..., [2]] + + # gt info + gt_info = gt_instances_preprocess(batch_gt_instances, num_imgs) + gt_labels = gt_info[:, :, :1] + gt_bboxes = gt_info[:, :, 1:] # xyxy + pad_bbox_flag = (gt_bboxes.sum(-1, keepdim=True) > 0).float() + + # pred info + flatten_cls_preds = [ + cls_pred.permute(0, 2, 3, 1).reshape(num_imgs, -1, + self.num_classes) + for cls_pred in cls_scores + ] + flatten_pred_bboxes = [ + bbox_pred.permute(0, 2, 3, 1).reshape(num_imgs, -1, 4) + for bbox_pred in bbox_preds + ] + # (bs, n, 4 * reg_max) + + flatten_cls_preds = torch.cat(flatten_cls_preds, dim=1) + flatten_pred_bboxes = torch.cat(flatten_pred_bboxes, dim=1) + flatten_pred_bboxes = self.bbox_coder.decode( + self.flatten_priors_train[..., :2], flatten_pred_bboxes, + self.stride_tensor[..., 0]) + + assigned_result = self.assigner( + (flatten_pred_bboxes.detach()).type(gt_bboxes.dtype), + flatten_cls_preds.detach().sigmoid(), self.flatten_priors_train, + gt_labels, gt_bboxes, pad_bbox_flag) + + labels = assigned_result['assigned_labels'].reshape(-1) + bbox_targets = assigned_result['assigned_bboxes'].reshape(-1, 4) + fg_mask_pre_prior = assigned_result['fg_mask_pre_prior'].squeeze(0) + + pos_inds = fg_mask_pre_prior.nonzero().squeeze(1) + + targets = bbox_targets[pos_inds] + gt_bboxes = gt_bboxes.squeeze(0) + matched_gt_inds = torch.tensor( + [((t == gt_bboxes).sum(dim=1) == t.shape[0]).nonzero()[0] + for t in targets], + device=device) + + level_inds = 
torch.zeros_like(labels) + img_inds = torch.zeros_like(labels) + level_nums = [0] + self.num_level_priors + for i in range(len(level_nums) - 1): + level_nums[i + 1] = level_nums[i] + level_nums[i + 1] + level_inds[level_nums[i]:level_nums[i + 1]] = i + level_inds_pos = level_inds[pos_inds] + + img_inds = img_inds[pos_inds] + labels = labels[pos_inds] + + assign_results = [] + for i in range(self.num_levels): + retained_inds = level_inds_pos == i + if not retained_inds.any(): + assign_results_prior = { + 'stride': + self.featmap_strides[i], + 'grid_x_inds': + torch.zeros([0], dtype=torch.int64).to(device), + 'grid_y_inds': + torch.zeros([0], dtype=torch.int64).to(device), + 'img_inds': + torch.zeros([0], dtype=torch.int64).to(device), + 'class_inds': + torch.zeros([0], dtype=torch.int64).to(device), + 'retained_gt_inds': + torch.zeros([0], dtype=torch.int64).to(device), + 'prior_ind': + 0 + } + else: + w = inputs_hw[1] // self.featmap_strides[i] + + retained_pos_inds = pos_inds[retained_inds] - level_nums[i] + grid_y_inds = retained_pos_inds // w + grid_x_inds = retained_pos_inds - retained_pos_inds // w * w + assign_results_prior = { + 'stride': self.featmap_strides[i], + 'grid_x_inds': grid_x_inds, + 'grid_y_inds': grid_y_inds, + 'img_inds': img_inds[retained_inds], + 'class_inds': labels[retained_inds], + 'retained_gt_inds': matched_gt_inds[retained_inds], + 'prior_ind': 0 + } + assign_results.append([assign_results_prior]) + return assign_results + + def assign(self, batch_data_samples: Union[list, dict], + inputs_hw: Union[tuple, torch.Size]) -> dict: + """Calculate assigning results. + + This function is provided to the + `assigner_visualization.py` script. + Args: + batch_data_samples (List[:obj:`DetDataSample`], dict): The Data + Samples. It usually includes information such as + `gt_instance`, `gt_panoptic_seg` and `gt_sem_seg`. + inputs_hw: Height and width of inputs size + Returns: + dict: A dictionary of assigning components. 
+ """ + if isinstance(batch_data_samples, list): + raise NotImplementedError( + 'assigning results_list is not implemented') + else: + # Fast version + cls_scores, bbox_preds = self(batch_data_samples['feats']) + assign_inputs = (cls_scores, bbox_preds, + batch_data_samples['bboxes_labels'], + batch_data_samples['img_metas'], inputs_hw) + assign_results = self.assign_by_gt_and_feat(*assign_inputs) + return assign_results diff --git a/projects/assigner_visualization/detectors/yolo_detector_assigner.py b/projects/assigner_visualization/detectors/yolo_detector_assigner.py index 65e6a1cf..5b723e01 100644 --- a/projects/assigner_visualization/detectors/yolo_detector_assigner.py +++ b/projects/assigner_visualization/detectors/yolo_detector_assigner.py @@ -4,7 +4,8 @@ from typing import Union from mmyolo.models import YOLODetector from mmyolo.registry import MODELS from projects.assigner_visualization.dense_heads import (RTMHeadAssigner, - YOLOv7HeadAssigner) + YOLOv7HeadAssigner, + YOLOv8HeadAssigner) @MODELS.register_module() @@ -24,7 +25,9 @@ class YOLODetectorAssigner(YOLODetector): assert isinstance(data, dict) assert len(data['inputs']) == 1, 'Only support batchsize == 1' data = self.data_preprocessor(data, True) - if isinstance(self.bbox_head, (YOLOv7HeadAssigner, RTMHeadAssigner)): + available_assigners = (YOLOv7HeadAssigner, YOLOv8HeadAssigner, + RTMHeadAssigner) + if isinstance(self.bbox_head, available_assigners): data['data_samples']['feats'] = self.extract_feat(data['inputs']) inputs_hw = data['inputs'].shape[-2:] assign_results = self.bbox_head.assign(data['data_samples'], inputs_hw) diff --git a/projects/assigner_visualization/visualization/assigner_visualizer.py b/projects/assigner_visualization/visualization/assigner_visualizer.py index 41d8f62d..fe1f4f0b 100644 --- a/projects/assigner_visualization/visualization/assigner_visualizer.py +++ b/projects/assigner_visualization/visualization/assigner_visualizer.py @@ -321,7 +321,6 @@ class YOLOAssignerVisualizer(DetLocalVisualizer): img_show_list.append(np.concatenate(img_show_list_feat, axis=1)) # Merge all images into one image - h, w = img_show.shape[:2] - num_priors_per_feat = img_show_list[0].shape[1] // w - axis = 0 if num_priors_per_feat > 1 else 1 + # setting axis is to beautify the merged image + axis = 0 if len(assign_results[0]) > 1 else 1 return np.concatenate(img_show_list, axis=axis) From 8cdc741fd3e6331230a359da9c7ee73e93cefb6d Mon Sep 17 00:00:00 2001 From: vansin Date: Thu, 16 Feb 2023 12:41:04 +0800 Subject: [PATCH 28/64] [Docs] Add twitter discord medium youtube link (#555) * docs: Add twitter discord medium youtube link * docs: add zhihu and b link * [Docs] add link in readme_zh --- README.md | 20 ++++++++++++++++++++ README_zh-CN.md | 20 ++++++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/README.md b/README.md index eda8b242..ee2d7cba 100644 --- a/README.md +++ b/README.md @@ -40,6 +40,26 @@ English | [简体中文](README_zh-CN.md) +
+<!-- community links table: Twitter, Discord, Medium, YouTube, etc. -->
+ ## 📄 Table of Contents - [🥳 🚀 What's New](#--whats-new-) diff --git a/README_zh-CN.md b/README_zh-CN.md index bdc9dacb..67275067 100644 --- a/README_zh-CN.md +++ b/README_zh-CN.md @@ -40,6 +40,26 @@ +
+<!-- community links table (Zhihu, Bilibili, etc.) -->
+ ## 📄 Table of Contents - [🥳 🚀 最新进展](#--最新进展-) From 6400fba1af6c9570d0d0adbb16c0701d7143aac5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Haian=20Huang=28=E6=B7=B1=E5=BA=A6=E7=9C=B8=29?= <1286304229@qq.com> Date: Fri, 17 Feb 2023 11:27:03 +0800 Subject: [PATCH 29/64] [Enchance] Optimize and accelerate YOLOX with RTMDet hyps (#542) * enchance yolox * update * update * fix * fix * fix lint --- .../yolov7_l_syncbn_fast_8x16b-300e_coco.py | 2 +- configs/yolox/README.md | 26 ++++-- configs/yolox/metafile.yml | 78 ++++++++++++++++- ...yolox_m_fast_8xb32-300e-rtmdet-hyp_coco.py | 12 +++ ...ox_nano_fast_8xb32-300e-rtmdet-hyp_coco.py | 21 +++++ ...yolox_s_fast_8xb32-300e-rtmdet-hyp_coco.py | 87 +++++++++++++++++++ configs/yolox/yolox_s_fast_8xb8-300e_coco.py | 27 ++++-- ...ox_tiny_fast_8xb32-300e-rtmdet-hyp_coco.py | 70 +++++++++++++++ .../yolox/yolox_tiny_fast_8xb8-300e_coco.py | 6 +- 9 files changed, 309 insertions(+), 20 deletions(-) create mode 100644 configs/yolox/yolox_m_fast_8xb32-300e-rtmdet-hyp_coco.py create mode 100644 configs/yolox/yolox_nano_fast_8xb32-300e-rtmdet-hyp_coco.py create mode 100644 configs/yolox/yolox_s_fast_8xb32-300e-rtmdet-hyp_coco.py create mode 100644 configs/yolox/yolox_tiny_fast_8xb32-300e-rtmdet-hyp_coco.py diff --git a/configs/yolov7/yolov7_l_syncbn_fast_8x16b-300e_coco.py b/configs/yolov7/yolov7_l_syncbn_fast_8x16b-300e_coco.py index 3fca98f0..1247774e 100644 --- a/configs/yolov7/yolov7_l_syncbn_fast_8x16b-300e_coco.py +++ b/configs/yolov7/yolov7_l_syncbn_fast_8x16b-300e_coco.py @@ -26,7 +26,7 @@ anchors = [ [(142, 110), (192, 243), (459, 401)] # P5/32 ] # -----train val related----- -# Base learning rate for optim_wrapper. Corresponding to 8xb16=64 bs +# Base learning rate for optim_wrapper. Corresponding to 8xb16=128 bs base_lr = 0.01 max_epochs = 300 # Maximum training epochs diff --git a/configs/yolox/README.md b/configs/yolox/README.md index 78550047..11f9e307 100644 --- a/configs/yolox/README.md +++ b/configs/yolox/README.md @@ -17,12 +17,28 @@ In this report, we present some experienced improvements to YOLO series, forming YOLOX-l model structure -## Results and Models +## 🥳 🚀 Results and Models -| Backbone | size | Mem (GB) | box AP | Config | Download | -| :--------: | :--: | :------: | :----: | :------------------------------------------------------------------------------------------------------: | :----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | -| YOLOX-tiny | 416 | 2.8 | 32.7 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolox/yolox_tiny_fast_8xb8-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_tiny_8xb8-300e_coco/yolox_tiny_8xb8-300e_coco_20220919_090908-0e40a6fc.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_tiny_8xb8-300e_coco/yolox_tiny_8xb8-300e_coco_20220919_090908.log.json) | -| YOLOX-s | 640 | 5.6 | 40.8 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolox/yolox_s_fast_8xb8-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_s_8xb8-300e_coco/yolox_s_8xb8-300e_coco_20220917_030738-d7e60cb2.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_s_8xb8-300e_coco/yolox_s_8xb8-300e_coco_20220917_030738.log.json) | +| Backbone | Size | Batch Size | AMP | RTMDet-Hyp | Mem (GB) | Box AP | Config | Download | 
+| :--------: | :--: | :--------: | :-: | :--------: | :------: | :---------: | :-----------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| YOLOX-tiny | 416 | 8xb8 | No | No | 2.8 | 32.7 | [config](https://github.com/open-mmlab/mmyolo/tree/dev/configs/yolox/yolox_tiny_fast_8xb8-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_tiny_8xb8-300e_coco/yolox_tiny_8xb8-300e_coco_20220919_090908-0e40a6fc.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_tiny_8xb8-300e_coco/yolox_tiny_8xb8-300e_coco_20220919_090908.log.json) | +| YOLOX-tiny | 416 | 8xb32 | Yes | Yes | 4.9 | 34.3 (+1.6) | [config](https://github.com/open-mmlab/mmyolo/tree/dev/configs/yolox/yolox_tiny_fast_8xb32-300e-rtmdet-hyp_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_tiny_fast_8xb32-300e-rtmdet-hyp_coco/yolox_tiny_fast_8xb32-300e-rtmdet-hyp_coco_20230210_143637-4c338102.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_tiny_fast_8xb32-300e-rtmdet-hyp_coco/yolox_tiny_fast_8xb32-300e-rtmdet-hyp_coco_20230210_143637.log.json) | +| YOLOX-s | 640 | 8xb8 | Yes | No | 2.9 | 40.7 | [config](https://github.com/open-mmlab/mmyolo/tree/dev/configs/yolox/yolox_s_fast_8xb8-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_s_fast_8xb8-300e_coco/yolox_s_fast_8xb8-300e_coco_20230213_142600-2b224d8b.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_s_fast_8xb8-300e_coco/yolox_s_fast_8xb8-300e_coco_20230213_142600.log.json) | +| YOLOX-s | 640 | 8xb32 | Yes | Yes | 9.8 | 41.9 (+1.2) | [config](https://github.com/open-mmlab/mmyolo/tree/dev/configs/yolox/yolox_s_fast_8xb32-300e-rtmdet-hyp_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_s_fast_8xb32-300e-rtmdet-hyp_coco/yolox_s_fast_8xb32-300e-rtmdet-hyp_coco_20230210_134645-3a8dfbd7.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_s_fast_8xb32-300e-rtmdet-hyp_coco/yolox_s_fast_8xb32-300e-rtmdet-hyp_coco_20230210_134645.log.json) | +| YOLOX-m | 640 | 8xb8 | Yes | No | 4.9 | 46.9 | [config](https://github.com/open-mmlab/mmyolo/tree/dev/configs/yolox/yolox_m_fast_8xb8-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_m_fast_8xb8-300e_coco/yolox_m_fast_8xb8-300e_coco_20230213_160218-a71a6b25.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_m_fast_8xb8-300e_coco/yolox_m_fast_8xb8-300e_coco_20230213_160218.log.json) | +| YOLOX-m | 640 | 8xb32 | Yes | Yes | 17.6 | 47.5 (+0.6) | [config](https://github.com/open-mmlab/mmyolo/tree/dev/configs/yolox/yolox_m_fast_8xb32-300e-rtmdet-hyp_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_m_fast_8xb32-300e-rtmdet-hyp_coco/yolox_m_fast_8xb32-300e-rtmdet-hyp_coco_20230210_144328-e657e182.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_m_fast_8xb32-300e-rtmdet-hyp_coco/yolox_m_fast_8xb32-300e-rtmdet-hyp_coco_20230210_144328.log.json) | +| YOLOX-l | 640 | 8xb8 | Yes | No | 8.0 | 50.1 | [config](https://github.com/open-mmlab/mmyolo/tree/dev/configs/yolox/yolox_l_fast_8xb8-300e_coco.py) | 
[model](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_l_fast__8xb8-300e_coco/yolox_l_fast_8xb8-300e_coco_20230213_160715-c731eb1c.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_l_fast_8xb8-300e_coco/yolox_l_fast_8xb8-300e_coco_20230213_160715.log.json) | +| YOLOX-x | 640 | 8xb8 | Yes | No | 9.8 | 51.4 | [config](https://github.com/open-mmlab/mmyolo/tree/dev/configs/yolox/yolox_x_fast_8xb8-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_x_fast_8xb8-300e_coco/yolox_x_fast_8xb8-300e_coco_20230215_133950-1d509fab.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolox/yolox_x_fast_8xb8-300e_coco/yolox_x_fast_8xb8-300e_coco_20230215_133950.log.json) | + +YOLOX uses a default training configuration of `8xbs8` which results in a long training time, we expect it to use `8xbs32` to speed up the training and not cause a decrease in mAP. We modified `train_batch_size_per_gpu` from 8 to 32, `batch_augments_interval` from 10 to 1 and `base_lr` from 0.01 to 0.04 under YOLOX-s default configuration based on the linear scaling rule, which resulted in mAP degradation. Finally, I found that using RTMDet's training hyperparameter can improve performance in YOLOX Tiny/S/M, which also validates the superiority of RTMDet's training hyperparameter. + +The modified training parameters are as follows: + +1. train_batch_size_per_gpu: 8 -> 32 +2. batch_augments_interval: 10 -> 1 +3. num_last_epochs: 15 -> 20 +4. optim cfg: SGD -> AdamW, base_lr 0.01 -> 0.004, weight_decay 0.0005 -> 0.05 +5. ema momentum: 0.0001 -> 0.0002 **Note**: diff --git a/configs/yolox/metafile.yml b/configs/yolox/metafile.yml index baf3a8f5..0926519e 100644 --- a/configs/yolox/metafile.yml +++ b/configs/yolox/metafile.yml @@ -36,11 +36,83 @@ Models: In Collection: YOLOX Config: configs/yolox/yolox_s_fast_8xb8-300e_coco.py Metadata: - Training Memory (GB): 5.6 + Training Memory (GB): 2.9 Epochs: 300 Results: - Task: Object Detection Dataset: COCO Metrics: - box AP: 40.8 - Weights: https://download.openmmlab.com/mmyolo/v0/yolox/yolox_s_8xb8-300e_coco/yolox_s_8xb8-300e_coco_20220917_030738-d7e60cb2.pth + box AP: 40.7 + Weights: https://download.openmmlab.com/mmyolo/v0/yolox/yolox_s_fast_8xb8-300e_coco/yolox_s_fast_8xb8-300e_coco_20230213_142600-2b224d8b.pth + - Name: yolox_m_fast_8xb8-300e_coco + In Collection: YOLOX + Config: configs/yolox/yolox_m_fast_8xb8-300e_coco.py + Metadata: + Training Memory (GB): 4.9 + Epochs: 300 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 46.9 + Weights: https://download.openmmlab.com/mmyolo/v0/yolox/yolox_m_fast_8xb8-300e_coco/yolox_m_fast_8xb8-300e_coco_20230213_160218-a71a6b25.pth + - Name: yolox_l_fast_8xb8-300e_coco + In Collection: YOLOX + Config: configs/yolox/yolox_l_fast_8xb8-300e_coco.py + Metadata: + Training Memory (GB): 8.0 + Epochs: 300 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 50.1 + Weights: https://download.openmmlab.com/mmyolo/v0/yolox/yolox_l_fast_8xb8-300e_coco/yolox_l_fast_8xb8-300e_coco_20230213_160715-c731eb1c.pth + - Name: yolox_x_fast_8xb8-300e_coco + In Collection: YOLOX + Config: configs/yolox/yolox_x_fast_8xb8-300e_coco.py + Metadata: + Training Memory (GB): 9.8 + Epochs: 300 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 51.4 + Weights: https://download.openmmlab.com/mmyolo/v0/yolox/yolox_x_fast_8xb8-300e_coco/yolox_x_fast_8xb8-300e_coco_20230215_133950-1d509fab.pth + - Name: yolox_tiny_fast_8xb32-300e-rtmdet-hyp_coco + In Collection: 
YOLOX + Config: configs/yolox/yolox_tiny_fast_8xb32-300e-rtmdet-hyp_coco.py + Metadata: + Training Memory (GB): 4.9 + Epochs: 300 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 34.3 + Weights: https://download.openmmlab.com/mmyolo/v0/yolox/yolox_tiny_fast_8xb32-300e-rtmdet-hyp_coco/yolox_tiny_fast_8xb32-300e-rtmdet-hyp_coco_20230210_143637-4c338102.pth + - Name: yolox_s_fast_8xb32-300e-rtmdet-hyp_coco + In Collection: YOLOX + Config: configs/yolox/yolox_s_fast_8xb32-300e-rtmdet-hyp_coco.py + Metadata: + Training Memory (GB): 9.8 + Epochs: 300 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 41.9 + Weights: https://download.openmmlab.com/mmyolo/v0/yolox/yolox_s_fast_8xb32-300e-rtmdet-hyp_coco/yolox_s_fast_8xb32-300e-rtmdet-hyp_coco_20230210_134645-3a8dfbd7.pth + - Name: yolox_m_fast_8xb32-300e-rtmdet-hyp_coco + In Collection: YOLOX + Config: configs/yolox/yolox_m_fast_8xb32-300e-rtmdet-hyp_coco.py + Metadata: + Training Memory (GB): 17.6 + Epochs: 300 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 47.5 + Weights: https://download.openmmlab.com/mmyolo/v0/yolox/yolox_m_fast_8xb32-300e-rtmdet-hyp_coco/yolox_m_fast_8xb32-300e-rtmdet-hyp_coco_20230210_144328-e657e182.pth diff --git a/configs/yolox/yolox_m_fast_8xb32-300e-rtmdet-hyp_coco.py b/configs/yolox/yolox_m_fast_8xb32-300e-rtmdet-hyp_coco.py new file mode 100644 index 00000000..4a4743c2 --- /dev/null +++ b/configs/yolox/yolox_m_fast_8xb32-300e-rtmdet-hyp_coco.py @@ -0,0 +1,12 @@ +_base_ = './yolox_s_fast_8xb32-300e-rtmdet-hyp_coco.py' + +# ========================modified parameters====================== +deepen_factor = 0.67 +widen_factor = 0.75 + +# =======================Unmodified in most cases================== +# model settings +model = dict( + backbone=dict(deepen_factor=deepen_factor, widen_factor=widen_factor), + neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor), + bbox_head=dict(head_module=dict(widen_factor=widen_factor))) diff --git a/configs/yolox/yolox_nano_fast_8xb32-300e-rtmdet-hyp_coco.py b/configs/yolox/yolox_nano_fast_8xb32-300e-rtmdet-hyp_coco.py new file mode 100644 index 00000000..851664fb --- /dev/null +++ b/configs/yolox/yolox_nano_fast_8xb32-300e-rtmdet-hyp_coco.py @@ -0,0 +1,21 @@ +_base_ = './yolox_tiny_fast_8xb32-300e-rtmdet-hyp_coco.py' + +# ========================modified parameters====================== +deepen_factor = 0.33 +widen_factor = 0.25 +use_depthwise = True + +# =======================Unmodified in most cases================== +# model settings +model = dict( + backbone=dict( + deepen_factor=deepen_factor, + widen_factor=widen_factor, + use_depthwise=use_depthwise), + neck=dict( + deepen_factor=deepen_factor, + widen_factor=widen_factor, + use_depthwise=use_depthwise), + bbox_head=dict( + head_module=dict( + widen_factor=widen_factor, use_depthwise=use_depthwise))) diff --git a/configs/yolox/yolox_s_fast_8xb32-300e-rtmdet-hyp_coco.py b/configs/yolox/yolox_s_fast_8xb32-300e-rtmdet-hyp_coco.py new file mode 100644 index 00000000..167023da --- /dev/null +++ b/configs/yolox/yolox_s_fast_8xb32-300e-rtmdet-hyp_coco.py @@ -0,0 +1,87 @@ +_base_ = './yolox_s_fast_8xb8-300e_coco.py' + +# ========================modified parameters====================== +# Batch size of a single GPU during training +# 8 -> 32 +train_batch_size_per_gpu = 32 + +# Multi-scale training intervals +# 10 -> 1 +batch_augments_interval = 1 + +# Last epoch number to switch training pipeline +# 15 -> 20 +num_last_epochs = 20 + +# Base 
learning rate for optim_wrapper. Corresponding to 8xb32=256 bs +base_lr = 0.004 + +# SGD -> AdamW +optim_wrapper = dict( + _delete_=True, + type='OptimWrapper', + optimizer=dict(type='AdamW', lr=base_lr, weight_decay=0.05), + paramwise_cfg=dict( + norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True)) + +# 0.0001 -> 0.0002 +ema_momentum = 0.0002 + +# ============================== Unmodified in most cases =================== +model = dict( + data_preprocessor=dict(batch_augments=[ + dict( + type='YOLOXBatchSyncRandomResize', + random_size_range=(480, 800), + size_divisor=32, + interval=batch_augments_interval) + ])) + +param_scheduler = [ + dict( + # use quadratic formula to warm up 5 epochs + # and lr is updated by iteration + # TODO: fix default scope in get function + type='mmdet.QuadraticWarmupLR', + by_epoch=True, + begin=0, + end=5, + convert_to_iter_based=True), + dict( + # use cosine lr from 5 to 285 epoch + type='CosineAnnealingLR', + eta_min=base_lr * 0.05, + begin=5, + T_max=_base_.max_epochs - num_last_epochs, + end=_base_.max_epochs - num_last_epochs, + by_epoch=True, + convert_to_iter_based=True), + dict( + # use fixed lr during last num_last_epochs epochs + type='ConstantLR', + by_epoch=True, + factor=1, + begin=_base_.max_epochs - num_last_epochs, + end=_base_.max_epochs, + ) +] + +custom_hooks = [ + dict( + type='YOLOXModeSwitchHook', + num_last_epochs=num_last_epochs, + new_train_pipeline=_base_.train_pipeline_stage2, + priority=48), + dict(type='mmdet.SyncNormHook', priority=48), + dict( + type='EMAHook', + ema_type='ExpMomentumEMA', + momentum=ema_momentum, + update_buffers=True, + strict_load=False, + priority=49) +] + +train_dataloader = dict(batch_size=train_batch_size_per_gpu) +train_cfg = dict(dynamic_intervals=[(_base_.max_epochs - num_last_epochs, 1)]) +auto_scale_lr = dict(base_batch_size=8 * train_batch_size_per_gpu) diff --git a/configs/yolox/yolox_s_fast_8xb8-300e_coco.py b/configs/yolox/yolox_s_fast_8xb8-300e_coco.py index 46ec96dc..b51a1087 100644 --- a/configs/yolox/yolox_s_fast_8xb8-300e_coco.py +++ b/configs/yolox/yolox_s_fast_8xb8-300e_coco.py @@ -47,9 +47,16 @@ deepen_factor = 0.33 # The scaling factor that controls the width of the network structure widen_factor = 0.5 norm_cfg = dict(type='BN', momentum=0.03, eps=0.001) +# generate new random resize shape interval +batch_augments_interval = 10 # -----train val related----- weight_decay = 0.0005 +loss_cls_weight = 1.0 +loss_bbox_weight = 5.0 +loss_obj_weight = 1.0 +loss_bbox_aux_weight = 1.0 +center_radius = 2.5 # SimOTAAssigner num_last_epochs = 15 random_affine_scaling_ratio_range = (0.1, 2) mixup_ratio_range = (0.8, 1.6) @@ -58,6 +65,8 @@ save_epoch_intervals = 10 # The maximum checkpoints to keep. 
max_keep_ckpts = 3 +ema_momentum = 0.0001 + # ===============================Unmodified in most cases==================== # model settings model = dict( @@ -79,7 +88,7 @@ model = dict( type='YOLOXBatchSyncRandomResize', random_size_range=(480, 800), size_divisor=32, - interval=10) + interval=batch_augments_interval) ]), backbone=dict( type='YOLOXCSPDarknet', @@ -116,24 +125,26 @@ model = dict( type='mmdet.CrossEntropyLoss', use_sigmoid=True, reduction='sum', - loss_weight=1.0), + loss_weight=loss_cls_weight), loss_bbox=dict( type='mmdet.IoULoss', mode='square', eps=1e-16, reduction='sum', - loss_weight=5.0), + loss_weight=loss_bbox_weight), loss_obj=dict( type='mmdet.CrossEntropyLoss', use_sigmoid=True, reduction='sum', - loss_weight=1.0), + loss_weight=loss_obj_weight), loss_bbox_aux=dict( - type='mmdet.L1Loss', reduction='sum', loss_weight=1.0)), + type='mmdet.L1Loss', + reduction='sum', + loss_weight=loss_bbox_aux_weight)), train_cfg=dict( assigner=dict( type='mmdet.SimOTAAssigner', - center_radius=2.5, + center_radius=center_radius, iou_calculator=dict(type='mmdet.BboxOverlaps2D'))), test_cfg=model_test_cfg) @@ -303,7 +314,7 @@ custom_hooks = [ dict( type='EMAHook', ema_type='ExpMomentumEMA', - momentum=0.0001, + momentum=ema_momentum, update_buffers=True, strict_load=False, priority=49) @@ -315,6 +326,6 @@ train_cfg = dict( val_interval=save_epoch_intervals, dynamic_intervals=[(max_epochs - num_last_epochs, 1)]) -auto_scale_lr = dict(base_batch_size=64) +auto_scale_lr = dict(base_batch_size=8 * train_batch_size_per_gpu) val_cfg = dict(type='ValLoop') test_cfg = dict(type='TestLoop') diff --git a/configs/yolox/yolox_tiny_fast_8xb32-300e-rtmdet-hyp_coco.py b/configs/yolox/yolox_tiny_fast_8xb32-300e-rtmdet-hyp_coco.py new file mode 100644 index 00000000..d133c95f --- /dev/null +++ b/configs/yolox/yolox_tiny_fast_8xb32-300e-rtmdet-hyp_coco.py @@ -0,0 +1,70 @@ +_base_ = './yolox_s_fast_8xb32-300e-rtmdet-hyp_coco.py' + +# ========================modified parameters====================== +deepen_factor = 0.33 +widen_factor = 0.375 + +# Multi-scale training intervals +# 10 -> 1 +batch_augments_interval = 1 + +scaling_ratio_range = (0.5, 1.5) + +# =======================Unmodified in most cases================== +img_scale = _base_.img_scale +pre_transform = _base_.pre_transform + +# model settings +model = dict( + data_preprocessor=dict(batch_augments=[ + dict( + type='YOLOXBatchSyncRandomResize', + random_size_range=(320, 640), + size_divisor=32, + interval=batch_augments_interval) + ]), + backbone=dict(deepen_factor=deepen_factor, widen_factor=widen_factor), + neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor), + bbox_head=dict(head_module=dict(widen_factor=widen_factor))) + +train_pipeline_stage1 = [ + *pre_transform, + dict( + type='Mosaic', + img_scale=img_scale, + pad_val=114.0, + pre_transform=pre_transform), + dict( + type='mmdet.RandomAffine', + scaling_ratio_range=scaling_ratio_range, # note + # img_scale is (width, height) + border=(-img_scale[0] // 2, -img_scale[1] // 2)), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict(type='mmdet.RandomFlip', prob=0.5), + dict( + type='mmdet.FilterAnnotations', + min_gt_bbox_wh=(1, 1), + keep_empty=False), + dict( + type='mmdet.PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip', + 'flip_direction')) +] + +test_pipeline = [ + dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args), + dict(type='mmdet.Resize', scale=(416, 416), keep_ratio=True), # note + dict( + type='mmdet.Pad', + 
pad_to_square=True, + pad_val=dict(img=(114.0, 114.0, 114.0))), + dict(type='LoadAnnotations', with_bbox=True, _scope_='mmdet'), + dict( + type='mmdet.PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', + 'scale_factor')) +] + +train_dataloader = dict(dataset=dict(pipeline=train_pipeline_stage1)) +val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) +test_dataloader = val_dataloader diff --git a/configs/yolox/yolox_tiny_fast_8xb8-300e_coco.py b/configs/yolox/yolox_tiny_fast_8xb8-300e_coco.py index 90e7e411..e8c822e0 100644 --- a/configs/yolox/yolox_tiny_fast_8xb8-300e_coco.py +++ b/configs/yolox/yolox_tiny_fast_8xb8-300e_coco.py @@ -3,12 +3,12 @@ _base_ = './yolox_s_fast_8xb8-300e_coco.py' # ========================modified parameters====================== deepen_factor = 0.33 widen_factor = 0.375 - -img_scale = _base_.img_scale -pre_transform = _base_.pre_transform scaling_ratio_range = (0.5, 1.5) # =======================Unmodified in most cases================== +img_scale = _base_.img_scale +pre_transform = _base_.pre_transform + # model settings model = dict( data_preprocessor=dict(batch_augments=[ From cbadd3abe489ec9fc56c884d7e3f44b974eb1797 Mon Sep 17 00:00:00 2001 From: yechenzhi <136920488@qq.com> Date: Sun, 19 Feb 2023 21:48:32 +0800 Subject: [PATCH 30/64] add yolov7-e6e weight converter (#570) * add yolov7-e6e weight converter * add yolov7-e6e weight converter * add yolov7-e6e weight converter * fix format * fix format --- tools/model_converters/yolov7_to_mmyolo.py | 291 +++++++++++++++++++++ 1 file changed, 291 insertions(+) diff --git a/tools/model_converters/yolov7_to_mmyolo.py b/tools/model_converters/yolov7_to_mmyolo.py index f8bff947..1c1f54d0 100644 --- a/tools/model_converters/yolov7_to_mmyolo.py +++ b/tools/model_converters/yolov7_to_mmyolo.py @@ -697,10 +697,293 @@ convert_dict_e = { 'model.140.m.3': 'bbox_head.head_module.main_convs_pred.3.2' } +convert_dict_e2e = { + # stem + 'model.1': 'backbone.stem.conv', + + # stage1 + 'model.2.cv1': 'backbone.stage1.0.stride_conv_branches.0', + 'model.2.cv2': 'backbone.stage1.0.stride_conv_branches.1', + 'model.2.cv3': 'backbone.stage1.0.maxpool_branches.1', + + # E-ELANBlock + 'model.3': 'backbone.stage1.1.e_elan_blocks.0.short_conv', + 'model.4': 'backbone.stage1.1.e_elan_blocks.0.main_conv', + 'model.5': 'backbone.stage1.1.e_elan_blocks.0.blocks.0.0', + 'model.6': 'backbone.stage1.1.e_elan_blocks.0.blocks.0.1', + 'model.7': 'backbone.stage1.1.e_elan_blocks.0.blocks.1.0', + 'model.8': 'backbone.stage1.1.e_elan_blocks.0.blocks.1.1', + 'model.9': 'backbone.stage1.1.e_elan_blocks.0.blocks.2.0', + 'model.10': 'backbone.stage1.1.e_elan_blocks.0.blocks.2.1', + 'model.12': 'backbone.stage1.1.e_elan_blocks.0.final_conv', + 'model.13': 'backbone.stage1.1.e_elan_blocks.1.short_conv', + 'model.14': 'backbone.stage1.1.e_elan_blocks.1.main_conv', + 'model.15': 'backbone.stage1.1.e_elan_blocks.1.blocks.0.0', + 'model.16': 'backbone.stage1.1.e_elan_blocks.1.blocks.0.1', + 'model.17': 'backbone.stage1.1.e_elan_blocks.1.blocks.1.0', + 'model.18': 'backbone.stage1.1.e_elan_blocks.1.blocks.1.1', + 'model.19': 'backbone.stage1.1.e_elan_blocks.1.blocks.2.0', + 'model.20': 'backbone.stage1.1.e_elan_blocks.1.blocks.2.1', + 'model.22': 'backbone.stage1.1.e_elan_blocks.1.final_conv', + + # stage2 + 'model.24.cv1': 'backbone.stage2.0.stride_conv_branches.0', + 'model.24.cv2': 'backbone.stage2.0.stride_conv_branches.1', + 'model.24.cv3': 'backbone.stage2.0.maxpool_branches.1', + + # E-ELANBlock + 'model.25': 
'backbone.stage2.1.e_elan_blocks.0.short_conv', + 'model.26': 'backbone.stage2.1.e_elan_blocks.0.main_conv', + 'model.27': 'backbone.stage2.1.e_elan_blocks.0.blocks.0.0', + 'model.28': 'backbone.stage2.1.e_elan_blocks.0.blocks.0.1', + 'model.29': 'backbone.stage2.1.e_elan_blocks.0.blocks.1.0', + 'model.30': 'backbone.stage2.1.e_elan_blocks.0.blocks.1.1', + 'model.31': 'backbone.stage2.1.e_elan_blocks.0.blocks.2.0', + 'model.32': 'backbone.stage2.1.e_elan_blocks.0.blocks.2.1', + 'model.34': 'backbone.stage2.1.e_elan_blocks.0.final_conv', + 'model.35': 'backbone.stage2.1.e_elan_blocks.1.short_conv', + 'model.36': 'backbone.stage2.1.e_elan_blocks.1.main_conv', + 'model.37': 'backbone.stage2.1.e_elan_blocks.1.blocks.0.0', + 'model.38': 'backbone.stage2.1.e_elan_blocks.1.blocks.0.1', + 'model.39': 'backbone.stage2.1.e_elan_blocks.1.blocks.1.0', + 'model.40': 'backbone.stage2.1.e_elan_blocks.1.blocks.1.1', + 'model.41': 'backbone.stage2.1.e_elan_blocks.1.blocks.2.0', + 'model.42': 'backbone.stage2.1.e_elan_blocks.1.blocks.2.1', + 'model.44': 'backbone.stage2.1.e_elan_blocks.1.final_conv', + + # stage3 + 'model.46.cv1': 'backbone.stage3.0.stride_conv_branches.0', + 'model.46.cv2': 'backbone.stage3.0.stride_conv_branches.1', + 'model.46.cv3': 'backbone.stage3.0.maxpool_branches.1', + + # E-ELANBlock + 'model.47': 'backbone.stage3.1.e_elan_blocks.0.short_conv', + 'model.48': 'backbone.stage3.1.e_elan_blocks.0.main_conv', + 'model.49': 'backbone.stage3.1.e_elan_blocks.0.blocks.0.0', + 'model.50': 'backbone.stage3.1.e_elan_blocks.0.blocks.0.1', + 'model.51': 'backbone.stage3.1.e_elan_blocks.0.blocks.1.0', + 'model.52': 'backbone.stage3.1.e_elan_blocks.0.blocks.1.1', + 'model.53': 'backbone.stage3.1.e_elan_blocks.0.blocks.2.0', + 'model.54': 'backbone.stage3.1.e_elan_blocks.0.blocks.2.1', + 'model.56': 'backbone.stage3.1.e_elan_blocks.0.final_conv', + 'model.57': 'backbone.stage3.1.e_elan_blocks.1.short_conv', + 'model.58': 'backbone.stage3.1.e_elan_blocks.1.main_conv', + 'model.59': 'backbone.stage3.1.e_elan_blocks.1.blocks.0.0', + 'model.60': 'backbone.stage3.1.e_elan_blocks.1.blocks.0.1', + 'model.61': 'backbone.stage3.1.e_elan_blocks.1.blocks.1.0', + 'model.62': 'backbone.stage3.1.e_elan_blocks.1.blocks.1.1', + 'model.63': 'backbone.stage3.1.e_elan_blocks.1.blocks.2.0', + 'model.64': 'backbone.stage3.1.e_elan_blocks.1.blocks.2.1', + 'model.66': 'backbone.stage3.1.e_elan_blocks.1.final_conv', + + # stage4 + 'model.68.cv1': 'backbone.stage4.0.stride_conv_branches.0', + 'model.68.cv2': 'backbone.stage4.0.stride_conv_branches.1', + 'model.68.cv3': 'backbone.stage4.0.maxpool_branches.1', + + # E-ELANBlock + 'model.69': 'backbone.stage4.1.e_elan_blocks.0.short_conv', + 'model.70': 'backbone.stage4.1.e_elan_blocks.0.main_conv', + 'model.71': 'backbone.stage4.1.e_elan_blocks.0.blocks.0.0', + 'model.72': 'backbone.stage4.1.e_elan_blocks.0.blocks.0.1', + 'model.73': 'backbone.stage4.1.e_elan_blocks.0.blocks.1.0', + 'model.74': 'backbone.stage4.1.e_elan_blocks.0.blocks.1.1', + 'model.75': 'backbone.stage4.1.e_elan_blocks.0.blocks.2.0', + 'model.76': 'backbone.stage4.1.e_elan_blocks.0.blocks.2.1', + 'model.78': 'backbone.stage4.1.e_elan_blocks.0.final_conv', + 'model.79': 'backbone.stage4.1.e_elan_blocks.1.short_conv', + 'model.80': 'backbone.stage4.1.e_elan_blocks.1.main_conv', + 'model.81': 'backbone.stage4.1.e_elan_blocks.1.blocks.0.0', + 'model.82': 'backbone.stage4.1.e_elan_blocks.1.blocks.0.1', + 'model.83': 'backbone.stage4.1.e_elan_blocks.1.blocks.1.0', + 'model.84': 
'backbone.stage4.1.e_elan_blocks.1.blocks.1.1', + 'model.85': 'backbone.stage4.1.e_elan_blocks.1.blocks.2.0', + 'model.86': 'backbone.stage4.1.e_elan_blocks.1.blocks.2.1', + 'model.88': 'backbone.stage4.1.e_elan_blocks.1.final_conv', + + # stage5 + 'model.90.cv1': 'backbone.stage5.0.stride_conv_branches.0', + 'model.90.cv2': 'backbone.stage5.0.stride_conv_branches.1', + 'model.90.cv3': 'backbone.stage5.0.maxpool_branches.1', + + # E-ELANBlock + 'model.91': 'backbone.stage5.1.e_elan_blocks.0.short_conv', + 'model.92': 'backbone.stage5.1.e_elan_blocks.0.main_conv', + 'model.93': 'backbone.stage5.1.e_elan_blocks.0.blocks.0.0', + 'model.94': 'backbone.stage5.1.e_elan_blocks.0.blocks.0.1', + 'model.95': 'backbone.stage5.1.e_elan_blocks.0.blocks.1.0', + 'model.96': 'backbone.stage5.1.e_elan_blocks.0.blocks.1.1', + 'model.97': 'backbone.stage5.1.e_elan_blocks.0.blocks.2.0', + 'model.98': 'backbone.stage5.1.e_elan_blocks.0.blocks.2.1', + 'model.100': 'backbone.stage5.1.e_elan_blocks.0.final_conv', + 'model.101': 'backbone.stage5.1.e_elan_blocks.1.short_conv', + 'model.102': 'backbone.stage5.1.e_elan_blocks.1.main_conv', + 'model.103': 'backbone.stage5.1.e_elan_blocks.1.blocks.0.0', + 'model.104': 'backbone.stage5.1.e_elan_blocks.1.blocks.0.1', + 'model.105': 'backbone.stage5.1.e_elan_blocks.1.blocks.1.0', + 'model.106': 'backbone.stage5.1.e_elan_blocks.1.blocks.1.1', + 'model.107': 'backbone.stage5.1.e_elan_blocks.1.blocks.2.0', + 'model.108': 'backbone.stage5.1.e_elan_blocks.1.blocks.2.1', + 'model.110': 'backbone.stage5.1.e_elan_blocks.1.final_conv', + + # neck SPPCSPBlock + 'model.112.cv1': 'neck.reduce_layers.3.main_layers.0', + 'model.112.cv3': 'neck.reduce_layers.3.main_layers.1', + 'model.112.cv4': 'neck.reduce_layers.3.main_layers.2', + 'model.112.cv5': 'neck.reduce_layers.3.fuse_layers.0', + 'model.112.cv6': 'neck.reduce_layers.3.fuse_layers.1', + 'model.112.cv2': 'neck.reduce_layers.3.short_layer', + 'model.112.cv7': 'neck.reduce_layers.3.final_conv', + + # neck + 'model.113': 'neck.upsample_layers.0.0', + 'model.115': 'neck.reduce_layers.2', + + # neck E-ELANBlock + 'model.117': 'neck.top_down_layers.0.e_elan_blocks.0.short_conv', + 'model.118': 'neck.top_down_layers.0.e_elan_blocks.0.main_conv', + 'model.119': 'neck.top_down_layers.0.e_elan_blocks.0.blocks.0', + 'model.120': 'neck.top_down_layers.0.e_elan_blocks.0.blocks.1', + 'model.121': 'neck.top_down_layers.0.e_elan_blocks.0.blocks.2', + 'model.122': 'neck.top_down_layers.0.e_elan_blocks.0.blocks.3', + 'model.123': 'neck.top_down_layers.0.e_elan_blocks.0.blocks.4', + 'model.124': 'neck.top_down_layers.0.e_elan_blocks.0.blocks.5', + 'model.126': 'neck.top_down_layers.0.e_elan_blocks.0.final_conv', + 'model.127': 'neck.top_down_layers.0.e_elan_blocks.1.short_conv', + 'model.128': 'neck.top_down_layers.0.e_elan_blocks.1.main_conv', + 'model.129': 'neck.top_down_layers.0.e_elan_blocks.1.blocks.0', + 'model.130': 'neck.top_down_layers.0.e_elan_blocks.1.blocks.1', + 'model.131': 'neck.top_down_layers.0.e_elan_blocks.1.blocks.2', + 'model.132': 'neck.top_down_layers.0.e_elan_blocks.1.blocks.3', + 'model.133': 'neck.top_down_layers.0.e_elan_blocks.1.blocks.4', + 'model.134': 'neck.top_down_layers.0.e_elan_blocks.1.blocks.5', + 'model.136': 'neck.top_down_layers.0.e_elan_blocks.1.final_conv', + 'model.138': 'neck.upsample_layers.1.0', + 'model.140': 'neck.reduce_layers.1', + + # neck E-ELANBlock + 'model.142': 'neck.top_down_layers.1.e_elan_blocks.0.short_conv', + 'model.143': 'neck.top_down_layers.1.e_elan_blocks.0.main_conv', + 
'model.144': 'neck.top_down_layers.1.e_elan_blocks.0.blocks.0', + 'model.145': 'neck.top_down_layers.1.e_elan_blocks.0.blocks.1', + 'model.146': 'neck.top_down_layers.1.e_elan_blocks.0.blocks.2', + 'model.147': 'neck.top_down_layers.1.e_elan_blocks.0.blocks.3', + 'model.148': 'neck.top_down_layers.1.e_elan_blocks.0.blocks.4', + 'model.149': 'neck.top_down_layers.1.e_elan_blocks.0.blocks.5', + 'model.151': 'neck.top_down_layers.1.e_elan_blocks.0.final_conv', + 'model.152': 'neck.top_down_layers.1.e_elan_blocks.1.short_conv', + 'model.153': 'neck.top_down_layers.1.e_elan_blocks.1.main_conv', + 'model.154': 'neck.top_down_layers.1.e_elan_blocks.1.blocks.0', + 'model.155': 'neck.top_down_layers.1.e_elan_blocks.1.blocks.1', + 'model.156': 'neck.top_down_layers.1.e_elan_blocks.1.blocks.2', + 'model.157': 'neck.top_down_layers.1.e_elan_blocks.1.blocks.3', + 'model.158': 'neck.top_down_layers.1.e_elan_blocks.1.blocks.4', + 'model.159': 'neck.top_down_layers.1.e_elan_blocks.1.blocks.5', + 'model.161': 'neck.top_down_layers.1.e_elan_blocks.1.final_conv', + 'model.163': 'neck.upsample_layers.2.0', + 'model.165': 'neck.reduce_layers.0', + 'model.167': 'neck.top_down_layers.2.e_elan_blocks.0.short_conv', + 'model.168': 'neck.top_down_layers.2.e_elan_blocks.0.main_conv', + 'model.169': 'neck.top_down_layers.2.e_elan_blocks.0.blocks.0', + 'model.170': 'neck.top_down_layers.2.e_elan_blocks.0.blocks.1', + 'model.171': 'neck.top_down_layers.2.e_elan_blocks.0.blocks.2', + 'model.172': 'neck.top_down_layers.2.e_elan_blocks.0.blocks.3', + 'model.173': 'neck.top_down_layers.2.e_elan_blocks.0.blocks.4', + 'model.174': 'neck.top_down_layers.2.e_elan_blocks.0.blocks.5', + 'model.176': 'neck.top_down_layers.2.e_elan_blocks.0.final_conv', + 'model.177': 'neck.top_down_layers.2.e_elan_blocks.1.short_conv', + 'model.178': 'neck.top_down_layers.2.e_elan_blocks.1.main_conv', + 'model.179': 'neck.top_down_layers.2.e_elan_blocks.1.blocks.0', + 'model.180': 'neck.top_down_layers.2.e_elan_blocks.1.blocks.1', + 'model.181': 'neck.top_down_layers.2.e_elan_blocks.1.blocks.2', + 'model.182': 'neck.top_down_layers.2.e_elan_blocks.1.blocks.3', + 'model.183': 'neck.top_down_layers.2.e_elan_blocks.1.blocks.4', + 'model.184': 'neck.top_down_layers.2.e_elan_blocks.1.blocks.5', + 'model.186': 'neck.top_down_layers.2.e_elan_blocks.1.final_conv', + 'model.188.cv1': 'neck.downsample_layers.0.stride_conv_branches.0', + 'model.188.cv2': 'neck.downsample_layers.0.stride_conv_branches.1', + 'model.188.cv3': 'neck.downsample_layers.0.maxpool_branches.1', + + # neck E-ELANBlock + 'model.190': 'neck.bottom_up_layers.0.e_elan_blocks.0.short_conv', + 'model.191': 'neck.bottom_up_layers.0.e_elan_blocks.0.main_conv', + 'model.192': 'neck.bottom_up_layers.0.e_elan_blocks.0.blocks.0', + 'model.193': 'neck.bottom_up_layers.0.e_elan_blocks.0.blocks.1', + 'model.194': 'neck.bottom_up_layers.0.e_elan_blocks.0.blocks.2', + 'model.195': 'neck.bottom_up_layers.0.e_elan_blocks.0.blocks.3', + 'model.196': 'neck.bottom_up_layers.0.e_elan_blocks.0.blocks.4', + 'model.197': 'neck.bottom_up_layers.0.e_elan_blocks.0.blocks.5', + 'model.199': 'neck.bottom_up_layers.0.e_elan_blocks.0.final_conv', + 'model.200': 'neck.bottom_up_layers.0.e_elan_blocks.1.short_conv', + 'model.201': 'neck.bottom_up_layers.0.e_elan_blocks.1.main_conv', + 'model.202': 'neck.bottom_up_layers.0.e_elan_blocks.1.blocks.0', + 'model.203': 'neck.bottom_up_layers.0.e_elan_blocks.1.blocks.1', + 'model.204': 'neck.bottom_up_layers.0.e_elan_blocks.1.blocks.2', + 'model.205': 
'neck.bottom_up_layers.0.e_elan_blocks.1.blocks.3', + 'model.206': 'neck.bottom_up_layers.0.e_elan_blocks.1.blocks.4', + 'model.207': 'neck.bottom_up_layers.0.e_elan_blocks.1.blocks.5', + 'model.209': 'neck.bottom_up_layers.0.e_elan_blocks.1.final_conv', + 'model.211.cv1': 'neck.downsample_layers.1.stride_conv_branches.0', + 'model.211.cv2': 'neck.downsample_layers.1.stride_conv_branches.1', + 'model.211.cv3': 'neck.downsample_layers.1.maxpool_branches.1', + 'model.213': 'neck.bottom_up_layers.1.e_elan_blocks.0.short_conv', + 'model.214': 'neck.bottom_up_layers.1.e_elan_blocks.0.main_conv', + 'model.215': 'neck.bottom_up_layers.1.e_elan_blocks.0.blocks.0', + 'model.216': 'neck.bottom_up_layers.1.e_elan_blocks.0.blocks.1', + 'model.217': 'neck.bottom_up_layers.1.e_elan_blocks.0.blocks.2', + 'model.218': 'neck.bottom_up_layers.1.e_elan_blocks.0.blocks.3', + 'model.219': 'neck.bottom_up_layers.1.e_elan_blocks.0.blocks.4', + 'model.220': 'neck.bottom_up_layers.1.e_elan_blocks.0.blocks.5', + 'model.222': 'neck.bottom_up_layers.1.e_elan_blocks.0.final_conv', + 'model.223': 'neck.bottom_up_layers.1.e_elan_blocks.1.short_conv', + 'model.224': 'neck.bottom_up_layers.1.e_elan_blocks.1.main_conv', + 'model.225': 'neck.bottom_up_layers.1.e_elan_blocks.1.blocks.0', + 'model.226': 'neck.bottom_up_layers.1.e_elan_blocks.1.blocks.1', + 'model.227': 'neck.bottom_up_layers.1.e_elan_blocks.1.blocks.2', + 'model.228': 'neck.bottom_up_layers.1.e_elan_blocks.1.blocks.3', + 'model.229': 'neck.bottom_up_layers.1.e_elan_blocks.1.blocks.4', + 'model.230': 'neck.bottom_up_layers.1.e_elan_blocks.1.blocks.5', + 'model.232': 'neck.bottom_up_layers.1.e_elan_blocks.1.final_conv', + 'model.234.cv1': 'neck.downsample_layers.2.stride_conv_branches.0', + 'model.234.cv2': 'neck.downsample_layers.2.stride_conv_branches.1', + 'model.234.cv3': 'neck.downsample_layers.2.maxpool_branches.1', + + # neck E-ELANBlock + 'model.236': 'neck.bottom_up_layers.2.e_elan_blocks.0.short_conv', + 'model.237': 'neck.bottom_up_layers.2.e_elan_blocks.0.main_conv', + 'model.238': 'neck.bottom_up_layers.2.e_elan_blocks.0.blocks.0', + 'model.239': 'neck.bottom_up_layers.2.e_elan_blocks.0.blocks.1', + 'model.240': 'neck.bottom_up_layers.2.e_elan_blocks.0.blocks.2', + 'model.241': 'neck.bottom_up_layers.2.e_elan_blocks.0.blocks.3', + 'model.242': 'neck.bottom_up_layers.2.e_elan_blocks.0.blocks.4', + 'model.243': 'neck.bottom_up_layers.2.e_elan_blocks.0.blocks.5', + 'model.245': 'neck.bottom_up_layers.2.e_elan_blocks.0.final_conv', + 'model.246': 'neck.bottom_up_layers.2.e_elan_blocks.1.short_conv', + 'model.247': 'neck.bottom_up_layers.2.e_elan_blocks.1.main_conv', + 'model.248': 'neck.bottom_up_layers.2.e_elan_blocks.1.blocks.0', + 'model.249': 'neck.bottom_up_layers.2.e_elan_blocks.1.blocks.1', + 'model.250': 'neck.bottom_up_layers.2.e_elan_blocks.1.blocks.2', + 'model.251': 'neck.bottom_up_layers.2.e_elan_blocks.1.blocks.3', + 'model.252': 'neck.bottom_up_layers.2.e_elan_blocks.1.blocks.4', + 'model.253': 'neck.bottom_up_layers.2.e_elan_blocks.1.blocks.5', + 'model.255': 'neck.bottom_up_layers.2.e_elan_blocks.1.final_conv', + 'model.257': 'bbox_head.head_module.main_convs_pred.0.0', + 'model.258': 'bbox_head.head_module.main_convs_pred.1.0', + 'model.259': 'bbox_head.head_module.main_convs_pred.2.0', + 'model.260': 'bbox_head.head_module.main_convs_pred.3.0', + + # head + 'model.261.m.0': 'bbox_head.head_module.main_convs_pred.0.2', + 'model.261.m.1': 'bbox_head.head_module.main_convs_pred.1.2', + 'model.261.m.2': 
'bbox_head.head_module.main_convs_pred.2.2',
+    'model.261.m.3': 'bbox_head.head_module.main_convs_pred.3.2'
+}
+
 convert_dicts = {
     'yolov7-tiny.pt': convert_dict_tiny,
     'yolov7-w6.pt': convert_dict_w,
     'yolov7-e6.pt': convert_dict_e,
+    'yolov7-e6e.pt': convert_dict_e2e,
     'yolov7.pt': convert_dict_l,
     'yolov7x.pt': convert_dict_x
 }
@@ -728,6 +1011,10 @@ def convert(src, dst):
         indexes = [140, [2, 13, 24, 35, 46, 57, 100, 112, 124]]
         in_channels = 320, 640, 960, 1280
         num_levels = 4
+    elif src_key == 'yolov7-e6e.pt':
+        indexes = [261, [2, 24, 46, 68, 90, 112, 188, 211, 234]]
+        in_channels = 320, 640, 960, 1280
+        num_levels = 4
 
     if isinstance(indexes[1], int):
         indexes[1] = [indexes[1]]
@@ -796,6 +1083,10 @@ def main():
     parser.add_argument('dst', default='mm_yolov7l.pt', help='save path')
     args = parser.parse_args()
     convert(args.src, args.dst)
+    print('If your model weights are from P6 models, such as W6, E6, D6, \
+        E6E, the auxiliary training module is not required to be loaded, \
+        so it is normal for the weights of the auxiliary module \
+        to be missing.')
 
 
 if __name__ == '__main__':

From 75fc8fc2a3e3808eb695a86dbda95cc3f732aecf Mon Sep 17 00:00:00 2001
From: Nioolek <40284075+Nioolek@users.noreply.github.com>
Date: Mon, 20 Feb 2023 11:11:13 +0800
Subject: [PATCH 31/64] [Feature] YOLOv8 supports using mask annotation to
 optimize bbox (#484)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* add cfg
* add copypaste
* add todo
* handle gt_masks in mosaic and mixup, change config
* fix cat bug
* add finetune box in affine
* add repr
* del albu config in l
* add doc
* add config
* format code
* fix loadmask
* add config, fix mask
* fix loadann
* fix tra
* update LoadAnnotations
* update
* support mask
* fix error
* fix error
* fix config and no maskrefine bug
* fix
* fix
* update config
* format code
* beauty config
* add yolov5 config and readme
* beauty yolov5 config
* add ut
* fix ut. bitmap 2 poly
* fix ut and add mix transform ut.
* fix bool * fix loadann * rollback yolov5 * rollback yolov5 * format * 提高速度 * update --------- Co-authored-by: huanghaian --- configs/yolov8/README.md | 22 +- configs/yolov8/metafile.yml | 84 +++ ...mask-refine_syncbn_fast_8xb16-500e_coco.py | 65 ++ .../yolov8_l_syncbn_fast_8xb16-500e_coco.py | 13 +- ...mask-refine_syncbn_fast_8xb16-500e_coco.py | 85 +++ .../yolov8_m_syncbn_fast_8xb16-500e_coco.py | 31 +- ...mask-refine_syncbn_fast_8xb16-500e_coco.py | 12 + ...mask-refine_syncbn_fast_8xb16-500e_coco.py | 83 +++ .../yolov8_s_syncbn_fast_8xb16-500e_coco.py | 4 +- ...mask-refine_syncbn_fast_8xb16-500e_coco.py | 13 + mmyolo/datasets/transforms/__init__.py | 5 +- .../datasets/transforms/mix_img_transforms.py | 28 + mmyolo/datasets/transforms/transforms.py | 687 ++++++++++++++---- .../test_mix_img_transforms.py | 92 ++- .../test_transforms/test_transforms.py | 54 +- 15 files changed, 1087 insertions(+), 191 deletions(-) create mode 100644 configs/yolov8/yolov8_l_mask-refine_syncbn_fast_8xb16-500e_coco.py create mode 100644 configs/yolov8/yolov8_m_mask-refine_syncbn_fast_8xb16-500e_coco.py create mode 100644 configs/yolov8/yolov8_n_mask-refine_syncbn_fast_8xb16-500e_coco.py create mode 100644 configs/yolov8/yolov8_s_mask-refine_syncbn_fast_8xb16-500e_coco.py create mode 100644 configs/yolov8/yolov8_x_mask-refine_syncbn_fast_8xb16-500e_coco.py diff --git a/configs/yolov8/README.md b/configs/yolov8/README.md index 9fad3ad1..47075b6c 100644 --- a/configs/yolov8/README.md +++ b/configs/yolov8/README.md @@ -20,19 +20,25 @@ YOLOv8-P5 model structure ### COCO -| Backbone | Arch | size | SyncBN | AMP | Mem (GB) | box AP | Config | Download | -| :------: | :--: | :--: | :----: | :-: | :------: | :----: | :------------------------------------------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | -| YOLOv8-n | P5 | 640 | Yes | Yes | 2.8 | 37.2 | [config](https://github.com/open-mmlab/mmyolo/blob/dev/configs/yolov8/yolov8_n_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_n_syncbn_fast_8xb16-500e_coco/yolov8_n_syncbn_fast_8xb16-500e_coco_20230114_131804-88c11cdb.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_n_syncbn_fast_8xb16-500e_coco/yolov8_n_syncbn_fast_8xb16-500e_coco_20230114_131804.log.json) | -| YOLOv8-s | P5 | 640 | Yes | Yes | 4.0 | 44.2 | [config](https://github.com/open-mmlab/mmyolo/blob/dev/configs/yolov8/yolov8_s_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_s_syncbn_fast_8xb16-500e_coco/yolov8_s_syncbn_fast_8xb16-500e_coco_20230117_180101-5aa5f0f1.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_s_syncbn_fast_8xb16-500e_coco/yolov8_s_syncbn_fast_8xb16-500e_coco_20230117_180101.log.json) | -| YOLOv8-m | P5 | 640 | Yes | Yes | 7.2 | 49.8 | [config](https://github.com/open-mmlab/mmyolo/blob/dev/configs/yolov8/yolov8_m_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_m_syncbn_fast_8xb16-500e_coco/yolov8_m_syncbn_fast_8xb16-500e_coco_20230115_192200-c22e560a.pth) \| 
[log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_m_syncbn_fast_8xb16-500e_coco/yolov8_m_syncbn_fast_8xb16-500e_coco_20230115_192200.log.json) | +| Backbone | Arch | size | Mask Refine | SyncBN | AMP | Mem (GB) | box AP | Config | Download | +| :------: | :--: | :--: | :---------: | :----: | :-: | :------: | :---------: | :---------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| YOLOv8-n | P5 | 640 | No | Yes | Yes | 2.8 | 37.2 | [config](../yolov8/yolov8_n_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_n_syncbn_fast_8xb16-500e_coco/yolov8_n_syncbn_fast_8xb16-500e_coco_20230114_131804-88c11cdb.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_n_syncbn_fast_8xb16-500e_coco/yolov8_n_syncbn_fast_8xb16-500e_coco_20230114_131804.log.json) | +| YOLOv8-n | P5 | 640 | Yes | Yes | Yes | 2.5 | 37.4 (+0.2) | [config](../yolov8/yolov8_n_mask-refine_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_n_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_n_mask-refine_syncbn_fast_8xb16-500e_coco_20230216_101206-b975b1cd.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_n_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_n_mask-refine_syncbn_fast_8xb16-500e_coco_20230216_101206.log.json) | +| YOLOv8-s | P5 | 640 | No | Yes | Yes | 4.0 | 44.2 | [config](../yolov8/yolov8_s_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_s_syncbn_fast_8xb16-500e_coco/yolov8_s_syncbn_fast_8xb16-500e_coco_20230117_180101-5aa5f0f1.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_s_syncbn_fast_8xb16-500e_coco/yolov8_s_syncbn_fast_8xb16-500e_coco_20230117_180101.log.json) | +| YOLOv8-s | P5 | 640 | Yes | Yes | Yes | 4.0 | 45.1 (+0.9) | [config](../yolov8/yolov8_s_mask-refine_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_s_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_s_mask-refine_syncbn_fast_8xb16-500e_coco_20230216_095938-ce3c1b3f.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_s_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_s_mask-refine_syncbn_fast_8xb16-500e_coco_20230216_095938.log.json) | +| YOLOv8-m | P5 | 640 | No | Yes | Yes | 7.2 | 49.8 | [config](../yolov8/yolov8_m_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_m_syncbn_fast_8xb16-500e_coco/yolov8_m_syncbn_fast_8xb16-500e_coco_20230115_192200-c22e560a.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_m_syncbn_fast_8xb16-500e_coco/yolov8_m_syncbn_fast_8xb16-500e_coco_20230115_192200.log.json) | +| YOLOv8-m | P5 | 640 | Yes | Yes | Yes | 7.0 | 50.6 (+0.8) | [config](../yolov8/yolov8_m_mask-refine_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_m_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_m_mask-refine_syncbn_fast_8xb16-500e_coco_20230216_223400-f40abfcd.pth) \| 
[log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_m_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_m_mask-refine_syncbn_fast_8xb16-500e_coco_20230216_223400.log.json) | +| YOLOv8-l | P5 | 640 | No | Yes | Yes | 9.8 | 52.1 | [config](../yolov8/yolov8_l_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_l_syncbn_fast_8xb16-500e_coco/yolov8_l_syncbn_fast_8xb16-500e_coco_20230217_182526-189611b6.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_l_syncbn_fast_8xb16-500e_coco/yolov8_l_syncbn_fast_8xb16-500e_coco_20230217_182526.log.json) | +| YOLOv8-l | P5 | 640 | Yes | Yes | Yes | 9.1 | 53.0 (+0.9) | [config](../yolov8/yolov8_l_mask-refine_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_l_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_l_mask-refine_syncbn_fast_8xb16-500e_coco_20230217_120100-5881dec4.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_l_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_l_mask-refine_syncbn_fast_8xb16-500e_coco_20230217_120100.log.json) | +| YOLOv8-x | P5 | 640 | No | Yes | Yes | 12.2 | 52.7 | [config](../yolov8/yolov8_x_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_x_syncbn_fast_8xb16-500e_coco/yolov8_x_syncbn_fast_8xb16-500e_coco_20230218_023338-5674673c.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_x_syncbn_fast_8xb16-500e_coco/yolov8_x_syncbn_fast_8xb16-500e_coco_20230218_023338.log.json) | +| YOLOv8-x | P5 | 640 | Yes | Yes | Yes | 12.4 | 54.0 (+1.3) | [config](../yolov8/yolov8_x_mask-refine_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_x_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_x_mask-refine_syncbn_fast_8xb16-500e_coco_20230217_120411-079ca8d1.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_x_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_x_mask-refine_syncbn_fast_8xb16-500e_coco_20230217_120411.log.json) | **Note** -In the official YOLOv8 code, the [bbox annotation](https://github.com/ultralytics/ultralytics/blob/0cb87f7dd340a2611148fbf2a0af59b544bd7b1b/ultralytics/yolo/data/dataloaders/v5loader.py#L1011), [`random_perspective`](https://github.com/ultralytics/ultralytics/blob/0cb87f7dd3/ultralytics/yolo/data/dataloaders/v5augmentations.py#L208) and [`copy_paste`](https://github.com/ultralytics/ultralytics/blob/0cb87f7dd3/ultralytics/yolo/data/dataloaders/v5augmentations.py#L208) data augmentation in COCO object detection task training uses mask annotation information, which leads to higher performance. Object detection should not use mask annotation, so only box annotation information is used in `MMYOLO`. We trained the official YOLOv8s code with `8xb16` configuration and its best performance is also 44.2. We will support mask annotations in object detection tasks in the next version. - 1. We use 8x A100 for training, and the single-GPU batch size is 16. This is different from the official code, but has no effect on performance. 2. The performance is unstable and may fluctuate by about 0.3 mAP and the highest performance weight in `COCO` training in `YOLOv8` may not be the last epoch. The performance shown above is the best model. 3. We provide [scripts](https://github.com/open-mmlab/mmyolo/tree/dev/tools/model_converters/yolov8_to_mmyolo.py) to convert official weights to MMYOLO. -4. `SyncBN` means use SyncBN, `AMP` indicates training with mixed precision. +4. 
`SyncBN` means using SyncBN, `AMP` indicates training with mixed precision. +5. The performance of `Mask Refine` training is for the weight performance officially released by YOLOv8. `Mask Refine` means refining bbox by mask while loading annotations and transforming after `YOLOv5RandomAffine`, and the L and X models use `Copy Paste`. ## Citation diff --git a/configs/yolov8/metafile.yml b/configs/yolov8/metafile.yml index 337a27ac..33cd22bc 100644 --- a/configs/yolov8/metafile.yml +++ b/configs/yolov8/metafile.yml @@ -54,3 +54,87 @@ Models: Metrics: box AP: 49.8 Weights: https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_m_syncbn_fast_8xb16-500e_coco/yolov8_m_syncbn_fast_8xb16-500e_coco_20230115_192200-c22e560a.pth + - Name: yolov8_l_syncbn_fast_8xb16-500e_coco + In Collection: YOLOv8 + Config: configs/yolov8/yolov8_l_syncbn_fast_8xb16-500e_coco.py + Metadata: + Training Memory (GB): 9.8 + Epochs: 500 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 52.1 + Weights: https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_l_syncbn_fast_8xb16-500e_coco/yolov8_l_syncbn_fast_8xb16-500e_coco_20230217_182526-189611b6.pth + - Name: yolov8_x_syncbn_fast_8xb16-500e_coco + In Collection: YOLOv8 + Config: configs/yolov8/yolov8_x_syncbn_fast_8xb16-500e_coco.py + Metadata: + Training Memory (GB): 12.2 + Epochs: 500 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 52.7 + Weights: https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_x_syncbn_fast_8xb16-500e_coco/yolov8_x_syncbn_fast_8xb16-500e_coco_20230218_023338-5674673c.pth + - Name: yolov8_n_mask-refine_syncbn_fast_8xb16-500e_coco + In Collection: YOLOv8 + Config: configs/yolov8/yolov8_n_mask-refine_syncbn_fast_8xb16-500e_coco.py + Metadata: + Training Memory (GB): 2.5 + Epochs: 500 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 37.4 + Weights: https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_n_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_n_mask-refine_syncbn_fast_8xb16-500e_coco_20230216_101206-b975b1cd.pth + - Name: yolov8_s_mask-refine_syncbn_fast_8xb16-500e_coco + In Collection: YOLOv8 + Config: configs/yolov8/yolov8_s_mask-refine_syncbn_fast_8xb16-500e_coco.py + Metadata: + Training Memory (GB): 4.0 + Epochs: 500 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 45.1 + Weights: https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_s_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_s_mask-refine_syncbn_fast_8xb16-500e_coco_20230216_095938-ce3c1b3f.pth + - Name: yolov8_m_mask-refine_syncbn_fast_8xb16-500e_coco + In Collection: YOLOv8 + Config: configs/yolov8/yolov8_m_mask-refine_syncbn_fast_8xb16-500e_coco.py + Metadata: + Training Memory (GB): 7.0 + Epochs: 500 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 50.6 + Weights: https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_m_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_m_mask-refine_syncbn_fast_8xb16-500e_coco_20230216_223400-f40abfcd.pth + - Name: yolov8_l_mask-refine_syncbn_fast_8xb16-500e_coco + In Collection: YOLOv8 + Config: configs/yolov8/yolov8_l_mask-refine_syncbn_fast_8xb16-500e_coco.py + Metadata: + Training Memory (GB): 9.1 + Epochs: 500 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 53.0 + Weights: https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_l_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_l_mask-refine_syncbn_fast_8xb16-500e_coco_20230217_120100-5881dec4.pth + - Name: 
yolov8_x_mask-refine_syncbn_fast_8xb16-500e_coco + In Collection: YOLOv8 + Config: configs/yolov8/yolov8_x_mask-refine_syncbn_fast_8xb16-500e_coco.py + Metadata: + Training Memory (GB): 12.4 + Epochs: 500 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 54.0 + Weights: https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_x_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_x_mask-refine_syncbn_fast_8xb16-500e_coco_20230217_120411-079ca8d1.pth diff --git a/configs/yolov8/yolov8_l_mask-refine_syncbn_fast_8xb16-500e_coco.py b/configs/yolov8/yolov8_l_mask-refine_syncbn_fast_8xb16-500e_coco.py new file mode 100644 index 00000000..e25b6bcb --- /dev/null +++ b/configs/yolov8/yolov8_l_mask-refine_syncbn_fast_8xb16-500e_coco.py @@ -0,0 +1,65 @@ +_base_ = './yolov8_m_mask-refine_syncbn_fast_8xb16-500e_coco.py' + +# This config use refining bbox and `YOLOv5CopyPaste`. +# Refining bbox means refining bbox by mask while loading annotations and +# transforming after `YOLOv5RandomAffine` + +# ========================modified parameters====================== +deepen_factor = 1.00 +widen_factor = 1.00 +last_stage_out_channels = 512 + +mixup_prob = 0.15 +copypaste_prob = 0.3 + +# =======================Unmodified in most cases================== +img_scale = _base_.img_scale +pre_transform = _base_.pre_transform +last_transform = _base_.last_transform +affine_scale = _base_.affine_scale + +model = dict( + backbone=dict( + last_stage_out_channels=last_stage_out_channels, + deepen_factor=deepen_factor, + widen_factor=widen_factor), + neck=dict( + deepen_factor=deepen_factor, + widen_factor=widen_factor, + in_channels=[256, 512, last_stage_out_channels], + out_channels=[256, 512, last_stage_out_channels]), + bbox_head=dict( + head_module=dict( + widen_factor=widen_factor, + in_channels=[256, 512, last_stage_out_channels]))) + +mosaic_affine_transform = [ + dict( + type='Mosaic', + img_scale=img_scale, + pad_val=114.0, + pre_transform=pre_transform), + dict(type='YOLOv5CopyPaste', prob=copypaste_prob), + dict( + type='YOLOv5RandomAffine', + max_rotate_degree=0.0, + max_shear_degree=0.0, + max_aspect_ratio=100., + scaling_ratio_range=(1 - affine_scale, 1 + affine_scale), + # img_scale is (width, height) + border=(-img_scale[0] // 2, -img_scale[1] // 2), + border_val=(114, 114, 114), + min_area_ratio=_base_.min_area_ratio, + use_mask_refine=_base_.use_mask2refine) +] + +train_pipeline = [ + *pre_transform, *mosaic_affine_transform, + dict( + type='YOLOv5MixUp', + prob=mixup_prob, + pre_transform=[*pre_transform, *mosaic_affine_transform]), + *last_transform +] + +train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) diff --git a/configs/yolov8/yolov8_l_syncbn_fast_8xb16-500e_coco.py b/configs/yolov8/yolov8_l_syncbn_fast_8xb16-500e_coco.py index 9c2d1ae3..bea8b2d5 100644 --- a/configs/yolov8/yolov8_l_syncbn_fast_8xb16-500e_coco.py +++ b/configs/yolov8/yolov8_l_syncbn_fast_8xb16-500e_coco.py @@ -8,6 +8,10 @@ last_stage_out_channels = 512 mixup_prob = 0.15 # =======================Unmodified in most cases================== +pre_transform = _base_.pre_transform +mosaic_affine_transform = _base_.mosaic_affine_transform +last_transform = _base_.last_transform + model = dict( backbone=dict( last_stage_out_channels=last_stage_out_channels, @@ -23,17 +27,12 @@ model = dict( widen_factor=widen_factor, in_channels=[256, 512, last_stage_out_channels]))) -pre_transform = _base_.pre_transform -albu_train_transform = _base_.albu_train_transform -mosaic_affine_pipeline = 
_base_.mosaic_affine_pipeline -last_transform = _base_.last_transform - train_pipeline = [ - *pre_transform, *mosaic_affine_pipeline, + *pre_transform, *mosaic_affine_transform, dict( type='YOLOv5MixUp', prob=mixup_prob, - pre_transform=[*pre_transform, *mosaic_affine_pipeline]), + pre_transform=[*pre_transform, *mosaic_affine_transform]), *last_transform ] diff --git a/configs/yolov8/yolov8_m_mask-refine_syncbn_fast_8xb16-500e_coco.py b/configs/yolov8/yolov8_m_mask-refine_syncbn_fast_8xb16-500e_coco.py new file mode 100644 index 00000000..2884daeb --- /dev/null +++ b/configs/yolov8/yolov8_m_mask-refine_syncbn_fast_8xb16-500e_coco.py @@ -0,0 +1,85 @@ +_base_ = './yolov8_s_mask-refine_syncbn_fast_8xb16-500e_coco.py' + +# This config use refining bbox and `YOLOv5CopyPaste`. +# Refining bbox means refining bbox by mask while loading annotations and +# transforming after `YOLOv5RandomAffine` + +# ========================modified parameters====================== +deepen_factor = 0.67 +widen_factor = 0.75 +last_stage_out_channels = 768 + +affine_scale = 0.9 +mixup_prob = 0.1 +copypaste_prob = 0.1 + +# ===============================Unmodified in most cases==================== +img_scale = _base_.img_scale +pre_transform = _base_.pre_transform +last_transform = _base_.last_transform + +model = dict( + backbone=dict( + last_stage_out_channels=last_stage_out_channels, + deepen_factor=deepen_factor, + widen_factor=widen_factor), + neck=dict( + deepen_factor=deepen_factor, + widen_factor=widen_factor, + in_channels=[256, 512, last_stage_out_channels], + out_channels=[256, 512, last_stage_out_channels]), + bbox_head=dict( + head_module=dict( + widen_factor=widen_factor, + in_channels=[256, 512, last_stage_out_channels]))) + +mosaic_affine_transform = [ + dict( + type='Mosaic', + img_scale=img_scale, + pad_val=114.0, + pre_transform=pre_transform), + dict(type='YOLOv5CopyPaste', prob=copypaste_prob), + dict( + type='YOLOv5RandomAffine', + max_rotate_degree=0.0, + max_shear_degree=0.0, + max_aspect_ratio=100., + scaling_ratio_range=(1 - affine_scale, 1 + affine_scale), + # img_scale is (width, height) + border=(-img_scale[0] // 2, -img_scale[1] // 2), + border_val=(114, 114, 114), + min_area_ratio=_base_.min_area_ratio, + use_mask_refine=_base_.use_mask2refine) +] + +train_pipeline = [ + *pre_transform, *mosaic_affine_transform, + dict( + type='YOLOv5MixUp', + prob=mixup_prob, + pre_transform=[*pre_transform, *mosaic_affine_transform]), + *last_transform +] + +train_pipeline_stage2 = [ + *pre_transform, + dict(type='YOLOv5KeepRatioResize', scale=img_scale), + dict( + type='LetterResize', + scale=img_scale, + allow_scale_up=True, + pad_val=dict(img=114.0)), + dict( + type='YOLOv5RandomAffine', + max_rotate_degree=0.0, + max_shear_degree=0.0, + scaling_ratio_range=(1 - affine_scale, 1 + affine_scale), + max_aspect_ratio=_base_.max_aspect_ratio, + border_val=(114, 114, 114), + min_area_ratio=_base_.min_area_ratio, + use_mask_refine=_base_.use_mask2refine), *last_transform +] + +train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) +_base_.custom_hooks[1].switch_pipeline = train_pipeline_stage2 diff --git a/configs/yolov8/yolov8_m_syncbn_fast_8xb16-500e_coco.py b/configs/yolov8/yolov8_m_syncbn_fast_8xb16-500e_coco.py index 23eb3823..840d32cc 100644 --- a/configs/yolov8/yolov8_m_syncbn_fast_8xb16-500e_coco.py +++ b/configs/yolov8/yolov8_m_syncbn_fast_8xb16-500e_coco.py @@ -9,9 +9,9 @@ affine_scale = 0.9 mixup_prob = 0.1 # =======================Unmodified in most cases================== -num_classes 
= _base_.num_classes -num_det_layers = _base_.num_det_layers img_scale = _base_.img_scale +pre_transform = _base_.pre_transform +last_transform = _base_.last_transform model = dict( backbone=dict( @@ -28,11 +28,7 @@ model = dict( widen_factor=widen_factor, in_channels=[256, 512, last_stage_out_channels]))) -pre_transform = _base_.pre_transform -albu_train_transform = _base_.albu_train_transform -last_transform = _base_.last_transform - -mosaic_affine_pipeline = [ +mosaic_affine_transform = [ dict( type='Mosaic', img_scale=img_scale, @@ -51,16 +47,14 @@ mosaic_affine_pipeline = [ # enable mixup train_pipeline = [ - *pre_transform, *mosaic_affine_pipeline, + *pre_transform, *mosaic_affine_transform, dict( type='YOLOv5MixUp', prob=mixup_prob, - pre_transform=[*pre_transform, *mosaic_affine_pipeline]), + pre_transform=[*pre_transform, *mosaic_affine_transform]), *last_transform ] -train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) - train_pipeline_stage2 = [ *pre_transform, dict(type='YOLOv5KeepRatioResize', scale=img_scale), @@ -78,16 +72,5 @@ train_pipeline_stage2 = [ border_val=(114, 114, 114)), *last_transform ] -custom_hooks = [ - dict( - type='EMAHook', - ema_type='ExpMomentumEMA', - momentum=0.0001, - update_buffers=True, - strict_load=False, - priority=49), - dict( - type='mmdet.PipelineSwitchHook', - switch_epoch=_base_.max_epochs - _base_.close_mosaic_epochs, - switch_pipeline=train_pipeline_stage2) -] +train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) +_base_.custom_hooks[1].switch_pipeline = train_pipeline_stage2 diff --git a/configs/yolov8/yolov8_n_mask-refine_syncbn_fast_8xb16-500e_coco.py b/configs/yolov8/yolov8_n_mask-refine_syncbn_fast_8xb16-500e_coco.py new file mode 100644 index 00000000..50d37742 --- /dev/null +++ b/configs/yolov8/yolov8_n_mask-refine_syncbn_fast_8xb16-500e_coco.py @@ -0,0 +1,12 @@ +_base_ = './yolov8_s_mask-refine_syncbn_fast_8xb16-500e_coco.py' + +# This config will refine bbox by mask while loading annotations and +# transforming after `YOLOv5RandomAffine` + +deepen_factor = 0.33 +widen_factor = 0.25 + +model = dict( + backbone=dict(deepen_factor=deepen_factor, widen_factor=widen_factor), + neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor), + bbox_head=dict(head_module=dict(widen_factor=widen_factor))) diff --git a/configs/yolov8/yolov8_s_mask-refine_syncbn_fast_8xb16-500e_coco.py b/configs/yolov8/yolov8_s_mask-refine_syncbn_fast_8xb16-500e_coco.py new file mode 100644 index 00000000..3ab3a2bc --- /dev/null +++ b/configs/yolov8/yolov8_s_mask-refine_syncbn_fast_8xb16-500e_coco.py @@ -0,0 +1,83 @@ +_base_ = './yolov8_s_syncbn_fast_8xb16-500e_coco.py' + +# This config will refine bbox by mask while loading annotations and +# transforming after `YOLOv5RandomAffine` + +# ========================modified parameters====================== +use_mask2refine = True +min_area_ratio = 0.01 # YOLOv5RandomAffine + +# ===============================Unmodified in most cases==================== +pre_transform = [ + dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args), + dict( + type='LoadAnnotations', + with_bbox=True, + with_mask=True, + mask2bbox=use_mask2refine) +] + +last_transform = [ + # Delete gt_masks to avoid more computation + dict(type='RemoveDataElement', keys=['gt_masks']), + dict( + type='mmdet.Albu', + transforms=_base_.albu_train_transforms, + bbox_params=dict( + type='BboxParams', + format='pascal_voc', + label_fields=['gt_bboxes_labels', 'gt_ignore_flags']), + keymap={ + 'img': 'image', + 
'gt_bboxes': 'bboxes' + }), + dict(type='YOLOv5HSVRandomAug'), + dict(type='mmdet.RandomFlip', prob=0.5), + dict( + type='mmdet.PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip', + 'flip_direction')) +] + +train_pipeline = [ + *pre_transform, + dict( + type='Mosaic', + img_scale=_base_.img_scale, + pad_val=114.0, + pre_transform=pre_transform), + dict( + type='YOLOv5RandomAffine', + max_rotate_degree=0.0, + max_shear_degree=0.0, + scaling_ratio_range=(1 - _base_.affine_scale, 1 + _base_.affine_scale), + max_aspect_ratio=_base_.max_aspect_ratio, + # img_scale is (width, height) + border=(-_base_.img_scale[0] // 2, -_base_.img_scale[1] // 2), + border_val=(114, 114, 114), + min_area_ratio=min_area_ratio, + use_mask_refine=use_mask2refine), + *last_transform +] + +train_pipeline_stage2 = [ + *pre_transform, + dict(type='YOLOv5KeepRatioResize', scale=_base_.img_scale), + dict( + type='LetterResize', + scale=_base_.img_scale, + allow_scale_up=True, + pad_val=dict(img=114.0)), + dict( + type='YOLOv5RandomAffine', + max_rotate_degree=0.0, + max_shear_degree=0.0, + scaling_ratio_range=(1 - _base_.affine_scale, 1 + _base_.affine_scale), + max_aspect_ratio=_base_.max_aspect_ratio, + border_val=(114, 114, 114), + min_area_ratio=min_area_ratio, + use_mask_refine=use_mask2refine), *last_transform +] + +train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) +_base_.custom_hooks[1].switch_pipeline = train_pipeline_stage2 diff --git a/configs/yolov8/yolov8_s_syncbn_fast_8xb16-500e_coco.py b/configs/yolov8/yolov8_s_syncbn_fast_8xb16-500e_coco.py index d4900609..58441a99 100644 --- a/configs/yolov8/yolov8_s_syncbn_fast_8xb16-500e_coco.py +++ b/configs/yolov8/yolov8_s_syncbn_fast_8xb16-500e_coco.py @@ -161,7 +161,7 @@ model = dict( eps=1e-9)), test_cfg=model_test_cfg) -albu_train_transform = [ +albu_train_transforms = [ dict(type='Blur', p=0.01), dict(type='MedianBlur', p=0.01), dict(type='ToGray', p=0.01), @@ -176,7 +176,7 @@ pre_transform = [ last_transform = [ dict( type='mmdet.Albu', - transforms=albu_train_transform, + transforms=albu_train_transforms, bbox_params=dict( type='BboxParams', format='pascal_voc', diff --git a/configs/yolov8/yolov8_x_mask-refine_syncbn_fast_8xb16-500e_coco.py b/configs/yolov8/yolov8_x_mask-refine_syncbn_fast_8xb16-500e_coco.py new file mode 100644 index 00000000..8c27b961 --- /dev/null +++ b/configs/yolov8/yolov8_x_mask-refine_syncbn_fast_8xb16-500e_coco.py @@ -0,0 +1,13 @@ +_base_ = './yolov8_l_mask-refine_syncbn_fast_8xb16-500e_coco.py' + +# This config use refining bbox and `YOLOv5CopyPaste`. +# Refining bbox means refining bbox by mask while loading annotations and +# transforming after `YOLOv5RandomAffine` + +deepen_factor = 1.00 +widen_factor = 1.25 + +model = dict( + backbone=dict(deepen_factor=deepen_factor, widen_factor=widen_factor), + neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor), + bbox_head=dict(head_module=dict(widen_factor=widen_factor))) diff --git a/mmyolo/datasets/transforms/__init__.py b/mmyolo/datasets/transforms/__init__.py index ea1cd41e..7b2c6a91 100644 --- a/mmyolo/datasets/transforms/__init__.py +++ b/mmyolo/datasets/transforms/__init__.py @@ -1,12 +1,13 @@ # Copyright (c) OpenMMLab. All rights reserved. 
from .mix_img_transforms import Mosaic, Mosaic9, YOLOv5MixUp, YOLOXMixUp from .transforms import (LetterResize, LoadAnnotations, PPYOLOERandomCrop, - PPYOLOERandomDistort, YOLOv5HSVRandomAug, + PPYOLOERandomDistort, RemoveDataElement, + YOLOv5CopyPaste, YOLOv5HSVRandomAug, YOLOv5KeepRatioResize, YOLOv5RandomAffine) __all__ = [ 'YOLOv5KeepRatioResize', 'LetterResize', 'Mosaic', 'YOLOXMixUp', 'YOLOv5MixUp', 'YOLOv5HSVRandomAug', 'LoadAnnotations', 'YOLOv5RandomAffine', 'PPYOLOERandomDistort', 'PPYOLOERandomCrop', - 'Mosaic9' + 'Mosaic9', 'YOLOv5CopyPaste', 'RemoveDataElement' ] diff --git a/mmyolo/datasets/transforms/mix_img_transforms.py b/mmyolo/datasets/transforms/mix_img_transforms.py index 9cd5ad98..4a25f6f7 100644 --- a/mmyolo/datasets/transforms/mix_img_transforms.py +++ b/mmyolo/datasets/transforms/mix_img_transforms.py @@ -317,6 +317,8 @@ class Mosaic(BaseMixImageTransform): mosaic_bboxes = [] mosaic_bboxes_labels = [] mosaic_ignore_flags = [] + mosaic_masks = [] + with_mask = True if 'gt_masks' in results else False # self.img_scale is wh format img_scale_w, img_scale_h = self.img_scale @@ -370,6 +372,20 @@ class Mosaic(BaseMixImageTransform): mosaic_bboxes.append(gt_bboxes_i) mosaic_bboxes_labels.append(gt_bboxes_labels_i) mosaic_ignore_flags.append(gt_ignore_flags_i) + if with_mask and results_patch.get('gt_masks', None) is not None: + gt_masks_i = results_patch['gt_masks'] + gt_masks_i = gt_masks_i.rescale(float(scale_ratio_i)) + gt_masks_i = gt_masks_i.translate( + out_shape=(int(self.img_scale[0] * 2), + int(self.img_scale[1] * 2)), + offset=padw, + direction='horizontal') + gt_masks_i = gt_masks_i.translate( + out_shape=(int(self.img_scale[0] * 2), + int(self.img_scale[1] * 2)), + offset=padh, + direction='vertical') + mosaic_masks.append(gt_masks_i) mosaic_bboxes = mosaic_bboxes[0].cat(mosaic_bboxes, 0) mosaic_bboxes_labels = np.concatenate(mosaic_bboxes_labels, 0) @@ -377,6 +393,9 @@ class Mosaic(BaseMixImageTransform): if self.bbox_clip_border: mosaic_bboxes.clip_([2 * img_scale_h, 2 * img_scale_w]) + if with_mask: + mosaic_masks = mosaic_masks[0].cat(mosaic_masks) + results['gt_masks'] = mosaic_masks else: # remove outside bboxes inside_inds = mosaic_bboxes.is_inside( @@ -384,12 +403,16 @@ class Mosaic(BaseMixImageTransform): mosaic_bboxes = mosaic_bboxes[inside_inds] mosaic_bboxes_labels = mosaic_bboxes_labels[inside_inds] mosaic_ignore_flags = mosaic_ignore_flags[inside_inds] + if with_mask: + mosaic_masks = mosaic_masks[0].cat(mosaic_masks)[inside_inds] + results['gt_masks'] = mosaic_masks results['img'] = mosaic_img results['img_shape'] = mosaic_img.shape results['gt_bboxes'] = mosaic_bboxes results['gt_bboxes_labels'] = mosaic_bboxes_labels results['gt_ignore_flags'] = mosaic_ignore_flags + return results def _mosaic_combine( @@ -876,6 +899,11 @@ class YOLOv5MixUp(BaseMixImageTransform): (results['gt_bboxes_labels'], retrieve_gt_bboxes_labels), axis=0) mixup_gt_ignore_flags = np.concatenate( (results['gt_ignore_flags'], retrieve_gt_ignore_flags), axis=0) + if 'gt_masks' in results: + assert 'gt_masks' in retrieve_results + mixup_gt_masks = results['gt_masks'].cat( + [results['gt_masks'], retrieve_results['gt_masks']]) + results['gt_masks'] = mixup_gt_masks results['img'] = mixup_img.astype(np.uint8) results['img_shape'] = mixup_img.shape diff --git a/mmyolo/datasets/transforms/transforms.py b/mmyolo/datasets/transforms/transforms.py index ca9c9607..926af7cc 100644 --- a/mmyolo/datasets/transforms/transforms.py +++ b/mmyolo/datasets/transforms/transforms.py @@ -1,6 
+1,7 @@ # Copyright (c) OpenMMLab. All rights reserved. import math -from typing import List, Tuple, Union +from copy import deepcopy +from typing import List, Sequence, Tuple, Union import cv2 import mmcv @@ -12,6 +13,7 @@ from mmdet.datasets.transforms import LoadAnnotations as MMDET_LoadAnnotations from mmdet.datasets.transforms import Resize as MMDET_Resize from mmdet.structures.bbox import (HorizontalBoxes, autocast_box_type, get_box_type) +from mmdet.structures.mask import PolygonMasks from numpy import random from mmyolo.registry import TRANSFORMS @@ -240,7 +242,7 @@ class LetterResize(MMDET_Resize): results['img_shape'] = image.shape if 'pad_param' in results: results['pad_param_origin'] = results['pad_param'] * \ - np.repeat(ratio, 2) + np.repeat(ratio, 2) results['pad_param'] = np.array(padding_list, dtype=np.float32) def _resize_masks(self, results: dict): @@ -248,32 +250,29 @@ class LetterResize(MMDET_Resize): if results.get('gt_masks', None) is None: return - # resize the gt_masks - gt_mask_height = results['gt_masks'].height * \ - results['scale_factor'][1] - gt_mask_width = results['gt_masks'].width * \ - results['scale_factor'][0] - gt_masks = results['gt_masks'].resize( - (int(round(gt_mask_height)), int(round(gt_mask_width)))) + gt_masks = results['gt_masks'] + assert isinstance( + gt_masks, PolygonMasks + ), f'Only supports PolygonMasks, but got {type(gt_masks)}' - # padding the gt_masks - if len(gt_masks) == 0: - padded_masks = np.empty((0, *results['img_shape'][:2]), - dtype=np.uint8) - else: - # TODO: The function is incorrect. Because the mask may not - # be able to pad. - padded_masks = np.stack([ - mmcv.impad( - mask, - padding=(int(results['pad_param'][2]), - int(results['pad_param'][0]), - int(results['pad_param'][3]), - int(results['pad_param'][1])), - pad_val=self.pad_val.get('masks', 0)) for mask in gt_masks - ]) - results['gt_masks'] = type(results['gt_masks'])( - padded_masks, *results['img_shape'][:2]) + # resize the gt_masks + gt_mask_h = results['gt_masks'].height * results['scale_factor'][1] + gt_mask_w = results['gt_masks'].width * results['scale_factor'][0] + gt_masks = results['gt_masks'].resize( + (int(round(gt_mask_h)), int(round(gt_mask_w)))) + + top_padding, _, left_padding, _ = results['pad_param'] + if int(left_padding) != 0: + gt_masks = gt_masks.translate( + out_shape=results['img_shape'][:2], + offset=int(left_padding), + direction='horizontal') + if int(top_padding) != 0: + gt_masks = gt_masks.translate( + out_shape=results['img_shape'][:2], + offset=int(top_padding), + direction='vertical') + results['gt_masks'] = gt_masks def _resize_bboxes(self, results: dict): """Resize bounding boxes with ``results['scale_factor']``.""" @@ -356,19 +355,74 @@ class YOLOv5HSVRandomAug(BaseTransform): results['img'] = cv2.cvtColor(im_hsv, cv2.COLOR_HSV2BGR) return results + def __repr__(self) -> str: + repr_str = self.__class__.__name__ + repr_str += f'(hue_delta={self.hue_delta}, ' + repr_str += f'saturation_delta={self.saturation_delta}, ' + repr_str += f'value_delta={self.value_delta})' + return repr_str + -# TODO: can be accelerated @TRANSFORMS.register_module() class LoadAnnotations(MMDET_LoadAnnotations): """Because the yolo series does not need to consider ignore bboxes for the time being, in order to speed up the pipeline, it can be excluded in advance.""" + def __init__(self, + mask2bbox: bool = False, + poly2mask: bool = False, + **kwargs) -> None: + self.mask2bbox = mask2bbox + assert not poly2mask, 'Does not support BitmapMasks considering ' \ 
+ 'that bitmap consumes more memory.' + super().__init__(poly2mask=poly2mask, **kwargs) + if self.mask2bbox: + assert self.with_mask, 'Using mask2bbox requires ' \ + 'with_mask is True.' + self._mask_ignore_flag = None + + def transform(self, results: dict) -> dict: + """Function to load multiple types annotations. + + Args: + results (dict): Result dict from :obj:``mmengine.BaseDataset``. + + Returns: + dict: The dict contains loaded bounding box, label and + semantic segmentation. + """ + if self.mask2bbox: + self._load_masks(results) + if self.with_label: + self._load_labels(results) + self._update_mask_ignore_data(results) + gt_bboxes = results['gt_masks'].get_bboxes(dst_type='hbox') + results['gt_bboxes'] = gt_bboxes + else: + results = super().transform(results) + self._update_mask_ignore_data(results) + return results + + def _update_mask_ignore_data(self, results: dict) -> None: + if 'gt_masks' not in results: + return + + if 'gt_bboxes_labels' in results and len( + results['gt_bboxes_labels']) != len(results['gt_masks']): + assert len(results['gt_bboxes_labels']) == len( + self._mask_ignore_flag) + results['gt_bboxes_labels'] = results['gt_bboxes_labels'][ + self._mask_ignore_flag] + + if 'gt_bboxes' in results and len(results['gt_bboxes']) != len( + results['gt_masks']): + assert len(results['gt_bboxes']) == len(self._mask_ignore_flag) + results['gt_bboxes'] = results['gt_bboxes'][self._mask_ignore_flag] + def _load_bboxes(self, results: dict): """Private function to load bounding box annotations. - Note: BBoxes with ignore_flag of 1 is not considered. - Args: results (dict): Result dict from :obj:``mmengine.BaseDataset``. @@ -394,10 +448,8 @@ class LoadAnnotations(MMDET_LoadAnnotations): """Private function to load label annotations. Note: BBoxes with ignore_flag of 1 is not considered. - Args: results (dict): Result dict from :obj:``mmengine.BaseDataset``. - Returns: dict: The dict contains loaded label annotations. """ @@ -408,14 +460,72 @@ class LoadAnnotations(MMDET_LoadAnnotations): results['gt_bboxes_labels'] = np.array( gt_bboxes_labels, dtype=np.int64) + def _load_masks(self, results: dict) -> None: + """Private function to load mask annotations. + + Args: + results (dict): Result dict from :obj:``mmengine.BaseDataset``. 
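+
+        Note:
+            Polygons that are empty, have an odd number of coordinates or
+            fewer than three points are discarded here. If an instance is
+            left with no valid polygon (or carries ``ignore_flag == 1``),
+            it is dropped and the drop is recorded in an internal flag so
+            that the matching labels and bboxes are filtered out afterwards.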
+ """ + gt_masks = [] + gt_ignore_flags = [] + self._mask_ignore_flag = [] + for instance in results.get('instances', []): + if instance['ignore_flag'] == 0: + if 'mask' in instance: + gt_mask = instance['mask'] + if isinstance(gt_mask, list): + gt_mask = [ + np.array(polygon) for polygon in gt_mask + if len(polygon) % 2 == 0 and len(polygon) >= 6 + ] + if len(gt_mask) == 0: + # ignore + self._mask_ignore_flag.append(0) + else: + gt_masks.append(gt_mask) + gt_ignore_flags.append(instance['ignore_flag']) + self._mask_ignore_flag.append(1) + else: + raise NotImplementedError( + 'Only supports mask annotations in polygon ' + 'format currently') + else: + # TODO: Actually, gt with bbox and without mask needs + # to be retained + self._mask_ignore_flag.append(0) + self._mask_ignore_flag = np.array(self._mask_ignore_flag, dtype=bool) + results['gt_ignore_flags'] = np.array(gt_ignore_flags, dtype=bool) + + h, w = results['ori_shape'] + gt_masks = PolygonMasks([mask for mask in gt_masks], h, w) + results['gt_masks'] = gt_masks + + def __repr__(self) -> str: + repr_str = self.__class__.__name__ + repr_str += f'(with_bbox={self.with_bbox}, ' + repr_str += f'with_label={self.with_label}, ' + repr_str += f'with_mask={self.with_mask}, ' + repr_str += f'with_seg={self.with_seg}, ' + repr_str += f'mask2bbox={self.mask2bbox}, ' + repr_str += f'poly2mask={self.poly2mask}, ' + repr_str += f"imdecode_backend='{self.imdecode_backend}', " + repr_str += f'file_client_args={self.file_client_args})' + return repr_str + @TRANSFORMS.register_module() class YOLOv5RandomAffine(BaseTransform): - """Random affine transform data augmentation in YOLOv5. It is different - from the implementation in YOLOX. + """Random affine transform data augmentation in YOLOv5 and YOLOv8. It is + different from the implementation in YOLOX. This operation randomly generates affine transform matrix which including rotation, translation, shear and scaling transforms. + If you set use_mask_refine == True, the code will use the masks + annotation to refine the bbox. + Our implementation is slightly different from the official. In COCO + dataset, a gt may have multiple mask tags. The official YOLOv5 + annotation file already combines the masks that an object has, + but our code takes into account the fact that an object has multiple masks. Required Keys: @@ -423,6 +533,7 @@ class YOLOv5RandomAffine(BaseTransform): - gt_bboxes (BaseBoxes[torch.float32]) (optional) - gt_bboxes_labels (np.int64) (optional) - gt_ignore_flags (bool) (optional) + - gt_masks (PolygonMasks) (optional) Modified Keys: @@ -431,6 +542,7 @@ class YOLOv5RandomAffine(BaseTransform): - gt_bboxes (optional) - gt_bboxes_labels (optional) - gt_ignore_flags (optional) + - gt_masks (PolygonMasks) (optional) Args: max_rotate_degree (float): Maximum degrees of rotation transform. @@ -456,9 +568,11 @@ class YOLOv5RandomAffine(BaseTransform): min_area_ratio (float): Threshold of area ratio between original bboxes and wrapped bboxes. If smaller than this value, the box will be removed. Defaults to 0.1. + use_mask_refine (bool): Whether to refine bbox by mask. max_aspect_ratio (float): Aspect ratio of width and height threshold to filter bboxes. If max(h/w, w/h) larger than this value, the box will be removed. Defaults to 20. + resample_num (int): Number of poly to resample to. 
""" def __init__(self, @@ -471,7 +585,9 @@ class YOLOv5RandomAffine(BaseTransform): bbox_clip_border: bool = True, min_bbox_size: int = 2, min_area_ratio: float = 0.1, - max_aspect_ratio: int = 20): + use_mask_refine: bool = False, + max_aspect_ratio: float = 20., + resample_num: int = 1000): assert 0 <= max_translate_ratio <= 1 assert scaling_ratio_range[0] <= scaling_ratio_range[1] assert scaling_ratio_range[0] > 0 @@ -483,9 +599,200 @@ class YOLOv5RandomAffine(BaseTransform): self.border_val = border_val self.bbox_clip_border = bbox_clip_border self.min_bbox_size = min_bbox_size - self.min_bbox_size = min_bbox_size self.min_area_ratio = min_area_ratio + self.use_mask_refine = use_mask_refine self.max_aspect_ratio = max_aspect_ratio + self.resample_num = resample_num + + @autocast_box_type() + def transform(self, results: dict) -> dict: + """The YOLOv5 random affine transform function. + + Args: + results (dict): The result dict. + + Returns: + dict: The result dict. + """ + img = results['img'] + # self.border is wh format + height = img.shape[0] + self.border[1] * 2 + width = img.shape[1] + self.border[0] * 2 + + # Note: Different from YOLOX + center_matrix = np.eye(3, dtype=np.float32) + center_matrix[0, 2] = -img.shape[1] / 2 + center_matrix[1, 2] = -img.shape[0] / 2 + + warp_matrix, scaling_ratio = self._get_random_homography_matrix( + height, width) + warp_matrix = warp_matrix @ center_matrix + + img = cv2.warpPerspective( + img, + warp_matrix, + dsize=(width, height), + borderValue=self.border_val) + results['img'] = img + results['img_shape'] = img.shape + img_h, img_w = img.shape[:2] + + bboxes = results['gt_bboxes'] + num_bboxes = len(bboxes) + if num_bboxes: + orig_bboxes = bboxes.clone() + if self.use_mask_refine and 'gt_masks' in results: + # If the dataset has annotations of mask, + # the mask will be used to refine bbox. + gt_masks = results['gt_masks'] + + gt_masks_resample = self.resample_masks(gt_masks) + gt_masks = self.warp_mask(gt_masks_resample, warp_matrix, + img_h, img_w) + + # refine bboxes by masks + bboxes = gt_masks.get_bboxes(dst_type='hbox') + # filter bboxes outside image + valid_index = self.filter_gt_bboxes(orig_bboxes, + bboxes).numpy() + results['gt_masks'] = gt_masks[valid_index] + else: + bboxes.project_(warp_matrix) + if self.bbox_clip_border: + bboxes.clip_([height, width]) + + # filter bboxes + orig_bboxes.rescale_([scaling_ratio, scaling_ratio]) + + # Be careful: valid_index must convert to numpy, + # otherwise it will raise out of bounds when len(valid_index)=1 + valid_index = self.filter_gt_bboxes(orig_bboxes, + bboxes).numpy() + if 'gt_masks' in results: + results['gt_masks'] = PolygonMasks( + results['gt_masks'].masks, img_h, img_w) + + results['gt_bboxes'] = bboxes[valid_index] + results['gt_bboxes_labels'] = results['gt_bboxes_labels'][ + valid_index] + results['gt_ignore_flags'] = results['gt_ignore_flags'][ + valid_index] + + return results + + @staticmethod + def warp_poly(poly: np.ndarray, warp_matrix: np.ndarray, img_w: int, + img_h: int) -> np.ndarray: + """Function to warp one mask and filter points outside image. + + Args: + poly (np.ndarray): Segmentation annotation with shape (n, ) and + with format (x1, y1, x2, y2, ...). + warp_matrix (np.ndarray): Affine transformation matrix. + Shape: (3, 3). + img_w (int): Width of output image. + img_h (int): Height of output image. + """ + # TODO: Current logic may cause retained masks unusable for + # semantic segmentation training, which is same as official + # implementation. 
+ poly = poly.reshape((-1, 2)) + poly = np.concatenate((poly, np.ones( + (len(poly), 1), dtype=poly.dtype)), + axis=-1) + # transform poly + poly = poly @ warp_matrix.T + poly = poly[:, :2] / poly[:, 2:3] + + # filter point outside image + x, y = poly.T + valid_ind_point = (x >= 0) & (y >= 0) & (x <= img_w) & (y <= img_h) + return poly[valid_ind_point].reshape(-1) + + def warp_mask(self, gt_masks: PolygonMasks, warp_matrix: np.ndarray, + img_w: int, img_h: int) -> PolygonMasks: + """Warp masks by warp_matrix and retain masks inside image after + warping. + + Args: + gt_masks (PolygonMasks): Annotations of semantic segmentation. + warp_matrix (np.ndarray): Affine transformation matrix. + Shape: (3, 3). + img_w (int): Width of output image. + img_h (int): Height of output image. + + Returns: + PolygonMasks: Masks after warping. + """ + masks = gt_masks.masks + + new_masks = [] + for poly_per_obj in masks: + warpped_poly_per_obj = [] + # One gt may have multiple masks. + for poly in poly_per_obj: + valid_poly = self.warp_poly(poly, warp_matrix, img_w, img_h) + if len(valid_poly): + warpped_poly_per_obj.append(valid_poly.reshape(-1)) + # If all the masks are invalid, + # add [0, 0, 0, 0, 0, 0,] here. + if not warpped_poly_per_obj: + # This will be filtered in function `filter_gt_bboxes`. + warpped_poly_per_obj = [ + np.zeros(6, dtype=poly_per_obj[0].dtype) + ] + new_masks.append(warpped_poly_per_obj) + + gt_masks = PolygonMasks(new_masks, img_h, img_w) + return gt_masks + + def resample_masks(self, gt_masks: PolygonMasks) -> PolygonMasks: + """Function to resample each mask annotation with shape (2 * n, ) to + shape (resample_num * 2, ). + + Args: + gt_masks (PolygonMasks): Annotations of semantic segmentation. + """ + masks = gt_masks.masks + new_masks = [] + for poly_per_obj in masks: + resample_poly_per_obj = [] + for poly in poly_per_obj: + poly = poly.reshape((-1, 2)) # xy + poly = np.concatenate((poly, poly[0:1, :]), axis=0) + x = np.linspace(0, len(poly) - 1, self.resample_num) + xp = np.arange(len(poly)) + poly = np.concatenate([ + np.interp(x, xp, poly[:, i]) for i in range(2) + ]).reshape(2, -1).T.reshape(-1) + resample_poly_per_obj.append(poly) + new_masks.append(resample_poly_per_obj) + return PolygonMasks(new_masks, gt_masks.height, gt_masks.width) + + def filter_gt_bboxes(self, origin_bboxes: HorizontalBoxes, + wrapped_bboxes: HorizontalBoxes) -> torch.Tensor: + """Filter gt bboxes. + + Args: + origin_bboxes (HorizontalBoxes): Origin bboxes. + wrapped_bboxes (HorizontalBoxes): Wrapped bboxes + + Returns: + dict: The result dict. + """ + origin_w = origin_bboxes.widths + origin_h = origin_bboxes.heights + wrapped_w = wrapped_bboxes.widths + wrapped_h = wrapped_bboxes.heights + aspect_ratio = np.maximum(wrapped_w / (wrapped_h + 1e-16), + wrapped_h / (wrapped_w + 1e-16)) + + wh_valid_idx = (wrapped_w > self.min_bbox_size) & \ + (wrapped_h > self.min_bbox_size) + area_valid_idx = wrapped_w * wrapped_h / (origin_w * origin_h + + 1e-16) > self.min_area_ratio + aspect_ratio_valid_idx = aspect_ratio < self.max_aspect_ratio + return wh_valid_idx & area_valid_idx & aspect_ratio_valid_idx @cache_randomness def _get_random_homography_matrix(self, height: int, @@ -527,99 +834,6 @@ class YOLOv5RandomAffine(BaseTransform): translate_matrix @ shear_matrix @ rotation_matrix @ scaling_matrix) return warp_matrix, scaling_ratio - @autocast_box_type() - def transform(self, results: dict) -> dict: - """The YOLOv5 random affine transform function. - - Args: - results (dict): The result dict. 
- - Returns: - dict: The result dict. - """ - img = results['img'] - # self.border is wh format - height = img.shape[0] + self.border[1] * 2 - width = img.shape[1] + self.border[0] * 2 - - # Note: Different from YOLOX - center_matrix = np.eye(3, dtype=np.float32) - center_matrix[0, 2] = -img.shape[1] / 2 - center_matrix[1, 2] = -img.shape[0] / 2 - - warp_matrix, scaling_ratio = self._get_random_homography_matrix( - height, width) - warp_matrix = warp_matrix @ center_matrix - - img = cv2.warpPerspective( - img, - warp_matrix, - dsize=(width, height), - borderValue=self.border_val) - results['img'] = img - results['img_shape'] = img.shape - - bboxes = results['gt_bboxes'] - num_bboxes = len(bboxes) - if num_bboxes: - orig_bboxes = bboxes.clone() - - bboxes.project_(warp_matrix) - if self.bbox_clip_border: - bboxes.clip_([height, width]) - - # filter bboxes - orig_bboxes.rescale_([scaling_ratio, scaling_ratio]) - - # Be careful: valid_index must convert to numpy, - # otherwise it will raise out of bounds when len(valid_index)=1 - valid_index = self.filter_gt_bboxes(orig_bboxes, bboxes).numpy() - results['gt_bboxes'] = bboxes[valid_index] - results['gt_bboxes_labels'] = results['gt_bboxes_labels'][ - valid_index] - results['gt_ignore_flags'] = results['gt_ignore_flags'][ - valid_index] - - if 'gt_masks' in results: - raise NotImplementedError('RandomAffine only supports bbox.') - return results - - def filter_gt_bboxes(self, origin_bboxes: HorizontalBoxes, - wrapped_bboxes: HorizontalBoxes) -> torch.Tensor: - """Filter gt bboxes. - - Args: - origin_bboxes (HorizontalBoxes): Origin bboxes. - wrapped_bboxes (HorizontalBoxes): Wrapped bboxes - - Returns: - dict: The result dict. - """ - origin_w = origin_bboxes.widths - origin_h = origin_bboxes.heights - wrapped_w = wrapped_bboxes.widths - wrapped_h = wrapped_bboxes.heights - aspect_ratio = np.maximum(wrapped_w / (wrapped_h + 1e-16), - wrapped_h / (wrapped_w + 1e-16)) - - wh_valid_idx = (wrapped_w > self.min_bbox_size) & \ - (wrapped_h > self.min_bbox_size) - area_valid_idx = wrapped_w * wrapped_h / (origin_w * origin_h + - 1e-16) > self.min_area_ratio - aspect_ratio_valid_idx = aspect_ratio < self.max_aspect_ratio - return wh_valid_idx & area_valid_idx & aspect_ratio_valid_idx - - def __repr__(self) -> str: - repr_str = self.__class__.__name__ - repr_str += f'(max_rotate_degree={self.max_rotate_degree}, ' - repr_str += f'max_translate_ratio={self.max_translate_ratio}, ' - repr_str += f'scaling_ratio_range={self.scaling_ratio_range}, ' - repr_str += f'max_shear_degree={self.max_shear_degree}, ' - repr_str += f'border={self.border}, ' - repr_str += f'border_val={self.border_val}, ' - repr_str += f'bbox_clip_border={self.bbox_clip_border})' - return repr_str - @staticmethod def _get_rotation_matrix(rotate_degrees: float) -> np.ndarray: """Get rotation matrix. 
@@ -686,6 +900,17 @@ class YOLOv5RandomAffine(BaseTransform): dtype=np.float32) return translation_matrix + def __repr__(self) -> str: + repr_str = self.__class__.__name__ + repr_str += f'(max_rotate_degree={self.max_rotate_degree}, ' + repr_str += f'max_translate_ratio={self.max_translate_ratio}, ' + repr_str += f'scaling_ratio_range={self.scaling_ratio_range}, ' + repr_str += f'max_shear_degree={self.max_shear_degree}, ' + repr_str += f'border={self.border}, ' + repr_str += f'border_val={self.border_val}, ' + repr_str += f'bbox_clip_border={self.bbox_clip_border})' + return repr_str + @TRANSFORMS.register_module() class PPYOLOERandomDistort(BaseTransform): @@ -723,7 +948,7 @@ class PPYOLOERandomDistort(BaseTransform): self.contrast_cfg = contrast_cfg self.brightness_cfg = brightness_cfg self.num_distort_func = num_distort_func - assert 0 < self.num_distort_func <= 4,\ + assert 0 < self.num_distort_func <= 4, \ 'num_distort_func must > 0 and <= 4' for cfg in [ self.hue_cfg, self.saturation_cfg, self.contrast_cfg, @@ -809,6 +1034,15 @@ class PPYOLOERandomDistort(BaseTransform): results = func(results) return results + def __repr__(self) -> str: + repr_str = self.__class__.__name__ + repr_str += f'(hue_cfg={self.hue_cfg}, ' + repr_str += f'saturation_cfg={self.saturation_cfg}, ' + repr_str += f'contrast_cfg={self.contrast_cfg}, ' + repr_str += f'brightness_cfg={self.brightness_cfg}, ' + repr_str += f'num_distort_func={self.num_distort_func})' + return repr_str + @TRANSFORMS.register_module() class PPYOLOERandomCrop(BaseTransform): @@ -837,7 +1071,7 @@ class PPYOLOERandomCrop(BaseTransform): Args: aspect_ratio (List[float]): Aspect ratio of cropped region. Default to [.5, 2]. - thresholds (List[float]): Iou thresholds for decide a valid bbox crop + thresholds (List[float]): Iou thresholds for deciding a valid bbox crop in [min, max] format. Defaults to [.0, .1, .3, .5, .7, .9]. scaling (List[float]): Ratio between a cropped region and the original image in [min, max] format. Default to [.3, 1.]. @@ -1079,3 +1313,194 @@ class PPYOLOERandomCrop(BaseTransform): valid, (cropped_box[:, :2] < cropped_box[:, 2:]).all(axis=1)) return np.where(valid)[0] + + def __repr__(self) -> str: + repr_str = self.__class__.__name__ + repr_str += f'(aspect_ratio={self.aspect_ratio}, ' + repr_str += f'thresholds={self.thresholds}, ' + repr_str += f'scaling={self.scaling}, ' + repr_str += f'num_attempts={self.num_attempts}, ' + repr_str += f'allow_no_crop={self.allow_no_crop}, ' + repr_str += f'cover_all_box={self.cover_all_box})' + return repr_str + + +@TRANSFORMS.register_module() +class YOLOv5CopyPaste(BaseTransform): + """Copy-Paste used in YOLOv5 and YOLOv8. + + This transform randomly copy some objects in the image to the mirror + position of the image.It is different from the `CopyPaste` in mmdet. + + Required Keys: + + - img (np.uint8) + - gt_bboxes (BaseBoxes[torch.float32]) + - gt_bboxes_labels (np.int64) (optional) + - gt_ignore_flags (bool) (optional) + - gt_masks (PolygonMasks) (optional) + + Modified Keys: + + - img + - gt_bboxes + - gt_bboxes_labels (np.int64) (optional) + - gt_ignore_flags (optional) + - gt_masks (optional) + + Args: + ioa_thresh (float): Ioa thresholds for deciding valid bbox. + prob (float): Probability of choosing objects. + Defaults to 0.5. 
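+
+    Examples:
+        >>> # Illustrative usage only; ``results`` must already contain
+        >>> # 'img', 'gt_bboxes' and polygon-format 'gt_masks'.
+        >>> transform = YOLOv5CopyPaste(ioa_thresh=0.3, prob=0.5)
+        >>> results = transform(results)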
+ """ + + def __init__(self, ioa_thresh: float = 0.3, prob: float = 0.5): + self.ioa_thresh = ioa_thresh + self.prob = prob + + @autocast_box_type() + def transform(self, results: dict) -> Union[dict, None]: + """The YOLOv5 and YOLOv8 Copy-Paste transform function. + + Args: + results (dict): The result dict. + + Returns: + dict: The result dict. + """ + if len(results.get('gt_masks', [])) == 0: + return results + gt_masks = results['gt_masks'] + assert isinstance(gt_masks, PolygonMasks),\ + 'only support type of PolygonMasks,' \ + ' but get type: %s' % type(gt_masks) + gt_bboxes = results['gt_bboxes'] + gt_bboxes_labels = results.get('gt_bboxes_labels', None) + img = results['img'] + img_h, img_w = img.shape[:2] + + # calculate ioa + gt_bboxes_flip = deepcopy(gt_bboxes) + gt_bboxes_flip.flip_(img.shape) + + ioa = self.bbox_ioa(gt_bboxes_flip, gt_bboxes) + indexes = torch.nonzero((ioa < self.ioa_thresh).all(1))[:, 0] + n = len(indexes) + valid_inds = random.choice( + indexes, size=round(self.prob * n), replace=False) + if len(valid_inds) == 0: + return results + + if gt_bboxes_labels is not None: + # prepare labels + gt_bboxes_labels = np.concatenate( + (gt_bboxes_labels, gt_bboxes_labels[valid_inds]), axis=0) + + # prepare bboxes + copypaste_bboxes = gt_bboxes_flip[valid_inds] + gt_bboxes = gt_bboxes.cat([gt_bboxes, copypaste_bboxes]) + + # prepare images + copypaste_gt_masks = gt_masks[valid_inds] + copypaste_gt_masks_flip = copypaste_gt_masks.flip() + # convert poly format to bitmap format + # example: poly: [[array(0.0, 0.0, 10.0, 0.0, 10.0, 10.0, 0.0, 10.0]] + # -> bitmap: a mask with shape equal to (1, img_h, img_w) + # # type1 low speed + # copypaste_gt_masks_bitmap = copypaste_gt_masks.to_ndarray() + # copypaste_mask = np.sum(copypaste_gt_masks_bitmap, axis=0) > 0 + + # type2 + copypaste_mask = np.zeros((img_h, img_w), dtype=np.uint8) + for poly in copypaste_gt_masks.masks: + poly = [i.reshape((-1, 1, 2)).astype(np.int32) for i in poly] + cv2.drawContours(copypaste_mask, poly, -1, (1, ), cv2.FILLED) + + copypaste_mask = copypaste_mask.astype(bool) + + # copy objects, and paste to the mirror position of the image + copypaste_mask_flip = mmcv.imflip( + copypaste_mask, direction='horizontal') + copypaste_img = mmcv.imflip(img, direction='horizontal') + img[copypaste_mask_flip] = copypaste_img[copypaste_mask_flip] + + # prepare masks + gt_masks = copypaste_gt_masks.cat([gt_masks, copypaste_gt_masks_flip]) + + if 'gt_ignore_flags' in results: + # prepare gt_ignore_flags + gt_ignore_flags = results['gt_ignore_flags'] + gt_ignore_flags = np.concatenate( + [gt_ignore_flags, gt_ignore_flags[valid_inds]], axis=0) + results['gt_ignore_flags'] = gt_ignore_flags + + results['img'] = img + results['gt_bboxes'] = gt_bboxes + if gt_bboxes_labels is not None: + results['gt_bboxes_labels'] = gt_bboxes_labels + results['gt_masks'] = gt_masks + + return results + + @staticmethod + def bbox_ioa(gt_bboxes_flip: HorizontalBoxes, + gt_bboxes: HorizontalBoxes, + eps: float = 1e-7) -> np.ndarray: + """Calculate ioa between gt_bboxes_flip and gt_bboxes. + + Args: + gt_bboxes_flip (HorizontalBoxes): Flipped ground truth + bounding boxes. + gt_bboxes (HorizontalBoxes): Ground truth bounding boxes. + eps (float): Default to 1e-10. + Return: + (Tensor): Ioa. 
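+
+        Note:
+            The returned value is the intersection area divided by the area
+            of ``gt_bboxes`` (the second argument) rather than by the union,
+            i.e. how much of each original box is covered by each flipped box.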
+ """ + gt_bboxes_flip = gt_bboxes_flip.tensor + gt_bboxes = gt_bboxes.tensor + + # Get the coordinates of bounding boxes + b1_x1, b1_y1, b1_x2, b1_y2 = gt_bboxes_flip.T + b2_x1, b2_y1, b2_x2, b2_y2 = gt_bboxes.T + + # Intersection area + inter_area = (torch.minimum(b1_x2[:, None], + b2_x2) - torch.maximum(b1_x1[:, None], + b2_x1)).clip(0) * \ + (torch.minimum(b1_y2[:, None], + b2_y2) - torch.maximum(b1_y1[:, None], + b2_y1)).clip(0) + + # box2 area + box2_area = (b2_x2 - b2_x1) * (b2_y2 - b2_y1) + eps + + # Intersection over box2 area + return inter_area / box2_area + + def __repr__(self) -> str: + repr_str = self.__class__.__name__ + repr_str += f'(ioa_thresh={self.ioa_thresh},' + repr_str += f'prob={self.prob})' + return repr_str + + +@TRANSFORMS.register_module() +class RemoveDataElement(BaseTransform): + """Remove unnecessary data element in results. + + Args: + keys (Union[str, Sequence[str]]): Keys need to be removed. + """ + + def __init__(self, keys: Union[str, Sequence[str]]): + self.keys = [keys] if isinstance(keys, str) else keys + + def transform(self, results: dict) -> dict: + for key in self.keys: + results.pop(key, None) + return results + + def __repr__(self) -> str: + repr_str = self.__class__.__name__ + repr_str += f'(keys={self.keys})' + return repr_str diff --git a/tests/test_datasets/test_transforms/test_mix_img_transforms.py b/tests/test_datasets/test_transforms/test_mix_img_transforms.py index 253fd64b..1d4eff0b 100644 --- a/tests/test_datasets/test_transforms/test_mix_img_transforms.py +++ b/tests/test_datasets/test_transforms/test_mix_img_transforms.py @@ -6,7 +6,7 @@ import unittest import numpy as np import torch from mmdet.structures.bbox import HorizontalBoxes -from mmdet.structures.mask import BitmapMasks +from mmdet.structures.mask import BitmapMasks, PolygonMasks from mmyolo.datasets import YOLOv5CocoDataset from mmyolo.datasets.transforms import Mosaic, Mosaic9, YOLOv5MixUp, YOLOXMixUp @@ -23,7 +23,6 @@ class TestMosaic(unittest.TestCase): TestCase calls functions in this order: setUp() -> testMethod() -> tearDown() -> cleanUp() """ - rng = np.random.RandomState(0) self.pre_transform = [ dict( type='LoadImageFromFile', @@ -49,8 +48,6 @@ class TestMosaic(unittest.TestCase): dtype=np.float32), 'gt_ignore_flags': np.array([0, 0, 1], dtype=bool), - 'gt_masks': - BitmapMasks(rng.rand(3, 224, 224), height=224, width=224), 'dataset': self.dataset } @@ -107,6 +104,48 @@ class TestMosaic(unittest.TestCase): self.assertTrue(results['gt_bboxes'].dtype == torch.float32) self.assertTrue(results['gt_ignore_flags'].dtype == bool) + def test_transform_with_mask(self): + rng = np.random.RandomState(0) + pre_transform = [ + dict( + type='LoadImageFromFile', + file_client_args=dict(backend='disk')), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True) + ] + + dataset = YOLOv5CocoDataset( + data_prefix=dict( + img=osp.join(osp.dirname(__file__), '../../data')), + ann_file=osp.join( + osp.dirname(__file__), '../../data/coco_sample_color.json'), + filter_cfg=dict(filter_empty_gt=False, min_size=32), + pipeline=[]) + results = { + 'img': + np.random.random((224, 224, 3)), + 'img_shape': (224, 224), + 'gt_bboxes_labels': + np.array([1, 2, 3], dtype=np.int64), + 'gt_bboxes': + np.array([[10, 10, 20, 20], [20, 20, 40, 40], [40, 40, 80, 80]], + dtype=np.float32), + 'gt_ignore_flags': + np.array([0, 0, 1], dtype=bool), + 'gt_masks': + PolygonMasks.random(num_masks=3, height=224, width=224, rng=rng), + 'dataset': + dataset + } + transform = Mosaic(img_scale=(12, 10), 
pre_transform=pre_transform) + results['gt_bboxes'] = HorizontalBoxes(results['gt_bboxes']) + results = transform(results) + self.assertTrue(results['img'].shape[:2] == (20, 24)) + self.assertTrue(results['gt_bboxes_labels'].shape[0] == + results['gt_bboxes'].shape[0]) + self.assertTrue(results['gt_bboxes_labels'].dtype == np.int64) + self.assertTrue(results['gt_bboxes'].dtype == torch.float32) + self.assertTrue(results['gt_ignore_flags'].dtype == bool) + class TestMosaic9(unittest.TestCase): @@ -209,7 +248,6 @@ class TestYOLOv5MixUp(unittest.TestCase): TestCase calls functions in this order: setUp() -> testMethod() -> tearDown() -> cleanUp() """ - rng = np.random.RandomState(0) self.pre_transform = [ dict( type='LoadImageFromFile', @@ -235,8 +273,6 @@ class TestYOLOv5MixUp(unittest.TestCase): dtype=np.float32), 'gt_ignore_flags': np.array([0, 0, 1], dtype=bool), - 'gt_masks': - BitmapMasks(rng.rand(3, 288, 512), height=288, width=512), 'dataset': self.dataset } @@ -268,6 +304,48 @@ class TestYOLOv5MixUp(unittest.TestCase): self.assertTrue(results['gt_bboxes'].dtype == torch.float32) self.assertTrue(results['gt_ignore_flags'].dtype == bool) + def test_transform_with_mask(self): + rng = np.random.RandomState(0) + pre_transform = [ + dict( + type='LoadImageFromFile', + file_client_args=dict(backend='disk')), + dict(type='LoadAnnotations', with_bbox=True, with_mask=True) + ] + dataset = YOLOv5CocoDataset( + data_prefix=dict( + img=osp.join(osp.dirname(__file__), '../../data')), + ann_file=osp.join( + osp.dirname(__file__), '../../data/coco_sample_color.json'), + filter_cfg=dict(filter_empty_gt=False, min_size=32), + pipeline=[]) + + results = { + 'img': + np.random.random((288, 512, 3)), + 'img_shape': (288, 512), + 'gt_bboxes_labels': + np.array([1, 2, 3], dtype=np.int64), + 'gt_bboxes': + np.array([[10, 10, 20, 20], [20, 20, 40, 40], [40, 40, 80, 80]], + dtype=np.float32), + 'gt_ignore_flags': + np.array([0, 0, 1], dtype=bool), + 'gt_masks': + PolygonMasks.random(num_masks=3, height=288, width=512, rng=rng), + 'dataset': + dataset + } + + transform = YOLOv5MixUp(pre_transform=pre_transform) + results = transform(copy.deepcopy(results)) + self.assertTrue(results['img'].shape[:2] == (288, 512)) + self.assertTrue(results['gt_bboxes_labels'].shape[0] == + results['gt_bboxes'].shape[0]) + self.assertTrue(results['gt_bboxes_labels'].dtype == np.int64) + self.assertTrue(results['gt_bboxes'].dtype == np.float32) + self.assertTrue(results['gt_ignore_flags'].dtype == bool) + class TestYOLOXMixUp(unittest.TestCase): diff --git a/tests/test_datasets/test_transforms/test_transforms.py b/tests/test_datasets/test_transforms/test_transforms.py index d256dd9f..fc46151d 100644 --- a/tests/test_datasets/test_transforms/test_transforms.py +++ b/tests/test_datasets/test_transforms/test_transforms.py @@ -7,14 +7,15 @@ import mmcv import numpy as np import torch from mmdet.structures.bbox import HorizontalBoxes -from mmdet.structures.mask import BitmapMasks +from mmdet.structures.mask import BitmapMasks, PolygonMasks from mmyolo.datasets.transforms import (LetterResize, LoadAnnotations, YOLOv5HSVRandomAug, YOLOv5KeepRatioResize, YOLOv5RandomAffine) from mmyolo.datasets.transforms.transforms import (PPYOLOERandomCrop, - PPYOLOERandomDistort) + PPYOLOERandomDistort, + YOLOv5CopyPaste) class TestLetterResize(unittest.TestCase): @@ -30,7 +31,7 @@ class TestLetterResize(unittest.TestCase): img=np.random.random((300, 400, 3)), gt_bboxes=np.array([[0, 0, 150, 150]], dtype=np.float32), batch_shape=np.array([192, 672], 
dtype=np.int64), - gt_masks=BitmapMasks(rng.rand(1, 300, 400), height=300, width=400)) + gt_masks=PolygonMasks.random(1, height=300, width=400, rng=rng)) self.data_info2 = dict( img=np.random.random((300, 400, 3)), gt_bboxes=np.array([[0, 0, 150, 150]], dtype=np.float32)) @@ -88,7 +89,6 @@ class TestLetterResize(unittest.TestCase): # Test transform = LetterResize(scale=(640, 640), pad_val=dict(img=144)) - rng = np.random.RandomState(0) for _ in range(5): input_h, input_w = np.random.randint(100, 700), np.random.randint( 100, 700) @@ -99,8 +99,8 @@ class TestLetterResize(unittest.TestCase): img=np.random.random((input_h, input_w, 3)), gt_bboxes=np.array([[0, 0, 10, 10]], dtype=np.float32), batch_shape=np.array([output_h, output_w], dtype=np.int64), - gt_masks=BitmapMasks( - rng.rand(1, input_h, input_w), + gt_masks=PolygonMasks( + [[np.array([0., 0., 0., 10., 10., 10., 10., 0.])]], height=input_h, width=input_w)) results = transform(data_info) @@ -111,15 +111,14 @@ class TestLetterResize(unittest.TestCase): # Test without batchshape transform = LetterResize(scale=(640, 640), pad_val=dict(img=144)) - rng = np.random.RandomState(0) for _ in range(5): input_h, input_w = np.random.randint(100, 700), np.random.randint( 100, 700) data_info = dict( img=np.random.random((input_h, input_w, 3)), gt_bboxes=np.array([[0, 0, 10, 10]], dtype=np.float32), - gt_masks=BitmapMasks( - rng.rand(1, input_h, input_w), + gt_masks=PolygonMasks( + [[np.array([0., 0., 0., 10., 10., 10., 10., 0.])]], height=input_h, width=input_w)) results = transform(data_info) @@ -178,7 +177,8 @@ class TestYOLOv5KeepRatioResize(unittest.TestCase): self.data_info1 = dict( img=np.random.random((300, 400, 3)), gt_bboxes=np.array([[0, 0, 150, 150]], dtype=np.float32), - gt_masks=BitmapMasks(rng.rand(1, 300, 400), height=300, width=400)) + gt_masks=PolygonMasks.random( + num_masks=1, height=300, width=400, rng=rng)) self.data_info2 = dict(img=np.random.random((300, 400, 3))) def test_yolov5_keep_ratio_resize(self): @@ -454,3 +454,37 @@ class TestPPYOLOERandomDistort(unittest.TestCase): self.assertTrue(results['gt_bboxes_labels'].dtype == np.int64) self.assertTrue(results['gt_bboxes'].dtype == torch.float32) self.assertTrue(results['gt_ignore_flags'].dtype == bool) + + +class TestYOLOv5CopyPaste(unittest.TestCase): + + def setUp(self): + """Set up the data info which are used in every test method. 
+ + TestCase calls functions in this order: setUp() -> testMethod() -> + tearDown() -> cleanUp() + """ + self.data_info = dict( + img=np.random.random((300, 400, 3)), + gt_bboxes=np.array([[0, 0, 10, 10]], dtype=np.float32), + gt_masks=PolygonMasks( + [[np.array([0., 0., 0., 10., 10., 10., 10., 0.])]], + height=300, + width=400)) + + def test_transform(self): + # test transform + transform = YOLOv5CopyPaste(prob=1.0) + results = transform(copy.deepcopy(self.data_info)) + self.assertTrue(len(results['gt_bboxes']) == 2) + self.assertTrue(len(results['gt_masks']) == 2) + + rng = np.random.RandomState(0) + # test with bitmap + with self.assertRaises(AssertionError): + results = transform( + dict( + img=np.random.random((300, 400, 3)), + gt_bboxes=np.array([[0, 0, 10, 10]], dtype=np.float32), + gt_masks=BitmapMasks( + rng.rand(1, 300, 400), height=300, width=400))) From f2a576d153555c930043604521cb45739a119a83 Mon Sep 17 00:00:00 2001 From: Range King Date: Mon, 20 Feb 2023 15:51:37 +0800 Subject: [PATCH 32/64] [Docs] Refactor zh-CN docs (#568) * refactor_docs_cn * update * update * update * add dataset_preparation --------- Co-authored-by: huanghaian --- .readthedocs.yml | 1 - README_zh-CN.md | 116 +++-- docs/README.md | 28 + .../cross-library_application.md | 1 + docs/zh_cn/common_usage/amp_training.md | 1 + docs/zh_cn/common_usage/freeze_layers.md | 28 + docs/zh_cn/common_usage/mim_usage.md | 89 ++++ .../module_combination.md | 4 +- docs/zh_cn/common_usage/multi_necks.md | 40 ++ docs/zh_cn/common_usage/output_predictions.md | 40 ++ .../plugins.md | 2 +- docs/zh_cn/common_usage/resume_training.md | 1 + docs/zh_cn/common_usage/set_random_seed.md | 20 + docs/zh_cn/common_usage/set_syncbn.md | 1 + docs/zh_cn/common_usage/specify_device.md | 23 + docs/zh_cn/deploy/index.rst | 16 - .../algorithm_descriptions/index.rst | 9 - .../rtmdet_description.md | 0 .../yolov5_description.md | 0 .../yolov6_description.md | 0 .../yolov8_description.md | 0 .../contributing.md | 4 +- .../featured_topics/dataset_preparation.md | 1 + .../deploy/easydeploy_guide.md | 5 + .../deploy}/index.rst | 22 +- .../deploy/mmdeploy_guide.md} | 0 .../deploy/mmdeploy_yolov5.md} | 6 +- .../featured_topics/industry_examples.md | 1 + .../labeling_to_deployment_tutorials.md} | 2 +- .../model_design.md | 4 +- .../replace_backbone.md} | 287 +---------- .../featured_topics/troubleshooting_steps.md | 1 + .../visualization.md | 249 ++++++++- .../15_minutes_instance_segmentation.md | 3 + .../15_minutes_object_detection.md} | 4 +- .../15_minutes_rotated_object_detection.md | 3 + docs/zh_cn/{ => get_started}/article.md | 34 +- docs/zh_cn/get_started/dependencies.md | 44 ++ .../installation.md} | 69 +-- docs/zh_cn/{ => get_started}/overview.md | 38 +- docs/zh_cn/index.rst | 71 ++- docs/zh_cn/{community => notes}/code_style.md | 2 +- .../{user_guides => tutorials}/config.md | 0 .../data_flow.md | 0 docs/zh_cn/{notes => tutorials}/faq.md | 4 +- docs/zh_cn/useful_tools/browse_coco_json.md | 62 +++ docs/zh_cn/useful_tools/browse_dataset.md | 57 +++ docs/zh_cn/useful_tools/dataset_analysis.md | 80 +++ docs/zh_cn/useful_tools/dataset_converters.md | 58 +++ docs/zh_cn/useful_tools/download_dataset.md | 11 + docs/zh_cn/useful_tools/extract_subcoco.md | 60 +++ docs/zh_cn/useful_tools/log_analysis.md | 82 +++ docs/zh_cn/useful_tools/model_converters.md | 52 ++ docs/zh_cn/useful_tools/optimize_anchors.md | 37 ++ docs/zh_cn/useful_tools/print_config.md | 20 + docs/zh_cn/useful_tools/vis_scheduler.md | 44 ++ docs/zh_cn/user_guides/index.rst | 28 - 
docs/zh_cn/user_guides/useful_tools.md | 481 ------------------ requirements/docs.txt | 5 + requirements/readthedocs.txt | 5 - 60 files changed, 1350 insertions(+), 1006 deletions(-) create mode 100644 docs/README.md create mode 100644 docs/zh_cn/advanced_guides/cross-library_application.md create mode 100644 docs/zh_cn/common_usage/amp_training.md create mode 100644 docs/zh_cn/common_usage/freeze_layers.md create mode 100644 docs/zh_cn/common_usage/mim_usage.md rename docs/zh_cn/{advanced_guides => common_usage}/module_combination.md (99%) create mode 100644 docs/zh_cn/common_usage/multi_necks.md create mode 100644 docs/zh_cn/common_usage/output_predictions.md rename docs/zh_cn/{advanced_guides => common_usage}/plugins.md (97%) create mode 100644 docs/zh_cn/common_usage/resume_training.md create mode 100644 docs/zh_cn/common_usage/set_random_seed.md create mode 100644 docs/zh_cn/common_usage/set_syncbn.md create mode 100644 docs/zh_cn/common_usage/specify_device.md delete mode 100644 docs/zh_cn/deploy/index.rst rename docs/zh_cn/{ => featured_topics}/algorithm_descriptions/index.rst (68%) rename docs/zh_cn/{ => featured_topics}/algorithm_descriptions/rtmdet_description.md (100%) rename docs/zh_cn/{ => featured_topics}/algorithm_descriptions/yolov5_description.md (100%) rename docs/zh_cn/{ => featured_topics}/algorithm_descriptions/yolov6_description.md (100%) rename docs/zh_cn/{ => featured_topics}/algorithm_descriptions/yolov8_description.md (100%) rename docs/zh_cn/{community => featured_topics}/contributing.md (99%) create mode 100644 docs/zh_cn/featured_topics/dataset_preparation.md create mode 100644 docs/zh_cn/featured_topics/deploy/easydeploy_guide.md rename docs/zh_cn/{advanced_guides => featured_topics/deploy}/index.rst (51%) rename docs/zh_cn/{deploy/basic_deployment_guide.md => featured_topics/deploy/mmdeploy_guide.md} (100%) rename docs/zh_cn/{deploy/yolov5_deployment.md => featured_topics/deploy/mmdeploy_yolov5.md} (98%) create mode 100644 docs/zh_cn/featured_topics/industry_examples.md rename docs/zh_cn/{user_guides/custom_dataset.md => featured_topics/labeling_to_deployment_tutorials.md} (99%) rename docs/zh_cn/{algorithm_descriptions => featured_topics}/model_design.md (98%) rename docs/zh_cn/{advanced_guides/how_to.md => featured_topics/replace_backbone.md} (52%) create mode 100644 docs/zh_cn/featured_topics/troubleshooting_steps.md rename docs/zh_cn/{user_guides => featured_topics}/visualization.md (53%) create mode 100644 docs/zh_cn/get_started/15_minutes_instance_segmentation.md rename docs/zh_cn/{user_guides/yolov5_tutorial.md => get_started/15_minutes_object_detection.md} (98%) create mode 100644 docs/zh_cn/get_started/15_minutes_rotated_object_detection.md rename docs/zh_cn/{ => get_started}/article.md (99%) create mode 100644 docs/zh_cn/get_started/dependencies.md rename docs/zh_cn/{get_started.md => get_started/installation.md} (82%) rename docs/zh_cn/{ => get_started}/overview.md (51%) rename docs/zh_cn/{community => notes}/code_style.md (99%) rename docs/zh_cn/{user_guides => tutorials}/config.md (100%) rename docs/zh_cn/{advanced_guides => tutorials}/data_flow.md (100%) rename docs/zh_cn/{notes => tutorials}/faq.md (83%) create mode 100644 docs/zh_cn/useful_tools/browse_coco_json.md create mode 100644 docs/zh_cn/useful_tools/browse_dataset.md create mode 100644 docs/zh_cn/useful_tools/dataset_analysis.md create mode 100644 docs/zh_cn/useful_tools/dataset_converters.md create mode 100644 docs/zh_cn/useful_tools/download_dataset.md create mode 100644 
docs/zh_cn/useful_tools/extract_subcoco.md create mode 100644 docs/zh_cn/useful_tools/log_analysis.md create mode 100644 docs/zh_cn/useful_tools/model_converters.md create mode 100644 docs/zh_cn/useful_tools/optimize_anchors.md create mode 100644 docs/zh_cn/useful_tools/print_config.md create mode 100644 docs/zh_cn/useful_tools/vis_scheduler.md delete mode 100644 docs/zh_cn/user_guides/index.rst delete mode 100644 docs/zh_cn/user_guides/useful_tools.md delete mode 100644 requirements/readthedocs.txt diff --git a/.readthedocs.yml b/.readthedocs.yml index 6cfbf5d3..c9ab01ce 100644 --- a/.readthedocs.yml +++ b/.readthedocs.yml @@ -6,4 +6,3 @@ python: version: 3.7 install: - requirements: requirements/docs.txt - - requirements: requirements/readthedocs.txt diff --git a/README_zh-CN.md b/README_zh-CN.md index 67275067..991dc6c5 100644 --- a/README_zh-CN.md +++ b/README_zh-CN.md @@ -27,7 +27,7 @@ [![issue resolution](https://isitmaintained.com/badge/resolution/open-mmlab/mmyolo.svg)](https://github.com/open-mmlab/mmyolo/issues) [📘使用文档](https://mmyolo.readthedocs.io/zh_CN/latest/) | -[🛠️安装教程](https://mmyolo.readthedocs.io/zh_CN/latest/get_started.html) | +[🛠️安装教程](https://mmyolo.readthedocs.io/zh_CN/latest/get_started/installation.html) | [👀模型库](https://mmyolo.readthedocs.io/zh_CN/latest/model_zoo.html) | [🆕更新日志](https://mmyolo.readthedocs.io/zh_CN/latest/notes/changelog.html) | [🤔报告问题](https://github.com/open-mmlab/mmyolo/issues/new/choose) @@ -150,13 +150,13 @@ MMYOLO 是一个基于 PyTorch 和 MMDetection 的 YOLO 系列算法开源工具 基类-P5 图为 RangeKing@GitHub 提供,非常感谢! -P6 模型图详见 [model_design.md](docs/zh_cn/algorithm_descriptions/model_design.md)。 +P6 模型图详见 [model_design.md](docs/zh_cn/featured_topics/model_design.md)。
## 🛠️ 安装 [🔝](#-table-of-contents) -MMYOLO 依赖 PyTorch, MMCV, MMEngine 和 MMDetection,以下是安装的简要步骤。 更详细的安装指南请参考[安装文档](docs/zh_cn/get_started.md)。 +MMYOLO 依赖 PyTorch, MMCV, MMEngine 和 MMDetection,以下是安装的简要步骤。 更详细的安装指南请参考[安装文档](docs/zh_cn/get_started/installation.md)。 ```shell conda create -n open-mmlab python=3.8 pytorch==1.10.1 torchvision==0.11.2 cudatoolkit=11.3 -c pytorch -y @@ -181,42 +181,94 @@ MMYOLO 用法和 MMDetection 几乎一致,所有教程都是通用的,你也 针对和 MMDetection 不同的部分,我们也准备了用户指南和进阶指南,请阅读我们的 [文档](https://mmyolo.readthedocs.io/zh_CN/latest/) 。 -- 用户指南 +
+开启 MMYOLO 之旅 - - [训练 & 测试](https://mmyolo.readthedocs.io/zh_CN/latest/user_guides/index.html#训练-测试) - - [学习 YOLOv5 配置文件](docs/zh_cn/user_guides/config.md) - - [从入门到部署全流程](https://mmyolo.readthedocs.io/zh_CN/latest/user_guides/index.html#从入门到部署全流程) - - [自定义数据集](docs/zh_cn/user_guides/custom_dataset.md) - - [YOLOv5 从入门到部署全流程](docs/zh_cn/user_guides/yolov5_tutorial.md) - - [实用工具](https://mmyolo.readthedocs.io/zh_CN/latest/user_guides/index.html#实用工具) - - [可视化教程](docs/zh_cn/user_guides/visualization.md) - - [实用工具](docs/zh_cn/user_guides/useful_tools.md) +- [概述](docs/zh_cn/get_started/overview.md) +- [依赖](docs/zh_cn/get_started/dependencies.md) +- [安装和验证](docs/zh_cn/get_started/installation.md) +- [15 分钟上手 MMYOLO 目标检测](docs/zh_cn/get_started/15_minutes_object_detection.md) +- [15 分钟上手 MMYOLO 旋转框目标检测](docs/zh_cn/get_started/15_minutes_rotated_object_detection.md) +- [15 分钟上手 MMYOLO 实例分割](docs/zh_cn/get_started/15_minutes_instance_segmentation.md) +- [中文解读资源汇总](docs/zh_cn/get_started/article.md) -- 算法描述 +
- - [必备基础](https://mmyolo.readthedocs.io/zh_CN/latest/algorithm_descriptions/index.html#基础内容) - - [模型设计相关说明](docs/zh_cn/algorithm_descriptions/model_design.md) - - [算法原理和实现全解析](https://mmyolo.readthedocs.io/zh_CN/latest/algorithm_descriptions/index.html#算法原理和实现全解析) - - [YOLOv5 原理和实现全解析](docs/zh_cn/algorithm_descriptions/yolov5_description.md) - - [YOLOv6 原理和实现全解析](docs/zh_cn/algorithm_descriptions/yolov6_description.md) - - [RTMDet 原理和实现全解析](docs/zh_cn/algorithm_descriptions/rtmdet_description.md) - - [YOLOv8 原理和实现全解析](docs/zh_cn/algorithm_descriptions/yolov8_description.md) +
+推荐专题 -- 算法部署 +- [如何给 MMYOLO 贡献代码](docs/zh_cn/featured_topics/contributing.md) +- [MMYOLO 模型结构设计](docs/zh_cn/featured_topics/model_design.md) +- [MMYOLO 产业范例介绍](docs/zh_cn/featured_topics/industry_examples.md) +- [数据集准备和说明](docs/zh_cn/featured_topics/dataset_preparation.md) +- [原理和实现全解析](docs/zh_cn/featured_topics/algorithm_descriptions/) +- [轻松更换主干网络](docs/zh_cn/featured_topics/replace_backbone.md) +- [标注+训练+测试+部署全流程](docs/zh_cn/featured_topics/labeling_to_deployment_tutorials.md) +- [关于可视化的一切](docs/zh_cn/featured_topics/visualization.md) +- [模型部署流程](docs/zh_cn/featured_topics/deploy/) +- [常见错误排查步骤](docs/zh_cn/featured_topics/troubleshooting_steps.md) - - [部署必备教程](https://mmyolo.readthedocs.io/zh_CN/latest/deploy/index.html#id1) - - [部署必备教程](docs/zh_cn/deploy/basic_deployment_guide.md) - - [部署全流程说明](https://mmyolo.readthedocs.io/zh_CN/latest/deploy/index.html#id2) - - [YOLOv5 部署全流程说明](docs/zh_cn/deploy/yolov5_deployment.md) +
-- 进阶指南 +
+常用功能 - - [模块组合](docs/zh_cn/advanced_guides/module_combination.md) - - [数据流](docs/zh_cn/advanced_guides/data_flow.md) - - [How to](docs/zh_cn/advanced_guides/how_to.md) - - [插件](docs/zh_cn/advanced_guides/plugins.md) +- [恢复训练](docs/zh_cn/common_usage/resume_training.md) +- [开启和关闭 SyncBatchNorm](docs/zh_cn/common_usage/syncbn.md) +- [开启混合精度训练](docs/zh_cn/common_usage/amp_training.md) +- [给主干网络增加插件](docs/zh_cn/common_usage/plugins.md) +- [冻结指定网络层权重](docs/zh_cn/common_usage/common_usage/freeze_layers.md) +- [输出模型预测结果](docs/zh_cn/common_usage/output_predictions.md) +- [设置随机种子](docs/zh_cn/common_usage/set_random_seed.md) +- [使用 mim 跨库调用其他 OpenMMLab 仓库的脚本](docs/zh_cn/common_usage/mim_usage.md) +- [应用多个 Neck](docs/zh_cn/common_usage/multi_necks.md) +- [指定特定设备训练或推理](docs/zh_cn/common_usage/specify_device.md) +- [算法组合替换教程](docs/zh_cn/common_usage/module_combination.md) -- [解读文章和资源汇总](docs/zh_cn/article.md) +
+ +
+实用工具 + +- [可视化 COCO 标签](docs/zh_cn/useful_tools/browse_coco_json.md) +- [可视化数据集](docs/zh_cn/useful_tools/browse_dataset.md) +- [可视化数据集分析结果](docs/zh_cn/useful_tools/dataset_analysis.md) +- [数据集转换](docs/zh_cn/useful_tools/dataset_converters.md) +- [数据集下载](docs/zh_cn/useful_tools/download_dataset.md) +- [提取 COCO 子集](docs/zh_cn/useful_tools/extract_subcoco.md) +- [日志分析](docs/zh_cn/useful_tools/log_analysis.md) +- [模型转换](docs/zh_cn/useful_tools/model_converters.md) +- [优化锚框尺寸](docs/zh_cn/useful_tools/optimize_anchors.md) +- [打印完整配置文件](docs/zh_cn/useful_tools/print_config.md) +- [可视化优化器参数策略](docs/zh_cn/useful_tools/vis_scheduler.md) + +
+ +
+基础教程 + +- [学习 YOLOv5 配置文件](docs/zh_cn/tutorials/config.md) +- [数据流](docs/zh_cn/tutorials/data_flow.md) +- [常见问题](docs/zh_cn/tutorials/faq.md) + +
+ +
+进阶教程 + +- [MMYOLO 跨库应用解析](docs/zh_cn/advanced_guides/cross-library_application.md) + +
+ +
+说明 + +- [更新日志](docs/zh_cn/notes/changelog.md) +- [兼容性说明](docs/zh_cn/notes/compatibility.md) +- [默认约定](docs/zh_cn/notes/conventions.md) +- [代码规范](docs/zh_cn/notes/code_style.md) + +
## 📊 基准测试和模型库 [🔝](#-table-of-contents) @@ -301,7 +353,7 @@ MMYOLO 用法和 MMDetection 几乎一致,所有教程都是通用的,你也 ## ❓ 常见问题 [🔝](#-table-of-contents) -请参考 [FAQ](docs/zh_cn/notes/faq.md) 了解其他用户的常见问题。 +请参考 [FAQ](docs/zh_cn/featured_topics/faq.md) 了解其他用户的常见问题。 ## 🙌 贡献指南 [🔝](#-table-of-contents) diff --git a/docs/README.md b/docs/README.md new file mode 100644 index 00000000..f0b79699 --- /dev/null +++ b/docs/README.md @@ -0,0 +1,28 @@ +## Build Documentation + +1. Clone MMYOLO + + ```bash + git clone https://github.com/open-mmlab/mmyolo.git + cd mmyolo + ``` + +2. Install the building dependencies of documentation + + ```bash + pip install -r requirements/docs.txt + ``` + +3. Change directory to `docs/en` or `docs/zh_cn` + + ```bash + cd docs/en # or docs/zh_cn + ``` + +4. Build documentation + + ```bash + make html + ``` + +5. Open `_build/html/index.html` with browser diff --git a/docs/zh_cn/advanced_guides/cross-library_application.md b/docs/zh_cn/advanced_guides/cross-library_application.md new file mode 100644 index 00000000..d95f68cd --- /dev/null +++ b/docs/zh_cn/advanced_guides/cross-library_application.md @@ -0,0 +1 @@ +# MMYOLO 跨库应用解析 diff --git a/docs/zh_cn/common_usage/amp_training.md b/docs/zh_cn/common_usage/amp_training.md new file mode 100644 index 00000000..d3a10e71 --- /dev/null +++ b/docs/zh_cn/common_usage/amp_training.md @@ -0,0 +1 @@ +# 自动混合精度(AMP)训练 diff --git a/docs/zh_cn/common_usage/freeze_layers.md b/docs/zh_cn/common_usage/freeze_layers.md new file mode 100644 index 00000000..ca061390 --- /dev/null +++ b/docs/zh_cn/common_usage/freeze_layers.md @@ -0,0 +1,28 @@ +# 冻结指定网络层权重 + +## 冻结 backbone 权重 + +在 MMYOLO 中我们可以通过设置 `frozen_stages` 参数去冻结主干网络的部分 `stage`, 使这些 `stage` 的参数不参与模型的更新。 +需要注意的是:`frozen_stages = i` 表示的意思是指从最开始的 `stage` 开始到第 `i` 层 `stage` 的所有参数都会被冻结。下面是 `YOLOv5` 的例子,其他算法也是同样的逻辑: + +```python +_base_ = './yolov5_s-v61_syncbn_8xb16-300e_coco.py' + +model = dict( + backbone=dict( + frozen_stages=1 # 表示第一层 stage 以及它之前的所有 stage 中的参数都会被冻结 + )) +``` + +## 冻结 neck 权重 + +MMYOLO 中也可以通过参数 `freeze_all` 去冻结整个 `neck` 的参数。下面是 `YOLOv5` 的例子,其他算法也是同样的逻辑: + +```python +_base_ = './yolov5_s-v61_syncbn_8xb16-300e_coco.py' + +model = dict( + neck=dict( + freeze_all=True # freeze_all=True 时表示整个 neck 的参数都会被冻结 + )) +``` diff --git a/docs/zh_cn/common_usage/mim_usage.md b/docs/zh_cn/common_usage/mim_usage.md new file mode 100644 index 00000000..aaf26920 --- /dev/null +++ b/docs/zh_cn/common_usage/mim_usage.md @@ -0,0 +1,89 @@ +# 使用 mim 跨库调用其他 OpenMMLab 仓库的脚本 + +```{note} +1. 目前暂不支持跨库调用所有脚本,正在修复中。等修复完成,本文档会添加更多的例子。 +2. 
绘制 mAP 和 计算平均训练速度 两项功能在 MMDetection dev-3.x 分支中修复,目前需要通过源码安装该分支才能成功调用。 +``` + +## 日志分析 + +### 曲线图绘制 + +MMDetection 中的 `tools/analysis_tools/analyze_logs.py` 可利用指定的训练 log 文件绘制 loss/mAP 曲线图, 第一次运行前请先运行 `pip install seaborn` 安装必要依赖。 + +```shell +mim run mmdet analyze_logs plot_curve \ + ${LOG} \ # 日志文件路径 + [--keys ${KEYS}] \ # 需要绘制的指标,默认为 'bbox_mAP' + [--start-epoch ${START_EPOCH}] # 起始的 epoch,默认为 1 + [--eval-interval ${EVALUATION_INTERVAL}] \ # 评估间隔,默认为 1 + [--title ${TITLE}] \ # 图片标题,无默认值 + [--legend ${LEGEND}] \ # 图例,默认为 None + [--backend ${BACKEND}] \ # 绘制后端,默认为 None + [--style ${STYLE}] \ # 绘制风格,默认为 'dark' + [--out ${OUT_FILE}] # 输出文件路径 +# [] 代表可选参数,实际输入命令行时,不用输入 [] +``` + +样例: + +- 绘制分类损失曲线图 + + ```shell + mim run mmdet analyze_logs plot_curve \ + yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700.log.json \ + --keys loss_cls \ + --legend loss_cls + ``` + + + +- 绘制分类损失、回归损失曲线图,保存图片为对应的 pdf 文件 + + ```shell + mim run mmdet analyze_logs plot_curve \ + yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700.log.json \ + --keys loss_cls loss_bbox \ + --legend loss_cls loss_bbox \ + --out losses_yolov5_s.pdf + ``` + + + +- 在同一图像中比较两次运行结果的 bbox mAP + + ```shell + mim run mmdet analyze_logs plot_curve \ + yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700.log.json \ + yolov5_n-v61_syncbn_fast_8xb16-300e_coco_20220919_090739.log.json \ + --keys bbox_mAP \ + --legend yolov5_s yolov5_n \ + --eval-interval 10 # 注意评估间隔必须和训练时设置的一致,否则会报错 + ``` + + + +### 计算平均训练速度 + +```shell +mim run mmdet analyze_logs cal_train_time \ + ${LOG} \ # 日志文件路径 + [--include-outliers] # 计算时包含每个 epoch 的第一个数据 +``` + +样例: + +```shell +mim run mmdet analyze_logs cal_train_time \ + yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700.log.json +``` + +输出以如下形式展示: + +```text +-----Analyze train time of yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700.log.json----- +slowest epoch 278, average time is 0.1705 s/iter +fastest epoch 300, average time is 0.1510 s/iter +time std over epochs is 0.0026 +average iter time: 0.1556 s/iter +``` diff --git a/docs/zh_cn/advanced_guides/module_combination.md b/docs/zh_cn/common_usage/module_combination.md similarity index 99% rename from docs/zh_cn/advanced_guides/module_combination.md rename to docs/zh_cn/common_usage/module_combination.md index 265d926a..011836f6 100644 --- a/docs/zh_cn/advanced_guides/module_combination.md +++ b/docs/zh_cn/common_usage/module_combination.md @@ -61,7 +61,7 @@ model = dict( loss_weight=1.0))) ``` -### 替换 YOLOV5 Head 中的 loss_obj 函数 +### 替换 YOLOv5 Head 中的 loss_obj 函数 `loss_obj` 的替换与 `loss_cls` 的替换类似,我们可以使用已经实现好的损失函数对 `loss_obj` 的损失函数进行替换 @@ -108,7 +108,7 @@ model = dict( 1. 在本教程中损失函数的替换是运行不报错的,但无法保证性能一定会上升。 2. 
本次损失函数的替换都是以 YOLOv5 算法作为例子的,但是 MMYOLO 下的多个算法,如 YOLOv6,YOLOX 等算法都可以按照上述的例子进行替换。 -## model 和 loss 组合替换 +## Model 和 Loss 组合替换 在 MMYOLO 中,model 即网络本身和 loss 是解耦的,用户可以简单的通过修改配置文件中 model 和 loss 来组合不同模块。下面给出两个具体例子。 diff --git a/docs/zh_cn/common_usage/multi_necks.md b/docs/zh_cn/common_usage/multi_necks.md new file mode 100644 index 00000000..a4a17052 --- /dev/null +++ b/docs/zh_cn/common_usage/multi_necks.md @@ -0,0 +1,40 @@ +# 应用多个 Neck + +如果你想堆叠多个 Neck,可以直接在配置文件中的 Neck 参数,MMYOLO 支持以 `List` 形式拼接多个 Neck 配置,你需要保证上一个 Neck 的输出通道与下一个 Neck +的输入通道相匹配。如需要调整通道,可以插入 `mmdet.ChannelMapper` 模块用来对齐多个 Neck 之间的通道数量。具体配置如下: + +```python +_base_ = './yolov5_s-v61_syncbn_8xb16-300e_coco.py' + +deepen_factor = _base_.deepen_factor +widen_factor = _base_.widen_factor +model = dict( + type='YOLODetector', + neck=[ + dict( + type='YOLOv5PAFPN', + deepen_factor=deepen_factor, + widen_factor=widen_factor, + in_channels=[256, 512, 1024], + out_channels=[256, 512, 1024], + # 因为 out_channels 由 widen_factor 控制,YOLOv5PAFPN 的 out_channels = out_channels * widen_factor + num_csp_blocks=3, + norm_cfg=dict(type='BN', momentum=0.03, eps=0.001), + act_cfg=dict(type='SiLU', inplace=True)), + dict( + type='mmdet.ChannelMapper', + in_channels=[128, 256, 512], + out_channels=128, + ), + dict( + type='mmdet.DyHead', + in_channels=128, + out_channels=256, + num_blocks=2, + # disable zero_init_offset to follow official implementation + zero_init_offset=False) + ], + bbox_head=dict(head_module=dict(in_channels=[512, 512, 512])) + # 因为 out_channels 由 widen_factor 控制,YOLOv5HeadModuled 的 in_channels * widen_factor 才会等于最后一个 neck 的 out_channels +) +``` diff --git a/docs/zh_cn/common_usage/output_predictions.md b/docs/zh_cn/common_usage/output_predictions.md new file mode 100644 index 00000000..6c123e96 --- /dev/null +++ b/docs/zh_cn/common_usage/output_predictions.md @@ -0,0 +1,40 @@ +# 输出模型预测结果 + +如果想将预测结果保存为特定的文件,用于离线评估,目前 MMYOLO 支持 json 和 pkl 两种格式。 + +```{note} +json 文件仅保存 `image_id`、`bbox`、`score` 和 `category_id`; json 文件可以使用 json 库读取。 +pkl 保存内容比 json 文件更多,还会保存预测图片的文件名和尺寸等一系列信息; pkl 文件可以使用 pickle 库读取。 +``` + +### 输出为 json 文件 + +如果想将预测结果输出为 json 文件,则命令如下: + +```shell +python tools/test.py ${CONFIG} ${CHECKPOINT} --json-prefix ${JSON_PREFIX} +``` + +`--json-prefix` 后的参数输入为文件名前缀(无需输入 `.json` 后缀),也可以包含路径。举一个具体例子: + +```shell +python tools/test.py configs\yolov5\yolov5_s-v61_syncbn_8xb16-300e_coco.py yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth --json-prefix work_dirs/demo/json_demo +``` + +运行以上命令会在 `work_dirs/demo` 文件夹下,输出 `json_demo.bbox.json` 文件。 + +### 输出为 pkl 文件 + +如果想将预测结果输出为 pkl 文件,则命令如下: + +```shell +python tools/test.py ${CONFIG} ${CHECKPOINT} --out ${OUTPUT_FILE} [--cfg-options ${OPTIONS [OPTIONS...]}] +``` + +`--out` 后的参数输入为完整文件名(**必须输入** `.pkl` 或 `.pickle` 后缀),也可以包含路径。举一个具体例子: + +```shell +python tools/test.py configs\yolov5\yolov5_s-v61_syncbn_8xb16-300e_coco.py yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth --out work_dirs/demo/pkl_demo.pkl +``` + +运行以上命令会在 `work_dirs/demo` 文件夹下,输出 `pkl_demo.pkl` 文件。 diff --git a/docs/zh_cn/advanced_guides/plugins.md b/docs/zh_cn/common_usage/plugins.md similarity index 97% rename from docs/zh_cn/advanced_guides/plugins.md rename to docs/zh_cn/common_usage/plugins.md index 82673e43..337111f9 100644 --- a/docs/zh_cn/advanced_guides/plugins.md +++ b/docs/zh_cn/common_usage/plugins.md @@ -1,4 +1,4 @@ -# 更多的插件使用 +# 给主干网络增加插件 MMYOLO 支持在 Backbone 的不同 Stage 后增加如 `none_local`、`dropblock` 等插件,用户可以直接通过修改 config 文件中 `backbone` 的 `plugins`参数来实现对插件的管理。例如为 
`YOLOv5` 增加`GeneralizedAttention` 插件,其配置文件如下: diff --git a/docs/zh_cn/common_usage/resume_training.md b/docs/zh_cn/common_usage/resume_training.md new file mode 100644 index 00000000..cbfeba7b --- /dev/null +++ b/docs/zh_cn/common_usage/resume_training.md @@ -0,0 +1 @@ +# 恢复训练 diff --git a/docs/zh_cn/common_usage/set_random_seed.md b/docs/zh_cn/common_usage/set_random_seed.md new file mode 100644 index 00000000..6f747c54 --- /dev/null +++ b/docs/zh_cn/common_usage/set_random_seed.md @@ -0,0 +1,20 @@ +# 设置随机种子 + +如果想要在训练时指定随机种子,可以使用以下命令: + +```shell +python ./tools/train.py \ + ${CONFIG} \ # 配置文件路径 + --cfg-options randomness.seed=2023 \ # 设置随机种子为 2023 + [randomness.diff_rank_seed=True] \ # 根据 rank 来设置不同的种子。 + [randomness.deterministic=True] # 把 cuDNN 后端确定性选项设置为 True +# [] 代表可选参数,实际输入命令行时,不用输入 [] +``` + +`randomness` 有三个参数可设置,具体含义如下: + +- `randomness.seed=2023` ,设置随机种子为 2023。 + +- `randomness.diff_rank_seed=True`,根据 rank 来设置不同的种子,`diff_rank_seed` 默认为 False。 + +- `randomness.deterministic=True`,把 cuDNN 后端确定性选项设置为 True,即把`torch.backends.cudnn.deterministic` 设为 True,把 `torch.backends.cudnn.benchmark` 设为False。`deterministic` 默认为 False。更多细节见 https://pytorch.org/docs/stable/notes/randomness.html。 diff --git a/docs/zh_cn/common_usage/set_syncbn.md b/docs/zh_cn/common_usage/set_syncbn.md new file mode 100644 index 00000000..a654a2b4 --- /dev/null +++ b/docs/zh_cn/common_usage/set_syncbn.md @@ -0,0 +1 @@ +# 开启和关闭 SyncBatchNorm diff --git a/docs/zh_cn/common_usage/specify_device.md b/docs/zh_cn/common_usage/specify_device.md new file mode 100644 index 00000000..772e43df --- /dev/null +++ b/docs/zh_cn/common_usage/specify_device.md @@ -0,0 +1,23 @@ +# 指定特定设备训练或推理 + +如果你有多张 GPU,比如 8 张,其编号分别为 `0, 1, 2, 3, 4, 5, 6, 7`,使用单卡训练或推理时会默认使用卡 0。如果想指定其他卡进行训练或推理,可以使用以下命令: + +```shell +CUDA_VISIBLE_DEVICES=5 python ./tools/train.py ${CONFIG} #train +CUDA_VISIBLE_DEVICES=5 python ./tools/test.py ${CONFIG} ${CHECKPOINT_FILE} #test +``` + +如果设置`CUDA_VISIBLE_DEVICES`为 -1 或者一个大于 GPU 最大编号的数,比如 8,将会使用 CPU 进行训练或者推理。 + +如果你想使用其中几张卡并行训练,可以使用如下命令: + +```shell +CUDA_VISIBLE_DEVICES=0,1,2,3 ./tools/dist_train.sh ${CONFIG} ${GPU_NUM} +``` + +这里 `GPU_NUM` 为 4。另外如果在一台机器上多个任务同时多卡训练,需要设置不同的端口,比如以下命令: + +```shell +CUDA_VISIBLE_DEVICES=0,1,2,3 PORT=29500 ./tools/dist_train.sh ${CONFIG} 4 +CUDA_VISIBLE_DEVICES=4,5,6,7 PORT=29501 ./tools/dist_train.sh ${CONFIG} 4 +``` diff --git a/docs/zh_cn/deploy/index.rst b/docs/zh_cn/deploy/index.rst deleted file mode 100644 index 3cf47711..00000000 --- a/docs/zh_cn/deploy/index.rst +++ /dev/null @@ -1,16 +0,0 @@ -部署必备教程 -************************ - -.. toctree:: - :maxdepth: 1 - - basic_deployment_guide.md - - -部署全流程说明 -************************ - -.. toctree:: - :maxdepth: 1 - - yolov5_deployment.md diff --git a/docs/zh_cn/algorithm_descriptions/index.rst b/docs/zh_cn/featured_topics/algorithm_descriptions/index.rst similarity index 68% rename from docs/zh_cn/algorithm_descriptions/index.rst rename to docs/zh_cn/featured_topics/algorithm_descriptions/index.rst index bb3b2c9f..54bc8b8e 100644 --- a/docs/zh_cn/algorithm_descriptions/index.rst +++ b/docs/zh_cn/featured_topics/algorithm_descriptions/index.rst @@ -1,12 +1,3 @@ -必备基础 -******************** - -.. 
toctree:: - :maxdepth: 1 - - model_design.md - - 算法原理和实现全解析 ******************** diff --git a/docs/zh_cn/algorithm_descriptions/rtmdet_description.md b/docs/zh_cn/featured_topics/algorithm_descriptions/rtmdet_description.md similarity index 100% rename from docs/zh_cn/algorithm_descriptions/rtmdet_description.md rename to docs/zh_cn/featured_topics/algorithm_descriptions/rtmdet_description.md diff --git a/docs/zh_cn/algorithm_descriptions/yolov5_description.md b/docs/zh_cn/featured_topics/algorithm_descriptions/yolov5_description.md similarity index 100% rename from docs/zh_cn/algorithm_descriptions/yolov5_description.md rename to docs/zh_cn/featured_topics/algorithm_descriptions/yolov5_description.md diff --git a/docs/zh_cn/algorithm_descriptions/yolov6_description.md b/docs/zh_cn/featured_topics/algorithm_descriptions/yolov6_description.md similarity index 100% rename from docs/zh_cn/algorithm_descriptions/yolov6_description.md rename to docs/zh_cn/featured_topics/algorithm_descriptions/yolov6_description.md diff --git a/docs/zh_cn/algorithm_descriptions/yolov8_description.md b/docs/zh_cn/featured_topics/algorithm_descriptions/yolov8_description.md similarity index 100% rename from docs/zh_cn/algorithm_descriptions/yolov8_description.md rename to docs/zh_cn/featured_topics/algorithm_descriptions/yolov8_description.md diff --git a/docs/zh_cn/community/contributing.md b/docs/zh_cn/featured_topics/contributing.md similarity index 99% rename from docs/zh_cn/community/contributing.md rename to docs/zh_cn/featured_topics/contributing.md index 64c463b0..16c76b0a 100644 --- a/docs/zh_cn/community/contributing.md +++ b/docs/zh_cn/featured_topics/contributing.md @@ -1,4 +1,4 @@ -## 贡献代码 +## 如何给 MMYOLO 贡献代码 欢迎加入 MMYOLO 社区,我们致力于打造最前沿的计算机视觉基础库,我们欢迎任何类型的贡献,包括但不限于 @@ -241,7 +241,7 @@ pre-commit 钩子的配置可以在 [.pre-commit-config](./.pre-commit-config.ya pre-commit 具体的安装使用方式见[拉取请求](#2-配置-pre-commit)。 -更具体的规范请参考 [OpenMMLab 代码规范](code_style.md)。 +更具体的规范请参考 [OpenMMLab 代码规范](../notes/code_style.md)。 #### C++ and CUDA diff --git a/docs/zh_cn/featured_topics/dataset_preparation.md b/docs/zh_cn/featured_topics/dataset_preparation.md new file mode 100644 index 00000000..407916b5 --- /dev/null +++ b/docs/zh_cn/featured_topics/dataset_preparation.md @@ -0,0 +1 @@ +# 数据集格式准备和说明 diff --git a/docs/zh_cn/featured_topics/deploy/easydeploy_guide.md b/docs/zh_cn/featured_topics/deploy/easydeploy_guide.md new file mode 100644 index 00000000..80a69d25 --- /dev/null +++ b/docs/zh_cn/featured_topics/deploy/easydeploy_guide.md @@ -0,0 +1,5 @@ +# EasyDeploy 部署必备教程 + +本项目作为 MMYOLO 的部署 project 单独存在,意图剥离 MMDeploy 当前的体系,独自支持用户完成模型训练后的转换和部署功能,使用户的学习和工程成本下降。 + +当前支持对 ONNX 格式和 TensorRT 格式的转换,后续对其他推理平台也会支持起来。 diff --git a/docs/zh_cn/advanced_guides/index.rst b/docs/zh_cn/featured_topics/deploy/index.rst similarity index 51% rename from docs/zh_cn/advanced_guides/index.rst rename to docs/zh_cn/featured_topics/deploy/index.rst index 02b06e61..3c545466 100644 --- a/docs/zh_cn/advanced_guides/index.rst +++ b/docs/zh_cn/featured_topics/deploy/index.rst @@ -1,33 +1,25 @@ -模块组合 +MMDeploy 部署必备教程 ************************ .. toctree:: :maxdepth: 1 - module_combination.md + mmdeploy_guide.md -数据流 + +MMDeploy 部署 YOLOv5 全流程说明 ************************ .. toctree:: :maxdepth: 1 - data_flow.md + mmdeploy_yolov5.md -How to +EasyDeploy 部署必备教程 ************************ .. toctree:: :maxdepth: 1 - how_to.md - - -插件 -************************ - -.. 
toctree:: - :maxdepth: 1 - - plugins.md + easydeploy_guide.md diff --git a/docs/zh_cn/deploy/basic_deployment_guide.md b/docs/zh_cn/featured_topics/deploy/mmdeploy_guide.md similarity index 100% rename from docs/zh_cn/deploy/basic_deployment_guide.md rename to docs/zh_cn/featured_topics/deploy/mmdeploy_guide.md diff --git a/docs/zh_cn/deploy/yolov5_deployment.md b/docs/zh_cn/featured_topics/deploy/mmdeploy_yolov5.md similarity index 98% rename from docs/zh_cn/deploy/yolov5_deployment.md rename to docs/zh_cn/featured_topics/deploy/mmdeploy_yolov5.md index 014b735e..99c0895d 100644 --- a/docs/zh_cn/deploy/yolov5_deployment.md +++ b/docs/zh_cn/featured_topics/deploy/mmdeploy_yolov5.md @@ -1,10 +1,10 @@ # YOLOv5 部署全流程说明 -请先参考 [`部署必备指南`](basic_deployment_guide.md) 了解部署配置文件等相关信息。 +请先参考 [`部署必备指南`](./mmdeploy_guide.md) 了解部署配置文件等相关信息。 ## 模型训练和测试 -模型训练和测试请参考 [YOLOv5 从入门到部署全流程](../user_guides/yolov5_tutorial.md) 。 +模型训练和测试请参考 [YOLOv5 从入门到部署全流程](../get_started/15_minutes.md) 。 ## 准备 MMDeploy 运行环境 @@ -75,7 +75,7 @@ codebase_config = dict( backend_config = dict(type='onnxruntime') ``` -默认配置中的 `post_processing` 后处理参数是当前模型与 `pytorch` 模型精度对齐的配置,若您需要修改相关参数,可以参考 [`部署必备指南`](basic_deployment_guide.md) 的详细介绍。 +默认配置中的 `post_processing` 后处理参数是当前模型与 `pytorch` 模型精度对齐的配置,若您需要修改相关参数,可以参考 [`部署必备指南`](./mmdeploy_guide.md) 的详细介绍。 当您部署在 `TensorRT` 时,您可以查看 [`detection_tensorrt_static-640x640.py`](https://github.com/open-mmlab/mmyolo/tree/main/configs/deploy/detection_tensorrt_static-640x640.py) ,如下所示: diff --git a/docs/zh_cn/featured_topics/industry_examples.md b/docs/zh_cn/featured_topics/industry_examples.md new file mode 100644 index 00000000..19960ce4 --- /dev/null +++ b/docs/zh_cn/featured_topics/industry_examples.md @@ -0,0 +1 @@ +# MMYOLO 产业范例介绍 diff --git a/docs/zh_cn/user_guides/custom_dataset.md b/docs/zh_cn/featured_topics/labeling_to_deployment_tutorials.md similarity index 99% rename from docs/zh_cn/user_guides/custom_dataset.md rename to docs/zh_cn/featured_topics/labeling_to_deployment_tutorials.md index 610d4557..d4e3ddf8 100644 --- a/docs/zh_cn/user_guides/custom_dataset.md +++ b/docs/zh_cn/featured_topics/labeling_to_deployment_tutorials.md @@ -1,4 +1,4 @@ -# 自定义数据集 标注+训练+测试+部署 全流程 +# 标注+训练+测试+部署全流程 在平时的工作学习中,我们经常会遇到一些任务需要训练自定义的私有数据集,开源数据集去作为上线模型的场景比较少,这就需要我们对自己的私有数据集进行一系列的操作,以确保模型能够上线生产服务于客户。 diff --git a/docs/zh_cn/algorithm_descriptions/model_design.md b/docs/zh_cn/featured_topics/model_design.md similarity index 98% rename from docs/zh_cn/algorithm_descriptions/model_design.md rename to docs/zh_cn/featured_topics/model_design.md index 92b8f2c4..287cf032 100644 --- a/docs/zh_cn/algorithm_descriptions/model_design.md +++ b/docs/zh_cn/featured_topics/model_design.md @@ -1,4 +1,4 @@ -# 模型设计相关说明 +# MMYOLO 模型设计相关说明 ## YOLO 系列模型基类 @@ -39,7 +39,7 @@ YOLO 系列算法大部分采用了统一的算法搭建结构,典型的如 Da MMYOLO 系列沿用 MMDetection 中设计的 `BaseDenseHead` 作为其 Head 结构的基类,但是进一步拆分了 HeadModule. 以 YOLOv5 为例,其 [HeadModule](https://github.com/open-mmlab/mmyolo/blob/main/mmyolo/models/dense_heads/yolov5_head.py#L24) 中的 forward 实现代替了原有的 forward 实现。 -## HeadModule +## HeadModule 说明
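+
+The snippet below is a deliberately simplified sketch of the HeadModule idea
+(it is not the real `YOLOv5HeadModule`): the module only builds the prediction
+layers and runs the per-level forward, returning raw multi-level outputs,
+while loss computation and post-processing stay in the Head that wraps it.
+
+```python
+import torch
+from torch import nn
+
+
+class ToyHeadModule(nn.Module):
+    """Toy per-level predictor, only to illustrate the HeadModule split."""
+
+    def __init__(self, num_classes, in_channels, num_base_priors=3):
+        super().__init__()
+        # one 1x1 conv per feature level: (cls + 4 box + 1 obj) * priors
+        out_channels = num_base_priors * (num_classes + 5)
+        self.convs_pred = nn.ModuleList(
+            [nn.Conv2d(c, out_channels, 1) for c in in_channels])
+
+    def forward(self, feats):
+        # feats: multi-level feature maps produced by the neck
+        return tuple(conv(x) for conv, x in zip(self.convs_pred, feats))
+
+
+feats = tuple(
+    torch.rand(1, c, s, s) for c, s in zip((256, 512, 1024), (80, 40, 20)))
+outs = ToyHeadModule(num_classes=80, in_channels=(256, 512, 1024))(feats)
+print([o.shape for o in outs])
+```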
HeadModule diff --git a/docs/zh_cn/advanced_guides/how_to.md b/docs/zh_cn/featured_topics/replace_backbone.md similarity index 52% rename from docs/zh_cn/advanced_guides/how_to.md rename to docs/zh_cn/featured_topics/replace_backbone.md index 3cf7368a..4514fefe 100644 --- a/docs/zh_cn/advanced_guides/how_to.md +++ b/docs/zh_cn/featured_topics/replace_backbone.md @@ -1,60 +1,11 @@ -# How to xxx - -本教程收集了任何如何使用 MMYOLO 进行 xxx 的答案。 如果您遇到有关`如何做`的问题及答案,请随时更新此文档! - -## 给主干网络增加插件 - -[更多的插件使用](plugins.md) - -## 应用多个 Neck - -如果你想堆叠多个 Neck,可以直接在配置文件中的 Neck 参数,MMYOLO 支持以 `List` 形式拼接多个 Neck 配置,你需要保证上一个 Neck 的输出通道与下一个 Neck -的输入通道相匹配。如需要调整通道,可以插入 `mmdet.ChannelMapper` 模块用来对齐多个 Neck 之间的通道数量。具体配置如下: - -```python -_base_ = './yolov5_s-v61_syncbn_8xb16-300e_coco.py' - -deepen_factor = _base_.deepen_factor -widen_factor = _base_.widen_factor -model = dict( - type='YOLODetector', - neck=[ - dict( - type='YOLOv5PAFPN', - deepen_factor=deepen_factor, - widen_factor=widen_factor, - in_channels=[256, 512, 1024], - out_channels=[256, 512, 1024], - # 因为 out_channels 由 widen_factor 控制,YOLOv5PAFPN 的 out_channels = out_channels * widen_factor - num_csp_blocks=3, - norm_cfg=dict(type='BN', momentum=0.03, eps=0.001), - act_cfg=dict(type='SiLU', inplace=True)), - dict( - type='mmdet.ChannelMapper', - in_channels=[128, 256, 512], - out_channels=128, - ), - dict( - type='mmdet.DyHead', - in_channels=128, - out_channels=256, - num_blocks=2, - # disable zero_init_offset to follow official implementation - zero_init_offset=False) - ], - bbox_head=dict(head_module=dict(in_channels=[512, 512, 512])) - # 因为 out_channels 由 widen_factor 控制,YOLOv5HeadModuled 的 in_channels * widen_factor 才会等于最后一个 neck 的 out_channels -) -``` - -## 更换主干网络 +# 轻松更换主干网络 ```{note} 1. 使用其他主干网络时,你需要保证主干网络的输出通道与 Neck 的输入通道相匹配。 2. 下面给出的配置文件,仅能确保训练可以正确运行,直接训练性能可能不是最优的。因为某些 backbone 需要配套特定的学习率、优化器等超参数。后续会在“训练技巧章节”补充训练调优相关内容。 ``` -### 使用 MMYOLO 中注册的主干网络 +## 使用 MMYOLO 中注册的主干网络 假设想将 `YOLOv6EfficientRep` 作为 `YOLOv5` 的主干网络,则配置文件如下: @@ -69,11 +20,11 @@ model = dict( ) ``` -### 跨库使用主干网络 +## 跨库使用主干网络 OpenMMLab 2.0 体系中 MMYOLO、MMDetection、MMClassification、MMSelfsup 中的模型注册表都继承自 MMEngine 中的根注册表,允许这些 OpenMMLab 开源库直接使用彼此已经实现的模块。 因此用户可以在 MMYOLO 中使用来自 MMDetection、MMClassification、MMSelfsup 的主干网络,而无需重新实现。 -#### 使用在 MMDetection 中实现的主干网络 +### 使用在 MMDetection 中实现的主干网络 1. 假设想将 `ResNet-50` 作为 `YOLOv5` 的主干网络,则配置文件如下: @@ -154,7 +105,7 @@ OpenMMLab 2.0 体系中 MMYOLO、MMDetection、MMClassification、MMSelfsup 中 ) ``` -#### 使用在 MMClassification 中实现的主干网络 +### 使用在 MMClassification 中实现的主干网络 1. 
假设想将 `ConvNeXt-Tiny` 作为 `YOLOv5` 的主干网络,则配置文件如下: @@ -234,7 +185,7 @@ OpenMMLab 2.0 体系中 MMYOLO、MMDetection、MMClassification、MMSelfsup 中 ) ``` -#### 通过 MMClassification 使用 `timm` 中实现的主干网络 +### 通过 MMClassification 使用 `timm` 中实现的主干网络 由于 MMClassification 提供了 Py**T**orch **Im**age **M**odels (`timm`) 主干网络的封装,用户也可以通过 MMClassification 直接使用 `timm` 中的主干网络。假设想将 `EfficientNet-B1`作为 `YOLOv5` 的主干网络,则配置文件如下: @@ -273,7 +224,7 @@ model = dict( ) ``` -#### 使用在 MMSelfSup 中实现的主干网络 +### 使用在 MMSelfSup 中实现的主干网络 假设想将 MMSelfSup 中 `MoCo v3` 自监督训练的 `ResNet-50` 作为 `YOLOv5` 的主干网络,则配置文件如下: @@ -315,7 +266,7 @@ model = dict( ) ``` -#### 不使用预训练权重 +### 不使用预训练权重 通常情况下,骨干网络初始化都是优先选择预训练权重。如果你不想使用预训练权重,而是想从头开始训练时模型时, 我们可以将 `backbone` 中的 `init_cfg` 设置为 `None`,此时骨干网络将会以默认的初始化方法进行初始化, @@ -354,225 +305,3 @@ model = dict( widen_factor=widen_factor)) ) ``` - -#### 冻结 backbone 或 neck 的权重 - -在 MMYOLO 中我们可以通过设置 `frozen_stages` 参数去冻结主干网络的部分 `stage`, 使这些 `stage` 的参数不参与模型的更新。 -需要注意的是:`frozen_stages = i` 表示的意思是指从最开始的 `stage` 开始到第 `i` 层 `stage` 的所有参数都会被冻结。下面是 `YOLOv5` 的例子,其他算法也是同样的逻辑: - -```python -_base_ = './yolov5_s-v61_syncbn_8xb16-300e_coco.py' - -model = dict( - backbone=dict( - frozen_stages=1 # 表示第一层 stage 以及它之前的所有 stage 中的参数都会被冻结 - )) -``` - -此外, MMYOLO 中也可以通过参数 `freeze_all` 去冻结整个 `neck` 的参数。下面是 `YOLOv5` 的例子,其他算法也是同样的逻辑: - -```python -_base_ = './yolov5_s-v61_syncbn_8xb16-300e_coco.py' - -model = dict( - neck=dict( - freeze_all=True # freeze_all=True 时表示整个 neck 的参数都会被冻结 - )) -``` - -## 输出预测结果 - -如果想将预测结果保存为特定的文件,用于离线评估,目前 MMYOLO 支持 json 和 pkl 两种格式。 - -```{note} -json 文件仅保存 `image_id`、`bbox`、`score` 和 `category_id`; json 文件可以使用 json 库读取。 -pkl 保存内容比 json 文件更多,还会保存预测图片的文件名和尺寸等一系列信息; pkl 文件可以使用 pickle 库读取。 -``` - -### 输出为 json 文件 - -如果想将预测结果输出为 json 文件,则命令如下: - -```shell -python tools/test.py ${CONFIG} ${CHECKPOINT} --json-prefix ${JSON_PREFIX} -``` - -`--json-prefix` 后的参数输入为文件名前缀(无需输入 `.json` 后缀),也可以包含路径。举一个具体例子: - -```shell -python tools/test.py configs\yolov5\yolov5_s-v61_syncbn_8xb16-300e_coco.py yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth --json-prefix work_dirs/demo/json_demo -``` - -运行以上命令会在 `work_dirs/demo` 文件夹下,输出 `json_demo.bbox.json` 文件。 - -### 输出为 pkl 文件 - -如果想将预测结果输出为 pkl 文件,则命令如下: - -```shell -python tools/test.py ${CONFIG} ${CHECKPOINT} --out ${OUTPUT_FILE} [--cfg-options ${OPTIONS [OPTIONS...]}] -``` - -`--out` 后的参数输入为完整文件名(**必须输入** `.pkl` 或 `.pickle` 后缀),也可以包含路径。举一个具体例子: - -```shell -python tools/test.py configs\yolov5\yolov5_s-v61_syncbn_8xb16-300e_coco.py yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth --out work_dirs/demo/pkl_demo.pkl -``` - -运行以上命令会在 `work_dirs/demo` 文件夹下,输出 `pkl_demo.pkl` 文件。 - -## 使用 mim 跨库调用其他 OpenMMLab 仓库的脚本 - -```{note} -1. 目前暂不支持跨库调用所有脚本,正在修复中。等修复完成,本文档会添加更多的例子。 -2. 
绘制 mAP 和 计算平均训练速度 两项功能在 MMDetection dev-3.x 分支中修复,目前需要通过源码安装该分支才能成功调用。 -``` - -### 日志分析 - -#### 曲线图绘制 - -MMDetection 中的 `tools/analysis_tools/analyze_logs.py` 可利用指定的训练 log 文件绘制 loss/mAP 曲线图, 第一次运行前请先运行 `pip install seaborn` 安装必要依赖。 - -```shell -mim run mmdet analyze_logs plot_curve \ - ${LOG} \ # 日志文件路径 - [--keys ${KEYS}] \ # 需要绘制的指标,默认为 'bbox_mAP' - [--start-epoch ${START_EPOCH}] # 起始的 epoch,默认为 1 - [--eval-interval ${EVALUATION_INTERVAL}] \ # 评估间隔,默认为 1 - [--title ${TITLE}] \ # 图片标题,无默认值 - [--legend ${LEGEND}] \ # 图例,默认为 None - [--backend ${BACKEND}] \ # 绘制后端,默认为 None - [--style ${STYLE}] \ # 绘制风格,默认为 'dark' - [--out ${OUT_FILE}] # 输出文件路径 -# [] 代表可选参数,实际输入命令行时,不用输入 [] -``` - -样例: - -- 绘制分类损失曲线图 - - ```shell - mim run mmdet analyze_logs plot_curve \ - yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700.log.json \ - --keys loss_cls \ - --legend loss_cls - ``` - - - -- 绘制分类损失、回归损失曲线图,保存图片为对应的 pdf 文件 - - ```shell - mim run mmdet analyze_logs plot_curve \ - yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700.log.json \ - --keys loss_cls loss_bbox \ - --legend loss_cls loss_bbox \ - --out losses_yolov5_s.pdf - ``` - - - -- 在同一图像中比较两次运行结果的 bbox mAP - - ```shell - mim run mmdet analyze_logs plot_curve \ - yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700.log.json \ - yolov5_n-v61_syncbn_fast_8xb16-300e_coco_20220919_090739.log.json \ - --keys bbox_mAP \ - --legend yolov5_s yolov5_n \ - --eval-interval 10 # 注意评估间隔必须和训练时设置的一致,否则会报错 - ``` - - - -#### 计算平均训练速度 - -```shell -mim run mmdet analyze_logs cal_train_time \ - ${LOG} \ # 日志文件路径 - [--include-outliers] # 计算时包含每个 epoch 的第一个数据 -``` - -样例: - -```shell -mim run mmdet analyze_logs cal_train_time \ - yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700.log.json -``` - -输出以如下形式展示: - -```text ------Analyze train time of yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700.log.json----- -slowest epoch 278, average time is 0.1705 s/iter -fastest epoch 300, average time is 0.1510 s/iter -time std over epochs is 0.0026 -average iter time: 0.1556 s/iter -``` - -### 打印完整配置文件 - -MMDetection 中的 `tools/misc/print_config.py` 脚本可将所有配置继承关系展开,打印相应的完整配置文件。调用命令如下: - -```shell -mim run mmdet print_config \ - ${CONFIG} \ # 需要打印的配置文件路径 - [--save-path] \ # 保存文件路径,必须以 .py, .json 或者 .yml 结尾 - [--cfg-options ${OPTIONS [OPTIONS...]}] # 通过命令行参数修改配置文件 -``` - -样例: - -```shell -mim run mmdet print_config \ - configs/yolov5/yolov5_s-v61_syncbn_fast_1xb4-300e_balloon.py \ - --save-path ./work_dirs/yolov5_s-v61_syncbn_fast_1xb4-300e_balloon_whole.py -``` - -运行以上命令,会将 `yolov5_s-v61_syncbn_fast_1xb4-300e_balloon.py` 继承关系展开后的配置文件保存到 `./work_dirs` 文件夹内的 `yolov5_s-v61_syncbn_fast_1xb4-300e_balloon_whole.py` 文件中。 - -## 设置随机种子 - -如果想要在训练时指定随机种子,可以使用以下命令: - -```shell -python ./tools/train.py \ - ${CONFIG} \ # 配置文件路径 - --cfg-options randomness.seed=2023 \ # 设置随机种子为 2023 - [randomness.diff_rank_seed=True] \ # 根据 rank 来设置不同的种子。 - [randomness.deterministic=True] # 把 cuDNN 后端确定性选项设置为 True -# [] 代表可选参数,实际输入命令行时,不用输入 [] -``` - -`randomness` 有三个参数可设置,具体含义如下: - -- `randomness.seed=2023` ,设置随机种子为 2023。 - -- `randomness.diff_rank_seed=True`,根据 rank 来设置不同的种子,`diff_rank_seed` 默认为 False。 - -- `randomness.deterministic=True`,把 cuDNN 后端确定性选项设置为 True,即把`torch.backends.cudnn.deterministic` 设为 True,把 `torch.backends.cudnn.benchmark` 设为False。`deterministic` 默认为 False。更多细节见 https://pytorch.org/docs/stable/notes/randomness.html。 - -## 指定特定 GPU 训练或推理 - -如果你有多张 GPU,比如 8 张,其编号分别为 `0, 1, 2, 3, 4, 5, 6, 7`,使用单卡训练或推理时会默认使用卡 0。如果想指定其他卡进行训练或推理,可以使用以下命令: - -```shell -CUDA_VISIBLE_DEVICES=5 
python ./tools/train.py ${CONFIG} #train -CUDA_VISIBLE_DEVICES=5 python ./tools/test.py ${CONFIG} ${CHECKPOINT_FILE} #test -``` - -如果设置`CUDA_VISIBLE_DEVICES`为 -1 或者一个大于 GPU 最大编号的数,比如 8,将会使用 CPU 进行训练或者推理。 - -如果你想使用其中几张卡并行训练,可以使用如下命令: - -```shell -CUDA_VISIBLE_DEVICES=0,1,2,3 ./tools/dist_train.sh ${CONFIG} ${GPU_NUM} -``` - -这里 `GPU_NUM` 为 4。另外如果在一台机器上多个任务同时多卡训练,需要设置不同的端口,比如以下命令: - -```shell -CUDA_VISIBLE_DEVICES=0,1,2,3 PORT=29500 ./tools/dist_train.sh ${CONFIG} 4 -CUDA_VISIBLE_DEVICES=4,5,6,7 PORT=29501 ./tools/dist_train.sh ${CONFIG} 4 -``` diff --git a/docs/zh_cn/featured_topics/troubleshooting_steps.md b/docs/zh_cn/featured_topics/troubleshooting_steps.md new file mode 100644 index 00000000..189a9115 --- /dev/null +++ b/docs/zh_cn/featured_topics/troubleshooting_steps.md @@ -0,0 +1 @@ +# 常见错误排除步骤 diff --git a/docs/zh_cn/user_guides/visualization.md b/docs/zh_cn/featured_topics/visualization.md similarity index 53% rename from docs/zh_cn/user_guides/visualization.md rename to docs/zh_cn/featured_topics/visualization.md index e5975eed..52e8c579 100644 --- a/docs/zh_cn/user_guides/visualization.md +++ b/docs/zh_cn/featured_topics/visualization.md @@ -1,4 +1,4 @@ -# 可视化 +# 关于可视化的一切 本文包括特征图可视化和 Grad-Based 和 Grad-Free CAM 可视化 @@ -292,3 +292,250 @@ python demo/boxam_vis_demo.py \
image
+ +## 可视化 COCO 标签 + +脚本 `tools/analysis_tools/browse_coco_json.py` 能够使用可视化显示 COCO 标签在图片的情况。 + +```shell +python tools/analysis_tools/browse_coco_json.py [--data-root ${DATA_ROOT}] \ + [--img-dir ${IMG_DIR}] \ + [--ann-file ${ANN_FILE}] \ + [--wait-time ${WAIT_TIME}] \ + [--disp-all] [--category-names CATEGORY_NAMES [CATEGORY_NAMES ...]] \ + [--shuffle] +``` + +其中,如果图片、标签都在同一个文件夹下的话,可以指定 `--data-root` 到该文件夹,然后 `--img-dir` 和 `--ann-file` 指定该文件夹的相对路径,代码会自动拼接。 +如果图片、标签文件不在同一个文件夹下的话,则无需指定 `--data-root` ,直接指定绝对路径的 `--img-dir` 和 `--ann-file` 即可。 + +例子: + +1. 查看 `COCO` 全部类别,同时展示 `bbox`、`mask` 等所有类型的标注: + +```shell +python tools/analysis_tools/browse_coco_json.py --data-root './data/coco' \ + --img-dir 'train2017' \ + --ann-file 'annotations/instances_train2017.json' \ + --disp-all +``` + +如果图片、标签不在同一个文件夹下的话,可以使用绝对路径: + +```shell +python tools/analysis_tools/browse_coco_json.py --img-dir '/dataset/image/coco/train2017' \ + --ann-file '/label/instances_train2017.json' \ + --disp-all +``` + +2. 查看 `COCO` 全部类别,同时仅展示 `bbox` 类型的标注,并打乱显示: + +```shell +python tools/analysis_tools/browse_coco_json.py --data-root './data/coco' \ + --img-dir 'train2017' \ + --ann-file 'annotations/instances_train2017.json' \ + --shuffle +``` + +3. 只查看 `bicycle` 和 `person` 类别,同时仅展示 `bbox` 类型的标注: + +```shell +python tools/analysis_tools/browse_coco_json.py --data-root './data/coco' \ + --img-dir 'train2017' \ + --ann-file 'annotations/instances_train2017.json' \ + --category-names 'bicycle' 'person' +``` + +4. 查看 `COCO` 全部类别,同时展示 `bbox`、`mask` 等所有类型的标注,并打乱显示: + +```shell +python tools/analysis_tools/browse_coco_json.py --data-root './data/coco' \ + --img-dir 'train2017' \ + --ann-file 'annotations/instances_train2017.json' \ + --disp-all \ + --shuffle +``` + +## 可视化数据集 + +```shell +python tools/analysis_tools/browse_dataset.py \ + ${CONFIG_FILE} \ + [-o, --output-dir ${OUTPUT_DIR}] \ + [-p, --phase ${DATASET_PHASE}] \ + [-n, --show-number ${NUMBER_IMAGES_DISPLAY}] \ + [-i, --show-interval ${SHOW_INTERRVAL}] \ + [-m, --mode ${DISPLAY_MODE}] \ + [--cfg-options ${CFG_OPTIONS}] +``` + +**所有参数的说明**: + +- `config` : 模型配置文件的路径。 +- `-o, --output-dir`: 保存图片文件夹,如果没有指定,默认为 `'./output'`。 +- **`-p, --phase`**: 可视化数据集的阶段,只能为 `['train', 'val', 'test']` 之一,默认为 `'train'`。 +- **`-n, --show-number`**: 可视化样本数量。如果没有指定,默认展示数据集的所有图片。 +- **`-m, --mode`**: 可视化的模式,只能为 `['original', 'transformed', 'pipeline']` 之一。 默认为 `'transformed'`。 +- `--cfg-options` : 对配置文件的修改,参考[学习配置文件](./config.md)。 + +```shell +`-m, --mode` 用于设置可视化的模式,默认设置为 'transformed'。 +- 如果 `--mode` 设置为 'original',则获取原始图片; +- 如果 `--mode` 设置为 'transformed',则获取预处理后的图片; +- 如果 `--mode` 设置为 'pipeline',则获得数据流水线所有中间过程图片。 +``` + +**示例**: + +1. **'original'** 模式 : + +```shell +python ./tools/analysis_tools/browse_dataset.py configs/yolov5/yolov5_balloon.py --phase val --output-dir tmp --mode original +``` + +- `--phase val`: 可视化验证集, 可简化为 `-p val`; +- `--output-dir tmp`: 可视化结果保存在 "tmp" 文件夹, 可简化为 `-o tmp`; +- `--mode original`: 可视化原图, 可简化为 `-m original`; +- `--show-number 100`: 可视化100张图,可简化为 `-n 100`; + +2.**'transformed'** 模式 : + +```shell +python ./tools/analysis_tools/browse_dataset.py configs/yolov5/yolov5_balloon.py +``` + +3.**'pipeline'** 模式 : + +```shell +python ./tools/analysis_tools/browse_dataset.py configs/yolov5/yolov5_balloon.py -m pipeline +``` + +
+Image +
+ +## 可视化数据集分析 + +脚本 `tools/analysis_tools/dataset_analysis.py` 能够帮助用户得到四种功能的结果图,并将图片保存到当前运行目录下的 `dataset_analysis` 文件夹中。 + +关于该脚本的功能的说明: + +通过 `main()` 的数据准备,得到每个子函数所需要的数据。 + +功能一:显示类别和 bbox 实例个数的分布图,通过子函数 `show_bbox_num` 生成。 + + + +功能二:显示类别和 bbox 实例宽、高的分布图,通过子函数 `show_bbox_wh` 生成。 + + + +功能三:显示类别和 bbox 实例宽/高比例的分布图,通过子函数 `show_bbox_wh_ratio` 生成。 + + + +功能四:基于面积规则下,显示类别和 bbox 实例面积的分布图,通过子函数 `show_bbox_area` 生成。 + + + +打印列表显示,通过脚本中子函数 `show_class_list` 和 `show_data_list` 生成。 + + + +```shell +python tools/analysis_tools/dataset_analysis.py ${CONFIG} \ + [-h] \ + [--val-dataset ${TYPE}] \ + [--class-name ${CLASS_NAME}] \ + [--area-rule ${AREA_RULE}] \ + [--func ${FUNC}] \ + [--out-dir ${OUT_DIR}] +``` + +例子: + +1. 使用 `config` 文件 `configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py` 分析数据集,其中默认设置:数据加载类型为 `train_dataset` ,面积规则设置为 `[0,32,96,1e5]` ,生成包含所有类的结果图并将图片保存到当前运行目录下 `./dataset_analysis` 文件夹中: + +```shell +python tools/analysis_tools/dataset_analysis.py configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py +``` + +2. 使用 `config` 文件 `configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py` 分析数据集,通过 `--val-dataset` 设置将数据加载类型由默认的 `train_dataset` 改为 `val_dataset`: + +```shell +python tools/analysis_tools/dataset_analysis.py configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py \ + --val-dataset +``` + +3. 使用 `config` 文件 `configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py` 分析数据集,通过 `--class-name` 设置将生成所有类改为特定类显示,以显示 `person` 为例: + +```shell +python tools/analysis_tools/dataset_analysis.py configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py \ + --class-name person +``` + +4. 使用 `config` 文件 `configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py` 分析数据集,通过 `--area-rule` 重新定义面积规则,以 `30 70 125` 为例,面积规则变为 `[0,30,70,125,1e5]`: + +```shell +python tools/analysis_tools/dataset_analysis.py configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py \ + --area-rule 30 70 125 +``` + +5. 使用 `config` 文件 `configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py` 分析数据集,通过 `--func` 设置,将显示四个功能效果图改为只显示 `功能一` 为例: + +```shell +python tools/analysis_tools/dataset_analysis.py configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py \ + --func show_bbox_num +``` + +6. 使用 `config` 文件 `configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py` 分析数据集,通过 `--out-dir` 设置修改图片保存地址,以 `work_dirs/dataset_analysis` 地址为例: + +```shell +python tools/analysis_tools/dataset_analysis.py configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py \ + --out-dir work_dirs/dataset_analysis +``` + +## 优化器参数策略可视化 + +`tools/analysis_tools/vis_scheduler.py` 旨在帮助用户检查优化器的超参数调度器(无需训练),支持学习率(learning rate)、动量(momentum)和权值衰减(weight decay)。 + +```shell +python tools/analysis_tools/vis_scheduler.py \ + ${CONFIG_FILE} \ + [-p, --parameter ${PARAMETER_NAME}] \ + [-d, --dataset-size ${DATASET_SIZE}] \ + [-n, --ngpus ${NUM_GPUs}] \ + [-o, --out-dir ${OUT_DIR}] \ + [--title ${TITLE}] \ + [--style ${STYLE}] \ + [--window-size ${WINDOW_SIZE}] \ + [--cfg-options] +``` + +**所有参数的说明**: + +- `config` : 模型配置文件的路径。 +- **`-p, parameter`**: 可视化参数名,只能为 `["lr", "momentum", "wd"]` 之一, 默认为 `"lr"`. 
+- **`-d, --dataset-size`**: 数据集的大小。如果指定,`DATASETS.build` 将被跳过并使用这个数值作为数据集大小,默认使用 `DATASETS.build` 所得数据集的大小。 +- **`-n, --ngpus`**: 使用 GPU 的数量, 默认为1。 +- **`-o, --out-dir`**: 保存的可视化图片的文件夹路径,默认不保存。 +- `--title`: 可视化图片的标题,默认为配置文件名。 +- `--style`: 可视化图片的风格,默认为 `whitegrid`。 +- `--window-size`: 可视化窗口大小,如果没有指定,默认为 `12*7`。如果需要指定,按照格式 `'W*H'`。 +- `--cfg-options`: 对配置文件的修改,参考[学习配置文件](../tutorials/config.md)。 + +```{note} +部分数据集在解析标注阶段比较耗时,推荐直接将 `-d, dataset-size` 指定数据集的大小,以节约时间。 +``` + +你可以使用如下命令来绘制配置文件 `configs/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco.py` 将会使用的学习率变化曲线: + +```shell +python tools/analysis_tools/vis_scheduler.py \ + configs/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco.py \ + --dataset-size 118287 \ + --ngpus 8 \ + --out-dir ./output +``` + +
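+如果想进一步确认这类曲线对应的调度器写法,下面给出一个 `param_scheduler` 配置的简单示意(仅为基于 MMEngine 常见写法的草稿,`LinearLR`、`CosineAnnealingLR` 为 MMEngine 内置调度器,其余字段数值均为假设值,并非本仓库某个配置文件的原文):
+
+```python
+# 示意:先按 iter 线性 warmup,再按 epoch 余弦退火(数值均为假设)
+param_scheduler = [
+    dict(
+        type='LinearLR',        # 线性 warmup 调度器
+        start_factor=1e-5,      # 假设的起始学习率缩放因子
+        by_epoch=False,         # 按 iter 进行 warmup
+        begin=0,
+        end=1000),
+    dict(
+        type='CosineAnnealingLR',
+        eta_min=5e-5,           # 假设的最小学习率
+        begin=150,
+        end=300,
+        T_max=150,
+        by_epoch=True,
+        convert_to_iter_based=True)
+]
+```
+
+将类似字段写入配置文件后,即可用上文的 `vis_scheduler.py` 命令检查绘制出的曲线是否符合预期。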
diff --git a/docs/zh_cn/get_started/15_minutes_instance_segmentation.md b/docs/zh_cn/get_started/15_minutes_instance_segmentation.md new file mode 100644 index 00000000..48fe3ca9 --- /dev/null +++ b/docs/zh_cn/get_started/15_minutes_instance_segmentation.md @@ -0,0 +1,3 @@ +# 15 分钟上手 MMYOLO 实例分割 + +TODO diff --git a/docs/zh_cn/user_guides/yolov5_tutorial.md b/docs/zh_cn/get_started/15_minutes_object_detection.md similarity index 98% rename from docs/zh_cn/user_guides/yolov5_tutorial.md rename to docs/zh_cn/get_started/15_minutes_object_detection.md index 411c4cb4..fc7dc295 100644 --- a/docs/zh_cn/user_guides/yolov5_tutorial.md +++ b/docs/zh_cn/get_started/15_minutes_object_detection.md @@ -1,4 +1,4 @@ -# YOLOv5 从入门到部署全流程 +# 15 分钟上手 MMYOLO 目标检测 ## 环境安装 @@ -23,7 +23,7 @@ mim install -v -e . # "-e" 表示在可编辑模式下安装项目,因此对代码所做的任何本地修改都会生效,从而无需重新安装。 ``` -详细环境配置操作请查看 [get_started](../get_started.md) +详细环境配置操作请查看 [get_started](../get_started/installation.md) ## 数据集准备 diff --git a/docs/zh_cn/get_started/15_minutes_rotated_object_detection.md b/docs/zh_cn/get_started/15_minutes_rotated_object_detection.md new file mode 100644 index 00000000..ce4455c2 --- /dev/null +++ b/docs/zh_cn/get_started/15_minutes_rotated_object_detection.md @@ -0,0 +1,3 @@ +# 15 分钟上手 MMYOLO 旋转框目标检测 + +TODO diff --git a/docs/zh_cn/article.md b/docs/zh_cn/get_started/article.md similarity index 99% rename from docs/zh_cn/article.md rename to docs/zh_cn/get_started/article.md index be994b0d..e9e9bfc3 100644 --- a/docs/zh_cn/article.md +++ b/docs/zh_cn/get_started/article.md @@ -1,4 +1,4 @@ -# 解读文章和资源汇总 +# 中文解读资源汇总 本文汇总了 MMYOLO 或相关的 [OpenMMLab](https://www.zhihu.com/people/openmmlab) 解读的部分文章(更多文章和视频见 [OpenMMLabCourse](https://github.com/open-mmlab/OpenMMLabCourse) ),如果您有推荐的文章(不一定是 OpenMMLab 发布的文章,可以是自己写的文章),非常欢迎提 Pull Request 添加到这里。 @@ -58,6 +58,22 @@ | :---: | :----------: | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | | 第1期 | 特征图可视化 | [![Link](https://i0.hdslb.com/bfs/archive/081f300c84d6556f40d984cfbe801fc0644ff449.jpg@112w_63h_1c.webp)](https://www.bilibili.com/video/BV1je4y1478R/) [![bilibili](https://img.shields.io/badge/dynamic/json?label=views&style=social&logo=bilibili&query=data.stat.view&url=https%3A%2F%2Fapi.bilibili.com%2Fx%2Fweb-interface%2Fview%3Fbvid%3DBV1je4y1478R)](https://www.bilibili.com/video/BV1je4y1478R/) | +## MMDetection 解读文章和资源 + +### 文章 + +- [MMDetection 3.0:目标检测新基准与前沿](https://zhuanlan.zhihu.com/p/575246786) +- [目标检测、实例分割、旋转框样样精通!详解高性能检测算法 RTMDet](https://zhuanlan.zhihu.com/p/598846422) +- [MMDetection 支持数据增强神器 Simple Copy Paste 全过程](https://zhuanlan.zhihu.com/p/559940982) + +### 知乎问答和资源 + +- [深度学习科研,如何高效进行代码和实验管理?](https://www.zhihu.com/question/269707221/answer/2480772257) +- [深度学习方面的科研工作中的实验代码有什么规范和写作技巧?如何妥善管理实验数据?](https://www.zhihu.com/question/268193800/answer/2586000037) +- [COCO 数据集上 1x 模式下为什么不采用多尺度训练?](https://www.zhihu.com/question/462170786/answer/1915119662) +- [MMDetection 中 SOTA 论文源码中将训练过程中 BN 层的 eval 打开?](https://www.zhihu.com/question/471189603/answer/2195540892) +- [基于 PyTorch 的 MMDetection 中训练的随机性来自何处?](https://www.zhihu.com/question/453511684/answer/1839683634) + ## MMEngine 解读文章和资源 - [从 MMCV 到 
MMEngine,架构升级,体验升级!](https://zhuanlan.zhihu.com/p/571830155) @@ -67,22 +83,6 @@ - [MMCV 全新升级,新增超全数据变换功能,还有两大变化](https://zhuanlan.zhihu.com/p/572550592) - [手把手教你如何高效地在 MMCV 中贡献算子](https://zhuanlan.zhihu.com/p/464492627) -## MMDetection 解读文章和资源 - -## 文章 - -- [MMDetection 3.0:目标检测新基准与前沿](https://zhuanlan.zhihu.com/p/575246786) -- [目标检测、实例分割、旋转框样样精通!详解高性能检测算法 RTMDet](https://zhuanlan.zhihu.com/p/598846422) -- [MMDetection 支持数据增强神器 Simple Copy Paste 全过程](https://zhuanlan.zhihu.com/p/559940982) - -## 知乎问答和资源 - -- [深度学习科研,如何高效进行代码和实验管理?](https://www.zhihu.com/question/269707221/answer/2480772257) -- [深度学习方面的科研工作中的实验代码有什么规范和写作技巧?如何妥善管理实验数据?](https://www.zhihu.com/question/268193800/answer/2586000037) -- [COCO 数据集上 1x 模式下为什么不采用多尺度训练?](https://www.zhihu.com/question/462170786/answer/1915119662) -- [MMDetection 中 SOTA 论文源码中将训练过程中 BN 层的 eval 打开?](https://www.zhihu.com/question/471189603/answer/2195540892) -- [基于 PyTorch 的 MMDetection 中训练的随机性来自何处?](https://www.zhihu.com/question/453511684/answer/1839683634) - ## PyTorch 解读文章和资源 - [PyTorch1.11 亮点一览:TorchData、functorch、DDP 静态图](https://zhuanlan.zhihu.com/p/486222256) diff --git a/docs/zh_cn/get_started/dependencies.md b/docs/zh_cn/get_started/dependencies.md new file mode 100644 index 00000000..8b736098 --- /dev/null +++ b/docs/zh_cn/get_started/dependencies.md @@ -0,0 +1,44 @@ +# 依赖 + +下表为 MMYOLO 和 MMEngine, MMCV, MMDetection 依赖库的版本要求,请安装正确的版本以避免安装问题。 + +| MMYOLO version | MMDetection version | MMEngine version | MMCV version | +| :------------: | :----------------------: | :----------------------: | :---------------------: | +| main | mmdet>=3.0.0rc5, \<3.1.0 | mmengine>=0.3.1, \<1.0.0 | mmcv>=2.0.0rc0, \<2.1.0 | +| 0.3.0 | mmdet>=3.0.0rc5, \<3.1.0 | mmengine>=0.3.1, \<1.0.0 | mmcv>=2.0.0rc0, \<2.1.0 | +| 0.2.0 | mmdet>=3.0.0rc3, \<3.1.0 | mmengine>=0.3.1, \<1.0.0 | mmcv>=2.0.0rc0, \<2.1.0 | +| 0.1.3 | mmdet>=3.0.0rc3, \<3.1.0 | mmengine>=0.3.1, \<1.0.0 | mmcv>=2.0.0rc0, \<2.1.0 | +| 0.1.2 | mmdet>=3.0.0rc2, \<3.1.0 | mmengine>=0.3.0, \<1.0.0 | mmcv>=2.0.0rc0, \<2.1.0 | +| 0.1.1 | mmdet==3.0.0rc1 | mmengine>=0.1.0, \<0.2.0 | mmcv>=2.0.0rc0, \<2.1.0 | +| 0.1.0 | mmdet==3.0.0rc0 | mmengine>=0.1.0, \<0.2.0 | mmcv>=2.0.0rc0, \<2.1.0 | + +本节中,我们将演示如何用 PyTorch 准备一个环境。 + +MMYOLO 支持在 Linux,Windows 和 macOS 上运行。它需要 Python 3.7 以上,CUDA 9.2 以上和 PyTorch 1.7 以上。 + +```{note} +如果你对 PyTorch 有经验并且已经安装了它,你可以直接跳转到[下一小节](#安装流程)。否则,你可以按照下述步骤进行准备 +``` + +**步骤 0.** 从 [官方网站](https://docs.conda.io/en/latest/miniconda.html) 下载并安装 Miniconda。 + +**步骤 1.** 创建并激活一个 conda 环境。 + +```shell +conda create -n open-mmlab python=3.8 -y +conda activate open-mmlab +``` + +**步骤 2.** 基于 [PyTorch 官方说明](https://pytorch.org/get-started/locally/) 安装 PyTorch。 + +在 GPU 平台上: + +```shell +conda install pytorch torchvision -c pytorch +``` + +在 CPU 平台上: + +```shell +conda install pytorch torchvision cpuonly -c pytorch +``` diff --git a/docs/zh_cn/get_started.md b/docs/zh_cn/get_started/installation.md similarity index 82% rename from docs/zh_cn/get_started.md rename to docs/zh_cn/get_started/installation.md index 1cd1d1c0..0dcc0da0 100644 --- a/docs/zh_cn/get_started.md +++ b/docs/zh_cn/get_started/installation.md @@ -1,53 +1,6 @@ -# 开始你的第一步 +# 安装和验证 -## 依赖 - -下表为 MMYOLO 和 MMEngine, MMCV, MMDetection 依赖库的版本要求,请安装正确的版本以避免安装问题。 - -| MMYOLO version | MMDetection version | MMEngine version | MMCV version | -| :------------: | :----------------------: | :----------------------: | :---------------------: | -| main | mmdet>=3.0.0rc5, \<3.1.0 | mmengine>=0.3.1, \<1.0.0 | mmcv>=2.0.0rc0, \<2.1.0 | -| 0.3.0 | 
mmdet>=3.0.0rc5, \<3.1.0 | mmengine>=0.3.1, \<1.0.0 | mmcv>=2.0.0rc0, \<2.1.0 | -| 0.2.0 | mmdet>=3.0.0rc3, \<3.1.0 | mmengine>=0.3.1, \<1.0.0 | mmcv>=2.0.0rc0, \<2.1.0 | -| 0.1.3 | mmdet>=3.0.0rc3, \<3.1.0 | mmengine>=0.3.1, \<1.0.0 | mmcv>=2.0.0rc0, \<2.1.0 | -| 0.1.2 | mmdet>=3.0.0rc2, \<3.1.0 | mmengine>=0.3.0, \<1.0.0 | mmcv>=2.0.0rc0, \<2.1.0 | -| 0.1.1 | mmdet==3.0.0rc1 | mmengine>=0.1.0, \<0.2.0 | mmcv>=2.0.0rc0, \<2.1.0 | -| 0.1.0 | mmdet==3.0.0rc0 | mmengine>=0.1.0, \<0.2.0 | mmcv>=2.0.0rc0, \<2.1.0 | - -本节中,我们将演示如何用 PyTorch 准备一个环境。 - -MMYOLO 支持在 Linux,Windows 和 macOS 上运行。它需要 Python 3.7 以上,CUDA 9.2 以上和 PyTorch 1.7 以上。 - -```{note} -如果你对 PyTorch 有经验并且已经安装了它,你可以直接跳转到[下一小节](#安装流程)。否则,你可以按照下述步骤进行准备 -``` - -**步骤 0.** 从 [官方网站](https://docs.conda.io/en/latest/miniconda.html) 下载并安装 Miniconda。 - -**步骤 1.** 创建并激活一个 conda 环境。 - -```shell -conda create -n open-mmlab python=3.8 -y -conda activate open-mmlab -``` - -**步骤 2.** 基于 [PyTorch 官方说明](https://pytorch.org/get-started/locally/) 安装 PyTorch。 - -在 GPU 平台上: - -```shell -conda install pytorch torchvision -c pytorch -``` - -在 CPU 平台上: - -```shell -conda install pytorch torchvision cpuonly -c pytorch -``` - -## 安装流程 - -### 最佳实践 +## 最佳实践 **步骤 0.** 使用 [MIM](https://github.com/open-mmlab/mim) 安装 [MMEngine](https://github.com/open-mmlab/mmengine)、 [MMCV](https://github.com/open-mmlab/mmcv) 和 [MMDetection](https://github.com/open-mmlab/mmdetection) 。 @@ -136,9 +89,9 @@ inference_detector(model, 'demo/demo.jpg') 你将会看到一个包含 `DetDataSample` 的列表,预测结果在 `pred_instance` 里,包含有预测框、预测分数 和 预测类别。 -### 自定义安装 +## 自定义安装 -#### CUDA 版本 +### CUDA 版本 在安装 PyTorch 时,你需要指定 CUDA 的版本。如果你不清楚应该选择哪一个,请遵循我们的建议。 @@ -154,7 +107,7 @@ inference_detector(model, 'demo/demo.jpg') 的配置相匹配(如用 `conda install` 安装 PyTorch 时指定的 cudatoolkit 版本)。 ``` -#### 不使用 MIM 安装 MMEngine +### 不使用 MIM 安装 MMEngine 要使用 pip 而不是 MIM 来安装 MMEngine,请遵照 [MMEngine 安装指南](https://mmengine.readthedocs.io/en/latest/get_started/installation.html)。 @@ -164,7 +117,7 @@ inference_detector(model, 'demo/demo.jpg') pip install "mmengine>=0.3.1" ``` -#### 不使用 MIM 安装 MMCV +### 不使用 MIM 安装 MMCV MMCV 包含 C++ 和 CUDA 扩展,因此其对 PyTorch 的依赖比较复杂。MIM 会自动解析这些 依赖,选择合适的 MMCV 预编译包,使安装更简单,但它并不是必需的。 @@ -177,7 +130,7 @@ MMCV 包含 C++ 和 CUDA 扩展,因此其对 PyTorch 的依赖比较复杂。M pip install "mmcv>=2.0.0rc1" -f https://download.openmmlab.com/mmcv/dist/cu116/torch1.12.0/index.html ``` -#### 在 CPU 环境中安装 +### 在 CPU 环境中安装 我们的代码能够建立在只使用 CPU 的环境(CUDA 不可用)。 @@ -205,7 +158,7 @@ pip install "mmcv>=2.0.0rc1" -f https://download.openmmlab.com/mmcv/dist/cu116/t | CARAFE | CARAFE | | SyncBatchNorm | ResNeSt | -#### 在 Google Colab 中安装 +### 在 Google Colab 中安装 [Google Colab](https://colab.research.google.com/) 通常已经包含了 PyTorch 环境,因此我们只需要安装 MMEngine、MMCV、MMDetection 和 MMYOLO 即可,命令如下: @@ -238,7 +191,7 @@ print(mmyolo.__version__) 在 Jupyter 中,感叹号 `!` 用于执行外部命令,而 `%cd` 是一个[魔术命令](https://ipython.readthedocs.io/en/stable/interactive/magics.html#magic-cd),用于切换 Python 的工作路径。 ``` -#### 通过 Docker 使用 MMYOLO +### 通过 Docker 使用 MMYOLO 我们提供了一个 [Dockerfile](https://github.com/open-mmlab/mmyolo/blob/main/docker/Dockerfile) 来构建一个镜像。请确保你的 [docker 版本](https://docs.docker.com/engine/install/) >=`19.03`。 @@ -265,13 +218,13 @@ export DATA_DIR=/path/to/your/dataset docker run --gpus all --shm-size=8g -it -v ${DATA_DIR}:/mmyolo/data mmyolo ``` -### 排除故障 +## 排除故障 如果你在安装过程中遇到一些问题,请先查看 [FAQ](notes/faq.md) 页面。 如果没有找到解决方案,你也可以在 GitHub 上 [打开一个问题](https://github.com/open-mmlab/mmyolo/issues/new/choose)。 -### 使用多个 MMYOLO 版本进行开发 +## 使用多个 MMYOLO 版本进行开发 训练和测试的脚本已经在 `PYTHONPATH` 中进行了修改,以确保脚本使用当前目录中的 MMYOLO。 diff 
--git a/docs/zh_cn/overview.md b/docs/zh_cn/get_started/overview.md similarity index 51% rename from docs/zh_cn/overview.md rename to docs/zh_cn/get_started/overview.md index 6856b132..58dc66ea 100644 --- a/docs/zh_cn/overview.md +++ b/docs/zh_cn/get_started/overview.md @@ -2,7 +2,7 @@ 本章向您介绍 MMYOLO 的整体框架,并提供详细的教程链接。 -## 什么是 MMYOLO +## MMYOLO 介绍
图片 @@ -27,33 +27,15 @@ MMYOLO 文件结构和 MMDetection 完全一致。为了能够充分复用 MMDet - **optimizers** 提供优化器和优化器封装。 - **hooks** 提供 runner 的各种钩子。 -## 如何使用本指南 +## 文档使用指南 -以下是 MMYOLO 的详细指南: +MMYOLO 中将文档结构分成 6 个部分,对应不同需求的用户。 -1. 安装说明见[开始你的第一步](get_started.md) +- **开启 MMYOLO 之旅**。本部分是第一次使用 MMYOLO 用户的必读文档,请全文仔细阅读 +- **推荐专题**。本部分是 MMYOLO 中提供的以主题形式的精华文档,包括了 MMYOLO 中大量的特性等。强烈推荐使用 MMYOLO 的所有用户阅读 +- **常用功能**。本部分提供了训练测试过程中用户经常会用到的各类常用功能,用户可以在用到时候再次查阅 +- **实用工具**。本部分是 tools 下使用工具的汇总文档,便于大家能够快速的愉快使用 MMYOLO 中提供的各类脚本 +- **基础和进阶教程**。本部分设计到 MMYOLO 中的一些基本概念和进阶教程等,适合想详细了解 MMYOLO 设计思想和结构设计的用户 +- **其他**。其余部分包括 模型仓库、说明和接口文档等等 -2. MMYOLO 的基本使用方法请参考以下教程: - - - [训练和测试](https://mmyolo.readthedocs.io/zh_CN/latest/user_guides/index.html#id1) - - [从入门到部署全流程](https://mmyolo.readthedocs.io/zh_CN/latest/user_guides/index.html#id2) - - [实用工具](https://mmyolo.readthedocs.io/zh_CN/latest/user_guides/index.html#id3) - -3. YOLO 系列算法实现和全解析教程: - - - [必备基础](https://mmyolo.readthedocs.io/zh_CN/latest/algorithm_descriptions/index.html#id1) - - [原理和实现全解析](https://mmyolo.readthedocs.io/zh_CN/latest/algorithm_descriptions/index.html#id2) - -4. YOLO 系列部署教程: - - - [部署必备教程](https://mmyolo.readthedocs.io/zh_CN/latest/deploy/index.html#id1) - - [部署全流程说明](https://mmyolo.readthedocs.io/zh_CN/latest/deploy/index.html#id2) - -5. 参考以下教程深入了解: - - - [模块组合](https://mmyolo.readthedocs.io/zh_CN/latest/advanced_guides/index.html#id1) - - [数据流](https://mmyolo.readthedocs.io/zh_CN/latest/advanced_guides/index.html#id2) - - [How to](https://mmyolo.readthedocs.io/zh_CN/latest/advanced_guides/index.html#how-to) - - [插件](https://mmyolo.readthedocs.io/zh_CN/latest/advanced_guides/index.html#id4) - -6. [解读文章和资源汇总](article.md) +不同需求的用户可以按需选择你心怡的内容阅读。如果你对本文档有不同异议或者更好的优化办法,欢迎给 MMYOLO 提 PR ~ diff --git a/docs/zh_cn/index.rst b/docs/zh_cn/index.rst index 5ce41a6b..d3473e70 100644 --- a/docs/zh_cn/index.rst +++ b/docs/zh_cn/index.rst @@ -1,42 +1,79 @@ 欢迎来到 MMYOLO 中文文档! ======================================= +您可以在页面左下角切换中英文文档。 .. toctree:: - :maxdepth: 1 + :maxdepth: 2 :caption: 开启 MMYOLO 之旅 - overview.md - get_started.md + get_started/overview.md + get_started/dependencies.md + get_started/installation.md + get_started/15_minutes_object_detection.md + get_started/15_minutes_rotated_object_detection.md + get_started/15_minutes_instance_segmentation.md + get_started/article.md .. toctree:: :maxdepth: 2 - :caption: 使用指南 + :caption: 推荐专题 - user_guides/index.rst + featured_topics/contributing.md + featured_topics/model_design.md + featured_topics/industry_examples.md + featured_topics/dataset_preparation.md + featured_topics/algorithm_descriptions/index.rst + featured_topics/replace_backbone.md + featured_topics/labeling_to_deployment_tutorials.md + featured_topics/visualization.md + featured_topics/deploy/index.rst + featured_topics/troubleshooting_steps.md .. toctree:: :maxdepth: 2 - :caption: 算法解析 + :caption: 常用功能 - algorithm_descriptions/index.rst + common_usage/resume_training.md + common_usage/syncbn.md + common_usage/amp_training.md + common_usage/plugins.md + common_usage/freeze_layers.md + common_usage/output_predictions.md + common_usage/set_random_seed.md + common_usage/mim_usage.md + common_usage/multi_necks.md + common_usage/specify_device.md + common_usage/module_combination.md .. 
toctree:: :maxdepth: 2 - :caption: 进阶教程 + :caption: 实用工具 - advanced_guides/index.rst + useful_tools/browse_coco_json.md + useful_tools/browse_dataset.md + useful_tools/dataset_analysis.md + useful_tools/dataset_converters.md + useful_tools/download_dataset.md + useful_tools/extract_subcoco.md + useful_tools/log_analysis.md + useful_tools/model_converters.md + useful_tools/optimize_anchors.md + useful_tools/print_config.md + useful_tools/vis_scheduler.md .. toctree:: :maxdepth: 2 - :caption: 部署教程 + :caption: 基础教程 - deploy/index.rst + tutorials/config.md + tutorials/data_flow.md + tutorials/faq.md .. toctree:: :maxdepth: 1 - :caption: 解读文章和资源汇总 + :caption: 进阶教程 - article.md + advanced_guides/cross-library_application.md .. toctree:: :maxdepth: 1 @@ -54,17 +91,11 @@ :maxdepth: 1 :caption: 说明 - notes/faq.md notes/changelog.md notes/compatibility.md notes/conventions.md + notes/code_style.md -.. toctree:: - :maxdepth: 2 - :caption: 社区 - - community/contributing.md - community/code_style.md .. toctree:: :caption: 语言切换 diff --git a/docs/zh_cn/community/code_style.md b/docs/zh_cn/notes/code_style.md similarity index 99% rename from docs/zh_cn/community/code_style.md rename to docs/zh_cn/notes/code_style.md index 8ddb87c2..4634016d 100644 --- a/docs/zh_cn/community/code_style.md +++ b/docs/zh_cn/notes/code_style.md @@ -70,7 +70,7 @@ from mmcv.cnn.bricks import Conv2d, build_norm_layer, DropPath, MaxPool2d, \ from ...utils import is_str # 最多向上回溯一层,过多的回溯容易导致结构混乱 ``` -OpenMMLab 项目使用 pre-commit 工具自动格式化代码,详情见[贡献代码](./contributing.md#代码风格)。 +OpenMMLab 项目使用 pre-commit 工具自动格式化代码,详情见[贡献代码](../featured_topics/contributing.md#代码风格)。 ### 命名规范 diff --git a/docs/zh_cn/user_guides/config.md b/docs/zh_cn/tutorials/config.md similarity index 100% rename from docs/zh_cn/user_guides/config.md rename to docs/zh_cn/tutorials/config.md diff --git a/docs/zh_cn/advanced_guides/data_flow.md b/docs/zh_cn/tutorials/data_flow.md similarity index 100% rename from docs/zh_cn/advanced_guides/data_flow.md rename to docs/zh_cn/tutorials/data_flow.md diff --git a/docs/zh_cn/notes/faq.md b/docs/zh_cn/tutorials/faq.md similarity index 83% rename from docs/zh_cn/notes/faq.md rename to docs/zh_cn/tutorials/faq.md index 52de1fa6..1cda1854 100644 --- a/docs/zh_cn/notes/faq.md +++ b/docs/zh_cn/tutorials/faq.md @@ -2,9 +2,9 @@ 我们在这里列出了使用时的一些常见问题及其相应的解决方案。 如果您发现有一些问题被遗漏,请随时提 PR 丰富这个列表。 如果您无法在此获得帮助,请创建 [issue](https://github.com/open-mmlab/mmyolo/issues/new/choose) 提问,但是请在模板中填写所有必填信息,这有助于我们更快定位问题。 -## 为什么要推出 MMYOLO? 为何要单独开一个仓库而不是直接放到 MMDetection 中? +## 为什么要推出 MMYOLO? -自从开源后,不断收到社区小伙伴们类似的疑问,答案可以归纳为以下三点: +为什么要推出 MMYOLO? 为何要单独开一个仓库而不是直接放到 MMDetection 中? 自从开源后,不断收到社区小伙伴们类似的疑问,答案可以归纳为以下三点: **(1) 统一运行和推理平台** diff --git a/docs/zh_cn/useful_tools/browse_coco_json.md b/docs/zh_cn/useful_tools/browse_coco_json.md new file mode 100644 index 00000000..3e33f538 --- /dev/null +++ b/docs/zh_cn/useful_tools/browse_coco_json.md @@ -0,0 +1,62 @@ +# 可视化 COCO 标签 + +脚本 `tools/analysis_tools/browse_coco_json.py` 能够使用可视化显示 COCO 标签在图片的情况。 + +```shell +python tools/analysis_tools/browse_coco_json.py [--data-root ${DATA_ROOT}] \ + [--img-dir ${IMG_DIR}] \ + [--ann-file ${ANN_FILE}] \ + [--wait-time ${WAIT_TIME}] \ + [--disp-all] [--category-names CATEGORY_NAMES [CATEGORY_NAMES ...]] \ + [--shuffle] +``` + +其中,如果图片、标签都在同一个文件夹下的话,可以指定 `--data-root` 到该文件夹,然后 `--img-dir` 和 `--ann-file` 指定该文件夹的相对路径,代码会自动拼接。 +如果图片、标签文件不在同一个文件夹下的话,则无需指定 `--data-root` ,直接指定绝对路径的 `--img-dir` 和 `--ann-file` 即可。 + +例子: + +1. 
查看 `COCO` 全部类别,同时展示 `bbox`、`mask` 等所有类型的标注: + +```shell +python tools/analysis_tools/browse_coco_json.py --data-root './data/coco' \ + --img-dir 'train2017' \ + --ann-file 'annotations/instances_train2017.json' \ + --disp-all +``` + +如果图片、标签不在同一个文件夹下的话,可以使用绝对路径: + +```shell +python tools/analysis_tools/browse_coco_json.py --img-dir '/dataset/image/coco/train2017' \ + --ann-file '/label/instances_train2017.json' \ + --disp-all +``` + +2. 查看 `COCO` 全部类别,同时仅展示 `bbox` 类型的标注,并打乱显示: + +```shell +python tools/analysis_tools/browse_coco_json.py --data-root './data/coco' \ + --img-dir 'train2017' \ + --ann-file 'annotations/instances_train2017.json' \ + --shuffle +``` + +3. 只查看 `bicycle` 和 `person` 类别,同时仅展示 `bbox` 类型的标注: + +```shell +python tools/analysis_tools/browse_coco_json.py --data-root './data/coco' \ + --img-dir 'train2017' \ + --ann-file 'annotations/instances_train2017.json' \ + --category-names 'bicycle' 'person' +``` + +4. 查看 `COCO` 全部类别,同时展示 `bbox`、`mask` 等所有类型的标注,并打乱显示: + +```shell +python tools/analysis_tools/browse_coco_json.py --data-root './data/coco' \ + --img-dir 'train2017' \ + --ann-file 'annotations/instances_train2017.json' \ + --disp-all \ + --shuffle +``` diff --git a/docs/zh_cn/useful_tools/browse_dataset.md b/docs/zh_cn/useful_tools/browse_dataset.md new file mode 100644 index 00000000..a0d4a7c6 --- /dev/null +++ b/docs/zh_cn/useful_tools/browse_dataset.md @@ -0,0 +1,57 @@ +# 可视化数据集 + +```shell +python tools/analysis_tools/browse_dataset.py \ + ${CONFIG_FILE} \ + [-o, --output-dir ${OUTPUT_DIR}] \ + [-p, --phase ${DATASET_PHASE}] \ + [-n, --show-number ${NUMBER_IMAGES_DISPLAY}] \ + [-i, --show-interval ${SHOW_INTERRVAL}] \ + [-m, --mode ${DISPLAY_MODE}] \ + [--cfg-options ${CFG_OPTIONS}] +``` + +**所有参数的说明**: + +- `config` : 模型配置文件的路径。 +- `-o, --output-dir`: 保存图片文件夹,如果没有指定,默认为 `'./output'`。 +- **`-p, --phase`**: 可视化数据集的阶段,只能为 `['train', 'val', 'test']` 之一,默认为 `'train'`。 +- **`-n, --show-number`**: 可视化样本数量。如果没有指定,默认展示数据集的所有图片。 +- **`-m, --mode`**: 可视化的模式,只能为 `['original', 'transformed', 'pipeline']` 之一。 默认为 `'transformed'`。 +- `--cfg-options` : 对配置文件的修改,参考[学习配置文件](./config.md)。 + +```shell +`-m, --mode` 用于设置可视化的模式,默认设置为 'transformed'。 +- 如果 `--mode` 设置为 'original',则获取原始图片; +- 如果 `--mode` 设置为 'transformed',则获取预处理后的图片; +- 如果 `--mode` 设置为 'pipeline',则获得数据流水线所有中间过程图片。 +``` + +**示例**: + +1. **'original'** 模式 : + +```shell +python ./tools/analysis_tools/browse_dataset.py configs/yolov5/yolov5_balloon.py --phase val --output-dir tmp --mode original +``` + +- `--phase val`: 可视化验证集, 可简化为 `-p val`; +- `--output-dir tmp`: 可视化结果保存在 "tmp" 文件夹, 可简化为 `-o tmp`; +- `--mode original`: 可视化原图, 可简化为 `-m original`; +- `--show-number 100`: 可视化100张图,可简化为 `-n 100`; + +2. **'transformed'** 模式 : + +```shell +python ./tools/analysis_tools/browse_dataset.py configs/yolov5/yolov5_balloon.py +``` + +3. **'pipeline'** 模式 : + +```shell +python ./tools/analysis_tools/browse_dataset.py configs/yolov5/yolov5_balloon.py -m pipeline +``` + +
+Image +
diff --git a/docs/zh_cn/useful_tools/dataset_analysis.md b/docs/zh_cn/useful_tools/dataset_analysis.md new file mode 100644 index 00000000..121128c9 --- /dev/null +++ b/docs/zh_cn/useful_tools/dataset_analysis.md @@ -0,0 +1,80 @@ +# 可视化数据集分析结果 + +脚本 `tools/analysis_tools/dataset_analysis.py` 能够帮助用户得到四种功能的结果图,并将图片保存到当前运行目录下的 `dataset_analysis` 文件夹中。 + +关于该脚本的功能的说明: + +通过 `main()` 的数据准备,得到每个子函数所需要的数据。 + +功能一:显示类别和 bbox 实例个数的分布图,通过子函数 `show_bbox_num` 生成。 + + + +功能二:显示类别和 bbox 实例宽、高的分布图,通过子函数 `show_bbox_wh` 生成。 + + + +功能三:显示类别和 bbox 实例宽/高比例的分布图,通过子函数 `show_bbox_wh_ratio` 生成。 + + + +功能四:基于面积规则下,显示类别和 bbox 实例面积的分布图,通过子函数 `show_bbox_area` 生成。 + + + +打印列表显示,通过脚本中子函数 `show_class_list` 和 `show_data_list` 生成。 + + + +```shell +python tools/analysis_tools/dataset_analysis.py ${CONFIG} \ + [-h] \ + [--val-dataset ${TYPE}] \ + [--class-name ${CLASS_NAME}] \ + [--area-rule ${AREA_RULE}] \ + [--func ${FUNC}] \ + [--out-dir ${OUT_DIR}] +``` + +例子: + +1. 使用 `config` 文件 `configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py` 分析数据集,其中默认设置:数据加载类型为 `train_dataset` ,面积规则设置为 `[0,32,96,1e5]` ,生成包含所有类的结果图并将图片保存到当前运行目录下 `./dataset_analysis` 文件夹中: + +```shell +python tools/analysis_tools/dataset_analysis.py configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py +``` + +2. 使用 `config` 文件 `configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py` 分析数据集,通过 `--val-dataset` 设置将数据加载类型由默认的 `train_dataset` 改为 `val_dataset`: + +```shell +python tools/analysis_tools/dataset_analysis.py configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py \ + --val-dataset +``` + +3. 使用 `config` 文件 `configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py` 分析数据集,通过 `--class-name` 设置将生成所有类改为特定类显示,以显示 `person` 为例: + +```shell +python tools/analysis_tools/dataset_analysis.py configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py \ + --class-name person +``` + +4. 使用 `config` 文件 `configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py` 分析数据集,通过 `--area-rule` 重新定义面积规则,以 `30 70 125` 为例,面积规则变为 `[0,30,70,125,1e5]`: + +```shell +python tools/analysis_tools/dataset_analysis.py configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py \ + --area-rule 30 70 125 +``` + +5. 使用 `config` 文件 `configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py` 分析数据集,通过 `--func` 设置,将显示四个功能效果图改为只显示 `功能一` 为例: + +```shell +python tools/analysis_tools/dataset_analysis.py configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py \ + --func show_bbox_num +``` + +6. 使用 `config` 文件 `configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py` 分析数据集,通过 `--out-dir` 设置修改图片保存地址,以 `work_dirs/dataset_analysis` 地址为例: + +```shell +python tools/analysis_tools/dataset_analysis.py configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py \ + --out-dir work_dirs/dataset_analysis +``` diff --git a/docs/zh_cn/useful_tools/dataset_converters.md b/docs/zh_cn/useful_tools/dataset_converters.md new file mode 100644 index 00000000..71e49472 --- /dev/null +++ b/docs/zh_cn/useful_tools/dataset_converters.md @@ -0,0 +1,58 @@ +# 数据集转换 + +文件夹 `tools/data_converters/` 目前包含 `ballon2coco.py`、`yolo2coco.py` 和 `labelme2coco.py` 三个数据集转换工具。 + +- `ballon2coco.py` 将 `balloon` 数据集(该小型数据集仅作为入门使用)转换成 COCO 的格式。 + +关于该脚本的详细说明,请看 [YOLOv5 从入门到部署全流程](../get_started/15_minutes.md) 中 `数据集准备` 小节。 + +```shell +python tools/dataset_converters/balloon2coco.py +``` + +- `yolo2coco.py` 将 `yolo-style` **.txt** 格式的数据集转换成 COCO 的格式,请按如下方式使用: + +```shell +python tools/dataset_converters/yolo2coco.py /path/to/the/root/dir/of/your_dataset +``` + +使用说明: + +1. 
`image_dir` 是需要你传入的待转换的 yolo 格式数据集的根目录,内应包含 `images` 、 `labels` 和 `classes.txt` 文件, `classes.txt` 是当前 dataset 对应的类的声明,一行一个类别。 + `image_dir` 结构如下例所示: + +```bash +. +└── $ROOT_PATH + ├── classes.txt + ├── labels + │ ├── a.txt + │ ├── b.txt + │ └── ... + ├── images + │ ├── a.jpg + │ ├── b.png + │ └── ... + └── ... +``` + +2. 脚本会检测 `image_dir` 下是否已有的 `train.txt` 、 `val.txt` 和 `test.txt` 。若检测到文件,则会按照类别进行整理, 否则默认不需要分类。故请确保对应的 `train.txt` 、 `val.txt` 和 `test.txt` 要在 `image_dir` 内。文件内的图片路径必须是**绝对路径**。 +3. 脚本会默认在 `image_dir` 目录下创建 `annotations` 文件夹并将转换结果存在这里。如果在 `image_dir` 下没找到分类文件,输出文件即为一个 `result.json`,反之则会生成需要的 `train.json` 、 `val.json`、 `test.json`,脚本完成后 `annotations` 结构可如下例所示: + +```bash +. +└── $ROOT_PATH + ├── annotations + │ ├── result.json + │ └── ... + ├── classes.txt + ├── labels + │ ├── a.txt + │ ├── b.txt + │ └── ... + ├── images + │ ├── a.jpg + │ ├── b.png + │ └── ... + └── ... +``` diff --git a/docs/zh_cn/useful_tools/download_dataset.md b/docs/zh_cn/useful_tools/download_dataset.md new file mode 100644 index 00000000..a4ad6f41 --- /dev/null +++ b/docs/zh_cn/useful_tools/download_dataset.md @@ -0,0 +1,11 @@ +# 数据集下载 + +脚本 `tools/misc/download_dataset.py` 支持下载数据集,例如 `COCO`、`VOC`、`LVIS` 和 `Balloon`. + +```shell +python tools/misc/download_dataset.py --dataset-name coco2017 +python tools/misc/download_dataset.py --dataset-name voc2007 +python tools/misc/download_dataset.py --dataset-name voc2012 +python tools/misc/download_dataset.py --dataset-name lvis +python tools/misc/download_dataset.py --dataset-name balloon [--save-dir ${SAVE_DIR}] [--unzip] +``` diff --git a/docs/zh_cn/useful_tools/extract_subcoco.md b/docs/zh_cn/useful_tools/extract_subcoco.md new file mode 100644 index 00000000..60935330 --- /dev/null +++ b/docs/zh_cn/useful_tools/extract_subcoco.md @@ -0,0 +1,60 @@ +# 提取 COCO 子集 + +COCO2017 数据集训练数据集包括 118K 张图片,验证集包括 5K 张图片,数据集比较大。在调试或者快速验证程序是否正确的场景下加载 json 会需要消耗较多资源和带来较慢的启动速度,这会导致程序体验不好。 + +`extract_subcoco.py` 脚本提供了按指定图片数量、类别、锚框尺寸来切分图片的功能,用户可以通过 `--num-img`, `--classes`, `--area-size` 参数来得到指定条件的 COCO 子集,从而满足上述需求。 + +例如通过以下脚本切分图片: + +```shell +python tools/misc/extract_subcoco.py \ + ${ROOT} \ + ${OUT_DIR} \ + --num-img 20 \ + --classes cat dog person \ + --area-size small +``` + +会切分出 20 张图片,且这 20 张图片只会保留同时满足类别条件和锚框尺寸条件的标注信息, 没有满足条件的标注信息的图片不会被选择,保证了这 20 张图都是有 annotation info 的。 + +注意: 本脚本目前仅仅支持 COCO2017 数据集,未来会支持更加通用的 COCO JSON 格式数据集 + +输入 root 根路径文件夹格式如下所示: + +```text +├── root +│ ├── annotations +│ ├── train2017 +│ ├── val2017 +│ ├── test2017 +``` + +1. 仅仅使用 5K 张验证集切分出 10 张训练图片和 10 张验证图片 + +```shell +python tools/misc/extract_subcoco.py ${ROOT} ${OUT_DIR} --num-img 10 +``` + +2. 使用训练集切分出 20 张训练图片,使用验证集切分出 20 张验证图片 + +```shell +python tools/misc/extract_subcoco.py ${ROOT} ${OUT_DIR} --num-img 20 --use-training-set +``` + +3. 设置全局种子,默认不设置 + +```shell +python tools/misc/extract_subcoco.py ${ROOT} ${OUT_DIR} --num-img 20 --use-training-set --seed 1 +``` + +4. 按指定类别切分图片 + +```shell +python tools/misc/extract_subcoco.py ${ROOT} ${OUT_DIR} --classes cat dog person +``` + +5. 
按指定锚框尺寸切分图片 + +```shell +python tools/misc/extract_subcoco.py ${ROOT} ${OUT_DIR} --area-size small +``` diff --git a/docs/zh_cn/useful_tools/log_analysis.md b/docs/zh_cn/useful_tools/log_analysis.md new file mode 100644 index 00000000..6d0c5739 --- /dev/null +++ b/docs/zh_cn/useful_tools/log_analysis.md @@ -0,0 +1,82 @@ +# 日志分析 + +## 曲线图绘制 + +MMDetection 中的 `tools/analysis_tools/analyze_logs.py` 可利用指定的训练 log 文件绘制 loss/mAP 曲线图, 第一次运行前请先运行 `pip install seaborn` 安装必要依赖。 + +```shell +mim run mmdet analyze_logs plot_curve \ + ${LOG} \ # 日志文件路径 + [--keys ${KEYS}] \ # 需要绘制的指标,默认为 'bbox_mAP' + [--start-epoch ${START_EPOCH}] # 起始的 epoch,默认为 1 + [--eval-interval ${EVALUATION_INTERVAL}] \ # 评估间隔,默认为 1 + [--title ${TITLE}] \ # 图片标题,无默认值 + [--legend ${LEGEND}] \ # 图例,默认为 None + [--backend ${BACKEND}] \ # 绘制后端,默认为 None + [--style ${STYLE}] \ # 绘制风格,默认为 'dark' + [--out ${OUT_FILE}] # 输出文件路径 +# [] 代表可选参数,实际输入命令行时,不用输入 [] +``` + +样例: + +- 绘制分类损失曲线图 + + ```shell + mim run mmdet analyze_logs plot_curve \ + yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700.log.json \ + --keys loss_cls \ + --legend loss_cls + ``` + + + +- 绘制分类损失、回归损失曲线图,保存图片为对应的 pdf 文件 + + ```shell + mim run mmdet analyze_logs plot_curve \ + yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700.log.json \ + --keys loss_cls loss_bbox \ + --legend loss_cls loss_bbox \ + --out losses_yolov5_s.pdf + ``` + + + +- 在同一图像中比较两次运行结果的 bbox mAP + + ```shell + mim run mmdet analyze_logs plot_curve \ + yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700.log.json \ + yolov5_n-v61_syncbn_fast_8xb16-300e_coco_20220919_090739.log.json \ + --keys bbox_mAP \ + --legend yolov5_s yolov5_n \ + --eval-interval 10 # 注意评估间隔必须和训练时设置的一致,否则会报错 + ``` + + + +## 计算平均训练速度 + +```shell +mim run mmdet analyze_logs cal_train_time \ + ${LOG} \ # 日志文件路径 + [--include-outliers] # 计算时包含每个 epoch 的第一个数据 +``` + +样例: + +```shell +mim run mmdet analyze_logs cal_train_time \ + yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700.log.json +``` + +输出以如下形式展示: + +```text +-----Analyze train time of yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700.log.json----- +slowest epoch 278, average time is 0.1705 s/iter +fastest epoch 300, average time is 0.1510 s/iter +time std over epochs is 0.0026 +average iter time: 0.1556 s/iter +``` diff --git a/docs/zh_cn/useful_tools/model_converters.md b/docs/zh_cn/useful_tools/model_converters.md new file mode 100644 index 00000000..b5e7392f --- /dev/null +++ b/docs/zh_cn/useful_tools/model_converters.md @@ -0,0 +1,52 @@ +# 模型转换 + +文件夹 `tools/model_converters/` 下的六个脚本能够帮助用户将对应YOLO官方的预训练模型中的键转换成 `MMYOLO` 格式,并使用 `MMYOLO` 对模型进行微调。 + +## YOLOv5 + +下面以转换 `yolov5s.pt` 为例: + +1. 将 YOLOv5 官方代码克隆到本地(目前支持的最高版本为 `v6.1` ): + +```shell +git clone -b v6.1 https://github.com/ultralytics/yolov5.git +cd yolov5 +``` + +2. 下载官方权重: + +```shell +wget https://github.com/ultralytics/yolov5/releases/download/v6.1/yolov5s.pt +``` + +3. 将 `tools/model_converters/yolov5_to_mmyolo.py` 文件复制到 YOLOv5 官方代码克隆的路径: + +```shell +cp ${MMDET_YOLO_PATH}/tools/model_converters/yolov5_to_mmyolo.py yolov5_to_mmyolo.py +``` + +4. 执行转换: + +```shell +python yolov5_to_mmyolo.py --src ${WEIGHT_FILE_PATH} --dst mmyolov5.pt +``` + +转换好的 `mmyolov5.pt` 即可以为 MMYOLO 所用。 YOLOv6 官方权重转化也是采用一样的使用方式。 + +## YOLOX + +YOLOX 模型的转换不需要下载 YOLOX 官方代码,只需要下载权重即可。下面以转换 `yolox_s.pth` 为例: + +1. 下载权重: + +```shell +wget https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_s.pth +``` + +2. 
执行转换: + +```shell +python tools/model_converters/yolox_to_mmyolo.py --src yolox_s.pth --dst mmyolox.pt +``` + +转换好的 `mmyolox.pt` 即可以在 MMYOLO 中使用。 diff --git a/docs/zh_cn/useful_tools/optimize_anchors.md b/docs/zh_cn/useful_tools/optimize_anchors.md new file mode 100644 index 00000000..5ce98371 --- /dev/null +++ b/docs/zh_cn/useful_tools/optimize_anchors.md @@ -0,0 +1,37 @@ +# 优化锚框尺寸 + +脚本 `tools/analysis_tools/optimize_anchors.py` 支持 YOLO 系列中三种锚框生成方式,分别是 `k-means`、`Differential Evolution`、`v5-k-means`. + +## k-means + +在 k-means 方法中,使用的是基于 IoU 表示距离的聚类方法,具体使用命令如下: + +```shell +python tools/analysis_tools/optimize_anchors.py ${CONFIG} \ + --algorithm k-means \ + --input-shape ${INPUT_SHAPE [WIDTH HEIGHT]} \ + --out-dir ${OUT_DIR} +``` + +## Differential Evolution + +在 `Differential Evolution` 方法中,使用的是基于差分进化算法(简称 DE 算法)的聚类方式,其最小化目标函数为 `avg_iou_cost`,具体使用命令如下: + +```shell +python tools/analysis_tools/optimize_anchors.py ${CONFIG} \ + --algorithm DE \ + --input-shape ${INPUT_SHAPE [WIDTH HEIGHT]} \ + --out-dir ${OUT_DIR} +``` + +## v5-k-means + +在 v5-k-means 方法中,使用的是 YOLOv5 中基于 `shape-match` 的聚类方式,具体使用命令如下: + +```shell +python tools/analysis_tools/optimize_anchors.py ${CONFIG} \ + --algorithm v5-k-means \ + --input-shape ${INPUT_SHAPE [WIDTH HEIGHT]} \ + --prior-match-thr ${PRIOR_MATCH_THR} \ + --out-dir ${OUT_DIR} +``` diff --git a/docs/zh_cn/useful_tools/print_config.md b/docs/zh_cn/useful_tools/print_config.md new file mode 100644 index 00000000..904fbd5f --- /dev/null +++ b/docs/zh_cn/useful_tools/print_config.md @@ -0,0 +1,20 @@ +# 打印完整配置文件 + +MMDetection 中的 `tools/misc/print_config.py` 脚本可将所有配置继承关系展开,打印相应的完整配置文件。调用命令如下: + +```shell +mim run mmdet print_config \ + ${CONFIG} \ # 需要打印的配置文件路径 + [--save-path] \ # 保存文件路径,必须以 .py, .json 或者 .yml 结尾 + [--cfg-options ${OPTIONS [OPTIONS...]}] # 通过命令行参数修改配置文件 +``` + +样例: + +```shell +mim run mmdet print_config \ + configs/yolov5/yolov5_s-v61_syncbn_fast_1xb4-300e_balloon.py \ + --save-path ./work_dirs/yolov5_s-v61_syncbn_fast_1xb4-300e_balloon_whole.py +``` + +运行以上命令,会将 `yolov5_s-v61_syncbn_fast_1xb4-300e_balloon.py` 继承关系展开后的配置文件保存到 `./work_dirs` 文件夹内的 `yolov5_s-v61_syncbn_fast_1xb4-300e_balloon_whole.py` 文件中。 diff --git a/docs/zh_cn/useful_tools/vis_scheduler.md b/docs/zh_cn/useful_tools/vis_scheduler.md new file mode 100644 index 00000000..e6848b17 --- /dev/null +++ b/docs/zh_cn/useful_tools/vis_scheduler.md @@ -0,0 +1,44 @@ +# 可视化优化器参数策略 + +`tools/analysis_tools/vis_scheduler.py` 旨在帮助用户检查优化器的超参数调度器(无需训练),支持学习率(learning rate)、动量(momentum)和权值衰减(weight decay)。 + +```shell +python tools/analysis_tools/vis_scheduler.py \ + ${CONFIG_FILE} \ + [-p, --parameter ${PARAMETER_NAME}] \ + [-d, --dataset-size ${DATASET_SIZE}] \ + [-n, --ngpus ${NUM_GPUs}] \ + [-o, --out-dir ${OUT_DIR}] \ + [--title ${TITLE}] \ + [--style ${STYLE}] \ + [--window-size ${WINDOW_SIZE}] \ + [--cfg-options] +``` + +**所有参数的说明**: + +- `config` : 模型配置文件的路径。 +- **`-p, parameter`**: 可视化参数名,只能为 `["lr", "momentum", "wd"]` 之一, 默认为 `"lr"`. 
+- **`-d, --dataset-size`**: 数据集的大小。如果指定,`DATASETS.build` 将被跳过并使用这个数值作为数据集大小,默认使用 `DATASETS.build` 所得数据集的大小。 +- **`-n, --ngpus`**: 使用 GPU 的数量, 默认为1。 +- **`-o, --out-dir`**: 保存的可视化图片的文件夹路径,默认不保存。 +- `--title`: 可视化图片的标题,默认为配置文件名。 +- `--style`: 可视化图片的风格,默认为 `whitegrid`。 +- `--window-size`: 可视化窗口大小,如果没有指定,默认为 `12*7`。如果需要指定,按照格式 `'W*H'`。 +- `--cfg-options`: 对配置文件的修改,参考[学习配置文件](../user_guides/config.md)。 + +```{note} +部分数据集在解析标注阶段比较耗时,推荐直接将 `-d, dataset-size` 指定数据集的大小,以节约时间。 +``` + +你可以使用如下命令来绘制配置文件 `configs/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco.py` 将会使用的学习率变化曲线: + +```shell +python tools/analysis_tools/vis_scheduler.py \ + configs/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco.py \ + --dataset-size 118287 \ + --ngpus 8 \ + --out-dir ./output +``` + +
diff --git a/docs/zh_cn/user_guides/index.rst b/docs/zh_cn/user_guides/index.rst deleted file mode 100644 index f24b78f6..00000000 --- a/docs/zh_cn/user_guides/index.rst +++ /dev/null @@ -1,28 +0,0 @@ -训练 & 测试 -************** - -MMYOLO 在 `Model Zoo `_ 中提供了诸多检测模型。本文档将展示如何使用这些模型和数据集来执行常见的训练和测试任务: - -.. toctree:: - :maxdepth: 1 - - config.md - - -从入门到部署全流程 -****************** - -.. toctree:: - :maxdepth: 1 - - custom_dataset.md - yolov5_tutorial.md - -实用工具 -************ - -.. toctree:: - :maxdepth: 1 - - visualization.md - useful_tools.md diff --git a/docs/zh_cn/user_guides/useful_tools.md b/docs/zh_cn/user_guides/useful_tools.md deleted file mode 100644 index 92b3517b..00000000 --- a/docs/zh_cn/user_guides/useful_tools.md +++ /dev/null @@ -1,481 +0,0 @@ -# 实用工具 - -我们在 `tools/` 文件夹下提供很多实用工具。 除此之外,你也可以通过 MIM 来快速运行 OpenMMLab 的其他开源库。 - -以 MMDetection 为例,如果想利用 [print_config.py](https://github.com/open-mmlab/mmdetection/blob/3.x/tools/misc/print_config.py),你可以直接采用如下命令,而无需复制源码到 MMYOLO 库中。 - -```shell -mim run mmdet print_config ${CONFIG} -``` - -## 可视化 - -### 可视化 COCO 标签 - -脚本 `tools/analysis_tools/browse_coco_json.py` 能够使用可视化显示 COCO 标签在图片的情况。 - -```shell -python tools/analysis_tools/browse_coco_json.py [--data-root ${DATA_ROOT}] \ - [--img-dir ${IMG_DIR}] \ - [--ann-file ${ANN_FILE}] \ - [--wait-time ${WAIT_TIME}] \ - [--disp-all] [--category-names CATEGORY_NAMES [CATEGORY_NAMES ...]] \ - [--shuffle] -``` - -其中,如果图片、标签都在同一个文件夹下的话,可以指定 `--data-root` 到该文件夹,然后 `--img-dir` 和 `--ann-file` 指定该文件夹的相对路径,代码会自动拼接。 -如果图片、标签文件不在同一个文件夹下的话,则无需指定 `--data-root` ,直接指定绝对路径的 `--img-dir` 和 `--ann-file` 即可。 - -例子: - -1. 查看 `COCO` 全部类别,同时展示 `bbox`、`mask` 等所有类型的标注: - -```shell -python tools/analysis_tools/browse_coco_json.py --data-root './data/coco' \ - --img-dir 'train2017' \ - --ann-file 'annotations/instances_train2017.json' \ - --disp-all -``` - -如果图片、标签不在同一个文件夹下的话,可以使用绝对路径: - -```shell -python tools/analysis_tools/browse_coco_json.py --img-dir '/dataset/image/coco/train2017' \ - --ann-file '/label/instances_train2017.json' \ - --disp-all -``` - -2. 查看 `COCO` 全部类别,同时仅展示 `bbox` 类型的标注,并打乱显示: - -```shell -python tools/analysis_tools/browse_coco_json.py --data-root './data/coco' \ - --img-dir 'train2017' \ - --ann-file 'annotations/instances_train2017.json' \ - --shuffle -``` - -3. 只查看 `bicycle` 和 `person` 类别,同时仅展示 `bbox` 类型的标注: - -```shell -python tools/analysis_tools/browse_coco_json.py --data-root './data/coco' \ - --img-dir 'train2017' \ - --ann-file 'annotations/instances_train2017.json' \ - --category-names 'bicycle' 'person' -``` - -4. 
查看 `COCO` 全部类别,同时展示 `bbox`、`mask` 等所有类型的标注,并打乱显示: - -```shell -python tools/analysis_tools/browse_coco_json.py --data-root './data/coco' \ - --img-dir 'train2017' \ - --ann-file 'annotations/instances_train2017.json' \ - --disp-all \ - --shuffle -``` - -### 可视化数据集 - -```shell -python tools/analysis_tools/browse_dataset.py \ - ${CONFIG_FILE} \ - [-o, --output-dir ${OUTPUT_DIR}] \ - [-p, --phase ${DATASET_PHASE}] \ - [-n, --show-number ${NUMBER_IMAGES_DISPLAY}] \ - [-i, --show-interval ${SHOW_INTERRVAL}] \ - [-m, --mode ${DISPLAY_MODE}] \ - [--cfg-options ${CFG_OPTIONS}] -``` - -**所有参数的说明**: - -- `config` : 模型配置文件的路径。 -- `-o, --output-dir`: 保存图片文件夹,如果没有指定,默认为 `'./output'`。 -- **`-p, --phase`**: 可视化数据集的阶段,只能为 `['train', 'val', 'test']` 之一,默认为 `'train'`。 -- **`-n, --show-number`**: 可视化样本数量。如果没有指定,默认展示数据集的所有图片。 -- **`-m, --mode`**: 可视化的模式,只能为 `['original', 'transformed', 'pipeline']` 之一。 默认为 `'transformed'`。 -- `--cfg-options` : 对配置文件的修改,参考[学习配置文件](./config.md)。 - -```shell -`-m, --mode` 用于设置可视化的模式,默认设置为 'transformed'。 -- 如果 `--mode` 设置为 'original',则获取原始图片; -- 如果 `--mode` 设置为 'transformed',则获取预处理后的图片; -- 如果 `--mode` 设置为 'pipeline',则获得数据流水线所有中间过程图片。 -``` - -**示例**: - -1. **'original'** 模式 : - -```shell -python ./tools/analysis_tools/browse_dataset.py configs/yolov5/yolov5_balloon.py --phase val --output-dir tmp --mode original -``` - -- `--phase val`: 可视化验证集, 可简化为 `-p val`; -- `--output-dir tmp`: 可视化结果保存在 "tmp" 文件夹, 可简化为 `-o tmp`; -- `--mode original`: 可视化原图, 可简化为 `-m original`; -- `--show-number 100`: 可视化100张图,可简化为 `-n 100`; - -2.**'transformed'** 模式 : - -```shell -python ./tools/analysis_tools/browse_dataset.py configs/yolov5/yolov5_balloon.py -``` - -3.**'pipeline'** 模式 : - -```shell -python ./tools/analysis_tools/browse_dataset.py configs/yolov5/yolov5_balloon.py -m pipeline -``` - -
-Image -
- -### 可视化数据集分析 - -脚本 `tools/analysis_tools/dataset_analysis.py` 能够帮助用户得到四种功能的结果图,并将图片保存到当前运行目录下的 `dataset_analysis` 文件夹中。 - -关于该脚本的功能的说明: - -通过 `main()` 的数据准备,得到每个子函数所需要的数据。 - -功能一:显示类别和 bbox 实例个数的分布图,通过子函数 `show_bbox_num` 生成。 - - - -功能二:显示类别和 bbox 实例宽、高的分布图,通过子函数 `show_bbox_wh` 生成。 - - - -功能三:显示类别和 bbox 实例宽/高比例的分布图,通过子函数 `show_bbox_wh_ratio` 生成。 - - - -功能四:基于面积规则下,显示类别和 bbox 实例面积的分布图,通过子函数 `show_bbox_area` 生成。 - - - -打印列表显示,通过脚本中子函数 `show_class_list` 和 `show_data_list` 生成。 - - - -```shell -python tools/analysis_tools/dataset_analysis.py ${CONFIG} \ - [-h] \ - [--val-dataset ${TYPE}] \ - [--class-name ${CLASS_NAME}] \ - [--area-rule ${AREA_RULE}] \ - [--func ${FUNC}] \ - [--out-dir ${OUT_DIR}] -``` - -例子: - -1. 使用 `config` 文件 `configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py` 分析数据集,其中默认设置:数据加载类型为 `train_dataset` ,面积规则设置为 `[0,32,96,1e5]` ,生成包含所有类的结果图并将图片保存到当前运行目录下 `./dataset_analysis` 文件夹中: - -```shell -python tools/analysis_tools/dataset_analysis.py configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py -``` - -2. 使用 `config` 文件 `configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py` 分析数据集,通过 `--val-dataset` 设置将数据加载类型由默认的 `train_dataset` 改为 `val_dataset`: - -```shell -python tools/analysis_tools/dataset_analysis.py configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py \ - --val-dataset -``` - -3. 使用 `config` 文件 `configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py` 分析数据集,通过 `--class-name` 设置将生成所有类改为特定类显示,以显示 `person` 为例: - -```shell -python tools/analysis_tools/dataset_analysis.py configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py \ - --class-name person -``` - -4. 使用 `config` 文件 `configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py` 分析数据集,通过 `--area-rule` 重新定义面积规则,以 `30 70 125` 为例,面积规则变为 `[0,30,70,125,1e5]`: - -```shell -python tools/analysis_tools/dataset_analysis.py configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py \ - --area-rule 30 70 125 -``` - -5. 使用 `config` 文件 `configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py` 分析数据集,通过 `--func` 设置,将显示四个功能效果图改为只显示 `功能一` 为例: - -```shell -python tools/analysis_tools/dataset_analysis.py configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py \ - --func show_bbox_num -``` - -6. 使用 `config` 文件 `configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py` 分析数据集,通过 `--out-dir` 设置修改图片保存地址,以 `work_dirs/dataset_analysis` 地址为例: - -```shell -python tools/analysis_tools/dataset_analysis.py configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py \ - --out-dir work_dirs/dataset_analysis -``` - -### 优化器参数策略可视化 - -`tools/analysis_tools/vis_scheduler.py` 旨在帮助用户检查优化器的超参数调度器(无需训练),支持学习率(learning rate)、动量(momentum)和权值衰减(weight decay)。 - -```shell -python tools/analysis_tools/vis_scheduler.py \ - ${CONFIG_FILE} \ - [-p, --parameter ${PARAMETER_NAME}] \ - [-d, --dataset-size ${DATASET_SIZE}] \ - [-n, --ngpus ${NUM_GPUs}] \ - [-o, --out-dir ${OUT_DIR}] \ - [--title ${TITLE}] \ - [--style ${STYLE}] \ - [--window-size ${WINDOW_SIZE}] \ - [--cfg-options] -``` - -**所有参数的说明**: - -- `config` : 模型配置文件的路径。 -- **`-p, parameter`**: 可视化参数名,只能为 `["lr", "momentum", "wd"]` 之一, 默认为 `"lr"`. 
-- **`-d, --dataset-size`**: 数据集的大小。如果指定,`DATASETS.build` 将被跳过并使用这个数值作为数据集大小,默认使用 `DATASETS.build` 所得数据集的大小。 -- **`-n, --ngpus`**: 使用 GPU 的数量, 默认为1。 -- **`-o, --out-dir`**: 保存的可视化图片的文件夹路径,默认不保存。 -- `--title`: 可视化图片的标题,默认为配置文件名。 -- `--style`: 可视化图片的风格,默认为 `whitegrid`。 -- `--window-size`: 可视化窗口大小,如果没有指定,默认为 `12*7`。如果需要指定,按照格式 `'W*H'`。 -- `--cfg-options`: 对配置文件的修改,参考[学习配置文件](../user_guides/config.md)。 - -```{note} -部分数据集在解析标注阶段比较耗时,推荐直接将 `-d, dataset-size` 指定数据集的大小,以节约时间。 -``` - -你可以使用如下命令来绘制配置文件 `configs/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco.py` 将会使用的学习率变化曲线: - -```shell -python tools/analysis_tools/vis_scheduler.py \ - configs/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco.py \ - --dataset-size 118287 \ - --ngpus 8 \ - --out-dir ./output -``` - -
- -## 数据集转换 - -文件夹 `tools/data_converters/` 目前包含 `ballon2coco.py`、`yolo2coco.py` 和 `labelme2coco.py` 三个数据集转换工具。 - -- `ballon2coco.py` 将 `balloon` 数据集(该小型数据集仅作为入门使用)转换成 COCO 的格式。 - -关于该脚本的详细说明,请看 [YOLOv5 从入门到部署全流程](./yolov5_tutorial.md) 中 `数据集准备` 小节。 - -```shell -python tools/dataset_converters/balloon2coco.py -``` - -- `yolo2coco.py` 将 `yolo-style` **.txt** 格式的数据集转换成 COCO 的格式,请按如下方式使用: - -```shell -python tools/dataset_converters/yolo2coco.py /path/to/the/root/dir/of/your_dataset -``` - -使用说明: - -1. `image_dir` 是需要你传入的待转换的 yolo 格式数据集的根目录,内应包含 `images` 、 `labels` 和 `classes.txt` 文件, `classes.txt` 是当前 dataset 对应的类的声明,一行一个类别。 - `image_dir` 结构如下例所示: - -```bash -. -└── $ROOT_PATH - ├── classes.txt - ├── labels - │ ├── a.txt - │ ├── b.txt - │ └── ... - ├── images - │ ├── a.jpg - │ ├── b.png - │ └── ... - └── ... -``` - -2. 脚本会检测 `image_dir` 下是否已有的 `train.txt` 、 `val.txt` 和 `test.txt` 。若检测到文件,则会按照类别进行整理, 否则默认不需要分类。故请确保对应的 `train.txt` 、 `val.txt` 和 `test.txt` 要在 `image_dir` 内。文件内的图片路径必须是**绝对路径**。 -3. 脚本会默认在 `image_dir` 目录下创建 `annotations` 文件夹并将转换结果存在这里。如果在 `image_dir` 下没找到分类文件,输出文件即为一个 `result.json`,反之则会生成需要的 `train.json` 、 `val.json`、 `test.json`,脚本完成后 `annotations` 结构可如下例所示: - -```bash -. -└── $ROOT_PATH - ├── annotations - │ ├── result.json - │ └── ... - ├── classes.txt - ├── labels - │ ├── a.txt - │ ├── b.txt - │ └── ... - ├── images - │ ├── a.jpg - │ ├── b.png - │ └── ... - └── ... -``` - -## 数据集下载 - -脚本 `tools/misc/download_dataset.py` 支持下载数据集,例如 `COCO`、`VOC`、`LVIS` 和 `Balloon`. - -```shell -python tools/misc/download_dataset.py --dataset-name coco2017 -python tools/misc/download_dataset.py --dataset-name voc2007 -python tools/misc/download_dataset.py --dataset-name voc2012 -python tools/misc/download_dataset.py --dataset-name lvis -python tools/misc/download_dataset.py --dataset-name balloon [--save-dir ${SAVE_DIR}] [--unzip] -``` - -## 模型转换 - -文件夹 `tools/model_converters/` 下的六个脚本能够帮助用户将对应YOLO官方的预训练模型中的键转换成 `MMYOLO` 格式,并使用 `MMYOLO` 对模型进行微调。 - -### YOLOv5 - -下面以转换 `yolov5s.pt` 为例: - -1. 将 YOLOv5 官方代码克隆到本地(目前支持的最高版本为 `v6.1` ): - -```shell -git clone -b v6.1 https://github.com/ultralytics/yolov5.git -cd yolov5 -``` - -2. 下载官方权重: - -```shell -wget https://github.com/ultralytics/yolov5/releases/download/v6.1/yolov5s.pt -``` - -3. 将 `tools/model_converters/yolov5_to_mmyolo.py` 文件复制到 YOLOv5 官方代码克隆的路径: - -```shell -cp ${MMDET_YOLO_PATH}/tools/model_converters/yolov5_to_mmyolo.py yolov5_to_mmyolo.py -``` - -4. 执行转换: - -```shell -python yolov5_to_mmyolo.py --src ${WEIGHT_FILE_PATH} --dst mmyolov5.pt -``` - -转换好的 `mmyolov5.pt` 即可以为 MMYOLO 所用。 YOLOv6 官方权重转化也是采用一样的使用方式。 - -### YOLOX - -YOLOX 模型的转换不需要下载 YOLOX 官方代码,只需要下载权重即可。下面以转换 `yolox_s.pth` 为例: - -1. 下载权重: - -```shell -wget https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_s.pth -``` - -2. 执行转换: - -```shell -python tools/model_converters/yolox_to_mmyolo.py --src yolox_s.pth --dst mmyolox.pt -``` - -转换好的 `mmyolox.pt` 即可以在 MMYOLO 中使用。 - -## 优化锚框尺寸 - -脚本 `tools/analysis_tools/optimize_anchors.py` 支持 YOLO 系列中三种锚框生成方式,分别是 `k-means`、`Differential Evolution`、`v5-k-means`. 
- -### k-means - -在 k-means 方法中,使用的是基于 IoU 表示距离的聚类方法,具体使用命令如下: - -```shell -python tools/analysis_tools/optimize_anchors.py ${CONFIG} \ - --algorithm k-means \ - --input-shape ${INPUT_SHAPE [WIDTH HEIGHT]} \ - --out-dir ${OUT_DIR} -``` - -### Differential Evolution - -在 `Differential Evolution` 方法中,使用的是基于差分进化算法(简称 DE 算法)的聚类方式,其最小化目标函数为 `avg_iou_cost`,具体使用命令如下: - -```shell -python tools/analysis_tools/optimize_anchors.py ${CONFIG} \ - --algorithm DE \ - --input-shape ${INPUT_SHAPE [WIDTH HEIGHT]} \ - --out-dir ${OUT_DIR} -``` - -### v5-k-means - -在 v5-k-means 方法中,使用的是 YOLOv5 中基于 `shape-match` 的聚类方式,具体使用命令如下: - -```shell -python tools/analysis_tools/optimize_anchors.py ${CONFIG} \ - --algorithm v5-k-means \ - --input-shape ${INPUT_SHAPE [WIDTH HEIGHT]} \ - --prior-match-thr ${PRIOR_MATCH_THR} \ - --out-dir ${OUT_DIR} -``` - -## 提取 COCO 子集 - -COCO2017 数据集训练数据集包括 118K 张图片,验证集包括 5K 张图片,数据集比较大。在调试或者快速验证程序是否正确的场景下加载 json 会需要消耗较多资源和带来较慢的启动速度,这会导致程序体验不好。 - -`extract_subcoco.py` 脚本提供了按指定图片数量、类别、锚框尺寸来切分图片的功能,用户可以通过 `--num-img`, `--classes`, `--area-size` 参数来得到指定条件的 COCO 子集,从而满足上述需求。 - -例如通过以下脚本切分图片: - -```shell -python tools/misc/extract_subcoco.py \ - ${ROOT} \ - ${OUT_DIR} \ - --num-img 20 \ - --classes cat dog person \ - --area-size small -``` - -会切分出 20 张图片,且这 20 张图片只会保留同时满足类别条件和锚框尺寸条件的标注信息, 没有满足条件的标注信息的图片不会被选择,保证了这 20 张图都是有 annotation info 的。 - -注意: 本脚本目前仅仅支持 COCO2017 数据集,未来会支持更加通用的 COCO JSON 格式数据集 - -输入 root 根路径文件夹格式如下所示: - -```text -├── root -│ ├── annotations -│ ├── train2017 -│ ├── val2017 -│ ├── test2017 -``` - -1. 仅仅使用 5K 张验证集切分出 10 张训练图片和 10 张验证图片 - -```shell -python tools/misc/extract_subcoco.py ${ROOT} ${OUT_DIR} --num-img 10 -``` - -2. 使用训练集切分出 20 张训练图片,使用验证集切分出 20 张验证图片 - -```shell -python tools/misc/extract_subcoco.py ${ROOT} ${OUT_DIR} --num-img 20 --use-training-set -``` - -3. 设置全局种子,默认不设置 - -```shell -python tools/misc/extract_subcoco.py ${ROOT} ${OUT_DIR} --num-img 20 --use-training-set --seed 1 -``` - -4. 按指定类别切分图片 - -```shell -python tools/misc/extract_subcoco.py ${ROOT} ${OUT_DIR} --classes cat dog person -``` - -5. 按指定锚框尺寸切分图片 - -```shell -python tools/misc/extract_subcoco.py ${ROOT} ${OUT_DIR} --area-size small -``` diff --git a/requirements/docs.txt b/requirements/docs.txt index d251554c..ca8cb940 100644 --- a/requirements/docs.txt +++ b/requirements/docs.txt @@ -1,7 +1,12 @@ docutils==0.16.0 +mmcv>=2.0.0rc1,<2.1.0 +mmdet>=3.0.0rc5 +mmengine>=0.3.1 myst-parser -e git+https://github.com/open-mmlab/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme sphinx==4.0.2 sphinx-copybutton sphinx_markdown_tables sphinx_rtd_theme==0.5.2 +torch +torchvision diff --git a/requirements/readthedocs.txt b/requirements/readthedocs.txt deleted file mode 100644 index 32ac3b57..00000000 --- a/requirements/readthedocs.txt +++ /dev/null @@ -1,5 +0,0 @@ -mmcv>=2.0.0rc1,<2.1.0 -mmdet>=3.0.0rc5 -mmengine>=0.3.1 -torch -torchvision From 4e8baf8a8260effed60da8566832c4c45dd9da47 Mon Sep 17 00:00:00 2001 From: yechenzhi <136920488@qq.com> Date: Tue, 21 Feb 2023 11:24:43 +0800 Subject: [PATCH 33/64] add yolov8 visualization readme (#574) * add yolov8 visualization readme * add readme --- projects/assigner_visualization/README.md | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/projects/assigner_visualization/README.md b/projects/assigner_visualization/README.md index 0bf0d8dc..918589f2 100644 --- a/projects/assigner_visualization/README.md +++ b/projects/assigner_visualization/README.md @@ -6,7 +6,7 @@ This project is developed for easily showing assigning results. 
The script allows users to analyze where and how many positive samples each gt is assigned in the image.
 
-Now, the script supports `YOLOv5`, `YOLOv7` and `RTMDet`.
+Now, the script supports `YOLOv5`, `YOLOv7`, `YOLOv8` and `RTMDet`.
 
 ## Usage
 
@@ -26,12 +26,18 @@ YOLOv7 assigner visualization command:
 python projects/assigner_visualization/assigner_visualization.py projects/assigner_visualization/configs/yolov7_tiny_syncbn_fast_8xb16-300e_coco_assignervisualization.py -c ${checkpoint}
 ```
 
+YOLOv8 assigner visualization command:
+
+```shell
+python projects/assigner_visualization/assigner_visualization.py projects/assigner_visualization/configs/yolov8_s_syncbn_fast_8xb16-500e_coco_assignervisualization.py -c ${checkpoint}
+```
+
 RTMDet assigner visualization command:
 
 ```shell
 python projects/assigner_visualization/assigner_visualization.py projects/assigner_visualization/configs/rtmdet_s_syncbn_fast_8xb32-300e_coco_assignervisualization.py -c ${checkpoint}
 ```
 
-${checkpont} is the checkpont file path. Dynamic label assignment is used in `YOLOv7` and `RTMDet`, model weights will affect the positive sample allocation results, so it is recommended to load the trained model weights.
+${checkpoint} is the checkpoint file path. Dynamic label assignment is used in `YOLOv7`, `YOLOv8` and `RTMDet`; since model weights affect the positive sample allocation results, it is recommended to load the trained model weights.
 
 If you want to know details about label assignment, you can check the [RTMDet](https://mmyolo.readthedocs.io/zh_CN/latest/algorithm_descriptions/rtmdet_description.html#id5).

From 260509a5741fce3ee83646906b533497b3b8ba05 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Haian=20Huang=28=E6=B7=B1=E5=BA=A6=E7=9C=B8=29?=
 <1286304229@qq.com>
Date: Tue, 21 Feb 2023 17:19:01 +0800
Subject: [PATCH 34/64] [Update] Refactor Docs (#573)

* update get_started

* update

* update

* update

* update

* update

* update

* fix

* fix

* refactor en

* update

* update

* update
---
 README.md                                     | 116 +++-
 README_zh-CN.md                               |  36 +-
 .../yolov5/yolov5_s-v61_fast_1xb12-40e_cat.py |  52 ++
 .../cross-library_application.md              |   1 +
 docs/en/advanced_guides/how_to.md             | 572 ------------------
 docs/en/advanced_guides/index.rst             |  25 -
 docs/en/common_usage/amp_training.md          |   1 +
 docs/en/common_usage/freeze_layers.md         |  28 +
 docs/en/common_usage/mim_usage.md             |  89 +++
 docs/en/common_usage/module_combination.md    |   1 +
 docs/en/common_usage/multi_necks.md           |  37 ++
 docs/en/common_usage/output_predictions.md    |  40 ++
 .../plugins.md                                |   0
 docs/en/common_usage/resume_training.md       |   1 +
 docs/en/common_usage/set_random_seed.md       |  18 +
 docs/en/common_usage/set_syncbn.md            |   1 +
 docs/en/common_usage/specify_device.md        |  23 +
 docs/en/deploy/index.rst                      |  16 -
 docs/en/get_started.md                        | 280 ---------
 .../15_minutes_instance_segmentation.md       |   3 +
 .../15_minutes_object_detection.md            |   3 +
 .../15_minutes_rotated_object_detection.md    |   3 +
 docs/en/get_started/article.md                |   1 +
 docs/en/get_started/dependencies.md           |  44 ++
 docs/en/get_started/installation.md           | 123 ++++
 docs/en/get_started/overview.md               |   1 +
 docs/en/index.rst                             | 114 ++--
 docs/en/{community => notes}/code_style.md    |   2 +-
 docs/en/overview.md                           |  56 --
 .../algorithm_descriptions/index.rst          |   9 -
 .../rtmdet_description.md                     |   0
 .../yolov5_description.md                     |   0
 .../yolov8_description.md                     |   0
 .../contributing.md                           |  36 +-
 .../recommended_topics/dataset_preparation.md |   1 +
 .../deploy/easydeploy_guide.md                |   1 +
 docs/en/recommended_topics/deploy/index.rst   |  16 +
 .../deploy/mmdeploy_guide.md}                 |   0
 .../deploy/mmdeploy_yolov5.md}                |  10 +-
.../recommended_topics/industry_examples.md | 1 + .../labeling_to_deployment_tutorials.md} | 0 docs/en/recommended_topics/mm_basics.md | 1 + .../model_design.md | 0 .../en/recommended_topics/replace_backbone.md | 306 ++++++++++ .../troubleshooting_steps.md | 1 + .../visualization.md | 53 ++ docs/en/{user_guides => tutorials}/config.md | 2 - docs/en/tutorials/custom_installation.md | 109 ++++ .../data_flow.md | 2 +- docs/en/{notes => tutorials}/faq.md | 4 +- docs/en/useful_tools/browse_coco_json.md | 62 ++ docs/en/useful_tools/browse_dataset.md | 42 ++ docs/en/useful_tools/dataset_analysis.md | 79 +++ docs/en/useful_tools/dataset_converters.md | 55 ++ docs/en/useful_tools/download_dataset.md | 11 + docs/en/useful_tools/extract_subcoco.md | 60 ++ docs/en/useful_tools/log_analysis.md | 82 +++ docs/en/useful_tools/model_converters.md | 54 ++ docs/en/useful_tools/optimize_anchors.md | 38 ++ docs/en/useful_tools/print_config.md | 20 + docs/en/useful_tools/vis_scheduler.md | 44 ++ docs/en/user_guides/index.rst | 28 - docs/en/user_guides/useful_tools.md | 520 ---------------- docs/en/user_guides/yolov5_tutorial.md | 235 ------- docs/zh_cn/common_usage/output_predictions.md | 4 +- .../15_minutes_object_detection.md | 401 ++++++++---- docs/zh_cn/get_started/article.md | 6 +- docs/zh_cn/get_started/dependencies.md | 22 +- docs/zh_cn/get_started/installation.md | 134 +--- docs/zh_cn/get_started/overview.md | 71 ++- docs/zh_cn/index.rst | 36 +- docs/zh_cn/notes/code_style.md | 40 +- .../algorithm_descriptions/index.rst | 0 .../rtmdet_description.md | 0 .../yolov5_description.md | 0 .../yolov6_description.md | 0 .../yolov8_description.md | 0 .../contributing.md | 36 +- .../dataset_preparation.md | 0 .../deploy/easydeploy_guide.md | 2 +- .../deploy/index.rst | 9 - .../deploy/mmdeploy_guide.md | 4 +- .../deploy/mmdeploy_yolov5.md | 2 +- .../industry_examples.md | 0 .../labeling_to_deployment_tutorials.md | 0 docs/zh_cn/recommended_topics/mm_basics.md | 1 + .../model_design.md | 0 .../replace_backbone.md | 274 ++++----- .../troubleshooting_steps.md | 0 .../visualization.md | 4 +- docs/zh_cn/tutorials/config.md | 2 +- docs/zh_cn/tutorials/custom_installation.md | 111 ++++ docs/zh_cn/tutorials/data_flow.md | 2 +- docs/zh_cn/useful_tools/browse_dataset.md | 2 +- docs/zh_cn/useful_tools/dataset_converters.md | 2 - docs/zh_cn/useful_tools/vis_scheduler.md | 2 +- 96 files changed, 2409 insertions(+), 2327 deletions(-) create mode 100644 configs/yolov5/yolov5_s-v61_fast_1xb12-40e_cat.py create mode 100644 docs/en/advanced_guides/cross-library_application.md delete mode 100644 docs/en/advanced_guides/how_to.md delete mode 100644 docs/en/advanced_guides/index.rst create mode 100644 docs/en/common_usage/amp_training.md create mode 100644 docs/en/common_usage/freeze_layers.md create mode 100644 docs/en/common_usage/mim_usage.md create mode 100644 docs/en/common_usage/module_combination.md create mode 100644 docs/en/common_usage/multi_necks.md create mode 100644 docs/en/common_usage/output_predictions.md rename docs/en/{advanced_guides => common_usage}/plugins.md (100%) create mode 100644 docs/en/common_usage/resume_training.md create mode 100644 docs/en/common_usage/set_random_seed.md create mode 100644 docs/en/common_usage/set_syncbn.md create mode 100644 docs/en/common_usage/specify_device.md delete mode 100644 docs/en/deploy/index.rst delete mode 100644 docs/en/get_started.md create mode 100644 docs/en/get_started/15_minutes_instance_segmentation.md create mode 100644 
docs/en/get_started/15_minutes_object_detection.md create mode 100644 docs/en/get_started/15_minutes_rotated_object_detection.md create mode 100644 docs/en/get_started/article.md create mode 100644 docs/en/get_started/dependencies.md create mode 100644 docs/en/get_started/installation.md create mode 100644 docs/en/get_started/overview.md rename docs/en/{community => notes}/code_style.md (89%) delete mode 100644 docs/en/overview.md rename docs/en/{ => recommended_topics}/algorithm_descriptions/index.rst (67%) rename docs/en/{ => recommended_topics}/algorithm_descriptions/rtmdet_description.md (100%) rename docs/en/{ => recommended_topics}/algorithm_descriptions/yolov5_description.md (100%) rename docs/en/{ => recommended_topics}/algorithm_descriptions/yolov8_description.md (100%) rename docs/en/{community => recommended_topics}/contributing.md (96%) create mode 100644 docs/en/recommended_topics/dataset_preparation.md create mode 100644 docs/en/recommended_topics/deploy/easydeploy_guide.md create mode 100644 docs/en/recommended_topics/deploy/index.rst rename docs/en/{deploy/basic_deployment_guide.md => recommended_topics/deploy/mmdeploy_guide.md} (100%) rename docs/en/{deploy/yolov5_deployment.md => recommended_topics/deploy/mmdeploy_yolov5.md} (97%) create mode 100644 docs/en/recommended_topics/industry_examples.md rename docs/en/{user_guides/custom_dataset.md => recommended_topics/labeling_to_deployment_tutorials.md} (100%) create mode 100644 docs/en/recommended_topics/mm_basics.md rename docs/en/{algorithm_descriptions => recommended_topics}/model_design.md (100%) create mode 100644 docs/en/recommended_topics/replace_backbone.md create mode 100644 docs/en/recommended_topics/troubleshooting_steps.md rename docs/en/{user_guides => recommended_topics}/visualization.md (90%) rename docs/en/{user_guides => tutorials}/config.md (99%) create mode 100644 docs/en/tutorials/custom_installation.md rename docs/en/{advanced_guides => tutorials}/data_flow.md (99%) rename docs/en/{notes => tutorials}/faq.md (81%) create mode 100644 docs/en/useful_tools/browse_coco_json.md create mode 100644 docs/en/useful_tools/browse_dataset.md create mode 100644 docs/en/useful_tools/dataset_analysis.md create mode 100644 docs/en/useful_tools/dataset_converters.md create mode 100644 docs/en/useful_tools/download_dataset.md create mode 100644 docs/en/useful_tools/extract_subcoco.md create mode 100644 docs/en/useful_tools/log_analysis.md create mode 100644 docs/en/useful_tools/model_converters.md create mode 100644 docs/en/useful_tools/optimize_anchors.md create mode 100644 docs/en/useful_tools/print_config.md create mode 100644 docs/en/useful_tools/vis_scheduler.md delete mode 100644 docs/en/user_guides/index.rst delete mode 100644 docs/en/user_guides/useful_tools.md delete mode 100644 docs/en/user_guides/yolov5_tutorial.md rename docs/zh_cn/{featured_topics => recommended_topics}/algorithm_descriptions/index.rst (100%) rename docs/zh_cn/{featured_topics => recommended_topics}/algorithm_descriptions/rtmdet_description.md (100%) rename docs/zh_cn/{featured_topics => recommended_topics}/algorithm_descriptions/yolov5_description.md (100%) rename docs/zh_cn/{featured_topics => recommended_topics}/algorithm_descriptions/yolov6_description.md (100%) rename docs/zh_cn/{featured_topics => recommended_topics}/algorithm_descriptions/yolov8_description.md (100%) rename docs/zh_cn/{featured_topics => recommended_topics}/contributing.md (95%) rename docs/zh_cn/{featured_topics => recommended_topics}/dataset_preparation.md (100%) 
rename docs/zh_cn/{featured_topics => recommended_topics}/deploy/easydeploy_guide.md (90%) rename docs/zh_cn/{featured_topics => recommended_topics}/deploy/index.rst (70%) rename docs/zh_cn/{featured_topics => recommended_topics}/deploy/mmdeploy_guide.md (98%) rename docs/zh_cn/{featured_topics => recommended_topics}/deploy/mmdeploy_yolov5.md (99%) rename docs/zh_cn/{featured_topics => recommended_topics}/industry_examples.md (100%) rename docs/zh_cn/{featured_topics => recommended_topics}/labeling_to_deployment_tutorials.md (100%) create mode 100644 docs/zh_cn/recommended_topics/mm_basics.md rename docs/zh_cn/{featured_topics => recommended_topics}/model_design.md (100%) rename docs/zh_cn/{featured_topics => recommended_topics}/replace_backbone.md (52%) rename docs/zh_cn/{featured_topics => recommended_topics}/troubleshooting_steps.md (100%) rename docs/zh_cn/{featured_topics => recommended_topics}/visualization.md (99%) create mode 100644 docs/zh_cn/tutorials/custom_installation.md diff --git a/README.md b/README.md index ee2d7cba..98d2f964 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ [![issue resolution](https://isitmaintained.com/badge/resolution/open-mmlab/mmyolo.svg)](https://github.com/open-mmlab/mmyolo/issues) [📘Documentation](https://mmyolo.readthedocs.io/en/latest/) | -[🛠️Installation](https://mmyolo.readthedocs.io/en/latest/get_started.html) | +[🛠️Installation](https://mmyolo.readthedocs.io/en/latest/get_started/installation.html) | [👀Model Zoo](https://mmyolo.readthedocs.io/en/latest/model_zoo.html) | [🆕Update News](https://mmyolo.readthedocs.io/en/latest/notes/changelog.html) | [🤔Reporting Issues](https://github.com/open-mmlab/mmyolo/issues/new/choose) @@ -129,13 +129,13 @@ The master branch works with **PyTorch 1.6+**. BaseModule-P5 The figure above is contributed by RangeKing@GitHub, thank you very much! -And the figure of P6 model is in [model_design.md](docs/en/algorithm_descriptions/model_design.md). +And the figure of P6 model is in [model_design.md](docs/en/recommended_topics/model_design.md).
 ## 🛠️ Installation [🔝](#-table-of-contents)
 
-MMYOLO relies on PyTorch, MMCV, MMEngine, and MMDetection. Below are quick steps for installation. Please refer to the [Install Guide](docs/en/get_started.md) for more detailed instructions.
+MMYOLO relies on PyTorch, MMCV, MMEngine, and MMDetection. Below are quick steps for installation. Please refer to the [Install Guide](docs/en/get_started/installation.md) for more detailed instructions.
 
 ```shell
 conda create -n open-mmlab python=3.8 pytorch==1.10.1 torchvision==0.11.2 cudatoolkit=11.3 -c pytorch -y
@@ -160,38 +160,96 @@ The usage of MMYOLO is almost identical to MMDetection and all tutorials are str
 
 For the parts that differ from MMDetection, we have also prepared user guides and advanced guides; please read our [documentation](https://mmyolo.readthedocs.io/en/latest/).
 
-- User Guides
+
+Get Started - - [Train & Test](https://mmyolo.readthedocs.io/en/latest/user_guides/index.html#train-test) - - [Learn about Configs with YOLOv5](docs/en/user_guides/config.md) - - [From getting started to deployment](https://mmyolo.readthedocs.io/en/latest/user_guides/index.html#get-started-to-deployment) - - [Custom Dataset](docs/en/user_guides/custom_dataset.md) - - [From getting started to deployment with YOLOv5](docs/en/user_guides/yolov5_tutorial.md) - - [Useful Tools](https://mmdetection.readthedocs.io/en/latest/user_guides/index.html#useful-tools) - - [Visualization](docs/en/user_guides/visualization.md) - - [Useful Tools](docs/en/user_guides/useful_tools.md) +- [Overview](docs/en/get_started/overview.md) +- [Dependencies](docs/en/get_started/dependencies.md) +- [Installation](docs/en/get_started/installation.md) +- [15 minutes object detection](docs/en/get_started/15_minutes_object_detection.md) +- [15 minutes rotated object detection](docs/en/get_started/15_minutes_rotated_object_detection.md) +- [15 minutes instance segmentation](docs/en/get_started/15_minutes_instance_segmentation.md) +- [Resources summary](docs/en/get_started/article.md) -- Algorithm description +
- - [Essential Basics](https://mmyolo.readthedocs.io/en/latest/algorithm_descriptions/index.html#essential-basics) - - [Model design-related instructions](docs/en/algorithm_descriptions/model_design.md) - - [Algorithm principles and implementation](https://mmyolo.readthedocs.io/en/latest/algorithm_descriptions/index.html#algorithm-principles-and-implementation) - - [Algorithm principles and implementation with YOLOv5](docs/en/algorithm_descriptions/yolov5_description.md) - - [Algorithm principles and implementation with RTMDet](docs/en/algorithm_descriptions/rtmdet_description.md) - - [Algorithm principles and implementation with YOLOv8](docs/en/algorithm_descriptions/yolov8_description.md) +
+Recommended Topics -- Deployment Guides +- [How to contribute code to MMYOLO](docs/en/recommended_topics/contributing.md) +- [MMYOLO model design](docs/en/recommended_topics/model_design.md) +- [Algorithm principles and implementation](docs/en/recommended_topics/algorithm_descriptions/) +- [Replace the backbone network](docs/en/recommended_topics/replace_backbone.md) +- [Annotation-to-deployment workflow for custom dataset](docs/en/recommended_topics/labeling_to_deployment_tutorials.md) +- [Visualization](docs/en/recommended_topics/visualization.md) +- [Model deployment](docs/en/recommended_topics/deploy/) +- [Troubleshooting steps](docs/en/recommended_topics/troubleshooting_steps.md) +- [MMYOLO industry examples](docs/en/recommended_topics/industry_examples.md) +- [MM series repo essential basics](docs/en/recommended_topics/mm_basics.md) +- [Dataset preparation and description](docs/en/recommended_topics/dataset_preparation.md) - - [Basic Deployment Guide](https://mmyolo.readthedocs.io/en/latest/deploy/index.html#basic-deployment-guide) - - [Basic Deployment Guide](docs/en/deploy/basic_deployment_guide.md) - - [Deployment Tutorial](https://mmyolo.readthedocs.io/en/latest/deploy/index.html#deployment-tutorial) - - [YOLOv5 Deployment](docs/en/deploy/yolov5_deployment.md) +
-- Advanced Guides +
+
Common Usage

+- [Resume training](docs/en/common_usage/resume_training.md)
+- [Enabling and disabling SyncBatchNorm](docs/en/common_usage/set_syncbn.md)
+- [Enabling AMP](docs/en/common_usage/amp_training.md)
+- [Add plugins to the backbone network](docs/en/common_usage/plugins.md)
+- [Freeze layers](docs/en/common_usage/freeze_layers.md)
+- [Output model predictions](docs/en/common_usage/output_predictions.md)
+- [Set random seed](docs/en/common_usage/set_random_seed.md)
+- [Module combination](docs/en/common_usage/module_combination.md)
+- [Cross-library calls using mim](docs/en/common_usage/mim_usage.md)
+- [Apply multiple Necks](docs/en/common_usage/multi_necks.md)
+- [Specify specific device training or inference](docs/en/common_usage/specify_device.md)
+
+
+ +
+Useful Tools + +- [Browse coco json](docs/en/useful_tools/browse_coco_json.md) +- [Browse dataset](docs/en/useful_tools/browse_dataset.md) +- [Print config](docs/en/useful_tools/print_config.md) +- [Dataset analysis](docs/en/useful_tools/dataset_analysis.md) +- [Optimize anchors](docs/en/useful_tools/optimize_anchors.md) +- [Extract subcoco](docs/en/useful_tools/extract_subcoco.md) +- [Visualization scheduler](docs/en/useful_tools/vis_scheduler.md) +- [Dataset converters](docs/en/useful_tools/dataset_converters.md) +- [Download dataset](docs/en/useful_tools/download_dataset.md) +- [Log analysis](docs/en/useful_tools/log_analysis.md) +- [Model converters](docs/en/useful_tools/model_converters.md) + +
+ +
+Basic Tutorials + +- [Learn about configs with YOLOv5](docs/en/tutorials/config.md) +- [Data flow](docs/en/tutorials/data_flow.md) +- [Custom Installation](docs/en/tutorials/custom_installation.md) +- [FAQ](docs/en/tutorials/faq.md) + +
+ +
+Advanced Tutorials + +- [MMYOLO cross-library application](docs/en/advanced_guides/cross-library_application.md) + +
+ +
+Descriptions + +- [Changelog](docs/en/notes/changelog.md) +- [Compatibility](docs/en/notes/compatibility.md) +- [Conventions](docs/en/notes/conventions.md) +- [Code Style](docs/en/notes/code_style.md) + +
## 📊 Overview of Benchmark and Model Zoo [🔝](#-table-of-contents) @@ -276,7 +334,7 @@ Results and models are available in the [model zoo](docs/en/model_zoo.md). ## ❓ FAQ [🔝](#-table-of-contents) -Please refer to the [FAQ](docs/en/notes/faq.md) for frequently asked questions. +Please refer to the [FAQ](docs/en/tutorials/faq.md) for frequently asked questions. ## 🙌 Contributing [🔝](#-table-of-contents) diff --git a/README_zh-CN.md b/README_zh-CN.md index 991dc6c5..4c9c5201 100644 --- a/README_zh-CN.md +++ b/README_zh-CN.md @@ -150,7 +150,7 @@ MMYOLO 是一个基于 PyTorch 和 MMDetection 的 YOLO 系列算法开源工具 基类-P5 图为 RangeKing@GitHub 提供,非常感谢! -P6 模型图详见 [model_design.md](docs/zh_cn/featured_topics/model_design.md)。 +P6 模型图详见 [model_design.md](docs/zh_cn/recommended_topics/model_design.md)。 @@ -197,16 +197,17 @@ MMYOLO 用法和 MMDetection 几乎一致,所有教程都是通用的,你也
推荐专题 -- [如何给 MMYOLO 贡献代码](docs/zh_cn/featured_topics/contributing.md) -- [MMYOLO 模型结构设计](docs/zh_cn/featured_topics/model_design.md) -- [MMYOLO 产业范例介绍](docs/zh_cn/featured_topics/industry_examples.md) -- [数据集准备和说明](docs/zh_cn/featured_topics/dataset_preparation.md) -- [原理和实现全解析](docs/zh_cn/featured_topics/algorithm_descriptions/) -- [轻松更换主干网络](docs/zh_cn/featured_topics/replace_backbone.md) -- [标注+训练+测试+部署全流程](docs/zh_cn/featured_topics/labeling_to_deployment_tutorials.md) -- [关于可视化的一切](docs/zh_cn/featured_topics/visualization.md) -- [模型部署流程](docs/zh_cn/featured_topics/deploy/) -- [常见错误排查步骤](docs/zh_cn/featured_topics/troubleshooting_steps.md) +- [如何给 MMYOLO 贡献代码](docs/zh_cn/recommended_topics/contributing.md) +- [MMYOLO 模型结构设计](docs/zh_cn/recommended_topics/model_design.md) +- [原理和实现全解析](docs/zh_cn/recommended_topics/algorithm_descriptions/) +- [轻松更换主干网络](docs/zh_cn/recommended_topics/replace_backbone.md) +- [标注+训练+测试+部署全流程](docs/zh_cn/recommended_topics/labeling_to_deployment_tutorials.md) +- [关于可视化的一切](docs/zh_cn/recommended_topics/visualization.md) +- [模型部署流程](docs/zh_cn/recommended_topics/deploy/) +- [常见错误排查步骤](docs/zh_cn/recommended_topics/troubleshooting_steps.md) +- [MMYOLO 产业范例介绍](docs/zh_cn/recommended_topics/industry_examples.md) +- [MM 系列 Repo 必备基础](docs/zh_cn/recommended_topics/mm_basics.md) +- [数据集准备和说明](docs/zh_cn/recommended_topics/dataset_preparation.md)
@@ -220,10 +221,10 @@ MMYOLO 用法和 MMDetection 几乎一致,所有教程都是通用的,你也 - [冻结指定网络层权重](docs/zh_cn/common_usage/common_usage/freeze_layers.md) - [输出模型预测结果](docs/zh_cn/common_usage/output_predictions.md) - [设置随机种子](docs/zh_cn/common_usage/set_random_seed.md) +- [算法组合替换教程](docs/zh_cn/common_usage/module_combination.md) - [使用 mim 跨库调用其他 OpenMMLab 仓库的脚本](docs/zh_cn/common_usage/mim_usage.md) - [应用多个 Neck](docs/zh_cn/common_usage/multi_necks.md) - [指定特定设备训练或推理](docs/zh_cn/common_usage/specify_device.md) -- [算法组合替换教程](docs/zh_cn/common_usage/module_combination.md) @@ -232,15 +233,15 @@ MMYOLO 用法和 MMDetection 几乎一致,所有教程都是通用的,你也 - [可视化 COCO 标签](docs/zh_cn/useful_tools/browse_coco_json.md) - [可视化数据集](docs/zh_cn/useful_tools/browse_dataset.md) +- [打印完整配置文件](docs/zh_cn/useful_tools/print_config.md) - [可视化数据集分析结果](docs/zh_cn/useful_tools/dataset_analysis.md) +- [优化锚框尺寸](docs/zh_cn/useful_tools/optimize_anchors.md) +- [提取 COCO 子集](docs/zh_cn/useful_tools/extract_subcoco.md) +- [可视化优化器参数策略](docs/zh_cn/useful_tools/vis_scheduler.md) - [数据集转换](docs/zh_cn/useful_tools/dataset_converters.md) - [数据集下载](docs/zh_cn/useful_tools/download_dataset.md) -- [提取 COCO 子集](docs/zh_cn/useful_tools/extract_subcoco.md) - [日志分析](docs/zh_cn/useful_tools/log_analysis.md) - [模型转换](docs/zh_cn/useful_tools/model_converters.md) -- [优化锚框尺寸](docs/zh_cn/useful_tools/optimize_anchors.md) -- [打印完整配置文件](docs/zh_cn/useful_tools/print_config.md) -- [可视化优化器参数策略](docs/zh_cn/useful_tools/vis_scheduler.md) @@ -249,6 +250,7 @@ MMYOLO 用法和 MMDetection 几乎一致,所有教程都是通用的,你也 - [学习 YOLOv5 配置文件](docs/zh_cn/tutorials/config.md) - [数据流](docs/zh_cn/tutorials/data_flow.md) +- [自定义安装](docs/zh_cn/tutorials/custom_installation.md) - [常见问题](docs/zh_cn/tutorials/faq.md) @@ -353,7 +355,7 @@ MMYOLO 用法和 MMDetection 几乎一致,所有教程都是通用的,你也 ## ❓ 常见问题 [🔝](#-table-of-contents) -请参考 [FAQ](docs/zh_cn/featured_topics/faq.md) 了解其他用户的常见问题。 +请参考 [FAQ](docs/zh_cn/tutorials/faq.md) 了解其他用户的常见问题。 ## 🙌 贡献指南 [🔝](#-table-of-contents) diff --git a/configs/yolov5/yolov5_s-v61_fast_1xb12-40e_cat.py b/configs/yolov5/yolov5_s-v61_fast_1xb12-40e_cat.py new file mode 100644 index 00000000..61c24356 --- /dev/null +++ b/configs/yolov5/yolov5_s-v61_fast_1xb12-40e_cat.py @@ -0,0 +1,52 @@ +_base_ = 'yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py' + +data_root = './data/cat/' +class_name = ('cat', ) +num_classes = len(class_name) +metainfo = dict(classes=class_name, palette=[(20, 220, 60)]) + +anchors = [ + [(68, 69), (154, 91), (143, 162)], # P3/8 + [(242, 160), (189, 287), (391, 207)], # P4/16 + [(353, 337), (539, 341), (443, 432)] # P5/32 +] + +max_epochs = 40 +train_batch_size_per_gpu = 12 +train_num_workers = 4 + +load_from = 'https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth' # noqa + +model = dict( + backbone=dict(frozen_stages=4), + bbox_head=dict( + head_module=dict(num_classes=num_classes), + prior_generator=dict(base_sizes=anchors))) + +train_dataloader = dict( + batch_size=train_batch_size_per_gpu, + num_workers=train_num_workers, + dataset=dict( + data_root=data_root, + metainfo=metainfo, + ann_file='annotations/trainval.json', + data_prefix=dict(img='images/'))) + +val_dataloader = dict( + dataset=dict( + metainfo=metainfo, + data_root=data_root, + ann_file='annotations/trainval.json', + data_prefix=dict(img='images/'))) + +test_dataloader = val_dataloader + +val_evaluator = dict(ann_file=data_root + 'annotations/trainval.json') +test_evaluator = val_evaluator + +default_hooks = dict( + 
checkpoint=dict(interval=10, max_keep_ckpts=2, save_best='auto'), + param_scheduler=dict(max_epochs=max_epochs), + logger=dict(type='LoggerHook', interval=5)) +train_cfg = dict(max_epochs=max_epochs, val_interval=10) +# visualizer = dict(vis_backends = [dict(type='LocalVisBackend'), dict(type='WandbVisBackend')]) # noqa diff --git a/docs/en/advanced_guides/cross-library_application.md b/docs/en/advanced_guides/cross-library_application.md new file mode 100644 index 00000000..271d1290 --- /dev/null +++ b/docs/en/advanced_guides/cross-library_application.md @@ -0,0 +1 @@ +# MMYOLO cross-library application diff --git a/docs/en/advanced_guides/how_to.md b/docs/en/advanced_guides/how_to.md deleted file mode 100644 index 37a3671f..00000000 --- a/docs/en/advanced_guides/how_to.md +++ /dev/null @@ -1,572 +0,0 @@ -# How to xxx - -This tutorial collects answers to any `How to xxx with MMYOLO`. Feel free to update this doc if you meet new questions about `How to` and find the answers! - -## Add plugins to the backbone network - -Please see [Plugins](plugins.md). - -## Apply multiple Necks - -If you want to stack multiple Necks, you can directly set the Neck parameters in the config. MMYOLO supports concatenating multiple Necks in the form of `List`. You need to ensure that the output channel of the previous Neck matches the input channel of the next Neck. If you need to adjust the number of channels, you can insert the `mmdet.ChannelMapper` module to align the number of channels between multiple Necks. The specific configuration is as follows: - -```python -_base_ = './yolov5_s-v61_syncbn_8xb16-300e_coco.py' - -deepen_factor = _base_.deepen_factor -widen_factor = _base_.widen_factor -model = dict( - type='YOLODetector', - neck=[ - dict( - type='YOLOv5PAFPN', - deepen_factor=deepen_factor, - widen_factor=widen_factor, - in_channels=[256, 512, 1024], - out_channels=[256, 512, 1024], # The out_channels is controlled by widen_factor,so the YOLOv5PAFPN's out_channels equls to out_channels * widen_factor - num_csp_blocks=3, - norm_cfg=dict(type='BN', momentum=0.03, eps=0.001), - act_cfg=dict(type='SiLU', inplace=True)), - dict( - type='mmdet.ChannelMapper', - in_channels=[128, 256, 512], - out_channels=128, - ), - dict( - type='mmdet.DyHead', - in_channels=128, - out_channels=256, - num_blocks=2, - # disable zero_init_offset to follow official implementation - zero_init_offset=False) - ] - bbox_head=dict(head_module=dict(in_channels=[512,512,512])) # The out_channels is controlled by widen_factor,so the YOLOv5HeadModuled in_channels * widen_factor equals to the last neck's out_channels -) -``` - -## Replace the backbone network - -```{note} -1. When using other backbone networks, you need to ensure that the output channels of the backbone network match the input channels of the neck network. -2. The configuration files given below only ensure that the training will work correctly, and their training performance may not be optimal. Because some backbones require specific learning rates, optimizers, and other hyperparameters. Related contents will be added in the "Training Tips" section later. 
-``` - -### Use backbone network implemented in MMYOLO - -Suppose you want to use `YOLOv6EfficientRep` as the backbone network of `YOLOv5`, the example config is as the following: - -```python -_base_ = './yolov5_s-v61_syncbn_8xb16-300e_coco.py' - -model = dict( - backbone=dict( - type='YOLOv6EfficientRep', - norm_cfg=dict(type='BN', momentum=0.03, eps=0.001), - act_cfg=dict(type='ReLU', inplace=True)) -) -``` - -### Use backbone network implemented in other OpenMMLab repositories - -The model registry in MMYOLO, MMDetection, MMClassification, and MMSegmentation all inherit from the root registry in MMEngine in the OpenMMLab 2.0 system, allowing these repositories to directly use modules already implemented by each other. Therefore, in MMYOLO, users can use backbone networks from MMDetection and MMClassification without reimplementation. - -#### Use backbone network implemented in MMDetection - -1. Suppose you want to use `ResNet-50` as the backbone network of `YOLOv5`, the example config is as the following: - - ```python - _base_ = './yolov5_s-v61_syncbn_8xb16-300e_coco.py' - - deepen_factor = _base_.deepen_factor - widen_factor = 1.0 - channels = [512, 1024, 2048] - - model = dict( - backbone=dict( - _delete_=True, # Delete the backbone field in _base_ - type='mmdet.ResNet', # Using ResNet from mmdet - depth=50, - num_stages=4, - out_indices=(1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - norm_eval=True, - style='pytorch', - init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), - neck=dict( - type='YOLOv5PAFPN', - widen_factor=widen_factor, - in_channels=channels, # Note: The 3 channels of ResNet-50 output are [512, 1024, 2048], which do not match the original yolov5-s neck and need to be changed. - out_channels=channels), - bbox_head=dict( - type='YOLOv5Head', - head_module=dict( - type='YOLOv5HeadModule', - in_channels=channels, # input channels of head need to be changed accordingly - widen_factor=widen_factor)) - ) - ``` - -2. Suppose you want to use `SwinTransformer-Tiny` as the backbone network of `YOLOv5`, the example config is as the following: - - ```python - _base_ = './yolov5_s-v61_syncbn_8xb16-300e_coco.py' - - deepen_factor = _base_.deepen_factor - widen_factor = 1.0 - channels = [192, 384, 768] - checkpoint_file = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_tiny_patch4_window7_224.pth' # noqa - - model = dict( - backbone=dict( - _delete_=True, # Delete the backbone field in _base_ - type='mmdet.SwinTransformer', # Using SwinTransformer from mmdet - embed_dims=96, - depths=[2, 2, 6, 2], - num_heads=[3, 6, 12, 24], - window_size=7, - mlp_ratio=4, - qkv_bias=True, - qk_scale=None, - drop_rate=0., - attn_drop_rate=0., - drop_path_rate=0.2, - patch_norm=True, - out_indices=(1, 2, 3), - with_cp=False, - convert_weights=True, - init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file)), - neck=dict( - type='YOLOv5PAFPN', - deepen_factor=deepen_factor, - widen_factor=widen_factor, - in_channels=channels, # Note: The 3 channels of SwinTransformer-Tiny output are [192, 384, 768], which do not match the original yolov5-s neck and need to be changed. - out_channels=channels), - bbox_head=dict( - type='YOLOv5Head', - head_module=dict( - type='YOLOv5HeadModule', - in_channels=channels, # input channels of head need to be changed accordingly - widen_factor=widen_factor)) - ) - ``` - -#### Use backbone network implemented in MMClassification - -1. 
Suppose you want to use `ConvNeXt-Tiny` as the backbone network of `YOLOv5`, the example config is as the following: - - ```python - _base_ = './yolov5_s-v61_syncbn_8xb16-300e_coco.py' - - # please run the command, mim install "mmcls>=1.0.0rc2", to install mmcls - # import mmcls.models to trigger register_module in mmcls - custom_imports = dict(imports=['mmcls.models'], allow_failed_imports=False) - checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-tiny_3rdparty_32xb128-noema_in1k_20220301-795e9634.pth' # noqa - deepen_factor = _base_.deepen_factor - widen_factor = 1.0 - channels = [192, 384, 768] - - model = dict( - backbone=dict( - _delete_=True, # Delete the backbone field in _base_ - type='mmcls.ConvNeXt', # Using ConvNeXt from mmcls - arch='tiny', - out_indices=(1, 2, 3), - drop_path_rate=0.4, - layer_scale_init_value=1.0, - gap_before_final_norm=False, - init_cfg=dict( - type='Pretrained', checkpoint=checkpoint_file, - prefix='backbone.')), # The pre-trained weights of backbone network in MMCls have prefix='backbone.'. The prefix in the keys will be removed so that these weights can be normally loaded. - neck=dict( - type='YOLOv5PAFPN', - deepen_factor=deepen_factor, - widen_factor=widen_factor, - in_channels=channels, # Note: The 3 channels of ConvNeXt-Tiny output are [192, 384, 768], which do not match the original yolov5-s neck and need to be changed. - out_channels=channels), - bbox_head=dict( - type='YOLOv5Head', - head_module=dict( - type='YOLOv5HeadModule', - in_channels=channels, # input channels of head need to be changed accordingly - widen_factor=widen_factor)) - ) - ``` - -2. Suppose you want to use `MobileNetV3-small` as the backbone network of `YOLOv5`, the example config is as the following: - - ```python - _base_ = './yolov5_s-v61_syncbn_8xb16-300e_coco.py' - - # please run the command, mim install "mmcls>=1.0.0rc2", to install mmcls - # import mmcls.models to trigger register_module in mmcls - custom_imports = dict(imports=['mmcls.models'], allow_failed_imports=False) - checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/mobilenet_v3/convert/mobilenet_v3_small-8427ecf0.pth' # noqa - deepen_factor = _base_.deepen_factor - widen_factor = 1.0 - channels = [24, 48, 96] - - model = dict( - backbone=dict( - _delete_=True, # Delete the backbone field in _base_ - type='mmcls.MobileNetV3', # Using MobileNetV3 from mmcls - arch='small', - out_indices=(3, 8, 11), # Modify out_indices - init_cfg=dict( - type='Pretrained', - checkpoint=checkpoint_file, - prefix='backbone.')), # The pre-trained weights of backbone network in MMCls have prefix='backbone.'. The prefix in the keys will be removed so that these weights can be normally loaded. - neck=dict( - type='YOLOv5PAFPN', - deepen_factor=deepen_factor, - widen_factor=widen_factor, - in_channels=channels, # Note: The 3 channels of MobileNetV3 output are [24, 48, 96], which do not match the original yolov5-s neck and need to be changed. - out_channels=channels), - bbox_head=dict( - type='YOLOv5Head', - head_module=dict( - type='YOLOv5HeadModule', - in_channels=channels, # input channels of head need to be changed accordingly - widen_factor=widen_factor)) - ) - ``` - -#### Use backbone network in `timm` through MMClassification - -MMClassification also provides a wrapper for the Py**T**orch **Im**age **M**odels (`timm`) backbone network, users can directly use the backbone network in `timm` through MMClassification. 
Suppose you want to use `EfficientNet-B1` as the backbone network of `YOLOv5`, the example config is as the following: - -```python -_base_ = './yolov5_s-v61_syncbn_8xb16-300e_coco.py' - -# please run the command, mim install "mmcls>=1.0.0rc2", to install mmcls -# and the command, pip install timm, to install timm -# import mmcls.models to trigger register_module in mmcls -custom_imports = dict(imports=['mmcls.models'], allow_failed_imports=False) - -deepen_factor = _base_.deepen_factor -widen_factor = 1.0 -channels = [40, 112, 320] - -model = dict( - backbone=dict( - _delete_=True, # Delete the backbone field in _base_ - type='mmcls.TIMMBackbone', # Using timm from mmcls - model_name='efficientnet_b1', # Using efficientnet_b1 in timm - features_only=True, - pretrained=True, - out_indices=(2, 3, 4)), - neck=dict( - type='YOLOv5PAFPN', - deepen_factor=deepen_factor, - widen_factor=widen_factor, - in_channels=channels, # Note: The 3 channels of EfficientNet-B1 output are [40, 112, 320], which do not match the original yolov5-s neck and need to be changed. - out_channels=channels), - bbox_head=dict( - type='YOLOv5Head', - head_module=dict( - type='YOLOv5HeadModule', - in_channels=channels, # input channels of head need to be changed accordingly - widen_factor=widen_factor)) -) -``` - -#### Use backbone network implemented in MMSelfSup - -Suppose you want to use `ResNet-50` which is self-supervised trained by `MoCo v3` in MMSelfSup as the backbone network of `YOLOv5`, the example config is as the following: - -```python -_base_ = './yolov5_s-v61_syncbn_8xb16-300e_coco.py' - -# please run the command, mim install "mmselfsup>=1.0.0rc3", to install mmselfsup -# import mmselfsup.models to trigger register_module in mmselfsup -custom_imports = dict(imports=['mmselfsup.models'], allow_failed_imports=False) -checkpoint_file = 'https://download.openmmlab.com/mmselfsup/1.x/mocov3/mocov3_resnet50_8xb512-amp-coslr-800e_in1k/mocov3_resnet50_8xb512-amp-coslr-800e_in1k_20220927-e043f51a.pth' # noqa -deepen_factor = _base_.deepen_factor -widen_factor = 1.0 -channels = [512, 1024, 2048] - -model = dict( - backbone=dict( - _delete_=True, # Delete the backbone field in _base_ - type='mmselfsup.ResNet', - depth=50, - num_stages=4, - out_indices=(2, 3, 4), # Note: out_indices of ResNet in MMSelfSup are 1 larger than those in MMdet and MMCls - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - norm_eval=True, - style='pytorch', - init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file)), - neck=dict( - type='YOLOv5PAFPN', - deepen_factor=deepen_factor, - widen_factor=widen_factor, - in_channels=channels, # Note: The 3 channels of ResNet-50 output are [512, 1024, 2048], which do not match the original yolov5-s neck and need to be changed. - out_channels=channels), - bbox_head=dict( - type='YOLOv5Head', - head_module=dict( - type='YOLOv5HeadModule', - in_channels=channels, # input channels of head need to be changed accordingly - widen_factor=widen_factor)) -) -``` - -#### Don't used pre-training weights - -When we replace the backbone network, the model initialization is trained by default loading the pre-training weight of the backbone network. Instead of using the pre-training weights of the backbone network, if you want to train the time model from scratch, -You can set `init_cfg` in 'backbone' to 'None'. In this case, the backbone network will be initialized with the default initialization method, instead of using the trained pre-training weight. 
- -```python -_base_ = './yolov5_s-v61_syncbn_8xb16-300e_coco.py' - -deepen_factor = _base_.deepen_factor -widen_factor = 1.0 -channels = [512, 1024, 2048] - -model = dict( - backbone=dict( - _delete_=True, # Delete the backbone field in _base_ - type='mmdet.ResNet', # Using ResNet from mmdet - depth=50, - num_stages=4, - out_indices=(1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - norm_eval=True, - style='pytorch', - init_cfg=None # If init_cfg is set to None, backbone will not be initialized with pre-trained weights - ), - neck=dict( - type='YOLOv5PAFPN', - widen_factor=widen_factor, - in_channels=channels, # Note: The 3 channels of ResNet-50 output are [512, 1024, 2048], which do not match the original yolov5-s neck and need to be changed. - out_channels=channels), - bbox_head=dict( - type='YOLOv5Head', - head_module=dict( - type='YOLOv5HeadModule', - in_channels=channels, # input channels of head need to be changed accordingly - widen_factor=widen_factor)) -) -``` - -#### Freeze the weight of backbone or neck - -In MMYOLO, we can freeze some `stages` of the backbone network by setting `frozen_stages` parameters, so that these `stage` parameters do not participate in model updating. -It should be noted that `frozen_stages = i` means that all parameters from the initial `stage` to the `i`th `stage` will be frozen. The following is an example of `YOLOv5`. Other algorithms are the same logic. - -```python -_base_ = './yolov5_s-v61_syncbn_8xb16-300e_coco.py' - -model = dict( - backbone=dict( - frozen_stages=1 # Indicates that the parameters in the first stage and all stages before it are frozen - )) -``` - -In addition, it's able to freeze the whole `neck` with the parameter `freeze_all` in MMYOLO. The following is an example of `YOLOv5`. Other algorithms are the same logic. - -```python -_base_ = './yolov5_s-v61_syncbn_8xb16-300e_coco.py' - -model = dict( - neck=dict( - freeze_all=True # If freeze_all=True, all parameters of the neck will be frozen - )) -``` - -## Output prediction results - -If you want to save the prediction results as a specific file for offline evaluation, MMYOLO currently supports both json and pkl formats. - -```{note} -The json file only save `image_id`, `bbox`, `score` and `category_id`. The json file can be read using the json library. -The pkl file holds more content than the json file, and also holds information such as the file name and size of the predicted image; the pkl file can be read using the pickle library. The pkl file can be read using the pickle library. -``` - -### Output into json file - -If you want to output the prediction results as a json file, the command is as follows. - -```shell -python tools/test.py {path_to_config} {path_to_checkpoint} --json-prefix {json_prefix} -``` - -The argument after `--json-prefix` should be a filename prefix (no need to enter the `.json` suffix) and can also contain a path. For a concrete example: - -```shell -python tools/test.py configs\yolov5\yolov5_s-v61_syncbn_8xb16-300e_coco.py yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth --json-prefix work_dirs/demo/json_demo -``` - -Running the above command will output the `json_demo.bbox.json` file in the `work_dirs/demo` folder. - -### Output into pkl file - -If you want to output the prediction results as a pkl file, the command is as follows. 
- -```shell -python tools/test.py {path_to_config} {path_to_checkpoint} --out {path_to_output_file} -``` - -The argument after `--out` should be a full filename (**must be** with a `.pkl` or `.pickle` suffix) and can also contain a path. For a concrete example: - -```shell -python tools/test.py configs\yolov5\yolov5_s-v61_syncbn_8xb16-300e_coco.py yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth --out work_dirs/demo/pkl_demo.pkl -``` - -Running the above command will output the `pkl_demo.pkl` file in the `work_dirs/demo` folder. - -## Use mim to run scripts from other OpenMMLab repositories - -```{note} -1. All script calls across libraries are currently not supported and are being fixed. More examples will be added to this document when the fix is complete. 2. -2. mAP plotting and average training speed calculation are fixed in the MMDetection dev-3.x branch, which currently needs to be installed via the source code to be run successfully. -``` - -## Log Analysis - -#### Curve plotting - -`tools/analysis_tools/analyze_logs.py` plots loss/mAP curves given a training log file. Run `pip install seaborn` first to install the dependency. - -```shell -mim run mmdet analyze_logs plot_curve \ - ${LOG} \ # path of train log in json format - [--keys ${KEYS}] \ # the metric that you want to plot, default to 'bbox_mAP' - [--start-epoch ${START_EPOCH}] # the epoch that you want to start, default to 1 - [--eval-interval ${EVALUATION_INTERVAL}] \ # the evaluation interval when training, default to 1 - [--title ${TITLE}] \ # title of figure - [--legend ${LEGEND}] \ # legend of each plot, default to None - [--backend ${BACKEND}] \ # backend of plt, default to None - [--style ${STYLE}] \ # style of plt, default to 'dark' - [--out ${OUT_FILE}] # the path of output file -# [] stands for optional parameters, when actually entering the command line, you do not need to enter [] -``` - -Examples: - -- Plot the classification loss of some run. - - ```shell - mim run mmdet analyze_logs plot_curve \ - yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700.log.json \ - --keys loss_cls \ - --legend loss_cls - ``` - - - -- Plot the classification and regression loss of some run, and save the figure to a pdf. - - ```shell - mim run mmdet analyze_logs plot_curve \ - yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700.log.json \ - --keys loss_cls loss_bbox \ - --legend loss_cls loss_bbox \ - --out losses_yolov5_s.pdf - ``` - - - -- Compare the bbox mAP of two runs in the same figure. - - ```shell - mim run mmdet analyze_logs plot_curve \ - yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700.log.json \ - yolov5_n-v61_syncbn_fast_8xb16-300e_coco_20220919_090739.log.json \ - --keys bbox_mAP \ - --legend yolov5_s yolov5_n \ - --eval-interval 10 # Note that the evaluation interval must be the same as during training. Otherwise, it will raise an error. - ``` - - - -#### Compute the average training speed - -```shell -mim run mmdet analyze_logs cal_train_time \ - ${LOG} \ # path of train log in json format - [--include-outliers] # include the first value of every epoch when computing the average time -``` - -Examples: - -```shell -mim run mmdet analyze_logs cal_train_time \ - yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700.log.json -``` - -The output is expected to be like the following. 
- -```text ------Analyze train time of yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700.log.json----- -slowest epoch 278, average time is 0.1705 s/iter -fastest epoch 300, average time is 0.1510 s/iter -time std over epochs is 0.0026 -average iter time: 0.1556 s/iter -``` - -### Print the whole config - -`print_config.py` in MMDetection prints the whole config verbatim, expanding all its imports. The command is as following. - -```shell -mim run mmdet print_config \ - ${CONFIG} \ # path of the config file - [--save-path] \ # save path of whole config, suffixed with .py, .json or .yml - [--cfg-options ${OPTIONS [OPTIONS...]}] # override some settings in the used config -``` - -Examples: - -```shell -mim run mmdet print_config \ - configs/yolov5/yolov5_s-v61_syncbn_fast_1xb4-300e_balloon.py \ - --save-path ./work_dirs/yolov5_s-v61_syncbn_fast_1xb4-300e_balloon.py -``` - -Running the above command will save the `yolov5_s-v61_syncbn_fast_1xb4-300e_balloon.py` config file with the inheritance relationship expanded to \`\`yolov5_s-v61_syncbn_fast_1xb4-300e_balloon_whole.py`in the`./work_dirs\` folder. - -## Set the random seed - -If you want to set the random seed during training, you can use the following command. - -```shell -python ./tools/train.py \ - ${CONFIG} \ # path of the config file - --cfg-options randomness.seed=2023 \ # set seed to 2023 - [randomness.diff_rank_seed=True] \ # set different seeds according to global rank - [randomness.deterministic=True] # set the deterministic option for CUDNN backend -# [] stands for optional parameters, when actually entering the command line, you do not need to enter [] -``` - -`randomness` has three parameters that can be set, with the following meanings. - -- `randomness.seed=2023`, set the random seed to 2023. -- `randomness.diff_rank_seed=True`, set different seeds according to global rank. Defaults to False. -- `randomness.deterministic=True`, set the deterministic option for cuDNN backend, i.e., set `torch.backends.cudnn.deterministic` to True and `torch.backends.cudnn.benchmark` to False. Defaults to False. See https://pytorch.org/docs/stable/notes/randomness.html for more details. - -## Specify specific GPUs during training or inference - -If you have multiple GPUs, such as 8 GPUs, numbered `0, 1, 2, 3, 4, 5, 6, 7`, GPU 0 will be used by default for training or inference. If you want to specify other GPUs for training or inference, you can use the following commands: - -```shell -CUDA_VISIBLE_DEVICES=5 python ./tools/train.py ${CONFIG} #train -CUDA_VISIBLE_DEVICES=5 python ./tools/test.py ${CONFIG} ${CHECKPOINT_FILE} #test -``` - -If you set `CUDA_VISIBLE_DEVICES` to -1 or a number greater than the maximum GPU number, such as 8, the CPU will be used for training or inference. - -If you want to use several of these GPUs to train in parallel, you can use the following command: - -```shell -CUDA_VISIBLE_DEVICES=0,1,2,3 ./tools/dist_train.sh ${CONFIG} ${GPU_NUM} -``` - -Here the `GPU_NUM` is 4. 
In addition, if multiple tasks are trained in parallel on one machine and each task requires multiple GPUs, the PORT of each task need to be set differently to avoid communication conflict, like the following commands:
-
-```shell
-CUDA_VISIBLE_DEVICES=0,1,2,3 PORT=29500 ./tools/dist_train.sh ${CONFIG} 4
-CUDA_VISIBLE_DEVICES=4,5,6,7 PORT=29501 ./tools/dist_train.sh ${CONFIG} 4
-```
diff --git a/docs/en/advanced_guides/index.rst b/docs/en/advanced_guides/index.rst
deleted file mode 100644
index bd72cd2e..00000000
--- a/docs/en/advanced_guides/index.rst
+++ /dev/null
@@ -1,25 +0,0 @@
-Data flow
-************************
-
-.. toctree::
-   :maxdepth: 1
-
-   data_flow.md
-
-
-How to
-************************
-
-.. toctree::
-   :maxdepth: 1
-
-   how_to.md
-
-
-Plugins
-************************
-
-.. toctree::
-   :maxdepth: 1
-
-   plugins.md
diff --git a/docs/en/common_usage/amp_training.md b/docs/en/common_usage/amp_training.md
new file mode 100644
index 00000000..3767114a
--- /dev/null
+++ b/docs/en/common_usage/amp_training.md
@@ -0,0 +1 @@
+# Automatic mixed precision (AMP) training
diff --git a/docs/en/common_usage/freeze_layers.md b/docs/en/common_usage/freeze_layers.md
new file mode 100644
index 00000000..4614f324
--- /dev/null
+++ b/docs/en/common_usage/freeze_layers.md
@@ -0,0 +1,28 @@
+# Freeze layers
+
+## Freeze the weight of backbone
+
+In MMYOLO, we can freeze some `stages` of the backbone network by setting the `frozen_stages` parameter, so that these `stage` parameters do not participate in model updating.
+It should be noted that `frozen_stages = i` means that all parameters from the initial `stage` to the `i`th `stage` will be frozen. The following is an example of `YOLOv5`. Other algorithms follow the same logic.
+
+```python
+_base_ = './yolov5_s-v61_syncbn_8xb16-300e_coco.py'
+
+model = dict(
+    backbone=dict(
+        frozen_stages=1 # Indicates that the parameters in the first stage and all stages before it are frozen
+    ))
+```
+
+## Freeze the weight of neck
+
+In addition, the whole `neck` can be frozen with the parameter `freeze_all` in MMYOLO. The following is an example of `YOLOv5`. Other algorithms follow the same logic.
+
+```python
+_base_ = './yolov5_s-v61_syncbn_8xb16-300e_coco.py'
+
+model = dict(
+    neck=dict(
+        freeze_all=True # If freeze_all=True, all parameters of the neck will be frozen
+    ))
+```
diff --git a/docs/en/common_usage/mim_usage.md b/docs/en/common_usage/mim_usage.md
new file mode 100644
index 00000000..2752ea5f
--- /dev/null
+++ b/docs/en/common_usage/mim_usage.md
@@ -0,0 +1,89 @@
+# Use mim to run scripts from other OpenMMLab repositories
+
+```{note}
+1. Not all cross-library script calls are currently supported; this is being fixed. More examples will be added to this document when the fix is complete.
+2. mAP plotting and average training speed calculation are fixed in the MMDetection dev-3.x branch, which currently needs to be installed from source to run successfully.
+```
+
+## Log Analysis
+
+### Curve plotting
+
+`tools/analysis_tools/analyze_logs.py` plots loss/mAP curves given a training log file. Run `pip install seaborn` first to install the dependency.
+ +```shell +mim run mmdet analyze_logs plot_curve \ + ${LOG} \ # path of train log in json format + [--keys ${KEYS}] \ # the metric that you want to plot, default to 'bbox_mAP' + [--start-epoch ${START_EPOCH}] # the epoch that you want to start, default to 1 + [--eval-interval ${EVALUATION_INTERVAL}] \ # the evaluation interval when training, default to 1 + [--title ${TITLE}] \ # title of figure + [--legend ${LEGEND}] \ # legend of each plot, default to None + [--backend ${BACKEND}] \ # backend of plt, default to None + [--style ${STYLE}] \ # style of plt, default to 'dark' + [--out ${OUT_FILE}] # the path of output file +# [] stands for optional parameters, when actually entering the command line, you do not need to enter [] +``` + +Examples: + +- Plot the classification loss of some run. + + ```shell + mim run mmdet analyze_logs plot_curve \ + yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700.log.json \ + --keys loss_cls \ + --legend loss_cls + ``` + + + +- Plot the classification and regression loss of some run, and save the figure to a pdf. + + ```shell + mim run mmdet analyze_logs plot_curve \ + yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700.log.json \ + --keys loss_cls loss_bbox \ + --legend loss_cls loss_bbox \ + --out losses_yolov5_s.pdf + ``` + + + +- Compare the bbox mAP of two runs in the same figure. + + ```shell + mim run mmdet analyze_logs plot_curve \ + yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700.log.json \ + yolov5_n-v61_syncbn_fast_8xb16-300e_coco_20220919_090739.log.json \ + --keys bbox_mAP \ + --legend yolov5_s yolov5_n \ + --eval-interval 10 # Note that the evaluation interval must be the same as during training. Otherwise, it will raise an error. + ``` + + + +### Compute the average training speed + +```shell +mim run mmdet analyze_logs cal_train_time \ + ${LOG} \ # path of train log in json format + [--include-outliers] # include the first value of every epoch when computing the average time +``` + +Examples: + +```shell +mim run mmdet analyze_logs cal_train_time \ + yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700.log.json +``` + +The output is expected to be like the following. + +```text +-----Analyze train time of yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700.log.json----- +slowest epoch 278, average time is 0.1705 s/iter +fastest epoch 300, average time is 0.1510 s/iter +time std over epochs is 0.0026 +average iter time: 0.1556 s/iter +``` diff --git a/docs/en/common_usage/module_combination.md b/docs/en/common_usage/module_combination.md new file mode 100644 index 00000000..3f9ffa4c --- /dev/null +++ b/docs/en/common_usage/module_combination.md @@ -0,0 +1 @@ +# Module combination diff --git a/docs/en/common_usage/multi_necks.md b/docs/en/common_usage/multi_necks.md new file mode 100644 index 00000000..b6f2bc25 --- /dev/null +++ b/docs/en/common_usage/multi_necks.md @@ -0,0 +1,37 @@ +# Apply multiple Necks + +If you want to stack multiple Necks, you can directly set the Neck parameters in the config. MMYOLO supports concatenating multiple Necks in the form of `List`. You need to ensure that the output channel of the previous Neck matches the input channel of the next Neck. If you need to adjust the number of channels, you can insert the `mmdet.ChannelMapper` module to align the number of channels between multiple Necks. 
The specific configuration is as follows:
+
+```python
+_base_ = './yolov5_s-v61_syncbn_8xb16-300e_coco.py'
+
+deepen_factor = _base_.deepen_factor
+widen_factor = _base_.widen_factor
+model = dict(
+    type='YOLODetector',
+    neck=[
+        dict(
+            type='YOLOv5PAFPN',
+            deepen_factor=deepen_factor,
+            widen_factor=widen_factor,
+            in_channels=[256, 512, 1024],
+            out_channels=[256, 512, 1024], # The out_channels is controlled by widen_factor, so the YOLOv5PAFPN's out_channels equals out_channels * widen_factor
+            num_csp_blocks=3,
+            norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
+            act_cfg=dict(type='SiLU', inplace=True)),
+        dict(
+            type='mmdet.ChannelMapper',
+            in_channels=[128, 256, 512],
+            out_channels=128,
+        ),
+        dict(
+            type='mmdet.DyHead',
+            in_channels=128,
+            out_channels=256,
+            num_blocks=2,
+            # disable zero_init_offset to follow official implementation
+            zero_init_offset=False)
+    ],
+    bbox_head=dict(head_module=dict(in_channels=[512,512,512])) # The in_channels is controlled by widen_factor, so YOLOv5HeadModule's in_channels * widen_factor equals the last neck's out_channels
+)
+```
diff --git a/docs/en/common_usage/output_predictions.md b/docs/en/common_usage/output_predictions.md
new file mode 100644
index 00000000..57192990
--- /dev/null
+++ b/docs/en/common_usage/output_predictions.md
@@ -0,0 +1,40 @@
+# Output prediction results
+
+If you want to save the prediction results as a specific file for offline evaluation, MMYOLO currently supports both json and pkl formats.
+
+```{note}
+The json file only saves `image_id`, `bbox`, `score` and `category_id`; it can be read using the json library.
+The pkl file holds more content than the json file, and also holds information such as the file name and size of the predicted image; it can be read using the pickle library.
+```
+
+## Output into json file
+
+If you want to output the prediction results as a json file, the command is as follows.
+
+```shell
+python tools/test.py {path_to_config} {path_to_checkpoint} --json-prefix {json_prefix}
+```
+
+The argument after `--json-prefix` should be a filename prefix (no need to enter the `.json` suffix) and can also contain a path. For a concrete example:
+
+```shell
+python tools/test.py configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth --json-prefix work_dirs/demo/json_demo
+```
+
+Running the above command will output the `json_demo.bbox.json` file in the `work_dirs/demo` folder.
+
+## Output into pkl file
+
+If you want to output the prediction results as a pkl file, the command is as follows.
+
+```shell
+python tools/test.py {path_to_config} {path_to_checkpoint} --out {path_to_output_file}
+```
+
+The argument after `--out` should be a full filename (**must be** with a `.pkl` or `.pickle` suffix) and can also contain a path. For a concrete example:
+
+```shell
+python tools/test.py configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth --out work_dirs/demo/pkl_demo.pkl
+```
+
+Running the above command will output the `pkl_demo.pkl` file in the `work_dirs/demo` folder.
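+
+For a quick offline sanity check, the saved files can be loaded back with the standard `json` and `pickle` libraries. The snippet below is only a rough sketch: it assumes the output paths used in the commands above, and the exact structure of each pkl entry depends on the installed MMEngine/MMDetection version.
+
+```python
+import json
+import pickle
+
+# json results: a COCO-style list of dicts, each with
+# image_id, category_id, bbox ([x, y, w, h]) and score.
+with open('work_dirs/demo/json_demo.bbox.json') as f:
+    json_results = json.load(f)
+print(len(json_results), json_results[0])
+
+# pkl results: one entry per image, which additionally keeps meta
+# information such as the image file name and size.
+with open('work_dirs/demo/pkl_demo.pkl', 'rb') as f:
+    pkl_results = pickle.load(f)
+print(len(pkl_results), type(pkl_results[0]))
+```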
diff --git a/docs/en/advanced_guides/plugins.md b/docs/en/common_usage/plugins.md similarity index 100% rename from docs/en/advanced_guides/plugins.md rename to docs/en/common_usage/plugins.md diff --git a/docs/en/common_usage/resume_training.md b/docs/en/common_usage/resume_training.md new file mode 100644 index 00000000..d33f1d28 --- /dev/null +++ b/docs/en/common_usage/resume_training.md @@ -0,0 +1 @@ +# Resume training diff --git a/docs/en/common_usage/set_random_seed.md b/docs/en/common_usage/set_random_seed.md new file mode 100644 index 00000000..c45c165f --- /dev/null +++ b/docs/en/common_usage/set_random_seed.md @@ -0,0 +1,18 @@ +# Set the random seed + +If you want to set the random seed during training, you can use the following command. + +```shell +python ./tools/train.py \ + ${CONFIG} \ # path of the config file + --cfg-options randomness.seed=2023 \ # set seed to 2023 + [randomness.diff_rank_seed=True] \ # set different seeds according to global rank + [randomness.deterministic=True] # set the deterministic option for CUDNN backend +# [] stands for optional parameters, when actually entering the command line, you do not need to enter [] +``` + +`randomness` has three parameters that can be set, with the following meanings. + +- `randomness.seed=2023`, set the random seed to 2023. +- `randomness.diff_rank_seed=True`, set different seeds according to global rank. Defaults to False. +- `randomness.deterministic=True`, set the deterministic option for cuDNN backend, i.e., set `torch.backends.cudnn.deterministic` to True and `torch.backends.cudnn.benchmark` to False. Defaults to False. See https://pytorch.org/docs/stable/notes/randomness.html for more details. diff --git a/docs/en/common_usage/set_syncbn.md b/docs/en/common_usage/set_syncbn.md new file mode 100644 index 00000000..dba33be6 --- /dev/null +++ b/docs/en/common_usage/set_syncbn.md @@ -0,0 +1 @@ +# Enabling and disabling SyncBatchNorm diff --git a/docs/en/common_usage/specify_device.md b/docs/en/common_usage/specify_device.md new file mode 100644 index 00000000..72c8017e --- /dev/null +++ b/docs/en/common_usage/specify_device.md @@ -0,0 +1,23 @@ +# Specify specific GPUs during training or inference + +If you have multiple GPUs, such as 8 GPUs, numbered `0, 1, 2, 3, 4, 5, 6, 7`, GPU 0 will be used by default for training or inference. If you want to specify other GPUs for training or inference, you can use the following commands: + +```shell +CUDA_VISIBLE_DEVICES=5 python ./tools/train.py ${CONFIG} #train +CUDA_VISIBLE_DEVICES=5 python ./tools/test.py ${CONFIG} ${CHECKPOINT_FILE} #test +``` + +If you set `CUDA_VISIBLE_DEVICES` to -1 or a number greater than the maximum GPU number, such as 8, the CPU will be used for training or inference. + +If you want to use several of these GPUs to train in parallel, you can use the following command: + +```shell +CUDA_VISIBLE_DEVICES=0,1,2,3 ./tools/dist_train.sh ${CONFIG} ${GPU_NUM} +``` + +Here the `GPU_NUM` is 4. 
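+
+If you are unsure whether the mask takes effect, an illustrative check is to print how many GPUs PyTorch can see under the same environment variable:
+
+```shell
+CUDA_VISIBLE_DEVICES=0,1,2,3 python -c "import torch; print(torch.cuda.device_count())"  # expected output: 4
+```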
In addition, if multiple tasks are trained in parallel on one machine and each task requires multiple GPUs, the PORT of each task need to be set differently to avoid communication conflict, like the following commands: + +```shell +CUDA_VISIBLE_DEVICES=0,1,2,3 PORT=29500 ./tools/dist_train.sh ${CONFIG} 4 +CUDA_VISIBLE_DEVICES=4,5,6,7 PORT=29501 ./tools/dist_train.sh ${CONFIG} 4 +``` diff --git a/docs/en/deploy/index.rst b/docs/en/deploy/index.rst deleted file mode 100644 index 6fcfb241..00000000 --- a/docs/en/deploy/index.rst +++ /dev/null @@ -1,16 +0,0 @@ -Basic Deployment Guide -************************ - -.. toctree:: - :maxdepth: 1 - - basic_deployment_guide.md - - -Deployment tutorial -************************ - -.. toctree:: - :maxdepth: 1 - - yolov5_deployment.md diff --git a/docs/en/get_started.md b/docs/en/get_started.md deleted file mode 100644 index 01c1a716..00000000 --- a/docs/en/get_started.md +++ /dev/null @@ -1,280 +0,0 @@ -# Get Started - -## Prerequisites - -Compatible MMEngine, MMCV and MMDetection versions are shown as below. Please install the correct version to avoid installation issues. - -| MMYOLO version | MMDetection version | MMEngine version | MMCV version | -| :------------: | :----------------------: | :----------------------: | :---------------------: | -| main | mmdet>=3.0.0rc5, \<3.1.0 | mmengine>=0.3.1, \<1.0.0 | mmcv>=2.0.0rc0, \<2.1.0 | -| 0.3.0 | mmdet>=3.0.0rc5, \<3.1.0 | mmengine>=0.3.1, \<1.0.0 | mmcv>=2.0.0rc0, \<2.1.0 | -| 0.2.0 | mmdet>=3.0.0rc3, \<3.1.0 | mmengine>=0.3.1, \<1.0.0 | mmcv>=2.0.0rc0, \<2.1.0 | -| 0.1.3 | mmdet>=3.0.0rc3, \<3.1.0 | mmengine>=0.3.1, \<1.0.0 | mmcv>=2.0.0rc0, \<2.1.0 | -| 0.1.2 | mmdet>=3.0.0rc2, \<3.1.0 | mmengine>=0.3.0, \<1.0.0 | mmcv>=2.0.0rc0, \<2.1.0 | -| 0.1.1 | mmdet==3.0.0rc1 | mmengine>=0.1.0, \<0.2.0 | mmcv>=2.0.0rc0, \<2.1.0 | -| 0.1.0 | mmdet==3.0.0rc0 | mmengine>=0.1.0, \<0.2.0 | mmcv>=2.0.0rc0, \<2.1.0 | - -In this section, we demonstrate how to prepare an environment with PyTorch. - -MMDetection works on Linux, Windows, and macOS. It requires Python 3.7+, CUDA 9.2+, and PyTorch 1.7+. - -```{note} -If you are experienced with PyTorch and have already installed it, just skip this part and jump to the [next section](#installation). Otherwise, you can follow these steps for the preparation. -``` - -**Step 0.** Download and install Miniconda from the [official website](https://docs.conda.io/en/latest/miniconda.html). - -**Step 1.** Create a conda environment and activate it. - -```shell -conda create --name openmmlab python=3.8 -y -conda activate openmmlab -``` - -**Step 2.** Install PyTorch following [official instructions](https://pytorch.org/get-started/locally/), e.g. - -On GPU platforms: - -```shell -conda install pytorch torchvision -c pytorch -``` - -On CPU platforms: - -```shell -conda install pytorch torchvision cpuonly -c pytorch -``` - -## Installation - -### Best Practices - -**Step 0.** Install [MMEngine](https://github.com/open-mmlab/mmengine) and [MMCV](https://github.com/open-mmlab/mmcv) using [MIM](https://github.com/open-mmlab/mim). - -```shell -pip install -U openmim -mim install "mmengine>=0.3.1" -mim install "mmcv>=2.0.0rc1,<2.1.0" -mim install "mmdet>=3.0.0rc5,<3.1.0" -``` - -**Note:** - -a. In MMCV-v2.x, `mmcv-full` is rename to `mmcv`, if you want to install `mmcv` without CUDA ops, you can use `mim install "mmcv-lite>=2.0.0rc1"` to install the lite version. - -b. 
If you would like to use albumentations, we suggest using pip install -r requirements/albu.txt or pip install -U albumentations --no-binary qudida,albumentations. If you simply use pip install albumentations==1.0.1, it will install opencv-python-headless simultaneously (even though you have already installed opencv-python). We recommended checking the environment after installing albumentation to ensure that opencv-python and opencv-python-headless are not installed at the same time, because it might cause unexpected issues if they both installed. Please refer to [official documentation](https://albumentations.ai/docs/getting_started/installation/#note-on-opencv-dependencies) for more details. - -**Step 1.** Install MMYOLO. - -Case a: If you develop and run mmdet directly, install it from source: - -```shell -git clone https://github.com/open-mmlab/mmyolo.git -cd mmyolo -# Install albumentations -pip install -r requirements/albu.txt -# Install MMYOLO -mim install -v -e . -# "-v" means verbose, or more output -# "-e" means installing a project in editable mode, -# thus any local modifications made to the code will take effect without reinstallation. -``` - -Case b: If you use MMYOLO as a dependency or third-party package, install it with MIM: - -```shell -mim install "mmyolo" -``` - -## Verify the installation - -To verify whether MMYOLO is installed correctly, we provide some sample codes to run an inference demo. - -**Step 1.** We need to download config and checkpoint files. - -```shell -mim download mmyolo --config yolov5_s-v61_syncbn_fast_8xb16-300e_coco --dest . -``` - -The downloading will take several seconds or more, depending on your network environment. When it is done, you will find two files `yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py` and `yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth` in your current folder. - -**Step 2.** Verify the inference demo. - -Option (a). If you install MMYOLO from source, just run the following command. - -```shell -python demo/image_demo.py demo/demo.jpg \ - yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py \ - yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth - -# Optional parameters -# --out-dir ./output *The detection results are output to the specified directory. When args have action --show, the script do not save results. Default: ./output -# --device cuda:0 *The computing resources used, including cuda and cpu. Default: cuda:0 -# --show *Display the results on the screen. Default: False -# --score-thr 0.3 *Confidence threshold. Default: 0.3 -``` - -You will see a new image on your `output` folder, where bounding boxes are plotted. - -Supported input types: - -- Single image, include `jpg`, `jpeg`, `png`, `ppm`, `bmp`, `pgm`, `tif`, `tiff`, `webp`. -- Folder, all image files in the folder will be traversed and the corresponding results will be output. -- URL, will automatically download from the URL and the corresponding results will be output. - -Option (b). If you install MMYOLO with MIM, open your python interpreter and copy&paste the following codes. 
- -```python -from mmdet.apis import init_detector, inference_detector -from mmyolo.utils import register_all_modules - -register_all_modules() -config_file = 'yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py' -checkpoint_file = 'yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth' -model = init_detector(config_file, checkpoint_file, device='cpu') # or device='cuda:0' -inference_detector(model, 'demo/demo.jpg') -``` - -You will see a list of `DetDataSample`, and the predictions are in the `pred_instance`, indicating the detected bounding boxes, labels, and scores. - -### Customize Installation - -#### CUDA versions - -When installing PyTorch, you need to specify the version of CUDA. If you are not clear on which to choose, follow our recommendations: - -- For Ampere-based NVIDIA GPUs, such as GeForce 30 series and NVIDIA A100, CUDA 11 is a must. -- For older NVIDIA GPUs, CUDA 11 is backward compatible, but CUDA 10.2 offers better compatibility and is more lightweight. - -Please make sure the GPU driver satisfies the minimum version requirements. See [this table](https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html#cuda-major-component-versions__table-cuda-toolkit-driver-versions) for more information. - -```{note} -Installing CUDA runtime libraries is enough if you follow our best practices, because no CUDA code will be compiled locally. However, if you hope to compile MMCV from source or develop other CUDA operators, you need to install the complete CUDA toolkit from NVIDIA's [website](https://developer.nvidia.com/cuda-downloads), and its version should match the CUDA version of PyTorch. i.e., the specified version of cudatoolkit in `conda install` command. -``` - -#### Install MMEngine without MIM - -To install MMEngine with pip instead of MIM, please follow \[MMEngine installation guides\](https://mmengine.readthedocs.io/en/latest/get_started/installation.html). - -For example, you can install MMEngine by the following command. - -```shell -pip install "mmengine>=0.3.1" -``` - -#### Install MMCV without MIM - -MMCV contains C++ and CUDA extensions, thus depending on PyTorch in a complex way. MIM solves such dependencies automatically and makes the installation easier. However, it is not a must. - -To install MMCV with pip instead of MIM, please follow [MMCV installation guides](https://mmcv.readthedocs.io/en/2.x/get_started/installation.html). This requires manually specifying a find-url based on the PyTorch version and its CUDA version. - -For example, the following command installs MMCV built for PyTorch 1.12.x and CUDA 11.6. - -```shell -pip install "mmcv>=2.0.0rc1" -f https://download.openmmlab.com/mmcv/dist/cu116/torch1.12.0/index.html -``` - -#### Install on CPU-only platforms - -MMDetection can be built for the CPU-only environment. In CPU mode you can train (requires MMCV version >= `2.0.0rc1`), test, or infer a model. - -However, some functionalities are gone in this mode: - -- Deformable Convolution -- Modulated Deformable Convolution -- ROI pooling -- Deformable ROI pooling -- CARAFE -- SyncBatchNorm -- CrissCrossAttention -- MaskedConv2d -- Temporal Interlace Shift -- nms_cuda -- sigmoid_focal_loss_cuda -- bbox_overlaps - -If you try to train/test/infer a model containing the above ops, an error will be raised. -The following table lists affected algorithms. 
- -| Operator | Model | -| :-----------------------------------------------------: | :--------------------------------------------------------------------------------------: | -| Deformable Convolution/Modulated Deformable Convolution | DCN、Guided Anchoring、RepPoints、CentripetalNet、VFNet、CascadeRPN、NAS-FCOS、DetectoRS | -| MaskedConv2d | Guided Anchoring | -| CARAFE | CARAFE | -| SyncBatchNorm | ResNeSt | - -#### Install on Google Colab - -[Google Colab](https://research.google.com/) usually has PyTorch installed, -thus we only need to install MMEngine, MMCV, MMDetection, and MMYOLO with the following commands. - -**Step 1.** Install [MMEngine](https://github.com/open-mmlab/mmengine) and [MMCV](https://github.com/open-mmlab/mmcv) using [MIM](https://github.com/open-mmlab/mim). - -```shell -!pip3 install openmim -!mim install "mmengine>=0.3.1" -!mim install "mmcv>=2.0.0rc1,<2.1.0" -!mim install "mmdet>=3.0.0rc5,<3.1.0" -``` - -**Step 2.** Install MMYOLO from the source. - -```shell -!git clone https://github.com/open-mmlab/mmyolo.git -%cd mmyolo -!pip install -e . -``` - -**Step 3.** Verification. - -```python -import mmyolo -print(mmyolo.__version__) -# Example output: 0.1.0, or an another version. -``` - -```{note} -Within Jupyter, the exclamation mark `!` is used to call external executables and `%cd` is a [magic command](https://ipython.readthedocs.io/en/stable/interactive/magics.html#magic-cd) to change the current working directory of Python. -``` - -#### Using MMYOLO with Docker - -We provide a [Dockerfile](https://github.com/open-mmlab/mmyolo/blob/main/docker/Dockerfile) to build an image. Ensure that your [docker version](https://docs.docker.com/engine/install/) >=19.03. - -Reminder: If you find out that your download speed is very slow, we suggest that you can canceling the comments in the last two lines of `Optional` in the [Dockerfile](https://github.com/open-mmlab/mmyolo/blob/main/docker/Dockerfile#L19-L20) to obtain a rocket like download speed: - -```dockerfile -# (Optional) -RUN sed -i 's/http:\/\/archive.ubuntu.com\/ubuntu\//http:\/\/mirrors.aliyun.com\/ubuntu\//g' /etc/apt/sources.list && \ - pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple -``` - -Build Command: - -```shell -# build an image with PyTorch 1.9, CUDA 11.1 -# If you prefer other versions, just modified the Dockerfile -docker build -t mmyolo docker/ -``` - -Run it with: - -```shell -export DATA_DIR=/path/to/your/dataset -docker run --gpus all --shm-size=8g -it -v ${DATA_DIR}:/mmyolo/data mmyolo -``` - -### Troubleshooting - -If you have some issues during the installation, please first view the [FAQ](notes/faq.md) page. -You may [open an issue](https://github.com/open-mmlab/mmyolo/issues/new/choose) on GitHub if no solution is found. - -### Develop using multiple MMYOLO versions - -The training and testing scripts have been modified in `PYTHONPATH` to ensure that the scripts use MMYOLO in the current directory. 
- -To have the default MMYOLO installed in your environment instead of what is currently in use, you can remove the code that appears in the relevant script: - -```shell -PYTHONPATH="$(dirname $0)/..":$PYTHONPATH -``` diff --git a/docs/en/get_started/15_minutes_instance_segmentation.md b/docs/en/get_started/15_minutes_instance_segmentation.md new file mode 100644 index 00000000..c66a2f28 --- /dev/null +++ b/docs/en/get_started/15_minutes_instance_segmentation.md @@ -0,0 +1,3 @@ +# 15 minutes to get started with MMYOLO instance segmentation + +TODO diff --git a/docs/en/get_started/15_minutes_object_detection.md b/docs/en/get_started/15_minutes_object_detection.md new file mode 100644 index 00000000..37409e5a --- /dev/null +++ b/docs/en/get_started/15_minutes_object_detection.md @@ -0,0 +1,3 @@ +# 15 minutes to get started with MMYOLO object detection + +TODO diff --git a/docs/en/get_started/15_minutes_rotated_object_detection.md b/docs/en/get_started/15_minutes_rotated_object_detection.md new file mode 100644 index 00000000..6e04c8c0 --- /dev/null +++ b/docs/en/get_started/15_minutes_rotated_object_detection.md @@ -0,0 +1,3 @@ +# 15 minutes to get started with MMYOLO rotated object detection + +TODO diff --git a/docs/en/get_started/article.md b/docs/en/get_started/article.md new file mode 100644 index 00000000..ea28d491 --- /dev/null +++ b/docs/en/get_started/article.md @@ -0,0 +1 @@ +# Resources summary diff --git a/docs/en/get_started/dependencies.md b/docs/en/get_started/dependencies.md new file mode 100644 index 00000000..d75275f1 --- /dev/null +++ b/docs/en/get_started/dependencies.md @@ -0,0 +1,44 @@ +# Prerequisites + +Compatible MMEngine, MMCV and MMDetection versions are shown as below. Please install the correct version to avoid installation issues. + +| MMYOLO version | MMDetection version | MMEngine version | MMCV version | +| :------------: | :----------------------: | :----------------------: | :---------------------: | +| main | mmdet>=3.0.0rc5, \<3.1.0 | mmengine>=0.3.1, \<1.0.0 | mmcv>=2.0.0rc0, \<2.1.0 | +| 0.3.0 | mmdet>=3.0.0rc5, \<3.1.0 | mmengine>=0.3.1, \<1.0.0 | mmcv>=2.0.0rc0, \<2.1.0 | +| 0.2.0 | mmdet>=3.0.0rc3, \<3.1.0 | mmengine>=0.3.1, \<1.0.0 | mmcv>=2.0.0rc0, \<2.1.0 | +| 0.1.3 | mmdet>=3.0.0rc3, \<3.1.0 | mmengine>=0.3.1, \<1.0.0 | mmcv>=2.0.0rc0, \<2.1.0 | +| 0.1.2 | mmdet>=3.0.0rc2, \<3.1.0 | mmengine>=0.3.0, \<1.0.0 | mmcv>=2.0.0rc0, \<2.1.0 | +| 0.1.1 | mmdet==3.0.0rc1 | mmengine>=0.1.0, \<0.2.0 | mmcv>=2.0.0rc0, \<2.1.0 | +| 0.1.0 | mmdet==3.0.0rc0 | mmengine>=0.1.0, \<0.2.0 | mmcv>=2.0.0rc0, \<2.1.0 | + +In this section, we demonstrate how to prepare an environment with PyTorch. + +MMDetection works on Linux, Windows, and macOS. It requires Python 3.7+, CUDA 9.2+, and PyTorch 1.7+. + +```{note} +If you are experienced with PyTorch and have already installed it, just skip this part and jump to the [next section](#installation). Otherwise, you can follow these steps for the preparation. +``` + +**Step 0.** Download and install Miniconda from the [official website](https://docs.conda.io/en/latest/miniconda.html). + +**Step 1.** Create a conda environment and activate it. + +```shell +conda create --name openmmlab python=3.8 -y +conda activate openmmlab +``` + +**Step 2.** Install PyTorch following [official instructions](https://pytorch.org/get-started/locally/), e.g. 
+ +On GPU platforms: + +```shell +conda install pytorch torchvision -c pytorch +``` + +On CPU platforms: + +```shell +conda install pytorch torchvision cpuonly -c pytorch +``` diff --git a/docs/en/get_started/installation.md b/docs/en/get_started/installation.md new file mode 100644 index 00000000..d73bede7 --- /dev/null +++ b/docs/en/get_started/installation.md @@ -0,0 +1,123 @@ +# Installation + +## Best Practices + +**Step 0.** Install [MMEngine](https://github.com/open-mmlab/mmengine) and [MMCV](https://github.com/open-mmlab/mmcv) using [MIM](https://github.com/open-mmlab/mim). + +```shell +pip install -U openmim +mim install "mmengine>=0.3.1" +mim install "mmcv>=2.0.0rc1,<2.1.0" +mim install "mmdet>=3.0.0rc5,<3.1.0" +``` + +**Note:** + +a. In MMCV-v2.x, `mmcv-full` is rename to `mmcv`, if you want to install `mmcv` without CUDA ops, you can use `mim install "mmcv-lite>=2.0.0rc1"` to install the lite version. + +b. If you would like to use albumentations, we suggest using pip install -r requirements/albu.txt or pip install -U albumentations --no-binary qudida,albumentations. If you simply use pip install albumentations==1.0.1, it will install opencv-python-headless simultaneously (even though you have already installed opencv-python). We recommended checking the environment after installing albumentation to ensure that opencv-python and opencv-python-headless are not installed at the same time, because it might cause unexpected issues if they both installed. Please refer to [official documentation](https://albumentations.ai/docs/getting_started/installation/#note-on-opencv-dependencies) for more details. + +**Step 1.** Install MMYOLO. + +Case a: If you develop and run mmdet directly, install it from source: + +```shell +git clone https://github.com/open-mmlab/mmyolo.git +cd mmyolo +# Install albumentations +pip install -r requirements/albu.txt +# Install MMYOLO +mim install -v -e . +# "-v" means verbose, or more output +# "-e" means installing a project in editable mode, +# thus any local modifications made to the code will take effect without reinstallation. +``` + +Case b: If you use MMYOLO as a dependency or third-party package, install it with MIM: + +```shell +mim install "mmyolo" +``` + +## Verify the installation + +To verify whether MMYOLO is installed correctly, we provide some sample codes to run an inference demo. + +**Step 1.** We need to download config and checkpoint files. + +```shell +mim download mmyolo --config yolov5_s-v61_syncbn_fast_8xb16-300e_coco --dest . +``` + +The downloading will take several seconds or more, depending on your network environment. When it is done, you will find two files `yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py` and `yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth` in your current folder. + +**Step 2.** Verify the inference demo. + +Option (a). If you install MMYOLO from source, just run the following command. + +```shell +python demo/image_demo.py demo/demo.jpg \ + yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py \ + yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth + +# Optional parameters +# --out-dir ./output *The detection results are output to the specified directory. When args have action --show, the script do not save results. Default: ./output +# --device cuda:0 *The computing resources used, including cuda and cpu. Default: cuda:0 +# --show *Display the results on the screen. Default: False +# --score-thr 0.3 *Confidence threshold. 
Default: 0.3 +``` + +You will see a new image on your `output` folder, where bounding boxes are plotted. + +Supported input types: + +- Single image, include `jpg`, `jpeg`, `png`, `ppm`, `bmp`, `pgm`, `tif`, `tiff`, `webp`. +- Folder, all image files in the folder will be traversed and the corresponding results will be output. +- URL, will automatically download from the URL and the corresponding results will be output. + +Option (b). If you install MMYOLO with MIM, open your python interpreter and copy&paste the following codes. + +```python +from mmdet.apis import init_detector, inference_detector +from mmyolo.utils import register_all_modules + +register_all_modules() +config_file = 'yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py' +checkpoint_file = 'yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth' +model = init_detector(config_file, checkpoint_file, device='cpu') # or device='cuda:0' +inference_detector(model, 'demo/demo.jpg') +``` + +You will see a list of `DetDataSample`, and the predictions are in the `pred_instance`, indicating the detected bounding boxes, labels, and scores. + +## Using MMYOLO with Docker + +We provide a [Dockerfile](https://github.com/open-mmlab/mmyolo/blob/main/docker/Dockerfile) to build an image. Ensure that your [docker version](https://docs.docker.com/engine/install/) >=19.03. + +Reminder: If you find out that your download speed is very slow, we suggest that you can canceling the comments in the last two lines of `Optional` in the [Dockerfile](https://github.com/open-mmlab/mmyolo/blob/main/docker/Dockerfile#L19-L20) to obtain a rocket like download speed: + +```dockerfile +# (Optional) +RUN sed -i 's/http:\/\/archive.ubuntu.com\/ubuntu\//http:\/\/mirrors.aliyun.com\/ubuntu\//g' /etc/apt/sources.list && \ + pip config set global.index-url https://pypi.tuna.tsinghua.edu.cn/simple +``` + +Build Command: + +```shell +# build an image with PyTorch 1.9, CUDA 11.1 +# If you prefer other versions, just modified the Dockerfile +docker build -t mmyolo docker/ +``` + +Run it with: + +```shell +export DATA_DIR=/path/to/your/dataset +docker run --gpus all --shm-size=8g -it -v ${DATA_DIR}:/mmyolo/data mmyolo +``` + +## Troubleshooting + +If you have some issues during the installation, please first view the [FAQ](../tutorials/faq.md) page. +You may [open an issue](https://github.com/open-mmlab/mmyolo/issues/new/choose) on GitHub if no solution is found. diff --git a/docs/en/get_started/overview.md b/docs/en/get_started/overview.md new file mode 100644 index 00000000..07dd0c5c --- /dev/null +++ b/docs/en/get_started/overview.md @@ -0,0 +1 @@ +# Overview diff --git a/docs/en/index.rst b/docs/en/index.rst index 123680de..5516b619 100644 --- a/docs/en/index.rst +++ b/docs/en/index.rst @@ -1,36 +1,83 @@ Welcome to MMYOLO's documentation! ======================================= +You can switch between Chinese and English documents in the top-right corner of the layout. + +.. toctree:: + :maxdepth: 2 + :caption: Get Started + + get_started/overview.md + get_started/dependencies.md + get_started/installation.md + get_started/15_minutes_object_detection.md + get_started/15_minutes_rotated_object_detection.md + get_started/15_minutes_instance_segmentation.md + get_started/article.md + +.. 
toctree:: + :maxdepth: 2 + :caption: Recommended Topics + + recommended_topics/contributing.md + recommended_topics/model_design.md + recommended_topics/algorithm_descriptions/index.rst + recommended_topics/replace_backbone.md + recommended_topics/labeling_to_deployment_tutorials.md + recommended_topics/visualization.md + recommended_topics/deploy/index.rst + recommended_topics/troubleshooting_steps.md + recommended_topics/industry_examples.md + recommended_topics/mm_basics.md + recommended_topics/dataset_preparation.md + +.. toctree:: + :maxdepth: 2 + :caption: Common Usage + + common_usage/resume_training.md + common_usage/syncbn.md + common_usage/amp_training.md + common_usage/plugins.md + common_usage/freeze_layers.md + common_usage/output_predictions.md + common_usage/set_random_seed.md + common_usage/module_combination.md + common_usage/mim_usage.md + common_usage/multi_necks.md + common_usage/specify_device.md + + +.. toctree:: + :maxdepth: 2 + :caption: Useful Tools + + useful_tools/browse_coco_json.md + useful_tools/browse_dataset.md + useful_tools/print_config.md + useful_tools/dataset_analysis.md + useful_tools/optimize_anchors.md + useful_tools/extract_subcoco.md + useful_tools/vis_scheduler.md + useful_tools/dataset_converters.md + useful_tools/download_dataset.md + useful_tools/log_analysis.md + useful_tools/model_converters.md + +.. toctree:: + :maxdepth: 2 + :caption: Basic Tutorials + + tutorials/config.md + tutorials/data_flow.md + tutorials/custom_installation.md + tutorials/faq.md + .. toctree:: :maxdepth: 1 - :caption: Get Started + :caption: Advanced Tutorials - overview.md - get_started.md - -.. toctree:: - :maxdepth: 2 - :caption: User Guides - - user_guides/index.rst - -.. toctree:: - :maxdepth: 2 - :caption: Algorithm Descriptions - - algorithm_descriptions/index.rst - -.. toctree:: - :maxdepth: 2 - :caption: Advanced Guides - - advanced_guides/index.rst - -.. toctree:: - :maxdepth: 2 - :caption: Deployment Guides - - deploy/index.rst + advanced_guides/cross-library_application.md .. toctree:: :maxdepth: 1 @@ -49,24 +96,17 @@ Welcome to MMYOLO's documentation! :caption: Notes notes/changelog.md - notes/faq.md notes/compatibility.md notes/conventions.md + notes/code_style.md + .. toctree:: - :maxdepth: 2 - :caption: Community - - community/contributing.md - community/code_style.md - -.. toctree:: - :caption: Switch Languag + :caption: Switch Language switch_language.md - Indices and tables ================== diff --git a/docs/en/community/code_style.md b/docs/en/notes/code_style.md similarity index 89% rename from docs/en/community/code_style.md rename to docs/en/notes/code_style.md index 08c534ea..3bc8291e 100644 --- a/docs/en/community/code_style.md +++ b/docs/en/notes/code_style.md @@ -1,3 +1,3 @@ -## Code Style +# Code Style Coming soon. Please refer to [chinese documentation](https://mmyolo.readthedocs.io/zh_CN/latest/community/code_style.html). diff --git a/docs/en/overview.md b/docs/en/overview.md deleted file mode 100644 index 339bcfa3..00000000 --- a/docs/en/overview.md +++ /dev/null @@ -1,56 +0,0 @@ -# Overview - -This chapter introduces you to the overall framework of MMYOLO and provides links to detailed tutorials. - -## What is MMYOLO - -
-image -
- -MMYOLO is a YOLO series algorithm toolbox, which currently implements only the object detection task and will subsequently support various tasks such as instance segmentation, panoramic segmentation, and key point detection. It includes a rich set of object detection algorithms and related components and modules, and the following is its overall framework. - -MMYOLO file structure is identical to the MMDetection. To fully reuse the MMDetection code, MMYOLO includes only custom content, consisting of 3 main parts: `datasets`, `models`, `engine`. - -- **datasets** support a variety of data sets for object detection. - - **transforms** include various data enhancement transforms. -- **models** are the most important part of the detector, which includes different components of it. - - **detectors** define all detection model classes. - - **data_preprocessors** is used to preprocess the dataset of the model. - - **backbones** include various backbone networks. - - **necks** include various neck components. - - **dense_heads** include various dense heads of different tasks. - - **losses** include various loss functions. - - **task_modules** provide components for testing tasks, such as assigners, samplers, box coders, and prior generators. - - **layers** provide some basic network layers. -- **engine** is a component of running. - - **optimizers** provide optimizers and packages for optimizers. - - **hooks** provide hooks for runner. - -## How to use this tutorial - -The detailed instruction of MMYOLO is as follows. - -1. Look up install instructions to [get_started.md](get_started.md). - -2. The basic method of how to use MMYOLO can be found here: - - - [Training and testing](https://mmyolo.readthedocs.io/en/latest/user_guides/index.html#train-test) - - [From getting started to deployment tutorial](https://mmyolo.readthedocs.io/en/latest/user_guides/index.html#from-getting-started-to-deployment-tutorial) - - [Useful Tools](https://mmyolo.readthedocs.io/en/latest/user_guides/index.html#useful-tools) - -3. YOLO series of tutorials on algorithm implementation and full analysis: - - - [Essential Basics](https://mmyolo.readthedocs.io/en/latest/algorithm_descriptions/index.html#essential-basics) - - [A full explanation of the model and implementation](https://mmyolo.readthedocs.io/en/latest/algorithm_descriptions/index.html#algorithm-principles-and-implementation) - -4. YOLO series of Deploy tutorials - - - [Basic Deployment Guide](https://mmyolo.readthedocs.io/en/latest/deploy/index.html#basic-deployment-guide) - - [Deployment Tutorial](https://mmyolo.readthedocs.io/en/latest/deploy/index.html#deployment-tutorial) - -5. Refer to the following tutorials for an in-depth look: - - - [Data flow](https://mmyolo.readthedocs.io/en/latest/advanced_guides/index.html#data-flow) - - [How to](https://mmyolo.readthedocs.io/en/latest/advanced_guides/index.html#how-to) - - [Plugins](https://mmyolo.readthedocs.io/en/latest/advanced_guides/index.html#plugins) diff --git a/docs/en/algorithm_descriptions/index.rst b/docs/en/recommended_topics/algorithm_descriptions/index.rst similarity index 67% rename from docs/en/algorithm_descriptions/index.rst rename to docs/en/recommended_topics/algorithm_descriptions/index.rst index e46cac3f..e51d04cb 100644 --- a/docs/en/algorithm_descriptions/index.rst +++ b/docs/en/recommended_topics/algorithm_descriptions/index.rst @@ -1,12 +1,3 @@ -Essential Basics -******************** - -.. 
toctree:: - :maxdepth: 1 - - model_design.md - - Algorithm principles and implementation ****************************************** diff --git a/docs/en/algorithm_descriptions/rtmdet_description.md b/docs/en/recommended_topics/algorithm_descriptions/rtmdet_description.md similarity index 100% rename from docs/en/algorithm_descriptions/rtmdet_description.md rename to docs/en/recommended_topics/algorithm_descriptions/rtmdet_description.md diff --git a/docs/en/algorithm_descriptions/yolov5_description.md b/docs/en/recommended_topics/algorithm_descriptions/yolov5_description.md similarity index 100% rename from docs/en/algorithm_descriptions/yolov5_description.md rename to docs/en/recommended_topics/algorithm_descriptions/yolov5_description.md diff --git a/docs/en/algorithm_descriptions/yolov8_description.md b/docs/en/recommended_topics/algorithm_descriptions/yolov8_description.md similarity index 100% rename from docs/en/algorithm_descriptions/yolov8_description.md rename to docs/en/recommended_topics/algorithm_descriptions/yolov8_description.md diff --git a/docs/en/community/contributing.md b/docs/en/recommended_topics/contributing.md similarity index 96% rename from docs/en/community/contributing.md rename to docs/en/recommended_topics/contributing.md index 2c73a768..31858572 100644 --- a/docs/en/community/contributing.md +++ b/docs/en/recommended_topics/contributing.md @@ -1,4 +1,4 @@ -## Contributing to OpenMMLab +# Contributing to OpenMMLab Welcome to the MMYOLO community, we are committed to building a cutting-edge computer vision foundational library, and all kinds of contributions are welcomed, including but not limited to @@ -21,11 +21,11 @@ The steps to fix the bug of code implementation are as follows. You can directly post a pull request to fix documents. If you want to add a document, you should first create an issue to check if it is reasonable. -### Pull Request Workflow +## Pull Request Workflow If you're not familiar with Pull Request, don't worry! The following guidance will tell you how to create a Pull Request step by step. If you want to dive into the development mode of Pull Request, you can refer to the [official documents](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/about-pull-requests) -#### 1. Fork and clone +### 1. Fork and clone If you are posting a pull request for the first time, you should fork the OpenMMLab repositories by clicking the **Fork** button in the top right corner of the GitHub page, and the forked repositories will appear under your GitHub profile. @@ -57,7 +57,7 @@ upstream git@github.com:open-mmlab/mmyolo (push) Here's a brief introduction to the origin and upstream. When we use "git clone", we create an "origin" remote by default, which points to the repository cloned from. As for "upstream", we add it ourselves to point to the target repository. Of course, if you don't like the name "upstream", you could name it as you wish. Usually, we'll push the code to "origin". If the pushed code conflicts with the latest code in official("upstream"), we should pull the latest code from upstream to resolve the conflicts, and then push to "origin" again. The posted Pull Request will be updated automatically. ``` -#### 2. Configure pre-commit +### 2. Configure pre-commit You should configure [pre-commit](https://pre-commit.com/#intro) in the local development environment to make sure the code style matches that of OpenMMLab. **Note**: The following code should be executed under the MMYOLO directory. 
@@ -95,7 +95,7 @@ If we want to commit our code bypassing the pre-commit hook, we can use the `--n git commit -m "xxx" --no-verify ``` -#### 3. Create a development branch +### 3. Create a development branch After configuring the pre-commit, we should create a branch based on the dev branch to develop the new feature or fix the bug. The proposed branch name is `username/pr_name` @@ -109,7 +109,7 @@ In subsequent development, if the dev branch of the local repository is behind t git pull upstream dev ``` -#### 4. Commit the code and pass the unit test +### 4. Commit the code and pass the unit test - MMYOLO introduces mypy to do static type checking to increase the robustness of the code. Therefore, we need to add Type Hints to our code and pass the mypy check. If you are not familiar with Type Hints, you can refer to [this tutorial](https://docs.python.org/3/library/typing.html). @@ -127,7 +127,7 @@ git pull upstream dev - If the documents are modified/added, we should check the rendering result referring to [guidance](#document-rendering) -#### 5. Push the code to remote +### 5. Push the code to remote We could push the local commits to remote after passing through the check of unit test and pre-commit. You can associate the local branch with remote branch by adding `-u` option. @@ -137,7 +137,7 @@ git push -u origin {branch_name} This will allow you to use the `git push` command to push code directly next time, without having to specify a branch or the remote repository. -#### 6. Create a Pull Request +### 6. Create a Pull Request (1) Create a pull request in GitHub's Pull request interface @@ -171,7 +171,7 @@ MMYOLO will run unit test for the posted Pull Request on Linux, based on differe -#### 7. Resolve conflicts +### 7. Resolve conflicts If your local branch conflicts with the latest dev branch of "upstream", you'll need to resolove them. There are two ways to do this: @@ -189,9 +189,9 @@ git merge upstream/dev If you are very good at handling conflicts, then you can use rebase to resolve conflicts, as this will keep your commit logs tidy. If you are unfamiliar with `rebase`, you can use `merge` to resolve conflicts. -### Guidance +## Guidance -#### Unit test +### Unit test We should also make sure the committed code will not decrease the coverage of unit test, we could run the following command to check the coverage of unit test: @@ -201,7 +201,7 @@ python -m coverage html # check file in htmlcov/index.html ``` -#### Document rendering +### Document rendering If the documents are modified/added, we should check the rendering result. We could install the dependencies and run the following command to render the documents and check the results: @@ -213,9 +213,9 @@ make html # check file in ./docs/zh_cn/_build/html/index.html ``` -### Code style +## Code style -#### Python +### Python We adopt [PEP8](https://www.python.org/dev/peps/pep-0008/) as the preferred code style. @@ -228,17 +228,17 @@ We use the following tools for linting and formatting: - [mdformat](https://github.com/executablebooks/mdformat): Mdformat is an opinionated Markdown formatter that can be used to enforce a consistent style in Markdown files. - [docformatter](https://github.com/myint/docformatter): A formatter to format docstring. -Style configurations of yapf and isort can be found in [setup.cfg](./setup.cfg). +Style configurations of yapf and isort can be found in [setup.cfg](../../../setup.cfg). 
We use [pre-commit hook](https://pre-commit.com/) that checks and formats for `flake8`, `yapf`, `isort`, `trailing whitespaces`, `markdown files`, fixes `end-of-files`, `double-quoted-strings`, `python-encoding-pragma`, `mixed-line-ending`, sorts `requirments.txt` automatically on every commit. -The config for a pre-commit hook is stored in [.pre-commit-config](./.pre-commit-config.yaml). +The config for a pre-commit hook is stored in [.pre-commit-config](../../../.pre-commit-config.yaml). -#### C++ and CUDA +### C++ and CUDA We follow the [Google C++ Style Guide](https://google.github.io/styleguide/cppguide.html). -### PR Specs +## PR Specs 1. Use [pre-commit](https://pre-commit.com) hook to avoid issues of code style diff --git a/docs/en/recommended_topics/dataset_preparation.md b/docs/en/recommended_topics/dataset_preparation.md new file mode 100644 index 00000000..5c573910 --- /dev/null +++ b/docs/en/recommended_topics/dataset_preparation.md @@ -0,0 +1 @@ +# Dataset preparation and description diff --git a/docs/en/recommended_topics/deploy/easydeploy_guide.md b/docs/en/recommended_topics/deploy/easydeploy_guide.md new file mode 100644 index 00000000..46fab865 --- /dev/null +++ b/docs/en/recommended_topics/deploy/easydeploy_guide.md @@ -0,0 +1 @@ +# EasyDeploy Deployment diff --git a/docs/en/recommended_topics/deploy/index.rst b/docs/en/recommended_topics/deploy/index.rst new file mode 100644 index 00000000..f21f353c --- /dev/null +++ b/docs/en/recommended_topics/deploy/index.rst @@ -0,0 +1,16 @@ +MMDeploy deployment tutorial +******************************** + +.. toctree:: + :maxdepth: 1 + + mmdeploy_guide.md + mmdeploy_yolov5.md + +EasyDeploy deployment tutorial +************************************ + +.. toctree:: + :maxdepth: 1 + + easydeploy_guide.md diff --git a/docs/en/deploy/basic_deployment_guide.md b/docs/en/recommended_topics/deploy/mmdeploy_guide.md similarity index 100% rename from docs/en/deploy/basic_deployment_guide.md rename to docs/en/recommended_topics/deploy/mmdeploy_guide.md diff --git a/docs/en/deploy/yolov5_deployment.md b/docs/en/recommended_topics/deploy/mmdeploy_yolov5.md similarity index 97% rename from docs/en/deploy/yolov5_deployment.md rename to docs/en/recommended_topics/deploy/mmdeploy_yolov5.md index f0e31967..7eb85b24 100644 --- a/docs/en/deploy/yolov5_deployment.md +++ b/docs/en/recommended_topics/deploy/mmdeploy_yolov5.md @@ -1,10 +1,10 @@ # YOLOv5 Deployment -Please check the [basic_deployment_guide](basic_deployment_guide.md) to get familiar with the configurations. +Please check the [basic_deployment_guide](mmdeploy_guide.md) to get familiar with the configurations. ## Model Training and Validation -The details of training and validation can be found at [yolov5_tutorial](../user_guides/yolov5_tutorial.md). +TODO ## MMDeploy Environment Setup @@ -75,7 +75,7 @@ codebase_config = dict( backend_config = dict(type='onnxruntime') ``` -The `post_processing` in the default configuration aligns the accuracy of the current model with the trained `pytorch` model. If you need to modify the relevant parameters, you can refer to the detailed introduction of [dasic_deployment_guide](basic_deployment_guide.md). +The `post_processing` in the default configuration aligns the accuracy of the current model with the trained `pytorch` model. If you need to modify the relevant parameters, you can refer to the detailed introduction of [dasic_deployment_guide](mmdeploy_guide.md). 
To deploy the model to `TensorRT`, please refer to the [`detection_tensorrt_static-640x640.py`](https://github.com/open-mmlab/mmyolo/tree/main/configs/deploy/detection_tensorrt_static-640x640.p). @@ -283,7 +283,7 @@ After exporting to `TensorRT`, you will get the four files as shown in Figure 2, After successfully convert the model, you can use `${MMDEPLOY_DIR}/tools/test.py` to evaluate the converted model. The following part shows how to evaluate the static models of `ONNXRuntime` and `TensorRT`. For dynamic model evaluation, please modify the configuration of the inputs. -#### ONNXRuntime +### ONNXRuntime ```shell python3 ${MMDEPLOY_DIR}/tools/test.py \ @@ -298,7 +298,7 @@ Once the process is done, you can get the output results as this: ![image](https://user-images.githubusercontent.com/92794867/199380483-cf8d867b-7309-4994-938a-f743f4cada77.png) -#### TensorRT +### TensorRT Note: `TensorRT` must run on `CUDA` devices! diff --git a/docs/en/recommended_topics/industry_examples.md b/docs/en/recommended_topics/industry_examples.md new file mode 100644 index 00000000..2380143b --- /dev/null +++ b/docs/en/recommended_topics/industry_examples.md @@ -0,0 +1 @@ +# MMYOLO industry examples diff --git a/docs/en/user_guides/custom_dataset.md b/docs/en/recommended_topics/labeling_to_deployment_tutorials.md similarity index 100% rename from docs/en/user_guides/custom_dataset.md rename to docs/en/recommended_topics/labeling_to_deployment_tutorials.md diff --git a/docs/en/recommended_topics/mm_basics.md b/docs/en/recommended_topics/mm_basics.md new file mode 100644 index 00000000..9f23cfe6 --- /dev/null +++ b/docs/en/recommended_topics/mm_basics.md @@ -0,0 +1 @@ +# MM series repo essential basics diff --git a/docs/en/algorithm_descriptions/model_design.md b/docs/en/recommended_topics/model_design.md similarity index 100% rename from docs/en/algorithm_descriptions/model_design.md rename to docs/en/recommended_topics/model_design.md diff --git a/docs/en/recommended_topics/replace_backbone.md b/docs/en/recommended_topics/replace_backbone.md new file mode 100644 index 00000000..82d2046b --- /dev/null +++ b/docs/en/recommended_topics/replace_backbone.md @@ -0,0 +1,306 @@ +# Replace the backbone network + +```{note} +1. When using other backbone networks, you need to ensure that the output channels of the backbone network match the input channels of the neck network. +2. The configuration files given below only ensure that the training will work correctly, and their training performance may not be optimal. Because some backbones require specific learning rates, optimizers, and other hyperparameters. Related contents will be added in the "Training Tips" section later. +``` + +## Use backbone network implemented in MMYOLO + +Suppose you want to use `YOLOv6EfficientRep` as the backbone network of `YOLOv5`, the example config is as the following: + +```python +_base_ = './yolov5_s-v61_syncbn_8xb16-300e_coco.py' + +model = dict( + backbone=dict( + type='YOLOv6EfficientRep', + norm_cfg=dict(type='BN', momentum=0.03, eps=0.001), + act_cfg=dict(type='ReLU', inplace=True)) +) +``` + +## Use backbone network implemented in other OpenMMLab repositories + +The model registry in MMYOLO, MMDetection, MMClassification, and MMSegmentation all inherit from the root registry in MMEngine in the OpenMMLab 2.0 system, allowing these repositories to directly use modules already implemented by each other. Therefore, in MMYOLO, users can use backbone networks from MMDetection and MMClassification without reimplementation. 
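+
+When swapping in a backbone from another repository, the neck's `in_channels` must match the channels that backbone actually outputs. If you are unsure about them, a quick illustrative check (assuming `torch` and `mmdet` are installed, and taking `ResNet-50` as an example) is to run a dummy forward pass and print the feature shapes:
+
+```python
+import torch
+from mmdet.models.backbones import ResNet
+
+# Build the candidate backbone and feed it a dummy image
+backbone = ResNet(depth=50, num_stages=4, out_indices=(1, 2, 3))
+backbone.eval()
+with torch.no_grad():
+    feats = backbone(torch.randn(1, 3, 640, 640))
+
+# The channel dimensions printed here are what the neck's in_channels should be set to
+for i, feat in enumerate(feats):
+    print(i, feat.shape)  # e.g. torch.Size([1, 512, 80, 80]) for the first level
+```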
+ +### Use backbone network implemented in MMDetection + +1. Suppose you want to use `ResNet-50` as the backbone network of `YOLOv5`, the example config is as the following: + +```python +_base_ = './yolov5_s-v61_syncbn_8xb16-300e_coco.py' + +deepen_factor = _base_.deepen_factor +widen_factor = 1.0 +channels = [512, 1024, 2048] + +model = dict( + backbone=dict( + _delete_=True, # Delete the backbone field in _base_ + type='mmdet.ResNet', # Using ResNet from mmdet + depth=50, + num_stages=4, + out_indices=(1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch', + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), + neck=dict( + type='YOLOv5PAFPN', + widen_factor=widen_factor, + in_channels=channels, # Note: The 3 channels of ResNet-50 output are [512, 1024, 2048], which do not match the original yolov5-s neck and need to be changed. + out_channels=channels), + bbox_head=dict( + type='YOLOv5Head', + head_module=dict( + type='YOLOv5HeadModule', + in_channels=channels, # input channels of head need to be changed accordingly + widen_factor=widen_factor)) +) +``` + +2. Suppose you want to use `SwinTransformer-Tiny` as the backbone network of `YOLOv5`, the example config is as the following: + +```python +_base_ = './yolov5_s-v61_syncbn_8xb16-300e_coco.py' + +deepen_factor = _base_.deepen_factor +widen_factor = 1.0 +channels = [192, 384, 768] +checkpoint_file = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_tiny_patch4_window7_224.pth' # noqa + +model = dict( + backbone=dict( + _delete_=True, # Delete the backbone field in _base_ + type='mmdet.SwinTransformer', # Using SwinTransformer from mmdet + embed_dims=96, + depths=[2, 2, 6, 2], + num_heads=[3, 6, 12, 24], + window_size=7, + mlp_ratio=4, + qkv_bias=True, + qk_scale=None, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0.2, + patch_norm=True, + out_indices=(1, 2, 3), + with_cp=False, + convert_weights=True, + init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file)), + neck=dict( + type='YOLOv5PAFPN', + deepen_factor=deepen_factor, + widen_factor=widen_factor, + in_channels=channels, # Note: The 3 channels of SwinTransformer-Tiny output are [192, 384, 768], which do not match the original yolov5-s neck and need to be changed. + out_channels=channels), + bbox_head=dict( + type='YOLOv5Head', + head_module=dict( + type='YOLOv5HeadModule', + in_channels=channels, # input channels of head need to be changed accordingly + widen_factor=widen_factor)) +) +``` + +### Use backbone network implemented in MMClassification + +1. 
Suppose you want to use `ConvNeXt-Tiny` as the backbone network of `YOLOv5`, the example config is as the following: + +```python +_base_ = './yolov5_s-v61_syncbn_8xb16-300e_coco.py' + +# please run the command, mim install "mmcls>=1.0.0rc2", to install mmcls +# import mmcls.models to trigger register_module in mmcls +custom_imports = dict(imports=['mmcls.models'], allow_failed_imports=False) +checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-tiny_3rdparty_32xb128-noema_in1k_20220301-795e9634.pth' # noqa +deepen_factor = _base_.deepen_factor +widen_factor = 1.0 +channels = [192, 384, 768] + +model = dict( + backbone=dict( + _delete_=True, # Delete the backbone field in _base_ + type='mmcls.ConvNeXt', # Using ConvNeXt from mmcls + arch='tiny', + out_indices=(1, 2, 3), + drop_path_rate=0.4, + layer_scale_init_value=1.0, + gap_before_final_norm=False, + init_cfg=dict( + type='Pretrained', checkpoint=checkpoint_file, + prefix='backbone.')), # The pre-trained weights of backbone network in MMCls have prefix='backbone.'. The prefix in the keys will be removed so that these weights can be normally loaded. + neck=dict( + type='YOLOv5PAFPN', + deepen_factor=deepen_factor, + widen_factor=widen_factor, + in_channels=channels, # Note: The 3 channels of ConvNeXt-Tiny output are [192, 384, 768], which do not match the original yolov5-s neck and need to be changed. + out_channels=channels), + bbox_head=dict( + type='YOLOv5Head', + head_module=dict( + type='YOLOv5HeadModule', + in_channels=channels, # input channels of head need to be changed accordingly + widen_factor=widen_factor)) +) +``` + +2. Suppose you want to use `MobileNetV3-small` as the backbone network of `YOLOv5`, the example config is as the following: + +```python +_base_ = './yolov5_s-v61_syncbn_8xb16-300e_coco.py' + +# please run the command, mim install "mmcls>=1.0.0rc2", to install mmcls +# import mmcls.models to trigger register_module in mmcls +custom_imports = dict(imports=['mmcls.models'], allow_failed_imports=False) +checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/mobilenet_v3/convert/mobilenet_v3_small-8427ecf0.pth' # noqa +deepen_factor = _base_.deepen_factor +widen_factor = 1.0 +channels = [24, 48, 96] + +model = dict( + backbone=dict( + _delete_=True, # Delete the backbone field in _base_ + type='mmcls.MobileNetV3', # Using MobileNetV3 from mmcls + arch='small', + out_indices=(3, 8, 11), # Modify out_indices + init_cfg=dict( + type='Pretrained', + checkpoint=checkpoint_file, + prefix='backbone.')), # The pre-trained weights of backbone network in MMCls have prefix='backbone.'. The prefix in the keys will be removed so that these weights can be normally loaded. + neck=dict( + type='YOLOv5PAFPN', + deepen_factor=deepen_factor, + widen_factor=widen_factor, + in_channels=channels, # Note: The 3 channels of MobileNetV3 output are [24, 48, 96], which do not match the original yolov5-s neck and need to be changed. + out_channels=channels), + bbox_head=dict( + type='YOLOv5Head', + head_module=dict( + type='YOLOv5HeadModule', + in_channels=channels, # input channels of head need to be changed accordingly + widen_factor=widen_factor)) +) +``` + +### Use backbone network in `timm` through MMClassification + +MMClassification also provides a wrapper for the Py**T**orch **Im**age **M**odels (`timm`) backbone network, users can directly use the backbone network in `timm` through MMClassification. 
Suppose you want to use `EfficientNet-B1` as the backbone network of `YOLOv5`, the example config is as the following: + +```python +_base_ = './yolov5_s-v61_syncbn_8xb16-300e_coco.py' + +# please run the command, mim install "mmcls>=1.0.0rc2", to install mmcls +# and the command, pip install timm, to install timm +# import mmcls.models to trigger register_module in mmcls +custom_imports = dict(imports=['mmcls.models'], allow_failed_imports=False) + +deepen_factor = _base_.deepen_factor +widen_factor = 1.0 +channels = [40, 112, 320] + +model = dict( + backbone=dict( + _delete_=True, # Delete the backbone field in _base_ + type='mmcls.TIMMBackbone', # Using timm from mmcls + model_name='efficientnet_b1', # Using efficientnet_b1 in timm + features_only=True, + pretrained=True, + out_indices=(2, 3, 4)), + neck=dict( + type='YOLOv5PAFPN', + deepen_factor=deepen_factor, + widen_factor=widen_factor, + in_channels=channels, # Note: The 3 channels of EfficientNet-B1 output are [40, 112, 320], which do not match the original yolov5-s neck and need to be changed. + out_channels=channels), + bbox_head=dict( + type='YOLOv5Head', + head_module=dict( + type='YOLOv5HeadModule', + in_channels=channels, # input channels of head need to be changed accordingly + widen_factor=widen_factor)) +) +``` + +### Use backbone network implemented in MMSelfSup + +Suppose you want to use `ResNet-50` which is self-supervised trained by `MoCo v3` in MMSelfSup as the backbone network of `YOLOv5`, the example config is as the following: + +```python +_base_ = './yolov5_s-v61_syncbn_8xb16-300e_coco.py' + +# please run the command, mim install "mmselfsup>=1.0.0rc3", to install mmselfsup +# import mmselfsup.models to trigger register_module in mmselfsup +custom_imports = dict(imports=['mmselfsup.models'], allow_failed_imports=False) +checkpoint_file = 'https://download.openmmlab.com/mmselfsup/1.x/mocov3/mocov3_resnet50_8xb512-amp-coslr-800e_in1k/mocov3_resnet50_8xb512-amp-coslr-800e_in1k_20220927-e043f51a.pth' # noqa +deepen_factor = _base_.deepen_factor +widen_factor = 1.0 +channels = [512, 1024, 2048] + +model = dict( + backbone=dict( + _delete_=True, # Delete the backbone field in _base_ + type='mmselfsup.ResNet', + depth=50, + num_stages=4, + out_indices=(2, 3, 4), # Note: out_indices of ResNet in MMSelfSup are 1 larger than those in MMdet and MMCls + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch', + init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file)), + neck=dict( + type='YOLOv5PAFPN', + deepen_factor=deepen_factor, + widen_factor=widen_factor, + in_channels=channels, # Note: The 3 channels of ResNet-50 output are [512, 1024, 2048], which do not match the original yolov5-s neck and need to be changed. + out_channels=channels), + bbox_head=dict( + type='YOLOv5Head', + head_module=dict( + type='YOLOv5HeadModule', + in_channels=channels, # input channels of head need to be changed accordingly + widen_factor=widen_factor)) +) +``` + +### Don't used pre-training weights + +When we replace the backbone network, the model initialization is trained by default loading the pre-training weight of the backbone network. Instead of using the pre-training weights of the backbone network, if you want to train the time model from scratch, +You can set `init_cfg` in 'backbone' to 'None'. In this case, the backbone network will be initialized with the default initialization method, instead of using the trained pre-training weight. 
+ +```python +_base_ = './yolov5_s-v61_syncbn_8xb16-300e_coco.py' + +deepen_factor = _base_.deepen_factor +widen_factor = 1.0 +channels = [512, 1024, 2048] + +model = dict( + backbone=dict( + _delete_=True, # Delete the backbone field in _base_ + type='mmdet.ResNet', # Using ResNet from mmdet + depth=50, + num_stages=4, + out_indices=(1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch', + init_cfg=None # If init_cfg is set to None, backbone will not be initialized with pre-trained weights + ), + neck=dict( + type='YOLOv5PAFPN', + widen_factor=widen_factor, + in_channels=channels, # Note: The 3 channels of ResNet-50 output are [512, 1024, 2048], which do not match the original yolov5-s neck and need to be changed. + out_channels=channels), + bbox_head=dict( + type='YOLOv5Head', + head_module=dict( + type='YOLOv5HeadModule', + in_channels=channels, # input channels of head need to be changed accordingly + widen_factor=widen_factor)) +) +``` diff --git a/docs/en/recommended_topics/troubleshooting_steps.md b/docs/en/recommended_topics/troubleshooting_steps.md new file mode 100644 index 00000000..60cc1143 --- /dev/null +++ b/docs/en/recommended_topics/troubleshooting_steps.md @@ -0,0 +1 @@ +# Troubleshooting steps for common errors diff --git a/docs/en/user_guides/visualization.md b/docs/en/recommended_topics/visualization.md similarity index 90% rename from docs/en/user_guides/visualization.md rename to docs/en/recommended_topics/visualization.md index 7835a434..30caa9e1 100644 --- a/docs/en/user_guides/visualization.md +++ b/docs/en/recommended_topics/visualization.md @@ -291,3 +291,56 @@ python demo/boxam_vis_demo.py \
image
+ +## Perform inference on large images + +First install [`sahi`](https://github.com/obss/sahi) with: + +```shell +pip install -U sahi>=0.11.4 +``` + +Perform MMYOLO inference on large images (as satellite imagery) as: + +```shell +wget -P checkpoint https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco/yolov5_m-v61_syncbn_fast_8xb16-300e_coco_20220917_204944-516a710f.pth + +python demo/large_image_demo.py \ + demo/large_image.jpg \ + configs/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco.py \ + checkpoint/yolov5_m-v61_syncbn_fast_8xb16-300e_coco_20220917_204944-516a710f.pth \ +``` + +Arrange slicing parameters as: + +```shell +python demo/large_image_demo.py \ + demo/large_image.jpg \ + configs/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco.py \ + checkpoint/yolov5_m-v61_syncbn_fast_8xb16-300e_coco_20220917_204944-516a710f.pth \ + --patch-size 512 + --patch-overlap-ratio 0.25 +``` + +Export debug visuals while performing inference on large images as: + +```shell +python demo/large_image_demo.py \ + demo/large_image.jpg \ + configs/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco.py \ + checkpoint/yolov5_m-v61_syncbn_fast_8xb16-300e_coco_20220917_204944-516a710f.pth \ + --debug +``` + +[`sahi`](https://github.com/obss/sahi) citation: + +``` +@article{akyon2022sahi, + title={Slicing Aided Hyper Inference and Fine-tuning for Small Object Detection}, + author={Akyon, Fatih Cagatay and Altinuc, Sinan Onur and Temizel, Alptekin}, + journal={2022 IEEE International Conference on Image Processing (ICIP)}, + doi={10.1109/ICIP46576.2022.9897990}, + pages={966-970}, + year={2022} +} +``` diff --git a/docs/en/user_guides/config.md b/docs/en/tutorials/config.md similarity index 99% rename from docs/en/user_guides/config.md rename to docs/en/tutorials/config.md index 0e0aac44..01937f30 100644 --- a/docs/en/user_guides/config.md +++ b/docs/en/tutorials/config.md @@ -427,8 +427,6 @@ model = dict( Some intermediate variables are used in the configs files, like `train_pipeline` and `test_pipeline` in datasets. It's worth noting that when modifying intermediate variables in the children configs, users need to pass the intermediate variables into corresponding fields again. For example, we would like to change the `image_scale` during training and add `YOLOv5MixUp` data augmentation, `img_scale/train_pipeline/test_pipeline` are intermediate variables we would like to modify. -**Notice**: `YOLOv5MixUp` requires adding the `pre_transform` and `mosaic_affine_pipeline` to its `train_pipeline` field. Please refer to [The description of YOLOv5 algorithm and its implementation](../algorithm_descriptions/yolov5_description.md) for detailed processes and diagrams. - ```python _base_ = './yolov5_s-v61_syncbn_8xb16-300e_coco.py' diff --git a/docs/en/tutorials/custom_installation.md b/docs/en/tutorials/custom_installation.md new file mode 100644 index 00000000..4aafe6a3 --- /dev/null +++ b/docs/en/tutorials/custom_installation.md @@ -0,0 +1,109 @@ +# Customize Installation + +## CUDA versions + +When installing PyTorch, you need to specify the version of CUDA. If you are not clear on which to choose, follow our recommendations: + +- For Ampere-based NVIDIA GPUs, such as GeForce 30 series and NVIDIA A100, CUDA 11 is a must. +- For older NVIDIA GPUs, CUDA 11 is backward compatible, but CUDA 10.2 offers better compatibility and is more lightweight. + +Please make sure the GPU driver satisfies the minimum version requirements. 
See [this table](https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html#cuda-major-component-versions__table-cuda-toolkit-driver-versions) for more information. + +```{note} +Installing CUDA runtime libraries is enough if you follow our best practices, because no CUDA code will be compiled locally. However, if you hope to compile MMCV from source or develop other CUDA operators, you need to install the complete CUDA toolkit from NVIDIA's [website](https://developer.nvidia.com/cuda-downloads), and its version should match the CUDA version of PyTorch. i.e., the specified version of cudatoolkit in `conda install` command. +``` + +## Install MMEngine without MIM + +To install MMEngine with pip instead of MIM, please follow \[MMEngine installation guides\](https://mmengine.readthedocs.io/en/latest/get_started/installation.html). + +For example, you can install MMEngine by the following command. + +```shell +pip install "mmengine>=0.3.1" +``` + +## Install MMCV without MIM + +MMCV contains C++ and CUDA extensions, thus depending on PyTorch in a complex way. MIM solves such dependencies automatically and makes the installation easier. However, it is not a must. + +To install MMCV with pip instead of MIM, please follow [MMCV installation guides](https://mmcv.readthedocs.io/en/2.x/get_started/installation.html). This requires manually specifying a find-url based on the PyTorch version and its CUDA version. + +For example, the following command installs MMCV built for PyTorch 1.12.x and CUDA 11.6. + +```shell +pip install "mmcv>=2.0.0rc1" -f https://download.openmmlab.com/mmcv/dist/cu116/torch1.12.0/index.html +``` + +## Install on CPU-only platforms + +MMDetection can be built for the CPU-only environment. In CPU mode you can train (requires MMCV version >= `2.0.0rc1`), test, or infer a model. + +However, some functionalities are gone in this mode: + +- Deformable Convolution +- Modulated Deformable Convolution +- ROI pooling +- Deformable ROI pooling +- CARAFE +- SyncBatchNorm +- CrissCrossAttention +- MaskedConv2d +- Temporal Interlace Shift +- nms_cuda +- sigmoid_focal_loss_cuda +- bbox_overlaps + +If you try to train/test/infer a model containing the above ops, an error will be raised. +The following table lists affected algorithms. + +| Operator | Model | +| :-----------------------------------------------------: | :--------------------------------------------------------------------------------------: | +| Deformable Convolution/Modulated Deformable Convolution | DCN、Guided Anchoring、RepPoints、CentripetalNet、VFNet、CascadeRPN、NAS-FCOS、DetectoRS | +| MaskedConv2d | Guided Anchoring | +| CARAFE | CARAFE | +| SyncBatchNorm | ResNeSt | + +## Install on Google Colab + +[Google Colab](https://research.google.com/) usually has PyTorch installed, +thus we only need to install MMEngine, MMCV, MMDetection, and MMYOLO with the following commands. + +**Step 1.** Install [MMEngine](https://github.com/open-mmlab/mmengine) and [MMCV](https://github.com/open-mmlab/mmcv) using [MIM](https://github.com/open-mmlab/mim). + +```shell +!pip3 install openmim +!mim install "mmengine>=0.3.1" +!mim install "mmcv>=2.0.0rc1,<2.1.0" +!mim install "mmdet>=3.0.0rc5,<3.1.0" +``` + +**Step 2.** Install MMYOLO from the source. + +```shell +!git clone https://github.com/open-mmlab/mmyolo.git +%cd mmyolo +!pip install -e . +``` + +**Step 3.** Verification. + +```python +import mmyolo +print(mmyolo.__version__) +# Example output: 0.1.0, or an another version. 
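+
+# You can also verify that the MMCV and MMDetection dependencies are importable
+# (an optional extra check, assuming they were installed in Step 1)
+import mmcv
+import mmdet
+print(mmcv.__version__, mmdet.__version__)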
+```
+
+```{note}
+Within Jupyter, the exclamation mark `!` is used to call external executables and `%cd` is a [magic command](https://ipython.readthedocs.io/en/stable/interactive/magics.html#magic-cd) to change the current working directory of Python.
+```
+
+## Develop using multiple MMYOLO versions
+
+The training and testing scripts have already modified the `PYTHONPATH` to ensure that they use the MMYOLO in the current directory.
+
+To use the default MMYOLO installed in the environment rather than the one in the current directory, you can remove the following line from those scripts:
+
+```shell
+PYTHONPATH="$(dirname $0)/..":$PYTHONPATH
+```
diff --git a/docs/en/advanced_guides/data_flow.md b/docs/en/tutorials/data_flow.md
similarity index 99%
rename from docs/en/advanced_guides/data_flow.md
rename to docs/en/tutorials/data_flow.md
index 161e6179..ab0e2e64 100644
--- a/docs/en/advanced_guides/data_flow.md
+++ b/docs/en/tutorials/data_flow.md
@@ -1,4 +1,4 @@
-## Mixed image data augmentation update
+# Mixed image data augmentation update
 
 Mixed image data augmentation is similar to Mosaic and MixUp, in which the annotation information of multiple images needs to be obtained for fusion during the running process. In the OpenMMLab data augmentation pipeline, other indexes of the dataset are generally not available. In order to achieve the above function, in the YOLOX reproduced in MMDetection, the concept of [MultiImageMixDataset](https://github.com/open-mmlab/mmdetection/blob/master/mmdet/datasets/dataset_wrappers.py#L338) dataset wrapper is proposed.
diff --git a/docs/en/notes/faq.md b/docs/en/tutorials/faq.md
similarity index 81%
rename from docs/en/notes/faq.md
rename to docs/en/tutorials/faq.md
index 2e91bc81..b79f2720 100644
--- a/docs/en/notes/faq.md
+++ b/docs/en/tutorials/faq.md
@@ -2,9 +2,9 @@
 
 We list some common problems many users face and their corresponding solutions here. Feel free to enrich the list if you find any frequent issues and have ways to help others to solve them. If the contents here do not cover your issue, please create an [issue](https://github.com/open-mmlab/mmyolo/issues/new/choose) and make sure you fill in all the required information in the template.
 
-## Why do we need to launch MMYOLO? Why do we need to open a separate repository instead of putting it directly into MMDetection?
+## Why do we need to launch MMYOLO?
 
-Since the open source, we have been receiving similar questions from our community partners, and the answers can be summarized in the following three points.
+Why do we need to launch MMYOLO? Why do we need to open a separate repository instead of putting it directly into MMDetection? Since MMYOLO was open sourced, we have been receiving similar questions from our community partners, and the answers can be summarized in the following three points.
 
 **(1) Unified operation and inference platform**
diff --git a/docs/en/useful_tools/browse_coco_json.md b/docs/en/useful_tools/browse_coco_json.md
new file mode 100644
index 00000000..772b8a56
--- /dev/null
+++ b/docs/en/useful_tools/browse_coco_json.md
@@ -0,0 +1,62 @@
+# Visualize COCO labels
+
+`tools/analysis_tools/browse_coco_json.py` is a script that visualizes the COCO annotations on the images.
+ +```shell +python tools/analysis_tools/browse_coco_json.py [--data-root ${DATA_ROOT}] \ + [--img-dir ${IMG_DIR}] \ + [--ann-file ${ANN_FILE}] \ + [--wait-time ${WAIT_TIME}] \ + [--disp-all] [--category-names CATEGORY_NAMES [CATEGORY_NAMES ...]] \ + [--shuffle] +``` + +If images and labels are in the same folder, you can specify `--data-root` to the folder, and then `--img-dir` and `--ann-file` to specify the relative path of the folder. The code will be automatically spliced. +If the image and label files are not in the same folder, you do not need to specify `--data-root`, but directly specify `--img-dir` and `--ann-file` of the absolute path. + +E.g: + +1. Visualize all categories of `COCO` and display all types of annotations such as `bbox` and `mask`: + +```shell +python tools/analysis_tools/browse_coco_json.py --data-root './data/coco' \ + --img-dir 'train2017' \ + --ann-file 'annotations/instances_train2017.json' \ + --disp-all +``` + +If images and labels are not in the same folder, you can use a absolutely path: + +```shell +python tools/analysis_tools/browse_coco_json.py --img-dir '/dataset/image/coco/train2017' \ + --ann-file '/label/instances_train2017.json' \ + --disp-all +``` + +2. Visualize all categories of `COCO`, and display only the `bbox` type labels, and shuffle the image to show: + +```shell +python tools/analysis_tools/browse_coco_json.py --data-root './data/coco' \ + --img-dir 'train2017' \ + --ann-file 'annotations/instances_train2017.json' \ + --shuffle +``` + +3. Only visualize the `bicycle` and `person` categories of `COCO` and only the `bbox` type labels are displayed: + +```shell +python tools/analysis_tools/browse_coco_json.py --data-root './data/coco' \ + --img-dir 'train2017' \ + --ann-file 'annotations/instances_train2017.json' \ + --category-names 'bicycle' 'person' +``` + +4. Visualize all categories of `COCO`, and display all types of label such as `bbox`, `mask`, and shuffle the image to show: + +```shell +python tools/analysis_tools/browse_coco_json.py --data-root './data/coco' \ + --img-dir 'train2017' \ + --ann-file 'annotations/instances_train2017.json' \ + --disp-all \ + --shuffle +``` diff --git a/docs/en/useful_tools/browse_dataset.md b/docs/en/useful_tools/browse_dataset.md new file mode 100644 index 00000000..f066d225 --- /dev/null +++ b/docs/en/useful_tools/browse_dataset.md @@ -0,0 +1,42 @@ +# Visualize Datasets + +`tools/analysis_tools/browse_dataset.py` helps the user to browse a detection dataset (both images and bounding box annotations) visually, or save the image to a designated directory. + +```shell +python tools/analysis_tools/browse_dataset.py ${CONFIG} \ + [--out-dir ${OUT_DIR}] \ + [--not-show] \ + [--show-interval ${SHOW_INTERVAL}] +``` + +E,g: + +1. Use `config` file `configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py` to visualize the picture. The picture will pop up directly and be saved to the directory `work_dirs/browse_ dataset` at the same time: + +```shell +python tools/analysis_tools/browse_dataset.py 'configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py' \ + --out-dir 'work_dirs/browse_dataset' +``` + +2. Use `config` file `configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py` to visualize the picture. The picture will pop up and display directly. Each picture lasts for `10` seconds. 
At the same time, it will be saved to the directory `work_dirs/browse_ dataset`: + +```shell +python tools/analysis_tools/browse_dataset.py 'configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py' \ + --out-dir 'work_dirs/browse_dataset' \ + --show-interval 10 +``` + +3. Use `config` file `configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py` to visualize the picture. The picture will pop up and display directly. Each picture lasts for `10` seconds and the picture will not be saved: + +```shell +python tools/analysis_tools/browse_dataset.py 'configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py' \ + --show-interval 10 +``` + +4. Use `config` file `configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py` to visualize the picture. The picture will not pop up directly, but only saved to the directory `work_dirs/browse_ dataset`: + +```shell +python tools/analysis_tools/browse_dataset.py 'configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py' \ + --out-dir 'work_dirs/browse_dataset' \ + --not-show +``` diff --git a/docs/en/useful_tools/dataset_analysis.md b/docs/en/useful_tools/dataset_analysis.md new file mode 100644 index 00000000..c6149e94 --- /dev/null +++ b/docs/en/useful_tools/dataset_analysis.md @@ -0,0 +1,79 @@ +# Visualize dataset analysis + +`tools/analysis_tools/dataset_analysis.py` help users get the renderings of the four functions, and save the pictures to the `dataset_analysis` folder under the current running directory. + +Description of the script's functions: + +The data required by each sub function is obtained through the data preparation of `main()`. + +Function 1: Generated by the sub function `show_bbox_num` to display the distribution of categories and bbox instances. + + + +Function 2: Generated by the sub function `show_bbox_wh` to display the width and height distribution of categories and bbox instances. + + + +Function 3: Generated by the sub function `show_bbox_wh_ratio` to display the width to height ratio distribution of categories and bbox instances. + + + +Function 3: Generated by the sub function `show_bbox_area` to display the distribution map of category and bbox instance area based on area rules. + + + +Print List: Generated by the sub function `show_class_list` and `show_data_list`. + + + +```shell +python tools/analysis_tools/dataset_analysis.py ${CONFIG} \ + [--type ${TYPE}] \ + [--class-name ${CLASS_NAME}] \ + [--area-rule ${AREA_RULE}] \ + [--func ${FUNC}] \ + [--out-dir ${OUT_DIR}] +``` + +E,g: + +1.Use `config` file `configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py` analyze the dataset, By default,the data loading type is `train_dataset`, the area rule is `[0,32,96,1e5]`, generate a result graph containing all functions and save the graph to the current running directory `./dataset_analysis` folder: + +```shell +python tools/analysis_tools/dataset_analysis.py configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py +``` + +2.Use `config` file `configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py` analyze the dataset, change the data loading type from the default `train_dataset` to `val_dataset` through the `--val-dataset` setting: + +```shell +python tools/analysis_tools/dataset_analysis.py configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py \ + --val-dataset +``` + +3.Use `config` file `configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py` analyze the dataset, change the display of all generated classes to specific classes. 
Take the display of `person` classes as an example: + +```shell +python tools/analysis_tools/dataset_analysis.py configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py \ + --class-name person +``` + +4.Use `config` file `configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py` analyze the dataset, redefine the area rule through `--area-rule` . Take `30 70 125` as an example, the area rule becomes `[0,30,70,125,1e5]`: + +```shell +python tools/analysis_tools/dataset_analysis.py configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py \ + --area-rule 30 70 125 +``` + +5.Use `config` file `configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py` analyze the dataset, change the display of four function renderings to only display `Function 1` as an example: + +```shell +python tools/analysis_tools/dataset_analysis.py configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py \ + --func show_bbox_num +``` + +6.Use `config` file `configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py` analyze the dataset, modify the picture saving address to `work_dirs/dataset_analysis`: + +```shell +python tools/analysis_tools/dataset_analysis.py configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py \ + --out-dir work_dirs/dataset_analysis +``` diff --git a/docs/en/useful_tools/dataset_converters.md b/docs/en/useful_tools/dataset_converters.md new file mode 100644 index 00000000..72ad968c --- /dev/null +++ b/docs/en/useful_tools/dataset_converters.md @@ -0,0 +1,55 @@ +# Dataset Conversion + +The folder `tools/data_converters` currently contains `ballon2coco.py`, `yolo2coco.py`, and `labelme2coco.py` - three dataset conversion tools. + +- `ballon2coco.py` converts the `balloon` dataset (this small dataset is for starters only) to COCO format. + +```shell +python tools/dataset_converters/balloon2coco.py +``` + +- `yolo2coco.py` converts a dataset from `yolo-style` **.txt** format to COCO format, please use it as follows: + +```shell +python tools/dataset_converters/yolo2coco.py /path/to/the/root/dir/of/your_dataset +``` + +Instructions: + +1. `image_dir` is the root directory of the yolo-style dataset you need to pass to the script, which should contain `images`, `labels`, and `classes.txt`. `classes.txt` is the class declaration corresponding to the current dataset. One class a line. The structure of the root directory should be formatted as this example shows: + +```bash +. +└── $ROOT_PATH + ├── classes.txt + ├── labels + │ ├── a.txt + │ ├── b.txt + │ └── ... + ├── images + │ ├── a.jpg + │ ├── b.png + │ └── ... + └── ... +``` + +2. The script will automatically check if `train.txt`, `val.txt`, and `test.txt` have already existed under `image_dir`. If these files are located, the script will organize the dataset accordingly. Otherwise, the script will convert the dataset into one file. The image paths in these files must be **ABSOLUTE** paths. +3. By default, the script will create a folder called `annotations` in the `image_dir` directory which stores the converted JSON file. If `train.txt`, `val.txt`, and `test.txt` are not found, the output file is `result.json`. Otherwise, the corresponding JSON file will be generated, named as `train.json`, `val.json`, and `test.json`. The `annotations` folder may look similar to this: + +```bash +. +└── $ROOT_PATH + ├── annotations + │ ├── result.json + │ └── ... + ├── classes.txt + ├── labels + │ ├── a.txt + │ ├── b.txt + │ └── ... + ├── images + │ ├── a.jpg + │ ├── b.png + │ └── ... + └── ... 
+```
diff --git a/docs/en/useful_tools/download_dataset.md b/docs/en/useful_tools/download_dataset.md
new file mode 100644
index 00000000..8a3e57ec
--- /dev/null
+++ b/docs/en/useful_tools/download_dataset.md
@@ -0,0 +1,11 @@
+# Download Dataset
+
+`tools/misc/download_dataset.py` supports downloading datasets such as `COCO`, `VOC`, `LVIS` and `Balloon`.
+
+```shell
+python tools/misc/download_dataset.py --dataset-name coco2017
+python tools/misc/download_dataset.py --dataset-name voc2007
+python tools/misc/download_dataset.py --dataset-name voc2012
+python tools/misc/download_dataset.py --dataset-name lvis
+python tools/misc/download_dataset.py --dataset-name balloon [--save-dir ${SAVE_DIR}] [--unzip]
+```
diff --git a/docs/en/useful_tools/extract_subcoco.md b/docs/en/useful_tools/extract_subcoco.md
new file mode 100644
index 00000000..b2c7e06c
--- /dev/null
+++ b/docs/en/useful_tools/extract_subcoco.md
@@ -0,0 +1,60 @@
+# Extract a subset of COCO
+
+The COCO2017 dataset contains 118K training images and 5K validation images, which makes it a relatively large dataset. Loading its JSON annotations during debugging or quick verification consumes extra resources and slows down startup.
+
+The `extract_subcoco.py` script can extract a subset of images by number, class, or bbox area size. Use the `--num-img`, `--classes`, and `--area-size` parameters to obtain a COCO subset that meets the specified conditions.
+
+For example, extract images with the script as follows:
+
+```shell
+python tools/misc/extract_subcoco.py \
+    ${ROOT} \
+    ${OUT_DIR} \
+    --num-img 20 \
+    --classes cat dog person \
+    --area-size small
+```
+
+The command above extracts 20 images that only include annotations belonging to `cat`, `dog`, or `person` with small bbox areas. After filtering by class and area size, images left without any annotation are not selected, so every extracted image is guaranteed to contain annotation information.
+
+Currently, only COCO2017 is supported. Support for user-defined datasets in the standard COCO JSON format will be added in the future.
+
+The root path folder format is as follows:
+
+```text
+├── root
+│   ├── annotations
+│   ├── train2017
+│   ├── val2017
+│   ├── test2017
+```
+
+1. Extract 10 training images and 10 validation images using only the 5K validation set.
+
+```shell
+python tools/misc/extract_subcoco.py ${ROOT} ${OUT_DIR} --num-img 10
+```
+
+2. Extract 20 training images using the training set and 20 validation images using the validation set.
+
+```shell
+python tools/misc/extract_subcoco.py ${ROOT} ${OUT_DIR} --num-img 20 --use-training-set
+```
+
+3. Set the global seed to 1. No seed is set by default.
+
+```shell
+python tools/misc/extract_subcoco.py ${ROOT} ${OUT_DIR} --num-img 20 --use-training-set --seed 1
+```
+
+4. Extract images by specifying classes
+
+```shell
+python tools/misc/extract_subcoco.py ${ROOT} ${OUT_DIR} --classes cat dog person
+```
+
+5. Extract images by specifying the bbox area size
+
+```shell
+python tools/misc/extract_subcoco.py ${ROOT} ${OUT_DIR} --area-size small
+```
diff --git a/docs/en/useful_tools/log_analysis.md b/docs/en/useful_tools/log_analysis.md
new file mode 100644
index 00000000..6b3e6040
--- /dev/null
+++ b/docs/en/useful_tools/log_analysis.md
@@ -0,0 +1,82 @@
+# Log Analysis
+
+## Curve plotting
+
+`tools/analysis_tools/analyze_logs.py` plots loss/mAP curves given a training log file. Run `pip install seaborn` first to install the dependency.
+ +```shell +mim run mmdet analyze_logs plot_curve \ + ${LOG} \ # path of train log in json format + [--keys ${KEYS}] \ # the metric that you want to plot, default to 'bbox_mAP' + [--start-epoch ${START_EPOCH}] # the epoch that you want to start, default to 1 + [--eval-interval ${EVALUATION_INTERVAL}] \ # the evaluation interval when training, default to 1 + [--title ${TITLE}] \ # title of figure + [--legend ${LEGEND}] \ # legend of each plot, default to None + [--backend ${BACKEND}] \ # backend of plt, default to None + [--style ${STYLE}] \ # style of plt, default to 'dark' + [--out ${OUT_FILE}] # the path of output file +# [] stands for optional parameters, when actually entering the command line, you do not need to enter [] +``` + +Examples: + +- Plot the classification loss of some run. + + ```shell + mim run mmdet analyze_logs plot_curve \ + yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700.log.json \ + --keys loss_cls \ + --legend loss_cls + ``` + + + +- Plot the classification and regression loss of some run, and save the figure to a pdf. + + ```shell + mim run mmdet analyze_logs plot_curve \ + yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700.log.json \ + --keys loss_cls loss_bbox \ + --legend loss_cls loss_bbox \ + --out losses_yolov5_s.pdf + ``` + + + +- Compare the bbox mAP of two runs in the same figure. + + ```shell + mim run mmdet analyze_logs plot_curve \ + yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700.log.json \ + yolov5_n-v61_syncbn_fast_8xb16-300e_coco_20220919_090739.log.json \ + --keys bbox_mAP \ + --legend yolov5_s yolov5_n \ + --eval-interval 10 # Note that the evaluation interval must be the same as during training. Otherwise, it will raise an error. + ``` + + + +## Compute the average training speed + +```shell +mim run mmdet analyze_logs cal_train_time \ + ${LOG} \ # path of train log in json format + [--include-outliers] # include the first value of every epoch when computing the average time +``` + +Examples: + +```shell +mim run mmdet analyze_logs cal_train_time \ + yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700.log.json +``` + +The output is expected to be like the following. + +```text +-----Analyze train time of yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700.log.json----- +slowest epoch 278, average time is 0.1705 s/iter +fastest epoch 300, average time is 0.1510 s/iter +time std over epochs is 0.0026 +average iter time: 0.1556 s/iter +``` diff --git a/docs/en/useful_tools/model_converters.md b/docs/en/useful_tools/model_converters.md new file mode 100644 index 00000000..09fb52df --- /dev/null +++ b/docs/en/useful_tools/model_converters.md @@ -0,0 +1,54 @@ +# Convert Model + +The six scripts under the `tools/model_converters` directory can help users convert the keys in the official pre-trained model of YOLO to the format of MMYOLO, and use MMYOLO to fine-tune the model. + +## YOLOv5 + +Take conversion `yolov5s.pt` as an example: + +1. Clone the official YOLOv5 code to the local (currently the maximum supported version is `v6.1`): + +```shell +git clone -b v6.1 https://github.com/ultralytics/yolov5.git +cd yolov5 +``` + +2. Download official weight file: + +```shell +wget https://github.com/ultralytics/yolov5/releases/download/v6.1/yolov5s.pt +``` + +3. Copy file `tools/model_converters/yolov5_to_mmyolo.py` to the path of YOLOv5 official code clone: + +```shell +cp ${MMDET_YOLO_PATH}/tools/model_converters/yolov5_to_mmyolo.py yolov5_to_mmyolo.py +``` + +4. 
Conversion
+
+```shell
+python yolov5_to_mmyolo.py --src ${WEIGHT_FILE_PATH} --dst mmyolov5.pt
+```
+
+The converted `mmyolov5.pt` can be used by MMYOLO. The official weights of YOLOv6 are converted in the same way.
+
+## YOLOX
+
+Converting the YOLOX model **does not require** downloading the official YOLOX code; just download the weight file.
+
+Take the conversion of `yolox_s.pth` as an example:
+
+1. Download official weight file:
+
+```shell
+wget https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_s.pth
+```
+
+2. Conversion
+
+```shell
+python tools/model_converters/yolox_to_mmyolo.py --src yolox_s.pth --dst mmyolox.pt
+```
+
+The converted `mmyolox.pt` can be used by MMYOLO.
diff --git a/docs/en/useful_tools/optimize_anchors.md b/docs/en/useful_tools/optimize_anchors.md
new file mode 100644
index 00000000..460bc6e2
--- /dev/null
+++ b/docs/en/useful_tools/optimize_anchors.md
@@ -0,0 +1,38 @@
+# Optimize anchor sizes
+
+The script `tools/analysis_tools/optimize_anchors.py` supports three methods to optimize YOLO anchors:
+`k-means` anchor clustering, `Differential Evolution`, and `v5-k-means`.
+
+## k-means
+
+The k-means method uses an IoU-based distance criterion. Run it as follows:
+
+```shell
+python tools/analysis_tools/optimize_anchors.py ${CONFIG} \
+    --algorithm k-means \
+    --input-shape ${INPUT_SHAPE [WIDTH HEIGHT]} \
+    --out-dir ${OUT_DIR}
+```
+
+## Differential Evolution
+
+The `DE` method uses the differential evolution algorithm and minimizes `avg_iou_cost` as the objective function. Run it as follows:
+
+```shell
+python tools/analysis_tools/optimize_anchors.py ${CONFIG} \
+    --algorithm DE \
+    --input-shape ${INPUT_SHAPE [WIDTH HEIGHT]} \
+    --out-dir ${OUT_DIR}
+```
+
+## v5-k-means
+
+The v5-k-means method uses the same shape-match clustering criterion as YOLOv5. Run it as follows:
+
+```shell
+python tools/analysis_tools/optimize_anchors.py ${CONFIG} \
+    --algorithm v5-k-means \
+    --input-shape ${INPUT_SHAPE [WIDTH HEIGHT]} \
+    --prior_match_thr ${PRIOR_MATCH_THR} \
+    --out-dir ${OUT_DIR}
+```
diff --git a/docs/en/useful_tools/print_config.md b/docs/en/useful_tools/print_config.md
new file mode 100644
index 00000000..2a6ee79f
--- /dev/null
+++ b/docs/en/useful_tools/print_config.md
@@ -0,0 +1,20 @@
+# Print the whole config
+
+`print_config.py` in MMDetection prints the whole config verbatim, expanding all its imports. The command is as follows.
+
+```shell
+mim run mmdet print_config \
+    ${CONFIG} \                              # path of the config file
+    [--save-path] \                          # save path of whole config, suffixed with .py, .json or .yml
+    [--cfg-options ${OPTIONS [OPTIONS...]}]  # override some settings in the used config
+```
+
+Examples:
+
+```shell
+mim run mmdet print_config \
+    configs/yolov5/yolov5_s-v61_syncbn_fast_1xb4-300e_balloon.py \
+    --save-path ./work_dirs/yolov5_s-v61_syncbn_fast_1xb4-300e_balloon.py
+```
+
+Running the above command will save the `yolov5_s-v61_syncbn_fast_1xb4-300e_balloon.py` config file with the inheritance relationship expanded into `yolov5_s-v61_syncbn_fast_1xb4-300e_balloon_whole.py` in the `./work_dirs` folder.
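+
+If you prefer to expand a config from Python rather than through MIM, the snippet below is a minimal sketch based on the `mmengine.Config` API; the config and output paths are only examples:
+
+```python
+from mmengine import Config
+
+# Load the config; all files referenced in _base_ are resolved and merged
+cfg = Config.fromfile(
+    'configs/yolov5/yolov5_s-v61_syncbn_fast_1xb4-300e_balloon.py')
+
+# Print the fully expanded config text
+print(cfg.pretty_text)
+
+# Optionally dump the expanded config to disk for later comparison
+cfg.dump('./work_dirs/yolov5_s-v61_syncbn_fast_1xb4-300e_balloon_whole.py')
+```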
diff --git a/docs/en/useful_tools/vis_scheduler.md b/docs/en/useful_tools/vis_scheduler.md new file mode 100644 index 00000000..f1526342 --- /dev/null +++ b/docs/en/useful_tools/vis_scheduler.md @@ -0,0 +1,44 @@ +# Hyper-parameter Scheduler Visualization + +`tools/analysis_tools/vis_scheduler` aims to help the user to check the hyper-parameter scheduler of the optimizer(without training), which support the "learning rate", "momentum", and "weight_decay". + +```bash +python tools/analysis_tools/vis_scheduler.py \ + ${CONFIG_FILE} \ + [-p, --parameter ${PARAMETER_NAME}] \ + [-d, --dataset-size ${DATASET_SIZE}] \ + [-n, --ngpus ${NUM_GPUs}] \ + [-o, --out-dir ${OUT_DIR}] \ + [--title ${TITLE}] \ + [--style ${STYLE}] \ + [--window-size ${WINDOW_SIZE}] \ + [--cfg-options] +``` + +**Description of all arguments**: + +- `config`: The path of a model config file. +- **`-p, --parameter`**: The param to visualize its change curve, choose from "lr", "momentum" or "wd". Default to use "lr". +- **`-d, --dataset-size`**: The size of the datasets. If set,`DATASETS.build` will be skipped and `${DATASET_SIZE}` will be used as the size. Default to use the function `DATASETS.build`. +- **`-n, --ngpus`**: The number of GPUs used in training, default to be 1. +- **`-o, --out-dir`**: The output path of the curve plot, default not to output. +- `--title`: Title of figure. If not set, default to be config file name. +- `--style`: Style of plt. If not set, default to be `whitegrid`. +- `--window-size`: The shape of the display window. If not specified, it will be set to `12*7`. If used, it must be in the format `'W*H'`. +- `--cfg-options`: Modifications to the configuration file, refer to [Learn about Configs](../tutorials/config.md). + +```{note} +Loading annotations maybe consume much time, you can directly specify the size of the dataset with `-d, dataset-size` to save time. +``` + +You can use the following command to plot the step learning rate schedule used in the config `configs/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco.py`: + +```shell +python tools/analysis_tools/vis_scheduler.py \ + configs/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco.py \ + --dataset-size 118287 \ + --ngpus 8 \ + --out-dir ./output +``` + +
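+
+If you want to preview how a modified schedule would look without editing the config file, `--cfg-options` can override scheduler-related fields on the command line. The command below is only an illustrative sketch; the exact key used here (`optim_wrapper.optimizer.lr`) is an assumption and depends on the config you are using:
+
+```shell
+python tools/analysis_tools/vis_scheduler.py \
+    configs/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco.py \
+    --dataset-size 118287 \
+    --ngpus 8 \
+    --cfg-options optim_wrapper.optimizer.lr=0.002 \
+    --out-dir ./output
+```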
diff --git a/docs/en/user_guides/index.rst b/docs/en/user_guides/index.rst deleted file mode 100644 index 50649c96..00000000 --- a/docs/en/user_guides/index.rst +++ /dev/null @@ -1,28 +0,0 @@ -Train & Test -************** - -MMYOLO provides a lot of detection models in `Model Zoo `_. This note will show how to perform common tasks on these existing models and standard datasets: - - -.. toctree:: - :maxdepth: 1 - - config.md - -From Getting Started to Deployment tutorial -*********************************************** - -.. toctree:: - :maxdepth: 1 - - custom_dataset.md - yolov5_tutorial.md - -Useful Tools -************ - -.. toctree:: - :maxdepth: 1 - - visualization.md - useful_tools.md diff --git a/docs/en/user_guides/useful_tools.md b/docs/en/user_guides/useful_tools.md deleted file mode 100644 index e38217ff..00000000 --- a/docs/en/user_guides/useful_tools.md +++ /dev/null @@ -1,520 +0,0 @@ -# Useful tools - -We provide lots of useful tools under the `tools/` directory. In addition, you can also quickly run other open source libraries of OpenMMLab through MIM. - -Take MMDetection as an example. If you want to use [print_config.py](https://github.com/open-mmlab/mmdetection/blob/3.x/tools/misc/print_config.py), you can directly use the following commands without copying the source code to the MMYOLO library. - -```shell -mim run mmdet print_config ${CONFIG} -``` - -## Visualization - -### Visualize COCO labels - -`tools/analysis_tools/browse_coco_json.py` is a script that can visualization to display the COCO label in the picture. - -```shell -python tools/analysis_tools/browse_coco_json.py [--data-root ${DATA_ROOT}] \ - [--img-dir ${IMG_DIR}] \ - [--ann-file ${ANN_FILE}] \ - [--wait-time ${WAIT_TIME}] \ - [--disp-all] [--category-names CATEGORY_NAMES [CATEGORY_NAMES ...]] \ - [--shuffle] -``` - -If images and labels are in the same folder, you can specify `--data-root` to the folder, and then `--img-dir` and `--ann-file` to specify the relative path of the folder. The code will be automatically spliced. -If the image and label files are not in the same folder, you do not need to specify `--data-root`, but directly specify `--img-dir` and `--ann-file` of the absolute path. - -E.g: - -1. Visualize all categories of `COCO` and display all types of annotations such as `bbox` and `mask`: - -```shell -python tools/analysis_tools/browse_coco_json.py --data-root './data/coco' \ - --img-dir 'train2017' \ - --ann-file 'annotations/instances_train2017.json' \ - --disp-all -``` - -If images and labels are not in the same folder, you can use a absolutely path: - -```shell -python tools/analysis_tools/browse_coco_json.py --img-dir '/dataset/image/coco/train2017' \ - --ann-file '/label/instances_train2017.json' \ - --disp-all -``` - -2. Visualize all categories of `COCO`, and display only the `bbox` type labels, and shuffle the image to show: - -```shell -python tools/analysis_tools/browse_coco_json.py --data-root './data/coco' \ - --img-dir 'train2017' \ - --ann-file 'annotations/instances_train2017.json' \ - --shuffle -``` - -3. Only visualize the `bicycle` and `person` categories of `COCO` and only the `bbox` type labels are displayed: - -```shell -python tools/analysis_tools/browse_coco_json.py --data-root './data/coco' \ - --img-dir 'train2017' \ - --ann-file 'annotations/instances_train2017.json' \ - --category-names 'bicycle' 'person' -``` - -4. 
Visualize all categories of `COCO`, and display all types of label such as `bbox`, `mask`, and shuffle the image to show: - -```shell -python tools/analysis_tools/browse_coco_json.py --data-root './data/coco' \ - --img-dir 'train2017' \ - --ann-file 'annotations/instances_train2017.json' \ - --disp-all \ - --shuffle -``` - -### Visualize Datasets - -`tools/analysis_tools/browse_dataset.py` helps the user to browse a detection dataset (both images and bounding box annotations) visually, or save the image to a designated directory. - -```shell -python tools/analysis_tools/browse_dataset.py ${CONFIG} \ - [--out-dir ${OUT_DIR}] \ - [--not-show] \ - [--show-interval ${SHOW_INTERVAL}] -``` - -E,g: - -1. Use `config` file `configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py` to visualize the picture. The picture will pop up directly and be saved to the directory `work_dirs/browse_ dataset` at the same time: - -```shell -python tools/analysis_tools/browse_dataset.py 'configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py' \ - --out-dir 'work_dirs/browse_dataset' -``` - -2. Use `config` file `configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py` to visualize the picture. The picture will pop up and display directly. Each picture lasts for `10` seconds. At the same time, it will be saved to the directory `work_dirs/browse_ dataset`: - -```shell -python tools/analysis_tools/browse_dataset.py 'configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py' \ - --out-dir 'work_dirs/browse_dataset' \ - --show-interval 10 -``` - -3. Use `config` file `configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py` to visualize the picture. The picture will pop up and display directly. Each picture lasts for `10` seconds and the picture will not be saved: - -```shell -python tools/analysis_tools/browse_dataset.py 'configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py' \ - --show-interval 10 -``` - -4. Use `config` file `configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py` to visualize the picture. The picture will not pop up directly, but only saved to the directory `work_dirs/browse_ dataset`: - -```shell -python tools/analysis_tools/browse_dataset.py 'configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py' \ - --out-dir 'work_dirs/browse_dataset' \ - --not-show -``` - -### Visualize dataset analysis - -`tools/analysis_tools/dataset_analysis.py` help users get the renderings of the four functions, and save the pictures to the `dataset_analysis` folder under the current running directory. - -Description of the script's functions: - -The data required by each sub function is obtained through the data preparation of `main()`. - -Function 1: Generated by the sub function `show_bbox_num` to display the distribution of categories and bbox instances. - - - -Function 2: Generated by the sub function `show_bbox_wh` to display the width and height distribution of categories and bbox instances. - - - -Function 3: Generated by the sub function `show_bbox_wh_ratio` to display the width to height ratio distribution of categories and bbox instances. - - - -Function 3: Generated by the sub function `show_bbox_area` to display the distribution map of category and bbox instance area based on area rules. - - - -Print List: Generated by the sub function `show_class_list` and `show_data_list`. 
- - - -```shell -python tools/analysis_tools/dataset_analysis.py ${CONFIG} \ - [--type ${TYPE}] \ - [--class-name ${CLASS_NAME}] \ - [--area-rule ${AREA_RULE}] \ - [--func ${FUNC}] \ - [--out-dir ${OUT_DIR}] -``` - -E,g: - -1.Use `config` file `configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py` analyze the dataset, By default,the data loading type is `train_dataset`, the area rule is `[0,32,96,1e5]`, generate a result graph containing all functions and save the graph to the current running directory `./dataset_analysis` folder: - -```shell -python tools/analysis_tools/dataset_analysis.py configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py -``` - -2.Use `config` file `configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py` analyze the dataset, change the data loading type from the default `train_dataset` to `val_dataset` through the `--val-dataset` setting: - -```shell -python tools/analysis_tools/dataset_analysis.py configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py \ - --val-dataset -``` - -3.Use `config` file `configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py` analyze the dataset, change the display of all generated classes to specific classes. Take the display of `person` classes as an example: - -```shell -python tools/analysis_tools/dataset_analysis.py configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py \ - --class-name person -``` - -4.Use `config` file `configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py` analyze the dataset, redefine the area rule through `--area-rule` . Take `30 70 125` as an example, the area rule becomes `[0,30,70,125,1e5]`: - -```shell -python tools/analysis_tools/dataset_analysis.py configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py \ - --area-rule 30 70 125 -``` - -5.Use `config` file `configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py` analyze the dataset, change the display of four function renderings to only display `Function 1` as an example: - -```shell -python tools/analysis_tools/dataset_analysis.py configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py \ - --func show_bbox_num -``` - -6.Use `config` file `configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py` analyze the dataset, modify the picture saving address to `work_dirs/dataset_analysis`: - -```shell -python tools/analysis_tools/dataset_analysis.py configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py \ - --out-dir work_dirs/dataset_analysis -``` - -### Hyper-parameter Scheduler Visualization - -`tools/analysis_tools/vis_scheduler` aims to help the user to check the hyper-parameter scheduler of the optimizer(without training), which support the "learning rate", "momentum", and "weight_decay". - -```bash -python tools/analysis_tools/vis_scheduler.py \ - ${CONFIG_FILE} \ - [-p, --parameter ${PARAMETER_NAME}] \ - [-d, --dataset-size ${DATASET_SIZE}] \ - [-n, --ngpus ${NUM_GPUs}] \ - [-o, --out-dir ${OUT_DIR}] \ - [--title ${TITLE}] \ - [--style ${STYLE}] \ - [--window-size ${WINDOW_SIZE}] \ - [--cfg-options] -``` - -**Description of all arguments**: - -- `config`: The path of a model config file. -- **`-p, --parameter`**: The param to visualize its change curve, choose from "lr", "momentum" or "wd". Default to use "lr". -- **`-d, --dataset-size`**: The size of the datasets. If set,`DATASETS.build` will be skipped and `${DATASET_SIZE}` will be used as the size. Default to use the function `DATASETS.build`. -- **`-n, --ngpus`**: The number of GPUs used in training, default to be 1. -- **`-o, --out-dir`**: The output path of the curve plot, default not to output. 
-- `--title`: Title of figure. If not set, default to be config file name. -- `--style`: Style of plt. If not set, default to be `whitegrid`. -- `--window-size`: The shape of the display window. If not specified, it will be set to `12*7`. If used, it must be in the format `'W*H'`. -- `--cfg-options`: Modifications to the configuration file, refer to [Learn about Configs](../user_guides/config.md). - -```{note} -Loading annotations maybe consume much time, you can directly specify the size of the dataset with `-d, dataset-size` to save time. -``` - -You can use the following command to plot the step learning rate schedule used in the config `configs/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco.py`: - -```shell -python tools/analysis_tools/vis_scheduler.py \ - configs/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco.py \ - --dataset-size 118287 \ - --ngpus 8 \ - --out-dir ./output -``` - -
- -## Dataset Conversion - -The folder `tools/data_converters` currently contains `ballon2coco.py`, `yolo2coco.py`, and `labelme2coco.py` - three dataset conversion tools. - -- `ballon2coco.py` converts the `balloon` dataset (this small dataset is for starters only) to COCO format. - -For a detailed description of this script, please see the `Dataset Preparation` section in [From getting started to deployment with YOLOv5](./yolov5_tutorial.md). - -```shell -python tools/dataset_converters/balloon2coco.py -``` - -- `yolo2coco.py` converts a dataset from `yolo-style` **.txt** format to COCO format, please use it as follows: - -```shell -python tools/dataset_converters/yolo2coco.py /path/to/the/root/dir/of/your_dataset -``` - -Instructions: - -1. `image_dir` is the root directory of the yolo-style dataset you need to pass to the script, which should contain `images`, `labels`, and `classes.txt`. `classes.txt` is the class declaration corresponding to the current dataset. One class a line. The structure of the root directory should be formatted as this example shows: - -```bash -. -└── $ROOT_PATH - ├── classes.txt - ├── labels - │ ├── a.txt - │ ├── b.txt - │ └── ... - ├── images - │ ├── a.jpg - │ ├── b.png - │ └── ... - └── ... -``` - -2. The script will automatically check if `train.txt`, `val.txt`, and `test.txt` have already existed under `image_dir`. If these files are located, the script will organize the dataset accordingly. Otherwise, the script will convert the dataset into one file. The image paths in these files must be **ABSOLUTE** paths. -3. By default, the script will create a folder called `annotations` in the `image_dir` directory which stores the converted JSON file. If `train.txt`, `val.txt`, and `test.txt` are not found, the output file is `result.json`. Otherwise, the corresponding JSON file will be generated, named as `train.json`, `val.json`, and `test.json`. The `annotations` folder may look similar to this: - -```bash -. -└── $ROOT_PATH - ├── annotations - │ ├── result.json - │ └── ... - ├── classes.txt - ├── labels - │ ├── a.txt - │ ├── b.txt - │ └── ... - ├── images - │ ├── a.jpg - │ ├── b.png - │ └── ... - └── ... -``` - -## Download Dataset - -`tools/misc/download_dataset.py` supports downloading datasets such as `COCO`, `VOC`, `LVIS` and `Balloon`. - -```shell -python tools/misc/download_dataset.py --dataset-name coco2017 -python tools/misc/download_dataset.py --dataset-name voc2007 -python tools/misc/download_dataset.py --dataset-name voc2012 -python tools/misc/download_dataset.py --dataset-name lvis -python tools/misc/download_dataset.py --dataset-name balloon [--save-dir ${SAVE_DIR}] [--unzip] -``` - -## Convert Model - -The six scripts under the `tools/model_converters` directory can help users convert the keys in the official pre-trained model of YOLO to the format of MMYOLO, and use MMYOLO to fine-tune the model. - -### YOLOv5 - -Take conversion `yolov5s.pt` as an example: - -1. Clone the official YOLOv5 code to the local (currently the maximum supported version is `v6.1`): - -```shell -git clone -b v6.1 https://github.com/ultralytics/yolov5.git -cd yolov5 -``` - -2. Download official weight file: - -```shell -wget https://github.com/ultralytics/yolov5/releases/download/v6.1/yolov5s.pt -``` - -3. Copy file `tools/model_converters/yolov5_to_mmyolo.py` to the path of YOLOv5 official code clone: - -```shell -cp ${MMDET_YOLO_PATH}/tools/model_converters/yolov5_to_mmyolo.py yolov5_to_mmyolo.py -``` - -4. 
Conversion - -```shell -python yolov5_to_mmyolo.py --src ${WEIGHT_FILE_PATH} --dst mmyolov5.pt -``` - -The converted `mmyolov5.pt` can be used by MMYOLO. The official weight conversion of YOLOv6 is also used in the same way. - -### YOLOX - -The conversion of YOLOX model **does not need** to download the official YOLOX code, just download the weight. - -Take conversion `yolox_s.pth` as an example: - -1. Download official weight file: - -```shell -wget https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_s.pth -``` - -2. Conversion - -```shell -python tools/model_converters/yolox_to_mmyolo.py --src yolox_s.pth --dst mmyolox.pt -``` - -The converted `mmyolox.pt` can be used by MMYOLO. - -## Optimize anchors size - -Script `tools/analysis_tools/optimize_anchors.py` supports three methods to optimize YOLO anchors including `k-means` -anchor cluster, `Differential Evolution` and `v5-k-means`. - -### k-means - -In k-means method, the distance criteria is based IoU, python shell as follow: - -```shell -python tools/analysis_tools/optimize_anchors.py ${CONFIG} \ - --algorithm k-means \ - --input-shape ${INPUT_SHAPE [WIDTH HEIGHT]} \ - --out-dir ${OUT_DIR} -``` - -### Differential Evolution - -In differential_evolution method, based differential evolution algorithm, use `avg_iou_cost` as minimum target function, python shell as follow: - -```shell -python tools/analysis_tools/optimize_anchors.py ${CONFIG} \ - --algorithm DE \ - --input-shape ${INPUT_SHAPE [WIDTH HEIGHT]} \ - --out-dir ${OUT_DIR} -``` - -### v5-k-means - -In v5-k-means method, clustering standard as same with YOLOv5 which use shape-match, python shell as follow: - -```shell -python tools/analysis_tools/optimize_anchors.py ${CONFIG} \ - --algorithm v5-k-means \ - --input-shape ${INPUT_SHAPE [WIDTH HEIGHT]} \ - --prior_match_thr ${PRIOR_MATCH_THR} \ - --out-dir ${OUT_DIR} -``` - -## Perform inference on large images - -First install [`sahi`](https://github.com/obss/sahi) with: - -```shell -pip install -U sahi>=0.11.4 -``` - -Perform MMYOLO inference on large images (as satellite imagery) as: - -```shell -wget -P checkpoint https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco/yolov5_m-v61_syncbn_fast_8xb16-300e_coco_20220917_204944-516a710f.pth - -python demo/large_image_demo.py \ - demo/large_image.jpg \ - configs/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco.py \ - checkpoint/yolov5_m-v61_syncbn_fast_8xb16-300e_coco_20220917_204944-516a710f.pth \ -``` - -Arrange slicing parameters as: - -```shell -python demo/large_image_demo.py \ - demo/large_image.jpg \ - configs/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco.py \ - checkpoint/yolov5_m-v61_syncbn_fast_8xb16-300e_coco_20220917_204944-516a710f.pth \ - --patch-size 512 - --patch-overlap-ratio 0.25 -``` - -Export debug visuals while performing inference on large images as: - -```shell -python demo/large_image_demo.py \ - demo/large_image.jpg \ - configs/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco.py \ - checkpoint/yolov5_m-v61_syncbn_fast_8xb16-300e_coco_20220917_204944-516a710f.pth \ - --debug -``` - -[`sahi`](https://github.com/obss/sahi) citation: - -``` -@article{akyon2022sahi, - title={Slicing Aided Hyper Inference and Fine-tuning for Small Object Detection}, - author={Akyon, Fatih Cagatay and Altinuc, Sinan Onur and Temizel, Alptekin}, - journal={2022 IEEE International Conference on Image Processing (ICIP)}, - doi={10.1109/ICIP46576.2022.9897990}, - pages={966-970}, - year={2022} -} -``` - -## Extracts a subset 
of COCO - -The training dataset of the COCO2017 dataset includes 118K images, and the validation set includes 5K images, which is a relatively large dataset. Loading JSON in debugging or quick verification scenarios will consume more resources and bring slower startup speed. - -The `extract_subcoco.py` script provides the ability to extract a specified number/classes/area-size of images. The user can use the `--num-img`, `--classes`, `--area-size` parameter to get a COCO subset of the specified condition of images. - -For example, extract images use scripts as follows: - -```shell -python tools/misc/extract_subcoco.py \ - ${ROOT} \ - ${OUT_DIR} \ - --num-img 20 \ - --classes cat dog person \ - --area-size small -``` - -It gone be extract 20 images, and only includes annotations which belongs to cat(or dog/person) and bbox area size is small, after filter by class and area size, the empty annotation images won't be chosen, guarantee the images be extracted definitely has annotation info. - -Currently, only support COCO2017. In the future will support user-defined datasets of standard coco JSON format. - -The root path folder format is as follows: - -```text -├── root -│ ├── annotations -│ ├── train2017 -│ ├── val2017 -│ ├── test2017 -``` - -1. Extract 10 training images and 10 validation images using only 5K validation sets. - -```shell -python tools/misc/extract_subcoco.py ${ROOT} ${OUT_DIR} --num-img 10 -``` - -2. Extract 20 training images using the training set and 20 validation images using the validation set. - -```shell -python tools/misc/extract_subcoco.py ${ROOT} ${OUT_DIR} --num-img 20 --use-training-set -``` - -3. Set the global seed to 1. The default is no setting. - -```shell -python tools/misc/extract_subcoco.py ${ROOT} ${OUT_DIR} --num-img 20 --use-training-set --seed 1 -``` - -4. Extract images by specify classes - -```shell -python tools/misc/extract_subcoco.py ${ROOT} ${OUT_DIR} --classes cat dog person -``` - -5. Extract images by specify anchor size - -```shell -python tools/misc/extract_subcoco.py ${ROOT} ${OUT_DIR} --area-size small -``` diff --git a/docs/en/user_guides/yolov5_tutorial.md b/docs/en/user_guides/yolov5_tutorial.md deleted file mode 100644 index ff9d703a..00000000 --- a/docs/en/user_guides/yolov5_tutorial.md +++ /dev/null @@ -1,235 +0,0 @@ -# From getting started to deployment with YOLOv5 - -## Environment Setup - -Note: Since this repository uses OpenMMLab 2.0, please create a new conda virtual environment to prevent conflicts with your existing repositories and projects of OpenMMLab 1.0. - -```shell -conda create -n open-mmlab python=3.8 -y -conda activate open-mmlab -conda install pytorch torchvision -c pytorch -# conda install pytorch torchvision cpuonly -c pytorch -pip install -U openmim -mim install "mmengine>=0.3.1" -mim install "mmcv>=2.0.0rc1,<2.1.0" -mim install "mmdet>=3.0.0rc5,<3.1.0" -git clone https://github.com/open-mmlab/mmyolo.git -cd mmyolo -# Install albumentations -pip install -r requirements/albu.txt -# Install MMYOLO -mim install -v -e . -# "-v" means verbose, or more output -# "-e" means install the project in editable mode, so any local modifications made to the code will take effect, eliminating the need to reinstall. -``` - -For more detailed information about environment configuration, please refer to [get_started](../get_started.md). - -## Dataset Preparation - -In this tutorial, we provide the ballon dataset, which is less than 40MB, as the training dataset for MMYOLO. 
- -```shell -python tools/misc/download_dataset.py --dataset-name balloon --save-dir data --unzip -python tools/dataset_converters/balloon2coco.py -``` - -After executing the above command, the balloon dataset will be downloaded in the `data` folder with the converted format we need. The `train.json` and `val.json` are the annotation files in the COCO format. - -
-image -
- -## Config File Preparation - -Create a new file called the `yolov5_s-v61_syncbn_fast_1xb4-300e_balloon.py` configuration file in the `configs/yolov5` folder, and copy the following content into it. - -```python -_base_ = './yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py' - -data_root = 'data/balloon/' - -train_batch_size_per_gpu = 4 -train_num_workers = 2 - -metainfo = { - 'classes': ('balloon', ), - 'palette': [ - (220, 20, 60), - ] -} - -train_dataloader = dict( - batch_size=train_batch_size_per_gpu, - num_workers=train_num_workers, - dataset=dict( - data_root=data_root, - metainfo=metainfo, - data_prefix=dict(img='train/'), - ann_file='train.json')) - -val_dataloader = dict( - dataset=dict( - data_root=data_root, - metainfo=metainfo, - data_prefix=dict(img='val/'), - ann_file='val.json')) - -test_dataloader = val_dataloader - -val_evaluator = dict(ann_file=data_root + 'val.json') - -test_evaluator = val_evaluator - -model = dict(bbox_head=dict(head_module=dict(num_classes=1))) - -default_hooks = dict(logger=dict(interval=1)) -``` - -The above configuration is inherited from `./yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py`, and `data_root`, `metainfo`, `train_dataloader`, `val_dataloader`, `num_classes` and other configurations are updated according to the balloon data we are using. -The reason why we set the `interval` of the logger to 1 is that the balloon data set we choose is relatively small, and if the `interval` is too large, we will not see the output of the loss-related log. Therefore, by setting the `interval` of the logger to 1 will ensure that each interval iteration will output a loss-related log. - -## Training - -```shell -python tools/train.py configs/yolov5/yolov5_s-v61_syncbn_fast_1xb4-300e_balloon.py -``` - -After executing the above training command, the `work_dirs/yolov5_s-v61_syncbn_fast_1xb4-300e_balloon` folder will be automatically generated. Both the weight and the training configuration files will be saved in this folder. - -
-image -
- -### Resume training after interruptions - -If training stops midway, add `--resume` at the end of the training command, and the program will automatically load the latest weight file from `work_dirs` to resume training. - -```shell -python tools/train.py configs/yolov5/yolov5_s-v61_syncbn_fast_1xb4-300e_balloon.py --resume -``` - -### Fine-tune with loaded pretrained weights - -NOTICE: It is highly recommended that finetuning from large datasets, such as COCO, can significantly boost the performance of overall network. -In this example, compared with training from scratch, finetuning the pretrained model outperforms with a significant margin. (Over 30+ mAP boost than training from scratch). - -1. Download the COCO dataset pre-trained weights - -```shell -cd mmyolo -wget https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth -``` - -2. Load the pretrained model to train - -```shell -cd mmyolo -python tools/train.py configs/yolov5/yolov5_s-v61_syncbn_fast_1xb4-300e_balloon.py \ - --cfg-options load_from='yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth' -``` - -3. Freeze backbone to train - -Freeze the four backbone stages by setting `model.backbone.frozen_stages=4` in the config file or from the command line. - -```shell -# Set model.backbone.frozen_stages=4 from the command line -cd mmyolo -python tools/train.py configs/yolov5/yolov5_s-v61_syncbn_fast_1xb4-300e_balloon.py \ - --cfg-options load_from='yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth' model.backbone.frozen_stages=4 -``` - -### Visualization - -For `visualization` of `default_hooks` in `configs/yolov5/yolov5_s-v61_syncbn_fast_1xb4-300e_balloon.py`, we set `draw` to `True` and `interval` to `2`. - -```python -default_hooks = dict( - logger=dict(interval=1), - visualization=dict(draw=True, interval=2), -) -``` - -Re-run the following training command. During the validation, each `interval` image will save a puzzle of the annotation and prediction results to `work_dirs/yolov5_s-v61_syncbn_fast_1xb4-300e_balloon/{timestamp}/vis_data/vis_image` folder. - -```shell -python tools/train.py configs/yolov5/yolov5_s-v61_syncbn_fast_1xb4-300e_balloon.py -``` - -
-image -
- -#### Visualization Backend Support - -MMEngine supports various backends such as local, TensorBoard, and wandb. - -- wandb - -Register and get your wandb API key from the [official website](https://wandb.ai/settings). - -
-image -
- -```shell -pip install wandb - -wandb login -# enter your API key, then you can see if you login successfully -``` - -Add wandb configuration in `configs/yolov5/yolov5_s-v61_syncbn_fast_1xb4-300e_balloon.py`. - -```python -visualizer = dict(vis_backends = [dict(type='LocalVisBackend'), dict(type='WandbVisBackend')]) -``` - -Re-run the training command to check data visualization results such as loss, learning rate, and coco/bbox_mAP in the web link prompted on the command line. - -```shell -python tools/train.py configs/yolov5/yolov5_s-v61_syncbn_fast_1xb4-300e_balloon.py -``` - -
-image -
- -- Tensorboard - -Install Tensorboard - -```shell -pip install tensorboard -``` - -Similar to wandb, we need to add Tensorboard configuration in `configs/yolov5/yolov5_s-v61_syncbn_fast_1xb4-300e_balloon.py`. - -```python -visualizer = dict(vis_backends=[dict(type='LocalVisBackend'),dict(type='TensorboardVisBackend')]) -``` - -Re-run the training command, a Tensorboard folder will be created in `work_dirs/yolov5_s-v61_syncbn_fast_1xb4-300e_balloon/{timestamp}/vis_data`, You can get data visualization results such as loss, learning rate, and coco/bbox_mAP in the web link prompted on the command line with the following command: - -```shell -tensorboard --logdir=work_dirs/yolov5_s-v61_syncbn_fast_1xb4-300e_balloon -``` - -## Model Testing - -```shell -python tools/test.py configs/yolov5/yolov5_s-v61_syncbn_fast_1xb4-300e_balloon.py \ - work_dirs/yolov5_s-v61_syncbn_fast_1xb4-300e_balloon/epoch_300.pth \ - --show-dir show_results -``` - -Run the above command, the inference result picture will be automatically saved to the `work_dirs/yolov5_s-v61_syncbn_fast_1xb4-300e_balloon/{timestamp}/show_results` folder. The following is one of the result pictures. The left one is the actual annotation, and the right is the model inference result. - -
-result_img -
- -## Model Deployment - -Please refer to [this](../deploy/yolov5_deployment.md) diff --git a/docs/zh_cn/common_usage/output_predictions.md b/docs/zh_cn/common_usage/output_predictions.md index 6c123e96..b11f856d 100644 --- a/docs/zh_cn/common_usage/output_predictions.md +++ b/docs/zh_cn/common_usage/output_predictions.md @@ -7,7 +7,7 @@ json 文件仅保存 `image_id`、`bbox`、`score` 和 `category_id`; json 文 pkl 保存内容比 json 文件更多,还会保存预测图片的文件名和尺寸等一系列信息; pkl 文件可以使用 pickle 库读取。 ``` -### 输出为 json 文件 +## 输出为 json 文件 如果想将预测结果输出为 json 文件,则命令如下: @@ -23,7 +23,7 @@ python tools/test.py configs\yolov5\yolov5_s-v61_syncbn_8xb16-300e_coco.py yolov 运行以上命令会在 `work_dirs/demo` 文件夹下,输出 `json_demo.bbox.json` 文件。 -### 输出为 pkl 文件 +## 输出为 pkl 文件 如果想将预测结果输出为 pkl 文件,则命令如下: diff --git a/docs/zh_cn/get_started/15_minutes_object_detection.md b/docs/zh_cn/get_started/15_minutes_object_detection.md index fc7dc295..6523dd51 100644 --- a/docs/zh_cn/get_started/15_minutes_object_detection.md +++ b/docs/zh_cn/get_started/15_minutes_object_detection.md @@ -1,63 +1,123 @@ # 15 分钟上手 MMYOLO 目标检测 +目标检测任务是指给定一张图片,网络预测出图片中所包括的所有物体类别和对应的边界框 + +
+object detection +
+ +以我们提供的猫 cat 小数据集为例,带大家 15 分钟轻松上手 MMYOLO 目标检测。整个流程包含如下步骤: + +- [环境安装](#环境安装) +- [数据集准备](#数据集准备) +- [配置准备](#配置准备) +- [模型训练](#模型训练) +- [模型测试](#模型测试) +- [EasyDeploy 模型部署](#easydeploy-模型部署) + ## 环境安装 -温馨提醒:由于本仓库采用的是 OpenMMLab 2.0,请最好新建一个 conda 虚拟环境,防止和 OpenMMLab 1.0 已经安装的仓库冲突。 +假设你已经提前安装好了 Conda,接下来安装 PyTorch ```shell -conda create -n open-mmlab python=3.8 -y -conda activate open-mmlab +conda create -n mmyolo python=3.8 -y +conda activate mmyolo +# 如果你有 GPU conda install pytorch torchvision -c pytorch +# 如果你是 CPU # conda install pytorch torchvision cpuonly -c pytorch -pip install -U openmim -mim install "mmengine>=0.3.1" -mim install "mmcv>=2.0.0rc1,<2.1.0" -mim install "mmdet>=3.0.0rc5,<3.1.0" +``` + +安装 MMYOLO 和依赖库 + +```shell git clone https://github.com/open-mmlab/mmyolo.git cd mmyolo +pip install -U openmim +mim install -r requirements/mminstall.txt # Install albumentations -pip install -r requirements/albu.txt +mim install -r requirements/albu.txt # Install MMYOLO mim install -v -e . # "-v" 指详细说明,或更多的输出 # "-e" 表示在可编辑模式下安装项目,因此对代码所做的任何本地修改都会生效,从而无需重新安装。 ``` -详细环境配置操作请查看 [get_started](../get_started/installation.md) +```{note} +温馨提醒:由于本仓库采用的是 OpenMMLab 2.0,请最好新建一个 conda 虚拟环境,防止和 OpenMMLab 1.0 已经安装的仓库冲突。 +``` + +详细环境配置操作请查看 [安装和验证](./installation.md) ## 数据集准备 -本文选取不到 40MB 大小的 balloon 气球数据集作为 MMYOLO 的学习数据集。 - -```shell -python tools/misc/download_dataset.py --dataset-name balloon --save-dir data --unzip -python tools/dataset_converters/balloon2coco.py -``` - -执行以上命令,下载数据集并转化格式后,balloon 数据集在 `data` 文件夹中准备好了,`train.json` 和 `val.json` 便是 coco 格式的标注文件了。 +Cat 数据集是一个包括 144 张图片的单类别数据集(本 cat 数据集由 @RangeKing 提供原始图片,由 @PeterH0323 进行数据清洗), 包括了训练所需的标注信息。 样例图片如下所示:
-image +cat dataset
-## config 文件准备 +你只需执行如下命令即可下载并且直接用起来 -在 `configs/yolov5` 文件夹下新建 `yolov5_s-v61_syncbn_fast_1xb4-300e_balloon.py` 配置文件,并把以下内容复制配置文件中。 +```shell +python tools/misc/download_dataset.py --dataset-name cat --save-dir ./data/cat --unzip --delete +``` + +数据集组织格式如下所示: + +
+image +
+ +data 位于 mmyolo 工程目录下, `data/cat/annotations` 中存放的是 COCO 格式的标注,`data/cat/images` 中存放的是所有图片 + +## 配置准备 + +以 YOLOv5 算法为例,考虑到用户显存和内存有限,我们需要修改一些默认训练参数来让大家愉快的跑起来,核心需要修改的参数如下 + +- YOLOv5 是 Anchor-Based 类算法,不同的数据集需要自适应计算合适的 Anchor +- 默认配置是 8 卡,每张卡 batch size 为 16,现将其改成单卡,每张卡 batch size 为 12 +- 默认训练 epoch 是 300,将其改成 40 epoch +- 由于数据集太小,我们选择固定 backbone 网络权重 +- 原则上 batch size 改变后,学习率也需要进行线性缩放,但是实测发现不需要 + +具体操作为在 `configs/yolov5` 文件夹下新建 `yolov5_s-v61_fast_1xb12-40e_cat.py` 配置文件(为了方便大家直接使用,我们已经提供了该配置),并把以下内容复制配置文件中。 ```python -_base_ = './yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py' +# 基于该配置进行继承并重写部分配置 +_base_ = 'yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py' -data_root = 'data/balloon/' +data_root = './data/cat/' # 数据集根路径 +class_name = ('cat', ) # 数据集类别名称 +num_classes = len(class_name) # 数据集类别数 +# metainfo 必须要传给后面的 dataloader 配置,否则无效 +# palette 是可视化时候对应类别的显示颜色 +# palette 长度必须大于等于和 classes 长度 +metainfo = dict(classes=class_name, palette=[(20, 220, 60)]) -train_batch_size_per_gpu = 4 -train_num_workers = 2 +# 基于 tools/analysis_tools/optimize_anchors.py 自适应计算的 anchor +anchors = [ + [(68, 69), (154, 91), (143, 162)], # P3/8 + [(242, 160), (189, 287), (391, 207)], # P4/16 + [(353, 337), (539, 341), (443, 432)] # P5/32 +] +# 最大训练 40 epoch +max_epochs = 40 +# bs 为 12 +train_batch_size_per_gpu = 12 +# dataloader 加载进程数 +train_num_workers = 4 -metainfo = { - 'classes': ('balloon', ), - 'palette': [ - (220, 20, 60), - ] -} +# 加载 COCO 预训练权重 +load_from = 'https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth' # noqa + +model = dict( + # 固定整个 backbone 权重,不进行训练 + backbone=dict(frozen_stages=4), + bbox_head=dict( + head_module=dict(num_classes=num_classes), + prior_generator=dict(base_sizes=anchors) + )) train_dataloader = dict( batch_size=train_batch_size_per_gpu, @@ -65,110 +125,100 @@ train_dataloader = dict( dataset=dict( data_root=data_root, metainfo=metainfo, - data_prefix=dict(img='train/'), - ann_file='train.json')) + # 数据集标注文件 json 路径 + ann_file='annotations/trainval.json', + # 数据集前缀 + data_prefix=dict(img='images/'))) val_dataloader = dict( dataset=dict( - data_root=data_root, metainfo=metainfo, - data_prefix=dict(img='val/'), - ann_file='val.json')) + data_root=data_root, + ann_file='annotations/trainval.json', + data_prefix=dict(img='images/'))) test_dataloader = val_dataloader -val_evaluator = dict(ann_file=data_root + 'val.json') - +val_evaluator = dict(ann_file=data_root + 'annotations/trainval.json') test_evaluator = val_evaluator -model = dict(bbox_head=dict(head_module=dict(num_classes=1))) - -default_hooks = dict(logger=dict(interval=1)) +default_hooks = dict( + # 每隔 10 个 epoch 保存一次权重,并且最多保存 2 个权重 + # 模型评估时候自动保存最佳模型 + checkpoint=dict(interval=10, max_keep_ckpts=2, save_best='auto'), + param_scheduler=dict(max_epochs=max_epochs), + # 日志打印间隔为 5 + logger=dict(type='LoggerHook', interval=5)) +# 评估间隔为 10 +train_cfg = dict(max_epochs=max_epochs, val_interval=10) ``` -以上配置从 `./yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py` 中继承,并根据 balloon 数据的特点更新了 `data_root`、`metainfo`、`train_dataloader`、`val_dataloader`、`num_classes` 等配置。 -我们将 logger 的 `interval` 设置为 1 的原因是,每进行 `interval` 次 iteration 会输出一次 loss 相关的日志,而我们选取气球数据集比较小,`interval` 太大我们将看不到 loss 相关日志的输出。 +以上配置从 `yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py` 中继承,并根据 cat 数据的特点更新了 `data_root`、`metainfo`、`train_dataloader`、`val_dataloader`、`num_classes` 等配置。 -## 训练 +## 模型训练 ```shell -python tools/train.py 
configs/yolov5/yolov5_s-v61_syncbn_fast_1xb4-300e_balloon.py +python tools/train.py configs/yolov5/yolov5_s-v61_fast_1xb12-40e_cat.py ``` -运行以上训练命令,`work_dirs/yolov5_s-v61_syncbn_fast_1xb4-300e_balloon` 文件夹会被自动生成,权重文件以及此次的训练配置文件将会保存在此文件夹中。 +运行以上训练命令 `work_dirs/yolov5_s-v61_fast_1xb12-40e_cat` 文件夹会被自动生成,权重文件以及此次的训练配置文件将会保存在此文件夹中。 在 1660 低端显卡上,整个训练过程大概需要 8 分钟。
-image +image
+在 `trainval.json` 上性能如下所示: + +```text + Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.685 + Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.953 + Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.852 + Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000 + Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = -1.000 + Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.685 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.664 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.749 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.761 + Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000 + Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = -1.000 + Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.761 +``` + +上述性能是通过 COCO API 打印,其中 -1 表示不存在对于尺度的物体。根据 COCO 定义的规则,Cat 数据集里面全部是大物体,不存在小和中等规模物体。 + +### 一些注意事项 + +在训练过程中会打印如下两个关键警告: + +- You are using `YOLOv5Head` with num_classes == 1. The loss_cls will be 0. This is a normal phenomenon. +- The model and loaded state dict do not match exactly + +这两个警告都不会对性能有任何影响。第一个警告是说明由于当前训练的类别数是 1,根据 YOLOv5 算法的社区, 分类分支的 loss 始终是 0,这是正常现象。第二个警告是因为目前是采用微调模式进行训练,我们加载了 COCO 80 个类的预训练权重, +这会导致最后的 Head 模块卷积通道数不对应,从而导致这部分权重无法加载,这也是正常现象。 + ### 中断后恢复训练 -如果训练中途停止,在训练命令最后加上 `--resume` ,程序会自动从 `work_dirs` 中加载最新的权重文件恢复训练。 +如果训练中途停止,可以在训练命令最后加上 `--resume` ,程序会自动从 `work_dirs` 中加载最新的权重文件恢复训练。 ```shell -python tools/train.py configs/yolov5/yolov5_s-v61_syncbn_fast_1xb4-300e_balloon.py --resume +python tools/train.py configs/yolov5/yolov5_s-v61_fast_1xb12-40e_cat.py --resume ``` -### 加载预训练权重微调 +### 节省显存策略 -经过测试,相比不加载预训练模型,加载 YOLOv5-s 预训练模型在气球数据集上训练和验证 coco/bbox_mAP 能涨 30 多个百分点。 - -1. 下载 COCO 数据集预训练权重 +上述配置大概需要 3.0G 显存,如果你的显存不够,可以考虑开启混合精度训练 ```shell -cd mmyolo -wget https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth +python tools/train.py configs/yolov5/yolov5_s-v61_fast_1xb12-40e_cat.py --amp ``` -2. 加载预训练模型进行训练 +### 训练可视化 -```shell -cd mmyolo -python tools/train.py configs/yolov5/yolov5_s-v61_syncbn_fast_1xb4-300e_balloon.py \ - --cfg-options load_from='yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth' -``` +MMYOLO 目前支持本地、TensorBoard 以及 WandB 等多种后端可视化,默认是采用本地可视化方式,你可以切换为 WandB 等实时可视化训练过程中各类指标。 -3. 冻结 backbone 进行训练 +#### 1 WandB 可视化使用 -通过 config 文件或者命令行中设置 model.backbone.frozen_stages=4 冻结 backbone 的 4 个 stages。 - -```shell -# 命令行中设置 model.backbone.frozen_stages=4 -cd mmyolo -python tools/train.py configs/yolov5/yolov5_s-v61_syncbn_fast_1xb4-300e_balloon.py \ - --cfg-options load_from='yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth' model.backbone.frozen_stages=4 -``` - -### 训练验证中可视化相关 - -#### 验证阶段可视化 - -我们将 `configs/yolov5/yolov5_s-v61_syncbn_fast_1xb4-300e_balloon.py` 中的 `default_hooks` 的 `visualization` 进行修改,设置 `draw` 为 `True`,`interval` 为 `2`。 - -```shell -default_hooks = dict( - logger=dict(interval=1), - visualization=dict(draw=True, interval=2), -) -``` - -重新运行以下训练命令,在验证评估的过程中,每 `interval` 张图片就会保存一张标注结果和预测结果的拼图到 `work_dirs/yolov5_s-v61_syncbn_fast_1xb4-300e_balloon/{timestamp}/vis_data/vis_image` 文件夹中了。 - -```shell -python tools/train.py configs/yolov5/yolov5_s-v61_syncbn_fast_1xb4-300e_balloon.py -``` - -
-image -
- -#### 可视化后端使用 - -MMEngine 支持本地、TensorBoard 以及 wandb 等多种后端。 - -##### wandb 可视化使用 - -wandb 官网注册并在 https://wandb.ai/settings 获取到 wandb 的 API Keys。 +WandB 官网注册并在 https://wandb.ai/settings 获取到 WandB 的 API Keys。
image @@ -180,7 +230,7 @@ pip install wandb wandb login ``` -在 `configs/yolov5/yolov5_s-v61_syncbn_fast_1xb4-300e_balloon.py` 添加 wandb 配置 +在 `configs/yolov5/yolov5_s-v61_fast_1xb12-40e_cat.py` 配置文件最后添加 WandB 配置 ```python visualizer = dict(vis_backends = [dict(type='LocalVisBackend'), dict(type='WandbVisBackend')]) @@ -189,14 +239,17 @@ visualizer = dict(vis_backends = [dict(type='LocalVisBackend'), dict(type='Wandb 重新运行训练命令便可以在命令行中提示的网页链接中看到 loss、学习率和 coco/bbox_mAP 等数据可视化了。 ```shell -python tools/train.py configs/yolov5/yolov5_s-v61_syncbn_fast_1xb4-300e_balloon.py +python tools/train.py configs/yolov5/yolov5_s-v61_fast_1xb12-40e_cat.py ```
-image +image +
+
+image
-##### Tensorboard 可视化使用 +#### 2 Tensorboard 可视化使用 安装 Tensorboard 环境 @@ -204,33 +257,155 @@ python tools/train.py configs/yolov5/yolov5_s-v61_syncbn_fast_1xb4-300e_balloon. pip install tensorboard ``` -同上述在配置文件 `configs/yolov5/yolov5_s-v61_syncbn_fast_1xb4-300e_balloon.py`中添加 `tensorboard` 配置 +同上述在配置文件 `configs/yolov5/yolov5_s-v61_fast_1xb12-40e_cat.py`配置的最后添加 `tensorboard` 配置 ```python visualizer = dict(vis_backends=[dict(type='LocalVisBackend'),dict(type='TensorboardVisBackend')]) ``` -重新运行训练命令后,Tensorboard 文件会生成在可视化文件夹 `work_dirs/yolov5_s-v61_syncbn_fast_1xb4-300e_balloon/{timestamp}/vis_data` 下, +重新运行训练命令后,Tensorboard 文件会生成在可视化文件夹 `work_dirs/yolov5_s-v61_fast_1xb12-40e_cat.py/{timestamp}/vis_data` 下, 运行下面的命令便可以在网页链接使用 Tensorboard 查看 loss、学习率和 coco/bbox_mAP 等可视化数据了: ```shell -tensorboard --logdir=work_dirs/yolov5_s-v61_syncbn_fast_1xb4-300e_balloon +tensorboard --logdir=work_dirs/yolov5_s-v61_fast_1xb12-40e_cat.py ``` ## 模型测试 ```shell -python tools/test.py configs/yolov5/yolov5_s-v61_syncbn_fast_1xb4-300e_balloon.py \ - work_dirs/yolov5_s-v61_syncbn_fast_1xb4-300e_balloon/epoch_300.pth \ +python tools/test.py configs/yolov5/yolov5_s-v61_fast_1xb12-40e_cat.py \ + work_dirs/yolov5_s-v61_fast_1xb12-40e_cat/epoch_40.pth \ --show-dir show_results ``` -运行以上测试命令,推理结果图片会自动保存至 `work_dirs/yolov5_s-v61_syncbn_fast_1xb4-300e_balloon/{timestamp}/show_results` 文件夹中。下面为其中一张结果图片,左图为实际标注,右图为模型推理结果。 +运行以上测试命令, 你不不仅可以得到**模型训练**部分所打印的 AP 性能,还可以将推理结果图片自动保存至 `work_dirs/yolov5_s-v61_fast_1xb12-40e_cat/{timestamp}/show_results` 文件夹中。下面为其中一张结果图片,左图为实际标注,右图为模型推理结果。
-result_img +result_img
-## 模型部署 +如果你使用了 `WandbVisBackend` 或者 `TensorboardVisBackend`,则还可以在浏览器窗口可视化模型推理结果。 -请参考[这里](../deploy/yolov5_deployment.md) +## 特征图相关可视化 + +MMYOLO 中提供了特征图相关可视化脚本,用于分析当前模型训练效果。 详细使用流程请参考 [特征图可视化](../recommended_topics/visualization.md) + +由于 `test_pipeline` 直接可视化会存在偏差,故将需要 `configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py` 中 `test_pipeline` + +```python +test_pipeline = [ + dict( + type='LoadImageFromFile', + file_client_args=_base_.file_client_args), + dict(type='YOLOv5KeepRatioResize', scale=img_scale), + dict( + type='LetterResize', + scale=img_scale, + allow_scale_up=False, + pad_val=dict(img=114)), + dict(type='LoadAnnotations', with_bbox=True, _scope_='mmdet'), + dict( + type='mmdet.PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', + 'scale_factor', 'pad_param')) +] +``` + +修改为如下配置: + +```python +test_pipeline = [ + dict( + type='LoadImageFromFile', + file_client_args=_base_.file_client_args), + dict(type='mmdet.Resize', scale=img_scale, keep_ratio=False), # 这里将 LetterResize 修改成 mmdet.Resize + dict(type='LoadAnnotations', with_bbox=True, _scope_='mmdet'), + dict( + type='mmdet.PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', + 'scale_factor')) +] +``` + +我们选择 `data/cat/images/IMG_20221020_112705.jpg` 图片作为例子,可视化 YOLOv5 backbone 和 neck 层的输出特征图。 + +**1. 可视化 YOLOv5 backbone 输出的 3 个通道** + +```shell +python demo/featmap_vis_demo.py data/cat/images/IMG_20221020_112705.jpg \ + configs/yolov5/yolov5_s-v61_fast_1xb12-40e_cat.py \ + work_dirs/yolov5_s-v61_fast_1xb8-40e_cat/epoch_40.pth \ + --target-layers backbone \ + --channel-reduction squeeze_mean +``` + +
+image +
+ +结果会保存到当前路径的 output 文件夹下。上图中绘制的 3 个输出特征图对应大中小输出特征图。由于本次训练的 backbone 实际上没有参与训练,从上图可以看到,大物体 cat 是在小特征图进行预测,这符合目标检测分层检测思想。 + +**2. 可视化 YOLOv5 neck 输出的 3 个通道** + +```shell +python demo/featmap_vis_demo.py data/cat/images/IMG_20221020_112705.jpg \ + configs/yolov5/yolov5_s-v61_fast_1xb12-40e_cat.py \ + work_dirs/yolov5_s-v61_fast_1xb8-40e_cat/epoch_40.pth \ + --target-layers neck \ + --channel-reduction squeeze_mean +``` + +
+image +
+ +从上图可以看出,由于 neck 是参与训练的,并且由于我们重新设置了 anchor, 强行让 3 个输出特征图都拟合同一个尺度的物体,导致 neck 输出的 3 个图类似,破坏了 backbone 原先的预训练分布。同时也可以看出 40 epoch 训练上述数据集是不够的,特征图效果不佳。 + +**3. Grad-Based CAM 可视化** + +基于上述特征图可视化效果,我们可以分析特征层 bbox 级别的 Grad CAM。 + +(a) 查看 neck 输出的最小输出特征图的 Grad CAM + +```shell +python demo/boxam_vis_demo.py data/cat/images/IMG_20221020_112705.jpg \ + configs/yolov5/yolov5_s-v61_fast_1xb12-40e_cat.py \ + work_dirs/yolov5_s-v61_fast_1xb8-40e_cat/epoch_40.pth \ + --target-layer neck.out_layers[2] +``` + +
+image +
+ +(b) 查看 neck 输出的中等输出特征图的 Grad CAM + +```shell +python demo/boxam_vis_demo.py data/cat/images/IMG_20221020_112705.jpg \ + configs/yolov5/yolov5_s-v61_fast_1xb12-40e_cat.py \ + work_dirs/yolov5_s-v61_fast_1xb8-40e_cat/epoch_40.pth \ + --target-layer neck.out_layers[1] +``` + +
+image +
+ +(c) 查看 neck 输出的最大输出特征图的 Grad CAM + +```shell +python demo/boxam_vis_demo.py data/cat/images/IMG_20221020_112705.jpg \ + configs/yolov5/yolov5_s-v61_fast_1xb12-40e_cat.py \ + work_dirs/yolov5_s-v61_fast_1xb8-40e_cat/epoch_40.pth \ + --target-layer neck.out_layers[0] +``` + +
+image +
+ +## EasyDeploy 模型部署 + +TODO + +以上完整内容可以查看 [15_minutes_object_detection.ipynb](<>) diff --git a/docs/zh_cn/get_started/article.md b/docs/zh_cn/get_started/article.md index e9e9bfc3..43cd0f8e 100644 --- a/docs/zh_cn/get_started/article.md +++ b/docs/zh_cn/get_started/article.md @@ -16,14 +16,14 @@ - [社区协作,简洁易用,快来开箱新一代 YOLO 系列开源库](https://zhuanlan.zhihu.com/p/575615805) - [MMYOLO 社区倾情贡献,RTMDet 原理社区开发者解读来啦!](https://zhuanlan.zhihu.com/p/569777684) +- [MMYOLO 自定义数据集从标注到部署保姆级教程](https://zhuanlan.zhihu.com/p/595497726) +- [满足一切需求的 MMYOLO 可视化:测试过程可视化](https://zhuanlan.zhihu.com/p/593179372) - [YOLOv8 深度详解!一文看懂,快速上手](https://zhuanlan.zhihu.com/p/598566644) - [玩转 MMYOLO 基础类第一期: 配置文件太复杂?继承用法看不懂?配置全解读来了](https://zhuanlan.zhihu.com/p/577715188) - [玩转 MMYOLO 工具类第一期: 特征图可视化](https://zhuanlan.zhihu.com/p/578141381?) -- [玩转 MMYOLO 实用类第二期:源码阅读和调试「必备」技巧文档](https://zhuanlan.zhihu.com/p/580885852) +- [玩转 MMYOLO 实用类第一期:源码阅读和调试「必备」技巧文档](https://zhuanlan.zhihu.com/p/580885852) - [玩转 MMYOLO 基础类第二期:工程文件结构简析](https://zhuanlan.zhihu.com/p/584807195) - [玩转 MMYOLO 实用类第二期:10分钟换遍主干网络文档](https://zhuanlan.zhihu.com/p/585641598) -- [MMYOLO 自定义数据集从标注到部署保姆级教程](https://zhuanlan.zhihu.com/p/595497726) -- [满足一切需求的 MMYOLO 可视化:测试过程可视化](https://zhuanlan.zhihu.com/p/593179372) ### 视频 diff --git a/docs/zh_cn/get_started/dependencies.md b/docs/zh_cn/get_started/dependencies.md index 8b736098..fcd3f661 100644 --- a/docs/zh_cn/get_started/dependencies.md +++ b/docs/zh_cn/get_started/dependencies.md @@ -5,6 +5,7 @@ | MMYOLO version | MMDetection version | MMEngine version | MMCV version | | :------------: | :----------------------: | :----------------------: | :---------------------: | | main | mmdet>=3.0.0rc5, \<3.1.0 | mmengine>=0.3.1, \<1.0.0 | mmcv>=2.0.0rc0, \<2.1.0 | +| 0.4.0 | mmdet>=3.0.0rc5, \<3.1.0 | mmengine>=0.3.1, \<1.0.0 | mmcv>=2.0.0rc0, \<2.1.0 | | 0.3.0 | mmdet>=3.0.0rc5, \<3.1.0 | mmengine>=0.3.1, \<1.0.0 | mmcv>=2.0.0rc0, \<2.1.0 | | 0.2.0 | mmdet>=3.0.0rc3, \<3.1.0 | mmengine>=0.3.1, \<1.0.0 | mmcv>=2.0.0rc0, \<2.1.0 | | 0.1.3 | mmdet>=3.0.0rc3, \<3.1.0 | mmengine>=0.3.1, \<1.0.0 | mmcv>=2.0.0rc0, \<2.1.0 | @@ -14,10 +15,15 @@ 本节中,我们将演示如何用 PyTorch 准备一个环境。 -MMYOLO 支持在 Linux,Windows 和 macOS 上运行。它需要 Python 3.7 以上,CUDA 9.2 以上和 PyTorch 1.7 以上。 +MMYOLO 支持在 Linux,Windows 和 macOS 上运行。它的基本环境依赖为: + +- Python 3.7+ +- PyTorch 1.7+ +- CUDA 9.2+ +- GCC 5.4+ ```{note} -如果你对 PyTorch 有经验并且已经安装了它,你可以直接跳转到[下一小节](#安装流程)。否则,你可以按照下述步骤进行准备 +如果你对 PyTorch 有经验并且已经安装了它,你可以直接跳转到下一小节。否则,你可以按照下述步骤进行准备 ``` **步骤 0.** 从 [官方网站](https://docs.conda.io/en/latest/miniconda.html) 下载并安装 Miniconda。 @@ -25,8 +31,8 @@ MMYOLO 支持在 Linux,Windows 和 macOS 上运行。它需要 Python 3.7 以 **步骤 1.** 创建并激活一个 conda 环境。 ```shell -conda create -n open-mmlab python=3.8 -y -conda activate open-mmlab +conda create -n mmyolo python=3.8 -y +conda activate mmyolo ``` **步骤 2.** 基于 [PyTorch 官方说明](https://pytorch.org/get-started/locally/) 安装 PyTorch。 @@ -42,3 +48,11 @@ conda install pytorch torchvision -c pytorch ```shell conda install pytorch torchvision cpuonly -c pytorch ``` + +**步骤 3.** 验证 PyTorch 安装 + +```shell +python -c "import torch; print(torch.__version__); print(torch.cuda.is_available())" +``` + +如果是在 GPU 平台上,那么会打印版本信息和 True 字符,否则打印版本信息和 False 字符。 diff --git a/docs/zh_cn/get_started/installation.md b/docs/zh_cn/get_started/installation.md index 0dcc0da0..147d204f 100644 --- a/docs/zh_cn/get_started/installation.md +++ b/docs/zh_cn/get_started/installation.md @@ -11,11 +11,19 @@ mim install "mmcv>=2.0.0rc1,<2.1.0" mim install "mmdet>=3.0.0rc5,<3.1.0" ``` +如果你当前已经处于 mmyolo 
工程目录下,则可以采用如下简化写法 + +```shell +cd mmyolo +pip install -U openmim +mim install -r requirements/mminstall.txt +``` + **注意:** a. 在 MMCV-v2.x 中,`mmcv-full` 改名为 `mmcv`,如果你想安装不包含 CUDA 算子精简版,可以通过 `mim install mmcv-lite>=2.0.0rc1` 来安装。 -b. 如果使用 albumentations,我们建议使用 pip install -r requirements/albu.txt 或者 pip install -U albumentations --no-binary qudida,albumentations 进行安装。 如果简单地使用 pip install albumentations==1.0.1 进行安装,则会同时安装 opencv-python-headless(即便已经安装了 opencv-python 也会再次安装)。我们建议在安装 albumentations 后检查环境,以确保没有同时安装 opencv-python 和 opencv-python-headless,因为同时安装可能会导致一些问题。更多细节请参考 [官方文档](https://albumentations.ai/docs/getting_started/installation/#note-on-opencv-dependencies) 。 +b. 如果使用 `albumentations`,我们建议使用 `pip install -r requirements/albu.txt` 或者 `pip install -U albumentations --no-binary qudida,albumentations` 进行安装。 如果简单地使用 `pip install albumentations==1.0.1` 进行安装,则会同时安装 `opencv-python-headless`(即便已经安装了 `opencv-python` 也会再次安装)。我们建议在安装 albumentations 后检查环境,以确保没有同时安装 `opencv-python` 和 `opencv-python-headless`,因为同时安装可能会导致一些问题。更多细节请参考 [官方文档](https://albumentations.ai/docs/getting_started/installation/#note-on-opencv-dependencies) 。 **步骤 1.** 安装 MMYOLO @@ -25,7 +33,7 @@ b. 如果使用 albumentations,我们建议使用 pip install -r requirements/ git clone https://github.com/open-mmlab/mmyolo.git cd mmyolo # Install albumentations -pip install -r requirements/albu.txt +mim install -r requirements/albu.txt # Install MMYOLO mim install -v -e . # "-v" 指详细说明,或更多的输出 @@ -48,7 +56,7 @@ mim install "mmyolo" mim download mmyolo --config yolov5_s-v61_syncbn_fast_8xb16-300e_coco --dest . ``` -下载将需要几秒钟或更长时间,这取决于你的网络环境。完成后,你会在当前文件夹中发现两个文件 `yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py` and `yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth`。 +下载将需要几秒钟或更长时间,这取决于你的网络环境。完成后,你会在当前文件夹中发现两个文件 `yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py` 和 `yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth`。 **步骤 2.** 推理验证 @@ -89,109 +97,7 @@ inference_detector(model, 'demo/demo.jpg') 你将会看到一个包含 `DetDataSample` 的列表,预测结果在 `pred_instance` 里,包含有预测框、预测分数 和 预测类别。 -## 自定义安装 - -### CUDA 版本 - -在安装 PyTorch 时,你需要指定 CUDA 的版本。如果你不清楚应该选择哪一个,请遵循我们的建议。 - -- 对于 Ampere 架构的 NVIDIA GPU,例如 GeForce 30 系列 以及 NVIDIA A100,CUDA 11 是必需的。 -- 对于更早的 NVIDIA GPU,CUDA 11 是向后兼容 (backward compatible) 的,但 CUDA 10.2 能够提供更好的兼容性,也更加轻量。 - -请确保你的 GPU 驱动版本满足最低的版本需求,参阅 NVIDIA 官方的 [CUDA 工具箱和相应的驱动版本关系表](https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html#cuda-major-component-versions__table-cuda-toolkit-driver-versions)。 - -```{note} -如果按照我们的最佳实践进行安装,CUDA 运行时库就足够了,因为我们提供相关 CUDA 代码的预编译,不需要进行本地编译。 -但如果你希望从源码进行 MMCV 的编译,或是进行其他 CUDA 算子的开发,那么就必须安装完整的 CUDA 工具链,参见 -[NVIDIA 官网](https://developer.nvidia.com/cuda-downloads) ,另外还需要确保该 CUDA 工具链的版本与 PyTorch 安装时 -的配置相匹配(如用 `conda install` 安装 PyTorch 时指定的 cudatoolkit 版本)。 -``` - -### 不使用 MIM 安装 MMEngine - -要使用 pip 而不是 MIM 来安装 MMEngine,请遵照 [MMEngine 安装指南](https://mmengine.readthedocs.io/en/latest/get_started/installation.html)。 - -例如,你可以通过以下命令安装 MMEngine: - -```shell -pip install "mmengine>=0.3.1" -``` - -### 不使用 MIM 安装 MMCV - -MMCV 包含 C++ 和 CUDA 扩展,因此其对 PyTorch 的依赖比较复杂。MIM 会自动解析这些 依赖,选择合适的 MMCV 预编译包,使安装更简单,但它并不是必需的。 - -要使用 pip 而不是 MIM 来安装 MMCV,请遵照 [MMCV 安装指南](https://mmcv.readthedocs.io/zh_CN/2.x/get_started/installation.html)。 -它需要您用指定 URL 的形式手动指定对应的 PyTorch 和 CUDA 版本。 - -例如,下述命令将会安装基于 PyTorch 1.12.x 和 CUDA 11.6 编译的 mmcv: - -```shell -pip install "mmcv>=2.0.0rc1" -f https://download.openmmlab.com/mmcv/dist/cu116/torch1.12.0/index.html -``` - -### 在 CPU 环境中安装 - -我们的代码能够建立在只使用 CPU 的环境(CUDA 不可用)。 - -在 CPU 
模式下,可以进行模型训练(需要 MMCV 版本 >= `2.0.0rc1`)、测试或者推理,然而以下功能将在 CPU 模式下不能使用: - -- Deformable Convolution -- Modulated Deformable Convolution -- ROI pooling -- Deformable ROI pooling -- CARAFE: Content-Aware ReAssembly of FEatures -- SyncBatchNorm -- CrissCrossAttention: Criss-Cross Attention -- MaskedConv2d -- Temporal Interlace Shift -- nms_cuda -- sigmoid_focal_loss_cuda -- bbox_overlaps - -因此,如果尝试使用包含上述操作的模型进行训练/测试/推理,将会报错。下表列出了由于依赖上述算子而无法在 CPU 上运行的相关模型: - -| 操作 | 模型 | -| :-----------------------------------------------------: | :--------------------------------------------------------------------------------------: | -| Deformable Convolution/Modulated Deformable Convolution | DCN、Guided Anchoring、RepPoints、CentripetalNet、VFNet、CascadeRPN、NAS-FCOS、DetectoRS | -| MaskedConv2d | Guided Anchoring | -| CARAFE | CARAFE | -| SyncBatchNorm | ResNeSt | - -### 在 Google Colab 中安装 - -[Google Colab](https://colab.research.google.com/) 通常已经包含了 PyTorch 环境,因此我们只需要安装 MMEngine、MMCV、MMDetection 和 MMYOLO 即可,命令如下: - -**步骤 1.** 使用 [MIM](https://github.com/open-mmlab/mim) 安装 [MMEngine](https://github.com/open-mmlab/mmengine) 、 [MMCV](https://github.com/open-mmlab/mmcv) 和 [MMDetection](https://github.com/open-mmlab/mmdetection) 。 - -```shell -!pip3 install openmim -!mim install "mmengine>=0.3.1" -!mim install "mmcv>=2.0.0rc1,<2.1.0" -!mim install "mmdet>=3.0.0rc5,<3.1.0" -``` - -**步骤 2.** 使用源码安装 MMYOLO: - -```shell -!git clone https://github.com/open-mmlab/mmyolo.git -%cd mmyolo -!pip install -e . -``` - -**步骤 3.** 验证安装是否成功: - -```python -import mmyolo -print(mmyolo.__version__) -# 预期输出: 0.1.0 或其他版本号 -``` - -```{note} -在 Jupyter 中,感叹号 `!` 用于执行外部命令,而 `%cd` 是一个[魔术命令](https://ipython.readthedocs.io/en/stable/interactive/magics.html#magic-cd),用于切换 Python 的工作路径。 -``` - -### 通过 Docker 使用 MMYOLO +## 通过 Docker 使用 MMYOLO 我们提供了一个 [Dockerfile](https://github.com/open-mmlab/mmyolo/blob/main/docker/Dockerfile) 来构建一个镜像。请确保你的 [docker 版本](https://docs.docker.com/engine/install/) >=`19.03`。 @@ -218,18 +124,8 @@ export DATA_DIR=/path/to/your/dataset docker run --gpus all --shm-size=8g -it -v ${DATA_DIR}:/mmyolo/data mmyolo ``` +其余自定义安装流程请查看 [自定义安装](../tutorials/custom_installation.md) + ## 排除故障 -如果你在安装过程中遇到一些问题,请先查看 [FAQ](notes/faq.md) 页面。 - -如果没有找到解决方案,你也可以在 GitHub 上 [打开一个问题](https://github.com/open-mmlab/mmyolo/issues/new/choose)。 - -## 使用多个 MMYOLO 版本进行开发 - -训练和测试的脚本已经在 `PYTHONPATH` 中进行了修改,以确保脚本使用当前目录中的 MMYOLO。 - -要使环境中安装默认的 MMYOLO 而不是当前正在在使用的,可以删除出现在相关脚本中的代码: - -```shell -PYTHONPATH="$(dirname $0)/..":$PYTHONPATH -``` +如果你在安装过程中遇到一些问题,你可以在 GitHub 上 [打开一个问题](https://github.com/open-mmlab/mmyolo/issues/new/choose)。 diff --git a/docs/zh_cn/get_started/overview.md b/docs/zh_cn/get_started/overview.md index 58dc66ea..c87454ba 100644 --- a/docs/zh_cn/get_started/overview.md +++ b/docs/zh_cn/get_started/overview.md @@ -1,33 +1,60 @@ # 概述 -本章向您介绍 MMYOLO 的整体框架,并提供详细的教程链接。 - ## MMYOLO 介绍
图片
-MMYOLO 是一个 YOLO 系列的算法工具箱,目前仅实现了目标检测任务,后续会支持实例分割、全景分割和关键点检测等多种任务。其包括丰富的目标检测算法以及相关的组件和模块,下面是它的整体框架: +MMYOLO 是一个基于 PyTorch 和 MMDetection 的 YOLO 系列算法开源工具箱,它是 [OpenMMLab](https://openmmlab.com/) 项目的一部分。 MMYOLO 定位为 YOLO 系列热门开源库以及工业应用核心库,其愿景图如下所示: -MMYOLO 文件结构和 MMDetection 完全一致。为了能够充分复用 MMDetection 代码,MMYOLO 仅包括定制内容,其由 3 个主要部分组成:`datasets`、`models`、`engine`。 +
+愿景图 +
-- **datasets** 支持用于目标检测的各种数据集。 - - **transforms** 包含各种数据增强变换。 -- **models** 是检测器最重要的部分,包含检测器的不同组件。 - - **detectors** 定义所有检测模型类。 - - **data_preprocessors** 用于预处理模型的输入数据。 - - **backbones** 包含各种骨干网络 - - **necks** 包含各种模型颈部组件 - - **dense_heads** 包含执行密集预测的各种检测头。 - - **losses** 包含各种损失函数 - - **task_modules** 为检测任务提供模块。例如 assigners、samplers、box coders 和 prior generators。 - - **layers** 提供了一些基本的神经网络层 -- **engine** 是运行时组件的一部分。 - - **optimizers** 提供优化器和优化器封装。 - - **hooks** 提供 runner 的各种钩子。 +目前支持的 YOLO 系列算法如下: -## 文档使用指南 +
+支持的算法 + +- YOLOv5 +- YOLOX +- RTMDet +- YOLOv6 +- YOLOv7 +- PPYOLOE +- YOLOv8 + +
+ +目前支持的任务如下: + +
+支持的任务 + +- 目标检测 +- 旋转框目标检测 + +
+ +MMYOLO 支持在 Linux、Windows、macOS 上运行, 支持 PyTorch 1.7 及其以上版本运行。它具有如下三个特性: + +- 🕹️ **统一便捷的算法评测** + + MMYOLO 统一了各类 YOLO 算法模块的实现,并提供了统一的评测流程,用户可以公平便捷地进行对比分析。 + +- 📚 **丰富的入门和进阶文档** + + MMYOLO 提供了从入门到部署到进阶和算法解析等一系列文档,方便不同用户快速上手和扩展。 + +- 🧩 **模块化设计** + + MMYOLO 将框架解耦成不同的模块组件,通过组合不同的模块和训练测试策略,用户可以便捷地构建自定义模型。 + +基类-P5 + 图为 RangeKing@GitHub 提供,非常感谢! + +## 本文档使用指南 MMYOLO 中将文档结构分成 6 个部分,对应不同需求的用户。 @@ -35,7 +62,7 @@ MMYOLO 中将文档结构分成 6 个部分,对应不同需求的用户。 - **推荐专题**。本部分是 MMYOLO 中提供的以主题形式的精华文档,包括了 MMYOLO 中大量的特性等。强烈推荐使用 MMYOLO 的所有用户阅读 - **常用功能**。本部分提供了训练测试过程中用户经常会用到的各类常用功能,用户可以在用到时候再次查阅 - **实用工具**。本部分是 tools 下使用工具的汇总文档,便于大家能够快速的愉快使用 MMYOLO 中提供的各类脚本 -- **基础和进阶教程**。本部分设计到 MMYOLO 中的一些基本概念和进阶教程等,适合想详细了解 MMYOLO 设计思想和结构设计的用户 -- **其他**。其余部分包括 模型仓库、说明和接口文档等等 +- **基础和进阶教程**。本部分涉及到 MMYOLO 中的一些基本概念和进阶教程等,适合想详细了解 MMYOLO 设计思想和结构设计的用户 +- **其他**。其余部分包括模型仓库、说明和接口文档等等 -不同需求的用户可以按需选择你心怡的内容阅读。如果你对本文档有不同异议或者更好的优化办法,欢迎给 MMYOLO 提 PR ~ +不同需求的用户可以按需选择你心怡的内容阅读。如果你对本文档有异议或者更好的优化办法,欢迎给 MMYOLO 提 PR ~, 请参考 [如何给 MMYOLO 贡献代码](../recommended_topics/contributing.md) diff --git a/docs/zh_cn/index.rst b/docs/zh_cn/index.rst index d3473e70..1138e9c3 100644 --- a/docs/zh_cn/index.rst +++ b/docs/zh_cn/index.rst @@ -1,6 +1,6 @@ 欢迎来到 MMYOLO 中文文档! ======================================= -您可以在页面左下角切换中英文文档。 +您可以在页面右上角切换中英文文档。 .. toctree:: :maxdepth: 2 @@ -18,16 +18,17 @@ :maxdepth: 2 :caption: 推荐专题 - featured_topics/contributing.md - featured_topics/model_design.md - featured_topics/industry_examples.md - featured_topics/dataset_preparation.md - featured_topics/algorithm_descriptions/index.rst - featured_topics/replace_backbone.md - featured_topics/labeling_to_deployment_tutorials.md - featured_topics/visualization.md - featured_topics/deploy/index.rst - featured_topics/troubleshooting_steps.md + recommended_topics/contributing.md + recommended_topics/model_design.md + recommended_topics/algorithm_descriptions/index.rst + recommended_topics/replace_backbone.md + recommended_topics/labeling_to_deployment_tutorials.md + recommended_topics/visualization.md + recommended_topics/deploy/index.rst + recommended_topics/troubleshooting_steps.md + recommended_topics/industry_examples.md + recommended_topics/mm_basics.md + recommended_topics/dataset_preparation.md .. toctree:: :maxdepth: 2 @@ -40,10 +41,11 @@ common_usage/freeze_layers.md common_usage/output_predictions.md common_usage/set_random_seed.md + common_usage/module_combination.md common_usage/mim_usage.md common_usage/multi_necks.md common_usage/specify_device.md - common_usage/module_combination.md + .. toctree:: :maxdepth: 2 @@ -51,15 +53,15 @@ useful_tools/browse_coco_json.md useful_tools/browse_dataset.md + useful_tools/print_config.md useful_tools/dataset_analysis.md + useful_tools/optimize_anchors.md + useful_tools/extract_subcoco.md + useful_tools/vis_scheduler.md useful_tools/dataset_converters.md useful_tools/download_dataset.md - useful_tools/extract_subcoco.md useful_tools/log_analysis.md useful_tools/model_converters.md - useful_tools/optimize_anchors.md - useful_tools/print_config.md - useful_tools/vis_scheduler.md .. toctree:: :maxdepth: 2 @@ -67,8 +69,10 @@ tutorials/config.md tutorials/data_flow.md + tutorials/custom_installation.md tutorials/faq.md + .. 
toctree:: :maxdepth: 1 :caption: 进阶教程 diff --git a/docs/zh_cn/notes/code_style.md b/docs/zh_cn/notes/code_style.md index 4634016d..fc6120cc 100644 --- a/docs/zh_cn/notes/code_style.md +++ b/docs/zh_cn/notes/code_style.md @@ -1,8 +1,8 @@ -## 代码规范 +# 代码规范 -### 代码规范标准 +## 代码规范标准 -#### PEP 8 —— Python 官方代码规范 +### PEP 8 —— Python 官方代码规范 [Python 官方的代码风格指南](https://www.python.org/dev/peps/pep-0008/),包含了以下几个方面的内容: @@ -46,7 +46,7 @@ hypot2 = x * x + y * y 这一规范是为了指示不同优先级,但 OpenMMLab 的设置中通常没有启用 yapf 的 `ARITHMETIC_PRECEDENCE_INDICATION` 选项,因而格式规范工具不会按照推荐样式格式化,以设置为准。 ::: -#### Google 开源项目风格指南 +### Google 开源项目风格指南 [Google 使用的编程风格指南](https://google.github.io/styleguide/pyguide.html),包括了 Python 相关的章节。相较于 PEP 8,该指南提供了更为详尽的代码指南。该指南包括了语言规范和风格规范两个部分。 @@ -70,15 +70,15 @@ from mmcv.cnn.bricks import Conv2d, build_norm_layer, DropPath, MaxPool2d, \ from ...utils import is_str # 最多向上回溯一层,过多的回溯容易导致结构混乱 ``` -OpenMMLab 项目使用 pre-commit 工具自动格式化代码,详情见[贡献代码](../featured_topics/contributing.md#代码风格)。 +OpenMMLab 项目使用 pre-commit 工具自动格式化代码,详情见[贡献代码](../recommended_topics/contributing.md#代码风格)。 -### 命名规范 +## 命名规范 -#### 命名规范的重要性 +### 命名规范的重要性 优秀的命名是良好代码可读的基础。基础的命名规范对各类变量的命名做了要求,使读者可以方便地根据代码名了解变量是一个类 / 局部变量 / 全局变量等。而优秀的命名则需要代码作者对于变量的功能有清晰的认识,以及良好的表达能力,从而使读者根据名称就能了解其含义,甚至帮助了解该段代码的功能。 -#### 基础命名规范 +### 基础命名规范 | 类型 | 公有 | 私有 | | --------------- | ---------------- | ------------------ | @@ -99,7 +99,7 @@ OpenMMLab 项目使用 pre-commit 工具自动格式化代码,详情见[贡献 - 尽量不要使用过于简单的命名,除了约定俗成的循环变量 i,文件变量 f,错误变量 e 等。 - 不会被用到的变量可以命名为 \_,逻辑检查器会将其忽略。 -#### 命名技巧 +### 命名技巧 良好的变量命名需要保证三点: @@ -136,13 +136,13 @@ def __init__(self, in_channels, out_channels): 注意避免非常规或统一约定的缩写,如 nb -> num_blocks,in_nc -> in_channels -### docstring 规范 +## docstring 规范 -#### 为什么要写 docstring +### 为什么要写 docstring docstring 是对一个类、一个函数功能与 API 接口的详细描述,有两个功能,一是帮助其他开发者了解代码功能,方便 debug 和复用代码;二是在 Readthedocs 文档中自动生成相关的 API reference 文档,帮助不了解源代码的社区用户使用相关功能。 -#### 如何写 docstring +### 如何写 docstring 与注释不同,一份规范的 docstring 有着严格的格式要求,以便于 Python 解释器以及 sphinx 进行文档解析,详细的 docstring 约定参见 [PEP 257](https://www.python.org/dev/peps/pep-0257/)。此处以例子的形式介绍各种文档的标准格式,参考格式为 [Google 风格](https://zh-google-styleguide.readthedocs.io/en/latest/google-python-styleguide/python_style_rules/#comments)。 @@ -372,13 +372,13 @@ docstring 是对一个类、一个函数功能与 API 接口的详细描述, - [Example Google Style Python Docstrings ‒ napoleon 0.7 documentation](https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html#example-google) ``` -### 注释规范 +## 注释规范 -#### 为什么要写注释 +### 为什么要写注释 对于一个开源项目,团队合作以及社区之间的合作是必不可少的,因而尤其要重视合理的注释。不写注释的代码,很有可能过几个月自己也难以理解,造成额外的阅读和修改成本。 -#### 如何写注释 +### 如何写注释 最需要写注释的是代码中那些技巧性的部分。如果你在下次代码审查的时候必须解释一下,那么你应该现在就给它写注释。对于复杂的操作,应该在其操作开始前写上若干行注释。对于不是一目了然的代码,应在其行尾添加注释。 —— Google 开源项目风格指南 @@ -414,7 +414,7 @@ if i & (i-1) == 0: # True if i bitwise and i-1 is 0. self._reversed_padding_repeated_twice = _reverse_repeat_tuple(self.padding, 2) ``` -#### 注释示例 +### 注释示例 1. 出自 `mmcv/utils/registry.py`,对于较为复杂的逻辑结构,通过注释,明确了优先级关系。 @@ -447,9 +447,9 @@ self._reversed_padding_repeated_twice = _reverse_repeat_tuple(self.padding, 2) torch.save(checkpoint, file) ``` -### 类型注解 +## 类型注解 -#### 为什么要写类型注解 +### 为什么要写类型注解 类型注解是对函数中变量的类型做限定或提示,为代码的安全性提供保障、增强代码的可读性、避免出现类型相关的错误。 Python 没有对类型做强制限制,类型注解只起到一个提示作用,通常你的 IDE 会解析这些类型注解,然后在你调用相关代码时对类型做提示。另外也有类型注解检查工具,这些工具会根据类型注解,对代码中可能出现的问题进行检查,减少 bug 的出现。 @@ -461,7 +461,7 @@ Python 没有对类型做强制限制,类型注解只起到一个提示作用 4. 难以理解的代码请进行注释 5. 若代码中的类型已经稳定,可以进行注释. 对于一份成熟的代码,多数情况下,即使注释了所有的函数,也不会丧失太多的灵活性. -#### 如何写类型注解 +### 如何写类型注解 1. 
函数 / 方法类型注解,通常不对 self 和 cls 注释。 @@ -586,7 +586,7 @@ Python 没有对类型做强制限制,类型注解只起到一个提示作用 更多关于类型注解的写法请参考 [typing](https://docs.python.org/3/library/typing.html)。 -#### 类型注解检查工具 +### 类型注解检查工具 [mypy](https://mypy.readthedocs.io/en/stable/) 是一个 Python 静态类型检查工具。根据你的类型注解,mypy 会检查传参、赋值等操作是否符合类型注解,从而避免可能出现的 bug。 diff --git a/docs/zh_cn/featured_topics/algorithm_descriptions/index.rst b/docs/zh_cn/recommended_topics/algorithm_descriptions/index.rst similarity index 100% rename from docs/zh_cn/featured_topics/algorithm_descriptions/index.rst rename to docs/zh_cn/recommended_topics/algorithm_descriptions/index.rst diff --git a/docs/zh_cn/featured_topics/algorithm_descriptions/rtmdet_description.md b/docs/zh_cn/recommended_topics/algorithm_descriptions/rtmdet_description.md similarity index 100% rename from docs/zh_cn/featured_topics/algorithm_descriptions/rtmdet_description.md rename to docs/zh_cn/recommended_topics/algorithm_descriptions/rtmdet_description.md diff --git a/docs/zh_cn/featured_topics/algorithm_descriptions/yolov5_description.md b/docs/zh_cn/recommended_topics/algorithm_descriptions/yolov5_description.md similarity index 100% rename from docs/zh_cn/featured_topics/algorithm_descriptions/yolov5_description.md rename to docs/zh_cn/recommended_topics/algorithm_descriptions/yolov5_description.md diff --git a/docs/zh_cn/featured_topics/algorithm_descriptions/yolov6_description.md b/docs/zh_cn/recommended_topics/algorithm_descriptions/yolov6_description.md similarity index 100% rename from docs/zh_cn/featured_topics/algorithm_descriptions/yolov6_description.md rename to docs/zh_cn/recommended_topics/algorithm_descriptions/yolov6_description.md diff --git a/docs/zh_cn/featured_topics/algorithm_descriptions/yolov8_description.md b/docs/zh_cn/recommended_topics/algorithm_descriptions/yolov8_description.md similarity index 100% rename from docs/zh_cn/featured_topics/algorithm_descriptions/yolov8_description.md rename to docs/zh_cn/recommended_topics/algorithm_descriptions/yolov8_description.md diff --git a/docs/zh_cn/featured_topics/contributing.md b/docs/zh_cn/recommended_topics/contributing.md similarity index 95% rename from docs/zh_cn/featured_topics/contributing.md rename to docs/zh_cn/recommended_topics/contributing.md index 16c76b0a..ff3b2ca3 100644 --- a/docs/zh_cn/featured_topics/contributing.md +++ b/docs/zh_cn/recommended_topics/contributing.md @@ -1,4 +1,4 @@ -## 如何给 MMYOLO 贡献代码 +# 如何给 MMYOLO 贡献代码 欢迎加入 MMYOLO 社区,我们致力于打造最前沿的计算机视觉基础库,我们欢迎任何类型的贡献,包括但不限于 @@ -23,11 +23,11 @@ 1. 提交 issue,确认添加文档的必要性。 2. 添加文档,提交拉取请求。 -### 拉取请求工作流 +## 拉取请求工作流 如果你对拉取请求不了解,没关系,接下来的内容将会从零开始,一步一步地指引你如何创建一个拉取请求。如果你想深入了解拉取请求的开发模式,可以参考 github [官方文档](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/about-pull-requests) -#### 1. 复刻仓库 +### 1. 复刻仓库 当你第一次提交拉取请求时,先复刻 OpenMMLab 原代码库,点击 GitHub 页面右上角的 **Fork** 按钮,复刻后的代码库将会出现在你的 GitHub 个人主页下。 @@ -59,7 +59,7 @@ upstream git@github.com:open-mmlab/mmyolo (push) 这里对 origin 和 upstream 进行一个简单的介绍,当我们使用 git clone 来克隆代码时,会默认创建一个 origin 的 remote,它指向我们克隆的代码库地址,而 upstream 则是我们自己添加的,用来指向原始代码库地址。当然如果你不喜欢他叫 upstream,也可以自己修改,比如叫 open-mmlab。我们通常向 origin 提交代码(即 fork 下来的远程仓库),然后向 upstream 提交一个 pull request。如果提交的代码和最新的代码发生冲突,再从 upstream 拉取最新的代码,和本地分支解决冲突,再提交到 origin。 ``` -#### 2. 配置 pre-commit +### 2. 配置 pre-commit 在本地开发环境中,我们使用 [pre-commit](https://pre-commit.com/#intro) 来检查代码风格,以确保代码风格的统一。在提交代码,需要先安装 pre-commit(需要在 MMYOLO 目录下执行): @@ -98,7 +98,7 @@ pre-commit run --all-files -c .pre-commit-config-zh-cn.yaml git commit -m "xxx" --no-verify ``` -#### 3. 创建开发分支 +### 3. 
创建开发分支 安装完 pre-commit 之后,我们需要基于 dev 创建开发分支,建议的分支命名规则为 `username/pr_name`。 @@ -112,7 +112,7 @@ git checkout -b yhc/refactor_contributing_doc git pull upstream dev ``` -#### 4. 提交代码并在本地通过单元测试 +### 4. 提交代码并在本地通过单元测试 - MMYOLO 引入了 mypy 来做静态类型检查,以增加代码的鲁棒性。因此我们在提交代码时,需要补充 Type Hints。具体规则可以参考[教程](https://zhuanlan.zhihu.com/p/519335398)。 @@ -130,7 +130,7 @@ git pull upstream dev - 如果修改/添加了文档,参考[指引](#文档渲染)确认文档渲染正常。 -#### 5. 推送代码到远程 +### 5. 推送代码到远程 代码通过单元测试和 pre-commit 检查后,将代码推送到远程仓库,如果是第一次推送,可以在 `git push` 后加上 `-u` 参数以关联远程分支 @@ -140,7 +140,7 @@ git push -u origin {branch_name} 这样下次就可以直接使用 `git push` 命令推送代码了,而无需指定分支和远程仓库。 -#### 6. 提交拉取请求(PR) +### 6. 提交拉取请求(PR) (1) 在 GitHub 的 Pull request 界面创建拉取请求 @@ -177,7 +177,7 @@ MMYOLO 会在 Linux 上,基于不同版本的 Python、PyTorch 对提交的代 所有 reviewer 同意合入 PR 后,我们会尽快将 PR 合并到 dev 分支。 -#### 7. 解决冲突 +### 7. 解决冲突 随着时间的推移,我们的代码库会不断更新,这时候,如果你的 PR 与 dev 分支存在冲突,你需要解决冲突,解决冲突的方式有两种: @@ -195,9 +195,9 @@ git merge upstream/dev 如果你非常善于处理冲突,那么可以使用 rebase 的方式来解决冲突,因为这能够保证你的 commit log 的整洁。如果你不太熟悉 `rebase` 的使用,那么可以使用 `merge` 的方式来解决冲突。 -### 指引 +## 指引 -#### 单元测试 +### 单元测试 在提交修复代码错误或新增特性的拉取请求时,我们应该尽可能的让单元测试覆盖所有提交的代码,计算单元测试覆盖率的方法如下 @@ -207,7 +207,7 @@ python -m coverage html # check file in htmlcov/index.html ``` -#### 文档渲染 +### 文档渲染 在提交修复代码错误或新增特性的拉取请求时,可能会需要修改/新增模块的 docstring。我们需要确认渲染后的文档样式是正确的。 本地生成渲染后的文档的方法如下 @@ -220,9 +220,9 @@ make html # check file in ./docs/zh_cn/_build/html/index.html ``` -### 代码风格 +## 代码风格 -#### Python +### Python [PEP8](https://www.python.org/dev/peps/pep-0008/) 作为 OpenMMLab 算法库首选的代码规范,我们使用以下工具检查和格式化代码 @@ -233,21 +233,21 @@ make html - [mdformat](https://github.com/executablebooks/mdformat):检查 markdown 文件的工具 - [docformatter](https://github.com/myint/docformatter):格式化 docstring 的工具 -yapf 和 isort 的配置可以在 [setup.cfg](./setup.cfg) 找到 +yapf 和 isort 的配置可以在 [setup.cfg](../../../setup.cfg) 找到 通过配置 [pre-commit hook](https://pre-commit.com/) ,我们可以在提交代码时自动检查和格式化 `flake8`、`yapf`、`isort`、`trailing whitespaces`、`markdown files`, 修复 `end-of-files`、`double-quoted-strings`、`python-encoding-pragma`、`mixed-line-ending`,调整 `requirments.txt` 的包顺序。 -pre-commit 钩子的配置可以在 [.pre-commit-config](./.pre-commit-config.yaml) 找到。 +pre-commit 钩子的配置可以在 [.pre-commit-config](../../../.pre-commit-config.yaml) 找到。 pre-commit 具体的安装使用方式见[拉取请求](#2-配置-pre-commit)。 更具体的规范请参考 [OpenMMLab 代码规范](../notes/code_style.md)。 -#### C++ and CUDA +### C++ and CUDA C++ 和 CUDA 的代码规范遵从 [Google C++ Style Guide](https://google.github.io/styleguide/cppguide.html) -### 拉取请求规范 +## 拉取请求规范 1. 
使用 [pre-commit hook](https://pre-commit.com),尽量减少代码风格相关问题 diff --git a/docs/zh_cn/featured_topics/dataset_preparation.md b/docs/zh_cn/recommended_topics/dataset_preparation.md similarity index 100% rename from docs/zh_cn/featured_topics/dataset_preparation.md rename to docs/zh_cn/recommended_topics/dataset_preparation.md diff --git a/docs/zh_cn/featured_topics/deploy/easydeploy_guide.md b/docs/zh_cn/recommended_topics/deploy/easydeploy_guide.md similarity index 90% rename from docs/zh_cn/featured_topics/deploy/easydeploy_guide.md rename to docs/zh_cn/recommended_topics/deploy/easydeploy_guide.md index 80a69d25..8f337e6c 100644 --- a/docs/zh_cn/featured_topics/deploy/easydeploy_guide.md +++ b/docs/zh_cn/recommended_topics/deploy/easydeploy_guide.md @@ -1,4 +1,4 @@ -# EasyDeploy 部署必备教程 +# EasyDeploy 部署 本项目作为 MMYOLO 的部署 project 单独存在,意图剥离 MMDeploy 当前的体系,独自支持用户完成模型训练后的转换和部署功能,使用户的学习和工程成本下降。 diff --git a/docs/zh_cn/featured_topics/deploy/index.rst b/docs/zh_cn/recommended_topics/deploy/index.rst similarity index 70% rename from docs/zh_cn/featured_topics/deploy/index.rst rename to docs/zh_cn/recommended_topics/deploy/index.rst index 3c545466..3d5f08bc 100644 --- a/docs/zh_cn/featured_topics/deploy/index.rst +++ b/docs/zh_cn/recommended_topics/deploy/index.rst @@ -5,17 +5,8 @@ MMDeploy 部署必备教程 :maxdepth: 1 mmdeploy_guide.md - - -MMDeploy 部署 YOLOv5 全流程说明 -************************ - -.. toctree:: - :maxdepth: 1 - mmdeploy_yolov5.md - EasyDeploy 部署必备教程 ************************ diff --git a/docs/zh_cn/featured_topics/deploy/mmdeploy_guide.md b/docs/zh_cn/recommended_topics/deploy/mmdeploy_guide.md similarity index 98% rename from docs/zh_cn/featured_topics/deploy/mmdeploy_guide.md rename to docs/zh_cn/recommended_topics/deploy/mmdeploy_guide.md index e0181d54..a6a98d3d 100644 --- a/docs/zh_cn/featured_topics/deploy/mmdeploy_guide.md +++ b/docs/zh_cn/recommended_topics/deploy/mmdeploy_guide.md @@ -1,4 +1,4 @@ -# 部署必备教程 +# MMDeploy 部署 ## MMDeploy 介绍 @@ -83,7 +83,7 @@ test_dataloader = dict( `test_pipeline` 为部署时对输入图像进行处理的流程,`LetterResize` 控制了输入图像的尺寸,同时限制了导出模型所能接受的输入尺寸。 -`test_dataloader` 为部署时构建数据加载器配置,`batch_shapes_cfg` 控制了是否启用 `batch_shapes` 策略,详细内容可以参考 [yolov5 配置文件说明](https://github.com/open-mmlab/mmyolo/blob/main/docs/zh_cn/user_guides/config.md) 。 +`test_dataloader` 为部署时构建数据加载器配置,`batch_shapes_cfg` 控制了是否启用 `batch_shapes` 策略,详细内容可以参考 [yolov5 配置文件说明](../../tutorials/config.md) 。 #### (2) 部署配置文件介绍 diff --git a/docs/zh_cn/featured_topics/deploy/mmdeploy_yolov5.md b/docs/zh_cn/recommended_topics/deploy/mmdeploy_yolov5.md similarity index 99% rename from docs/zh_cn/featured_topics/deploy/mmdeploy_yolov5.md rename to docs/zh_cn/recommended_topics/deploy/mmdeploy_yolov5.md index 99c0895d..c48a6406 100644 --- a/docs/zh_cn/featured_topics/deploy/mmdeploy_yolov5.md +++ b/docs/zh_cn/recommended_topics/deploy/mmdeploy_yolov5.md @@ -4,7 +4,7 @@ ## 模型训练和测试 -模型训练和测试请参考 [YOLOv5 从入门到部署全流程](../get_started/15_minutes.md) 。 +模型训练和测试请参考 [YOLOv5 从入门到部署全流程](./mmdeploy_yolov5.md) 。 ## 准备 MMDeploy 运行环境 diff --git a/docs/zh_cn/featured_topics/industry_examples.md b/docs/zh_cn/recommended_topics/industry_examples.md similarity index 100% rename from docs/zh_cn/featured_topics/industry_examples.md rename to docs/zh_cn/recommended_topics/industry_examples.md diff --git a/docs/zh_cn/featured_topics/labeling_to_deployment_tutorials.md b/docs/zh_cn/recommended_topics/labeling_to_deployment_tutorials.md similarity index 100% rename from docs/zh_cn/featured_topics/labeling_to_deployment_tutorials.md rename to 
docs/zh_cn/recommended_topics/labeling_to_deployment_tutorials.md diff --git a/docs/zh_cn/recommended_topics/mm_basics.md b/docs/zh_cn/recommended_topics/mm_basics.md new file mode 100644 index 00000000..2d8098b1 --- /dev/null +++ b/docs/zh_cn/recommended_topics/mm_basics.md @@ -0,0 +1 @@ +# MM 系列仓库必备基础 diff --git a/docs/zh_cn/featured_topics/model_design.md b/docs/zh_cn/recommended_topics/model_design.md similarity index 100% rename from docs/zh_cn/featured_topics/model_design.md rename to docs/zh_cn/recommended_topics/model_design.md diff --git a/docs/zh_cn/featured_topics/replace_backbone.md b/docs/zh_cn/recommended_topics/replace_backbone.md similarity index 52% rename from docs/zh_cn/featured_topics/replace_backbone.md rename to docs/zh_cn/recommended_topics/replace_backbone.md index 4514fefe..d78a2520 100644 --- a/docs/zh_cn/featured_topics/replace_backbone.md +++ b/docs/zh_cn/recommended_topics/replace_backbone.md @@ -28,162 +28,162 @@ OpenMMLab 2.0 体系中 MMYOLO、MMDetection、MMClassification、MMSelfsup 中 1. 假设想将 `ResNet-50` 作为 `YOLOv5` 的主干网络,则配置文件如下: - ```python - _base_ = './yolov5_s-v61_syncbn_8xb16-300e_coco.py' +```python +_base_ = './yolov5_s-v61_syncbn_8xb16-300e_coco.py' - deepen_factor = _base_.deepen_factor - widen_factor = 1.0 - channels = [512, 1024, 2048] +deepen_factor = _base_.deepen_factor +widen_factor = 1.0 +channels = [512, 1024, 2048] - model = dict( - backbone=dict( - _delete_=True, # 将 _base_ 中关于 backbone 的字段删除 - type='mmdet.ResNet', # 使用 mmdet 中的 ResNet - depth=50, - num_stages=4, - out_indices=(1, 2, 3), - frozen_stages=1, - norm_cfg=dict(type='BN', requires_grad=True), - norm_eval=True, - style='pytorch', - init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), - neck=dict( - type='YOLOv5PAFPN', - widen_factor=widen_factor, - in_channels=channels, # 注意:ResNet-50 输出的3个通道是 [512, 1024, 2048],和原先的 yolov5-s neck 不匹配,需要更改 - out_channels=channels), - bbox_head=dict( - type='YOLOv5Head', - head_module=dict( - type='YOLOv5HeadModule', - in_channels=channels, # head 部分输入通道也要做相应更改 - widen_factor=widen_factor)) - ) - ``` +model = dict( + backbone=dict( + _delete_=True, # 将 _base_ 中关于 backbone 的字段删除 + type='mmdet.ResNet', # 使用 mmdet 中的 ResNet + depth=50, + num_stages=4, + out_indices=(1, 2, 3), + frozen_stages=1, + norm_cfg=dict(type='BN', requires_grad=True), + norm_eval=True, + style='pytorch', + init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')), + neck=dict( + type='YOLOv5PAFPN', + widen_factor=widen_factor, + in_channels=channels, # 注意:ResNet-50 输出的3个通道是 [512, 1024, 2048],和原先的 yolov5-s neck 不匹配,需要更改 + out_channels=channels), + bbox_head=dict( + type='YOLOv5Head', + head_module=dict( + type='YOLOv5HeadModule', + in_channels=channels, # head 部分输入通道也要做相应更改 + widen_factor=widen_factor)) +) +``` 2. 
假设想将 `SwinTransformer-Tiny` 作为 `YOLOv5` 的主干网络,则配置文件如下: - ```python - _base_ = './yolov5_s-v61_syncbn_8xb16-300e_coco.py' +```python +_base_ = './yolov5_s-v61_syncbn_8xb16-300e_coco.py' - deepen_factor = _base_.deepen_factor - widen_factor = 1.0 - channels = [192, 384, 768] - checkpoint_file = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_tiny_patch4_window7_224.pth' # noqa +deepen_factor = _base_.deepen_factor +widen_factor = 1.0 +channels = [192, 384, 768] +checkpoint_file = 'https://github.com/SwinTransformer/storage/releases/download/v1.0.0/swin_tiny_patch4_window7_224.pth' # noqa - model = dict( - backbone=dict( - _delete_=True, # 将 _base_ 中关于 backbone 的字段删除 - type='mmdet.SwinTransformer', # 使用 mmdet 中的 SwinTransformer - embed_dims=96, - depths=[2, 2, 6, 2], - num_heads=[3, 6, 12, 24], - window_size=7, - mlp_ratio=4, - qkv_bias=True, - qk_scale=None, - drop_rate=0., - attn_drop_rate=0., - drop_path_rate=0.2, - patch_norm=True, - out_indices=(1, 2, 3), - with_cp=False, - convert_weights=True, - init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file)), - neck=dict( - type='YOLOv5PAFPN', - deepen_factor=deepen_factor, - widen_factor=widen_factor, - in_channels=channels, # 注意:SwinTransformer-Tiny 输出的3个通道是 [192, 384, 768],和原先的 yolov5-s neck 不匹配,需要更改 - out_channels=channels), - bbox_head=dict( - type='YOLOv5Head', - head_module=dict( - type='YOLOv5HeadModule', - in_channels=channels, # head 部分输入通道也要做相应更改 - widen_factor=widen_factor)) - ) - ``` +model = dict( + backbone=dict( + _delete_=True, # 将 _base_ 中关于 backbone 的字段删除 + type='mmdet.SwinTransformer', # 使用 mmdet 中的 SwinTransformer + embed_dims=96, + depths=[2, 2, 6, 2], + num_heads=[3, 6, 12, 24], + window_size=7, + mlp_ratio=4, + qkv_bias=True, + qk_scale=None, + drop_rate=0., + attn_drop_rate=0., + drop_path_rate=0.2, + patch_norm=True, + out_indices=(1, 2, 3), + with_cp=False, + convert_weights=True, + init_cfg=dict(type='Pretrained', checkpoint=checkpoint_file)), + neck=dict( + type='YOLOv5PAFPN', + deepen_factor=deepen_factor, + widen_factor=widen_factor, + in_channels=channels, # 注意:SwinTransformer-Tiny 输出的3个通道是 [192, 384, 768],和原先的 yolov5-s neck 不匹配,需要更改 + out_channels=channels), + bbox_head=dict( + type='YOLOv5Head', + head_module=dict( + type='YOLOv5HeadModule', + in_channels=channels, # head 部分输入通道也要做相应更改 + widen_factor=widen_factor)) +) +``` ### 使用在 MMClassification 中实现的主干网络 1. 
假设想将 `ConvNeXt-Tiny` 作为 `YOLOv5` 的主干网络,则配置文件如下: - ```python - _base_ = './yolov5_s-v61_syncbn_8xb16-300e_coco.py' +```python +_base_ = './yolov5_s-v61_syncbn_8xb16-300e_coco.py' - # 请先使用命令: mim install "mmcls>=1.0.0rc2",安装 mmcls - # 导入 mmcls.models 使得可以调用 mmcls 中注册的模块 - custom_imports = dict(imports=['mmcls.models'], allow_failed_imports=False) - checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-tiny_3rdparty_32xb128-noema_in1k_20220301-795e9634.pth' # noqa - deepen_factor = _base_.deepen_factor - widen_factor = 1.0 - channels = [192, 384, 768] +# 请先使用命令: mim install "mmcls>=1.0.0rc2",安装 mmcls +# 导入 mmcls.models 使得可以调用 mmcls 中注册的模块 +custom_imports = dict(imports=['mmcls.models'], allow_failed_imports=False) +checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/convnext/downstream/convnext-tiny_3rdparty_32xb128-noema_in1k_20220301-795e9634.pth' # noqa +deepen_factor = _base_.deepen_factor +widen_factor = 1.0 +channels = [192, 384, 768] - model = dict( - backbone=dict( - _delete_=True, # 将 _base_ 中关于 backbone 的字段删除 - type='mmcls.ConvNeXt', # 使用 mmcls 中的 ConvNeXt - arch='tiny', - out_indices=(1, 2, 3), - drop_path_rate=0.4, - layer_scale_init_value=1.0, - gap_before_final_norm=False, - init_cfg=dict( - type='Pretrained', checkpoint=checkpoint_file, - prefix='backbone.')), # MMCls 中主干网络的预训练权重含义 prefix='backbone.',为了正常加载权重,需要把这个 prefix 去掉。 - neck=dict( - type='YOLOv5PAFPN', - deepen_factor=deepen_factor, - widen_factor=widen_factor, - in_channels=channels, # 注意:ConvNeXt-Tiny 输出的3个通道是 [192, 384, 768],和原先的 yolov5-s neck 不匹配,需要更改 - out_channels=channels), - bbox_head=dict( - type='YOLOv5Head', - head_module=dict( - type='YOLOv5HeadModule', - in_channels=channels, # head 部分输入通道也要做相应更改 - widen_factor=widen_factor)) - ) - ``` +model = dict( + backbone=dict( + _delete_=True, # 将 _base_ 中关于 backbone 的字段删除 + type='mmcls.ConvNeXt', # 使用 mmcls 中的 ConvNeXt + arch='tiny', + out_indices=(1, 2, 3), + drop_path_rate=0.4, + layer_scale_init_value=1.0, + gap_before_final_norm=False, + init_cfg=dict( + type='Pretrained', checkpoint=checkpoint_file, + prefix='backbone.')), # MMCls 中主干网络的预训练权重含义 prefix='backbone.',为了正常加载权重,需要把这个 prefix 去掉。 + neck=dict( + type='YOLOv5PAFPN', + deepen_factor=deepen_factor, + widen_factor=widen_factor, + in_channels=channels, # 注意:ConvNeXt-Tiny 输出的3个通道是 [192, 384, 768],和原先的 yolov5-s neck 不匹配,需要更改 + out_channels=channels), + bbox_head=dict( + type='YOLOv5Head', + head_module=dict( + type='YOLOv5HeadModule', + in_channels=channels, # head 部分输入通道也要做相应更改 + widen_factor=widen_factor)) +) +``` 2. 
假设想将 `MobileNetV3-small` 作为 `YOLOv5` 的主干网络,则配置文件如下: - ```python - _base_ = './yolov5_s-v61_syncbn_8xb16-300e_coco.py' +```python +_base_ = './yolov5_s-v61_syncbn_8xb16-300e_coco.py' - # 请先使用命令: mim install "mmcls>=1.0.0rc2",安装 mmcls - # 导入 mmcls.models 使得可以调用 mmcls 中注册的模块 - custom_imports = dict(imports=['mmcls.models'], allow_failed_imports=False) - checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/mobilenet_v3/convert/mobilenet_v3_small-8427ecf0.pth' # noqa - deepen_factor = _base_.deepen_factor - widen_factor = 1.0 - channels = [24, 48, 96] +# 请先使用命令: mim install "mmcls>=1.0.0rc2",安装 mmcls +# 导入 mmcls.models 使得可以调用 mmcls 中注册的模块 +custom_imports = dict(imports=['mmcls.models'], allow_failed_imports=False) +checkpoint_file = 'https://download.openmmlab.com/mmclassification/v0/mobilenet_v3/convert/mobilenet_v3_small-8427ecf0.pth' # noqa +deepen_factor = _base_.deepen_factor +widen_factor = 1.0 +channels = [24, 48, 96] - model = dict( - backbone=dict( - _delete_=True, # 将 _base_ 中关于 backbone 的字段删除 - type='mmcls.MobileNetV3', # 使用 mmcls 中的 MobileNetV3 - arch='small', - out_indices=(3, 8, 11), # 修改 out_indices - init_cfg=dict( - type='Pretrained', - checkpoint=checkpoint_file, - prefix='backbone.')), # MMCls 中主干网络的预训练权重含义 prefix='backbone.',为了正常加载权重,需要把这个 prefix 去掉。 - neck=dict( - type='YOLOv5PAFPN', - deepen_factor=deepen_factor, - widen_factor=widen_factor, - in_channels=channels, # 注意:MobileNetV3-small 输出的3个通道是 [24, 48, 96],和原先的 yolov5-s neck 不匹配,需要更改 - out_channels=channels), - bbox_head=dict( - type='YOLOv5Head', - head_module=dict( - type='YOLOv5HeadModule', - in_channels=channels, # head 部分输入通道也要做相应更改 - widen_factor=widen_factor)) - ) - ``` +model = dict( + backbone=dict( + _delete_=True, # 将 _base_ 中关于 backbone 的字段删除 + type='mmcls.MobileNetV3', # 使用 mmcls 中的 MobileNetV3 + arch='small', + out_indices=(3, 8, 11), # 修改 out_indices + init_cfg=dict( + type='Pretrained', + checkpoint=checkpoint_file, + prefix='backbone.')), # MMCls 中主干网络的预训练权重含义 prefix='backbone.',为了正常加载权重,需要把这个 prefix 去掉。 + neck=dict( + type='YOLOv5PAFPN', + deepen_factor=deepen_factor, + widen_factor=widen_factor, + in_channels=channels, # 注意:MobileNetV3-small 输出的3个通道是 [24, 48, 96],和原先的 yolov5-s neck 不匹配,需要更改 + out_channels=channels), + bbox_head=dict( + type='YOLOv5Head', + head_module=dict( + type='YOLOv5HeadModule', + in_channels=channels, # head 部分输入通道也要做相应更改 + widen_factor=widen_factor)) +) +``` ### 通过 MMClassification 使用 `timm` 中实现的主干网络 diff --git a/docs/zh_cn/featured_topics/troubleshooting_steps.md b/docs/zh_cn/recommended_topics/troubleshooting_steps.md similarity index 100% rename from docs/zh_cn/featured_topics/troubleshooting_steps.md rename to docs/zh_cn/recommended_topics/troubleshooting_steps.md diff --git a/docs/zh_cn/featured_topics/visualization.md b/docs/zh_cn/recommended_topics/visualization.md similarity index 99% rename from docs/zh_cn/featured_topics/visualization.md rename to docs/zh_cn/recommended_topics/visualization.md index 52e8c579..8a1b8c6f 100644 --- a/docs/zh_cn/featured_topics/visualization.md +++ b/docs/zh_cn/recommended_topics/visualization.md @@ -376,7 +376,7 @@ python tools/analysis_tools/browse_dataset.py \ - **`-p, --phase`**: 可视化数据集的阶段,只能为 `['train', 'val', 'test']` 之一,默认为 `'train'`。 - **`-n, --show-number`**: 可视化样本数量。如果没有指定,默认展示数据集的所有图片。 - **`-m, --mode`**: 可视化的模式,只能为 `['original', 'transformed', 'pipeline']` 之一。 默认为 `'transformed'`。 -- `--cfg-options` : 对配置文件的修改,参考[学习配置文件](./config.md)。 +- `--cfg-options` : 对配置文件的修改,参考[学习配置文件](../../tutorials/config.md)。 
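下面给出一个可能的使用示例(仅作演示,配置文件路径请替换为你实际使用的配置),用于展示 10 张经过数据增强处理后的训练图片:

```shell
python tools/analysis_tools/browse_dataset.py \
    configs/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py \
    -n 10 \
    -m transformed
```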
```shell `-m, --mode` 用于设置可视化的模式,默认设置为 'transformed'。 @@ -539,3 +539,5 @@ python tools/analysis_tools/vis_scheduler.py \ ```
+ +## 大图推理 (TODO) diff --git a/docs/zh_cn/tutorials/config.md b/docs/zh_cn/tutorials/config.md index 738931a3..12c7aafe 100644 --- a/docs/zh_cn/tutorials/config.md +++ b/docs/zh_cn/tutorials/config.md @@ -425,7 +425,7 @@ model = dict( 配置文件里会使用一些中间变量,例如数据集里的 `train_pipeline`/`test_pipeline`。我们在定义新的 `train_pipeline`/`test_pipeline` 之后,需要将它们传递到 `data` 里。例如,我们想在训练或测试时,改变 YOLOv5 网络的 `img_scale` 训练尺度并在训练时添加 `YOLOv5MixUp` 数据增强,`img_scale/train_pipeline/test_pipeline` 是我们想要修改的中间变量。 -**注**:使用 `YOLOv5MixUp` 数据增强时,需要将 `YOLOv5MixUp` 之前的训练数据处理流程定义在其 `pre_transform` 中。详细过程和图解可参见 [YOLOv5 原理和实现全解析](../algorithm_descriptions/yolov5_description.md)。 +**注**:使用 `YOLOv5MixUp` 数据增强时,需要将 `YOLOv5MixUp` 之前的训练数据处理流程定义在其 `pre_transform` 中。详细过程和图解可参见 [YOLOv5 原理和实现全解析](../recommended_topics/algorithm_descriptions/yolov5_description.md)。 ```python _base_ = './yolov5_s-v61_syncbn_8xb16-300e_coco.py' diff --git a/docs/zh_cn/tutorials/custom_installation.md b/docs/zh_cn/tutorials/custom_installation.md new file mode 100644 index 00000000..df9bf0cf --- /dev/null +++ b/docs/zh_cn/tutorials/custom_installation.md @@ -0,0 +1,111 @@ +# 自定义安装 + +## CUDA 版本 + +在安装 PyTorch 时,你需要指定 CUDA 的版本。如果你不清楚应该选择哪一个,请遵循我们的建议。 + +- 对于 Ampere 架构的 NVIDIA GPU,例如 GeForce 30 系列 以及 NVIDIA A100,CUDA 11 是必需的。 +- 对于更早的 NVIDIA GPU,CUDA 11 是向后兼容 (backward compatible) 的,但 CUDA 10.2 能够提供更好的兼容性,也更加轻量。 + +请确保你的 GPU 驱动版本满足最低的版本需求,参阅 NVIDIA 官方的 [CUDA 工具箱和相应的驱动版本关系表](https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html#cuda-major-component-versions__table-cuda-toolkit-driver-versions)。 + +```{note} +如果按照我们的最佳实践进行安装,CUDA 运行时库就足够了,因为我们提供相关 CUDA 代码的预编译,不需要进行本地编译。 +但如果你希望从源码进行 MMCV 的编译,或是进行其他 CUDA 算子的开发,那么就必须安装完整的 CUDA 工具链,参见 +[NVIDIA 官网](https://developer.nvidia.com/cuda-downloads) ,另外还需要确保该 CUDA 工具链的版本与 PyTorch 安装时 +的配置相匹配(如用 `conda install` 安装 PyTorch 时指定的 cudatoolkit 版本)。 +``` + +## 不使用 MIM 安装 MMEngine + +要使用 pip 而不是 MIM 来安装 MMEngine,请遵照 [MMEngine 安装指南](https://mmengine.readthedocs.io/en/latest/get_started/installation.html)。 + +例如,你可以通过以下命令安装 MMEngine: + +```shell +pip install "mmengine>=0.3.1" +``` + +## 不使用 MIM 安装 MMCV + +MMCV 包含 C++ 和 CUDA 扩展,因此其对 PyTorch 的依赖比较复杂。MIM 会自动解析这些 依赖,选择合适的 MMCV 预编译包,使安装更简单,但它并不是必需的。 + +要使用 pip 而不是 MIM 来安装 MMCV,请遵照 [MMCV 安装指南](https://mmcv.readthedocs.io/zh_CN/2.x/get_started/installation.html)。 +它需要您用指定 URL 的形式手动指定对应的 PyTorch 和 CUDA 版本。 + +例如,下述命令将会安装基于 PyTorch 1.12.x 和 CUDA 11.6 编译的 mmcv: + +```shell +pip install "mmcv>=2.0.0rc1" -f https://download.openmmlab.com/mmcv/dist/cu116/torch1.12.0/index.html +``` + +## 在 CPU 环境中安装 + +我们的代码能够建立在只使用 CPU 的环境(CUDA 不可用)。 + +在 CPU 模式下,可以进行模型训练(需要 MMCV 版本 >= `2.0.0rc1`)、测试或者推理,然而以下功能将在 CPU 模式下不能使用: + +- Deformable Convolution +- Modulated Deformable Convolution +- ROI pooling +- Deformable ROI pooling +- CARAFE: Content-Aware ReAssembly of FEatures +- SyncBatchNorm +- CrissCrossAttention: Criss-Cross Attention +- MaskedConv2d +- Temporal Interlace Shift +- nms_cuda +- sigmoid_focal_loss_cuda +- bbox_overlaps + +因此,如果尝试使用包含上述操作的模型进行训练/测试/推理,将会报错。下表列出了由于依赖上述算子而无法在 CPU 上运行的相关模型: + +| 操作 | 模型 | +| :-----------------------------------------------------: | :--------------------------------------------------------------------------------------: | +| Deformable Convolution/Modulated Deformable Convolution | DCN、Guided Anchoring、RepPoints、CentripetalNet、VFNet、CascadeRPN、NAS-FCOS、DetectoRS | +| MaskedConv2d | Guided Anchoring | +| CARAFE | CARAFE | +| SyncBatchNorm | ResNeSt | + +## 在 Google Colab 中安装 + +[Google Colab](https://colab.research.google.com/) 通常已经包含了 PyTorch 环境,因此我们只需要安装 
MMEngine、MMCV、MMDetection 和 MMYOLO 即可,命令如下: + +**步骤 1.** 使用 [MIM](https://github.com/open-mmlab/mim) 安装 [MMEngine](https://github.com/open-mmlab/mmengine) 、 [MMCV](https://github.com/open-mmlab/mmcv) 和 [MMDetection](https://github.com/open-mmlab/mmdetection) 。 + +```shell +!pip3 install openmim +!mim install "mmengine>=0.3.1" +!mim install "mmcv>=2.0.0rc1,<2.1.0" +!mim install "mmdet>=3.0.0rc5,<3.1.0" +``` + +**步骤 2.** 使用源码安装 MMYOLO: + +```shell +!git clone https://github.com/open-mmlab/mmyolo.git +%cd mmyolo +!pip install -e . +``` + +**步骤 3.** 验证安装是否成功: + +```python +import mmyolo +print(mmyolo.__version__) +# 预期输出: 0.1.0 或其他版本号 +``` + +```{note} +在 Jupyter 中,感叹号 `!` 用于执行外部命令,而 `%cd` 是一个[魔术命令](https://ipython.readthedocs.io/en/stable/interactive/magics.html#magic-cd),用于切换 Python 的工作路径。 +``` + +## 使用多个 MMYOLO 版本进行开发 + +训练和测试的脚本已经在 `PYTHONPATH` 中进行了修改,以确保脚本使用当前目录中的 MMYOLO。 + +要使环境中安装默认的 MMYOLO 而不是当前正在在使用的,可以删除出现在相关脚本中的如下代码: + +```shell +PYTHONPATH="$(dirname $0)/..":$PYTHONPATH +``` diff --git a/docs/zh_cn/tutorials/data_flow.md b/docs/zh_cn/tutorials/data_flow.md index 9e4e52b5..804004de 100644 --- a/docs/zh_cn/tutorials/data_flow.md +++ b/docs/zh_cn/tutorials/data_flow.md @@ -1,4 +1,4 @@ -## 混合类图片数据增强更新 +# 混合类图片数据增强更新 混合类图片数据增强是指类似 Mosaic 和 MixUp 一样,在运行过程中需要获取多张图片的标注信息进行融合。 在 OpenMMLab 数据增强 pipeline 中一般是获取不到数据集其他索引的。 为了实现上述功能,在 MMDetection 复现的 YOLOX 中提出了 [MultiImageMixDataset](https://github.com/open-mmlab/mmdetection/blob/master/mmdet/datasets/dataset_wrappers.py#L338) 数据集包装器的概念。 diff --git a/docs/zh_cn/useful_tools/browse_dataset.md b/docs/zh_cn/useful_tools/browse_dataset.md index a0d4a7c6..ab991daa 100644 --- a/docs/zh_cn/useful_tools/browse_dataset.md +++ b/docs/zh_cn/useful_tools/browse_dataset.md @@ -18,7 +18,7 @@ python tools/analysis_tools/browse_dataset.py \ - **`-p, --phase`**: 可视化数据集的阶段,只能为 `['train', 'val', 'test']` 之一,默认为 `'train'`。 - **`-n, --show-number`**: 可视化样本数量。如果没有指定,默认展示数据集的所有图片。 - **`-m, --mode`**: 可视化的模式,只能为 `['original', 'transformed', 'pipeline']` 之一。 默认为 `'transformed'`。 -- `--cfg-options` : 对配置文件的修改,参考[学习配置文件](./config.md)。 +- `--cfg-options` : 对配置文件的修改,参考[学习配置文件](../tutorials/config.md)。 ```shell `-m, --mode` 用于设置可视化的模式,默认设置为 'transformed'。 diff --git a/docs/zh_cn/useful_tools/dataset_converters.md b/docs/zh_cn/useful_tools/dataset_converters.md index 71e49472..38da7fc7 100644 --- a/docs/zh_cn/useful_tools/dataset_converters.md +++ b/docs/zh_cn/useful_tools/dataset_converters.md @@ -4,8 +4,6 @@ - `ballon2coco.py` 将 `balloon` 数据集(该小型数据集仅作为入门使用)转换成 COCO 的格式。 -关于该脚本的详细说明,请看 [YOLOv5 从入门到部署全流程](../get_started/15_minutes.md) 中 `数据集准备` 小节。 - ```shell python tools/dataset_converters/balloon2coco.py ``` diff --git a/docs/zh_cn/useful_tools/vis_scheduler.md b/docs/zh_cn/useful_tools/vis_scheduler.md index e6848b17..f0d772ae 100644 --- a/docs/zh_cn/useful_tools/vis_scheduler.md +++ b/docs/zh_cn/useful_tools/vis_scheduler.md @@ -25,7 +25,7 @@ python tools/analysis_tools/vis_scheduler.py \ - `--title`: 可视化图片的标题,默认为配置文件名。 - `--style`: 可视化图片的风格,默认为 `whitegrid`。 - `--window-size`: 可视化窗口大小,如果没有指定,默认为 `12*7`。如果需要指定,按照格式 `'W*H'`。 -- `--cfg-options`: 对配置文件的修改,参考[学习配置文件](../user_guides/config.md)。 +- `--cfg-options`: 对配置文件的修改,参考[学习配置文件](../tutorials/config.md)。 ```{note} 部分数据集在解析标注阶段比较耗时,推荐直接将 `-d, dataset-size` 指定数据集的大小,以节约时间。 From b0186e7e731c81baa52cee4898b5ab850dde90d4 Mon Sep 17 00:00:00 2001 From: Xin Li <7219519+xin-li-67@users.noreply.github.com> Date: Tue, 21 Feb 2023 17:39:37 +0800 Subject: [PATCH 35/64] [Docs] Add DeepStream documentations (#545) * deepstream doc init 
* ch version init * remove benchmark --- projects/easydeploy/deepstream/README.md | 48 +++++++++++++++++++ .../easydeploy/deepstream/README_zh-CN.md | 48 +++++++++++++++++++ 2 files changed, 96 insertions(+) create mode 100644 projects/easydeploy/deepstream/README.md create mode 100644 projects/easydeploy/deepstream/README_zh-CN.md diff --git a/projects/easydeploy/deepstream/README.md b/projects/easydeploy/deepstream/README.md new file mode 100644 index 00000000..111f3765 --- /dev/null +++ b/projects/easydeploy/deepstream/README.md @@ -0,0 +1,48 @@ +# Inference MMYOLO Models with DeepStream + +This project demonstrates how to inference MMYOLO models with customized parsers in [DeepStream SDK](https://developer.nvidia.com/deepstream-sdk). + +## Pre-requisites + +### 1. Install Nvidia Driver and CUDA + +First, please follow the official documents and instructions to install dedicated Nvidia graphic driver and CUDA matched to your gpu and target Nvidia AIoT devices. + +### 2. Install DeepStream SDK + +Second, please follow the official instruction to download and install DeepStream SDK. Currently stable version of DeepStream is v6.2. + +### 3. Generate TensorRT Engine + +As DeepStream builds on top of several NVIDIA libraries, you need to first convert your trained MMYOLO models to TensorRT engine files. We strongly recommend you to try the supported TensorRT deployment solution in [EasyDeploy](../../easydeploy/). + +## Build and Run + +Please make sure that your converted TensorRT engine is already located in the `deepstream` folder as the config shows. Create your own model config files and change the `config-file` parameter in [deepstream_app_config.txt](deepstream_app_config.txt) to the model you want to run with. + +```bash +mkdir build && cd build +cmake .. +make -j$(nproc) && make install +``` + +Then you can run the inference with this command. + +```bash +deepstream-app -c deepstream_app_config.txt +``` + +## Code Structure + +```bash +├── deepstream +│ ├── configs # config file for MMYOLO models +│ │ └── config_infer_rtmdet.txt +│ ├── custom_mmyolo_bbox_parser # customized parser for MMYOLO models to DeepStream formats +│ │ └── nvdsparsebbox_mmyolo.cpp +| ├── CMakeLists.txt +│ ├── coco_labels.txt # labels for coco detection +│ ├── deepstream_app_config.txt # deepStream reference app configs for MMYOLO models +│ ├── README_zh-CN.md +│ └── README.md +``` diff --git a/projects/easydeploy/deepstream/README_zh-CN.md b/projects/easydeploy/deepstream/README_zh-CN.md new file mode 100644 index 00000000..13a85d5b --- /dev/null +++ b/projects/easydeploy/deepstream/README_zh-CN.md @@ -0,0 +1,48 @@ +# 使用 DeepStream SDK 推理 MMYOLO 模型 + +本项目演示了如何使用 [DeepStream SDK](https://developer.nvidia.com/deepstream-sdk) 配合改写的 parser 来推理 MMYOLO 的模型。 + +## 预先准备 + +### 1. 安装 Nidia 驱动和 CUDA + +首先请根据当前的显卡驱动和目标使用设备的驱动完成显卡驱动和 CUDA 的安装。 + +### 2. 安装 DeepStream SDK + +目前 DeepStream SDK 稳定版本已经更新到 v6.2,官方推荐使用这个版本。 + +### 3. 将 MMYOLO 模型转换为 TensorRT Engine + +推荐使用 EasyDeploy 中的 TensorRT 方案完成目标模型的转换部署,具体可参考 [此文档](../../easydeploy/docs/model_convert.md) 。 + +## 编译使用 + +当前项目使用的是 MMYOLO 的 rtmdet 模型,若想使用其他的模型,请参照目录下的配置文件进行改写。然后将转换完的 TensorRT engine 放在当前目录下并执行如下命令: + +```bash +mkdir build && cd build +cmake .. 
+make -j$(nproc) && make install +``` + +完成编译后可使用如下命令进行推理: + +```bash +deepstream-app -c deepstream_app_config.txt +``` + +## 项目代码结构 + +```bash +├── deepstream +│ ├── configs # MMYOLO 模型对应的 DeepStream 配置 +│ │ └── config_infer_rtmdet.txt +│ ├── custom_mmyolo_bbox_parser # 适配 DeepStream formats 的 parser +│ │ └── nvdsparsebbox_mmyolo.cpp +| ├── CMakeLists.txt +│ ├── coco_labels.txt # coco labels +│ ├── deepstream_app_config.txt # DeepStream app 配置 +│ ├── README_zh-CN.md +│ └── README.md +``` From ab0166abeb4a2770fd81866eeeaf3a88b0759999 Mon Sep 17 00:00:00 2001 From: Yijie Zheng <67947949+VoyagerXvoyagerx@users.noreply.github.com> Date: Wed, 22 Feb 2023 18:52:56 +0800 Subject: [PATCH 36/64] [Fix] fix the config link for rtmdet_tiny (#580) * [Fix] fix the config link for rtmdet_tiny * fir the lint --- configs/rtmdet/README.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/configs/rtmdet/README.md b/configs/rtmdet/README.md index 33ae6823..3059a575 100644 --- a/configs/rtmdet/README.md +++ b/configs/rtmdet/README.md @@ -23,13 +23,13 @@ RTMDet-l model structure ## Object Detection -| Model | size | box AP | Params(M) | FLOPS(G) | TRT-FP16-Latency(ms) | Config | Download | -| :---------: | :--: | :----: | :-------: | :------: | :------------------: | :-------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | -| RTMDet-tiny | 640 | 41.0 | 4.8 | 8.1 | 0.98 | [config](./rtmdet_l_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco/rtmdet_tiny_syncbn_fast_8xb32-300e_coco_20230102_140117-dbb1dc83.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco/rtmdet_tiny_syncbn_fast_8xb32-300e_coco_20230102_140117.log.json) | -| RTMDet-s | 640 | 44.6 | 8.89 | 14.8 | 1.22 | [config](./rtmdet_s_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco/rtmdet_s_syncbn_fast_8xb32-300e_coco_20221230_182329-0a8c901a.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco/rtmdet_s_syncbn_fast_8xb32-300e_coco_20221230_182329.log.json) | -| RTMDet-m | 640 | 49.3 | 24.71 | 39.27 | 1.62 | [config](./rtmdet_m_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco/rtmdet_m_syncbn_fast_8xb32-300e_coco_20230102_135952-40af4fe8.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco/rtmdet_m_syncbn_fast_8xb32-300e_coco_20230102_135952.log.json) | -| RTMDet-l | 640 | 51.4 | 52.3 | 80.23 | 2.44 | [config](./rtmdet_l_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco/rtmdet_l_syncbn_fast_8xb32-300e_coco_20230102_135928-ee3abdc4.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco/rtmdet_l_syncbn_fast_8xb32-300e_coco_20230102_135928.log.json) | -| RTMDet-x | 640 | 52.8 | 94.86 | 141.67 | 3.10 | [config](./rtmdet_x_syncbn_fast_8xb32-300e_coco.py) | 
[model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_x_syncbn_fast_8xb32-300e_coco/rtmdet_x_syncbn_fast_8xb32-300e_coco_20221231_100345-b85cd476.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_x_syncbn_fast_8xb32-300e_coco/rtmdet_x_syncbn_fast_8xb32-300e_coco_20221231_100345.log.json) | +| Model | size | box AP | Params(M) | FLOPS(G) | TRT-FP16-Latency(ms) | Config | Download | +| :---------: | :--: | :----: | :-------: | :------: | :------------------: | :----------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| RTMDet-tiny | 640 | 41.0 | 4.8 | 8.1 | 0.98 | [config](./rtmdet_tiny_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco/rtmdet_tiny_syncbn_fast_8xb32-300e_coco_20230102_140117-dbb1dc83.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco/rtmdet_tiny_syncbn_fast_8xb32-300e_coco_20230102_140117.log.json) | +| RTMDet-s | 640 | 44.6 | 8.89 | 14.8 | 1.22 | [config](./rtmdet_s_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco/rtmdet_s_syncbn_fast_8xb32-300e_coco_20221230_182329-0a8c901a.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco/rtmdet_s_syncbn_fast_8xb32-300e_coco_20221230_182329.log.json) | +| RTMDet-m | 640 | 49.3 | 24.71 | 39.27 | 1.62 | [config](./rtmdet_m_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco/rtmdet_m_syncbn_fast_8xb32-300e_coco_20230102_135952-40af4fe8.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco/rtmdet_m_syncbn_fast_8xb32-300e_coco_20230102_135952.log.json) | +| RTMDet-l | 640 | 51.4 | 52.3 | 80.23 | 2.44 | [config](./rtmdet_l_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco/rtmdet_l_syncbn_fast_8xb32-300e_coco_20230102_135928-ee3abdc4.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco/rtmdet_l_syncbn_fast_8xb32-300e_coco_20230102_135928.log.json) | +| RTMDet-x | 640 | 52.8 | 94.86 | 141.67 | 3.10 | [config](./rtmdet_x_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_x_syncbn_fast_8xb32-300e_coco/rtmdet_x_syncbn_fast_8xb32-300e_coco_20221231_100345-b85cd476.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_x_syncbn_fast_8xb32-300e_coco/rtmdet_x_syncbn_fast_8xb32-300e_coco_20221231_100345.log.json) | **Note**: From 87a2672c001f6ad5feab08016cdf0402ef06f3fb Mon Sep 17 00:00:00 2001 From: leling <34684833+landhill@users.noreply.github.com> Date: Thu, 23 Feb 2023 10:08:54 +0800 Subject: [PATCH 37/64] [Feature] Add confusion matrix drawing tool (#572) * Add files via upload * cat_single_channel_test * [Feature] Add confusion matrix drawing tool * [Feature] Add confusion matrix drawing tool * [Feature] Add confusion matrix drawing tool * [Feature] Add confusion matrix drawing tool --- tools/analysis_tools/confusion_matrix.py | 272 
+++++++++++++++++++++++ 1 file changed, 272 insertions(+) create mode 100644 tools/analysis_tools/confusion_matrix.py diff --git a/tools/analysis_tools/confusion_matrix.py b/tools/analysis_tools/confusion_matrix.py new file mode 100644 index 00000000..a92ed1cc --- /dev/null +++ b/tools/analysis_tools/confusion_matrix.py @@ -0,0 +1,272 @@ +import argparse +import os + +import matplotlib.pyplot as plt +import numpy as np +from matplotlib.ticker import MultipleLocator +from mmcv.ops import nms +from mmdet.evaluation import bbox_overlaps +from mmdet.utils import replace_cfg_vals, update_data_root +from mmengine import Config, DictAction +from mmengine.fileio import load +from mmengine.utils import ProgressBar + +from mmyolo.registry import DATASETS +from mmyolo.utils import register_all_modules + + +def parse_args(): + parser = argparse.ArgumentParser( + description='Generate confusion matrix from detection results') + parser.add_argument('config', help='test config file path') + parser.add_argument( + 'prediction_path', help='prediction path where test .pkl result') + parser.add_argument( + 'save_dir', help='directory where confusion matrix will be saved') + parser.add_argument( + '--show', action='store_true', help='show confusion matrix') + parser.add_argument( + '--color-theme', + default='plasma', + help='theme of the matrix color map') + parser.add_argument( + '--score-thr', + type=float, + default=0.3, + help='score threshold to filter detection bboxes') + parser.add_argument( + '--tp-iou-thr', + type=float, + default=0.5, + help='IoU threshold to be considered as matched') + parser.add_argument( + '--nms-iou-thr', + type=float, + default=None, + help='nms IoU threshold, only applied when users want to change the' + 'nms IoU threshold.') + parser.add_argument( + '--cfg-options', + nargs='+', + action=DictAction, + help='override some settings in the used config, the key-value pair ' + 'in xxx=yyy format will be merged into config file. If the value to ' + 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' + 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' + 'Note that the quotation marks are necessary and that no white space ' + 'is allowed.') + args = parser.parse_args() + return args + + +def calculate_confusion_matrix(dataset, + results, + score_thr=0, + nms_iou_thr=None, + tp_iou_thr=0.5): + """Calculate the confusion matrix. + + Args: + dataset (Dataset): Test or val dataset. + results (list[ndarray]): A list of detection results in each image. + score_thr (float|optional): Score threshold to filter bboxes. + Default: 0. + nms_iou_thr (float|optional): nms IoU threshold, the detection results + have done nms in the detector, only applied when users want to + change the nms IoU threshold. Default: None. + tp_iou_thr (float|optional): IoU threshold to be considered as matched. + Default: 0.5. + """ + num_classes = len(dataset.metainfo['classes']) + confusion_matrix = np.zeros(shape=[num_classes + 1, num_classes + 1]) + assert len(dataset) == len(results) + prog_bar = ProgressBar(len(results)) + for idx, per_img_res in enumerate(results): + res_bboxes = per_img_res['pred_instances'] + gts = dataset.get_data_info(idx)['instances'] + analyze_per_img_dets(confusion_matrix, gts, res_bboxes, score_thr, + tp_iou_thr, nms_iou_thr) + prog_bar.update() + return confusion_matrix + + +def analyze_per_img_dets(confusion_matrix, + gts, + result, + score_thr=0, + tp_iou_thr=0.5, + nms_iou_thr=None): + """Analyze detection results on each image. 
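+
+    Note: ``confusion_matrix`` is updated in place. Each element of ``gts``
+    is an instance dict that provides ``bbox`` and ``bbox_label`` keys.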
+ + Args: + confusion_matrix (ndarray): The confusion matrix, + has shape (num_classes + 1, num_classes + 1). + gt_bboxes (ndarray): Ground truth bboxes, has shape (num_gt, 4). + gt_labels (ndarray): Ground truth labels, has shape (num_gt). + result (ndarray): Detection results, has shape + (num_classes, num_bboxes, 5). + score_thr (float): Score threshold to filter bboxes. + Default: 0. + tp_iou_thr (float): IoU threshold to be considered as matched. + Default: 0.5. + nms_iou_thr (float|optional): nms IoU threshold, the detection results + have done nms in the detector, only applied when users want to + change the nms IoU threshold. Default: None. + """ + true_positives = np.zeros(len(gts)) + gt_bboxes = [] + gt_labels = [] + for gt in gts: + gt_bboxes.append(gt['bbox']) + gt_labels.append(gt['bbox_label']) + + gt_bboxes = np.array(gt_bboxes) + gt_labels = np.array(gt_labels) + + unique_label = np.unique(result['labels'].numpy()) + + for det_label in unique_label: + mask = (result['labels'] == det_label) + det_bboxes = result['bboxes'][mask].numpy() + det_scores = result['scores'][mask].numpy() + + if nms_iou_thr: + det_bboxes, _ = nms( + det_bboxes, det_scores, nms_iou_thr, score_threshold=score_thr) + ious = bbox_overlaps(det_bboxes[:, :4], gt_bboxes) + for i, score in enumerate(det_scores): + det_match = 0 + if score >= score_thr: + for j, gt_label in enumerate(gt_labels): + if ious[i, j] >= tp_iou_thr: + det_match += 1 + if gt_label == det_label: + true_positives[j] += 1 # TP + confusion_matrix[gt_label, det_label] += 1 + if det_match == 0: # BG FP + confusion_matrix[-1, det_label] += 1 + for num_tp, gt_label in zip(true_positives, gt_labels): + if num_tp == 0: # FN + confusion_matrix[gt_label, -1] += 1 + + +def plot_confusion_matrix(confusion_matrix, + labels, + save_dir=None, + show=True, + title='Normalized Confusion Matrix', + color_theme='plasma'): + """Draw confusion matrix with matplotlib. + + Args: + confusion_matrix (ndarray): The confusion matrix. + labels (list[str]): List of class names. + save_dir (str|optional): If set, save the confusion matrix plot to the + given path. Default: None. + show (bool): Whether to show the plot. Default: True. + title (str): Title of the plot. Default: `Normalized Confusion Matrix`. + color_theme (str): Theme of the matrix color map. Default: `plasma`. 
+ """ + # normalize the confusion matrix + per_label_sums = confusion_matrix.sum(axis=1)[:, np.newaxis] + confusion_matrix = \ + confusion_matrix.astype(np.float32) / per_label_sums * 100 + + num_classes = len(labels) + fig, ax = plt.subplots( + figsize=(0.5 * num_classes, 0.5 * num_classes * 0.8), dpi=180) + cmap = plt.get_cmap(color_theme) + im = ax.imshow(confusion_matrix, cmap=cmap) + plt.colorbar(mappable=im, ax=ax) + + title_font = {'weight': 'bold', 'size': 12} + ax.set_title(title, fontdict=title_font) + label_font = {'size': 10} + plt.ylabel('Ground Truth Label', fontdict=label_font) + plt.xlabel('Prediction Label', fontdict=label_font) + + # draw locator + xmajor_locator = MultipleLocator(1) + xminor_locator = MultipleLocator(0.5) + ax.xaxis.set_major_locator(xmajor_locator) + ax.xaxis.set_minor_locator(xminor_locator) + ymajor_locator = MultipleLocator(1) + yminor_locator = MultipleLocator(0.5) + ax.yaxis.set_major_locator(ymajor_locator) + ax.yaxis.set_minor_locator(yminor_locator) + + # draw grid + ax.grid(True, which='minor', linestyle='-') + + # draw label + ax.set_xticks(np.arange(num_classes)) + ax.set_yticks(np.arange(num_classes)) + ax.set_xticklabels(labels) + ax.set_yticklabels(labels) + + ax.tick_params( + axis='x', bottom=False, top=True, labelbottom=False, labeltop=True) + plt.setp( + ax.get_xticklabels(), rotation=45, ha='left', rotation_mode='anchor') + + # draw confution matrix value + for i in range(num_classes): + for j in range(num_classes): + ax.text( + j, + i, + '{}%'.format( + int(confusion_matrix[ + i, + j]) if not np.isnan(confusion_matrix[i, j]) else -1), + ha='center', + va='center', + color='w', + size=7) + + ax.set_ylim(len(confusion_matrix) - 0.5, -0.5) # matplotlib>3.1.1 + + fig.tight_layout() + if save_dir is not None: + plt.savefig( + os.path.join(save_dir, 'confusion_matrix.png'), format='png') + if show: + plt.show() + + +def main(): + register_all_modules() + args = parse_args() + + cfg = Config.fromfile(args.config) + + # replace the ${key} with the value of cfg.key + cfg = replace_cfg_vals(cfg) + + # update data root according to MMYOLO_DATASETS + update_data_root(cfg) + + if args.cfg_options is not None: + cfg.merge_from_dict(args.cfg_options) + + results = load(args.prediction_path) + + if not os.path.exists(args.save_dir): + os.makedirs(args.save_dir) + + dataset = DATASETS.build(cfg.test_dataloader.dataset) + + confusion_matrix = calculate_confusion_matrix(dataset, results, + args.score_thr, + args.nms_iou_thr, + args.tp_iou_thr) + plot_confusion_matrix( + confusion_matrix, + dataset.metainfo['classes'] + ('background', ), + save_dir=args.save_dir, + show=args.show, + color_theme=args.color_theme) + + +if __name__ == '__main__': + main() From a2fdb2a13211425a1a2cf7f3d6fba7cc9850e0f4 Mon Sep 17 00:00:00 2001 From: tripleMu Date: Thu, 23 Feb 2023 10:14:32 +0800 Subject: [PATCH 38/64] Add deepstream yolov5 config (#571) --- .../configs/config_infer_yolov5.txt | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 projects/easydeploy/deepstream/configs/config_infer_yolov5.txt diff --git a/projects/easydeploy/deepstream/configs/config_infer_yolov5.txt b/projects/easydeploy/deepstream/configs/config_infer_yolov5.txt new file mode 100644 index 00000000..6ad7d642 --- /dev/null +++ b/projects/easydeploy/deepstream/configs/config_infer_yolov5.txt @@ -0,0 +1,21 @@ +[property] +gpu-id=0 +net-scale-factor=0.0039215697906911373 +model-color-format=0 +model-engine-file=../end2end.engine +labelfile-path=../coco_labels.txt 
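+# net-scale-factor equals 1/255, i.e. input pixels are normalized to [0, 1].
+# model-engine-file and labelfile-path are relative to this configs directory;
+# adjust them if the engine or label file is stored elsewhere.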
+batch-size=1 +network-mode=0 +num-detected-classes=80 +interval=0 +gie-unique-id=1 +process-mode=1 +network-type=0 +cluster-mode=2 +maintain-aspect-ratio=1 +parse-bbox-func-name=NvDsInferParseCustomMMYOLO +custom-lib-path=../build/libnvdsparsebbox_mmyolo.so + +[class-attrs-all] +pre-cluster-threshold=0.45 +topk=100 From e08e9895b02ec1c2ff24dfd33552d661537f2fb9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Haian=20Huang=28=E6=B7=B1=E5=BA=A6=E7=9C=B8=29?= <1286304229@qq.com> Date: Thu, 23 Feb 2023 16:38:08 +0800 Subject: [PATCH 39/64] [Docs] Add FAQ and Troubleshooting steps (#579) * add FAQ * update * update * update * update * update * update --- README.md | 1 + README_zh-CN.md | 1 + docs/en/index.rst | 1 + docs/en/tutorials/warning_notes.md | 1 + docs/zh_cn/index.rst | 1 + .../troubleshooting_steps.md | 108 ++++++++++++++++++ docs/zh_cn/tutorials/faq.md | 77 +++++++++++++ docs/zh_cn/tutorials/warning_notes.md | 22 ++++ tools/misc/publish_model.py | 57 +++++++++ 9 files changed, 269 insertions(+) create mode 100644 docs/en/tutorials/warning_notes.md create mode 100644 docs/zh_cn/tutorials/warning_notes.md create mode 100644 tools/misc/publish_model.py diff --git a/README.md b/README.md index 98d2f964..5300c747 100644 --- a/README.md +++ b/README.md @@ -230,6 +230,7 @@ For different parts from MMDetection, we have also prepared user guides and adva - [Learn about configs with YOLOv5](docs/en/tutorials/config.md) - [Data flow](docs/en/tutorials/data_flow.md) - [Custom Installation](docs/en/tutorials/custom_installation.md) +- [Common Warning Notes](docs/zh_cn/tutorials/warning_notes.md) - [FAQ](docs/en/tutorials/faq.md) diff --git a/README_zh-CN.md b/README_zh-CN.md index 4c9c5201..a378415b 100644 --- a/README_zh-CN.md +++ b/README_zh-CN.md @@ -251,6 +251,7 @@ MMYOLO 用法和 MMDetection 几乎一致,所有教程都是通用的,你也 - [学习 YOLOv5 配置文件](docs/zh_cn/tutorials/config.md) - [数据流](docs/zh_cn/tutorials/data_flow.md) - [自定义安装](docs/zh_cn/tutorials/custom_installation.md) +- [常见警告说明](docs/zh_cn/tutorials/warning_notes.md) - [常见问题](docs/zh_cn/tutorials/faq.md) diff --git a/docs/en/index.rst b/docs/en/index.rst index 5516b619..175f166b 100644 --- a/docs/en/index.rst +++ b/docs/en/index.rst @@ -70,6 +70,7 @@ You can switch between Chinese and English documents in the top-right corner of tutorials/config.md tutorials/data_flow.md tutorials/custom_installation.md + tutorials/warning_notes.md tutorials/faq.md diff --git a/docs/en/tutorials/warning_notes.md b/docs/en/tutorials/warning_notes.md new file mode 100644 index 00000000..54ed973d --- /dev/null +++ b/docs/en/tutorials/warning_notes.md @@ -0,0 +1 @@ +# Common Warning Notes diff --git a/docs/zh_cn/index.rst b/docs/zh_cn/index.rst index 1138e9c3..1f27366c 100644 --- a/docs/zh_cn/index.rst +++ b/docs/zh_cn/index.rst @@ -70,6 +70,7 @@ tutorials/config.md tutorials/data_flow.md tutorials/custom_installation.md + tutorials/warning_notes.md tutorials/faq.md diff --git a/docs/zh_cn/recommended_topics/troubleshooting_steps.md b/docs/zh_cn/recommended_topics/troubleshooting_steps.md index 189a9115..7cca926f 100644 --- a/docs/zh_cn/recommended_topics/troubleshooting_steps.md +++ b/docs/zh_cn/recommended_topics/troubleshooting_steps.md @@ -1 +1,109 @@ # 常见错误排除步骤 + +本文档收集用户经常碰到的常见错误情况,并提供详细的排查步骤。如果你发现阅读本文你没有找到正确的解决方案,请联系我们或者提 PR 进行更新。提 PR 请参考 [如何给 MMYOLO 贡献代码](../recommended_topics/contributing.md) + +## xxx is not in the model registry + +这个错误信息是指某个模块没有被注册到 model 中。 这个错误出现的原因非常多,典型的情况有: + +1. 你新增的模块没有在类别前面加上注册器装饰器 @MODELS.register_module() +2. 
虽然注册了,但是注册错了位置,例如你实际想注册到 MMYOLO 中,但是你导入的 MODELS 是 MMDet 包里面的 +3. 你注册了且注册正确了,但是没有在对应的 `__init__.py` 中加入导致没有被导入 +4. 以上 3 个步骤都确认没问题,但是你是新增 py 文件来自定义模块的却没有重新安装 MMYOLO 导致没有生效,此时你可以重新安装一遍,即使你是 -e 模式安装也需要重新安装 +5. 如果你是在 mmyolo 包路径下新增了一个 package, 除上述步骤外,你还需要在 [register_all_modules](https://github.com/open-mmlab/mmyolo/blob/main/mmyolo/utils/setup_env.py#L8) 函数中增加其导包代码,否则该 package 不会被自动触发 +6. 你的环境中有多个版本 MMYOLO,你注册的和实际运行的实际上不是同一套代码,导致没有生效。此时你可以在程序运行前输入 `PYTHONPATH="$(dirname $0)/..":$PYTHONPATH` 强行使用当前代码 + +## loss_bbox 始终为 0 + +该原因出现主要有两个原因: + +1. 训练过程中没有 GT 标注数据 +2. 参数设置不合理导致训练中没有正样本 + +第一种情况出现的概率更大。 `loss_bbox` 通常是只考虑正样本的 loss,如果训练中没有正样本则始终为 0。如果是第一种原因照成的 `loss_bbox` 始终为 0,那么通常意味着你配置不对,特别是 dataset 部分的配置不正确。 +一个非常典型的情况是用户的 `dataset` 中 `metainfo` 设置不正确或者设置了但是没有传给 dataset 导致加载后没有找到对应类别的 GT Bbox 标注。 这种情况请仔细阅读我们提供的 [示例配置](https://github.com/open-mmlab/mmyolo/blob/main/projects/misc/custom_dataset/yolov5_s-v61_syncbn_fast_1xb32-100e_cat.py#L27) 。 +验证 dataset 配置是否正确的一个最直接的途径是运行 [browse_dataset 脚本](https://github.com/open-mmlab/mmyolo/blob/main/tools/analysis_tools/browse_dataset.py),如果可视化效果正确则说明是正确的。 + +## MMCV 安装时间非常久 + +这通常意味着你在自己编译 MMCV 而不是直接下载使用我们提供的预编译包。 MMCV 中包括了大量的自定义的 CUDA 算子,如果从源码安装则需要非常久的时间去编译,并且由于其安装成功依赖于严格的底层环境信息,需要多个库的版本一致才可以。如果用户自己编译大概率会失败。 +我们不推荐用户自己去编译 MMCV 而应该优先选择预编译包。如果你当前的环境中我们没有提供对应的预编译包,那么建议你可以快速换一个 Conda 环境,并安装有预编译包的 Torch。 以 torch1.8.0+cu102 为例,如果你想查看目前查看所有的预编译包,可以查看 https://download.openmmlab.com/mmcv/dist/cu102/torch1.8.0/index.html。 + +## 基于官方配置继承新建的配置出现 unexpected keyword argument + +这通常是由于你没有删除 base 配置中的额外参数。 可以在你新建配置中的修改字典中增加 `__delete__=True` 删掉 base 中该类之前的所有参数。 + +## The testing results of the whole dataset is empty + +这通常说明训练效果太差导致网络没有预测出任何符合阈值要求的检测框。 出现这种现象有多个原因,典型的为: + +1. 当前为前几个 epoch,网络当前训练效果还较差,等后续训练久一点后可能就不会出现该警告了 +2. 配置设置不正确,网络虽然正常训练但是实际上无效训练,例如前面的 `loss_bbox` 始终为 0 就会导致上述警告 +3. 超参设置不合理 + +## ValueError: not enough values to unpack(expected 2, got 0) + +这个错误通常是在 epoch 切换时候出现。这是 PyTorch 1.7 的已知问题,在 PyTorch 1.8+ 中已经修复。如果在 PyTorch 1.7 中想修复这个问题,可以简单的设置 dataloader 参数 `persistent_workers` 为 False。 + +## ValueError: need at least one array to concatenate + +这个是一个非常场景的错误,可能出现在训练一开始或者训练正常但是评估时候。不管出现在何阶段,均说明你的配置不对,最常见的错误就是 `num_classes` 参数设置不对。 +在 MMYOLO 或者 MMDet 中大部分配置都是以 COCO 数据为例,因此配置中默认的 `num_classes` 是 80, 如果用户自定义数据集没有正确修改这个字段则会出现上述错误。 +MMYOLO 中有些算法配置会在多个模块中都需要 `num_classes` 参数,用户经常出现的错误就是仅仅修改了某一个地方的 `num_classes` 而没有将所有的 `num_classes` 都修改。想快速解决这个问题,可以使用 [print_config](https://github.com/open-mmlab/mmyolo/blob/main/tools/misc/print_config.py) +脚本打印下全配置,然后全局搜索 `num_classes` 确认是否有没有修改的模块。 + +## 评估时候 IndexError: list index out of range + +具体输出信息是 + +```text + File "site-packages/mmdet/evaluation/metrics/coco_metric.py", line 216, in results2json + data['category_id'] = self.cat_ids[label] +IndexError: list index out of range +``` + +可以看出是评估时候类别索引越界,这个通常的原因是配置中的 `num_classes` 设置不正确,默认的 `num_classes` 是 80,如果你自定义类别小于 80,那么就有可能出现类别越界。注意算法配置的 `num_classes` 一般会用到多个模块,你可能只改了某几个而漏掉了一些。想快速解决这个问题,可以使用 [print_config](https://github.com/open-mmlab/mmyolo/blob/main/tools/misc/print_config.py) +脚本打印下全配置,然后全局搜索 `num_classes` 确认是否有没有修改的模块。 + +## 训练中不打印 loss,但是程序依然正常训练和评估 + +这通常是因为一个训练 epoch 没有超过 50 个迭代,而 MMYOLO 中默认的打印间隔是 50。你可以修改 `default_hooks.logger.interval` 参数。 + +## GPU out of memory + +1. 存在大量 ground truth boxes 或者大量 anchor 的场景,可能在 assigner 会 OOM。 +2. 使用 --amp 来开启混合精度训练。 +3. 
你也可以尝试使用 MMDet 中的 AvoidCUDAOOM 来避免该问题。首先它将尝试调用 torch.cuda.empty_cache()。如果失败,将会尝试把输入类型转换到 FP16。如果仍然失败,将会把输入从 GPUs 转换到 CPUs 进行计算。这里提供了两个使用的例子: + +```python +from mmdet.utils import AvoidCUDAOOM + +output = AvoidCUDAOOM.retry_if_cuda_oom(some_function)(input1, input2) +``` + +你也可也使用 AvoidCUDAOOM 作为装饰器让代码遇到 OOM 的时候继续运行: + +```python +from mmdet.utils import AvoidCUDAOOM + +@AvoidCUDAOOM.retry_if_cuda_oom +def function(*args, **kwargs): + ... + return xxx +``` + +## Loss goes Nan + +1. 检查数据的标注是否正常, 长或宽为 0 的框可能会导致回归 loss 变为 nan,一些小尺寸(宽度或高度小于 1)的框在数据增强后也会导致此问题。 因此,可以检查标注并过滤掉那些特别小甚至面积为 0 的框,并关闭一些可能会导致 0 面积框出现数据增强。 +2. 降低学习率:由于某些原因,例如 batch size 大小的变化, 导致当前学习率可能太大。 您可以降低为可以稳定训练模型的值。 +3. 延长 warm up 的时间:一些模型在训练初始时对学习率很敏感。 +4. 添加 gradient clipping: 一些模型需要梯度裁剪来稳定训练过程。 你可以在 config 设置 `optim_wrapper.clip_grad=dict(max_norm=xx)` + +## 训练中其他不符合预期或者错误 + +如果训练或者评估中出现了不属于上述描述的问题,由于原因不明,现提供常用的排除流程: + +1. 首先确认配置是否正确,可以使用 [print_config](https://github.com/open-mmlab/mmyolo/blob/main/tools/misc/print_config.py) 脚本打印全部配置,如果运行成功则说明配置语法没有错误 +2. 确认 COCO 格式的 json 标注是否正确,可以使用 [browse_coco_json.py](https://github.com/open-mmlab/mmyolo/blob/main/tools/misc/browse_coco_json.py) 脚本确认 +3. 确认 dataset 部分配置是否正确,这一步骤几乎是必须要提前运行的,可以提前排查很多问题,可以使用 [browse_dataset.py](https://github.com/open-mmlab/mmyolo/blob/main/tools/misc/browse_dataset.py) 脚本确认 +4. 如果以上 3 步都没有问题,那么出问题可能在 model 部分了。这个部分的排除没有特别的办法,你可以单独写一个脚本来仅运行 model 部分并通过调试来确认,如果对于 model 中多个模块的输入构建存在困惑,可以参考对应模块的单元测试写法 diff --git a/docs/zh_cn/tutorials/faq.md b/docs/zh_cn/tutorials/faq.md index 1cda1854..a088b5bd 100644 --- a/docs/zh_cn/tutorials/faq.md +++ b/docs/zh_cn/tutorials/faq.md @@ -17,3 +17,80 @@ **(3) 多任务支持** 还有一层深远的原因: **MMYOLO 任务不局限于 MMDetection**,后续会支持更多任务例如基于 MMPose 实现关键点相关的应用,基于 MMTracking 实现追踪相关的应用,因此不太适合直接并入 MMDetection 中。 + +## projects 文件夹是用来干什么的? + +projects 文件夹是 OpenMMLab 2.0 中引入的一个全新文件夹。其初衷有如下 3 点: + +1. 便于社区贡献。由于 OpenMMLab 系列代码库对于代码合入有一套规范严谨的流程,这不可避免的会导致算法复现周期很长,不利于社区贡献 +2. 便于快速支持新算法。算法开发周期过长同样会导致用户无法尽快体验最新算法 +3. 便于快速支持新方向和新特性。新发展方向或者一些新的特性可能和现如今代码库中的设计有些不兼容,没法快速合入到代码库中 + +综上所述,projects 文件夹的引入主要是解决算法复现周期过长导致的新算法支持速度较慢,新特性支持较复杂等多个问题。 projects 中每个文件夹属于一个完全独立的工程,社区用户可以通过 +projects 快速支持一些在当前版本中较难支持或者想快速支持的新算法和新特性。等后续设计稳定或者代码符合合入规范,则会考虑合入到主分支中。 + +## YOLOv5 backbone 替换为 Swin 后效果很差 + +在 [轻松更换主干网络](../recommended_topics/replace_backbone.md) 一文中我们提供了大量替换 backbone 的教程,但是该文档只是教用户如何替换 backbone,直接训练不一定能得到比较优异的结果。原因是 +不同 backbone 所需要的训练超参是不一样的,以 Swin 和 YOLOv5 backbone 为例两者差异较大,Swin 属于 transformer 系列算法,而 YOLOv5 backbone 属于卷积系列算法,其训练的优化器、学习率以及其他超参差异较大。 +如果强行将 Swin 作为 YOLOv5 backbone 且想取得不错的效果,需要同时调整诸多参数。 + +## MM 系列开源库中有很多组件,如何在 MMYOLO 中使用? + +在 OpenMMLab 2.0 中对多个 MM 系列开源库之间的模块跨库调用功能进行增强。目前在 MMYOLO 中可以在配置文件中通过 `MM 算法库 A.模块名` 来之间调用 MM 算法库 A 中已经被注册的任意模块。 具体例子可以参考 +[轻松更换主干网络](../recommended_topics/replace_backbone.md) 中使用在 MMClassification 中实现的主干网络章节,其他模块调用也是相同的用法。 + +## MMYOLO 中是否可以加入纯背景图片进行训练? + +将纯背景图片加入训练大部分情况可以抑制误报率,是否将纯背景图片加入训练功能已经大部分数据集上支持了。以 `YOLOv5CocoDataset` 为例,核心控制参数是 `train_dataloader.dataset.filter_cfg.filter_empty_gt`,如果 `filter_empty_gt` 为 True 表示将纯背景图片过滤掉不加入训练,反正将纯 +背景图片加入到训练中。 目前 MMYOLO 中大部分算法都是默认将纯背景图片加入训练中。 + +## MMYOLO 是否有计算模型推理 FPS 脚本? + +MMYOLO 是基于 MMDet 3.x 来开发的,在 MMDet 3.x 中提供了计算模型推理 FPS 的脚本。 具体脚本为 [benchmark](https://github.com/open-mmlab/mmdetection/blob/3.x/tools/analysis_tools/benchmark.py)。我们推荐大家使用 mim 直接跨库启动 MMDet 中的脚本而不是直接复制到 MMYOLO 中。 +关于如果通过 mim 启动 MMDet 中脚本,可以查看 [使用 mim 跨库调用其他 OpenMMLab 仓库的脚本](../common_usage/mim_usage.md)。 + +## MMDeploy 和 EasyDeploy 有啥区别? 
+ +MMDeploy 是由 OpenMMLab 中部署团队开发的针对 OpenMMLab 系列算法库提供部署支持的开源库,支持各种后端和自定义等等强大功能。 EasyDeploy 是由社区小伙伴提供的一个相比 MMDeploy 更加简单易用的部署 projects。 +EasyDeploy 支持的功能目前没有 MMDeploy 多,但是使用上更加简单。 MMYOLO 中同时提供对 MMDeploy 和 EasyDeploy 的支持,用户可以根据自己需求选择。 + +## COCOMetric 中如何查看每个类的 AP + +只需要在配置中设置 `test_evaluator.classwise` 为 True,或者在 test.py 运行时候增加 `--cfg-options test_evaluator.classwise=True` 即可。 + +## MMYOLO 中为何没有支持 MMDet 类似的自动学习率缩放功能? + +原因是实验发现 YOLO 系列算法不是非常满足现象缩放功能。在多个数据集上验证发现会出现不基于 batch size 自动学习率缩放效果好于缩放的情形。因此暂时 MMYOLO 还没有支持自动学习率缩放功能。 + +## 自己训练的模型权重尺寸为啥比官方发布的大? + +原因是用户自己训练的权重通常包括 `optimizer`、`ema_state_dict` 和 `message_hub` 等额外数据,这部分数据我们会在模型发布时候自动删掉,而用户直接基于框架跑的模型权重是全部保留的,所以用户自己训练的模型权重尺寸为啥比官方发布的大。 +你可以使用 [publish_model.py](https://github.com/open-mmlab/mmyolo/blob/main/tools/misc/publish_model.py) 脚本删掉额外字段。 + +## RTMDet 为何训练所占显存比 YOLOv5 多很多? + +训练显存较多的原因主要是 assigner 部分的差异。YOLOv5 采用的是非常简单且高效的 shape 匹配 assigner,而 RTMDet 中采用的是动态的全 batch 计算的 dynamic soft label assigner,其内部的 Cost 矩阵需要消耗比较多的显存,特别是如果当前 batch 中标注框过多时候。 +后续我们会考虑解决这个问题。 + +## 修改一些代码后是否需要重新安装 MMYOLO + +在不新增 py 代码情况下, 如果你遵循最佳实践,即使用 `mim install -v -e .` 安装的 MMYOLO,则对本地代码所作的任何修改都会生效,无需重新安装。但是如果你是新增了 py 文件然后在里面新增的代码,则依然需要重新安装即运行 `mim install -v -e .`。 + +## 如何使用多个 MMYOLO 版本进行开发 + +推荐你拥有多个 MMYOLO 工程文件夹,例如 mmyolo-v1, mmyolo-v2。 在使用不同版本 MMYOLO 时候,你可以在终端运行前设置 + +```shell +PYTHONPATH="$(dirname $0)/..":$PYTHONPATH +``` + +使得当前环境生效。如果要使用环境中安装默认的 MMYOLO 而不是当前正在在使用的,可以删除出现上述命令或者通过如下命令重置 + +```shell +unset PYTHONPATH +``` + +## 训练中保存最好模型 + +只需要在配置中设置 `default_hooks.checkpoint.save_best` 为 auto 字符串或者训练时候通过命令行设置 `--cfg-options default_hooks.checkpoint.save_best=auto` 即可。 diff --git a/docs/zh_cn/tutorials/warning_notes.md b/docs/zh_cn/tutorials/warning_notes.md new file mode 100644 index 00000000..d1051ba1 --- /dev/null +++ b/docs/zh_cn/tutorials/warning_notes.md @@ -0,0 +1,22 @@ +# 常见警告说明 + +本文档收集用户经常疑惑的警告信息说明,方便大家理解。 + +## xxx registry in mmyolo did not set import location + +完整信息为 The xxx registry in mmyolo did not set import location. Fallback to call `mmyolo.utils.register_all_modules` instead.。 +这个警告的含义说某个模块在导入时候发现没有设置导入的 location,导致无法确定其位置,因此会自动调用 `mmyolo.utils.register_all_modules` 触发包的导入。这个警告属于 MMEngine 中非常底层的模块警告, +用户理解起来可能比较困难,不过对大家使用没有任何影响,可以直接忽略。 + +## save_param_schedulers is true but self.param_schedulers is None + +以 YOLOv5 算法为例,这是因为 YOLOv5 中重新写了参数调度器策略 `YOLOv5ParamSchedulerHook`,因此 MMEngine 中设计的 ParamScheduler 是没有使用的,但是 YOLOv5 配置中也没有设置 `save_param_schedulers` 为 False。 +首先这个警告对性能和恢复训练没有任何影响,用户如果觉得这个警告会影响体验,可以设置 `default_hooks.checkpoint.save_param_scheduler` 为 False 或者训练时候通过命令行设置 `--cfg-options default_hooks.checkpoint.save_param_scheduler=False` 即可。 + +## The loss_cls will be 0. This is a normal phenomenon. + +这个和具体算法有关。以 YOLOv5 为例,其分类 loss 是只考虑正样本的,如果类别是 1,那么分类 loss 和 obj loss 就是功能重复的了,因此在设计上当类别是 1 的时候 loss_cls 是不计算的,因此始终是 0,这是正常现象。 + +## The model and loaded state dict do not match exactly + +这个警告是否会影响性能要根据进一步的打印信息来确定。如果是在微调模式下,由于用户自定义类别不一样无法加载 Head 模块的 COCO 预训练,这是一个正常现象,不会影响性能。 diff --git a/tools/misc/publish_model.py b/tools/misc/publish_model.py new file mode 100644 index 00000000..a2ccbf08 --- /dev/null +++ b/tools/misc/publish_model.py @@ -0,0 +1,57 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
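+# Example usage:
+#   python tools/misc/publish_model.py in.pth out.pth
+# The script drops training-only states (optimizer, ema_state_dict,
+# message_hub, data_preprocessor buffers and cached prior tensors) and
+# appends the first 8 characters of the SHA-256 checksum to the output
+# name, e.g. out-xxxxxxxx.pth, so the checkpoint is ready for release.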
+import argparse +import subprocess + +import torch + + +def parse_args(): + parser = argparse.ArgumentParser( + description='Process a checkpoint to be published') + parser.add_argument('in_file', help='input checkpoint filename') + parser.add_argument('out_file', help='output checkpoint filename') + args = parser.parse_args() + return args + + +def process_checkpoint(in_file, out_file): + checkpoint = torch.load(in_file, map_location='cpu') + + # remove optimizer for smaller file size + if 'optimizer' in checkpoint: + del checkpoint['optimizer'] + if 'message_hub' in checkpoint: + del checkpoint['message_hub'] + if 'ema_state_dict' in checkpoint: + del checkpoint['ema_state_dict'] + + for key in list(checkpoint['state_dict']): + if key.startswith('data_preprocessor'): + checkpoint['state_dict'].pop(key) + elif 'priors_base_sizes' in key: + checkpoint['state_dict'].pop(key) + elif 'grid_offset' in key: + checkpoint['state_dict'].pop(key) + elif 'prior_inds' in key: + checkpoint['state_dict'].pop(key) + + if torch.__version__ >= '1.6': + torch.save(checkpoint, out_file, _use_new_zipfile_serialization=False) + else: + torch.save(checkpoint, out_file) + sha = subprocess.check_output(['sha256sum', out_file]).decode() + if out_file.endswith('.pth'): + out_file_name = out_file[:-4] + else: + out_file_name = out_file + final_file = out_file_name + f'-{sha[:8]}.pth' + subprocess.Popen(['mv', out_file, final_file]) + + +def main(): + args = parse_args() + process_checkpoint(args.in_file, args.out_file) + + +if __name__ == '__main__': + main() From 6038da2728de093b51235b509ea879b0305b8ab3 Mon Sep 17 00:00:00 2001 From: leling <34684833+landhill@users.noreply.github.com> Date: Thu, 23 Feb 2023 17:13:57 +0800 Subject: [PATCH 40/64] [Feature] Support training on a single channel image dataset. 
(#460) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Add v0.3.0 changelog (#409) [Docs]Add an example project to 'projects\' folder (#409) * fix lint * [Docs]Add an example project to 'projects' folder (#409) * [Docs]Add an example project to 'projects' folder (#409) * [Docs]Add an example project to 'projects' folder (#409) * [Docs]Add an example project to 'projects' folder (#409) * [Docs]Add an example project to 'projects' folder (#409) * [Docs] (#409) * [Docs] (#409) * [Docs] (#409) * [Docs] (#409) * [Docs] (#409) * [Docs](#409) * [Docs] (#409) * [Docs] (#409) * [Docs] (#409) * [Docs] (#409) * UPDATE * [Docs] (#409) * [Docs] (#409) * [Docs] (#409) * [Docs] (#409) * [Docs] (#409) * [Docs] (#409) * update --------- Co-authored-by: huanghaian Co-authored-by: Haian Huang(深度眸) <1286304229@qq.com> --- README.md | 1 + README_zh-CN.md | 1 + .../single_multi_channel_applications.md | 188 ++++++++++++++++++ docs/en/index.rst | 1 + .../single_multi_channel_applications.md | 188 ++++++++++++++++++ docs/zh_cn/index.rst | 1 + 6 files changed, 380 insertions(+) create mode 100644 docs/en/common_usage/single_multi_channel_applications.md create mode 100644 docs/zh_cn/common_usage/single_multi_channel_applications.md diff --git a/README.md b/README.md index 5300c747..885e9a82 100644 --- a/README.md +++ b/README.md @@ -204,6 +204,7 @@ For different parts from MMDetection, we have also prepared user guides and adva - [Cross-library calls using mim](docs/en/common_usage/mim_usage.md) - [Apply multiple Necks](docs/en/common_usage/multi_necks.md) - [Specify specific device training or inference](docs/en/common_usage/specify_device.md) +- [Single and multi-channel application examples](docs/en/common_usage/single_multi_channel_applications.md) diff --git a/README_zh-CN.md b/README_zh-CN.md index a378415b..767c3178 100644 --- a/README_zh-CN.md +++ b/README_zh-CN.md @@ -225,6 +225,7 @@ MMYOLO 用法和 MMDetection 几乎一致,所有教程都是通用的,你也 - [使用 mim 跨库调用其他 OpenMMLab 仓库的脚本](docs/zh_cn/common_usage/mim_usage.md) - [应用多个 Neck](docs/zh_cn/common_usage/multi_necks.md) - [指定特定设备训练或推理](docs/zh_cn/common_usage/specify_device.md) +- [单通道和多通道应用案例](docs/zh_cn/common_usage/single_multi_channel_applications.md) diff --git a/docs/en/common_usage/single_multi_channel_applications.md b/docs/en/common_usage/single_multi_channel_applications.md new file mode 100644 index 00000000..30932708 --- /dev/null +++ b/docs/en/common_usage/single_multi_channel_applications.md @@ -0,0 +1,188 @@ +# Single and multi-channel application examples + +## Training example on a single-channel image dataset + +The default training images in MMYOLO are all color three-channel data. If you want to use a single-channel dataset for training and testing, it is expected that the following modifications are needed. + +1. All image processing pipelines have to support single channel operations +2. The input channel of the first convolutional layer of the backbone network of the model needs to be changed from 3 to 1 +3. If you wish to load COCO pre-training weights, you need to handle the first convolutional layer weight size mismatch + +The following uses the `cat` dataset as an example to describe the entire modification process, if you are using a custom grayscale image dataset, you can skip the dataset preprocessing step. 
+ +### 1 Dataset pre-processing + +The processing training of the custom dataset can be found in [Annotation-to-deployment workflow for custom dataset](../recommended_topics/labeling_to_deployment_tutorials.md)。 + +`cat` is a three-channel color image dataset. For demonstration purpose, you can run the following code and commands to replace the dataset images with single-channel images for subsequent validation. + +**1. Download the `cat` dataset for decompression** + +```shell +python tools/misc/download_dataset.py --dataset-name cat --save-dir ./data/cat --unzip --delete +``` + +**2. Convert datasets to grayscale maps** + +```python +import argparse +import imghdr +import os +from typing import List +import cv2 + +def parse_args(): + parser = argparse.ArgumentParser(description='data_path') + parser.add_argument('path', type=str, help='Original dataset path') + return parser.parse_args() + +def main(): + args = parse_args() + + path = args.path + '/images/' + save_path = path + file_list: List[str] = os.listdir(path) + # Grayscale conversion of each imager + for file in file_list: + if imghdr.what(path + '/' + file) != 'jpeg': + continue + img = cv2.imread(path + '/' + file) + img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + cv2.imwrite(save_path + '/' + file, img) + +if __name__ == '__main__': + main() +``` + +Name the above script as `cvt_single_channel.py`, and run the command as: + +```shell +python cvt_single_channel.py data/cat +``` + +### 2 Modify the base configuration file + +**At present, some image processing functions of MMYOLO, such as color space transformation, are not compatible with single-channel images, so if we use single-channel data for training directly, we need to modify part of the pipeline, which is a large amount of work**. In order to solve the incompatibility problem, the recommended approach is to load the single-channel image as a three-channel image as a three-channel data, but convert it to single-channel format before input to the network. This approach will slightly increase the arithmetic burden, but the user basically does not need to modify the code to use. + +Take `projects/misc/custom_dataset/yolov5_s-v61_syncbn_fast_1xb32-100e_cat.py` as the `base` configuration, copy it to the `configs/yolov5` directory, and add `yolov5_s- v61_syncbn_fast_1xb32-100e_cat_single_channel.py` file. We can inherit `YOLOv5DetDataPreprocessor` from the `mmyolo/models/data_preprocessors/data_preprocessor.py` file and name the new class `YOLOv5SCDetDataPreprocessor`, in which convert the image to a single channel, add the dependency library and register the new class in `mmyolo/models/data_preprocessors/__init__.py`. The `YOLOv5SCDetDataPreprocessor` sample code is: + +```python +@MODELS.register_module() +class YOLOv5SCDetDataPreprocessor(YOLOv5DetDataPreprocessor): + """Rewrite collate_fn to get faster training speed. + + Note: It must be used together with `mmyolo.datasets.utils.yolov5_collate` + """ + + def forward(self, data: dict, training: bool = False) -> dict: + """Perform normalization, padding, bgr2rgb conversion and convert to single channel image based on ``DetDataPreprocessor``. + + Args: + data (dict): Data sampled from dataloader. + training (bool): Whether to enable training time augmentation. + + Returns: + dict: Data in the same format as the model input. 
+ """ + if not training: + return super().forward(data, training) + + data = self.cast_data(data) + inputs, data_samples = data['inputs'], data['data_samples'] + assert isinstance(data['data_samples'], dict) + + # TODO: Supports multi-scale training + if self._channel_conversion and inputs.shape[1] == 3: + inputs = inputs[:, [2, 1, 0], ...] + + if self._enable_normalize: + inputs = (inputs - self.mean) / self.std + + if self.batch_augments is not None: + for batch_aug in self.batch_augments: + inputs, data_samples = batch_aug(inputs, data_samples) + + img_metas = [{'batch_input_shape': inputs.shape[2:]}] * len(inputs) + data_samples = { + 'bboxes_labels': data_samples['bboxes_labels'], + 'img_metas': img_metas + } + + # Convert to single channel image + inputs = inputs.mean(dim=1, keepdim=True) + + return {'inputs': inputs, 'data_samples': data_samples} +``` + +At this point, the `yolov5_s-v61_syncbn_fast_1xb32-100e_cat_single_channel.py` configuration file reads as follows. + +```python +_base_ = 'yolov5_s-v61_syncbn_fast_1xb32-100e_cat.py' + +_base_.model.data_preprocessor.type = 'YOLOv5SCDetDataPreprocessor' +``` + +### 3 Pre-training model loading problem + +When using a pre-trained 3-channel model directly, it's theoretically possible to experience a decrease in accuracy, though this has not been experimentally verified. To mitigate this potential issue, there are several solutions, including adjusting the weight of each channel in the input layer. One approach is to set the weight of each channel in the input layer to the average of the weights of the original 3 channels. Alternatively, the weight of each channel could be set to one of the weights of the original 3 channels, or the input layer could be trained directly without modifying the weights, depending on the specific circumstances. In this work, we chose to adjust the weights of the 3 channels in the input layer to the average of the weights of the pre-trained 3 channels. + +```python +import torch + +def main(): + # Load weights file + state_dict = torch.load( + 'checkpoints/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth' + ) + + # Modify input layer weights + weights = state_dict['state_dict']['backbone.stem.conv.weight'] + avg_weight = weights.mean(dim=1, keepdim=True) + state_dict['state_dict']['backbone.stem.conv.weight'] = avg_weight + + # Save the modified weights to a new file + torch.save( + state_dict, + 'checkpoints/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187_single_channel.pth' + ) + +if __name__ == '__main__': + main() +``` + +At this point, the `yolov5_s-v61_syncbn_fast_1xb32-100e_cat_single_channel.py` configuration file reads as follows: + +```python +_base_ = 'yolov5_s-v61_syncbn_fast_1xb32-100e_cat.py' + +_base_.model.data_preprocessor.type = 'YOLOv5SCDetDataPreprocessor' + +load_from = './checkpoints/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187_single_channel.pth' +``` + +### 4 Model training effect + + + +The left figure shows the actual label and the right figure shows the target detection result. 
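For reference, training with the new config can be launched with the standard single-GPU entry point (a sketch; it assumes the config file was placed under `configs/yolov5/` as described above):

```shell
python tools/train.py \
    configs/yolov5/yolov5_s-v61_syncbn_fast_1xb32-100e_cat_single_channel.py
```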
+ +```shell + Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.958 + Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 1.000 + Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 1.000 + Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000 + Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = -1.000 + Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.958 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.881 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.969 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.969 + Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000 + Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = -1.000 + Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.969 +bbox_mAP_copypaste: 0.958 1.000 1.000 -1.000 -1.000 0.958 +Epoch(val) [100][116/116] coco/bbox_mAP: 0.9580 coco/bbox_mAP_50: 1.0000 coco/bbox_mAP_75: 1.0000 coco/bbox_mAP_s: -1.0000 coco/bbox_mAP_m: -1.0000 coco/bbox_mAP_l: 0.9580 +``` + +## Training example on a multi-channel image dataset + +TODO diff --git a/docs/en/index.rst b/docs/en/index.rst index 175f166b..92837c47 100644 --- a/docs/en/index.rst +++ b/docs/en/index.rst @@ -45,6 +45,7 @@ You can switch between Chinese and English documents in the top-right corner of common_usage/mim_usage.md common_usage/multi_necks.md common_usage/specify_device.md + common_usage/single_multi_channel_applications.md .. toctree:: diff --git a/docs/zh_cn/common_usage/single_multi_channel_applications.md b/docs/zh_cn/common_usage/single_multi_channel_applications.md new file mode 100644 index 00000000..a20ef904 --- /dev/null +++ b/docs/zh_cn/common_usage/single_multi_channel_applications.md @@ -0,0 +1,188 @@ +# 单通道和多通道应用案例 + +## 在单通道图像数据集上训练示例 + +MMYOLO 中默认的训练图片均为彩色三通道数据,如果希望采用单通道数据集进行训练和测试,预计需要修改的地方包括: + +1. 所有的图片处理 pipeline 都要支持单通道运算 +2. 模型的骨干网络的第一个卷积层输入通道需要从 3 改成 1 +3. 如果希望加载 COCO 预训练权重,则需要处理第一个卷积层权重尺寸不匹配问题 + +下面以 `cat` 数据集为例,描述整个修改过程,如果你使用的是自定义灰度图像数据集,你可以跳过数据集预处理这一步。 + +### 1 数据集预处理 + +自定义数据集的处理训练可参照[自定义数据集 标注+训练+测试+部署 全流程](../recommended_topics/labeling_to_deployment_tutorials.md)。 + +`cat` 是一个三通道彩色图片数据集,为了方便演示,你可以运行下面的代码和命令,将数据集图片替换为单通道图片,方便后续验证。 + +**1. 下载 `cat` 数据集进行解压** + +```shell +python tools/misc/download_dataset.py --dataset-name cat --save-dir ./data/cat --unzip --delete +``` + +**2. 
将数据集转换为灰度图** + +```python +import argparse +import imghdr +import os +from typing import List +import cv2 + +def parse_args(): + parser = argparse.ArgumentParser(description='data_path') + parser.add_argument('path', type=str, help='Original dataset path') + return parser.parse_args() + +def main(): + args = parse_args() + + path = args.path + '/images/' + save_path = path + file_list: List[str] = os.listdir(path) + # Grayscale conversion of each imager + for file in file_list: + if imghdr.what(path + '/' + file) != 'jpeg': + continue + img = cv2.imread(path + '/' + file) + img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) + cv2.imwrite(save_path + '/' + file, img) + +if __name__ == '__main__': + main() +``` + +将上述脚本命名为 `cvt_single_channel.py`, 运行命令为: + +```shell +python cvt_single_channel.py data/cat +``` + +### 2 修改 base 配置文件 + +**目前 MMYOLO 的一些图像处理函数例如颜色空间变换还不兼容单通道图片,如果直接采用单通道数据训练需要修改部分 pipeline,工作量较大**。为了解决不兼容问题,推荐的做法是将单通道图片作为采用三通道图片方式读取将其加载为三通道数据,但是在输入到网络前将其转换为单通道格式。这种做法会稍微增加一些运算负担,但是用户基本不需要修改代码即可使用。 + +以 `projects/misc/custom_dataset/yolov5_s-v61_syncbn_fast_1xb32-100e_cat.py`为 `base` 配置,将其复制到 `configs/yolov5` 目录下,在同级配置路径下新增 `yolov5_s-v61_syncbn_fast_1xb32-100e_cat_single_channel.py` 文件。 我们可以 `mmyolo/models/data_preprocessors/data_preprocessor.py` 文件中继承 `YOLOv5DetDataPreprocessor` 并命名新类为 `YOLOv5SCDetDataPreprocessor`, 在其中将图片转成单通道,添加依赖库并在`mmyolo/models/data_preprocessors/__init__.py`中注册新类。 `YOLOv5SCDetDataPreprocessor` 示例代码为: + +```python +@MODELS.register_module() +class YOLOv5SCDetDataPreprocessor(YOLOv5DetDataPreprocessor): + """Rewrite collate_fn to get faster training speed. + + Note: It must be used together with `mmyolo.datasets.utils.yolov5_collate` + """ + + def forward(self, data: dict, training: bool = False) -> dict: + """Perform normalization, padding, bgr2rgb conversion and convert to single channel image based on ``DetDataPreprocessor``. + + Args: + data (dict): Data sampled from dataloader. + training (bool): Whether to enable training time augmentation. + + Returns: + dict: Data in the same format as the model input. + """ + if not training: + return super().forward(data, training) + + data = self.cast_data(data) + inputs, data_samples = data['inputs'], data['data_samples'] + assert isinstance(data['data_samples'], dict) + + # TODO: Supports multi-scale training + if self._channel_conversion and inputs.shape[1] == 3: + inputs = inputs[:, [2, 1, 0], ...] 
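+            # 上面一行仅交换 BGR -> RGB 通道顺序;灰度图按三通道读取时三个通道数值相同,
+            # 因此该变换不影响结果,真正的单通道转换在后面对通道取均值处完成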
+ + if self._enable_normalize: + inputs = (inputs - self.mean) / self.std + + if self.batch_augments is not None: + for batch_aug in self.batch_augments: + inputs, data_samples = batch_aug(inputs, data_samples) + + img_metas = [{'batch_input_shape': inputs.shape[2:]}] * len(inputs) + data_samples = { + 'bboxes_labels': data_samples['bboxes_labels'], + 'img_metas': img_metas + } + + # Convert to single channel image + inputs = inputs.mean(dim=1, keepdim=True) + + return {'inputs': inputs, 'data_samples': data_samples} +``` + +此时 `yolov5_s-v61_syncbn_fast_1xb32-100e_cat_single_channel.py`配置文件内容为如下所示: + +```python +_base_ = 'yolov5_s-v61_syncbn_fast_1xb32-100e_cat.py' + +_base_.model.data_preprocessor.type = 'YOLOv5SCDetDataPreprocessor' +``` + +### 3 预训练模型加载问题 + +直接使用原三通道的预训练模型,理论上会导致精度有所降低(未实验验证)。可采用的解决思路:将输入层 3 通道每个通道的权重调整为原 3 通道权重的平均值, 或将输入层每个通道的权重调整为原3通道某一通道权重,也可以对输入层权重不做修改直接训练,具体效果根据实际情况有所不同。这里采用将输入层 3 个通道权重调整为预训练 3 通道权重平均值的方式。 + +```python +import torch + +def main(): + # 加载权重文件 + state_dict = torch.load( + 'checkpoints/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth' + ) + + # 修改输入层权重 + weights = state_dict['state_dict']['backbone.stem.conv.weight'] + avg_weight = weights.mean(dim=1, keepdim=True) + state_dict['state_dict']['backbone.stem.conv.weight'] = avg_weight + + # 保存修改后的权重到新文件 + torch.save( + state_dict, + 'checkpoints/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187_single_channel.pth' + ) + +if __name__ == '__main__': + main() +``` + +此时 `yolov5_s-v61_syncbn_fast_1xb32-100e_cat_single_channel.py`配置文件内容为如下所示: + +```python +_base_ = 'yolov5_s-v61_syncbn_fast_1xb32-100e_cat.py' + +_base_.model.data_preprocessor.type = 'YOLOv5SCDetDataPreprocessor' + +load_from = './checkpoints/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187_single_channel.pth' +``` + +### 4 模型训练效果 + + + +左图是实际标签,右图是目标检测结果。 + +```shell + Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.958 + Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 1.000 + Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 1.000 + Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000 + Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = -1.000 + Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.958 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.881 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.969 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.969 + Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000 + Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = -1.000 + Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.969 +bbox_mAP_copypaste: 0.958 1.000 1.000 -1.000 -1.000 0.958 +Epoch(val) [100][116/116] coco/bbox_mAP: 0.9580 coco/bbox_mAP_50: 1.0000 coco/bbox_mAP_75: 1.0000 coco/bbox_mAP_s: -1.0000 coco/bbox_mAP_m: -1.0000 coco/bbox_mAP_l: 0.9580 +``` + +## 在多通道图像数据集上训练示例 + +TODO diff --git a/docs/zh_cn/index.rst b/docs/zh_cn/index.rst index 1f27366c..5026c30e 100644 --- a/docs/zh_cn/index.rst +++ b/docs/zh_cn/index.rst @@ -45,6 +45,7 @@ common_usage/mim_usage.md common_usage/multi_necks.md common_usage/specify_device.md + common_usage/single_multi_channel_applications.md .. 
toctree:: From ea8e3f05a1231ace24c38a766b1fb531bef93cc9 Mon Sep 17 00:00:00 2001 From: Xin Li <7219519+xin-li-67@users.noreply.github.com> Date: Fri, 24 Feb 2023 09:36:17 +0800 Subject: [PATCH 41/64] [Docs] add deploy doc to 15_min_od tutorial (#584) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * add deploy doc to 15_min * Update docs/zh_cn/get_started/15_minutes_object_detection.md Co-authored-by: Haian Huang(深度眸) <1286304229@qq.com> * 1. reset model to checkpoint 2. update demo photo and add version number --------- Co-authored-by: Haian Huang(深度眸) <1286304229@qq.com> --- .../15_minutes_object_detection.md | 113 +++++++++++++++++- projects/easydeploy/tools/image-demo.py | 7 +- 2 files changed, 116 insertions(+), 4 deletions(-) diff --git a/docs/zh_cn/get_started/15_minutes_object_detection.md b/docs/zh_cn/get_started/15_minutes_object_detection.md index 6523dd51..c53d8040 100644 --- a/docs/zh_cn/get_started/15_minutes_object_detection.md +++ b/docs/zh_cn/get_started/15_minutes_object_detection.md @@ -406,6 +406,117 @@ python demo/boxam_vis_demo.py data/cat/images/IMG_20221020_112705.jpg \ ## EasyDeploy 模型部署 -TODO +此处我们将通过 MMYOLO 的 [EasyDeploy](../../../projects/easydeploy/) 来演示模型的转换部署和基本推理。 + +首先需要在当前 MMYOLO 的虚拟环境中按照 EasyDeploy 的 [基本文档](../../../projects/easydeploy/docs/model_convert.md) 对照自己的设备安装好所需的各个库。 + +```shell +pip install onnx +pip install onnx-simplifier # 如果需要使用 simplify 功能需要安装 +pip install tensorrt # 如果有 GPU 环境并且需要输出 TensorRT 模型需要继续执行 +``` + +完成安装后就可以用以下命令对已经训练好的针对 cat 数据集的模型一键转换部署,当前设备的 ONNX 版本为 1.13.0,TensorRT 版本为 8.5.3.1,故可保持 `--opset` 为 11,其余各项参数的具体含义和参数值需要对照使用的 config 文件进行调整。此处我们先导出 CPU 版本的 ONNX 模型,`--backend` 为 1。 + +```shell +python projects/easydeploy/tools/export.py \ + configs/yolov5/yolov5_s-v61_fast_1xb12-40e_cat.py \ + work_dirs/yolov5_s-v61_fast_1xb12-40e_cat/epoch_40.pth \ + --work-dir work_dirs/yolov5_s-v61_fast_1xb12-40e_cat \ + --img-size 640 640 \ + --batch 1 \ + --device cpu \ + --simplify \ + --opset 11 \ + --backend 1 \ + --pre-topk 1000 \ + --keep-topk 100 \ + --iou-threshold 0.65 \ + --score-threshold 0.25 +``` + +成功运行后就可以在 `work-dir` 下得到转换后的 ONNX 模型,默认使用 `end2end.onnx` 命名。 + +接下来我们使用此 `end2end.onnx` 模型来进行一个基本的图片推理: + +```shell +python projects/easydeploy/tools/image-demo.py \ + data/cat/images/IMG_20210728_205312.jpg \ + configs/yolov5/yolov5_s-v61_fast_1xb12-40e_cat.py \ + work_dirs/yolov5_s-v61_fast_1xb12-40e_cat/end2end.onnx \ + --device cpu +``` + +成功完成推理后会在默认的 MMYOLO 根目录下的 `output` 文件夹生成推理结果图,如果想直观看到结果而不需要保存,可以在上述命令结尾加上 `--show` ,为了方便展示,下图是生成结果的截取部分。 + +
+image +
+ +我们继续转换对应 TensorRT 的 engine 文件,因为 TensorRT 需要对应当前环境以及部署使用的版本进行,所以一定要确认导出参数,这里我们导出对应 TensorRT8 版本的文件,`--backend` 为 2。 + +```shell +python projects/easydeploy/tools/export.py \ + configs/yolov5/yolov5_s-v61_fast_1xb12-40e_cat.py \ + work_dirs/yolov5_s-v61_fast_1xb12-40e_cat/epoch_40.pth \ + --work-dir work_dirs/yolov5_s-v61_fast_1xb12-40e_cat \ + --img-size 640 640 \ + --batch 1 \ + --device cuda:0 \ + --simplify \ + --opset 11 \ + --backend 2 \ + --pre-topk 1000 \ + --keep-topk 100 \ + --iou-threshold 0.65 \ + --score-threshold 0.25 +``` + +成功执行后得到的 `end2end.onnx` 就是对应 TensorRT8 部署需要的 ONNX 文件,我们使用这个文件完成 TensorRT engine 的转换。 + +```shell +python projects/easydeploy/tools/build_engine.py \ + work_dirs/yolov5_s-v61_fast_1xb12-40e_cat/end2end.onnx \ + --img-size 640 640 \ + --device cuda:0 +``` + +成功执行后会在 `work-dir` 下生成 `end2end.engine` 文件: + +```shell +work_dirs/yolov5_s-v61_fast_1xb12-40e_cat +├── 202302XX_XXXXXX +│ ├── 202302XX_XXXXXX.log +│ └── vis_data +│ ├── 202302XX_XXXXXX.json +│ ├── config.py +│ └── scalars.json +├── best_coco +│ └── bbox_mAP_epoch_40.pth +├── end2end.engine +├── end2end.onnx +├── epoch_30.pth +├── epoch_40.pth +├── last_checkpoint +└── yolov5_s-v61_fast_1xb12-40e_cat.py +``` + +我们继续使用 `image-demo.py` 进行图片推理: + +```shell +python projects/easydeploy/tools/image-demo.py \ + data/cat/images/IMG_20210728_205312.jpg \ + configs/yolov5/yolov5_s-v61_fast_1xb12-40e_cat.py \ + work_dirs/yolov5_s-v61_fast_1xb12-40e_cat/end2end.engine \ + --device cuda:0 +``` + +此处依旧选择在 `output` 下保存推理结果而非直接显示结果,同样为了方便展示,下图是生成结果的截取部分。 + +
+image +
+ +这样我们就完成了将训练完成的模型进行转换部署并且检查推理结果的工作。至此本教程结束。 以上完整内容可以查看 [15_minutes_object_detection.ipynb](<>) diff --git a/projects/easydeploy/tools/image-demo.py b/projects/easydeploy/tools/image-demo.py index c676949f..197ad070 100644 --- a/projects/easydeploy/tools/image-demo.py +++ b/projects/easydeploy/tools/image-demo.py @@ -125,15 +125,16 @@ def main(): for (bbox, score, label) in zip(bboxes, scores, labels): bbox = bbox.tolist() color = colors[label] - name = f'cls:{label}_score:{score:0.4f}' + label_name = cfg.get('class_name', {})[label] + name = f'cls:{label_name}_score:{score:0.4f}' cv2.rectangle(bgr, bbox[:2], bbox[2:], color, 2) cv2.putText( bgr, name, (bbox[0], bbox[1] - 2), cv2.FONT_HERSHEY_SIMPLEX, - 0.75, [225, 255, 255], - thickness=2) + 2.0, [225, 255, 255], + thickness=3) if args.show: mmcv.imshow(bgr, 'result', 0) From a22a208d85b3ecd7505f0c055da96f413c639504 Mon Sep 17 00:00:00 2001 From: kitecats <90194592+kitecats@users.noreply.github.com> Date: Fri, 24 Feb 2023 12:59:26 +0800 Subject: [PATCH 42/64] [Docs] Refine doc about faq and troubleshooting_steps (#587) * refine doc * fix ci --- .../troubleshooting_steps.md | 10 ++++---- docs/zh_cn/tutorials/faq.md | 23 ++++++++++++++----- 2 files changed, 23 insertions(+), 10 deletions(-) diff --git a/docs/zh_cn/recommended_topics/troubleshooting_steps.md b/docs/zh_cn/recommended_topics/troubleshooting_steps.md index 7cca926f..803c7271 100644 --- a/docs/zh_cn/recommended_topics/troubleshooting_steps.md +++ b/docs/zh_cn/recommended_topics/troubleshooting_steps.md @@ -47,10 +47,12 @@ ## ValueError: need at least one array to concatenate -这个是一个非常场景的错误,可能出现在训练一开始或者训练正常但是评估时候。不管出现在何阶段,均说明你的配置不对,最常见的错误就是 `num_classes` 参数设置不对。 -在 MMYOLO 或者 MMDet 中大部分配置都是以 COCO 数据为例,因此配置中默认的 `num_classes` 是 80, 如果用户自定义数据集没有正确修改这个字段则会出现上述错误。 -MMYOLO 中有些算法配置会在多个模块中都需要 `num_classes` 参数,用户经常出现的错误就是仅仅修改了某一个地方的 `num_classes` 而没有将所有的 `num_classes` 都修改。想快速解决这个问题,可以使用 [print_config](https://github.com/open-mmlab/mmyolo/blob/main/tools/misc/print_config.py) -脚本打印下全配置,然后全局搜索 `num_classes` 确认是否有没有修改的模块。 +这个是一个非常常见的错误,可能出现在训练一开始或者训练正常但是评估时候。不管出现在何阶段,均说明你的配置不对: + +1. 最常见的错误就是 `num_classes` 参数设置不对。在 MMYOLO 或者 MMDet 中大部分配置都是以 COCO 数据为例,因此配置中默认的 `num_classes` 是 80, 如果用户自定义数据集没有正确修改这个字段则会出现上述错误。 + MMYOLO 中有些算法配置会在多个模块中都需要 `num_classes` 参数,用户经常出现的错误就是仅仅修改了某一个地方的 `num_classes` 而没有将所有的 `num_classes` 都修改。想快速解决这个问题,可以使用 [print_config](https://github.com/open-mmlab/mmyolo/blob/main/tools/misc/print_config.py) + 脚本打印下全配置,然后全局搜索 `num_classes` 确认是否有没有修改的模块。 +2. 该错误还可能会出现在对 `dataset.metainfo.classes` 参数设置不对造成的。当用户希望训练自己的数据集但是未能正确的修改 `dataset.metainfo.classes` 参数,而默认的使用 `COCO` 数据集中的类别时,且用户自定义数据集的所有类别不在 `COCO` 数据集的类别里就会出现该错误。这时需要用户核对并修改正确的 `dataset.metainfo.classes` 信息。 ## 评估时候 IndexError: list index out of range diff --git a/docs/zh_cn/tutorials/faq.md b/docs/zh_cn/tutorials/faq.md index a088b5bd..f8575037 100644 --- a/docs/zh_cn/tutorials/faq.md +++ b/docs/zh_cn/tutorials/faq.md @@ -42,8 +42,8 @@ projects 快速支持一些在当前版本中较难支持或者想快速支持 ## MMYOLO 中是否可以加入纯背景图片进行训练? -将纯背景图片加入训练大部分情况可以抑制误报率,是否将纯背景图片加入训练功能已经大部分数据集上支持了。以 `YOLOv5CocoDataset` 为例,核心控制参数是 `train_dataloader.dataset.filter_cfg.filter_empty_gt`,如果 `filter_empty_gt` 为 True 表示将纯背景图片过滤掉不加入训练,反正将纯 -背景图片加入到训练中。 目前 MMYOLO 中大部分算法都是默认将纯背景图片加入训练中。 +将纯背景图片加入训练大部分情况可以抑制误报率,是否将纯背景图片加入训练功能已经大部分数据集上支持了。以 `YOLOv5CocoDataset` 为例,核心控制参数是 `train_dataloader.dataset.filter_cfg.filter_empty_gt`,如果 `filter_empty_gt` 为 True 表示将纯背景图片过滤掉不加入训练, +反之将纯背景图片加入到训练中。 目前 MMYOLO 中大部分算法都是默认将纯背景图片加入训练中。 ## MMYOLO 是否有计算模型推理 FPS 脚本? 
@@ -61,16 +61,16 @@ EasyDeploy 支持的功能目前没有 MMDeploy 多,但是使用上更加简 ## MMYOLO 中为何没有支持 MMDet 类似的自动学习率缩放功能? -原因是实验发现 YOLO 系列算法不是非常满足现象缩放功能。在多个数据集上验证发现会出现不基于 batch size 自动学习率缩放效果好于缩放的情形。因此暂时 MMYOLO 还没有支持自动学习率缩放功能。 +原因是实验发现 YOLO 系列算法不是非常满足线性缩放功能。在多个数据集上验证发现会出现不基于 batch size 自动学习率缩放效果好于缩放的情形。因此暂时 MMYOLO 还没有支持自动学习率缩放功能。 ## 自己训练的模型权重尺寸为啥比官方发布的大? -原因是用户自己训练的权重通常包括 `optimizer`、`ema_state_dict` 和 `message_hub` 等额外数据,这部分数据我们会在模型发布时候自动删掉,而用户直接基于框架跑的模型权重是全部保留的,所以用户自己训练的模型权重尺寸为啥比官方发布的大。 +原因是用户自己训练的权重通常包括 `optimizer`、`ema_state_dict` 和 `message_hub` 等额外数据,这部分数据我们会在模型发布时候自动删掉,而用户直接基于框架跑的模型权重是全部保留的,所以用户自己训练的模型权重尺寸会比官方发布的大。 你可以使用 [publish_model.py](https://github.com/open-mmlab/mmyolo/blob/main/tools/misc/publish_model.py) 脚本删掉额外字段。 ## RTMDet 为何训练所占显存比 YOLOv5 多很多? -训练显存较多的原因主要是 assigner 部分的差异。YOLOv5 采用的是非常简单且高效的 shape 匹配 assigner,而 RTMDet 中采用的是动态的全 batch 计算的 dynamic soft label assigner,其内部的 Cost 矩阵需要消耗比较多的显存,特别是如果当前 batch 中标注框过多时候。 +训练显存较多的原因主要是 assigner 部分的差异。YOLOv5 采用的是非常简单且高效的 shape 匹配 assigner,而 RTMDet 中采用的是动态的全 batch 计算的 dynamic soft label assigner,其内部的 Cost 矩阵需要消耗比较多的显存,特别是当前 batch 中标注框过多时候。 后续我们会考虑解决这个问题。 ## 修改一些代码后是否需要重新安装 MMYOLO @@ -93,4 +93,15 @@ unset PYTHONPATH ## 训练中保存最好模型 -只需要在配置中设置 `default_hooks.checkpoint.save_best` 为 auto 字符串或者训练时候通过命令行设置 `--cfg-options default_hooks.checkpoint.save_best=auto` 即可。 +用户可以通过在配置中设置 `default_hooks.checkpoint.save_best` 参数来选择根据什么指标来筛选最优模型。以 `COCO` 数据集检测任务为例, +则 `default_hooks.checkpoint.save_best` 可以选择输入的参数有: + +1. `auto` 将会根据验证集中的第一个评价指标作为筛选条件。 +2. `coco/bbox_mAP` 将会根据 `bbox_mAP` 作为筛选条件。 +3. `coco/bbox_mAP_50` 将会根据 `bbox_mAP_50` 作为筛选条件。 +4. `coco/bbox_mAP_75` 将会根据 `bbox_mAP_75` 作为筛选条件。 +5. `coco/bbox_mAP_s` 将会根据 `bbox_mAP_s` 作为筛选条件。 +6. `coco/bbox_mAP_m` 将会根据 `bbox_mAP_m` 作为筛选条件。 +7. `coco/bbox_mAP_l` 将会根据 `bbox_mAP_l` 作为筛选条件。 + +此外用户还可以选择筛选的逻辑,通过设置配置中的 `default_hooks.checkpoint.rule` 来选择判断逻辑,如:`default_hooks.checkpoint.rule=greater` 表示指标越大越好。更详细的使用可以参考 [checkpoint_hook](https://github.com/open-mmlab/mmengine/blob/main/mmengine/hooks/checkpoint_hook.py) 来修改 From 78dc0fde99dbe01c4f9c70d620a76d73110d3909 Mon Sep 17 00:00:00 2001 From: kitecats <90194592+kitecats@users.noreply.github.com> Date: Fri, 24 Feb 2023 15:53:44 +0800 Subject: [PATCH 43/64] [Docs] Refine doc about troubleshooting_steps and 15_minutes_object_detection (#589) * refine doc * fix ci * refine doc about troubleshooting_steps and 15_minutes_object_detection --- docs/zh_cn/get_started/15_minutes_object_detection.md | 4 ++-- docs/zh_cn/recommended_topics/troubleshooting_steps.md | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/zh_cn/get_started/15_minutes_object_detection.md b/docs/zh_cn/get_started/15_minutes_object_detection.md index c53d8040..38cec820 100644 --- a/docs/zh_cn/get_started/15_minutes_object_detection.md +++ b/docs/zh_cn/get_started/15_minutes_object_detection.md @@ -92,7 +92,7 @@ class_name = ('cat', ) # 数据集类别名称 num_classes = len(class_name) # 数据集类别数 # metainfo 必须要传给后面的 dataloader 配置,否则无效 # palette 是可视化时候对应类别的显示颜色 -# palette 长度必须大于等于和 classes 长度 +# palette 长度必须大于或等于 classes 长度 metainfo = dict(classes=class_name, palette=[(20, 220, 60)]) # 基于 tools/analysis_tools/optimize_anchors.py 自适应计算的 anchor @@ -278,7 +278,7 @@ python tools/test.py configs/yolov5/yolov5_s-v61_fast_1xb12-40e_cat.py \ --show-dir show_results ``` -运行以上测试命令, 你不不仅可以得到**模型训练**部分所打印的 AP 性能,还可以将推理结果图片自动保存至 `work_dirs/yolov5_s-v61_fast_1xb12-40e_cat/{timestamp}/show_results` 文件夹中。下面为其中一张结果图片,左图为实际标注,右图为模型推理结果。 +运行以上测试命令, 你不仅可以得到**模型训练**部分所打印的 AP 性能,还可以将推理结果图片自动保存至 
`work_dirs/yolov5_s-v61_fast_1xb12-40e_cat/{timestamp}/show_results` 文件夹中。下面为其中一张结果图片,左图为实际标注,右图为模型推理结果。
result_img diff --git a/docs/zh_cn/recommended_topics/troubleshooting_steps.md b/docs/zh_cn/recommended_topics/troubleshooting_steps.md index 803c7271..cc4fc2e2 100644 --- a/docs/zh_cn/recommended_topics/troubleshooting_steps.md +++ b/docs/zh_cn/recommended_topics/troubleshooting_steps.md @@ -31,7 +31,7 @@ ## 基于官方配置继承新建的配置出现 unexpected keyword argument -这通常是由于你没有删除 base 配置中的额外参数。 可以在你新建配置中的修改字典中增加 `__delete__=True` 删掉 base 中该类之前的所有参数。 +这通常是由于你没有删除 base 配置中的额外参数。 可以在你新建配置所修改的字典中增加 `_delete_=True` 删掉 base 中该类之前的所有参数。 ## The testing results of the whole dataset is empty From 1c833eb195db2ec91a7985d97af41e60acaf8098 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Haian=20Huang=28=E6=B7=B1=E5=BA=A6=E7=9C=B8=29?= <1286304229@qq.com> Date: Fri, 24 Feb 2023 21:46:24 +0800 Subject: [PATCH 44/64] [Feature] Support TTA (#553) * support TTA * update note --- README.md | 3 +- README_zh-CN.md | 1 + configs/_base_/det_p5_tta.py | 57 ++++++++++++ .../ppyoloe_plus_s_fast_8xb8-80e_coco.py | 2 +- configs/rtmdet/README.md | 15 ++-- .../rtmdet_l_syncbn_fast_8xb32-300e_coco.py | 2 +- configs/yolov5/README.md | 22 ++--- .../voc/yolov5_s-v61_fast_1xb64-50e_voc.py | 36 ++++++++ ...v5_s-p6-v62_syncbn_fast_8xb16-300e_coco.py | 35 ++++++++ .../yolov5_s-v61_syncbn_8xb16-300e_coco.py | 2 +- .../yolov6_s_syncbn_fast_8xb32-400e_coco.py | 2 +- .../yolov7_l_syncbn_fast_8x16b-300e_coco.py | 2 +- ...yolov7_w-p6_syncbn_fast_8x16b-300e_coco.py | 53 +++++++++-- configs/yolov8/README.md | 25 +++--- .../yolov8_s_syncbn_fast_8xb16-500e_coco.py | 2 +- configs/yolox/yolox_p5_tta.py | 55 ++++++++++++ configs/yolox/yolox_s_fast_8xb8-300e_coco.py | 2 +- .../yolox/yolox_tiny_fast_8xb8-300e_coco.py | 37 +++++++- docs/en/common_usage/tta.md | 87 +++++++++++++++++++ docs/en/index.rst | 1 + docs/zh_cn/common_usage/tta.md | 87 +++++++++++++++++++ docs/zh_cn/index.rst | 1 + mmyolo/datasets/transforms/transforms.py | 5 +- tools/misc/print_config.py | 59 +++++++++++++ tools/test.py | 23 ++++- 25 files changed, 568 insertions(+), 48 deletions(-) create mode 100644 configs/_base_/det_p5_tta.py create mode 100644 configs/yolox/yolox_p5_tta.py create mode 100644 docs/en/common_usage/tta.md create mode 100644 docs/zh_cn/common_usage/tta.md create mode 100644 tools/misc/print_config.py diff --git a/README.md b/README.md index 885e9a82..ecf32b4f 100644 --- a/README.md +++ b/README.md @@ -196,8 +196,9 @@ For different parts from MMDetection, we have also prepared user guides and adva - [Resume training](docs/en/common_usage/resume_training.md) - [Enabling and disabling SyncBatchNorm](docs/en/common_usage/syncbn.md) - [Enabling AMP](docs/en/common_usage/amp_training.md) +- [TTA Related Notes](docs/en/common_usage/tta.md) - [Add plugins to the backbone network](docs/en/common_usage/plugins.md) -- [Freeze layers](docs/en/common_usage/common_usage/freeze_layers.md) +- [Freeze layers](docs/en/common_usage/freeze_layers.md) - [Output model predictions](docs/en/common_usage/output_predictions.md) - [Set random seed](docs/en/common_usage/set_random_seed.md) - [Module combination](docs/en/common_usage/module_combination.md) diff --git a/README_zh-CN.md b/README_zh-CN.md index 767c3178..eae02d86 100644 --- a/README_zh-CN.md +++ b/README_zh-CN.md @@ -217,6 +217,7 @@ MMYOLO 用法和 MMDetection 几乎一致,所有教程都是通用的,你也 - [恢复训练](docs/zh_cn/common_usage/resume_training.md) - [开启和关闭 SyncBatchNorm](docs/zh_cn/common_usage/syncbn.md) - [开启混合精度训练](docs/zh_cn/common_usage/amp_training.md) +- [测试时增强相关说明](docs/zh_cn/common_usage/tta.md) - [给主干网络增加插件](docs/zh_cn/common_usage/plugins.md) - 
[冻结指定网络层权重](docs/zh_cn/common_usage/common_usage/freeze_layers.md) - [输出模型预测结果](docs/zh_cn/common_usage/output_predictions.md) diff --git a/configs/_base_/det_p5_tta.py b/configs/_base_/det_p5_tta.py new file mode 100644 index 00000000..cbbaf2e6 --- /dev/null +++ b/configs/_base_/det_p5_tta.py @@ -0,0 +1,57 @@ +# TODO: Need to solve the problem of multiple file_client_args parameters +# _file_client_args = dict( +# backend='petrel', +# path_mapping=dict({ +# './data/': 's3://openmmlab/datasets/detection/', +# 'data/': 's3://openmmlab/datasets/detection/' +# })) +_file_client_args = dict(backend='disk') + +tta_model = dict( + type='mmdet.DetTTAModel', + tta_cfg=dict(nms=dict(type='nms', iou_threshold=0.65), max_per_img=300)) + +img_scales = [(640, 640), (320, 320), (960, 960)] + +# LoadImageFromFile +# / | \ +# (RatioResize,LetterResize) (RatioResize,LetterResize) (RatioResize,LetterResize) # noqa +# / \ / \ / \ +# RandomFlip RandomFlip RandomFlip RandomFlip RandomFlip RandomFlip # noqa +# | | | | | | +# LoadAnn LoadAnn LoadAnn LoadAnn LoadAnn LoadAnn +# | | | | | | +# PackDetIn PackDetIn PackDetIn PackDetIn PackDetIn PackDetIn # noqa + +_multiscale_resize_transforms = [ + dict( + type='Compose', + transforms=[ + dict(type='YOLOv5KeepRatioResize', scale=s), + dict( + type='LetterResize', + scale=s, + allow_scale_up=False, + pad_val=dict(img=114)) + ]) for s in img_scales +] + +tta_pipeline = [ + dict(type='LoadImageFromFile', file_client_args=_file_client_args), + dict( + type='TestTimeAug', + transforms=[ + _multiscale_resize_transforms, + [ + dict(type='mmdet.RandomFlip', prob=1.), + dict(type='mmdet.RandomFlip', prob=0.) + ], [dict(type='mmdet.LoadAnnotations', with_bbox=True)], + [ + dict( + type='mmdet.PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', + 'scale_factor', 'pad_param', 'flip', + 'flip_direction')) + ] + ]) +] diff --git a/configs/ppyoloe/ppyoloe_plus_s_fast_8xb8-80e_coco.py b/configs/ppyoloe/ppyoloe_plus_s_fast_8xb8-80e_coco.py index 7c5ce298..e44dc34a 100644 --- a/configs/ppyoloe/ppyoloe_plus_s_fast_8xb8-80e_coco.py +++ b/configs/ppyoloe/ppyoloe_plus_s_fast_8xb8-80e_coco.py @@ -1,4 +1,4 @@ -_base_ = '../_base_/default_runtime.py' +_base_ = ['../_base_/default_runtime.py', '../_base_/det_p5_tta.py'] # dataset settings data_root = 'data/coco/' diff --git a/configs/rtmdet/README.md b/configs/rtmdet/README.md index 3059a575..1089b71b 100644 --- a/configs/rtmdet/README.md +++ b/configs/rtmdet/README.md @@ -23,18 +23,19 @@ RTMDet-l model structure ## Object Detection -| Model | size | box AP | Params(M) | FLOPS(G) | TRT-FP16-Latency(ms) | Config | Download | -| :---------: | :--: | :----: | :-------: | :------: | :------------------: | :----------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | -| RTMDet-tiny | 640 | 41.0 | 4.8 | 8.1 | 0.98 | [config](./rtmdet_tiny_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco/rtmdet_tiny_syncbn_fast_8xb32-300e_coco_20230102_140117-dbb1dc83.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco/rtmdet_tiny_syncbn_fast_8xb32-300e_coco_20230102_140117.log.json) | -| 
RTMDet-s | 640 | 44.6 | 8.89 | 14.8 | 1.22 | [config](./rtmdet_s_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco/rtmdet_s_syncbn_fast_8xb32-300e_coco_20221230_182329-0a8c901a.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco/rtmdet_s_syncbn_fast_8xb32-300e_coco_20221230_182329.log.json) | -| RTMDet-m | 640 | 49.3 | 24.71 | 39.27 | 1.62 | [config](./rtmdet_m_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco/rtmdet_m_syncbn_fast_8xb32-300e_coco_20230102_135952-40af4fe8.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco/rtmdet_m_syncbn_fast_8xb32-300e_coco_20230102_135952.log.json) | -| RTMDet-l | 640 | 51.4 | 52.3 | 80.23 | 2.44 | [config](./rtmdet_l_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco/rtmdet_l_syncbn_fast_8xb32-300e_coco_20230102_135928-ee3abdc4.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco/rtmdet_l_syncbn_fast_8xb32-300e_coco_20230102_135928.log.json) | -| RTMDet-x | 640 | 52.8 | 94.86 | 141.67 | 3.10 | [config](./rtmdet_x_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_x_syncbn_fast_8xb32-300e_coco/rtmdet_x_syncbn_fast_8xb32-300e_coco_20221231_100345-b85cd476.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_x_syncbn_fast_8xb32-300e_coco/rtmdet_x_syncbn_fast_8xb32-300e_coco_20221231_100345.log.json) | +| Model | size | Params(M) | FLOPS(G) | TRT-FP16-Latency(ms) | box AP | TTA box AP | Config | Download | +| :---------: | :--: | :-------: | :------: | :------------------: | :----: | :--------: | :----------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| RTMDet-tiny | 640 | 4.8 | 8.1 | 0.98 | 41.0 | 42.7 | [config](./rtmdet_tiny_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco/rtmdet_tiny_syncbn_fast_8xb32-300e_coco_20230102_140117-dbb1dc83.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco/rtmdet_tiny_syncbn_fast_8xb32-300e_coco_20230102_140117.log.json) | +| RTMDet-s | 640 | 8.89 | 14.8 | 1.22 | 44.6 | 45.8 | [config](./rtmdet_s_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco/rtmdet_s_syncbn_fast_8xb32-300e_coco_20221230_182329-0a8c901a.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco/rtmdet_s_syncbn_fast_8xb32-300e_coco_20221230_182329.log.json) | +| RTMDet-m | 640 | 24.71 | 39.27 | 1.62 | 49.3 | 50.9 | [config](./rtmdet_m_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco/rtmdet_m_syncbn_fast_8xb32-300e_coco_20230102_135952-40af4fe8.pth) \| 
[log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco/rtmdet_m_syncbn_fast_8xb32-300e_coco_20230102_135952.log.json) |
+| RTMDet-l | 640 | 52.3 | 80.23 | 2.44 | 51.4 | 53.1 | [config](./rtmdet_l_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco/rtmdet_l_syncbn_fast_8xb32-300e_coco_20230102_135928-ee3abdc4.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco/rtmdet_l_syncbn_fast_8xb32-300e_coco_20230102_135928.log.json) |
+| RTMDet-x | 640 | 94.86 | 141.67 | 3.10 | 52.8 | 54.2 | [config](./rtmdet_x_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_x_syncbn_fast_8xb32-300e_coco/rtmdet_x_syncbn_fast_8xb32-300e_coco_20221231_100345-b85cd476.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_x_syncbn_fast_8xb32-300e_coco/rtmdet_x_syncbn_fast_8xb32-300e_coco_20221231_100345.log.json) |

**Note**:

1. The inference speed of RTMDet is measured on an NVIDIA 3090 GPU with TensorRT 8.4.3, cuDNN 8.2.0, FP16, batch size=1, and without NMS.
2. For a fair comparison, the config of bbox postprocessing is changed to be consistent with YOLOv5/6/7 after [PR#9494](https://github.com/open-mmlab/mmdetection/pull/9494), bringing about 0.1~0.3% AP improvement.
+3. `TTA` means Test Time Augmentation. It performs 3 multi-scale transformations on the image, followed by 2 flipping transformations (flipping and not flipping). You only need to specify `--tta` when testing to enable it. See [TTA](https://github.com/open-mmlab/mmyolo/blob/dev/docs/en/common_usage/tta.md) for details.

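+For example, TTA evaluation of the released RTMDet-s model can be run as follows. This is only a usage sketch: the local checkpoint path is a placeholder, and any of the checkpoints linked in the table above can be substituted.
+
+```shell
+# Evaluate an RTMDet-s checkpoint with test time augmentation enabled.
+python tools/test.py configs/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco.py \
+    path/to/rtmdet_s_checkpoint.pth \
+    --tta
+```
+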
[log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco/yolov5_n-v61_syncbn_fast_8xb16-300e_coco_20220919_090739.log.json) | -| YOLOv5-s | P5 | 640 | Yes | Yes | 2.7 | 37.7 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700.log.json) | -| YOLOv5-m | P5 | 640 | Yes | Yes | 5.0 | 45.3 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco/yolov5_m-v61_syncbn_fast_8xb16-300e_coco_20220917_204944-516a710f.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco/yolov5_m-v61_syncbn_fast_8xb16-300e_coco_20220917_204944.log.json) | -| YOLOv5-l | P5 | 640 | Yes | Yes | 8.1 | 48.8 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_l-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-v61_syncbn_fast_8xb16-300e_coco/yolov5_l-v61_syncbn_fast_8xb16-300e_coco_20220917_031007-096ef0eb.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-v61_syncbn_fast_8xb16-300e_coco/yolov5_l-v61_syncbn_fast_8xb16-300e_coco_20220917_031007.log.json) | -| YOLOv5-n | P6 | 1280 | Yes | Yes | 5.8 | 35.9 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_224705-d493c5f3.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_224705.log.json) | -| YOLOv5-s | P6 | 1280 | Yes | Yes | 10.5 | 44.4 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_215044-58865c19.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_215044.log.json) | -| YOLOv5-m | P6 | 1280 | Yes | Yes | 19.1 | 51.3 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_230453-49564d58.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_230453.log.json) | -| YOLOv5-l | P6 | 1280 | Yes | Yes | 30.5 | 53.7 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco.py) | 
[model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_234308-7a2ba6bf.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_234308.log.json) | +| Backbone | Arch | size | SyncBN | AMP | Mem (GB) | box AP | TTA box AP | Config | Download | +| :------: | :--: | :--: | :----: | :-: | :------: | :----: | :--------: | :--------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| YOLOv5-n | P5 | 640 | Yes | Yes | 1.5 | 28.0 | 30.7 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco/yolov5_n-v61_syncbn_fast_8xb16-300e_coco_20220919_090739-b804c1ad.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco/yolov5_n-v61_syncbn_fast_8xb16-300e_coco_20220919_090739.log.json) | +| YOLOv5-s | P5 | 640 | Yes | Yes | 2.7 | 37.7 | 40.2 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700.log.json) | +| YOLOv5-m | P5 | 640 | Yes | Yes | 5.0 | 45.3 | 46.9 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco/yolov5_m-v61_syncbn_fast_8xb16-300e_coco_20220917_204944-516a710f.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-v61_syncbn_fast_8xb16-300e_coco/yolov5_m-v61_syncbn_fast_8xb16-300e_coco_20220917_204944.log.json) | +| YOLOv5-l | P5 | 640 | Yes | Yes | 8.1 | 48.8 | 49.9 | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_l-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-v61_syncbn_fast_8xb16-300e_coco/yolov5_l-v61_syncbn_fast_8xb16-300e_coco_20220917_031007-096ef0eb.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-v61_syncbn_fast_8xb16-300e_coco/yolov5_l-v61_syncbn_fast_8xb16-300e_coco_20220917_031007.log.json) | +| YOLOv5-n | P6 | 1280 | Yes | Yes | 5.8 | 35.9 | | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_224705-d493c5f3.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_n-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_224705.log.json) | +| 
YOLOv5-s | P6 | 1280 | Yes | Yes | 10.5 | 44.4 | | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_215044-58865c19.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_215044.log.json) | +| YOLOv5-m | P6 | 1280 | Yes | Yes | 19.1 | 51.3 | | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_230453-49564d58.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_m-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_230453.log.json) | +| YOLOv5-l | P6 | 1280 | Yes | Yes | 30.5 | 53.7 | | [config](https://github.com/open-mmlab/mmyolo/tree/main/configs/yolov5/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_234308-7a2ba6bf.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco/yolov5_l-p6-v62_syncbn_fast_8xb16-300e_coco_20221027_234308.log.json) | **Note**: In the official YOLOv5 code, the `random_perspective` data augmentation in COCO object detection task training uses mask annotation information, which leads to higher performance. Object detection should not use mask annotation, so only box annotation information is used in `MMYOLO`. We will use the mask annotation information in the instance segmentation task. See https://github.com/ultralytics/yolov5/issues/9917 for details. @@ -39,7 +39,7 @@ In the official YOLOv5 code, the `random_perspective` data augmentation in COCO 3. `SyncBN` means use SyncBN, `AMP` indicates training with mixed precision. 4. We use 8x A100 for training, and the single-GPU batch size is 16. This is different from the official code. 5. The performance is unstable and may fluctuate by about 0.4 mAP and the highest performance weight in `COCO` training in `YOLOv5` may not be the last epoch. -6. `balloon` means that this is a demo configuration. +6. `TTA` means that Test Time Augmentation. It's perform 3 multi-scaling transformations on the image, followed by 2 flipping transformations (flipping and not flipping). You only need to specify `--tta` when testing to enable. see [TTA](https://github.com/open-mmlab/mmyolo/blob/dev/docs/en/common_usage/tta.md) for details. 
### VOC diff --git a/configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py b/configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py index 54f6cdeb..9585b51f 100644 --- a/configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py +++ b/configs/yolov5/voc/yolov5_s-v61_fast_1xb64-50e_voc.py @@ -29,6 +29,8 @@ num_det_layers = 3 load_from = 'https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth' # noqa +tta_img_scales = [img_scale, (416, 416), (640, 640)] + # Hyperparameter reference from: # https://github.com/ultralytics/yolov5/blob/master/data/hyps/hyp.VOC.yaml model = dict( @@ -232,3 +234,37 @@ val_evaluator = dict( test_evaluator = val_evaluator train_cfg = dict(max_epochs=max_epochs) + +# Config for Test Time Augmentation. (TTA) +_multiscale_resize_transforms = [ + dict( + type='Compose', + transforms=[ + dict(type='YOLOv5KeepRatioResize', scale=s), + dict( + type='LetterResize', + scale=s, + allow_scale_up=False, + pad_val=dict(img=114)) + ]) for s in tta_img_scales +] + +tta_pipeline = [ + dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args), + dict( + type='TestTimeAug', + transforms=[ + _multiscale_resize_transforms, + [ + dict(type='mmdet.RandomFlip', prob=1.), + dict(type='mmdet.RandomFlip', prob=0.) + ], [dict(type='mmdet.LoadAnnotations', with_bbox=True)], + [ + dict( + type='mmdet.PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', + 'scale_factor', 'pad_param', 'flip', + 'flip_direction')) + ] + ]) +] diff --git a/configs/yolov5/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco.py b/configs/yolov5/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco.py index 3d14484f..0af1fcb8 100644 --- a/configs/yolov5/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco.py +++ b/configs/yolov5/yolov5_s-p6-v62_syncbn_fast_8xb16-300e_coco.py @@ -26,6 +26,7 @@ loss_obj_weight = 1.0 obj_level_weights = [4.0, 1.0, 0.25, 0.06] affine_scale = 0.5 # YOLOv5RandomAffine scaling ratio +tta_img_scales = [(1280, 1280), (1024, 1024), (1536, 1536)] # =======================Unmodified in most cases================== model = dict( backbone=dict(arch='P6', out_indices=(2, 3, 4, 5)), @@ -101,3 +102,37 @@ val_dataloader = dict( dataset=dict(pipeline=test_pipeline, batch_shapes_cfg=batch_shapes_cfg)) test_dataloader = val_dataloader + +# Config for Test Time Augmentation. (TTA) +_multiscale_resize_transforms = [ + dict( + type='Compose', + transforms=[ + dict(type='YOLOv5KeepRatioResize', scale=s), + dict( + type='LetterResize', + scale=s, + allow_scale_up=False, + pad_val=dict(img=114)) + ]) for s in tta_img_scales +] + +tta_pipeline = [ + dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args), + dict( + type='TestTimeAug', + transforms=[ + _multiscale_resize_transforms, + [ + dict(type='mmdet.RandomFlip', prob=1.), + dict(type='mmdet.RandomFlip', prob=0.) 
+ ], [dict(type='mmdet.LoadAnnotations', with_bbox=True)], + [ + dict( + type='mmdet.PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', + 'scale_factor', 'pad_param', 'flip', + 'flip_direction')) + ] + ]) +] diff --git a/configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py b/configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py index 77070b5d..30503413 100644 --- a/configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py +++ b/configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py @@ -1,4 +1,4 @@ -_base_ = '../_base_/default_runtime.py' +_base_ = ['../_base_/default_runtime.py', '../_base_/det_p5_tta.py'] # ========================Frequently modified parameters====================== # -----data related----- diff --git a/configs/yolov6/yolov6_s_syncbn_fast_8xb32-400e_coco.py b/configs/yolov6/yolov6_s_syncbn_fast_8xb32-400e_coco.py index bda6562a..0b5fa560 100644 --- a/configs/yolov6/yolov6_s_syncbn_fast_8xb32-400e_coco.py +++ b/configs/yolov6/yolov6_s_syncbn_fast_8xb32-400e_coco.py @@ -1,4 +1,4 @@ -_base_ = '../_base_/default_runtime.py' +_base_ = ['../_base_/default_runtime.py', '../_base_/det_p5_tta.py'] # ======================= Frequently modified parameters ===================== # -----data related----- diff --git a/configs/yolov7/yolov7_l_syncbn_fast_8x16b-300e_coco.py b/configs/yolov7/yolov7_l_syncbn_fast_8x16b-300e_coco.py index 1247774e..6712002c 100644 --- a/configs/yolov7/yolov7_l_syncbn_fast_8x16b-300e_coco.py +++ b/configs/yolov7/yolov7_l_syncbn_fast_8x16b-300e_coco.py @@ -1,4 +1,4 @@ -_base_ = '../_base_/default_runtime.py' +_base_ = ['../_base_/default_runtime.py', '../_base_/det_p5_tta.py'] # ========================Frequently modified parameters====================== # -----data related----- diff --git a/configs/yolov7/yolov7_w-p6_syncbn_fast_8x16b-300e_coco.py b/configs/yolov7/yolov7_w-p6_syncbn_fast_8x16b-300e_coco.py index 17cb84da..11164d21 100644 --- a/configs/yolov7/yolov7_w-p6_syncbn_fast_8x16b-300e_coco.py +++ b/configs/yolov7/yolov7_w-p6_syncbn_fast_8x16b-300e_coco.py @@ -10,6 +10,7 @@ batch_shapes_cfg = dict( img_size=img_scale[ 0], # The image scale of padding should be divided by pad_size_divisor size_divisor=64) # Additional paddings for pixel scale +tta_img_scales = [(1280, 1280), (1024, 1024), (1536, 1536)] # -----model related----- # Basic size of multi-scale prior box @@ -35,8 +36,16 @@ mixup_beta = 8.0 # YOLOv5MixUp loss_cls_weight = 0.3 loss_bbox_weight = 0.05 loss_obj_weight = 0.7 +obj_level_weights = [4.0, 1.0, 0.25, 0.06] +simota_candidate_topk = 20 + +# The only difference between P6 and P5 in terms of +# hyperparameters is lr_factor +lr_factor = 0.2 # ===============================Unmodified in most cases==================== +pre_transform = _base_.pre_transform + model = dict( backbone=dict(arch='W', out_indices=(2, 3, 4, 5)), neck=dict( @@ -52,16 +61,14 @@ model = dict( norm_cfg=norm_cfg, act_cfg=dict(type='SiLU', inplace=True)), prior_generator=dict(base_sizes=anchors, strides=strides), - simota_candidate_topk=20, # note + simota_candidate_topk=simota_candidate_topk, # note # scaled based on number of detection layers loss_cls=dict(loss_weight=loss_cls_weight * (num_classes / 80 * 3 / num_det_layers)), loss_bbox=dict(loss_weight=loss_bbox_weight * (3 / num_det_layers)), loss_obj=dict(loss_weight=loss_obj_weight * ((img_scale[0] / 640)**2 * 3 / num_det_layers)), - obj_level_weights=[4.0, 1.0, 0.25, 0.06])) - -pre_transform = _base_.pre_transform + obj_level_weights=obj_level_weights)) mosiac4_pipeline = [ dict( @@ -138,6 
+145,38 @@ val_dataloader = dict( dataset=dict(pipeline=test_pipeline, batch_shapes_cfg=batch_shapes_cfg)) test_dataloader = val_dataloader -# The only difference between P6 and P5 in terms of -# hyperparameters is lr_factor -default_hooks = dict(param_scheduler=dict(lr_factor=0.2)) +default_hooks = dict(param_scheduler=dict(lr_factor=lr_factor)) + +# Config for Test Time Augmentation. (TTA) +_multiscale_resize_transforms = [ + dict( + type='Compose', + transforms=[ + dict(type='YOLOv5KeepRatioResize', scale=s), + dict( + type='LetterResize', + scale=s, + allow_scale_up=False, + pad_val=dict(img=114)) + ]) for s in tta_img_scales +] + +tta_pipeline = [ + dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args), + dict( + type='TestTimeAug', + transforms=[ + _multiscale_resize_transforms, + [ + dict(type='mmdet.RandomFlip', prob=1.), + dict(type='mmdet.RandomFlip', prob=0.) + ], [dict(type='mmdet.LoadAnnotations', with_bbox=True)], + [ + dict( + type='mmdet.PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', + 'scale_factor', 'pad_param', 'flip', + 'flip_direction')) + ] + ]) +] diff --git a/configs/yolov8/README.md b/configs/yolov8/README.md index 47075b6c..a284e237 100644 --- a/configs/yolov8/README.md +++ b/configs/yolov8/README.md @@ -20,18 +20,18 @@ YOLOv8-P5 model structure ### COCO -| Backbone | Arch | size | Mask Refine | SyncBN | AMP | Mem (GB) | box AP | Config | Download | -| :------: | :--: | :--: | :---------: | :----: | :-: | :------: | :---------: | :---------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | -| YOLOv8-n | P5 | 640 | No | Yes | Yes | 2.8 | 37.2 | [config](../yolov8/yolov8_n_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_n_syncbn_fast_8xb16-500e_coco/yolov8_n_syncbn_fast_8xb16-500e_coco_20230114_131804-88c11cdb.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_n_syncbn_fast_8xb16-500e_coco/yolov8_n_syncbn_fast_8xb16-500e_coco_20230114_131804.log.json) | -| YOLOv8-n | P5 | 640 | Yes | Yes | Yes | 2.5 | 37.4 (+0.2) | [config](../yolov8/yolov8_n_mask-refine_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_n_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_n_mask-refine_syncbn_fast_8xb16-500e_coco_20230216_101206-b975b1cd.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_n_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_n_mask-refine_syncbn_fast_8xb16-500e_coco_20230216_101206.log.json) | -| YOLOv8-s | P5 | 640 | No | Yes | Yes | 4.0 | 44.2 | [config](../yolov8/yolov8_s_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_s_syncbn_fast_8xb16-500e_coco/yolov8_s_syncbn_fast_8xb16-500e_coco_20230117_180101-5aa5f0f1.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_s_syncbn_fast_8xb16-500e_coco/yolov8_s_syncbn_fast_8xb16-500e_coco_20230117_180101.log.json) | -| YOLOv8-s | P5 | 640 | Yes | Yes | Yes | 4.0 | 45.1 (+0.9) | [config](../yolov8/yolov8_s_mask-refine_syncbn_fast_8xb16-500e_coco.py) | 
[model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_s_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_s_mask-refine_syncbn_fast_8xb16-500e_coco_20230216_095938-ce3c1b3f.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_s_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_s_mask-refine_syncbn_fast_8xb16-500e_coco_20230216_095938.log.json) | -| YOLOv8-m | P5 | 640 | No | Yes | Yes | 7.2 | 49.8 | [config](../yolov8/yolov8_m_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_m_syncbn_fast_8xb16-500e_coco/yolov8_m_syncbn_fast_8xb16-500e_coco_20230115_192200-c22e560a.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_m_syncbn_fast_8xb16-500e_coco/yolov8_m_syncbn_fast_8xb16-500e_coco_20230115_192200.log.json) | -| YOLOv8-m | P5 | 640 | Yes | Yes | Yes | 7.0 | 50.6 (+0.8) | [config](../yolov8/yolov8_m_mask-refine_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_m_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_m_mask-refine_syncbn_fast_8xb16-500e_coco_20230216_223400-f40abfcd.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_m_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_m_mask-refine_syncbn_fast_8xb16-500e_coco_20230216_223400.log.json) | -| YOLOv8-l | P5 | 640 | No | Yes | Yes | 9.8 | 52.1 | [config](../yolov8/yolov8_l_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_l_syncbn_fast_8xb16-500e_coco/yolov8_l_syncbn_fast_8xb16-500e_coco_20230217_182526-189611b6.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_l_syncbn_fast_8xb16-500e_coco/yolov8_l_syncbn_fast_8xb16-500e_coco_20230217_182526.log.json) | -| YOLOv8-l | P5 | 640 | Yes | Yes | Yes | 9.1 | 53.0 (+0.9) | [config](../yolov8/yolov8_l_mask-refine_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_l_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_l_mask-refine_syncbn_fast_8xb16-500e_coco_20230217_120100-5881dec4.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_l_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_l_mask-refine_syncbn_fast_8xb16-500e_coco_20230217_120100.log.json) | -| YOLOv8-x | P5 | 640 | No | Yes | Yes | 12.2 | 52.7 | [config](../yolov8/yolov8_x_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_x_syncbn_fast_8xb16-500e_coco/yolov8_x_syncbn_fast_8xb16-500e_coco_20230218_023338-5674673c.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_x_syncbn_fast_8xb16-500e_coco/yolov8_x_syncbn_fast_8xb16-500e_coco_20230218_023338.log.json) | -| YOLOv8-x | P5 | 640 | Yes | Yes | Yes | 12.4 | 54.0 (+1.3) | [config](../yolov8/yolov8_x_mask-refine_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_x_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_x_mask-refine_syncbn_fast_8xb16-500e_coco_20230217_120411-079ca8d1.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_x_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_x_mask-refine_syncbn_fast_8xb16-500e_coco_20230217_120411.log.json) | +| Backbone | Arch | size | Mask Refine | SyncBN | AMP | Mem (GB) | box AP | TTA box AP | Config | Download | +| :------: | :--: | :--: | :---------: | :----: | :-: | :------: | :---------: | :--------: | :---------------------------------------------------------------------: | 
:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| YOLOv8-n | P5 | 640 | No | Yes | Yes | 2.8 | 37.2 | | [config](../yolov8/yolov8_n_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_n_syncbn_fast_8xb16-500e_coco/yolov8_n_syncbn_fast_8xb16-500e_coco_20230114_131804-88c11cdb.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_n_syncbn_fast_8xb16-500e_coco/yolov8_n_syncbn_fast_8xb16-500e_coco_20230114_131804.log.json) | +| YOLOv8-n | P5 | 640 | Yes | Yes | Yes | 2.5 | 37.4 (+0.2) | 39.9 | [config](../yolov8/yolov8_n_mask-refine_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_n_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_n_mask-refine_syncbn_fast_8xb16-500e_coco_20230216_101206-b975b1cd.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_n_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_n_mask-refine_syncbn_fast_8xb16-500e_coco_20230216_101206.log.json) | +| YOLOv8-s | P5 | 640 | No | Yes | Yes | 4.0 | 44.2 | | [config](../yolov8/yolov8_s_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_s_syncbn_fast_8xb16-500e_coco/yolov8_s_syncbn_fast_8xb16-500e_coco_20230117_180101-5aa5f0f1.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_s_syncbn_fast_8xb16-500e_coco/yolov8_s_syncbn_fast_8xb16-500e_coco_20230117_180101.log.json) | +| YOLOv8-s | P5 | 640 | Yes | Yes | Yes | 4.0 | 45.1 (+0.9) | 46.8 | [config](../yolov8/yolov8_s_mask-refine_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_s_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_s_mask-refine_syncbn_fast_8xb16-500e_coco_20230216_095938-ce3c1b3f.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_s_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_s_mask-refine_syncbn_fast_8xb16-500e_coco_20230216_095938.log.json) | +| YOLOv8-m | P5 | 640 | No | Yes | Yes | 7.2 | 49.8 | | [config](../yolov8/yolov8_m_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_m_syncbn_fast_8xb16-500e_coco/yolov8_m_syncbn_fast_8xb16-500e_coco_20230115_192200-c22e560a.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_m_syncbn_fast_8xb16-500e_coco/yolov8_m_syncbn_fast_8xb16-500e_coco_20230115_192200.log.json) | +| YOLOv8-m | P5 | 640 | Yes | Yes | Yes | 7.0 | 50.6 (+0.8) | 52.3 | [config](../yolov8/yolov8_m_mask-refine_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_m_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_m_mask-refine_syncbn_fast_8xb16-500e_coco_20230216_223400-f40abfcd.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_m_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_m_mask-refine_syncbn_fast_8xb16-500e_coco_20230216_223400.log.json) | +| YOLOv8-l | P5 | 640 | No | Yes | Yes | 9.8 | 52.1 | | [config](../yolov8/yolov8_l_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_l_syncbn_fast_8xb16-500e_coco/yolov8_l_syncbn_fast_8xb16-500e_coco_20230217_182526-189611b6.pth) \| 
[log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_l_syncbn_fast_8xb16-500e_coco/yolov8_l_syncbn_fast_8xb16-500e_coco_20230217_182526.log.json) | +| YOLOv8-l | P5 | 640 | Yes | Yes | Yes | 9.1 | 53.0 (+0.9) | 54.4 | [config](../yolov8/yolov8_l_mask-refine_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_l_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_l_mask-refine_syncbn_fast_8xb16-500e_coco_20230217_120100-5881dec4.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_l_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_l_mask-refine_syncbn_fast_8xb16-500e_coco_20230217_120100.log.json) | +| YOLOv8-x | P5 | 640 | No | Yes | Yes | 12.2 | 52.7 | | [config](../yolov8/yolov8_x_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_x_syncbn_fast_8xb16-500e_coco/yolov8_x_syncbn_fast_8xb16-500e_coco_20230218_023338-5674673c.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_x_syncbn_fast_8xb16-500e_coco/yolov8_x_syncbn_fast_8xb16-500e_coco_20230218_023338.log.json) | +| YOLOv8-x | P5 | 640 | Yes | Yes | Yes | 12.4 | 54.0 (+1.3) | 55.0 | [config](../yolov8/yolov8_x_mask-refine_syncbn_fast_8xb16-500e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_x_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_x_mask-refine_syncbn_fast_8xb16-500e_coco_20230217_120411-079ca8d1.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_x_mask-refine_syncbn_fast_8xb16-500e_coco/yolov8_x_mask-refine_syncbn_fast_8xb16-500e_coco_20230217_120411.log.json) | **Note** @@ -40,5 +40,6 @@ YOLOv8-P5 model structure 3. We provide [scripts](https://github.com/open-mmlab/mmyolo/tree/dev/tools/model_converters/yolov8_to_mmyolo.py) to convert official weights to MMYOLO. 4. `SyncBN` means using SyncBN, `AMP` indicates training with mixed precision. 5. The performance of `Mask Refine` training is for the weight performance officially released by YOLOv8. `Mask Refine` means refining bbox by mask while loading annotations and transforming after `YOLOv5RandomAffine`, and the L and X models use `Copy Paste`. +6. `TTA` means that Test Time Augmentation. It's perform 3 multi-scaling transformations on the image, followed by 2 flipping transformations (flipping and not flipping). You only need to specify `--tta` when testing to enable. see [TTA](https://github.com/open-mmlab/mmyolo/blob/dev/docs/en/common_usage/tta.md) for details. 
## Citation diff --git a/configs/yolov8/yolov8_s_syncbn_fast_8xb16-500e_coco.py b/configs/yolov8/yolov8_s_syncbn_fast_8xb16-500e_coco.py index 58441a99..adb9c7fe 100644 --- a/configs/yolov8/yolov8_s_syncbn_fast_8xb16-500e_coco.py +++ b/configs/yolov8/yolov8_s_syncbn_fast_8xb16-500e_coco.py @@ -1,4 +1,4 @@ -_base_ = '../_base_/default_runtime.py' +_base_ = ['../_base_/default_runtime.py', '../_base_/det_p5_tta.py'] # ========================Frequently modified parameters====================== # -----data related----- diff --git a/configs/yolox/yolox_p5_tta.py b/configs/yolox/yolox_p5_tta.py new file mode 100644 index 00000000..3a5b4652 --- /dev/null +++ b/configs/yolox/yolox_p5_tta.py @@ -0,0 +1,55 @@ +# TODO: Need to solve the problem of multiple file_client_args parameters +# _file_client_args = dict( +# backend='petrel', +# path_mapping=dict({ +# './data/': 's3://openmmlab/datasets/detection/', +# 'data/': 's3://openmmlab/datasets/detection/' +# })) +_file_client_args = dict(backend='disk') + +tta_model = dict( + type='mmdet.DetTTAModel', + tta_cfg=dict(nms=dict(type='nms', iou_threshold=0.65), max_per_img=300)) + +img_scales = [(640, 640), (320, 320), (960, 960)] + +# LoadImageFromFile +# / | \ +# Resize Resize Resize # noqa +# / \ / \ / \ +# RandomFlip RandomFlip RandomFlip RandomFlip RandomFlip RandomFlip # noqa +# | | | | | | +# LoadAnn LoadAnn LoadAnn LoadAnn LoadAnn LoadAnn +# | | | | | | +# PackDetIn PackDetIn PackDetIn PackDetIn PackDetIn PackDetIn # noqa + +tta_pipeline = [ + dict(type='LoadImageFromFile', file_client_args=_file_client_args), + dict( + type='TestTimeAug', + transforms=[ + [ + dict(type='mmdet.Resize', scale=s, keep_ratio=True) + for s in img_scales + ], + [ + # ``RandomFlip`` must be placed before ``Pad``, otherwise + # bounding box coordinates after flipping cannot be + # recovered correctly. + dict(type='mmdet.RandomFlip', prob=1.), + dict(type='mmdet.RandomFlip', prob=0.) 
+ ], + [ + dict( + type='mmdet.Pad', + pad_to_square=True, + pad_val=dict(img=(114.0, 114.0, 114.0))), + ], + [ + dict( + type='mmdet.PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', + 'scale_factor', 'flip', 'flip_direction')) + ] + ]) +] diff --git a/configs/yolox/yolox_s_fast_8xb8-300e_coco.py b/configs/yolox/yolox_s_fast_8xb8-300e_coco.py index b51a1087..e751b0d6 100644 --- a/configs/yolox/yolox_s_fast_8xb8-300e_coco.py +++ b/configs/yolox/yolox_s_fast_8xb8-300e_coco.py @@ -1,4 +1,4 @@ -_base_ = '../_base_/default_runtime.py' +_base_ = ['../_base_/default_runtime.py', 'yolox_p5_tta.py'] # ========================Frequently modified parameters====================== # -----data related----- diff --git a/configs/yolox/yolox_tiny_fast_8xb8-300e_coco.py b/configs/yolox/yolox_tiny_fast_8xb8-300e_coco.py index e8c822e0..14187342 100644 --- a/configs/yolox/yolox_tiny_fast_8xb8-300e_coco.py +++ b/configs/yolox/yolox_tiny_fast_8xb8-300e_coco.py @@ -9,6 +9,9 @@ scaling_ratio_range = (0.5, 1.5) img_scale = _base_.img_scale pre_transform = _base_.pre_transform +test_img_scale = (416, 416) +tta_img_scales = [test_img_scale, (320, 320), (640, 640)] + # model settings model = dict( data_preprocessor=dict(batch_augments=[ @@ -48,7 +51,7 @@ train_pipeline_stage1 = [ test_pipeline = [ dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args), - dict(type='mmdet.Resize', scale=(416, 416), keep_ratio=True), # note + dict(type='mmdet.Resize', scale=test_img_scale, keep_ratio=True), # note dict( type='mmdet.Pad', pad_to_square=True, @@ -63,3 +66,35 @@ test_pipeline = [ train_dataloader = dict(dataset=dict(pipeline=train_pipeline_stage1)) val_dataloader = dict(dataset=dict(pipeline=test_pipeline)) test_dataloader = val_dataloader + +# Config for Test Time Augmentation. (TTA) +tta_pipeline = [ + dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args), + dict( + type='TestTimeAug', + transforms=[ + [ + dict(type='mmdet.Resize', scale=s, keep_ratio=True) + for s in tta_img_scales + ], + [ + # ``RandomFlip`` must be placed before ``Pad``, otherwise + # bounding box coordinates after flipping cannot be + # recovered correctly. + dict(type='mmdet.RandomFlip', prob=1.), + dict(type='mmdet.RandomFlip', prob=0.) + ], + [ + dict( + type='mmdet.Pad', + pad_to_square=True, + pad_val=dict(img=(114.0, 114.0, 114.0))), + ], + [ + dict( + type='mmdet.PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', + 'scale_factor', 'flip', 'flip_direction')) + ] + ]) +] diff --git a/docs/en/common_usage/tta.md b/docs/en/common_usage/tta.md new file mode 100644 index 00000000..517d34b8 --- /dev/null +++ b/docs/en/common_usage/tta.md @@ -0,0 +1,87 @@ +# TTA Related Notes + +## Test Time Augmentation (TTA) + +MMYOLO support for TTA in v0.5.0+, so that users can specify the `-tta` parameter to enable it during evaluation. Take `YOLOv5-s` as an example, its single GPU TTA test command is as follows + +```shell +python tools/test.py configs/yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco.py https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco/yolov5_n-v61_syncbn_fast_8xb16-300e_coco_20220919_090739-b804c1ad.pth --tta +``` + +For TTA to work properly, you must ensure that the variables `tta_model` and `tta_pipeline` are present in the configuration, see [det_p5_tta.py](https://github.com/open-mmlab/mmyolo/blob/dev/configs/_base_/det_p5_tta.py) for details. 
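+
+For configs that do not already define these two variables, the pattern used by the configs in this repository is to list the shared base file in `_base_`. The snippet below is a minimal sketch: `my_custom_config.py` is a hypothetical file placed under `configs/<algorithm>/`, and only the TTA-related part is shown.
+
+```python
+# my_custom_config.py (hypothetical)
+# Inheriting det_p5_tta.py brings the tta_model and tta_pipeline
+# definitions into this config, so `--tta` works at test time.
+_base_ = ['../_base_/default_runtime.py', '../_base_/det_p5_tta.py']
+```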
+ +The default TTA in MMYOLO performs 3 multi-scale enhancements, followed by 2 horizontal flip enhancements, for a total of 6 parallel pipelines. take `YOLOv5-s` as an example, its TTA configuration is as follows + +```python +img_scales = [(640, 640), (320, 320), (960, 960)] + +_multiscale_resize_transforms = [ + dict( + type='Compose', + transforms=[ + dict(type='YOLOv5KeepRatioResize', scale=s), + dict( + type='LetterResize', + scale=s, + allow_scale_up=False, + pad_val=dict(img=114)) + ]) for s in img_scales +] + +tta_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='TestTimeAug', + transforms=[ + _multiscale_resize_transforms, + [ + dict(type='mmdet.RandomFlip', prob=1.), + dict(type='mmdet.RandomFlip', prob=0.) + ], [dict(type='mmdet.LoadAnnotations', with_bbox=True)], + [ + dict( + type='mmdet.PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', + 'scale_factor', 'pad_param', 'flip', + 'flip_direction')) + ] + ]) +] +``` + +The schematic diagram is shown below. + +```text + LoadImageFromFile + / | \ +(RatioResize,LetterResize) (RatioResize,LetterResize) (RatioResize,LetterResize) + / \ / \ / \ + RandomFlip RandomFlip RandomFlip RandomFlip RandomFlip RandomFlip + | | | | | | + LoadAnn LoadAnn LoadAnn LoadAnn LoadAnn LoadAnn + | | | | | | + PackDetIn PackDetIn PackDetIn PackDetIn PackDetIn PackDetIn +``` + +You can modify `img_scales` to support different multi-scale enhancements, or you can insert a new pipeline to implement custom TTA requirements. Assuming you only want to do horizontal flip enhancements, the configuration should be modified as follows. + +```python +tta_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='TestTimeAug', + transforms=[ + [ + dict(type='mmdet.RandomFlip', prob=1.), + dict(type='mmdet.RandomFlip', prob=0.) 
+ ], [dict(type='mmdet.LoadAnnotations', with_bbox=True)], + [ + dict( + type='mmdet.PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', + 'scale_factor', 'pad_param', 'flip', + 'flip_direction')) + ] + ]) +] +``` diff --git a/docs/en/index.rst b/docs/en/index.rst index 92837c47..004bf0e7 100644 --- a/docs/en/index.rst +++ b/docs/en/index.rst @@ -37,6 +37,7 @@ You can switch between Chinese and English documents in the top-right corner of common_usage/resume_training.md common_usage/syncbn.md common_usage/amp_training.md + common_usage/tta.md common_usage/plugins.md common_usage/freeze_layers.md common_usage/output_predictions.md diff --git a/docs/zh_cn/common_usage/tta.md b/docs/zh_cn/common_usage/tta.md new file mode 100644 index 00000000..9983665f --- /dev/null +++ b/docs/zh_cn/common_usage/tta.md @@ -0,0 +1,87 @@ +# 测试时增强相关说明 + +## 测试时增强 TTA + +MMYOLO 在 v0.5.0+ 版本中增加对 TTA 的支持,用户可以在进行评估时候指定 `--tta` 参数使能。 以 `YOLOv5-s` 为例,其单卡 TTA 测试命令为: + +```shell +python tools/test.py configs/yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco.py https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco/yolov5_n-v61_syncbn_fast_8xb16-300e_coco_20220919_090739-b804c1ad.pth --tta +``` + +TTA 功能的正常运行必须确保配置中存在 `tta_model` 和 `tta_pipeline` 两个变量,详情可以参考 [det_p5_tta.py](https://github.com/open-mmlab/mmyolo/blob/dev/configs/_base_/det_p5_tta.py)。 + +MMYOLO 中默认的 TTA 会先执行 3 个多尺度增强,然后再增强两个水平翻转增强,一共 6 个并行的 pipeline。以 `YOLOv5-s` 为例,其 TTA 配置为: + +```python +img_scales = [(640, 640), (320, 320), (960, 960)] + +_multiscale_resize_transforms = [ + dict( + type='Compose', + transforms=[ + dict(type='YOLOv5KeepRatioResize', scale=s), + dict( + type='LetterResize', + scale=s, + allow_scale_up=False, + pad_val=dict(img=114)) + ]) for s in img_scales +] + +tta_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='TestTimeAug', + transforms=[ + _multiscale_resize_transforms, + [ + dict(type='mmdet.RandomFlip', prob=1.), + dict(type='mmdet.RandomFlip', prob=0.) + ], [dict(type='mmdet.LoadAnnotations', with_bbox=True)], + [ + dict( + type='mmdet.PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', + 'scale_factor', 'pad_param', 'flip', + 'flip_direction')) + ] + ]) +] +``` + +其示意图如下所示: + +```text + LoadImageFromFile + / | \ +(RatioResize,LetterResize) (RatioResize,LetterResize) (RatioResize,LetterResize) + / \ / \ / \ + RandomFlip RandomFlip RandomFlip RandomFlip RandomFlip RandomFlip + | | | | | | + LoadAnn LoadAnn LoadAnn LoadAnn LoadAnn LoadAnn + | | | | | | + PackDetIn PackDetIn PackDetIn PackDetIn PackDetIn PackDetIn +``` + +你可以修改 `img_scales` 来支持不同的多尺度增强,也可以插入新的 pipeline 从而实现自定义 TTA 需求。 假设你只想进行水平翻转增强,则配置应该修改为如下: + +```python +tta_pipeline = [ + dict(type='LoadImageFromFile'), + dict( + type='TestTimeAug', + transforms=[ + [ + dict(type='mmdet.RandomFlip', prob=1.), + dict(type='mmdet.RandomFlip', prob=0.) 
+ ], [dict(type='mmdet.LoadAnnotations', with_bbox=True)], + [ + dict( + type='mmdet.PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', + 'scale_factor', 'pad_param', 'flip', + 'flip_direction')) + ] + ]) +] +``` diff --git a/docs/zh_cn/index.rst b/docs/zh_cn/index.rst index 5026c30e..80d7cbe1 100644 --- a/docs/zh_cn/index.rst +++ b/docs/zh_cn/index.rst @@ -37,6 +37,7 @@ common_usage/resume_training.md common_usage/syncbn.md common_usage/amp_training.md + common_usage/tta.md common_usage/plugins.md common_usage/freeze_layers.md common_usage/output_predictions.md diff --git a/mmyolo/datasets/transforms/transforms.py b/mmyolo/datasets/transforms/transforms.py index 926af7cc..a58084f3 100644 --- a/mmyolo/datasets/transforms/transforms.py +++ b/mmyolo/datasets/transforms/transforms.py @@ -7,7 +7,7 @@ import cv2 import mmcv import numpy as np import torch -from mmcv.transforms import BaseTransform +from mmcv.transforms import BaseTransform, Compose from mmcv.transforms.utils import cache_randomness from mmdet.datasets.transforms import LoadAnnotations as MMDET_LoadAnnotations from mmdet.datasets.transforms import Resize as MMDET_Resize @@ -18,6 +18,9 @@ from numpy import random from mmyolo.registry import TRANSFORMS +# TODO: Waiting for MMCV support +TRANSFORMS.register_module(module=Compose, force=True) + @TRANSFORMS.register_module() class YOLOv5KeepRatioResize(MMDET_Resize): diff --git a/tools/misc/print_config.py b/tools/misc/print_config.py new file mode 100644 index 00000000..2c2efe33 --- /dev/null +++ b/tools/misc/print_config.py @@ -0,0 +1,59 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import argparse +import os + +from mmdet.utils import replace_cfg_vals, update_data_root +from mmengine import Config, DictAction + + +def parse_args(): + parser = argparse.ArgumentParser(description='Print the whole config') + parser.add_argument('config', help='config file path') + parser.add_argument( + '--save-path', + default=None, + help='save path of whole config, suffixed with .py, .json or .yml') + parser.add_argument( + '--cfg-options', + nargs='+', + action=DictAction, + help='override some settings in the used config, the key-value pair ' + 'in xxx=yyy format will be merged into config file. If the value to ' + 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' + 'It also allows nested list/tuple values, e.g. 
key="[(a,b),(c,d)]" ' + 'Note that the quotation marks are necessary and that no white space ' + 'is allowed.') + args = parser.parse_args() + + return args + + +def main(): + args = parse_args() + + cfg = Config.fromfile(args.config) + + # replace the ${key} with the value of cfg.key + cfg = replace_cfg_vals(cfg) + + # update data root according to MMDET_DATASETS + update_data_root(cfg) + + if args.cfg_options is not None: + cfg.merge_from_dict(args.cfg_options) + print(f'Config:\n{cfg.pretty_text}') + + if args.save_path is not None: + save_path = args.save_path + + suffix = os.path.splitext(save_path)[-1] + assert suffix in ['.py', '.json', '.yml'] + + if not os.path.exists(os.path.split(save_path)[0]): + os.makedirs(os.path.split(save_path)[0]) + cfg.dump(save_path) + print(f'Config saving at {save_path}') + + +if __name__ == '__main__': + main() diff --git a/tools/test.py b/tools/test.py index 53a617fd..7262234b 100644 --- a/tools/test.py +++ b/tools/test.py @@ -4,7 +4,7 @@ import os import os.path as osp from mmdet.engine.hooks.utils import trigger_visualization_hook -from mmengine.config import Config, DictAction +from mmengine.config import Config, ConfigDict, DictAction from mmengine.evaluator import DumpResults from mmengine.runner import Runner @@ -31,6 +31,10 @@ def parse_args(): help='the prefix of the output json file without perform evaluation, ' 'which is useful when you want to format the result to a specific ' 'format and submit it to the test server') + parser.add_argument( + '--tta', + action='store_true', + help='Whether to use test time augmentation') parser.add_argument( '--show', action='store_true', help='show prediction results') parser.add_argument( @@ -109,6 +113,23 @@ def main(): # Determine whether the custom metainfo fields are all lowercase is_metainfo_lower(cfg) + if args.tta: + assert 'tta_model' in cfg, 'Cannot find ``tta_model`` in config.' \ + " Can't use tta !" + assert 'tta_pipeline' in cfg, 'Cannot find ``tta_pipeline`` ' \ + "in config. Can't use tta !" + + cfg.model = ConfigDict(**cfg.tta_model, module=cfg.model) + test_data_cfg = cfg.test_dataloader.dataset + while 'dataset' in test_data_cfg: + test_data_cfg = test_data_cfg['dataset'] + + # batch_shapes_cfg will force control the size of the output image, + # it is not compatible with tta. + if 'batch_shapes_cfg' in test_data_cfg: + test_data_cfg.batch_shapes_cfg = None + test_data_cfg.pipeline = cfg.tta_pipeline + # build the runner from config if 'runner_type' not in cfg: # build the default runner From d156228c65ff7a7fb8cd2fa4d42944cfc583b7a0 Mon Sep 17 00:00:00 2001 From: tripleMu Date: Tue, 28 Feb 2023 10:25:44 +0800 Subject: [PATCH 45/64] [Improve] Remove `cast_data` (#595) * Remove castdata * Support other argsa --- .../data_preprocessors/data_preprocessor.py | 27 +++---------------- 1 file changed, 3 insertions(+), 24 deletions(-) diff --git a/mmyolo/models/data_preprocessors/data_preprocessor.py b/mmyolo/models/data_preprocessors/data_preprocessor.py index e06c0036..611ecb22 100644 --- a/mmyolo/models/data_preprocessors/data_preprocessor.py +++ b/mmyolo/models/data_preprocessors/data_preprocessor.py @@ -1,6 +1,6 @@ # Copyright (c) OpenMMLab. All rights reserved. 
import random -from typing import List, Mapping, Sequence, Tuple, Union +from typing import List, Optional, Tuple, Union import torch import torch.nn.functional as F @@ -64,29 +64,8 @@ class YOLOv5DetDataPreprocessor(DetDataPreprocessor): Note: It must be used together with `mmyolo.datasets.utils.yolov5_collate` """ - # TODO: Can be deleted after mmdet support - def cast_data(self, data: CastData) -> CastData: - """Copying data to the target device. - - Args: - data (dict): Data returned by ``DataLoader``. - - Returns: - CollatedResult: Inputs and data sample at target device. - """ - if isinstance(data, Mapping): - return {key: self.cast_data(data[key]) for key in data} - elif isinstance(data, (str, bytes)) or data is None: - return data - elif isinstance(data, tuple) and hasattr(data, '_fields'): - # namedtuple - return type(data)(*(self.cast_data(sample) for sample in data)) # type: ignore # noqa: E501 # yapf:disable - elif isinstance(data, Sequence): - return type(data)(self.cast_data(sample) for sample in data) # type: ignore # noqa: E501 # yapf:disable - elif isinstance(data, (torch.Tensor, BaseDataElement)): - return data.to(self.device, non_blocking=True) - else: - return data + def __init__(self, *args, non_blocking: Optional[bool] = True, **kwargs): + super().__init__(*args, non_blocking=non_blocking, **kwargs) def forward(self, data: dict, training: bool = False) -> dict: """Perform normalization, padding and bgr2rgb conversion based on From e9e007973e2cd845fc47038e82f4241f80a646ca Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Haian=20Huang=28=E6=B7=B1=E5=BA=A6=E7=9C=B8=29?= <1286304229@qq.com> Date: Tue, 28 Feb 2023 11:01:13 +0800 Subject: [PATCH 46/64] [Feature] Support auto registry based on mmdet 3.0.0rc6 (#597) * support auto registry * update version * update --- .circleci/test.yml | 4 +- README.md | 6 +- README_zh-CN.md | 6 +- demo/boxam_vis_demo.py | 3 - demo/featmap_vis_demo.py | 7 +-- demo/image_demo.py | 5 +- demo/large_image_demo.py | 5 +- demo/video_demo.py | 4 -- docker/Dockerfile | 2 +- docker/Dockerfile_deployment | 2 +- docs/en/get_started/dependencies.md | 4 +- docs/en/get_started/installation.md | 8 +-- docs/en/tutorials/custom_installation.md | 10 ++-- docs/zh_cn/get_started/dependencies.md | 3 +- docs/zh_cn/get_started/installation.md | 8 +-- docs/zh_cn/tutorials/custom_installation.md | 10 ++-- docs/zh_cn/useful_tools/browse_dataset.md | 12 ++-- mmyolo/__init__.py | 6 +- mmyolo/registry.py | 66 +++++++++++++++------ mmyolo/utils/boxam_utils.py | 2 + mmyolo/version.py | 2 +- requirements/docs.txt | 6 +- requirements/mminstall.txt | 6 +- tools/analysis_tools/benchmark.py | 6 +- tools/analysis_tools/browse_dataset.py | 7 +-- tools/analysis_tools/confusion_matrix.py | 5 +- tools/analysis_tools/dataset_analysis.py | 5 +- tools/analysis_tools/optimize_anchors.py | 5 +- tools/analysis_tools/vis_scheduler.py | 7 +-- tools/test.py | 6 +- tools/train.py | 6 +- 31 files changed, 121 insertions(+), 113 deletions(-) diff --git a/.circleci/test.yml b/.circleci/test.yml index 7d08ccf3..9e24535f 100644 --- a/.circleci/test.yml +++ b/.circleci/test.yml @@ -67,7 +67,7 @@ jobs: command: | pip install -U openmim mim install git+https://github.com/open-mmlab/mmengine.git@main - mim install 'mmcv >= 2.0.0rc1' + mim install 'mmcv >= 2.0.0rc4' mim install git+https://github.com/open-mmlab/mmdetection.git@dev-3.x pip install -r requirements/albu.txt pip install -r requirements/tests.txt @@ -125,7 +125,7 @@ jobs: command: | docker exec mmyolo pip install -U openmim docker exec mmyolo mim 
install -e /mmengine - docker exec mmyolo mim install 'mmcv >= 2.0.0rc1' + docker exec mmyolo mim install 'mmcv >= 2.0.0rc4' docker exec mmyolo pip install -e /mmdetection docker exec mmyolo pip install -r requirements/albu.txt docker exec mmyolo pip install -r requirements/tests.txt diff --git a/README.md b/README.md index ecf32b4f..284c2f9f 100644 --- a/README.md +++ b/README.md @@ -141,9 +141,9 @@ MMYOLO relies on PyTorch, MMCV, MMEngine, and MMDetection. Below are quick steps conda create -n open-mmlab python=3.8 pytorch==1.10.1 torchvision==0.11.2 cudatoolkit=11.3 -c pytorch -y conda activate open-mmlab pip install openmim -mim install "mmengine>=0.3.1" -mim install "mmcv>=2.0.0rc1,<2.1.0" -mim install "mmdet>=3.0.0rc5,<3.1.0" +mim install "mmengine>=0.6.0" +mim install "mmcv>=2.0.0rc4,<2.1.0" +mim install "mmdet>=3.0.0rc6,<3.1.0" git clone https://github.com/open-mmlab/mmyolo.git cd mmyolo # Install albumentations diff --git a/README_zh-CN.md b/README_zh-CN.md index eae02d86..2bc90ac9 100644 --- a/README_zh-CN.md +++ b/README_zh-CN.md @@ -162,9 +162,9 @@ MMYOLO 依赖 PyTorch, MMCV, MMEngine 和 MMDetection,以下是安装的简要 conda create -n open-mmlab python=3.8 pytorch==1.10.1 torchvision==0.11.2 cudatoolkit=11.3 -c pytorch -y conda activate open-mmlab pip install openmim -mim install "mmengine>=0.3.1" -mim install "mmcv>=2.0.0rc1,<2.1.0" -mim install "mmdet>=3.0.0rc5,<3.1.0" +mim install "mmengine>=0.6.0" +mim install "mmcv>=2.0.0rc4,<2.1.0" +mim install "mmdet>=3.0.0rc6,<3.1.0" git clone https://github.com/open-mmlab/mmyolo.git cd mmyolo # Install albumentations diff --git a/demo/boxam_vis_demo.py b/demo/boxam_vis_demo.py index 3672b727..e7f039ef 100644 --- a/demo/boxam_vis_demo.py +++ b/demo/boxam_vis_demo.py @@ -18,7 +18,6 @@ import mmcv from mmengine import Config, DictAction, MessageHub from mmengine.utils import ProgressBar -from mmyolo.utils import register_all_modules from mmyolo.utils.boxam_utils import (BoxAMDetectorVisualizer, BoxAMDetectorWrapper, DetAblationLayer, DetBoxScoreTarget, GradCAM, @@ -177,8 +176,6 @@ def init_detector_and_visualizer(args, cfg): def main(): - register_all_modules() - args = parse_args() # hard code diff --git a/demo/featmap_vis_demo.py b/demo/featmap_vis_demo.py index 2006c7af..892e73d6 100644 --- a/demo/featmap_vis_demo.py +++ b/demo/featmap_vis_demo.py @@ -6,10 +6,10 @@ from typing import Sequence import mmcv from mmdet.apis import inference_detector, init_detector from mmengine import Config, DictAction +from mmengine.registry import init_default_scope from mmengine.utils import ProgressBar from mmyolo.registry import VISUALIZERS -from mmyolo.utils import register_all_modules from mmyolo.utils.misc import auto_arrange_images, get_file_list @@ -96,13 +96,12 @@ class ActivationsWrapper: def main(): args = parse_args() - # register all modules in mmdet into the registries - register_all_modules() - cfg = Config.fromfile(args.config) if args.cfg_options is not None: cfg.merge_from_dict(args.cfg_options) + init_default_scope(cfg.get('default_scope', 'mmyolo')) + channel_reduction = args.channel_reduction if channel_reduction == 'None': channel_reduction = None diff --git a/demo/image_demo.py b/demo/image_demo.py index 8f9fde1a..be2217c9 100644 --- a/demo/image_demo.py +++ b/demo/image_demo.py @@ -8,7 +8,7 @@ from mmengine.logging import print_log from mmengine.utils import ProgressBar, path from mmyolo.registry import VISUALIZERS -from mmyolo.utils import register_all_modules, switch_to_deploy +from mmyolo.utils import switch_to_deploy from 
mmyolo.utils.labelme_utils import LabelmeFormat from mmyolo.utils.misc import get_file_list, show_data_classes @@ -51,9 +51,6 @@ def main(): raise RuntimeError('`--to-labelme` or `--show` only ' 'can choose one at the same time.') - # register all modules in mmdet into the registries - register_all_modules() - # build the model from a config file and a checkpoint file model = init_detector(args.config, args.checkpoint, device=args.device) diff --git a/demo/large_image_demo.py b/demo/large_image_demo.py index 27d14949..73106b28 100644 --- a/demo/large_image_demo.py +++ b/demo/large_image_demo.py @@ -28,7 +28,7 @@ except ImportError: 'to install sahi first for large image inference.') from mmyolo.registry import VISUALIZERS -from mmyolo.utils import register_all_modules, switch_to_deploy +from mmyolo.utils import switch_to_deploy from mmyolo.utils.large_image import merge_results_by_nms, shift_predictions from mmyolo.utils.misc import get_file_list @@ -90,9 +90,6 @@ def parse_args(): def main(): args = parse_args() - # register all modules in mmdet into the registries - register_all_modules() - # build the model from a config file and a checkpoint file model = init_detector(args.config, args.checkpoint, device=args.device) diff --git a/demo/video_demo.py b/demo/video_demo.py index 7020c9fe..d8317a2c 100644 --- a/demo/video_demo.py +++ b/demo/video_demo.py @@ -20,7 +20,6 @@ from mmdet.apis import inference_detector, init_detector from mmengine.utils import track_iter_progress from mmyolo.registry import VISUALIZERS -from mmyolo.utils import register_all_modules def parse_args(): @@ -49,9 +48,6 @@ def main(): ('Please specify at least one operation (save/show the ' 'video) with the argument "--out" or "--show"') - # register all modules in mmdet into the registries - register_all_modules() - # build the model from a config file and a checkpoint file model = init_detector(args.config, args.checkpoint, device=args.device) diff --git a/docker/Dockerfile b/docker/Dockerfile index 2bd00697..65689dd5 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -26,7 +26,7 @@ RUN apt-get update \ # Install MMEngine , MMCV and MMDet RUN pip install --no-cache-dir openmim && \ - mim install --no-cache-dir "mmengine>=0.3.1" "mmcv>=2.0.0rc1,<2.1.0" "mmdet>=3.0.0rc5,<3.1.0" + mim install --no-cache-dir "mmengine>=0.6.0" "mmcv>=2.0.0rc4,<2.1.0" "mmdet>=3.0.0rc6,<3.1.0" # Install MMYOLO RUN git clone https://github.com/open-mmlab/mmyolo.git /mmyolo && \ diff --git a/docker/Dockerfile_deployment b/docker/Dockerfile_deployment index 7f63c1cc..1a0a226a 100644 --- a/docker/Dockerfile_deployment +++ b/docker/Dockerfile_deployment @@ -30,7 +30,7 @@ RUN wget -q https://github.com/microsoft/onnxruntime/releases/download/v${ONNXRU # Install OPENMIM MMENGINE MMDET RUN pip install --no-cache-dir openmim \ - && mim install --no-cache-dir "mmengine>=0.3.1" "mmdet>=3.0.0rc5,<3.1.0" \ + && mim install --no-cache-dir "mmengine>=0.6.0" "mmdet>=3.0.0rc6,<3.1.0" \ && mim install --no-cache-dir opencv-python==4.5.5.64 opencv-python-headless==4.5.5.64 RUN git clone https://github.com/open-mmlab/mmcv.git -b 2.x mmcv \ diff --git a/docs/en/get_started/dependencies.md b/docs/en/get_started/dependencies.md index d75275f1..03cd6760 100644 --- a/docs/en/get_started/dependencies.md +++ b/docs/en/get_started/dependencies.md @@ -4,7 +4,9 @@ Compatible MMEngine, MMCV and MMDetection versions are shown as below. 
Please in | MMYOLO version | MMDetection version | MMEngine version | MMCV version | | :------------: | :----------------------: | :----------------------: | :---------------------: | -| main | mmdet>=3.0.0rc5, \<3.1.0 | mmengine>=0.3.1, \<1.0.0 | mmcv>=2.0.0rc0, \<2.1.0 | +| main | mmdet>=3.0.0rc6, \<3.1.0 | mmengine>=0.6.0, \<1.0.0 | mmcv>=2.0.0rc4, \<2.1.0 | +| 0.5.0 | mmdet>=3.0.0rc6, \<3.1.0 | mmengine>=0.6.0, \<1.0.0 | mmcv>=2.0.0rc4, \<2.1.0 | +| 0.4.0 | mmdet>=3.0.0rc5, \<3.1.0 | mmengine>=0.3.1, \<1.0.0 | mmcv>=2.0.0rc0, \<2.1.0 | | 0.3.0 | mmdet>=3.0.0rc5, \<3.1.0 | mmengine>=0.3.1, \<1.0.0 | mmcv>=2.0.0rc0, \<2.1.0 | | 0.2.0 | mmdet>=3.0.0rc3, \<3.1.0 | mmengine>=0.3.1, \<1.0.0 | mmcv>=2.0.0rc0, \<2.1.0 | | 0.1.3 | mmdet>=3.0.0rc3, \<3.1.0 | mmengine>=0.3.1, \<1.0.0 | mmcv>=2.0.0rc0, \<2.1.0 | diff --git a/docs/en/get_started/installation.md b/docs/en/get_started/installation.md index d73bede7..85217375 100644 --- a/docs/en/get_started/installation.md +++ b/docs/en/get_started/installation.md @@ -6,9 +6,9 @@ ```shell pip install -U openmim -mim install "mmengine>=0.3.1" -mim install "mmcv>=2.0.0rc1,<2.1.0" -mim install "mmdet>=3.0.0rc5,<3.1.0" +mim install "mmengine>=0.6.0" +mim install "mmcv>=2.0.0rc4,<2.1.0" +mim install "mmdet>=3.0.0rc6,<3.1.0" ``` **Note:** @@ -79,9 +79,7 @@ Option (b). If you install MMYOLO with MIM, open your python interpreter and cop ```python from mmdet.apis import init_detector, inference_detector -from mmyolo.utils import register_all_modules -register_all_modules() config_file = 'yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py' checkpoint_file = 'yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth' model = init_detector(config_file, checkpoint_file, device='cpu') # or device='cuda:0' diff --git a/docs/en/tutorials/custom_installation.md b/docs/en/tutorials/custom_installation.md index 4aafe6a3..327de64e 100644 --- a/docs/en/tutorials/custom_installation.md +++ b/docs/en/tutorials/custom_installation.md @@ -20,7 +20,7 @@ To install MMEngine with pip instead of MIM, please follow \[MMEngine installati For example, you can install MMEngine by the following command. ```shell -pip install "mmengine>=0.3.1" +pip install "mmengine>=0.6.0" ``` ## Install MMCV without MIM @@ -32,7 +32,7 @@ To install MMCV with pip instead of MIM, please follow [MMCV installation guides For example, the following command installs MMCV built for PyTorch 1.12.x and CUDA 11.6. ```shell -pip install "mmcv>=2.0.0rc1" -f https://download.openmmlab.com/mmcv/dist/cu116/torch1.12.0/index.html +pip install "mmcv>=2.0.0rc4" -f https://download.openmmlab.com/mmcv/dist/cu116/torch1.12.0/index.html ``` ## Install on CPU-only platforms @@ -73,9 +73,9 @@ thus we only need to install MMEngine, MMCV, MMDetection, and MMYOLO with the fo ```shell !pip3 install openmim -!mim install "mmengine>=0.3.1" -!mim install "mmcv>=2.0.0rc1,<2.1.0" -!mim install "mmdet>=3.0.0rc5,<3.1.0" +!mim install "mmengine>=0.6.0" +!mim install "mmcv>=2.0.0rc4,<2.1.0" +!mim install "mmdet>=3.0.0rc6,<3.1.0" ``` **Step 2.** Install MMYOLO from the source. 
diff --git a/docs/zh_cn/get_started/dependencies.md b/docs/zh_cn/get_started/dependencies.md index fcd3f661..b950519c 100644 --- a/docs/zh_cn/get_started/dependencies.md +++ b/docs/zh_cn/get_started/dependencies.md @@ -4,7 +4,8 @@ | MMYOLO version | MMDetection version | MMEngine version | MMCV version | | :------------: | :----------------------: | :----------------------: | :---------------------: | -| main | mmdet>=3.0.0rc5, \<3.1.0 | mmengine>=0.3.1, \<1.0.0 | mmcv>=2.0.0rc0, \<2.1.0 | +| main | mmdet>=3.0.0rc5, \<3.1.0 | mmengine>=0.3.1, \<1.0.0 | mmcv>=2.0.0rc4, \<2.1.0 | +| 0.5.0 | mmdet>=3.0.0rc6, \<3.1.0 | mmengine>=0.6.0, \<1.0.0 | mmcv>=2.0.0rc4, \<2.1.0 | | 0.4.0 | mmdet>=3.0.0rc5, \<3.1.0 | mmengine>=0.3.1, \<1.0.0 | mmcv>=2.0.0rc0, \<2.1.0 | | 0.3.0 | mmdet>=3.0.0rc5, \<3.1.0 | mmengine>=0.3.1, \<1.0.0 | mmcv>=2.0.0rc0, \<2.1.0 | | 0.2.0 | mmdet>=3.0.0rc3, \<3.1.0 | mmengine>=0.3.1, \<1.0.0 | mmcv>=2.0.0rc0, \<2.1.0 | diff --git a/docs/zh_cn/get_started/installation.md b/docs/zh_cn/get_started/installation.md index 147d204f..32927b6e 100644 --- a/docs/zh_cn/get_started/installation.md +++ b/docs/zh_cn/get_started/installation.md @@ -6,9 +6,9 @@ ```shell pip install -U openmim -mim install "mmengine>=0.3.1" -mim install "mmcv>=2.0.0rc1,<2.1.0" -mim install "mmdet>=3.0.0rc5,<3.1.0" +mim install "mmengine>=0.6.0" +mim install "mmcv>=2.0.0rc4,<2.1.0" +mim install "mmdet>=3.0.0rc6,<3.1.0" ``` 如果你当前已经处于 mmyolo 工程目录下,则可以采用如下简化写法 @@ -86,9 +86,7 @@ python demo/image_demo.py demo/demo.jpg \ ```python from mmdet.apis import init_detector, inference_detector -from mmyolo.utils import register_all_modules -register_all_modules() config_file = 'yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py' checkpoint_file = 'yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth' model = init_detector(config_file, checkpoint_file, device='cpu') # or device='cuda:0' diff --git a/docs/zh_cn/tutorials/custom_installation.md b/docs/zh_cn/tutorials/custom_installation.md index df9bf0cf..cdec9ed3 100644 --- a/docs/zh_cn/tutorials/custom_installation.md +++ b/docs/zh_cn/tutorials/custom_installation.md @@ -23,7 +23,7 @@ 例如,你可以通过以下命令安装 MMEngine: ```shell -pip install "mmengine>=0.3.1" +pip install "mmengine>=0.6.0" ``` ## 不使用 MIM 安装 MMCV @@ -36,7 +36,7 @@ MMCV 包含 C++ 和 CUDA 扩展,因此其对 PyTorch 的依赖比较复杂。M 例如,下述命令将会安装基于 PyTorch 1.12.x 和 CUDA 11.6 编译的 mmcv: ```shell -pip install "mmcv>=2.0.0rc1" -f https://download.openmmlab.com/mmcv/dist/cu116/torch1.12.0/index.html +pip install "mmcv>=2.0.0rc4" -f https://download.openmmlab.com/mmcv/dist/cu116/torch1.12.0/index.html ``` ## 在 CPU 环境中安装 @@ -75,9 +75,9 @@ pip install "mmcv>=2.0.0rc1" -f https://download.openmmlab.com/mmcv/dist/cu116/t ```shell !pip3 install openmim -!mim install "mmengine>=0.3.1" -!mim install "mmcv>=2.0.0rc1,<2.1.0" -!mim install "mmdet>=3.0.0rc5,<3.1.0" +!mim install "mmengine>=0.6.0" +!mim install "mmcv>=2.0.0rc4,<2.1.0" +!mim install "mmdet>=3.0.0rc6,<3.1.0" ``` **步骤 2.** 使用源码安装 MMYOLO: diff --git a/docs/zh_cn/useful_tools/browse_dataset.md b/docs/zh_cn/useful_tools/browse_dataset.md index ab991daa..5d6f0872 100644 --- a/docs/zh_cn/useful_tools/browse_dataset.md +++ b/docs/zh_cn/useful_tools/browse_dataset.md @@ -3,7 +3,7 @@ ```shell python tools/analysis_tools/browse_dataset.py \ ${CONFIG_FILE} \ - [-o, --output-dir ${OUTPUT_DIR}] \ + [-o, --out-dir ${OUTPUT_DIR}] \ [-p, --phase ${DATASET_PHASE}] \ [-n, --show-number ${NUMBER_IMAGES_DISPLAY}] \ [-i, --show-interval ${SHOW_INTERRVAL}] \ @@ -14,7 +14,7 @@ python 
tools/analysis_tools/browse_dataset.py \ **所有参数的说明**: - `config` : 模型配置文件的路径。 -- `-o, --output-dir`: 保存图片文件夹,如果没有指定,默认为 `'./output'`。 +- `-o, --out-dir`: 保存图片文件夹,如果没有指定,默认为 `'./output'`。 - **`-p, --phase`**: 可视化数据集的阶段,只能为 `['train', 'val', 'test']` 之一,默认为 `'train'`。 - **`-n, --show-number`**: 可视化样本数量。如果没有指定,默认展示数据集的所有图片。 - **`-m, --mode`**: 可视化的模式,只能为 `['original', 'transformed', 'pipeline']` 之一。 默认为 `'transformed'`。 @@ -32,24 +32,24 @@ python tools/analysis_tools/browse_dataset.py \ 1. **'original'** 模式 : ```shell -python ./tools/analysis_tools/browse_dataset.py configs/yolov5/yolov5_balloon.py --phase val --output-dir tmp --mode original +python ./tools/analysis_tools/browse_dataset.py configs/yolov5/yolov5_s-v61_syncbn_fast_1xb4-300e_balloon.py --phase val --out-dir tmp --mode original ``` - `--phase val`: 可视化验证集, 可简化为 `-p val`; -- `--output-dir tmp`: 可视化结果保存在 "tmp" 文件夹, 可简化为 `-o tmp`; +- `--out-dir tmp`: 可视化结果保存在 "tmp" 文件夹, 可简化为 `-o tmp`; - `--mode original`: 可视化原图, 可简化为 `-m original`; - `--show-number 100`: 可视化100张图,可简化为 `-n 100`; 2. **'transformed'** 模式 : ```shell -python ./tools/analysis_tools/browse_dataset.py configs/yolov5/yolov5_balloon.py +python ./tools/analysis_tools/browse_dataset.py configs/yolov5/yolov5_s-v61_syncbn_fast_1xb4-300e_balloon.py ``` 3. **'pipeline'** 模式 : ```shell -python ./tools/analysis_tools/browse_dataset.py configs/yolov5/yolov5_balloon.py -m pipeline +python ./tools/analysis_tools/browse_dataset.py configs/yolov5/yolov5_s-v61_syncbn_fast_1xb4-300e_balloon.py -m pipeline ```
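A recurring change in the diffs below is that explicit `register_all_modules()` calls are dropped: each registry now declares `locations` for lazy registration, and scripts call `init_default_scope` before building objects from a config. A minimal sketch of that usage pattern (the config name is illustrative, taken from the scripts changed in this series):

```python
from mmengine.config import Config
from mmengine.registry import init_default_scope

from mmyolo.registry import DATASETS

cfg = Config.fromfile(
    'configs/yolov5/yolov5_s-v61_syncbn_fast_1xb4-300e_balloon.py')
# Initialise the default scope so that modules registered under the
# `locations` declared in mmyolo.registry are imported on demand.
init_default_scope(cfg.get('default_scope', 'mmyolo'))
dataset = DATASETS.build(cfg.train_dataloader.dataset)
```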
diff --git a/mmyolo/__init__.py b/mmyolo/__init__.py index 757c4084..a7a2f333 100644 --- a/mmyolo/__init__.py +++ b/mmyolo/__init__.py @@ -6,15 +6,15 @@ from mmengine.utils import digit_version from .version import __version__, version_info -mmcv_minimum_version = '2.0.0rc0' +mmcv_minimum_version = '2.0.0rc4' mmcv_maximum_version = '2.1.0' mmcv_version = digit_version(mmcv.__version__) -mmengine_minimum_version = '0.3.1' +mmengine_minimum_version = '0.6.0' mmengine_maximum_version = '1.0.0' mmengine_version = digit_version(mmengine.__version__) -mmdet_minimum_version = '3.0.0rc5' +mmdet_minimum_version = '3.0.0rc6' mmdet_maximum_version = '3.1.0' mmdet_version = digit_version(mmdet.__version__) diff --git a/mmyolo/registry.py b/mmyolo/registry.py index 63967d86..71f43e6c 100644 --- a/mmyolo/registry.py +++ b/mmyolo/registry.py @@ -30,44 +30,74 @@ from mmengine.registry import \ from mmengine.registry import Registry # manage all kinds of runners like `EpochBasedRunner` and `IterBasedRunner` -RUNNERS = Registry('runner', parent=MMENGINE_RUNNERS) +RUNNERS = Registry( + 'runner', parent=MMENGINE_RUNNERS, locations=['mmyolo.engine']) # manage runner constructors that define how to initialize runners RUNNER_CONSTRUCTORS = Registry( - 'runner constructor', parent=MMENGINE_RUNNER_CONSTRUCTORS) + 'runner constructor', + parent=MMENGINE_RUNNER_CONSTRUCTORS, + locations=['mmyolo.engine']) # manage all kinds of loops like `EpochBasedTrainLoop` -LOOPS = Registry('loop', parent=MMENGINE_LOOPS) +LOOPS = Registry('loop', parent=MMENGINE_LOOPS, locations=['mmyolo.engine']) # manage all kinds of hooks like `CheckpointHook` -HOOKS = Registry('hook', parent=MMENGINE_HOOKS) +HOOKS = Registry( + 'hook', parent=MMENGINE_HOOKS, locations=['mmyolo.engine.hooks']) # manage data-related modules -DATASETS = Registry('dataset', parent=MMENGINE_DATASETS) -DATA_SAMPLERS = Registry('data sampler', parent=MMENGINE_DATA_SAMPLERS) -TRANSFORMS = Registry('transform', parent=MMENGINE_TRANSFORMS) +DATASETS = Registry( + 'dataset', parent=MMENGINE_DATASETS, locations=['mmyolo.datasets']) +DATA_SAMPLERS = Registry( + 'data sampler', + parent=MMENGINE_DATA_SAMPLERS, + locations=['mmyolo.datasets']) +TRANSFORMS = Registry( + 'transform', + parent=MMENGINE_TRANSFORMS, + locations=['mmyolo.datasets.transforms']) # manage all kinds of modules inheriting `nn.Module` -MODELS = Registry('model', parent=MMENGINE_MODELS) +MODELS = Registry('model', parent=MMENGINE_MODELS, locations=['mmyolo.models']) # manage all kinds of model wrappers like 'MMDistributedDataParallel' -MODEL_WRAPPERS = Registry('model_wrapper', parent=MMENGINE_MODEL_WRAPPERS) +MODEL_WRAPPERS = Registry( + 'model_wrapper', + parent=MMENGINE_MODEL_WRAPPERS, + locations=['mmyolo.models']) # manage all kinds of weight initialization modules like `Uniform` WEIGHT_INITIALIZERS = Registry( - 'weight initializer', parent=MMENGINE_WEIGHT_INITIALIZERS) + 'weight initializer', + parent=MMENGINE_WEIGHT_INITIALIZERS, + locations=['mmyolo.models']) # manage all kinds of optimizers like `SGD` and `Adam` -OPTIMIZERS = Registry('optimizer', parent=MMENGINE_OPTIMIZERS) -OPTIM_WRAPPERS = Registry('optim_wrapper', parent=MMENGINE_OPTIM_WRAPPERS) +OPTIMIZERS = Registry( + 'optimizer', + parent=MMENGINE_OPTIMIZERS, + locations=['mmyolo.engine.optimizers']) +OPTIM_WRAPPERS = Registry( + 'optim_wrapper', + parent=MMENGINE_OPTIM_WRAPPERS, + locations=['mmyolo.engine.optimizers']) # manage constructors that customize the optimization hyperparameters. 
OPTIM_WRAPPER_CONSTRUCTORS = Registry( - 'optimizer constructor', parent=MMENGINE_OPTIM_WRAPPER_CONSTRUCTORS) + 'optimizer constructor', + parent=MMENGINE_OPTIM_WRAPPER_CONSTRUCTORS, + locations=['mmyolo.engine.optimizers']) # manage all kinds of parameter schedulers like `MultiStepLR` PARAM_SCHEDULERS = Registry( - 'parameter scheduler', parent=MMENGINE_PARAM_SCHEDULERS) + 'parameter scheduler', + parent=MMENGINE_PARAM_SCHEDULERS, + locations=['mmyolo.engine.optimizers']) # manage all kinds of metrics -METRICS = Registry('metric', parent=MMENGINE_METRICS) +METRICS = Registry( + 'metric', parent=MMENGINE_METRICS, locations=['mmyolo.engine']) # manage task-specific modules like anchor generators and box coders -TASK_UTILS = Registry('task util', parent=MMENGINE_TASK_UTILS) +TASK_UTILS = Registry( + 'task util', parent=MMENGINE_TASK_UTILS, locations=['mmyolo.models']) # manage visualizer -VISUALIZERS = Registry('visualizer', parent=MMENGINE_VISUALIZERS) +VISUALIZERS = Registry( + 'visualizer', parent=MMENGINE_VISUALIZERS, locations=['mmyolo.utils']) # manage visualizer backend -VISBACKENDS = Registry('vis_backend', parent=MMENGINE_VISBACKENDS) +VISBACKENDS = Registry( + 'vis_backend', parent=MMENGINE_VISBACKENDS, locations=['mmyolo.utils']) diff --git a/mmyolo/utils/boxam_utils.py b/mmyolo/utils/boxam_utils.py index a0168b66..27bfa1a1 100644 --- a/mmyolo/utils/boxam_utils.py +++ b/mmyolo/utils/boxam_utils.py @@ -14,6 +14,7 @@ from mmcv.transforms import Compose from mmdet.evaluation import get_classes from mmdet.utils import ConfigType from mmengine.config import Config +from mmengine.registry import init_default_scope from mmengine.runner import load_checkpoint from mmengine.structures import InstanceData from torch import Tensor @@ -71,6 +72,7 @@ def init_detector( # only change this # grad based method requires train_cfg # config.model.train_cfg = None + init_default_scope(config.get('default_scope', 'mmyolo')) model = MODELS.build(config.model) if checkpoint is not None: diff --git a/mmyolo/version.py b/mmyolo/version.py index 92e8d704..75c44c7b 100644 --- a/mmyolo/version.py +++ b/mmyolo/version.py @@ -1,6 +1,6 @@ # Copyright (c) OpenMMLab. All rights reserved. 
-__version__ = '0.4.0' +__version__ = '0.5.0' from typing import Tuple diff --git a/requirements/docs.txt b/requirements/docs.txt index ca8cb940..2eacbcde 100644 --- a/requirements/docs.txt +++ b/requirements/docs.txt @@ -1,7 +1,7 @@ docutils==0.16.0 -mmcv>=2.0.0rc1,<2.1.0 -mmdet>=3.0.0rc5 -mmengine>=0.3.1 +mmcv>=2.0.0rc4,<2.1.0 +mmdet>=3.0.0rc6 +mmengine>=0.6.0 myst-parser -e git+https://github.com/open-mmlab/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme sphinx==4.0.2 diff --git a/requirements/mminstall.txt b/requirements/mminstall.txt index a9784fb3..91727baf 100644 --- a/requirements/mminstall.txt +++ b/requirements/mminstall.txt @@ -1,3 +1,3 @@ -mmcv>=2.0.0rc1,<2.1.0 -mmdet>=3.0.0rc5 -mmengine>=0.3.1 +mmcv>=2.0.0rc4,<2.1.0 +mmdet>=3.0.0rc6 +mmengine>=0.6.0 diff --git a/tools/analysis_tools/benchmark.py b/tools/analysis_tools/benchmark.py index d4db67d7..29f53a47 100644 --- a/tools/analysis_tools/benchmark.py +++ b/tools/analysis_tools/benchmark.py @@ -8,14 +8,12 @@ import torch from mmengine import Config, DictAction from mmengine.dist import get_world_size, init_dist from mmengine.logging import MMLogger, print_log +from mmengine.registry import init_default_scope from mmengine.runner import Runner, load_checkpoint from mmengine.utils import mkdir_or_exist from mmengine.utils.dl_utils import set_multi_processing from mmyolo.registry import MODELS -from mmyolo.utils import register_all_modules - -register_all_modules() # TODO: Refactoring and improving @@ -163,6 +161,8 @@ def main(): if args.cfg_options is not None: cfg.merge_from_dict(args.cfg_options) + init_default_scope(cfg.get('default_scope', 'mmyolo')) + distributed = False if args.launcher != 'none': init_dist(args.launcher, **cfg.get('env_cfg', {}).get('dist_cfg', {})) diff --git a/tools/analysis_tools/browse_dataset.py b/tools/analysis_tools/browse_dataset.py index 2a76ebce..fc8b52c5 100644 --- a/tools/analysis_tools/browse_dataset.py +++ b/tools/analysis_tools/browse_dataset.py @@ -11,11 +11,11 @@ from mmdet.models.utils import mask2ndarray from mmdet.structures.bbox import BaseBoxes from mmengine.config import Config, DictAction from mmengine.dataset import Compose +from mmengine.registry import init_default_scope from mmengine.utils import ProgressBar from mmengine.visualization import Visualizer from mmyolo.registry import DATASETS, VISUALIZERS -from mmyolo.utils import register_all_modules # TODO: Support for printing the change in key of results @@ -43,7 +43,7 @@ def parse_args(): 'the intermediate images. 
Defaults to "transformed".') parser.add_argument( '--out-dir', - default=None, + default='output', type=str, help='If there is no display interface, you can save it.') parser.add_argument('--not-show', default=False, action='store_true') @@ -182,8 +182,7 @@ def main(): if args.cfg_options is not None: cfg.merge_from_dict(args.cfg_options) - # register all modules in mmyolo into the registries - register_all_modules() + init_default_scope(cfg.get('default_scope', 'mmyolo')) dataset_cfg = cfg.get(args.phase + '_dataloader').get('dataset') dataset = DATASETS.build(dataset_cfg) diff --git a/tools/analysis_tools/confusion_matrix.py b/tools/analysis_tools/confusion_matrix.py index a92ed1cc..f48abdb9 100644 --- a/tools/analysis_tools/confusion_matrix.py +++ b/tools/analysis_tools/confusion_matrix.py @@ -9,10 +9,10 @@ from mmdet.evaluation import bbox_overlaps from mmdet.utils import replace_cfg_vals, update_data_root from mmengine import Config, DictAction from mmengine.fileio import load +from mmengine.registry import init_default_scope from mmengine.utils import ProgressBar from mmyolo.registry import DATASETS -from mmyolo.utils import register_all_modules def parse_args(): @@ -235,7 +235,6 @@ def plot_confusion_matrix(confusion_matrix, def main(): - register_all_modules() args = parse_args() cfg = Config.fromfile(args.config) @@ -249,6 +248,8 @@ def main(): if args.cfg_options is not None: cfg.merge_from_dict(args.cfg_options) + init_default_scope(cfg.get('default_scope', 'mmyolo')) + results = load(args.prediction_path) if not os.path.exists(args.save_dir): diff --git a/tools/analysis_tools/dataset_analysis.py b/tools/analysis_tools/dataset_analysis.py index a1fad78e..b2164e16 100644 --- a/tools/analysis_tools/dataset_analysis.py +++ b/tools/analysis_tools/dataset_analysis.py @@ -7,11 +7,11 @@ import matplotlib.patches as mpatches import matplotlib.pyplot as plt import numpy as np from mmengine.config import Config +from mmengine.registry import init_default_scope from mmengine.utils import ProgressBar from prettytable import PrettyTable from mmyolo.registry import DATASETS -from mmyolo.utils import register_all_modules from mmyolo.utils.misc import show_data_classes @@ -348,8 +348,7 @@ def main(): args = parse_args() cfg = Config.fromfile(args.config) - # register all modules in mmdet into the registries - register_all_modules() + init_default_scope(cfg.get('default_scope', 'mmyolo')) def replace_pipeline_to_none(cfg): """Recursively iterate over all dataset(or datasets) and set their diff --git a/tools/analysis_tools/optimize_anchors.py b/tools/analysis_tools/optimize_anchors.py index 70f15172..34d4d067 100644 --- a/tools/analysis_tools/optimize_anchors.py +++ b/tools/analysis_tools/optimize_anchors.py @@ -41,12 +41,12 @@ from mmdet.utils import replace_cfg_vals, update_data_root from mmengine.config import Config from mmengine.fileio import dump from mmengine.logging import MMLogger +from mmengine.registry import init_default_scope from mmengine.utils import ProgressBar from scipy.optimize import differential_evolution from torch import Tensor from mmyolo.registry import DATASETS -from mmyolo.utils import register_all_modules try: from scipy.cluster.vq import kmeans @@ -581,7 +581,6 @@ def main(): args = parse_args() cfg = args.config cfg = Config.fromfile(cfg) - register_all_modules() # replace the ${key} with the value of cfg.key cfg = replace_cfg_vals(cfg) @@ -589,6 +588,8 @@ def main(): # update data root according to MMDET_DATASETS update_data_root(cfg) + 
init_default_scope(cfg.get('default_scope', 'mmyolo')) + input_shape = args.input_shape assert len(input_shape) == 2 diff --git a/tools/analysis_tools/vis_scheduler.py b/tools/analysis_tools/vis_scheduler.py index 91b8f5fe..8a2922d8 100644 --- a/tools/analysis_tools/vis_scheduler.py +++ b/tools/analysis_tools/vis_scheduler.py @@ -28,13 +28,12 @@ import torch.nn as nn from mmengine.config import Config, DictAction from mmengine.hooks import Hook from mmengine.model import BaseModel +from mmengine.registry import init_default_scope from mmengine.runner import Runner from mmengine.utils.path import mkdir_or_exist from mmengine.visualization import Visualizer from rich.progress import BarColumn, MofNCompleteColumn, Progress, TextColumn -from mmyolo.utils import register_all_modules - def parse_args(): parser = argparse.ArgumentParser( @@ -229,8 +228,8 @@ def main(): osp.splitext(osp.basename(args.config))[0]) cfg.log_level = args.log_level - # register all modules in mmyolo into the registries - register_all_modules() + + init_default_scope(cfg.get('default_scope', 'mmyolo')) # init logger print('Param_scheduler :') diff --git a/tools/test.py b/tools/test.py index 7262234b..c05defe3 100644 --- a/tools/test.py +++ b/tools/test.py @@ -9,7 +9,7 @@ from mmengine.evaluator import DumpResults from mmengine.runner import Runner from mmyolo.registry import RUNNERS -from mmyolo.utils import is_metainfo_lower, register_all_modules +from mmyolo.utils import is_metainfo_lower # TODO: support fuse_conv_bn @@ -73,10 +73,6 @@ def parse_args(): def main(): args = parse_args() - # register all modules in mmdet into the registries - # do not init the default scope here because it will be init in the runner - register_all_modules(init_default_scope=False) - # load config cfg = Config.fromfile(args.config) # replace the ${key} with the value of cfg.key diff --git a/tools/train.py b/tools/train.py index 816a3b35..1060b631 100644 --- a/tools/train.py +++ b/tools/train.py @@ -9,7 +9,7 @@ from mmengine.logging import print_log from mmengine.runner import Runner from mmyolo.registry import RUNNERS -from mmyolo.utils import is_metainfo_lower, register_all_modules +from mmyolo.utils import is_metainfo_lower def parse_args(): @@ -55,10 +55,6 @@ def parse_args(): def main(): args = parse_args() - # register all modules in mmdet into the registries - # do not init the default scope here because it will be init in the runner - register_all_modules(init_default_scope=False) - # load config cfg = Config.fromfile(args.config) # replace the ${key} with the value of cfg.key From bd09a5fe37622e7d4aeca76cfc45edf84fda7fe1 Mon Sep 17 00:00:00 2001 From: kitecats <90194592+kitecats@users.noreply.github.com> Date: Tue, 28 Feb 2023 11:15:08 +0800 Subject: [PATCH 47/64] [Docs] Refine TTA Docs (#594) * Refine doc about tta-zh * refine doc --- docs/zh_cn/common_usage/tta.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/zh_cn/common_usage/tta.md b/docs/zh_cn/common_usage/tta.md index 9983665f..7baf7a4e 100644 --- a/docs/zh_cn/common_usage/tta.md +++ b/docs/zh_cn/common_usage/tta.md @@ -2,7 +2,7 @@ ## 测试时增强 TTA -MMYOLO 在 v0.5.0+ 版本中增加对 TTA 的支持,用户可以在进行评估时候指定 `--tta` 参数使能。 以 `YOLOv5-s` 为例,其单卡 TTA 测试命令为: +MMYOLO 在 v0.5.0+ 版本中增加对 TTA 的支持,用户可以在进行评估时候指定 `--tta` 参数使用。 以 `YOLOv5-s` 为例,其单卡 TTA 测试命令为: ```shell python tools/test.py configs/yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco.py 
https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco/yolov5_n-v61_syncbn_fast_8xb16-300e_coco_20220919_090739-b804c1ad.pth --tta @@ -10,7 +10,7 @@ python tools/test.py configs/yolov5/yolov5_n-v61_syncbn_fast_8xb16-300e_coco.py TTA 功能的正常运行必须确保配置中存在 `tta_model` 和 `tta_pipeline` 两个变量,详情可以参考 [det_p5_tta.py](https://github.com/open-mmlab/mmyolo/blob/dev/configs/_base_/det_p5_tta.py)。 -MMYOLO 中默认的 TTA 会先执行 3 个多尺度增强,然后再增强两个水平翻转增强,一共 6 个并行的 pipeline。以 `YOLOv5-s` 为例,其 TTA 配置为: +MMYOLO 中默认的 TTA 会先执行 3 个多尺度增强,然后在每个尺度中执行 2 种水平翻转增强,一共 6 个并行的 pipeline。以 `YOLOv5-s` 为例,其 TTA 配置为: ```python img_scales = [(640, 640), (320, 320), (960, 960)] From 8973096d04528be4d7f39fcdb0e8ee3eab2232d6 Mon Sep 17 00:00:00 2001 From: tianlei Date: Tue, 28 Feb 2023 13:49:08 +0800 Subject: [PATCH 48/64] [Doc] Add EN get_started (#600) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * [Doc] Add get started EN * add pre-commit * Update installation.md 删除register_all_modules * Delete article.md --- .../15_minutes_object_detection.md | 522 +++++++++++++++++- docs/en/get_started/article.md | 1 - docs/en/get_started/dependencies.md | 17 +- docs/en/get_started/installation.md | 16 +- docs/en/get_started/overview.md | 67 +++ 5 files changed, 616 insertions(+), 7 deletions(-) delete mode 100644 docs/en/get_started/article.md diff --git a/docs/en/get_started/15_minutes_object_detection.md b/docs/en/get_started/15_minutes_object_detection.md index 37409e5a..470ed919 100644 --- a/docs/en/get_started/15_minutes_object_detection.md +++ b/docs/en/get_started/15_minutes_object_detection.md @@ -1,3 +1,523 @@ # 15 minutes to get started with MMYOLO object detection -TODO +Object detection task refers to that given a picture, the network predicts all the categories of objects included in the picture and the corresponding boundary boxes + +
+object detection +
+ +Take the small dataset of cat as an example, you can easily learn MMYOLO object detection in 15 minutes. The whole process consists of the following steps: + +- [Installation](#installation) +- [Dataset](#dataset) +- [Config](#config) +- [Training](#training) +- [Testing](#testing) +- [EasyDeploy](#easydeploy-deployment) + +## Installation + +Assuming you've already installed Conda in advance, install PyTorch + +```shell +conda create -n mmyolo python=3.8 -y +conda activate mmyolo +# If you have GPU +conda install pytorch torchvision -c pytorch +# If you only have CPU +# conda install pytorch torchvision cpuonly -c pytorch +``` + +Install MMYOLO and dependency libraries + +```shell +git clone https://github.com/open-mmlab/mmyolo.git +cd mmyolo +pip install -U openmim +mim install -r requirements/mminstall.txt +# Install albumentations +mim install -r requirements/albu.txt +# Install MMYOLO +mim install -v -e . +# "-v" means verbose, or more output +# "-e" means installing a project in editable mode, +# thus any local modifications made to the code will take effect without reinstallation. +``` + +```{note} +Note: Since this repo uses OpenMMLab 2.0, it is better to create a new conda virtual environment to prevent conflicts with the repo installed in OpenMMLab 1.0. +``` + +For details about how to configure the environment, see [Installation and verification](./installation.md) + +## Dataset + +The Cat dataset is a single-category dataset consisting of 144 pictures (the original pictures are provided by @RangeKing, and cleaned by @PeterH0323), which contains the annotation information required for training. The sample image is shown below: + +
+cat dataset +
+ +You can download and use it directly by the following command: + +```shell +python tools/misc/download_dataset.py --dataset-name cat --save-dir ./data/cat --unzip --delete +``` + +This dataset is automatically downloaded to the `./data/cat` dir with the following directory structure: + +
+image +
+ +The cat dataset is located in the mmyolo project dir, and `data/cat/annotations` stores annotations in COCO format, and `data/cat/images` stores all images + +## Config + +Taking YOLOv5 algorithm as an example, considering the limited GPU memory of users, we need to modify some default training parameters to make them run smoothly. The key parameters to be modified are as follows: + +- YOLOv5 is an Anchor-Based algorithm, and different datasets need to calculate suitable anchors adaptively +- The default config uses 8 GPUs with a batch size of 16 per GPU. Now change it to a single GPU with a batch size of 12. +- The default training epoch is 300. Change it to 40 epoch +- Given the small size of the dataset, we opted to use fixed backbone weights +- In principle, the learning rate should be linearly scaled accordingly when the batch size is changed, but actual measurements have found that this is not necessary + +Create a `yolov5_s-v61_fast_1xb12-40e_cat.py` config file in the `configs/yolov5` folder (we have provided this config for you to use directly) and copy the following into the config file. + +```python +# Inherit and overwrite part of the config based on this config +_base_ = 'yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py' + +data_root = './data/cat/' # dataset root +class_name = ('cat', ) # dataset category name +num_classes = len(class_name) # dataset category number +# metainfo is a configuration that must be passed to the dataloader, otherwise it is invalid +# palette is a display color for category at visualization +# The palette length must be greater than or equal to the length of the classes +metainfo = dict(classes=class_name, palette=[(20, 220, 60)]) + +# Adaptive anchor based on tools/analysis_tools/optimize_anchors.py +anchors = [ + [(68, 69), (154, 91), (143, 162)], # P3/8 + [(242, 160), (189, 287), (391, 207)], # P4/16 + [(353, 337), (539, 341), (443, 432)] # P5/32 +] +# Max training 40 epoch +max_epochs = 40 +# bs = 12 +train_batch_size_per_gpu = 12 +# dataloader num workers +train_num_workers = 4 + +# load COCO pre-trained weight +load_from = 'https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth' # noqa + +model = dict( + # Fixed the weight of the entire backbone without training + backbone=dict(frozen_stages=4), + bbox_head=dict( + head_module=dict(num_classes=num_classes), + prior_generator=dict(base_sizes=anchors) + )) + +train_dataloader = dict( + batch_size=train_batch_size_per_gpu, + num_workers=train_num_workers, + dataset=dict( + data_root=data_root, + metainfo=metainfo, + # Dataset annotation file of json path + ann_file='annotations/trainval.json', + # Dataset prefix + data_prefix=dict(img='images/'))) + +val_dataloader = dict( + dataset=dict( + metainfo=metainfo, + data_root=data_root, + ann_file='annotations/trainval.json', + data_prefix=dict(img='images/'))) + +test_dataloader = val_dataloader + +val_evaluator = dict(ann_file=data_root + 'annotations/trainval.json') +test_evaluator = val_evaluator + +default_hooks = dict( + # Save weights every 10 epochs and a maximum of two weights can be saved. 
+    # The best model is saved automatically during model evaluation
+    checkpoint=dict(interval=10, max_keep_ckpts=2, save_best='auto'),
+    param_scheduler=dict(max_epochs=max_epochs),
+    # The log printing interval is 5
+    logger=dict(type='LoggerHook', interval=5))
+# The evaluation interval is 10
+train_cfg = dict(max_epochs=max_epochs, val_interval=10)
+```
+
+The above config is inherited from `yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py`, with `data_root`, `metainfo`, `train_dataloader`, `val_dataloader`, `num_classes` and other settings updated according to the characteristics of the cat dataset.
+
+## Training
+
+```shell
+python tools/train.py configs/yolov5/yolov5_s-v61_fast_1xb12-40e_cat.py
+```
+
+After running the above training command, the `work_dirs/yolov5_s-v61_fast_1xb12-40e_cat` folder is automatically generated, and both the checkpoint files and the training config are saved in it. On a low-end 1660 GPU, the entire training process takes about eight minutes.
+
+
+image +
+
+The performance on `trainval.json` is as follows:
+
+```text
+ Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.685
+ Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.953
+ Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.852
+ Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
+ Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = -1.000
+ Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.685
+ Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.664
+ Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.749
+ Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.761
+ Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
+ Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = -1.000
+ Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.761
+```
+
+The above metrics are printed via the COCO API, where -1 indicates that no object exists for that scale. According to the rules defined by COCO, the cat dataset contains only large objects, and there are no small or medium-sized ones.
+
+### Some Notes
+
+Two key warnings are printed during training:
+
+- You are using `YOLOv5Head` with num_classes == 1. The loss_cls will be 0. This is a normal phenomenon.
+- The model and loaded state dict do not match exactly
+
+Neither of these warnings has any impact on performance. The first warning appears because the current `num_classes` is 1: following the community implementation of the YOLOv5 algorithm, the classification branch loss is always 0 in the single-class case, which is normal. The second warning appears because we are training in fine-tuning mode and load COCO pre-trained weights for 80 classes, so the channel number of the final Head convolutions does not match and that part of the weights cannot be loaded, which is also normal.
+
+### Resume training after an interruption
+
+If you stop training, you can add `--resume` to the end of the training command and the program will automatically resume training with the latest weights file from `work_dirs`.
+
+```shell
+python tools/train.py configs/yolov5/yolov5_s-v61_fast_1xb12-40e_cat.py --resume
+```
+
+### Save GPU memory strategy
+
+The above config requires about 3 GB of GPU memory, so if you don't have enough, consider turning on mixed-precision training:
+
+```shell
+python tools/train.py configs/yolov5/yolov5_s-v61_fast_1xb12-40e_cat.py --amp
+```
+
+### Training visualization
+
+MMYOLO currently supports local, TensorBoard, WandB and other visualization backends. The default is local visualization, and you can switch to WandB or the others to visualize various training metrics in real time.
+
+#### 1 WandB
+
+To use WandB visualization, you need to register an account on the WandB website first and then obtain your API key from https://wandb.ai/settings.
+
+
+image +
+
+```shell
+pip install wandb
+# After running wandb login, enter the API key obtained above to complete the login.
+wandb login
+```
+
+Add the wandb config at the end of the config file we just created: `configs/yolov5/yolov5_s-v61_fast_1xb12-40e_cat.py`.
+
+```python
+visualizer = dict(vis_backends = [dict(type='LocalVisBackend'), dict(type='WandbVisBackend')])
+```
+
+Run the training command again and you will see the loss, learning rate, and coco/bbox_mAP visualizations on the linked web page.
+
+```shell
+python tools/train.py configs/yolov5/yolov5_s-v61_fast_1xb12-40e_cat.py
+```
+
+
+image +
+
+image +
+
+#### 2 Tensorboard
+
+Install the Tensorboard environment:
+
+```shell
+pip install tensorboard
+```
+
+Add the `tensorboard` config at the end of the config file we just created: `configs/yolov5/yolov5_s-v61_fast_1xb12-40e_cat.py`.
+
+```python
+visualizer = dict(vis_backends=[dict(type='LocalVisBackend'),dict(type='TensorboardVisBackend')])
+```
+
+After re-running the training command, the Tensorboard files will be generated in the visualization folder `work_dirs/yolov5_s-v61_fast_1xb12-40e_cat.py/{timestamp}/vis_data`.
+We can then use Tensorboard to view the loss, learning rate, and coco/bbox_mAP visualizations from a web link by running the following command:
+
+```shell
+tensorboard --logdir=work_dirs/yolov5_s-v61_fast_1xb12-40e_cat.py
+```
+
+## Testing
+
+```shell
+python tools/test.py configs/yolov5/yolov5_s-v61_fast_1xb12-40e_cat.py \
+                     work_dirs/yolov5_s-v61_fast_1xb12-40e_cat/epoch_40.pth \
+                     --show-dir show_results
+```
+
+By running the above test command, you not only get the AP performance printed in the **Training** section, but also automatically save the result images to the `work_dirs/yolov5_s-v61_fast_1xb12-40e_cat/{timestamp}/show_results` folder. Below is one of the result images: the left image is the actual annotation, and the right image is the inference result of the model.
+
+
+result_img +
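+
+If you only want to run the trained checkpoint on a single image rather than on the whole test set, the `demo/image_demo.py` script shipped with MMYOLO can be reused. The command below is a sketch: it assumes the script takes the image path, config and checkpoint as positional arguments (as in the MMYOLO installation guide) and saves its result to an `output` folder by default; check `python demo/image_demo.py -h` for the exact options of your version.
+
+```shell
+python demo/image_demo.py data/cat/images/IMG_20221020_112705.jpg \
+    configs/yolov5/yolov5_s-v61_fast_1xb12-40e_cat.py \
+    work_dirs/yolov5_s-v61_fast_1xb12-40e_cat/epoch_40.pth
+```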
+</div>
+
+You can also visualize model inference results in a browser window if you use `WandbVisBackend` or `TensorboardVisBackend`.
+
+## Feature map visualization
+
+MMYOLO provides feature map visualization scripts to analyze the current model training. Please refer to [Feature Map Visualization](../recommended_topics/visualization.md).
+
+Directly visualizing with the default `test_pipeline` would introduce a bias (the `LetterResize` padding means the feature map no longer aligns with the original image), so we need to modify the `test_pipeline` in `configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py` from
+
+```python
+test_pipeline = [
+    dict(
+        type='LoadImageFromFile',
+        file_client_args=_base_.file_client_args),
+    dict(type='YOLOv5KeepRatioResize', scale=img_scale),
+    dict(
+        type='LetterResize',
+        scale=img_scale,
+        allow_scale_up=False,
+        pad_val=dict(img=114)),
+    dict(type='LoadAnnotations', with_bbox=True, _scope_='mmdet'),
+    dict(
+        type='mmdet.PackDetInputs',
+        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
+                   'scale_factor', 'pad_param'))
+]
+```
+
+to the following config:
+
+```python
+test_pipeline = [
+    dict(
+        type='LoadImageFromFile',
+        file_client_args=_base_.file_client_args),
+    dict(type='mmdet.Resize', scale=img_scale, keep_ratio=False),  # modify the LetterResize to mmdet.Resize
+    dict(type='LoadAnnotations', with_bbox=True, _scope_='mmdet'),
+    dict(
+        type='mmdet.PackDetInputs',
+        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
+                   'scale_factor'))
+]
+```
+
+Let's choose the `data/cat/images/IMG_20221020_112705.jpg` image as an example to visualize the output feature maps of the YOLOv5 backbone and neck layers.
+
+**1. Visualize the three channels of YOLOv5 backbone**
+
+```shell
+python demo/featmap_vis_demo.py data/cat/images/IMG_20221020_112705.jpg \
+    configs/yolov5/yolov5_s-v61_fast_1xb12-40e_cat.py \
+    work_dirs/yolov5_s-v61_fast_1xb12-40e_cat/epoch_40.pth \
+    --target-layers backbone \
+    --channel-reduction squeeze_mean
+```
+
+<div align=center>
+image +
+</div>
+
+The result will be saved to the `output` folder in the current path. The three feature maps plotted in the figure above correspond to the small, medium and large output feature maps. As the backbone is not really involved in training in this run, you can see that the large cat object is predicted on the small feature map, which is consistent with the hierarchical detection idea of object detection.
+
+**2. Visualize the three channels of YOLOv5 neck**
+
+```shell
+python demo/featmap_vis_demo.py data/cat/images/IMG_20221020_112705.jpg \
+    configs/yolov5/yolov5_s-v61_fast_1xb12-40e_cat.py \
+    work_dirs/yolov5_s-v61_fast_1xb12-40e_cat/epoch_40.pth \
+    --target-layers neck \
+    --channel-reduction squeeze_mean
+```
+
+<div align=center>
+image +
+</div>
+
+As can be seen from the figure above, because the neck is involved in training and we also reset the anchors, the three output feature maps are all forced to respond to objects of the same scale, so the three neck outputs look similar, which breaks the original pre-trained distribution of the backbone. It can also be seen that 40 epochs are not enough to train on this dataset, and the feature maps do not perform well yet.
+
+**3. Grad-Based CAM visualization**
+
+Based on the feature map visualization above, we can analyze Grad CAM at the bbox level on the feature layers.
+
+(a) View the Grad CAM of the minimum output feature map of the neck
+
+```shell
+python demo/boxam_vis_demo.py data/cat/images/IMG_20221020_112705.jpg \
+    configs/yolov5/yolov5_s-v61_fast_1xb12-40e_cat.py \
+    work_dirs/yolov5_s-v61_fast_1xb12-40e_cat/epoch_40.pth \
+    --target-layer neck.out_layers[2]
+```
+
+<div align=center>
+image +
+</div>
+
+(b) View the Grad CAM of the medium output feature map of the neck
+
+```shell
+python demo/boxam_vis_demo.py data/cat/images/IMG_20221020_112705.jpg \
+    configs/yolov5/yolov5_s-v61_fast_1xb12-40e_cat.py \
+    work_dirs/yolov5_s-v61_fast_1xb12-40e_cat/epoch_40.pth \
+    --target-layer neck.out_layers[1]
+```
+
+<div align=center>
+image +
+</div>
+
+(c) View the Grad CAM of the maximum output feature map of the neck
+
+```shell
+python demo/boxam_vis_demo.py data/cat/images/IMG_20221020_112705.jpg \
+    configs/yolov5/yolov5_s-v61_fast_1xb12-40e_cat.py \
+    work_dirs/yolov5_s-v61_fast_1xb12-40e_cat/epoch_40.pth \
+    --target-layer neck.out_layers[0]
+```
+
+<div align=center>
+image +
+</div>
+
+## EasyDeploy deployment
+
+Here we use MMYOLO's [EasyDeploy](../../../projects/easydeploy/) project to demonstrate model conversion, deployment and basic inference.
+
+First, follow EasyDeploy's [basic documentation](../../../projects/easydeploy/docs/model_convert.md) to install the required libraries on your own device.
+
+```shell
+pip install onnx
+pip install onnx-simplifier # install if you want to use the simplify option
+pip install tensorrt # only needed if you have a GPU environment and want to export a TensorRT model
+```
+
+Once installed, you can convert and deploy the model trained on the cat dataset with one command. The current ONNX version is 1.13.0 and the TensorRT version is 8.5.3.1, so keep `--opset` at 11. The remaining parameters need to be adjusted according to the config used. Here we export a CPU version of the ONNX model, with `--backend` set to 1.
+
+```shell
+python projects/easydeploy/tools/export.py \
+    configs/yolov5/yolov5_s-v61_fast_1xb12-40e_cat.py \
+    work_dirs/yolov5_s-v61_fast_1xb12-40e_cat/epoch_40.pth \
+    --work-dir work_dirs/yolov5_s-v61_fast_1xb12-40e_cat \
+    --img-size 640 640 \
+    --batch 1 \
+    --device cpu \
+    --simplify \
+    --opset 11 \
+    --backend 1 \
+    --pre-topk 1000 \
+    --keep-topk 100 \
+    --iou-threshold 0.65 \
+    --score-threshold 0.25
+```
+
+On success, you will get the converted ONNX model under `work-dir`, which is named `end2end.onnx` by default.
+
+Let's use the `end2end.onnx` model to perform basic image inference:
+
+```shell
+python projects/easydeploy/tools/image-demo.py \
+    data/cat/images/IMG_20210728_205312.jpg \
+    configs/yolov5/yolov5_s-v61_fast_1xb12-40e_cat.py \
+    work_dirs/yolov5_s-v61_fast_1xb12-40e_cat/end2end.onnx \
+    --device cpu
+```
+
+After successful inference, the result image will be generated in the `output` folder under the MMYOLO root directory by default. If you want to see the result without saving it, you can add `--show` to the end of the above command. For convenience, the generated result is shown below.
+
+<div align=center>
+image +
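+
+If you would rather embed the exported model in your own code than call `image-demo.py`, ONNX Runtime can load `end2end.onnx` directly. The snippet below is only a minimal sketch, not part of MMYOLO: it assumes the exported graph takes a single `1x3x640x640` float32 input and returns `(num_dets, boxes, scores, labels)` as EasyDeploy end-to-end models typically do, and it uses a simplified top-left letterbox with `1/255` scaling. Check the real input/output names and the preprocessing expected by your export (for example with Netron) before relying on it.
+
+```python
+import cv2
+import numpy as np
+import onnxruntime as ort
+
+session = ort.InferenceSession(
+    'work_dirs/yolov5_s-v61_fast_1xb12-40e_cat/end2end.onnx',
+    providers=['CPUExecutionProvider'])
+
+img = cv2.imread('data/cat/images/IMG_20210728_205312.jpg')
+h, w = img.shape[:2]
+
+# Simplified letterbox: keep the aspect ratio and pad to 640x640 with value 114
+scale = min(640 / h, 640 / w)
+new_h, new_w = int(round(h * scale)), int(round(w * scale))
+canvas = np.full((640, 640, 3), 114, dtype=np.uint8)
+canvas[:new_h, :new_w] = cv2.resize(img, (new_w, new_h))
+
+# HWC BGR uint8 -> NCHW RGB float32 in [0, 1]
+blob = np.ascontiguousarray(
+    canvas[:, :, ::-1].transpose(2, 0, 1)[None], dtype=np.float32) / 255.0
+
+input_name = session.get_inputs()[0].name
+num_dets, boxes, scores, labels = session.run(None, {input_name: blob})
+
+# Map the kept boxes back to the original image and print them
+for box, score, label in zip(boxes[0][:int(num_dets[0])], scores[0], labels[0]):
+    x1, y1, x2, y2 = (box / scale).tolist()
+    print(f'label={int(label)} score={float(score):.2f} '
+          f'bbox=({x1:.1f}, {y1:.1f}, {x2:.1f}, {y2:.1f})')
+```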
+</div>
+
+Next, let's convert the model into a TensorRT engine file. Because a TensorRT engine is tied to the current environment and TensorRT version, make sure to export with the matching parameters; here we export a file for TensorRT8, so `--backend` is set to 2.
+
+```shell
+python projects/easydeploy/tools/export.py \
+    configs/yolov5/yolov5_s-v61_fast_1xb12-40e_cat.py \
+    work_dirs/yolov5_s-v61_fast_1xb12-40e_cat/epoch_40.pth \
+    --work-dir work_dirs/yolov5_s-v61_fast_1xb12-40e_cat \
+    --img-size 640 640 \
+    --batch 1 \
+    --device cuda:0 \
+    --simplify \
+    --opset 11 \
+    --backend 2 \
+    --pre-topk 1000 \
+    --keep-topk 100 \
+    --iou-threshold 0.65 \
+    --score-threshold 0.25
+```
+
+The resulting `end2end.onnx` is the ONNX file for TensorRT8 deployment, which we then use to build the TensorRT engine.
+
+```shell
+python projects/easydeploy/tools/build_engine.py \
+    work_dirs/yolov5_s-v61_fast_1xb12-40e_cat/end2end.onnx \
+    --img-size 640 640 \
+    --device cuda:0
+```
+
+Successful execution will generate the `end2end.engine` file under `work-dir`:
+
+```shell
+work_dirs/yolov5_s-v61_fast_1xb12-40e_cat
+├── 202302XX_XXXXXX
+│   ├── 202302XX_XXXXXX.log
+│   └── vis_data
+│       ├── 202302XX_XXXXXX.json
+│       ├── config.py
+│       └── scalars.json
+├── best_coco
+│   └── bbox_mAP_epoch_40.pth
+├── end2end.engine
+├── end2end.onnx
+├── epoch_30.pth
+├── epoch_40.pth
+├── last_checkpoint
+└── yolov5_s-v61_fast_1xb12-40e_cat.py
+```
+
+Let's continue to use `image-demo.py` for image inference:
+
+```shell
+python projects/easydeploy/tools/image-demo.py \
+    data/cat/images/IMG_20210728_205312.jpg \
+    configs/yolov5/yolov5_s-v61_fast_1xb12-40e_cat.py \
+    work_dirs/yolov5_s-v61_fast_1xb12-40e_cat/end2end.engine \
+    --device cuda:0
+```
+
+Here we choose to save the inference result under `output` instead of displaying it directly. The result is shown below.
+
+<div align=center>
+image +
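+
+As an optional sanity check before integrating the engine elsewhere, you can load it with TensorRT's `trtexec` utility (assuming `trtexec` is available in your TensorRT installation); it deserializes the engine and reports throughput and latency:
+
+```shell
+trtexec --loadEngine=work_dirs/yolov5_s-v61_fast_1xb12-40e_cat/end2end.engine
+```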
+ +This completes the transformation deployment of the trained model and checks the inference results. This is the end of the tutorial. + +The full content above can be viewed: [15_minutes_object_detection.ipynb](<>) diff --git a/docs/en/get_started/article.md b/docs/en/get_started/article.md deleted file mode 100644 index ea28d491..00000000 --- a/docs/en/get_started/article.md +++ /dev/null @@ -1 +0,0 @@ -# Resources summary diff --git a/docs/en/get_started/dependencies.md b/docs/en/get_started/dependencies.md index 03cd6760..06802025 100644 --- a/docs/en/get_started/dependencies.md +++ b/docs/en/get_started/dependencies.md @@ -16,7 +16,12 @@ Compatible MMEngine, MMCV and MMDetection versions are shown as below. Please in In this section, we demonstrate how to prepare an environment with PyTorch. -MMDetection works on Linux, Windows, and macOS. It requires Python 3.7+, CUDA 9.2+, and PyTorch 1.7+. +MMDetection works on Linux, Windows, and macOS. It requires: + +- Python 3.7+ +- PyTorch 1.7+ +- CUDA 9.2+ +- GCC 5.4+ ```{note} If you are experienced with PyTorch and have already installed it, just skip this part and jump to the [next section](#installation). Otherwise, you can follow these steps for the preparation. @@ -31,7 +36,7 @@ conda create --name openmmlab python=3.8 -y conda activate openmmlab ``` -**Step 2.** Install PyTorch following [official instructions](https://pytorch.org/get-started/locally/), e.g. +**Step 2.** Install PyTorch following [official commands](https://pytorch.org/get-started/locally/), e.g. On GPU platforms: @@ -44,3 +49,11 @@ On CPU platforms: ```shell conda install pytorch torchvision cpuonly -c pytorch ``` + +**Step 3.** Verify PyTorch installation + +```shell +python -c "import torch; print(torch.__version__); print(torch.cuda.is_available())" +``` + +If the GPU is used, the version information and `True` are printed; otherwise, the version information and `False` are printed. diff --git a/docs/en/get_started/installation.md b/docs/en/get_started/installation.md index 85217375..113f2904 100644 --- a/docs/en/get_started/installation.md +++ b/docs/en/get_started/installation.md @@ -11,11 +11,19 @@ mim install "mmcv>=2.0.0rc4,<2.1.0" mim install "mmdet>=3.0.0rc6,<3.1.0" ``` +If you are currently in the mmyolo project directory, you can use the following simplified commands + +```shell +cd mmyolo +pip install -U openmom +mim install -r requirements/mminstall.txt +``` + **Note:** a. In MMCV-v2.x, `mmcv-full` is rename to `mmcv`, if you want to install `mmcv` without CUDA ops, you can use `mim install "mmcv-lite>=2.0.0rc1"` to install the lite version. -b. If you would like to use albumentations, we suggest using pip install -r requirements/albu.txt or pip install -U albumentations --no-binary qudida,albumentations. If you simply use pip install albumentations==1.0.1, it will install opencv-python-headless simultaneously (even though you have already installed opencv-python). We recommended checking the environment after installing albumentation to ensure that opencv-python and opencv-python-headless are not installed at the same time, because it might cause unexpected issues if they both installed. Please refer to [official documentation](https://albumentations.ai/docs/getting_started/installation/#note-on-opencv-dependencies) for more details. +b. If you would like to use `albumentations`, we suggest using `pip install -r requirements/albu.txt` or `pip install -U albumentations --no-binary qudida,albumentations`. 
If you simply use `pip install albumentations==1.0.1`, it will install `opencv-python-headless` simultaneously (even though you have already installed `opencv-python`). We recommended checking the environment after installing albumentation to ensure that `opencv-python` and `opencv-python-headless` are not installed at the same time, because it might cause unexpected issues if they both installed. Please refer to [official documentation](https://albumentations.ai/docs/getting_started/installation/#note-on-opencv-dependencies) for more details. **Step 1.** Install MMYOLO. @@ -41,7 +49,7 @@ mim install "mmyolo" ## Verify the installation -To verify whether MMYOLO is installed correctly, we provide some sample codes to run an inference demo. +To verify whether MMYOLO is installed correctly, we provide an inference demo. **Step 1.** We need to download config and checkpoint files. @@ -92,7 +100,7 @@ You will see a list of `DetDataSample`, and the predictions are in the `pred_ins We provide a [Dockerfile](https://github.com/open-mmlab/mmyolo/blob/main/docker/Dockerfile) to build an image. Ensure that your [docker version](https://docs.docker.com/engine/install/) >=19.03. -Reminder: If you find out that your download speed is very slow, we suggest that you can canceling the comments in the last two lines of `Optional` in the [Dockerfile](https://github.com/open-mmlab/mmyolo/blob/main/docker/Dockerfile#L19-L20) to obtain a rocket like download speed: +Reminder: If you find out that your download speed is very slow, we suggest canceling the comments in the last two lines of `Optional` in the [Dockerfile](https://github.com/open-mmlab/mmyolo/blob/main/docker/Dockerfile#L19-L20) to obtain a rocket like download speed: ```dockerfile # (Optional) @@ -115,6 +123,8 @@ export DATA_DIR=/path/to/your/dataset docker run --gpus all --shm-size=8g -it -v ${DATA_DIR}:/mmyolo/data mmyolo ``` +For other customized inatallation, see [Customized Installation](../tutorials/custom_installation.md) + ## Troubleshooting If you have some issues during the installation, please first view the [FAQ](../tutorials/faq.md) page. diff --git a/docs/en/get_started/overview.md b/docs/en/get_started/overview.md index 07dd0c5c..36ff7778 100644 --- a/docs/en/get_started/overview.md +++ b/docs/en/get_started/overview.md @@ -1 +1,68 @@ # Overview + +## MMYOLO Introduction + +
+image +
+</div>
+
+MMYOLO is an open-source toolbox for YOLO series algorithms based on PyTorch and MMDetection, and is part of the [OpenMMLab](https://openmmlab.com/) project. MMYOLO is positioned as a popular open-source library for the YOLO series and as a core library for industrial applications. Its vision diagram is shown below:
+
+<div align=center>
+vision diagram +
+ +The YOLO series of algorithms currently supported are as follows: + +
+Algorithms currently supported + +- YOLOv5 +- YOLOX +- RTMDet +- YOLOv6 +- YOLOv7 +- PPYOLOE +- YOLOv8 + +
+ +The following tasks are currently supported: + +
+Tasks currently supported + +- object detection +- rotated object detection + +
+ +MMYOLO runs on Linux, Windows, macOS, and supports PyTorch 1.7 or later. It has the following three characteristics: + +- 🕹️ **Unified and convenient algorithm evaluation** + + MMYOLO unifies various YOLO algorithm modules and provides a unified evaluation process, so that users can compare and analyze fairly and conveniently. + +- 📚 **Extensive documentation for started and advanced** + + MMYOLO provides a series of documents, including getting started, deployment, advanced practice and algorithm analysis, which is convenient for different users to get started and expand. + +- 🧩 **Modular Design** + + MMYOLO disentangled the framework into modular components, and users can easily build custom models by combining different modules and training and testing strategies. + +Base module-P5 + This image is provided by RangeKing@GitHub, thanks very much! + +## User guide for this documentation + +MMYOLO divides the document structure into 6 parts, corresponding to different user needs. + +- **Get started with MMYOLO**. This part is must read for first-time MMYOLO users, so please read it carefully. +- **Recommend Topics**. This part is the essence documentation provided in MMYOLO by topics, including lots of MMYOLO features, etc. Highly recommended reading for all MMYOLO users. +- **Common functions**. This part provides a list of common features that you will use during the training and testing process, so you can refer back to them when you need. +- **Useful tools**. This part is useful tools summary under `tools`, so that you can quickly and happily use the various scripts provided in MMYOLO. +- **Basic and advanced tutorials**. This part introduces some basic concepts and advanced tutorials in MMYOLO. It is suitable for users who want to understand the design idea and structure design of MMYOLO in detail. +- **Others**. The rest includes model repositories, specifications and interface documentation, etc. + +Users with different needs can choose your favorite content to read. If you have any questions about this documentation or a better idea to improve it, welcome to post a Pull Request to MMYOLO ~. 
Please refer to [How to Contribute to MMYOLO](../recommended_topics/contributing.md) From a321cfae80d1289e174632d2685ed8d2524e5cd3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Haian=20Huang=28=E6=B7=B1=E5=BA=A6=E7=9C=B8=29?= <1286304229@qq.com> Date: Tue, 28 Feb 2023 15:22:56 +0800 Subject: [PATCH 49/64] Fix box cam error (#601) * fix box cam * fix box cam --- ...yolov5_s-v61_fast_1xb12-40e_608x352_cat.py | 70 +++++++++++++++++++ demo/boxam_vis_demo.py | 3 + docs/zh_cn/tutorials/faq.md | 4 ++ mmyolo/utils/boxam_utils.py | 2 + 4 files changed, 79 insertions(+) create mode 100644 configs/yolov5/yolov5_s-v61_fast_1xb12-40e_608x352_cat.py diff --git a/configs/yolov5/yolov5_s-v61_fast_1xb12-40e_608x352_cat.py b/configs/yolov5/yolov5_s-v61_fast_1xb12-40e_608x352_cat.py new file mode 100644 index 00000000..a7ea4f44 --- /dev/null +++ b/configs/yolov5/yolov5_s-v61_fast_1xb12-40e_608x352_cat.py @@ -0,0 +1,70 @@ +_base_ = 'yolov5_s-v61_fast_1xb12-40e_cat.py' + +# This configuration is used to provide non-square training examples +# Must be a multiple of 32 +img_scale = (608, 352) # w h + +anchors = [ + [(65, 35), (159, 45), (119, 80)], # P3/8 + [(215, 77), (224, 116), (170, 166)], # P4/16 + [(376, 108), (339, 176), (483, 190)] # P5/32 +] + +# ===============================Unmodified in most cases==================== +_base_.model.bbox_head.loss_obj.loss_weight = 1.0 * ((img_scale[1] / 640)**2) +_base_.model.bbox_head.prior_generator.base_sizes = anchors + +train_pipeline = [ + *_base_.pre_transform, + dict( + type='Mosaic', + img_scale=img_scale, + pad_val=114.0, + pre_transform=_base_.pre_transform), + dict( + type='YOLOv5RandomAffine', + max_rotate_degree=0.0, + max_shear_degree=0.0, + scaling_ratio_range=(1 - _base_.affine_scale, 1 + _base_.affine_scale), + # img_scale is (width, height) + border=(-img_scale[0] // 2, -img_scale[1] // 2), + border_val=(114, 114, 114)), + dict( + type='mmdet.Albu', + transforms=_base_.albu_train_transforms, + bbox_params=dict( + type='BboxParams', + format='pascal_voc', + label_fields=['gt_bboxes_labels', 'gt_ignore_flags']), + keymap={ + 'img': 'image', + 'gt_bboxes': 'bboxes' + }), + dict(type='YOLOv5HSVRandomAug'), + dict(type='mmdet.RandomFlip', prob=0.5), + dict( + type='mmdet.PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip', + 'flip_direction')) +] + +_base_.train_dataloader.dataset.pipeline = train_pipeline + +test_pipeline = [ + dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args), + dict(type='YOLOv5KeepRatioResize', scale=img_scale), + dict( + type='LetterResize', + scale=img_scale, + allow_scale_up=False, + pad_val=dict(img=114)), + dict(type='mmdet.LoadAnnotations', with_bbox=True), + dict( + type='mmdet.PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', + 'scale_factor', 'pad_param')) +] + +val_dataloader = dict( + dataset=dict(pipeline=test_pipeline, batch_shapes_cfg=None)) +test_dataloader = val_dataloader diff --git a/demo/boxam_vis_demo.py b/demo/boxam_vis_demo.py index e7f039ef..e5c5df6c 100644 --- a/demo/boxam_vis_demo.py +++ b/demo/boxam_vis_demo.py @@ -46,6 +46,9 @@ IGNORE_LOSS_PARAMS = { 'yolov6': ['loss_cls'], 'yolox': ['loss_obj'], 'rtmdet': ['loss_cls'], + 'yolov7': ['loss_obj'], + 'yolov8': ['loss_cls'], + 'ppyoloe': ['loss_cls'], } # This parameter is required in some algorithms diff --git a/docs/zh_cn/tutorials/faq.md b/docs/zh_cn/tutorials/faq.md index f8575037..053cbb32 100644 --- a/docs/zh_cn/tutorials/faq.md +++ b/docs/zh_cn/tutorials/faq.md @@ -105,3 +105,7 
@@ unset PYTHONPATH 7. `coco/bbox_mAP_l` 将会根据 `bbox_mAP_l` 作为筛选条件。 此外用户还可以选择筛选的逻辑,通过设置配置中的 `default_hooks.checkpoint.rule` 来选择判断逻辑,如:`default_hooks.checkpoint.rule=greater` 表示指标越大越好。更详细的使用可以参考 [checkpoint_hook](https://github.com/open-mmlab/mmengine/blob/main/mmengine/hooks/checkpoint_hook.py) 来修改 + +## 如何进行非正方形输入尺寸训练和测试 ? + +在 YOLO 系列算法中默认配置基本上都是 640x640 或者 1280x1280 正方形尺度输入训练的。用户如果想进行非正方形尺度训练,你可以修改配置中 `image_scale` 参数,并将其他对应位置进行修改即可。用户可以参考我们提供的 [yolov5_s-v61_fast_1xb12-40e_608x352_cat.py](https://github.com/open-mmlab/mmyolo/tree/dev/configs/yolov5/yolov5_s-v61_fast_1xb12-40e_608x352_cat.py) 配置。 diff --git a/mmyolo/utils/boxam_utils.py b/mmyolo/utils/boxam_utils.py index 27bfa1a1..4a46f21c 100644 --- a/mmyolo/utils/boxam_utils.py +++ b/mmyolo/utils/boxam_utils.py @@ -203,6 +203,7 @@ class BoxAMDetectorWrapper(nn.Module): # Maybe this is a direction that can be optimized # self.detector.init_weights() + self.detector.bbox_head.head_module.training = True if hasattr(self.detector.bbox_head, 'featmap_sizes'): # Prevent the model algorithm error when calculating loss self.detector.bbox_head.featmap_sizes = None @@ -218,6 +219,7 @@ class BoxAMDetectorWrapper(nn.Module): return [loss] else: + self.detector.bbox_head.head_module.training = False with torch.no_grad(): results = self.detector.test_step(self.input_data) return results From 28b6a88fe768d694ce2f4c5ddd54aa6761782cc1 Mon Sep 17 00:00:00 2001 From: LinXiaoZheng <90811472+Zheng-LinXiao@users.noreply.github.com> Date: Tue, 28 Feb 2023 16:19:59 +0800 Subject: [PATCH 50/64] [Docs] Modify contributing.md (#596) * add_git_introduce * add en_contributing * edit picture * fix --- docs/en/recommended_topics/contributing.md | 54 +++++++++++++++++++ docs/zh_cn/recommended_topics/contributing.md | 54 +++++++++++++++++++ 2 files changed, 108 insertions(+) diff --git a/docs/en/recommended_topics/contributing.md b/docs/en/recommended_topics/contributing.md index 31858572..9efb8871 100644 --- a/docs/en/recommended_topics/contributing.md +++ b/docs/en/recommended_topics/contributing.md @@ -21,6 +21,60 @@ The steps to fix the bug of code implementation are as follows. You can directly post a pull request to fix documents. If you want to add a document, you should first create an issue to check if it is reasonable. +## Preparation + +The commands for processing pull requests are implemented using Git, and this chapter details `Git Configuration` and `associated GitHub`. + +### 1. Git Configuration + +First, make sure you have Git installed on your computer. For Linux systems and macOS systems, Git is generally installed by default. If it is not installed, it can be downloaded at [Git-Downloads](https://git-scm.com/downloads). + +```shell +# view the Git version +git --version +``` + +Second, check your `Git Config` + +```shell +# view the Git config +git config --global --list +``` + +If `user.name` and `user.email` are empty, run the command. + +```shell +git config --global user.name "Change your username here" +git config --global user.email "Change your useremail here" +``` + +Finally, run the command in `git bash` or `terminal` to generate the key file. After the generation is successful, a `.ssh` file will appear in the user directory, and `id_rsa.pub` is the public key file. + +```shell +# useremail is GitHub's email address +ssh-keygen -t rsa -C "useremail" +``` + +### 2. Associated GitHub + +First, open `id_rsa.pub` and copy the entire contents. + +Second, log in to your GitHub account to set it up. 
+ + + +Click `New SSH key` to add a new SSH keys, and paste the copied content into Key. + + + +Finally, verify that SSH matches the GitHub account by running the command in `git bash` or `terminal`. If it matches, enter `yes` to succeed. + +```shell +ssh -T git@github.com +``` + + + ## Pull Request Workflow If you're not familiar with Pull Request, don't worry! The following guidance will tell you how to create a Pull Request step by step. If you want to dive into the development mode of Pull Request, you can refer to the [official documents](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/about-pull-requests) diff --git a/docs/zh_cn/recommended_topics/contributing.md b/docs/zh_cn/recommended_topics/contributing.md index ff3b2ca3..a950f1d2 100644 --- a/docs/zh_cn/recommended_topics/contributing.md +++ b/docs/zh_cn/recommended_topics/contributing.md @@ -23,6 +23,60 @@ 1. 提交 issue,确认添加文档的必要性。 2. 添加文档,提交拉取请求。 +## 准备工作 + +拉取请求工作的命令都是用 Git 去实现的,该章节详细描述 `Git 配置` 以及与 `GitHub 绑定` + +### 1. Git 配置 + +首先,确认电脑是否安装了 Git。Linux 系统和 macOS 系统,一般默认安装 Git,如未安装可在 [Git-Downloads](https://git-scm.com/downloads) 下载。 + +```shell +# 在命令提示符(cmd)或终端下输入以下命令,查看 Git 版本 +git --version +``` + +其次,检测自己 `Git Config` 是否配置 + +```shell +# 在命令提示符(cmd)或终端下输入以下命令,查看 Git Config 是否配置 +git config --global --list +``` + +若 `user.name` 和 `user.email` 为空,则输入以下命令进行配置。 + +```shell +git config --global user.name "这里换上你的用户名" +git config --global user.email "这里换上你的邮箱" +``` + +最后,在 `git bash` 或者 `终端` 中,输入以下命令生成密钥文件。生成成功后,会在用户目录下出现 `.ssh` 文件,其中 `id_rsa.pub` 是公钥文件。 + +```shell +# useremail 是 GitHub 的邮箱 +ssh-keygen -t rsa -C "useremail" +``` + +### 2. GitHub 绑定 + +首先,用记事本打开 `id_rsa.pub` 公钥文件,并复制里面全部内容。 + +其次,登录 GitHub 账户找到下图位置进行设置。 + + + +点击 `New SSH key` 新增一个 SSH keys,将刚才复制的内容粘贴到下图所示的 Key 中,Title 可以写设备名称,最后确认即可。 + + + +最后,在 `git bash` 或者 `终端` 中输入以下命令,验证 SSH 是否与 GitHub 账户匹配。如果匹配,输入 `yes` 就成功啦~ + +```shell +ssh -T git@github.com +``` + + + ## 拉取请求工作流 如果你对拉取请求不了解,没关系,接下来的内容将会从零开始,一步一步地指引你如何创建一个拉取请求。如果你想深入了解拉取请求的开发模式,可以参考 github [官方文档](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/about-pull-requests) From eb9b2037f282a9ceae153d780afa72c9c43b78f9 Mon Sep 17 00:00:00 2001 From: tang576225574 <576225574@qq.com> Date: Tue, 28 Feb 2023 19:28:39 +0800 Subject: [PATCH 51/64] Added a tool of querying the registered components for specified repository. (#559) * correct the scale_factor and pad_param value in LetterResize * correct the scale_factor and pad_param value in LetterResize * correct the scale_factor and pad_param value in LetterResize * tang * update * tang * tang * fix linting * tang * tang * tang * add print registers * add print registers * add print registers * add print registers * add print registers * tang * tang v1 finished * tang * print registers v1.2 * update * tang * update version * update * update * remove exclude_prefix --------- Co-authored-by: hha <1286304229@qq.com> Co-authored-by: huanghaian --- .dev_scripts/print_registers.py | 448 ++++++++++++ README_zh-CN.md | 1 + docs/zh_cn/common_usage/registries_info.md | 788 +++++++++++++++++++++ docs/zh_cn/index.rst | 1 + 4 files changed, 1238 insertions(+) create mode 100644 .dev_scripts/print_registers.py create mode 100644 docs/zh_cn/common_usage/registries_info.md diff --git a/.dev_scripts/print_registers.py b/.dev_scripts/print_registers.py new file mode 100644 index 00000000..52646da2 --- /dev/null +++ b/.dev_scripts/print_registers.py @@ -0,0 +1,448 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+import argparse +import importlib +import os +import os.path as osp +import pkgutil +import sys +import tempfile +from multiprocessing import Pool +from pathlib import Path + +import numpy as np +import pandas as pd + +# host_addr = 'https://gitee.com/open-mmlab' +host_addr = 'https://github.com/open-mmlab' +tools_list = ['tools', '.dev_scripts'] +proxy_names = { + 'mmdet': 'mmdetection', + 'mmseg': 'mmsegmentation', + 'mmcls': 'mmclassification' +} +merge_module_keys = {'mmcv': ['mmengine']} +# exclude_prefix = {'mmcv': ['{_k}') + table_data.append((registry_name, registry_strings)) + + # sort the data list + table_data = sorted(table_data, key=lambda x: len(x[1])) + # split multi parts + table_data_multi_parts = [] + for (registry_name, registry_strings) in table_data: + multi_parts = False + if len(registry_strings) > max_size_per_cell: + multi_parts = True + for cell_idx, registry_cell in enumerate( + divide_list_into_groups(registry_strings, max_size_per_cell)): + registry_str = ''.join(registry_cell.tolist()) + registry_str = f'
    {registry_str}
' + table_data_multi_parts.append([ + registry_name if not multi_parts else + f'{registry_name} (part {cell_idx + 1})', registry_str + ]) + + for table_data in divide_list_into_groups(table_data_multi_parts, + max_col_per_row): + table_data = list(zip(*table_data.tolist())) + html += dataframe_to_html( + pd.DataFrame([table_data[1]], columns=table_data[0])) + if html: + html = f'
{title}
\n{html}' + html = f'
{html}
\n' + return html + + +def tools_to_html(tools_dict, repo_name=''): + + def _recurse(_dict, _connector, _result): + assert isinstance(_dict, dict), \ + f'unknown recurse type: {_dict} ({type(_dict)})' + for _k, _v in _dict.items(): + if _v is None: + if _connector not in _result: + _result[_connector] = [] + _result[_connector].append(_k) + else: + _recurse(_v, osp.join(_connector, _k), _result) + + table_data = {} + title = f'{capitalize(repo_name)} Tools' + _recurse(tools_dict, '', table_data) + return registries_to_html(table_data, title) + + +def dataframe_to_html(dataframe): + styler = dataframe.style + styler = styler.hide(axis='index') + styler = styler.format(na_rep='-') + styler = styler.set_properties(**{ + 'text-align': 'left', + 'align': 'center', + 'vertical-align': 'top' + }) + styler = styler.set_table_styles([{ + 'selector': + 'thead th', + 'props': + 'align:center;text-align:center;vertical-align:bottom' + }]) + html = styler.to_html() + html = f'
\n{html}
' + return html + + +def generate_markdown_by_repository(repo_name, + module_name, + branch, + pulldir, + throw_error=False): + # add the pull dir to the system path so that it can be found + if pulldir not in sys.path: + sys.path.insert(0, pulldir) + module_list, error_dict = load_modules_from_dir( + module_name, pulldir, throw_error=throw_error) + registries_tree = get_registries_from_modules(module_list) + if error_dict: + error_dict_name = 'error_modules' + assert (error_dict_name not in registries_tree), \ + f'duplicate module name was found: {error_dict_name}' + registries_tree.update({error_dict_name: error_dict}) + # get the tools files + for tools_name in tools_list: + assert (tools_name not in registries_tree), \ + f'duplicate tools name was found: {tools_name}' + tools_tree = osp.join(pulldir, tools_name) + tools_tree = get_scripts_from_dir(tools_tree) + registries_tree.update({tools_name: tools_tree}) + # print_tree(registries_tree) + # get registries markdown string + module_registries = registries_tree.get(module_name, {}) + for merge_key in merge_module_keys.get(module_name, []): + merge_dict = registries_tree.get(merge_key, {}) + merge_registries(module_registries, merge_dict) + for exclude_key in exclude_prefix.get(module_name, []): + exclude_registries(module_registries, exclude_key) + markdown_str = registries_to_html( + module_registries, title=f'{capitalize(repo_name)} Module Components') + # get tools markdown string + tools_registries = {} + for tools_name in tools_list: + tools_registries.update( + {tools_name: registries_tree.get(tools_name, {})}) + markdown_str += tools_to_html(tools_registries, repo_name=repo_name) + version_str = get_version_from_module_name(module_name, branch) + title_str = f'\n\n## {capitalize(repo_name)}{version_str}\n' + # remove the pull dir from system path + if pulldir in sys.path: + sys.path.remove(pulldir) + return f'{title_str}{markdown_str}' + + +def parse_args(): + parser = argparse.ArgumentParser( + description='print registries in openmmlab repositories') + parser.add_argument( + '-r', + '--repositories', + nargs='+', + default=['mmdet', 'mmcls', 'mmseg', 'mmengine', 'mmcv'], + type=str, + help='git repositories name in OpenMMLab') + parser.add_argument( + '-b', + '--branches', + nargs='+', + default=['3.x', '1.x', '1.x', 'main', '2.x'], + type=str, + help='the branch names of git repositories, the length of branches ' + 'must be same as the length of repositories') + parser.add_argument( + '-o', '--out', type=str, default='.', help='output path of the file') + parser.add_argument( + '--throw-error', + action='store_true', + default=False, + help='whether to throw error when trying to import modules') + args = parser.parse_args() + return args + + +# TODO: Refine +def main(): + args = parse_args() + repositories = args.repositories + branches = args.branches + assert isinstance(repositories, list), \ + 'Type of repositories must be list' + if branches is None: + branches = [None] * len(repositories) + assert isinstance(branches, list) and \ + len(branches) == len(repositories), \ + 'The length of branches must be same as ' \ + 'that of repositories' + assert isinstance(args.out, str), \ + 'The type of output path must be string' + # save path of file + mkdir_or_exist(args.out) + save_path = osp.join(args.out, 'registries_info.md') + with tempfile.TemporaryDirectory() as tmpdir: + # multi process init + pool = Pool(processes=len(repositories)) + multi_proc_input_list = [] + multi_proc_output_list = [] + # get the git repositories + 
for branch, repository in zip(branches, repositories): + repo_name, module_name = parse_repo_name(repository) + pulldir = osp.join(tmpdir, f'tmp_{repo_name}') + git_pull_branch( + repo_name=repo_name, branch_name=branch, pulldir=pulldir) + multi_proc_input_list.append( + (repo_name, module_name, branch, pulldir, args.throw_error)) + print('starting the multi process to get the registries') + for multi_proc_input in multi_proc_input_list: + multi_proc_output_list.append( + pool.apply_async(generate_markdown_by_repository, + multi_proc_input)) + pool.close() + pool.join() + with open(save_path, 'w', encoding='utf-8') as fw: + fw.write(f'{markdown_title}\n') + for multi_proc_output in multi_proc_output_list: + markdown_str = multi_proc_output.get() + fw.write(f'{markdown_str}\n') + print(f'saved registries to the path: {save_path}') + + +if __name__ == '__main__': + main() diff --git a/README_zh-CN.md b/README_zh-CN.md index 2bc90ac9..a05df22d 100644 --- a/README_zh-CN.md +++ b/README_zh-CN.md @@ -227,6 +227,7 @@ MMYOLO 用法和 MMDetection 几乎一致,所有教程都是通用的,你也 - [应用多个 Neck](docs/zh_cn/common_usage/multi_necks.md) - [指定特定设备训练或推理](docs/zh_cn/common_usage/specify_device.md) - [单通道和多通道应用案例](docs/zh_cn/common_usage/single_multi_channel_applications.md) +- [MM 系列开源库注册表](docs/zh_cn/common_usage/registries_info.md) diff --git a/docs/zh_cn/common_usage/registries_info.md b/docs/zh_cn/common_usage/registries_info.md new file mode 100644 index 00000000..4a9d184c --- /dev/null +++ b/docs/zh_cn/common_usage/registries_info.md @@ -0,0 +1,788 @@ +# MM 系列开源库注册表 + +(注意:本文档是通过 .dev_scripts/print_registers.py 脚本自动生成) + +## MMdetection (3.0.0rc6) + +
MMdetection Module Components
+
+ + + + + + + + + + + + + + + + + + + + +
visualizeroptimizer constructorloopparameter schedulerdata sampler
  • DetLocalVisualizer
  • LearningRateDecayOptimizerConstructor
  • TeacherStudentValLoop
  • QuadraticWarmupParamScheduler
  • QuadraticWarmupLR
  • QuadraticWarmupMomentum
  • AspectRatioBatchSampler
  • ClassAwareSampler
  • MultiSourceSampler
  • GroupMultiSourceSampler
+
+ + + + + + + + + + + + + + + + + + + + +
metrichookdatasettask util (part 1)task util (part 2)
  • CityScapesMetric
  • CocoMetric
  • CocoOccludedSeparatedMetric
  • CocoPanopticMetric
  • CrowdHumanMetric
  • DumpDetResults
  • DumpProposals
  • LVISMetric
  • OpenImagesMetric
  • VOCMetric
  • CheckInvalidLossHook
  • MeanTeacherHook
  • MemoryProfilerHook
  • NumClassCheckHook
  • PipelineSwitchHook
  • SetEpochInfoHook
  • SyncNormHook
  • DetVisualizationHook
  • YOLOXModeSwitchHook
  • FastStopTrainingHook
  • BaseDetDataset
  • CocoDataset
  • CityscapesDataset
  • CocoPanopticDataset
  • CrowdHumanDataset
  • MultiImageMixDataset
  • DeepFashionDataset
  • LVISV05Dataset
  • LVISDataset
  • LVISV1Dataset
  • Objects365V1Dataset
  • Objects365V2Dataset
  • OpenImagesDataset
  • OpenImagesChallengeDataset
  • XMLDataset
  • VOCDataset
  • WIDERFaceDataset
  • MaxIoUAssigner
  • ApproxMaxIoUAssigner
  • ATSSAssigner
  • CenterRegionAssigner
  • DynamicSoftLabelAssigner
  • GridAssigner
  • HungarianAssigner
  • BboxOverlaps2D
  • BBoxL1Cost
  • IoUCost
  • ClassificationCost
  • FocalLossCost
  • DiceCost
  • CrossEntropyLossCost
  • MultiInstanceAssigner
  • PointAssigner
  • AnchorGenerator
  • SSDAnchorGenerator
  • LegacyAnchorGenerator
  • LegacySSDAnchorGenerator
  • YOLOAnchorGenerator
  • PointGenerator
  • MlvlPointGenerator
  • RegionAssigner
  • SimOTAAssigner
  • TaskAlignedAssigner
  • UniformAssigner
  • BucketingBBoxCoder
  • DeltaXYWHBBoxCoder
+
+ + + + + + + + + + + + + + + + + + +
task util (part 3)transform (part 1)transform (part 2)transform (part 3)
  • DistancePointBBoxCoder
  • LegacyDeltaXYWHBBoxCoder
  • PseudoBBoxCoder
  • TBLRBBoxCoder
  • YOLOBBoxCoder
  • CombinedSampler
  • RandomSampler
  • InstanceBalancedPosSampler
  • IoUBalancedNegSampler
  • MaskPseudoSampler
  • MultiInsRandomSampler
  • OHEMSampler
  • PseudoSampler
  • ScoreHLRSampler
  • AutoAugment
  • RandAugment
  • ColorTransform
  • Color
  • Brightness
  • Contrast
  • Sharpness
  • Solarize
  • SolarizeAdd
  • Posterize
  • Equalize
  • AutoContrast
  • Invert
  • PackDetInputs
  • ToTensor
  • ImageToTensor
  • Transpose
  • WrapFieldsToLists
  • GeomTransform
  • ShearX
  • ShearY
  • Rotate
  • TranslateX
  • TranslateY
  • InstaBoost
  • LoadImageFromNDArray
  • LoadMultiChannelImageFromFiles
  • LoadAnnotations
  • LoadPanopticAnnotations
  • LoadProposals
  • FilterAnnotations
  • LoadEmptyAnnotations
  • InferencerLoader
  • Resize
  • FixShapeResize
  • RandomFlip
  • RandomShift
  • Pad
  • RandomCrop
  • SegRescale
  • PhotoMetricDistortion
  • Expand
  • MinIoURandomCrop
  • Corrupt
  • Albu
  • RandomCenterCropPad
  • CutOut
  • Mosaic
  • MixUp
  • RandomAffine
  • YOLOXHSVRandomAug
  • CopyPaste
  • RandomErasing
  • CachedMosaic
  • CachedMixUp
  • MultiBranch
  • RandomOrder
  • ProposalBroadcaster
+
+ + + + + + + + + + + + + + + + + + +
model (part 1)model (part 2)model (part 3)model (part 4)
  • SiLU
  • DropBlock
  • ExpMomentumEMA
  • SinePositionalEncoding
  • LearnedPositionalEncoding
  • DynamicConv
  • MSDeformAttnPixelDecoder
  • Linear
  • NormedLinear
  • NormedConv2d
  • PixelDecoder
  • TransformerEncoderPixelDecoder
  • CSPDarknet
  • CSPNeXt
  • Darknet
  • ResNet
  • ResNetV1d
  • DetectoRS_ResNet
  • DetectoRS_ResNeXt
  • EfficientNet
  • HourglassNet
  • HRNet
  • MobileNetV2
  • PyramidVisionTransformer
  • PyramidVisionTransformerV2
  • ResNeXt
  • RegNet
  • Res2Net
  • ResNeSt
  • BFP
  • ChannelMapper
  • CSPNeXtPAFPN
  • CTResNetNeck
  • DilatedEncoder
  • DyHead
  • FPG
  • FPN
  • FPN_CARAFE
  • HRFPN
  • NASFPN
  • NASFCOS_FPN
  • PAFPN
  • RFP
  • SSDNeck
  • SSH
  • YOLOV3Neck
  • YOLOXPAFPN
  • SSDVGG
  • SwinTransformer
  • TridentResNet
  • DetDataPreprocessor
  • BatchSyncRandomResize
  • BatchFixedSizePad
  • MultiBranchDataPreprocessor
  • BatchResize
  • BoxInstDataPreprocessor
  • AnchorFreeHead
  • AnchorHead
  • ATSSHead
  • FCOSHead
  • AutoAssignHead
  • CondInstBboxHead
  • CondInstMaskHead
  • BoxInstBboxHead
  • BoxInstMaskHead
  • RPNHead
  • StageCascadeRPNHead
  • CascadeRPNHead
  • CenterNetHead
  • CenterNetUpdateHead
  • CornerHead
  • CentripetalHead
  • DETRHead
  • ConditionalDETRHead
  • DABDETRHead
  • DDODHead
  • DeformableDETRHead
  • DINOHead
  • EmbeddingRPNHead
  • FoveaHead
+
+ + + + + + + + + + + + + + + + + + +
model (part 5)model (part 6)model (part 7)model (part 8)
  • RetinaHead
  • FreeAnchorRetinaHead
  • AssociativeEmbeddingLoss
  • BalancedL1Loss
  • CrossEntropyLoss
  • DiceLoss
  • FocalLoss
  • GaussianFocalLoss
  • QualityFocalLoss
  • DistributionFocalLoss
  • GHMC
  • GHMR
  • IoULoss
  • BoundedIoULoss
  • GIoULoss
  • DIoULoss
  • CIoULoss
  • EIoULoss
  • KnowledgeDistillationKLDivLoss
  • MSELoss
  • SeesawLoss
  • SmoothL1Loss
  • L1Loss
  • VarifocalLoss
  • FSAFHead
  • GuidedAnchorHead
  • GARetinaHead
  • GARPNHead
  • GFLHead
  • PAAHead
  • LADHead
  • LDHead
  • MaskFormerHead
  • Mask2FormerHead
  • NASFCOSHead
  • PISARetinaHead
  • SSDHead
  • PISASSDHead
  • RepPointsHead
  • RetinaSepBNHead
  • RTMDetHead
  • RTMDetSepBNHead
  • RTMDetInsHead
  • RTMDetInsSepBNHead
  • SABLRetinaHead
  • SOLOHead
  • DecoupledSOLOHead
  • DecoupledSOLOLightHead
  • SOLOV2Head
  • TOODHead
  • VFNetHead
  • YOLACTHead
  • YOLACTProtonet
  • YOLOV3Head
  • YOLOFHead
  • YOLOXHead
  • SingleStageDetector
  • ATSS
  • AutoAssign
  • DetectionTransformer
  • SingleStageInstanceSegmentor
  • BoxInst
  • TwoStageDetector
  • CascadeRCNN
  • CenterNet
  • CondInst
  • DETR
  • ConditionalDETR
  • CornerNet
  • CrowdDet
  • Detectron2Wrapper
  • DABDETR
  • DDOD
  • DeformableDETR
  • DINO
  • FastRCNN
  • FasterRCNN
  • FCOS
+
+ + + + + + + + + + + + + + + + + + +
model (part 9)model (part 10)model (part 11)model (part 12)
  • FOVEA
  • FSAF
  • GFL
  • GridRCNN
  • HybridTaskCascade
  • KnowledgeDistillationSingleStageDetector
  • LAD
  • MaskFormer
  • Mask2Former
  • MaskRCNN
  • MaskScoringRCNN
  • NASFCOS
  • PAA
  • TwoStagePanopticSegmentor
  • PanopticFPN
  • PointRend
  • SparseRCNN
  • QueryInst
  • RepPointsDetector
  • RetinaNet
  • RPN
  • RTMDet
  • SCNet
  • SemiBaseDetector
  • SoftTeacher
  • SOLO
  • SOLOv2
  • TOOD
  • TridentFasterRCNN
  • VFNet
  • YOLACT
  • YOLOV3
  • YOLOF
  • YOLOX
  • BBoxHead
  • ConvFCBBoxHead
  • Shared2FCBBoxHead
  • Shared4Conv1FCBBoxHead
  • DIIHead
  • DoubleConvFCBBoxHead
  • MultiInstanceBBoxHead
  • SABLHead
  • SCNetBBoxHead
  • CascadeRoIHead
  • StandardRoIHead
  • DoubleHeadRoIHead
  • DynamicRoIHead
  • GridRoIHead
  • HybridTaskCascadeRoIHead
  • FCNMaskHead
  • CoarseMaskHead
  • DynamicMaskHead
  • FeatureRelayHead
  • FusedSemanticHead
  • GlobalContextHead
  • GridHead
  • HTCMaskHead
  • MaskPointHead
  • MaskIoUHead
  • SCNetMaskHead
  • SCNetSemanticHead
  • MaskScoringRoIHead
  • MultiInstanceRoIHead
  • PISARoIHead
  • PointRendRoIHead
  • GenericRoIExtractor
  • SingleRoIExtractor
  • SCNetRoIHead
  • ResLayer
  • SparseRoIHead
  • TridentRoIHead
  • BaseSemanticHead
  • PanopticFPNHead
  • BasePanopticFusionHead
  • HeuristicFusionHead
  • MaskFormerFusionHead
+
+
MMdetection Tools
+
+ + + + + + + + + + + + + + + + + + +
tools/dataset_converterstools/deploymenttoolstools/misc
  • pascal_voc.py
  • images2coco.py
  • cityscapes.py
  • mmdet2torchserve.py
  • test_torchserver.py
  • mmdet_handler.py
  • dist_test.sh
  • slurm_test.sh
  • test.py
  • dist_train.sh
  • train.py
  • slurm_train.sh
  • download_dataset.py
  • get_image_metas.py
  • gen_coco_panoptic_test_info.py
  • split_coco.py
  • get_crowdhuman_id_hw.py
  • print_config.py
+
+ + + + + + + + + + + + + + + + + + +
tools/model_converterstools/analysis_tools.dev_scripts (part 1).dev_scripts (part 2)
  • upgrade_model_version.py
  • upgrade_ssd_version.py
  • detectron2_to_mmdet.py
  • selfsup2mmdet.py
  • detectron2pytorch.py
  • regnet2mmdet.py
  • publish_model.py
  • benchmark.py
  • eval_metric.py
  • robustness_eval.py
  • confusion_matrix.py
  • optimize_anchors.py
  • browse_dataset.py
  • test_robustness.py
  • coco_error_analysis.py
  • coco_occluded_separated_recall.py
  • analyze_results.py
  • analyze_logs.py
  • get_flops.py
  • convert_test_benchmark_script.py
  • gather_test_benchmark_metric.py
  • benchmark_valid_flops.py
  • benchmark_train.py
  • test_benchmark.sh
  • download_checkpoints.py
  • benchmark_test_image.py
  • covignore.cfg
  • benchmark_full_models.txt
  • test_init_backbone.py
  • batch_train_list.txt
  • diff_coverage_test.sh
  • batch_test_list.py
  • linter.sh
  • gather_train_benchmark_metric.py
  • train_benchmark.sh
  • benchmark_inference_fps.py
  • benchmark_options.py
  • check_links.py
  • benchmark_test.py
  • benchmark_train_models.txt
  • convert_train_benchmark_script.py
  • gather_models.py
  • benchmark_filter.py
+
+ +## MMclassification (1.0.0rc5) + +
MMclassification Module Components
+
+ + + + + + + + + + + + + + + + + + + + +
visualizerdata sampleroptimizerbatch augmentmetric
  • ClsVisualizer
  • RepeatAugSampler
  • Adan
  • Lamb
  • Mixup
  • CutMix
  • ResizeMix
  • Accuracy
  • SingleLabelMetric
  • MultiLabelMetric
  • AveragePrecision
  • MultiTasksMetric
  • VOCMultiLabelMetric
  • VOCAveragePrecision
+
+ + + + + + + + + + + + + + + + + + +
hookdatasettransform (part 1)transform (part 2)
  • ClassNumCheckHook
  • EMAHook
  • SetAdaptiveMarginsHook
  • PreciseBNHook
  • PrepareProtoBeforeValLoopHook
  • SwitchRecipeHook
  • VisualizationHook
  • BaseDataset
  • CIFAR10
  • CIFAR100
  • CUB
  • CustomDataset
  • KFoldDataset
  • ImageNet
  • ImageNet21k
  • MNIST
  • FashionMNIST
  • MultiLabelDataset
  • MultiTaskDataset
  • VOC
  • AutoAugment
  • RandAugment
  • Shear
  • Translate
  • Rotate
  • AutoContrast
  • Invert
  • Equalize
  • Solarize
  • SolarizeAdd
  • Posterize
  • Contrast
  • ColorTransform
  • Brightness
  • Sharpness
  • Cutout
  • PackClsInputs
  • PackMultiTaskInputs
  • Transpose
  • ToPIL
  • ToNumpy
  • Collect
  • RandomCrop
  • RandomResizedCrop
  • EfficientNetRandomCrop
  • RandomErasing
  • EfficientNetCenterCrop
  • ResizeEdge
  • ColorJitter
  • Lighting
  • Albumentations
  • Albu
+
+ + + + + + + + + + + + + + + + + + +
model (part 1)model (part 2)model (part 3)model (part 4)
  • AlexNet
  • ShiftWindowMSA
  • ClsDataPreprocessor
  • VisionTransformer
  • BEiT
  • Conformer
  • ConvMixer
  • ResNet
  • ResNetV1c
  • ResNetV1d
  • ResNeXt
  • CSPDarkNet
  • CSPResNet
  • CSPResNeXt
  • DaViT
  • DistilledVisionTransformer
  • DeiT3
  • DenseNet
  • PoolFormer
  • EfficientFormer
  • EfficientNet
  • EfficientNetV2
  • HorNet
  • HRNet
  • InceptionV3
  • LeNet5
  • MixMIMTransformer
  • MlpMixer
  • MobileNetV2
  • MobileNetV3
  • MobileOne
  • MViT
  • RegNet
  • RepLKNet
  • RepMLPNet
  • RepVGG
  • Res2Net
  • ResNeSt
  • ResNet_CIFAR
  • RevVisionTransformer
  • SEResNet
  • SEResNeXt
  • ShuffleNetV1
  • ShuffleNetV2
  • SwinTransformer
  • SwinTransformerV2
  • T2T_ViT
  • TIMMBackbone
  • TNT
  • PCPVT
  • SVT
  • VAN
  • VGG
  • HuggingFaceClassifier
  • ImageClassifier
  • TimmClassifier
  • ClsHead
  • ConformerHead
  • VisionTransformerClsHead
  • DeiTClsHead
  • EfficientFormerClsHead
  • LinearClsHead
  • AsymmetricLoss
  • CrossEntropyLoss
  • FocalLoss
  • LabelSmoothLoss
  • SeesawLoss
  • ArcFaceClsHead
  • MultiLabelClsHead
  • CSRAClsHead
  • MultiLabelLinearClsHead
  • MultiTaskHead
  • StackedLinearClsHead
  • GlobalAveragePooling
  • GeneralizedMeanPooling
  • HRFuseScales
  • LinearReduction
  • ImageToImageRetriever
  • AverageClsScoreTTA
+
+
MMclassification Tools
+
+ + + + + + + + + + + + + + + + + + + + +
tools/misctools/visualizationstools/torchserve.dev_scriptstools/analysis_tools
  • verify_dataset.py
  • print_config.py
  • browse_dataset.py
  • vis_scheduler.py
  • vis_cam.py
  • mmcls_handler.py
  • mmcls2torchserve.py
  • test_torchserver.py
  • compare_init.py
  • ckpt_tree.py
  • generate_readme.py
  • eval_metric.py
  • analyze_results.py
  • analyze_logs.py
  • get_flops.py
+
+ + + + + + + + + + + + + + + + + + +
.dev_scripts/benchmark_regressiontoolstools/model_converters (part 1)tools/model_converters (part 2)
  • bench_train.yml
  • 4-benchmark_speed.py
  • 3-benchmark_train.py
  • 1-benchmark_valid.py
  • 2-benchmark_test.py
  • dist_test.sh
  • slurm_test.sh
  • test.py
  • dist_train.sh
  • train.py
  • slurm_train.sh
  • kfold-cross-valid.py
  • efficientnet_to_mmcls.py
  • repvgg_to_mmcls.py
  • clip_to_mmcls.py
  • reparameterize_model.py
  • shufflenetv2_to_mmcls.py
  • van2mmcls.py
  • hornet2mmcls.py
  • mixmimx_to_mmcls.py
  • edgenext_to_mmcls.py
  • torchvision_to_mmcls.py
  • twins2mmcls.py
  • revvit_to_mmcls.py
  • convnext_to_mmcls.py
  • replknet_to_mmcls.py
  • efficientnetv2_to_mmcls.py
  • mobilenetv2_to_mmcls.py
  • mlpmixer_to_mmcls.py
  • davit_to_mmcls.py
  • vgg_to_mmcls.py
  • deit3_to_mmcls.py
  • eva_to_mmcls.py
  • publish_model.py
  • tinyvit_to_mmcls.py
+
+ +## MMsegmentation (1.0.0rc5) + +
MMsegmentation Module Components
+
+ + + + + + + + + + + + + + + + + + + + +
task utilvisualizerhookoptimizer wrapper constructormetric
  • OHEMPixelSampler
  • SegLocalVisualizer
  • SegVisualizationHook
  • LearningRateDecayOptimizerConstructor
  • LayerDecayOptimizerConstructor
  • CitysMetric
  • IoUMetric
+
+ + + + + + + + + + + + + + + + + + +
dataset (part 1)dataset (part 2)transform (part 1)transform (part 2)
  • BaseSegDataset
  • ADE20KDataset
  • ChaseDB1Dataset
  • CityscapesDataset
  • COCOStuffDataset
  • DarkZurichDataset
  • MultiImageMixDataset
  • DecathlonDataset
  • DRIVEDataset
  • HRFDataset
  • iSAIDDataset
  • ISPRSDataset
  • LIPDataset
  • LoveDADataset
  • NightDrivingDataset
  • PascalContextDataset
  • PascalContextDataset59
  • PotsdamDataset
  • STAREDataset
  • SynapseDataset
  • PascalVOCDataset
  • PackSegInputs
  • LoadAnnotations
  • LoadImageFromNDArray
  • LoadBiomedicalImageFromFile
  • LoadBiomedicalAnnotation
  • LoadBiomedicalData
  • ResizeToMultiple
  • Rerange
  • CLAHE
  • RandomCrop
  • RandomRotate
  • RGB2Gray
  • AdjustGamma
  • SegRescale
  • PhotoMetricDistortion
  • RandomCutOut
  • RandomRotFlip
  • RandomMosaic
  • GenerateEdge
  • ResizeShortestEdge
  • BioMedical3DRandomCrop
  • BioMedicalGaussianNoise
  • BioMedicalGaussianBlur
  • BioMedicalRandomGamma
  • BioMedical3DPad
  • BioMedical3DRandomFlip
+
+ + + + + + + + + + + + + + + + + + +
model (part 1)model (part 2)model (part 3)model (part 4)
  • VisionTransformer
  • BEiT
  • BiSeNetV1
  • BiSeNetV2
  • CGNet
  • ERFNet
  • CrossEntropyLoss
  • DiceLoss
  • FocalLoss
  • LovaszLoss
  • TverskyLoss
  • ANNHead
  • APCHead
  • ASPPHead
  • FCNHead
  • CCHead
  • DAHead
  • DMHead
  • DNLHead
  • DPTHead
  • EMAHead
  • EncHead
  • FPNHead
  • GCHead
  • ISAHead
  • KernelUpdator
  • KernelUpdateHead
  • IterativeDecodeHead
  • LRASPPHead
  • Mask2FormerHead
  • MaskFormerHead
  • NLHead
  • OCRHead
  • PointHead
  • PSAHead
  • PSPHead
  • SegformerHead
  • SegmenterMaskTransformerHead
  • DepthwiseSeparableASPPHead
  • DepthwiseSeparableFCNHead
  • SETRMLAHead
  • SETRUPHead
  • STDCHead
  • UPerHead
  • FastSCNN
  • ResNet
  • ResNetV1c
  • ResNetV1d
  • HRNet
  • ICNet
  • MAE
  • MixVisionTransformer
  • MobileNetV2
  • MobileNetV3
  • ResNeSt
  • ResNeXt
  • STDCNet
  • STDCContextPathNet
  • SwinTransformer
  • TIMMBackbone
  • PCPVT
  • SVT
  • DeconvModule
  • InterpConv
  • UNet
  • SegDataPreProcessor
  • Feature2Pyramid
  • FPN
  • ICNeck
  • JPU
  • MLANeck
  • MultiLevelNeck
  • EncoderDecoder
  • CascadeEncoderDecoder
  • SegTTAModel
+
+
MMsegmentation Tools
+
+ + + + + + + + + + + + + + + + + + +
tools/deploymenttools/misctools/torchservetools/analysis_tools
  • pytorch2torchscript.py
  • browse_dataset.py
  • publish_model.py
  • print_config.py
  • mmseg_handler.py
  • mmseg2torchserve.py
  • test_torchserve.py
  • benchmark.py
  • confusion_matrix.py
  • analyze_logs.py
  • get_flops.py
+
+ + + + + + + + + + + + + + + + +
toolstools/model_converterstools/dataset_converters
  • dist_test.sh
  • slurm_test.sh
  • test.py
  • dist_train.sh
  • train.py
  • slurm_train.sh
  • swin2mmseg.py
  • vitjax2mmseg.py
  • twins2mmseg.py
  • stdc2mmseg.py
  • vit2mmseg.py
  • mit2mmseg.py
  • beit2mmseg.py
  • voc_aug.py
  • hrf.py
  • drive.py
  • pascal_context.py
  • vaihingen.py
  • stare.py
  • synapse.py
  • isaid.py
  • cityscapes.py
  • loveda.py
  • potsdam.py
  • chase_db1.py
  • coco_stuff164k.py
  • coco_stuff10k.py
+
+ +## MMengine (0.6.0) + +
MMengine Module Components
+
+ + + + + + + + + + + + + + + + + + + + +
log_processorvisualizermetricevaluatorrunner
  • LogProcessor
  • Visualizer
  • DumpResults
  • Evaluator
  • Runner
+
+ + + + + + + + + + + + + + + + + + + + +
optimizer wrapper constructorCollate Functionsdata samplervis_backenddataset
  • DefaultOptimWrapperConstructor
  • pseudo_collate
  • default_collate
  • DefaultSampler
  • InfiniteSampler
  • LocalVisBackend
  • WandbVisBackend
  • TensorboardVisBackend
  • ConcatDataset
  • RepeatDataset
  • ClassBalancedDataset
+
+ + + + + + + + + + + + + + + + + + + + +
optim_wrapperloopmodel_wrappermodelweight initializer
  • OptimWrapper
  • AmpOptimWrapper
  • ApexOptimWrapper
  • EpochBasedTrainLoop
  • IterBasedTrainLoop
  • ValLoop
  • TestLoop
  • DistributedDataParallel
  • DataParallel
  • MMDistributedDataParallel
  • MMSeparateDistributedDataParallel
  • StochasticWeightAverage
  • ExponentialMovingAverage
  • MomentumAnnealingEMA
  • BaseDataPreprocessor
  • ImgDataPreprocessor
  • BaseTTAModel
  • ToyModel
  • Constant
  • Xavier
  • Normal
  • TruncNormal
  • Uniform
  • Kaiming
  • Caffe2Xavier
  • Pretrained
+
+ + + + + + + + + + + + + + + + + + +
hookoptimizerparameter scheduler (part 1)parameter scheduler (part 2)
  • CheckpointHook
  • EMAHook
  • EmptyCacheHook
  • IterTimerHook
  • LoggerHook
  • NaiveVisualizationHook
  • ParamSchedulerHook
  • ProfilerHook
  • NPUProfilerHook
  • RuntimeInfoHook
  • DistSamplerSeedHook
  • SyncBuffersHook
  • PrepareTTAHook
  • ASGD
  • Adadelta
  • Adagrad
  • Adam
  • AdamW
  • Adamax
  • LBFGS
  • Optimizer
  • RMSprop
  • Rprop
  • SGD
  • SparseAdam
  • ZeroRedundancyOptimizer
  • StepParamScheduler
  • MultiStepParamScheduler
  • ConstantParamScheduler
  • ExponentialParamScheduler
  • CosineAnnealingParamScheduler
  • LinearParamScheduler
  • PolyParamScheduler
  • OneCycleParamScheduler
  • CosineRestartParamScheduler
  • ReduceOnPlateauParamScheduler
  • ConstantLR
  • CosineAnnealingLR
  • ExponentialLR
  • LinearLR
  • MultiStepLR
  • StepLR
  • PolyLR
  • OneCycleLR
  • CosineRestartLR
  • ReduceOnPlateauLR
  • ConstantMomentum
  • CosineAnnealingMomentum
  • ExponentialMomentum
  • LinearMomentum
  • MultiStepMomentum
  • StepMomentum
  • PolyMomentum
  • CosineRestartMomentum
  • ReduceOnPlateauMomentum
+
+ +## MMCV (2.0.0rc4) + +
MMCV Module Components
+
+ + + + + + + + + + + + + + + + + + +
transformmodel (part 1)model (part 2)model (part 3)
  • LoadImageFromFile
  • LoadAnnotations
  • Compose
  • KeyMapper
  • TransformBroadcaster
  • RandomChoice
  • RandomApply
  • Normalize
  • Resize
  • Pad
  • CenterCrop
  • RandomGrayscale
  • MultiScaleFlipAug
  • TestTimeAug
  • RandomChoiceResize
  • RandomFlip
  • RandomResize
  • ToTensor
  • ImageToTensor
  • ReLU
  • LeakyReLU
  • PReLU
  • RReLU
  • ReLU6
  • ELU
  • Sigmoid
  • Tanh
  • SiLU
  • Clamp
  • Clip
  • GELU
  • ContextBlock
  • Conv1d
  • Conv2d
  • Conv3d
  • Conv
  • Conv2dAdaptivePadding
  • BN
  • BN1d
  • BN2d
  • BN3d
  • SyncBN
  • GN
  • LN
  • IN
  • IN1d
  • IN2d
  • IN3d
  • zero
  • reflect
  • replicate
  • ConvModule
  • ConvWS
  • ConvAWS
  • DropPath
  • Dropout
  • GeneralizedAttention
  • HSigmoid
  • HSwish
  • NonLocal2d
  • Swish
  • nearest
  • bilinear
  • pixel_shuffle
  • deconv
  • ConvTranspose2d
  • deconv3d
  • ConvTranspose3d
  • MultiheadAttention
  • FFN
  • BaseTransformerLayer
  • TransformerLayerSequence
+
+
MMCV Tools
+
+ + + + + + + + + + + + +
.dev_scripts
  • check_installation.py
+
diff --git a/docs/zh_cn/index.rst b/docs/zh_cn/index.rst index 80d7cbe1..16148478 100644 --- a/docs/zh_cn/index.rst +++ b/docs/zh_cn/index.rst @@ -47,6 +47,7 @@ common_usage/multi_necks.md common_usage/specify_device.md common_usage/single_multi_channel_applications.md + common_usage/registries_info.md .. toctree:: From 43d97b76f69d80d1779c0372d8ad12d514eb3e8e Mon Sep 17 00:00:00 2001 From: leling <34684833+landhill@users.noreply.github.com> Date: Wed, 1 Mar 2023 09:40:43 +0800 Subject: [PATCH 52/64] [Docs] Update amp_training.md, resume_training.md (#599) * Add files via upload * cat_single_channel_test * [Feature] Add confusion matrix drawing tool * [Feature] Add confusion matrix drawing tool * [Feature] Add confusion matrix drawing tool * [Feature] Add confusion matrix drawing tool * [Docs] update amp_training.md, resume_training.md * [Docs] Update amp_training.md, resume_training.md * [Docs] Update amp_training.md, resume_training.md * [Docs] Update amp_training.md, resume_training.md * [Docs] Update amp_training.md, resume_training.md * [Docs] Update amp_training.md, resume_training.md --- docs/en/common_usage/amp_training.md | 12 ++++++++++++ docs/en/common_usage/resume_training.md | 8 ++++++++ docs/zh_cn/common_usage/amp_training.md | 12 ++++++++++++ docs/zh_cn/common_usage/resume_training.md | 8 ++++++++ 4 files changed, 40 insertions(+) diff --git a/docs/en/common_usage/amp_training.md b/docs/en/common_usage/amp_training.md index 3767114a..ac1fddd8 100644 --- a/docs/en/common_usage/amp_training.md +++ b/docs/en/common_usage/amp_training.md @@ -1 +1,13 @@ # Automatic mixed precision(AMP)training + +To enable Automatic Mixing Precision (AMP) training, add `--amp` to the end of the training command, which is as follows: + +```shell +python tools/train.py python ./tools/train.py ${CONFIG} --amp +``` + +Specific examples are as follows: + +```shell +python tools/train.py configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py --amp +``` diff --git a/docs/en/common_usage/resume_training.md b/docs/en/common_usage/resume_training.md index d33f1d28..1e1184a7 100644 --- a/docs/en/common_usage/resume_training.md +++ b/docs/en/common_usage/resume_training.md @@ -1 +1,9 @@ # Resume training + +Resume training means to continue training from the state saved from one of the previous trainings, where the state includes the model weights, the state of the optimizer and the optimizer parameter adjustment strategy. + +The user can add `--resume` at the end of the training command to resume training, and the program will automatically load the latest weight file from `work_dirs` to resume training. If there is an updated checkpoint in `work_dir` (e.g. the training was interrupted during the last training), the training will be resumed from that checkpoint, otherwise (e.g. the last training did not have time to save the checkpoint or a new training task was started) the training will be restarted. 
Here is an example of resuming training: + +```shell +python tools/train.py configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py --resume +``` diff --git a/docs/zh_cn/common_usage/amp_training.md b/docs/zh_cn/common_usage/amp_training.md index d3a10e71..c7803abf 100644 --- a/docs/zh_cn/common_usage/amp_training.md +++ b/docs/zh_cn/common_usage/amp_training.md @@ -1 +1,13 @@ # 自动混合精度(AMP)训练 + +如果要开启自动混合精度(AMP)训练,在训练命令最后加上 `--amp` 即可, 命令如下: + +```shell +python tools/train.py ${CONFIG} --amp +``` + +具体例子如下: + +```shell +python tools/train.py configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py --amp +``` diff --git a/docs/zh_cn/common_usage/resume_training.md b/docs/zh_cn/common_usage/resume_training.md index cbfeba7b..36431e32 100644 --- a/docs/zh_cn/common_usage/resume_training.md +++ b/docs/zh_cn/common_usage/resume_training.md @@ -1 +1,9 @@ # 恢复训练 + +恢复训练是指从之前某次训练保存下来的状态开始继续训练,这里的状态包括模型的权重、优化器和优化器参数调整策略的状态。 + +用户可以在训练命令最后加上 `--resume` 恢复训练,程序会自动从 `work_dirs` 中加载最新的权重文件恢复训练。如果 `work_dir` 中有最新的 checkpoint(例如该训练在上一次训练时被中断),则会从该 checkpoint 恢复训练,否则(例如上一次训练还没来得及保存 checkpoint 或者启动了新的训练任务)会重新开始训练。下面是一个恢复训练的示例: + +```shell +python tools/train.py configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py --resume +``` From d06de6d36fd05086641921aef7054f72176a0f68 Mon Sep 17 00:00:00 2001 From: tianlei Date: Wed, 1 Mar 2023 10:06:09 +0800 Subject: [PATCH 53/64] [Feature] Support Calculate FLOPs (#603) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * support compute flops * fix yolov8 error * Add FLOPs Doc * Update docs/zh_cn/recommended_topics/complexity_analysis.md Co-authored-by: Haian Huang(深度眸) <1286304229@qq.com> * Update get_flops.py * Update complexity_analysis.md * Update complexity_analysis.md * Add EN `complexity_analysis.md` * Update complexity_analysis.md * Add `complexity_analysis.md` index * Add `complexity_analysis.md` index * Update README_zh-CN.md * Update README.md * update --------- Co-authored-by: huanghaian Co-authored-by: Haian Huang(深度眸) <1286304229@qq.com> --- README.md | 1 + README_zh-CN.md | 1 + docs/en/index.rst | 1 + .../recommended_topics/complexity_analysis.md | 120 ++++++++++++++++++ docs/zh_cn/index.rst | 1 + .../recommended_topics/complexity_analysis.md | 117 +++++++++++++++++ mmyolo/models/dense_heads/yolov8_head.py | 7 +- tools/analysis_tools/get_flops.py | 120 ++++++++++++++++++ 8 files changed, 367 insertions(+), 1 deletion(-) create mode 100644 docs/en/recommended_topics/complexity_analysis.md create mode 100644 docs/zh_cn/recommended_topics/complexity_analysis.md create mode 100644 tools/analysis_tools/get_flops.py diff --git a/README.md b/README.md index 284c2f9f..de1d7e5d 100644 --- a/README.md +++ b/README.md @@ -180,6 +180,7 @@ For different parts from MMDetection, we have also prepared user guides and adva - [MMYOLO model design](docs/en/recommended_topics/model_design.md) - [Algorithm principles and implementation](docs/en/recommended_topics/algorithm_descriptions/) - [Replace the backbone network](docs/en/recommended_topics/replace_backbone.md) +- [MMYOLO model complexity analysis](docs/en/recommended_topics/complexity_analysis.md) - [Annotation-to-deployment workflow for custom dataset](docs/en/recommended_topics/labeling_to_deployment_tutorials.md) - [Visualization](docs/en/recommended_topics/visualization.md) - [Model deployment](docs/en/recommended_topics/deploy/) diff --git a/README_zh-CN.md b/README_zh-CN.md index a05df22d..ca65a944 100644 --- a/README_zh-CN.md +++ b/README_zh-CN.md @@
-201,6 +201,7 @@ MMYOLO 用法和 MMDetection 几乎一致,所有教程都是通用的,你也 - [MMYOLO 模型结构设计](docs/zh_cn/recommended_topics/model_design.md) - [原理和实现全解析](docs/zh_cn/recommended_topics/algorithm_descriptions/) - [轻松更换主干网络](docs/zh_cn/recommended_topics/replace_backbone.md) +- [MMYOLO 模型复杂度分析](docs/zh_cn/recommended_topics/complexity_analysis.md) - [标注+训练+测试+部署全流程](docs/zh_cn/recommended_topics/labeling_to_deployment_tutorials.md) - [关于可视化的一切](docs/zh_cn/recommended_topics/visualization.md) - [模型部署流程](docs/zh_cn/recommended_topics/deploy/) diff --git a/docs/en/index.rst b/docs/en/index.rst index 004bf0e7..b609da21 100644 --- a/docs/en/index.rst +++ b/docs/en/index.rst @@ -22,6 +22,7 @@ You can switch between Chinese and English documents in the top-right corner of recommended_topics/model_design.md recommended_topics/algorithm_descriptions/index.rst recommended_topics/replace_backbone.md + recommended_topics/complexity_analysis.md recommended_topics/labeling_to_deployment_tutorials.md recommended_topics/visualization.md recommended_topics/deploy/index.rst diff --git a/docs/en/recommended_topics/complexity_analysis.md b/docs/en/recommended_topics/complexity_analysis.md new file mode 100644 index 00000000..ae7989df --- /dev/null +++ b/docs/en/recommended_topics/complexity_analysis.md @@ -0,0 +1,120 @@ +# Model Complexity Analysis + +We provide a `tools/analysis_tools/get_flops.py` script to help with the complexity analysis for models of MMYOLO. +Currently, it provides the interfaces to compute parameter, activation and flops of the given model, +and supports printing the related information layer-by-layer in terms of network structure or table. + +The commands as follows: + +```shell +python tools/analysis_tools/get_flops.py + ${CONFIG_FILE} \ # config file path + [--shape ${IMAGE_SIZE}] \ # input image size (int), default 640*640 + [--show-arch ${ARCH_DISPLAY}] \ # print related information by network layers + [--not-show-table ${TABLE_DISPLAY}] \ # print related information by table + [--cfg-options ${CFG_OPTIONS}] # config file option +# [] stands for optional parameter, do not type [] when actually entering the command line +``` + +Let's take the `rtmdet_s_syncbn_fast_8xb32-300e_coco.py` config file in RTMDet as an example to show how this script can be used: + +## Usage Example 1: Print Flops, Parameters and related information by table + +```shell +python tools/analysis_tools/get_flops.py configs/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco.py +``` + +Output: + +```python +============================== +Input shape: torch.Size([640, 640]) +Model Flops: 14.835G +Model Parameters: 8.887M +============================== +``` + +| module | #parameters or shape | #flops | #activations | +| :-------------------------------- | :------------------- | :------ | :----------: | +| model | 8.887M | 14.835G | 35.676M | +| backbone | 4.378M | 5.416G | 22.529M | +| backbone.stem | 7.472K | 0.765G | 6.554M | +| backbone.stem.0 | 0.464K | 47.514M | 1.638M | +| backbone.stem.1 | 2.336K | 0.239G | 1.638M | +| backbone.stem.2 | 4.672K | 0.478G | 3.277M | +| backbone.stage1 | 42.4K | 0.981G | 7.373M | +| backbone.stage1.0 | 18.56K | 0.475G | 1.638M | +| backbone.stage1.1 | 23.84K | 0.505G | 5.734M | +| backbone.stage2 | 0.21M | 1.237G | 4.915M | +| backbone.stage2.0 | 73.984K | 0.473G | 0.819M | +| backbone.stage2.1 | 0.136M | 0.764G | 4.096M | +| backbone.stage3 | 0.829M | 1.221G | 2.458M | +| backbone.stage3.0 | 0.295M | 0.473G | 0.41M | +| backbone.stage3.1 | 0.534M | 0.749G | 2.048M | +| backbone.stage4 | 3.29M | 1.211G 
| 1.229M | +| backbone.stage4.0 | 1.181M | 0.472G | 0.205M | +| backbone.stage4.1 | 0.657M | 0.263G | 0.307M | +| backbone.stage4.2 | 1.452M | 0.476G | 0.717M | +| neck | 3.883M | 4.366G | 8.141M | +| neck.reduce_layers.2 | 0.132M | 52.634M | 0.102M | +| neck.reduce_layers.2.conv | 0.131M | 52.429M | 0.102M | +| neck.reduce_layers.2.bn | 0.512K | 0.205M | 0 | +| neck.top_down_layers | 0.491M | 1.23G | 4.506M | +| neck.top_down_layers.0 | 0.398M | 0.638G | 1.638M | +| neck.top_down_layers.1 | 92.608K | 0.593G | 2.867M | +| neck.downsample_layers | 0.738M | 0.472G | 0.307M | +| neck.downsample_layers.0 | 0.148M | 0.236G | 0.205M | +| neck.downsample_layers.1 | 0.59M | 0.236G | 0.102M | +| neck.bottom_up_layers | 1.49M | 0.956G | 2.15M | +| neck.bottom_up_layers.0 | 0.3M | 0.48G | 1.434M | +| neck.bottom_up_layers.1 | 1.19M | 0.476G | 0.717M | +| neck.out_layers | 1.033M | 1.654G | 1.075M | +| neck.out_layers.0 | 0.148M | 0.945G | 0.819M | +| neck.out_layers.1 | 0.295M | 0.472G | 0.205M | +| neck.out_layers.2 | 0.59M | 0.236G | 51.2K | +| neck.upsample_layers | | 1.229M | 0 | +| neck.upsample_layers.0 | | 0.41M | 0 | +| neck.upsample_layers.1 | | 0.819M | 0 | +| bbox_head.head_module | 0.625M | 5.053G | 5.006M | +| bbox_head.head_module.cls_convs | 0.296M | 2.482G | 2.15M | +| bbox_head.head_module.cls_convs.0 | 0.295M | 2.481G | 2.15M | +| bbox_head.head_module.cls_convs.1 | 0.512K | 0.819M | 0 | +| bbox_head.head_module.cls_convs.2 | 0.512K | 0.205M | 0 | +| bbox_head.head_module.reg_convs | 0.296M | 2.482G | 2.15M | +| bbox_head.head_module.reg_convs.0 | 0.295M | 2.481G | 2.15M | +| bbox_head.head_module.reg_convs.1 | 0.512K | 0.819M | 0 | +| bbox_head.head_module.reg_convs.2 | 0.512K | 0.205M | 0 | +| bbox_head.head_module.rtm_cls | 30.96K | 86.016M | 0.672M | +| bbox_head.head_module.rtm_cls.0 | 10.32K | 65.536M | 0.512M | +| bbox_head.head_module.rtm_cls.1 | 10.32K | 16.384M | 0.128M | +| bbox_head.head_module.rtm_cls.2 | 10.32K | 4.096M | 32K | +| bbox_head.head_module.rtm_reg | 1.548K | 4.301M | 33.6K | +| bbox_head.head_module.rtm_reg.0 | 0.516K | 3.277M | 25.6K | +| bbox_head.head_module.rtm_reg.1 | 0.516K | 0.819M | 6.4K | +| bbox_head.head_module.rtm_reg.2 | 0.516K | 0.205M | 1.6K | + +## Usage Example 2: Print related information by network layers + +```shell +python tools/analysis_tools/get_flops.py configs/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco.py --show-arch +``` + +Due to the complex structure of RTMDet, the output is long. 
+The following shows only the output from bbox_head.head_module.rtm_reg section: + +```python +(rtm_reg): ModuleList( + #params: 1.55K, #flops: 4.3M, #acts: 33.6K + (0): Conv2d( + 128, 4, kernel_size=(1, 1), stride=(1, 1) + #params: 0.52K, #flops: 3.28M, #acts: 25.6K + ) + (1): Conv2d( + 128, 4, kernel_size=(1, 1), stride=(1, 1) + #params: 0.52K, #flops: 0.82M, #acts: 6.4K + ) + (2): Conv2d( + 128, 4, kernel_size=(1, 1), stride=(1, 1) + #params: 0.52K, #flops: 0.2M, #acts: 1.6K + ) +``` diff --git a/docs/zh_cn/index.rst b/docs/zh_cn/index.rst index 16148478..da36bb3f 100644 --- a/docs/zh_cn/index.rst +++ b/docs/zh_cn/index.rst @@ -22,6 +22,7 @@ recommended_topics/model_design.md recommended_topics/algorithm_descriptions/index.rst recommended_topics/replace_backbone.md + recommended_topics/complexity_analysis.md recommended_topics/labeling_to_deployment_tutorials.md recommended_topics/visualization.md recommended_topics/deploy/index.rst diff --git a/docs/zh_cn/recommended_topics/complexity_analysis.md b/docs/zh_cn/recommended_topics/complexity_analysis.md new file mode 100644 index 00000000..362a3315 --- /dev/null +++ b/docs/zh_cn/recommended_topics/complexity_analysis.md @@ -0,0 +1,117 @@ +# 模型复杂度分析 + +我们提供了 `tools/analysis_tools/get_flops.py` 脚本来帮助进行 MMYOLO 系列中所有模型的复杂度分析。目前支持计算并输出给定模型的 parameters, activation 以及 flops;同时支持以网络结构或表格的形式打印输出每一层网络的复杂度信息。 + +调用命令如下: + +```shell +python tools/analysis_tools/get_flops.py + ${CONFIG_FILE} \ # 配置文件路径 + [--shape ${IMAGE_SIZE}] \ # 输入图像大小(int),默认取 640*640 + [--show-arch ${ARCH_DISPLAY}] \ # 以网络结构形式逐层展示复杂度信息 + [--not-show-table ${TABLE_DISPLAY}] \ # 以表格形式逐层展示复杂度信息 + [--cfg-options ${CFG_OPTIONS}] # 配置文件参数修改选项 +# [] 代表可选参数,实际输入命令行时,不用输入 [] +``` + +接下来以 RTMDet 中的 `rtmdet_s_syncbn_fast_8xb32-300e_coco.py` 配置文件为例,详细展示该脚本的几种使用情形: + +## 样例 1: 打印模型的 Flops 和 Parameters,并以表格形式展示每层网络复杂度 + +```shell +python tools/analysis_tools/get_flops.py configs/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco.py +``` + +输出如下: + +```python +============================== +Input shape: torch.Size([640, 640]) +Model Flops: 14.835G +Model Parameters: 8.887M +============================== +``` + +| module | #parameters or shape | #flops | #activations | +| :-------------------------------- | :------------------- | :------ | :----------: | +| model | 8.887M | 14.835G | 35.676M | +| backbone | 4.378M | 5.416G | 22.529M | +| backbone.stem | 7.472K | 0.765G | 6.554M | +| backbone.stem.0 | 0.464K | 47.514M | 1.638M | +| backbone.stem.1 | 2.336K | 0.239G | 1.638M | +| backbone.stem.2 | 4.672K | 0.478G | 3.277M | +| backbone.stage1 | 42.4K | 0.981G | 7.373M | +| backbone.stage1.0 | 18.56K | 0.475G | 1.638M | +| backbone.stage1.1 | 23.84K | 0.505G | 5.734M | +| backbone.stage2 | 0.21M | 1.237G | 4.915M | +| backbone.stage2.0 | 73.984K | 0.473G | 0.819M | +| backbone.stage2.1 | 0.136M | 0.764G | 4.096M | +| backbone.stage3 | 0.829M | 1.221G | 2.458M | +| backbone.stage3.0 | 0.295M | 0.473G | 0.41M | +| backbone.stage3.1 | 0.534M | 0.749G | 2.048M | +| backbone.stage4 | 3.29M | 1.211G | 1.229M | +| backbone.stage4.0 | 1.181M | 0.472G | 0.205M | +| backbone.stage4.1 | 0.657M | 0.263G | 0.307M | +| backbone.stage4.2 | 1.452M | 0.476G | 0.717M | +| neck | 3.883M | 4.366G | 8.141M | +| neck.reduce_layers.2 | 0.132M | 52.634M | 0.102M | +| neck.reduce_layers.2.conv | 0.131M | 52.429M | 0.102M | +| neck.reduce_layers.2.bn | 0.512K | 0.205M | 0 | +| neck.top_down_layers | 0.491M | 1.23G | 4.506M | +| neck.top_down_layers.0 | 0.398M | 0.638G | 1.638M | +| neck.top_down_layers.1 | 92.608K | 0.593G | 
2.867M | +| neck.downsample_layers | 0.738M | 0.472G | 0.307M | +| neck.downsample_layers.0 | 0.148M | 0.236G | 0.205M | +| neck.downsample_layers.1 | 0.59M | 0.236G | 0.102M | +| neck.bottom_up_layers | 1.49M | 0.956G | 2.15M | +| neck.bottom_up_layers.0 | 0.3M | 0.48G | 1.434M | +| neck.bottom_up_layers.1 | 1.19M | 0.476G | 0.717M | +| neck.out_layers | 1.033M | 1.654G | 1.075M | +| neck.out_layers.0 | 0.148M | 0.945G | 0.819M | +| neck.out_layers.1 | 0.295M | 0.472G | 0.205M | +| neck.out_layers.2 | 0.59M | 0.236G | 51.2K | +| neck.upsample_layers | | 1.229M | 0 | +| neck.upsample_layers.0 | | 0.41M | 0 | +| neck.upsample_layers.1 | | 0.819M | 0 | +| bbox_head.head_module | 0.625M | 5.053G | 5.006M | +| bbox_head.head_module.cls_convs | 0.296M | 2.482G | 2.15M | +| bbox_head.head_module.cls_convs.0 | 0.295M | 2.481G | 2.15M | +| bbox_head.head_module.cls_convs.1 | 0.512K | 0.819M | 0 | +| bbox_head.head_module.cls_convs.2 | 0.512K | 0.205M | 0 | +| bbox_head.head_module.reg_convs | 0.296M | 2.482G | 2.15M | +| bbox_head.head_module.reg_convs.0 | 0.295M | 2.481G | 2.15M | +| bbox_head.head_module.reg_convs.1 | 0.512K | 0.819M | 0 | +| bbox_head.head_module.reg_convs.2 | 0.512K | 0.205M | 0 | +| bbox_head.head_module.rtm_cls | 30.96K | 86.016M | 0.672M | +| bbox_head.head_module.rtm_cls.0 | 10.32K | 65.536M | 0.512M | +| bbox_head.head_module.rtm_cls.1 | 10.32K | 16.384M | 0.128M | +| bbox_head.head_module.rtm_cls.2 | 10.32K | 4.096M | 32K | +| bbox_head.head_module.rtm_reg | 1.548K | 4.301M | 33.6K | +| bbox_head.head_module.rtm_reg.0 | 0.516K | 3.277M | 25.6K | +| bbox_head.head_module.rtm_reg.1 | 0.516K | 0.819M | 6.4K | +| bbox_head.head_module.rtm_reg.2 | 0.516K | 0.205M | 1.6K | + +## 样例 2:以网络结构形式逐层展示模型复杂度信息 + +```shell +python tools/analysis_tools/get_flops.py configs/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco.py --show-arch +``` + +由于该网络结构复杂,输出较长。以下仅展示 bbox_head.head_module.rtm_reg 部分的输出: + +```python +(rtm_reg): ModuleList( + #params: 1.55K, #flops: 4.3M, #acts: 33.6K + (0): Conv2d( + 128, 4, kernel_size=(1, 1), stride=(1, 1) + #params: 0.52K, #flops: 3.28M, #acts: 25.6K + ) + (1): Conv2d( + 128, 4, kernel_size=(1, 1), stride=(1, 1) + #params: 0.52K, #flops: 0.82M, #acts: 6.4K + ) + (2): Conv2d( + 128, 4, kernel_size=(1, 1), stride=(1, 1) + #params: 0.52K, #flops: 0.2M, #acts: 1.6K + ) +``` diff --git a/mmyolo/models/dense_heads/yolov8_head.py b/mmyolo/models/dense_heads/yolov8_head.py index d6f36c9a..29202417 100644 --- a/mmyolo/models/dense_heads/yolov8_head.py +++ b/mmyolo/models/dense_heads/yolov8_head.py @@ -165,7 +165,12 @@ class YOLOv8HeadModule(BaseModule): if self.reg_max > 1: bbox_dist_preds = bbox_dist_preds.reshape( [-1, 4, self.reg_max, h * w]).permute(0, 3, 1, 2) - bbox_preds = bbox_dist_preds.softmax(3).matmul(self.proj) + + # TODO: The get_flops script cannot handle the situation of + # matmul, and needs to be fixed later + # bbox_preds = bbox_dist_preds.softmax(3).matmul(self.proj) + bbox_preds = bbox_dist_preds.softmax(3).matmul( + self.proj.view([-1, 1])).squeeze(-1) bbox_preds = bbox_preds.transpose(1, 2).reshape(b, -1, h, w) else: bbox_preds = bbox_dist_preds diff --git a/tools/analysis_tools/get_flops.py b/tools/analysis_tools/get_flops.py new file mode 100644 index 00000000..3e86bc57 --- /dev/null +++ b/tools/analysis_tools/get_flops.py @@ -0,0 +1,120 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+import argparse +import tempfile +from pathlib import Path + +import torch +from mmdet.registry import MODELS +from mmengine.analysis import get_model_complexity_info +from mmengine.config import Config, DictAction +from mmengine.logging import MMLogger +from mmengine.model import revert_sync_batchnorm +from mmengine.registry import init_default_scope + + +def parse_args(): + parser = argparse.ArgumentParser(description='Get a detector flops') + parser.add_argument('config', help='train config file path') + parser.add_argument( + '--shape', + type=int, + nargs='+', + default=[640, 640], + help='input image size') + parser.add_argument( + '--show-arch', + action='store_true', + help='whether return the statistics in the form of network layers') + parser.add_argument( + '--not-show-table', + action='store_true', + help='whether return the statistics in the form of table'), + parser.add_argument( + '--cfg-options', + nargs='+', + action=DictAction, + help='override some settings in the used config, the key-value pair ' + 'in xxx=yyy format will be merged into config file. If the value to ' + 'be overwritten is a list, it should be like key="[a,b]" or key=a,b ' + 'It also allows nested list/tuple values, e.g. key="[(a,b),(c,d)]" ' + 'Note that the quotation marks are necessary and that no white space ' + 'is allowed.') + return parser.parse_args() + + +def inference(args, logger): + config_name = Path(args.config) + if not config_name.exists(): + logger.error(f'{config_name} not found.') + + cfg = Config.fromfile(args.config) + cfg.work_dir = tempfile.TemporaryDirectory().name + cfg.log_level = 'WARN' + if args.cfg_options is not None: + cfg.merge_from_dict(args.cfg_options) + + init_default_scope(cfg.get('default_scope', 'mmyolo')) + + if len(args.shape) == 1: + h = w = args.shape[0] + elif len(args.shape) == 2: + h, w = args.shape + else: + raise ValueError('invalid input shape') + + # model + model = MODELS.build(cfg.model) + if torch.cuda.is_available(): + model.cuda() + model = revert_sync_batchnorm(model) + model.eval() + + # input tensor + # automatically generate a input tensor with the given input_shape. + data_batch = {'inputs': [torch.rand(3, h, w)], 'batch_samples': [None]} + data = model.data_preprocessor(data_batch) + result = {'ori_shape': (h, w), 'pad_shape': data['inputs'].shape[-2:]} + outputs = get_model_complexity_info( + model, + input_shape=None, + inputs=data['inputs'], # the input tensor of the model + show_table=not args.not_show_table, # show the complexity table + show_arch=args.show_arch) # show the complexity arch + + result['flops'] = outputs['flops_str'] + result['params'] = outputs['params_str'] + result['out_table'] = outputs['out_table'] + result['out_arch'] = outputs['out_arch'] + + return result + + +def main(): + args = parse_args() + logger = MMLogger.get_instance(name='MMLogger') + result = inference(args, logger) + + split_line = '=' * 30 + + ori_shape = result['ori_shape'] + pad_shape = result['pad_shape'] + flops = result['flops'] + params = result['params'] + + print(result['out_table']) # print related information by table + print(result['out_arch']) # print related information by network layers + + if pad_shape != ori_shape: + print(f'{split_line}\nUse size divisor set input shape ' + f'from {ori_shape} to {pad_shape}') + + print(f'{split_line}\n' + f'Input shape: {pad_shape}\nModel Flops: {flops}\n' + f'Model Parameters: {params}\n{split_line}') + print('!!!Please be cautious if you use the results in papers. 
' + 'You may need to check if all ops are supported and verify ' + 'that the flops computation is correct.') + + +if __name__ == '__main__': + main() From 6f38b781bd688d9a81f0d8aa9fcb4d4089e9927b Mon Sep 17 00:00:00 2001 From: whcao <41630003+HIT-cwh@users.noreply.github.com> Date: Wed, 1 Mar 2023 16:20:45 +0800 Subject: [PATCH 54/64] [Feature] Add RTMDet distillation cfg (#544) * add rtm distillation cfg * rename the cfg file * use norm connector * fix cfg * fix cfg * support rtm distillation * fix readme and cfgs * fix readme * add docstring * add links of ckpts and logs * Update configs/rtmdet/README.md Co-authored-by: RangiLyu * fix cfgs * rename stop distillation hook * rename stop_epoch * fix cfg * add model converter * add metafile and tta results * fix metafile * fix readme * mv distillation/metafile to metafile --------- Co-authored-by: RangiLyu --- configs/rtmdet/README.md | 19 ++- configs/rtmdet/distillation/README.md | 146 ++++++++++++++++++ .../kd_l_rtmdet_x_neck_300e_coco.py | 99 ++++++++++++ .../kd_m_rtmdet_l_neck_300e_coco.py | 99 ++++++++++++ .../kd_s_rtmdet_m_neck_300e_coco.py | 99 ++++++++++++ .../kd_tiny_rtmdet_s_neck_300e_coco.py | 99 ++++++++++++ configs/rtmdet/metafile.yml | 52 +++++++ .../convert_kd_ckpt_to_student.py | 48 ++++++ 8 files changed, 654 insertions(+), 7 deletions(-) create mode 100644 configs/rtmdet/distillation/README.md create mode 100644 configs/rtmdet/distillation/kd_l_rtmdet_x_neck_300e_coco.py create mode 100644 configs/rtmdet/distillation/kd_m_rtmdet_l_neck_300e_coco.py create mode 100644 configs/rtmdet/distillation/kd_s_rtmdet_m_neck_300e_coco.py create mode 100644 configs/rtmdet/distillation/kd_tiny_rtmdet_s_neck_300e_coco.py create mode 100644 tools/model_converters/convert_kd_ckpt_to_student.py diff --git a/configs/rtmdet/README.md b/configs/rtmdet/README.md index 1089b71b..302e366a 100644 --- a/configs/rtmdet/README.md +++ b/configs/rtmdet/README.md @@ -23,19 +23,24 @@ RTMDet-l model structure ## Object Detection -| Model | size | Params(M) | FLOPS(G) | TRT-FP16-Latency(ms) | box AP | TTA box AP | Config | Download | -| :---------: | :--: | :-------: | :------: | :------------------: | :----: | :--------: | :----------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | -| RTMDet-tiny | 640 | 4.8 | 8.1 | 0.98 | 41.0 | 42.7 | [config](./rtmdet_tiny_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco/rtmdet_tiny_syncbn_fast_8xb32-300e_coco_20230102_140117-dbb1dc83.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco/rtmdet_tiny_syncbn_fast_8xb32-300e_coco_20230102_140117.log.json) | -| RTMDet-s | 640 | 8.89 | 14.8 | 1.22 | 44.6 | 45.8 | [config](./rtmdet_s_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco/rtmdet_s_syncbn_fast_8xb32-300e_coco_20221230_182329-0a8c901a.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco/rtmdet_s_syncbn_fast_8xb32-300e_coco_20221230_182329.log.json) | -| RTMDet-m | 640 | 24.71 | 39.27 | 1.62 | 49.3 | 50.9 | 
[config](./rtmdet_m_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco/rtmdet_m_syncbn_fast_8xb32-300e_coco_20230102_135952-40af4fe8.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco/rtmdet_m_syncbn_fast_8xb32-300e_coco_20230102_135952.log.json) | -| RTMDet-l | 640 | 52.3 | 80.23 | 2.44 | 51.4 | 53.1 | [config](./rtmdet_l_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco/rtmdet_l_syncbn_fast_8xb32-300e_coco_20230102_135928-ee3abdc4.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco/rtmdet_l_syncbn_fast_8xb32-300e_coco_20230102_135928.log.json) | -| RTMDet-x | 640 | 94.86 | 141.67 | 3.10 | 52.8 | 54.2 | [config](./rtmdet_x_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_x_syncbn_fast_8xb32-300e_coco/rtmdet_x_syncbn_fast_8xb32-300e_coco_20221231_100345-b85cd476.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_x_syncbn_fast_8xb32-300e_coco/rtmdet_x_syncbn_fast_8xb32-300e_coco_20221231_100345.log.json) | +| Model | size | Params(M) | FLOPS(G) | TRT-FP16-Latency(ms) | box AP | TTA box AP | Config | Download | +| :------------: | :--: | :-------: | :------: | :------------------: | :---------: | :---------: | :---------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| RTMDet-tiny | 640 | 4.8 | 8.1 | 0.98 | 41.0 | 42.7 | [config](./rtmdet_tiny_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco/rtmdet_tiny_syncbn_fast_8xb32-300e_coco_20230102_140117-dbb1dc83.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco/rtmdet_tiny_syncbn_fast_8xb32-300e_coco_20230102_140117.log.json) | +| RTMDet-tiny \* | 640 | 4.8 | 8.1 | 0.98 | 41.8 (+0.8) | 43.2 (+0.5) | [config](./distillation/kd_tiny_rtmdet_s_neck_300e_coco.py) | [model](https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_tiny_rtmdet_s_neck_300e_coco/kd_tiny_rtmdet_s_neck_300e_coco_20230213_104240-e1e4197c.pth) \| [log](https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_tiny_rtmdet_s_neck_300e_coco/kd_tiny_rtmdet_s_neck_300e_coco_20230213_104240-176901d8.json) | +| RTMDet-s | 640 | 8.89 | 14.8 | 1.22 | 44.6 | 45.8 | [config](./rtmdet_s_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco/rtmdet_s_syncbn_fast_8xb32-300e_coco_20221230_182329-0a8c901a.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco/rtmdet_s_syncbn_fast_8xb32-300e_coco_20221230_182329.log.json) | +| RTMDet-s \* | 640 | 8.89 | 14.8 | 1.22 | 45.7 (+1.1) | 47.3 (+1.5) | [config](./distillation/kd_s_rtmdet_m_neck_300e_coco.py) | [model](https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_s_rtmdet_m_neck_300e_coco/kd_s_rtmdet_m_neck_300e_coco_20230220_140647-446ff003.pth) \| 
[log](https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_s_rtmdet_m_neck_300e_coco/kd_s_rtmdet_m_neck_300e_coco_20230220_140647-89862269.json) | +| RTMDet-m | 640 | 24.71 | 39.27 | 1.62 | 49.3 | 50.9 | [config](./rtmdet_m_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco/rtmdet_m_syncbn_fast_8xb32-300e_coco_20230102_135952-40af4fe8.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco/rtmdet_m_syncbn_fast_8xb32-300e_coco_20230102_135952.log.json) | +| RTMDet-m \* | 640 | 24.71 | 39.27 | 1.62 | 50.2 (+0.9) | 51.9 (+1.0) | [config](./distillation/kd_m_rtmdet_l_neck_300e_coco.py) | [model](https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_m_rtmdet_l_neck_300e_coco/kd_m_rtmdet_l_neck_300e_coco_20230220_141313-b806f503.pth) \| [log](https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_m_rtmdet_l_neck_300e_coco/kd_m_rtmdet_l_neck_300e_coco_20230220_141313-bd028fd3.json) | +| RTMDet-l | 640 | 52.3 | 80.23 | 2.44 | 51.4 | 53.1 | [config](./rtmdet_l_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco/rtmdet_l_syncbn_fast_8xb32-300e_coco_20230102_135928-ee3abdc4.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco/rtmdet_l_syncbn_fast_8xb32-300e_coco_20230102_135928.log.json) | +| RTMDet-l \* | 640 | 52.3 | 80.23 | 2.44 | 52.3 (+0.9) | 53.7 (+0.6) | [config](./distillation/kd_l_rtmdet_x_neck_300e_coco.py) | [model](https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_l_rtmdet_x_neck_300e_coco/kd_l_rtmdet_x_neck_300e_coco_20230220_141912-c9979722.pth) \| [log](https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_l_rtmdet_x_neck_300e_coco/kd_l_rtmdet_x_neck_300e_coco_20230220_141912-c5c4e17b.json) | +| RTMDet-x | 640 | 94.86 | 141.67 | 3.10 | 52.8 | 54.2 | [config](./rtmdet_x_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_x_syncbn_fast_8xb32-300e_coco/rtmdet_x_syncbn_fast_8xb32-300e_coco_20221231_100345-b85cd476.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_x_syncbn_fast_8xb32-300e_coco/rtmdet_x_syncbn_fast_8xb32-300e_coco_20221231_100345.log.json) | **Note**: 1. The inference speed of RTMDet is measured on an NVIDIA 3090 GPU with TensorRT 8.4.3, cuDNN 8.2.0, FP16, batch size=1, and without NMS. 2. For a fair comparison, the config of bbox postprocessing is changed to be consistent with YOLOv5/6/7 after [PR#9494](https://github.com/open-mmlab/mmdetection/pull/9494), bringing about 0.1~0.3% AP improvement. 3. `TTA` means that Test Time Augmentation. It's perform 3 multi-scaling transformations on the image, followed by 2 flipping transformations (flipping and not flipping). You only need to specify `--tta` when testing to enable. see [TTA](https://github.com/open-mmlab/mmyolo/blob/dev/docs/en/common_usage/tta.md) for details. +4. \* means checkpoints are trained with knowledge distillation. More details can be found in [RTMDet distillation](./distillation). 
## Citation diff --git a/configs/rtmdet/distillation/README.md b/configs/rtmdet/distillation/README.md new file mode 100644 index 00000000..452a46cb --- /dev/null +++ b/configs/rtmdet/distillation/README.md @@ -0,0 +1,146 @@ +# Distill RTM Detectors Based on MMRazor + +## Description + +To further improve the model accuracy while not introducing much additional +computation cost, we apply the feature-based distillation to the training phase +of these RTM detectors. In summary, our distillation strategy are threefold: + +(1) Inspired by [PKD](https://arxiv.org/abs/2207.02039), we first normalize +the intermediate feature maps to have zero mean and unit variances before calculating +the distillation loss. + +(2) Inspired by [CWD](https://arxiv.org/abs/2011.13256), we adopt the channel-wise +distillation paradigm, which can pay more attention to the most salient regions +of each channel. + +(3) Inspired by [DAMO-YOLO](https://arxiv.org/abs/2211.15444), the distillation +process is split into two stages. 1) The teacher distills the student at the +first stage (280 epochs) on strong mosaic domain. 2) The student finetunes itself +on no masaic domain at the second stage (20 epochs). + +## Results and Models + +| Location | Dataset | Teacher | Student | mAP | mAP(T) | mAP(S) | Config | Download | +| :------: | :-----: | :---------------------------------------------------------------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------------------: | :---------: | :----: | :----: | :------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| FPN | COCO | [RTMDet-s](https://github.com/open-mmlab/mmyolo/blob/main/configs/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco.py) | [RTMDet-tiny](https://github.com/open-mmlab/mmyolo/blob/main/configs/rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco.py) | 41.8 (+0.8) | 44.6 | 41.0 | [config](kd_tiny_rtmdet_s_neck_300e_coco.py) | [teacher](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco/rtmdet_s_syncbn_fast_8xb32-300e_coco_20221230_182329-0a8c901a.pth) \|[model](https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_tiny_rtmdet_s_neck_300e_coco/kd_tiny_rtmdet_s_neck_300e_coco_20230213_104240-e1e4197c.pth) \| [log](https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_tiny_rtmdet_s_neck_300e_coco/kd_tiny_rtmdet_s_neck_300e_coco_20230213_104240-176901d8.json) | +| FPN | COCO | [RTMDet-m](https://github.com/open-mmlab/mmyolo/blob/main/configs/rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco.py) | [RTMDet-s](https://github.com/open-mmlab/mmyolo/blob/main/configs/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco.py) | 45.7 (+1.1) | 49.3 | 44.6 | [config](kd_s_rtmdet_m_neck_300e_coco.py) | [teacher](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco/rtmdet_m_syncbn_fast_8xb32-300e_coco_20230102_135952-40af4fe8.pth) 
\|[model](https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_s_rtmdet_m_neck_300e_coco/kd_s_rtmdet_m_neck_300e_coco_20230220_140647-446ff003.pth) \| [log](https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_s_rtmdet_m_neck_300e_coco/kd_s_rtmdet_m_neck_300e_coco_20230220_140647-89862269.json) | +| FPN | COCO | [RTMDet-l](https://github.com/open-mmlab/mmyolo/blob/main/configs/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco.py) | [RTMDet-m](https://github.com/open-mmlab/mmyolo/blob/main/configs/rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco.py) | 50.2 (+0.9) | 51.4 | 49.3 | [config](kd_m_rtmdet_l_neck_300e_coco.py) | [teacher](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco/rtmdet_l_syncbn_fast_8xb32-300e_coco_20230102_135928-ee3abdc4.pth) \|[model](https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_m_rtmdet_l_neck_300e_coco/kd_m_rtmdet_l_neck_300e_coco_20230220_141313-b806f503.pth) \| [log](https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_m_rtmdet_l_neck_300e_coco/kd_m_rtmdet_l_neck_300e_coco_20230220_141313-bd028fd3.json) | +| FPN | COCO | [RTMDet-x](https://github.com/open-mmlab/mmyolo/blob/main/configs/rtmdet/rtmdet_x_syncbn_fast_8xb32-300e_coco.py) | [RTMDet-l](https://github.com/open-mmlab/mmyolo/blob/main/configs/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco.py) | 52.3 (+0.9) | 52.8 | 51.4 | [config](kd_l_rtmdet_x_neck_300e_coco.py) | [teacher](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_x_syncbn_fast_8xb32-300e_coco/rtmdet_x_syncbn_fast_8xb32-300e_coco_20221231_100345-b85cd476.pth) \|[model](https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_l_rtmdet_x_neck_300e_coco/kd_l_rtmdet_x_neck_300e_coco_20230220_141912-c9979722.pth) \| [log](https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_l_rtmdet_x_neck_300e_coco/kd_l_rtmdet_x_neck_300e_coco_20230220_141912-c5c4e17b.json) | + +## Usage + +### Prerequisites + +- [MMRazor dev-1.x](https://github.com/open-mmlab/mmrazor/tree/dev-1.x) + +Install MMRazor from source + +``` +git clone -b dev-1.x https://github.com/open-mmlab/mmrazor.git +cd mmrazor +# Install MMRazor +mim install -v -e . +``` + +### Training commands + +In MMYOLO's root directory, run the following command to train the RTMDet-tiny +with 8 GPUs, using RTMDet-s as the teacher: + +```bash +CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 PORT=29500 ./tools/dist_train.sh configs/rtmdet/distillation/kd_tiny_rtmdet_s_neck_300e_coco.py +``` + +### Testing commands + +In MMYOLO's root directory, run the following command to test the model: + +```bash +CUDA_VISIBLE_DEVICES=0 PORT=29500 ./tools/dist_test.sh configs/rtmdet/distillation/kd_tiny_rtmdet_s_neck_300e_coco.py ${CHECKPOINT_PATH} +``` + +### Getting student-only checkpoint + +After training, the checkpoint contains parameters for both student and teacher models. +Run the following command to convert it to student-only checkpoint: + +```bash +python ./tools/model_converters/convert_kd_ckpt_to_student.py ${CHECKPOINT_PATH} --out-path ${OUTPUT_CHECKPOINT_PATH} +``` + +## Configs + +Here we provide detection configs and models for MMRazor in MMYOLO. For clarify, +we take `./kd_tiny_rtmdet_s_neck_300e_coco.py` as an example to show how to +distill a RTM detector based on MMRazor. + +Here is the main part of `./kd_tiny_rtmdet_s_neck_300e_coco.py`. 
+ +```shell +norm_cfg = dict(type='BN', affine=False, track_running_stats=False) + +distiller=dict( + type='ConfigurableDistiller', + student_recorders=dict( + fpn0=dict(type='ModuleOutputs', source='neck.out_layers.0.conv'), + fpn1=dict(type='ModuleOutputs', source='neck.out_layers.1.conv'), + fpn2=dict(type='ModuleOutputs', source='neck.out_layers.2.conv'), + ), + teacher_recorders=dict( + fpn0=dict(type='ModuleOutputs', source='neck.out_layers.0.conv'), + fpn1=dict(type='ModuleOutputs', source='neck.out_layers.1.conv'), + fpn2=dict(type='ModuleOutputs', source='neck.out_layers.2.conv')), + connectors=dict( + fpn0_s=dict(type='ConvModuleConnector', in_channel=96, + out_channel=128, bias=False, norm_cfg=norm_cfg, + act_cfg=None), + fpn0_t=dict( + type='NormConnector', in_channels=128, norm_cfg=norm_cfg), + fpn1_s=dict( + type='ConvModuleConnector', in_channel=96, + out_channel=128, bias=False, norm_cfg=norm_cfg, + act_cfg=None), + fpn1_t=dict( + type='NormConnector', in_channels=128, norm_cfg=norm_cfg), + fpn2_s=dict( + type='ConvModuleConnector', in_channel=96, + out_channel=128, bias=False, norm_cfg=norm_cfg, + act_cfg=None), + fpn2_t=dict( + type='NormConnector', in_channels=128, norm_cfg=norm_cfg)), + distill_losses=dict( + loss_fpn0=dict(type='ChannelWiseDivergence', loss_weight=1), + loss_fpn1=dict(type='ChannelWiseDivergence', loss_weight=1), + loss_fpn2=dict(type='ChannelWiseDivergence', loss_weight=1)), + loss_forward_mappings=dict( + loss_fpn0=dict( + preds_S=dict(from_student=True, recorder='fpn0', connector='fpn0_s'), + preds_T=dict(from_student=False, recorder='fpn0', connector='fpn0_t')), + loss_fpn1=dict( + preds_S=dict(from_student=True, recorder='fpn1', connector='fpn1_s'), + preds_T=dict(from_student=False, recorder='fpn1', connector='fpn1_t')), + loss_fpn2=dict( + preds_S=dict(from_student=True, recorder='fpn2', connector='fpn2_s'), + preds_T=dict(from_student=False, recorder='fpn2', connector='fpn2_t')))) + +``` + +`recorders` are used to record various intermediate results during the model forward. +In this example, they can help record the output of 3 `nn.Module` of the teacher +and the student. Details are list in [Recorder](https://github.com/open-mmlab/mmrazor/blob/dev-1.x/docs/en/advanced_guides/recorder.md) and [MMRazor Distillation](https://zhuanlan.zhihu.com/p/596582609) (if users can read Chinese). + +`connectors` are adaptive layers which usually map teacher's and students features +to the same dimension. + +`distill_losses` are configs for multiple distill losses. + +`loss_forward_mappings` are mappings between distill loss forward arguments and records. + +In addition, the student finetunes itself on no masaic domain at the last 20 epochs, +so we add a new hook named `StopDistillHook` to stop distillation on time. 
+We need to add this hook to the `custom_hooks` list like this: + +```shell +custom_hooks = [..., dict(type='mmrazor.StopDistillHook', detach_epoch=280)] +``` diff --git a/configs/rtmdet/distillation/kd_l_rtmdet_x_neck_300e_coco.py b/configs/rtmdet/distillation/kd_l_rtmdet_x_neck_300e_coco.py new file mode 100644 index 00000000..2bab26a0 --- /dev/null +++ b/configs/rtmdet/distillation/kd_l_rtmdet_x_neck_300e_coco.py @@ -0,0 +1,99 @@ +_base_ = '../rtmdet_l_syncbn_fast_8xb32-300e_coco.py' + +teacher_ckpt = 'https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_x_syncbn_fast_8xb32-300e_coco/rtmdet_x_syncbn_fast_8xb32-300e_coco_20221231_100345-b85cd476.pth' # noqa: E501 + +norm_cfg = dict(type='BN', affine=False, track_running_stats=False) + +model = dict( + _delete_=True, + _scope_='mmrazor', + type='FpnTeacherDistill', + architecture=dict( + cfg_path='mmyolo::rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco.py'), + teacher=dict( + cfg_path='mmyolo::rtmdet/rtmdet_x_syncbn_fast_8xb32-300e_coco.py'), + teacher_ckpt=teacher_ckpt, + distiller=dict( + type='ConfigurableDistiller', + # `recorders` are used to record various intermediate results during + # the model forward. + student_recorders=dict( + fpn0=dict(type='ModuleOutputs', source='neck.out_layers.0.conv'), + fpn1=dict(type='ModuleOutputs', source='neck.out_layers.1.conv'), + fpn2=dict(type='ModuleOutputs', source='neck.out_layers.2.conv'), + ), + teacher_recorders=dict( + fpn0=dict(type='ModuleOutputs', source='neck.out_layers.0.conv'), + fpn1=dict(type='ModuleOutputs', source='neck.out_layers.1.conv'), + fpn2=dict(type='ModuleOutputs', source='neck.out_layers.2.conv')), + # `connectors` are adaptive layers which usually map teacher's and + # students features to the same dimension. + connectors=dict( + fpn0_s=dict( + type='ConvModuleConnector', + in_channel=256, + out_channel=320, + bias=False, + norm_cfg=norm_cfg, + act_cfg=None), + fpn0_t=dict( + type='NormConnector', in_channels=320, norm_cfg=norm_cfg), + fpn1_s=dict( + type='ConvModuleConnector', + in_channel=256, + out_channel=320, + bias=False, + norm_cfg=norm_cfg, + act_cfg=None), + fpn1_t=dict( + type='NormConnector', in_channels=320, norm_cfg=norm_cfg), + fpn2_s=dict( + type='ConvModuleConnector', + in_channel=256, + out_channel=320, + bias=False, + norm_cfg=norm_cfg, + act_cfg=None), + fpn2_t=dict( + type='NormConnector', in_channels=320, norm_cfg=norm_cfg)), + distill_losses=dict( + loss_fpn0=dict(type='ChannelWiseDivergence', loss_weight=1), + loss_fpn1=dict(type='ChannelWiseDivergence', loss_weight=1), + loss_fpn2=dict(type='ChannelWiseDivergence', loss_weight=1)), + # `loss_forward_mappings` are mappings between distill loss forward + # arguments and records. 
+ loss_forward_mappings=dict( + loss_fpn0=dict( + preds_S=dict( + from_student=True, recorder='fpn0', connector='fpn0_s'), + preds_T=dict( + from_student=False, recorder='fpn0', connector='fpn0_t')), + loss_fpn1=dict( + preds_S=dict( + from_student=True, recorder='fpn1', connector='fpn1_s'), + preds_T=dict( + from_student=False, recorder='fpn1', connector='fpn1_t')), + loss_fpn2=dict( + preds_S=dict( + from_student=True, recorder='fpn2', connector='fpn2_s'), + preds_T=dict( + from_student=False, recorder='fpn2', + connector='fpn2_t'))))) + +find_unused_parameters = True + +custom_hooks = [ + dict( + type='EMAHook', + ema_type='ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + strict_load=False, + priority=49), + dict( + type='mmdet.PipelineSwitchHook', + switch_epoch=_base_.max_epochs - _base_.num_epochs_stage2, + switch_pipeline=_base_.train_pipeline_stage2), + # stop distillation after the 280th epoch + dict(type='mmrazor.StopDistillHook', stop_epoch=280) +] diff --git a/configs/rtmdet/distillation/kd_m_rtmdet_l_neck_300e_coco.py b/configs/rtmdet/distillation/kd_m_rtmdet_l_neck_300e_coco.py new file mode 100644 index 00000000..f7d7f921 --- /dev/null +++ b/configs/rtmdet/distillation/kd_m_rtmdet_l_neck_300e_coco.py @@ -0,0 +1,99 @@ +_base_ = '../rtmdet_m_syncbn_fast_8xb32-300e_coco.py' + +teacher_ckpt = 'https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco/rtmdet_l_syncbn_fast_8xb32-300e_coco_20230102_135928-ee3abdc4.pth' # noqa: E501 + +norm_cfg = dict(type='BN', affine=False, track_running_stats=False) + +model = dict( + _delete_=True, + _scope_='mmrazor', + type='FpnTeacherDistill', + architecture=dict( + cfg_path='mmyolo::rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco.py'), + teacher=dict( + cfg_path='mmyolo::rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco.py'), + teacher_ckpt=teacher_ckpt, + distiller=dict( + type='ConfigurableDistiller', + # `recorders` are used to record various intermediate results during + # the model forward. + student_recorders=dict( + fpn0=dict(type='ModuleOutputs', source='neck.out_layers.0.conv'), + fpn1=dict(type='ModuleOutputs', source='neck.out_layers.1.conv'), + fpn2=dict(type='ModuleOutputs', source='neck.out_layers.2.conv'), + ), + teacher_recorders=dict( + fpn0=dict(type='ModuleOutputs', source='neck.out_layers.0.conv'), + fpn1=dict(type='ModuleOutputs', source='neck.out_layers.1.conv'), + fpn2=dict(type='ModuleOutputs', source='neck.out_layers.2.conv')), + # `connectors` are adaptive layers which usually map teacher's and + # students features to the same dimension. + connectors=dict( + fpn0_s=dict( + type='ConvModuleConnector', + in_channel=192, + out_channel=256, + bias=False, + norm_cfg=norm_cfg, + act_cfg=None), + fpn0_t=dict( + type='NormConnector', in_channels=256, norm_cfg=norm_cfg), + fpn1_s=dict( + type='ConvModuleConnector', + in_channel=192, + out_channel=256, + bias=False, + norm_cfg=norm_cfg, + act_cfg=None), + fpn1_t=dict( + type='NormConnector', in_channels=256, norm_cfg=norm_cfg), + fpn2_s=dict( + type='ConvModuleConnector', + in_channel=192, + out_channel=256, + bias=False, + norm_cfg=norm_cfg, + act_cfg=None), + fpn2_t=dict( + type='NormConnector', in_channels=256, norm_cfg=norm_cfg)), + distill_losses=dict( + loss_fpn0=dict(type='ChannelWiseDivergence', loss_weight=1), + loss_fpn1=dict(type='ChannelWiseDivergence', loss_weight=1), + loss_fpn2=dict(type='ChannelWiseDivergence', loss_weight=1)), + # `loss_forward_mappings` are mappings between distill loss forward + # arguments and records. 
+ loss_forward_mappings=dict( + loss_fpn0=dict( + preds_S=dict( + from_student=True, recorder='fpn0', connector='fpn0_s'), + preds_T=dict( + from_student=False, recorder='fpn0', connector='fpn0_t')), + loss_fpn1=dict( + preds_S=dict( + from_student=True, recorder='fpn1', connector='fpn1_s'), + preds_T=dict( + from_student=False, recorder='fpn1', connector='fpn1_t')), + loss_fpn2=dict( + preds_S=dict( + from_student=True, recorder='fpn2', connector='fpn2_s'), + preds_T=dict( + from_student=False, recorder='fpn2', + connector='fpn2_t'))))) + +find_unused_parameters = True + +custom_hooks = [ + dict( + type='EMAHook', + ema_type='ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + strict_load=False, + priority=49), + dict( + type='mmdet.PipelineSwitchHook', + switch_epoch=_base_.max_epochs - _base_.num_epochs_stage2, + switch_pipeline=_base_.train_pipeline_stage2), + # stop distillation after the 280th epoch + dict(type='mmrazor.StopDistillHook', stop_epoch=280) +] diff --git a/configs/rtmdet/distillation/kd_s_rtmdet_m_neck_300e_coco.py b/configs/rtmdet/distillation/kd_s_rtmdet_m_neck_300e_coco.py new file mode 100644 index 00000000..99b5dc5e --- /dev/null +++ b/configs/rtmdet/distillation/kd_s_rtmdet_m_neck_300e_coco.py @@ -0,0 +1,99 @@ +_base_ = '../rtmdet_s_syncbn_fast_8xb32-300e_coco.py' + +teacher_ckpt = 'https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco/rtmdet_m_syncbn_fast_8xb32-300e_coco_20230102_135952-40af4fe8.pth' # noqa: E501 + +norm_cfg = dict(type='BN', affine=False, track_running_stats=False) + +model = dict( + _delete_=True, + _scope_='mmrazor', + type='FpnTeacherDistill', + architecture=dict( + cfg_path='mmyolo::rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco.py'), + teacher=dict( + cfg_path='mmyolo::rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco.py'), + teacher_ckpt=teacher_ckpt, + distiller=dict( + type='ConfigurableDistiller', + # `recorders` are used to record various intermediate results during + # the model forward. + student_recorders=dict( + fpn0=dict(type='ModuleOutputs', source='neck.out_layers.0.conv'), + fpn1=dict(type='ModuleOutputs', source='neck.out_layers.1.conv'), + fpn2=dict(type='ModuleOutputs', source='neck.out_layers.2.conv'), + ), + teacher_recorders=dict( + fpn0=dict(type='ModuleOutputs', source='neck.out_layers.0.conv'), + fpn1=dict(type='ModuleOutputs', source='neck.out_layers.1.conv'), + fpn2=dict(type='ModuleOutputs', source='neck.out_layers.2.conv')), + # `connectors` are adaptive layers which usually map teacher's and + # students features to the same dimension. + connectors=dict( + fpn0_s=dict( + type='ConvModuleConnector', + in_channel=128, + out_channel=192, + bias=False, + norm_cfg=norm_cfg, + act_cfg=None), + fpn0_t=dict( + type='NormConnector', in_channels=192, norm_cfg=norm_cfg), + fpn1_s=dict( + type='ConvModuleConnector', + in_channel=128, + out_channel=192, + bias=False, + norm_cfg=norm_cfg, + act_cfg=None), + fpn1_t=dict( + type='NormConnector', in_channels=192, norm_cfg=norm_cfg), + fpn2_s=dict( + type='ConvModuleConnector', + in_channel=128, + out_channel=192, + bias=False, + norm_cfg=norm_cfg, + act_cfg=None), + fpn2_t=dict( + type='NormConnector', in_channels=192, norm_cfg=norm_cfg)), + distill_losses=dict( + loss_fpn0=dict(type='ChannelWiseDivergence', loss_weight=1), + loss_fpn1=dict(type='ChannelWiseDivergence', loss_weight=1), + loss_fpn2=dict(type='ChannelWiseDivergence', loss_weight=1)), + # `loss_forward_mappings` are mappings between distill loss forward + # arguments and records. 
+ loss_forward_mappings=dict( + loss_fpn0=dict( + preds_S=dict( + from_student=True, recorder='fpn0', connector='fpn0_s'), + preds_T=dict( + from_student=False, recorder='fpn0', connector='fpn0_t')), + loss_fpn1=dict( + preds_S=dict( + from_student=True, recorder='fpn1', connector='fpn1_s'), + preds_T=dict( + from_student=False, recorder='fpn1', connector='fpn1_t')), + loss_fpn2=dict( + preds_S=dict( + from_student=True, recorder='fpn2', connector='fpn2_s'), + preds_T=dict( + from_student=False, recorder='fpn2', + connector='fpn2_t'))))) + +find_unused_parameters = True + +custom_hooks = [ + dict( + type='EMAHook', + ema_type='ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + strict_load=False, + priority=49), + dict( + type='mmdet.PipelineSwitchHook', + switch_epoch=_base_.max_epochs - _base_.num_epochs_stage2, + switch_pipeline=_base_.train_pipeline_stage2), + # stop distillation after the 280th epoch + dict(type='mmrazor.StopDistillHook', stop_epoch=280) +] diff --git a/configs/rtmdet/distillation/kd_tiny_rtmdet_s_neck_300e_coco.py b/configs/rtmdet/distillation/kd_tiny_rtmdet_s_neck_300e_coco.py new file mode 100644 index 00000000..50c23580 --- /dev/null +++ b/configs/rtmdet/distillation/kd_tiny_rtmdet_s_neck_300e_coco.py @@ -0,0 +1,99 @@ +_base_ = '../rtmdet_tiny_syncbn_fast_8xb32-300e_coco.py' + +teacher_ckpt = 'https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco/rtmdet_s_syncbn_fast_8xb32-300e_coco_20221230_182329-0a8c901a.pth' # noqa: E501 + +norm_cfg = dict(type='BN', affine=False, track_running_stats=False) + +model = dict( + _delete_=True, + _scope_='mmrazor', + type='FpnTeacherDistill', + architecture=dict( + cfg_path='mmyolo::rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco.py'), + teacher=dict( + cfg_path='mmyolo::rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco.py'), + teacher_ckpt=teacher_ckpt, + distiller=dict( + type='ConfigurableDistiller', + # `recorders` are used to record various intermediate results during + # the model forward. + student_recorders=dict( + fpn0=dict(type='ModuleOutputs', source='neck.out_layers.0.conv'), + fpn1=dict(type='ModuleOutputs', source='neck.out_layers.1.conv'), + fpn2=dict(type='ModuleOutputs', source='neck.out_layers.2.conv'), + ), + teacher_recorders=dict( + fpn0=dict(type='ModuleOutputs', source='neck.out_layers.0.conv'), + fpn1=dict(type='ModuleOutputs', source='neck.out_layers.1.conv'), + fpn2=dict(type='ModuleOutputs', source='neck.out_layers.2.conv')), + # `connectors` are adaptive layers which usually map teacher's and + # students features to the same dimension. 
+ connectors=dict( + fpn0_s=dict( + type='ConvModuleConnector', + in_channel=96, + out_channel=128, + bias=False, + norm_cfg=norm_cfg, + act_cfg=None), + fpn0_t=dict( + type='NormConnector', in_channels=128, norm_cfg=norm_cfg), + fpn1_s=dict( + type='ConvModuleConnector', + in_channel=96, + out_channel=128, + bias=False, + norm_cfg=norm_cfg, + act_cfg=None), + fpn1_t=dict( + type='NormConnector', in_channels=128, norm_cfg=norm_cfg), + fpn2_s=dict( + type='ConvModuleConnector', + in_channel=96, + out_channel=128, + bias=False, + norm_cfg=norm_cfg, + act_cfg=None), + fpn2_t=dict( + type='NormConnector', in_channels=128, norm_cfg=norm_cfg)), + distill_losses=dict( + loss_fpn0=dict(type='ChannelWiseDivergence', loss_weight=1), + loss_fpn1=dict(type='ChannelWiseDivergence', loss_weight=1), + loss_fpn2=dict(type='ChannelWiseDivergence', loss_weight=1)), + # `loss_forward_mappings` are mappings between distill loss forward + # arguments and records. + loss_forward_mappings=dict( + loss_fpn0=dict( + preds_S=dict( + from_student=True, recorder='fpn0', connector='fpn0_s'), + preds_T=dict( + from_student=False, recorder='fpn0', connector='fpn0_t')), + loss_fpn1=dict( + preds_S=dict( + from_student=True, recorder='fpn1', connector='fpn1_s'), + preds_T=dict( + from_student=False, recorder='fpn1', connector='fpn1_t')), + loss_fpn2=dict( + preds_S=dict( + from_student=True, recorder='fpn2', connector='fpn2_s'), + preds_T=dict( + from_student=False, recorder='fpn2', + connector='fpn2_t'))))) + +find_unused_parameters = True + +custom_hooks = [ + dict( + type='EMAHook', + ema_type='ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + strict_load=False, + priority=49), + dict( + type='mmdet.PipelineSwitchHook', + switch_epoch=_base_.max_epochs - _base_.num_epochs_stage2, + switch_pipeline=_base_.train_pipeline_stage2), + # stop distillation after the 280th epoch + dict(type='mmrazor.StopDistillHook', stop_epoch=280) +] diff --git a/configs/rtmdet/metafile.yml b/configs/rtmdet/metafile.yml index 64abc880..291cbc99 100644 --- a/configs/rtmdet/metafile.yml +++ b/configs/rtmdet/metafile.yml @@ -28,6 +28,19 @@ Models: box AP: 41.0 Weights: https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco/rtmdet_tiny_syncbn_fast_8xb32-300e_coco_20230102_140117-dbb1dc83.pth + - Name: kd_tiny_rtmdet_s_neck_300e_coco + In Collection: RTMDet + Config: configs/rtmdet/distillation/kd_tiny_rtmdet_s_neck_300e_coco.py + Metadata: + Training Memory (GB): 11.9 + Epochs: 300 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 41.8 + Weights: https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_tiny_rtmdet_s_neck_300e_coco/kd_tiny_rtmdet_s_neck_300e_coco_20230213_104240-e1e4197c.pth + - Name: rtmdet_s_syncbn_fast_8xb32-300e_coco In Collection: RTMDet Config: configs/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco.py @@ -41,6 +54,19 @@ Models: box AP: 44.6 Weights: https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_s_syncbn_fast_8xb32-300e_coco/rtmdet_s_syncbn_fast_8xb32-300e_coco_20221230_182329-0a8c901a.pth + - Name: kd_s_rtmdet_m_neck_300e_coco + In Collection: RTMDet + Config: configs/rtmdet/distillation/kd_s_rtmdet_m_neck_300e_coco.py + Metadata: + Training Memory (GB): 16.3 + Epochs: 300 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 45.7 + Weights: https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_s_rtmdet_m_neck_300e_coco/kd_s_rtmdet_m_neck_300e_coco_20230220_140647-446ff003.pth + - Name: 
rtmdet_m_syncbn_fast_8xb32-300e_coco In Collection: RTMDet Config: configs/rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco.py @@ -54,6 +80,19 @@ Models: box AP: 49.3 Weights: https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_m_syncbn_fast_8xb32-300e_coco/rtmdet_m_syncbn_fast_8xb32-300e_coco_20230102_135952-40af4fe8.pth + - Name: kd_m_rtmdet_l_neck_300e_coco + In Collection: RTMDet + Config: configs/rtmdet/distillation/kd_m_rtmdet_l_neck_300e_coco.py + Metadata: + Training Memory (GB): 29.0 + Epochs: 300 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 50.2 + Weights: https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_m_rtmdet_l_neck_300e_coco/kd_m_rtmdet_l_neck_300e_coco_20230220_141313-b806f503.pth + - Name: rtmdet_l_syncbn_fast_8xb32-300e_coco In Collection: RTMDet Config: configs/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco.py @@ -67,6 +106,19 @@ Models: box AP: 51.4 Weights: https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco/rtmdet_l_syncbn_fast_8xb32-300e_coco_20230102_135928-ee3abdc4.pth + - Name: kd_l_rtmdet_x_neck_300e_coco + In Collection: RTMDet + Config: configs/rtmdet/distillation/kd_l_rtmdet_x_neck_300e_coco.py + Metadata: + Training Memory (GB): 45.2 + Epochs: 300 + Results: + - Task: Object Detection + Dataset: COCO + Metrics: + box AP: 52.3 + Weights: https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_l_rtmdet_x_neck_300e_coco/kd_l_rtmdet_x_neck_300e_coco_20230220_141912-c9979722.pth + - Name: rtmdet_x_syncbn_fast_8xb32-300e_coco In Collection: RTMDet Config: configs/rtmdet/rtmdet_x_syncbn_fast_8xb32-300e_coco.py diff --git a/tools/model_converters/convert_kd_ckpt_to_student.py b/tools/model_converters/convert_kd_ckpt_to_student.py new file mode 100644 index 00000000..e44f66d0 --- /dev/null +++ b/tools/model_converters/convert_kd_ckpt_to_student.py @@ -0,0 +1,48 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+import argparse +from pathlib import Path + +import torch + + +def parse_args(): + parser = argparse.ArgumentParser( + description='Convert KD checkpoint to student-only checkpoint') + parser.add_argument('checkpoint', help='input checkpoint filename') + parser.add_argument('--out-path', help='save checkpoint path') + parser.add_argument( + '--inplace', action='store_true', help='replace origin ckpt') + args = parser.parse_args() + return args + + +def main(): + args = parse_args() + checkpoint = torch.load(args.checkpoint, map_location='cpu') + new_state_dict = dict() + new_meta = checkpoint['meta'] + + for key, value in checkpoint['state_dict'].items(): + if key.startswith('architecture.'): + new_key = key.replace('architecture.', '') + new_state_dict[new_key] = value + + checkpoint = dict() + checkpoint['meta'] = new_meta + checkpoint['state_dict'] = new_state_dict + + if args.inplace: + torch.save(checkpoint, args.checkpoint) + else: + ckpt_path = Path(args.checkpoint) + ckpt_name = ckpt_path.stem + if args.out_path: + ckpt_dir = Path(args.out_path) + else: + ckpt_dir = ckpt_path.parent + new_ckpt_path = ckpt_dir / f'{ckpt_name}_student.pth' + torch.save(checkpoint, new_ckpt_path) + + +if __name__ == '__main__': + main() From cdc885550acd73b826b45caf066e482024194a70 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Haian=20Huang=28=E6=B7=B1=E5=BA=A6=E7=9C=B8=29?= <1286304229@qq.com> Date: Wed, 1 Mar 2023 16:34:17 +0800 Subject: [PATCH 55/64] [Feature] Add model zoo (#607) * Update model_zoo.md * fix * update * update * update --- README.md | 18 ++++++ README_zh-CN.md | 18 ++++++ configs/razor/subnets/README.md | 2 +- configs/rtmdet/README.md | 2 +- configs/yolov6/README.md | 2 +- docs/en/get_started/overview.md | 20 +++++-- docs/en/index.rst | 14 ++--- docs/en/model_zoo.md | 96 +++++++++++++++++++++++------- docs/zh_cn/get_started/overview.md | 20 +++++-- docs/zh_cn/index.rst | 14 ++--- docs/zh_cn/model_zoo.md | 96 +++++++++++++++++++++++------- tools/analysis_tools/get_flops.py | 3 + 12 files changed, 238 insertions(+), 67 deletions(-) diff --git a/README.md b/README.md index de1d7e5d..5c57ae6e 100644 --- a/README.md +++ b/README.md @@ -259,6 +259,14 @@ For different parts from MMDetection, we have also prepared user guides and adva Results and models are available in the [model zoo](docs/en/model_zoo.md). +
+Supported Tasks + +- [x] Object detection +- [x] Rotated object detection + +
+
Supported Algorithms @@ -272,6 +280,16 @@ Results and models are available in the [model zoo](docs/en/model_zoo.md).
+
+Supported Datasets + +- [x] COCO Dataset +- [x] VOC Dataset +- [x] CrowdHuman Dataset +- [x] DOTA 1.0 Dataset + +
+
Module Components diff --git a/README_zh-CN.md b/README_zh-CN.md index ca65a944..3c5d70bf 100644 --- a/README_zh-CN.md +++ b/README_zh-CN.md @@ -281,6 +281,14 @@ MMYOLO 用法和 MMDetection 几乎一致,所有教程都是通用的,你也 测试结果和模型可以在 [模型库](docs/zh_cn/model_zoo.md) 中找到。 +
+支持的任务 + +- [x] 目标检测 +- [x] 旋转框目标检测 + +
+
支持的算法 @@ -294,6 +302,16 @@ MMYOLO 用法和 MMDetection 几乎一致,所有教程都是通用的,你也
+
+支持的数据集 + +- [x] COCO Dataset +- [x] VOC Dataset +- [x] CrowdHuman Dataset +- [x] DOTA 1.0 Dataset + +
+
模块组件 diff --git a/configs/razor/subnets/README.md b/configs/razor/subnets/README.md index 692492d3..ad7a716b 100644 --- a/configs/razor/subnets/README.md +++ b/configs/razor/subnets/README.md @@ -62,7 +62,7 @@ CUDA_VISIBLE_DEVICES=0 PORT=29500 ./tools/dist_test.sh configs/razor/subnets/yol Here we provide the baseline version of YOLO Series with NAS backbone. -| Model | size | box AP | Params(M) | FLOPS(G) | Config | Download | +| Model | size | box AP | Params(M) | FLOPs(G) | Config | Download | | :------------------------: | :--: | :----: | :----------: | :------: | :----------------------------------------------------------------------------------------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------: | | yolov5-s | 640 | 37.7 | 7.235 | 8.265 | [config](https://github.com/open-mmlab/mmyolo/blob/main/configs/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth) | | yolov5_s_spos_shufflenetv2 | 640 | 37.9 | 7.04(-2.7%) | 7.03 | [config](https://github.com/open-mmlab/mmyolo/tree/dev/configs/razor/subnets/yolov5_s_spos_shufflenetv2_syncbn_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmrazor/v1/spos/yolov5/yolov5_s_spos_shufflenetv2_syncbn_8xb16-300e_coco_20230109_155302-777fd6f1.pth) | diff --git a/configs/rtmdet/README.md b/configs/rtmdet/README.md index 302e366a..ddcd52ba 100644 --- a/configs/rtmdet/README.md +++ b/configs/rtmdet/README.md @@ -23,7 +23,7 @@ RTMDet-l model structure ## Object Detection -| Model | size | Params(M) | FLOPS(G) | TRT-FP16-Latency(ms) | box AP | TTA box AP | Config | Download | +| Model | size | Params(M) | FLOPs(G) | TRT-FP16-Latency(ms) | box AP | TTA box AP | Config | Download | | :------------: | :--: | :-------: | :------: | :------------------: | :---------: | :---------: | :---------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | | RTMDet-tiny | 640 | 4.8 | 8.1 | 0.98 | 41.0 | 42.7 | [config](./rtmdet_tiny_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco/rtmdet_tiny_syncbn_fast_8xb32-300e_coco_20230102_140117-dbb1dc83.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco/rtmdet_tiny_syncbn_fast_8xb32-300e_coco_20230102_140117.log.json) | | RTMDet-tiny \* | 640 | 4.8 | 8.1 | 0.98 | 41.8 (+0.8) | 43.2 (+0.5) | [config](./distillation/kd_tiny_rtmdet_s_neck_300e_coco.py) | [model](https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_tiny_rtmdet_s_neck_300e_coco/kd_tiny_rtmdet_s_neck_300e_coco_20230213_104240-e1e4197c.pth) \| [log](https://download.openmmlab.com/mmrazor/v1/rtmdet_distillation/kd_tiny_rtmdet_s_neck_300e_coco/kd_tiny_rtmdet_s_neck_300e_coco_20230213_104240-176901d8.json) | diff --git a/configs/yolov6/README.md b/configs/yolov6/README.md index 603ba555..4dac37f4 100644 --- a/configs/yolov6/README.md +++ b/configs/yolov6/README.md 
@@ -31,7 +31,7 @@ YOLOv6-l model structure | YOLOv6-n | P5 | 640 | 400 | Yes | Yes | 6.04 | 36.2 | [config](../yolov6/yolov6_n_syncbn_fast_8xb32-400e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov6/yolov6_n_syncbn_fast_8xb32-400e_coco/yolov6_n_syncbn_fast_8xb32-400e_coco_20221030_202726-d99b2e82.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov6/yolov6_n_syncbn_fast_8xb32-400e_coco/yolov6_n_syncbn_fast_8xb32-400e_coco_20221030_202726.log.json) | | YOLOv6-t | P5 | 640 | 400 | Yes | Yes | 8.13 | 41.0 | [config](../yolov6/yolov6_t_syncbn_fast_8xb32-400e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov6/yolov6_t_syncbn_fast_8xb32-400e_coco/yolov6_t_syncbn_fast_8xb32-400e_coco_20221030_143755-cf0d278f.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov6/yolov6_t_syncbn_fast_8xb32-400e_coco/yolov6_t_syncbn_fast_8xb32-400e_coco_20221030_143755.log.json) | | YOLOv6-s | P5 | 640 | 400 | Yes | Yes | 8.88 | 44.0 | [config](../yolov6/yolov6_s_syncbn_fast_8xb32-400e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov6/yolov6_s_syncbn_fast_8xb32-400e_coco/yolov6_s_syncbn_fast_8xb32-400e_coco_20221102_203035-932e1d91.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov6/yolov6_s_syncbn_fast_8xb32-400e_coco/yolov6_s_syncbn_fast_8xb32-400e_coco_20221102_203035.log.json) | -| YOLOv6-m | P5 | 640 | 300 | Yes | Yes | 16.69 | 48.4 | [config](../yolov6/yolov6_m_syncbn_fast_8xb32-400e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov6/yolov6_m_syncbn_fast_8xb32-300e_coco/yolov6_m_syncbn_fast_8xb32-300e_coco_20221109_182658-85bda3f4.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov6/yolov6_m_syncbn_fast_8xb32-300e_coco/yolov6_m_syncbn_fast_8xb32-300e_coco_20221109_182658.log.json) | +| YOLOv6-m | P5 | 640 | 300 | Yes | Yes | 16.69 | 48.4 | [config](../yolov6/yolov6_m_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov6/yolov6_m_syncbn_fast_8xb32-300e_coco/yolov6_m_syncbn_fast_8xb32-300e_coco_20221109_182658-85bda3f4.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov6/yolov6_m_syncbn_fast_8xb32-300e_coco/yolov6_m_syncbn_fast_8xb32-300e_coco_20221109_182658.log.json) | | YOLOv6-l | P5 | 640 | 300 | Yes | Yes | 20.86 | 51.0 | [config](../yolov6/yolov6_l_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov6/yolov6_l_syncbn_fast_8xb32-300e_coco/yolov6_l_syncbn_fast_8xb32-300e_coco_20221109_183156-91e3c447.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov6/yolov6_l_syncbn_fast_8xb32-300e_coco/yolov6_l_syncbn_fast_8xb32-300e_coco_20221109_183156.log.json) | **Note**: diff --git a/docs/en/get_started/overview.md b/docs/en/get_started/overview.md index 36ff7778..0af5ab62 100644 --- a/docs/en/get_started/overview.md +++ b/docs/en/get_started/overview.md @@ -12,6 +12,16 @@ MMYOLO is an open-source algorithms toolkit of YOLO based on PyTorch and MMDetec vision diagram
+The following tasks are currently supported: + +
+Tasks currently supported + +- object detection +- rotated object detection + +
+ The YOLO series of algorithms currently supported are as follows:
@@ -27,13 +37,15 @@ The YOLO series of algorithms currently supported are as follows:
-The following tasks are currently supported: +The datasets currently supported are as follows:
-Tasks currently supported +Datasets currently supported -- object detection -- rotated object detection +- COCO Dataset +- VOC Dataset +- CrowdHuman Dataset +- DOTA 1.0 Dataset
diff --git a/docs/en/index.rst b/docs/en/index.rst index b609da21..a4edc8ef 100644 --- a/docs/en/index.rst +++ b/docs/en/index.rst @@ -78,19 +78,14 @@ You can switch between Chinese and English documents in the top-right corner of .. toctree:: - :maxdepth: 1 + :maxdepth: 2 :caption: Advanced Tutorials advanced_guides/cross-library_application.md -.. toctree:: - :maxdepth: 1 - :caption: API Reference - - api.rst .. toctree:: - :maxdepth: 1 + :maxdepth: 2 :caption: Model Zoo model_zoo.md @@ -104,6 +99,11 @@ You can switch between Chinese and English documents in the top-right corner of notes/conventions.md notes/code_style.md +.. toctree:: + :maxdepth: 1 + :caption: API Reference + + api.rst .. toctree:: :caption: Switch Language diff --git a/docs/en/model_zoo.md b/docs/en/model_zoo.md index ea55875e..5934821e 100644 --- a/docs/en/model_zoo.md +++ b/docs/en/model_zoo.md @@ -1,36 +1,90 @@ -# Benchmark and Model Zoo +# Model Zoo and Benchmark -## Common settings +This page is used to summarize the performance and related evaluation metrics of various models supported in MMYOLO for users to compare and analyze. -- All models were trained on `coco_2017_train`, and tested on the `coco_2017_val`. -- We use distributed training. +## COCO dataset -## Baselines +| Model | Arch | Size | Batch Size | Epoch | SyncBN | AMP | Mem (GB) | Params(M) | FLOPs(G) | TRT-FP16-GPU-Latency(ms) | Box AP | TTA Box AP | +| :--------------: | :--: | :--: | :--------: | :---: | :----: | :-: | :------: | :-------: | :------: | :----------------------: | :----: | :--------: | +| YOLOv5-n | P5 | 640 | 8xb16 | 300 | Yes | Yes | 1.5 | 1.87 | 2.26 | 1.14 | 28.0 | 30.7 | +| YOLOv6-v2.0-n | P5 | 640 | 8xb32 | 400 | Yes | Yes | 6.04 | 4.32 | 5.52 | 1.37 | 36.2 | | +| YOLOv8-n | P5 | 640 | 8xb16 | 500 | Yes | Yes | 2.5 | 3.16 | 4.4 | 1.53 | 37.4 | 39.9 | +| RTMDet-tiny | P5 | 640 | 8xb32 | 300 | Yes | No | 11.9 | 4.90 | 8.09 | 2.31 | 41.8 | 43.2 | +| YOLOv6-v2.0-tiny | P5 | 640 | 8xb32 | 400 | Yes | Yes | 8.13 | 9.70 | 12.37 | 2.19 | 41.0 | | +| YOLOv7-tiny | P5 | 640 | 8xb16 | 300 | Yes | Yes | 2.7 | 6.23 | 6.89 | 1.88 | 37.5 | | +| YOLOX-tiny | P5 | 416 | 8xb32 | 300 | No | Yes | 4.9 | 5.06 | 7.63 | 1.19 | 34.3 | | +| RTMDet-s | P5 | 640 | 8xb32 | 300 | Yes | No | 16.3 | 8.89 | 14.84 | 2.89 | 45.7 | 47.3 | +| YOLOv5-s | P5 | 640 | 8xb16 | 300 | Yes | Yes | 2.7 | 7.24 | 8.27 | 1.89 | 37.7 | 40.2 | +| YOLOv6-v2.0-s | P5 | 640 | 8xb32 | 400 | Yes | Yes | 8.88 | 17.22 | 21.94 | 2.67 | 44.0 | | +| YOLOv8-s | P5 | 640 | 8xb16 | 500 | Yes | Yes | 4.0 | 11.17 | 14.36 | 2.61 | 45.1 | 46.8 | +| YOLOX-s | P5 | 640 | 8xb32 | 300 | No | Yes | 9.8 | 8.97 | 13.40 | 2.38 | 41.9 | | +| PPYOLOE+ -s | P5 | 640 | 8xb8 | 80 | Yes | No | 4.7 | 7.93 | 8.68 | 2.54 | 43.5 | | +| RTMDet-m | P5 | 640 | 8xb32 | 300 | Yes | No | 29.0 | 24.71 | 39.21 | 6.23 | 50.2 | 51.9 | +| YOLOv5-m | P5 | 640 | 8xb16 | 300 | Yes | Yes | 5.0 | 21.19 | 24.53 | 4.28 | 45.3 | 46.9 | +| YOLOv6-v2.0-m | P5 | 640 | 8xb32 | 300 | Yes | Yes | 16.69 | 34.25 | 40.7 | 5.12 | 48.4 | | +| YOLOv8-m | P5 | 640 | 8xb16 | 500 | Yes | Yes | 7.0 | 25.9 | 39.57 | 5.78 | 50.6 | 52.3 | +| YOLOX-m | P5 | 640 | 8xb32 | 300 | No | Yes | 17.6 | 25.33 | 36.88 | 5.31 | 47.5 | | +| PPYOLOE+ -m | P5 | 640 | 8xb8 | 80 | Yes | No | 8.4 | 23.43 | 24.97 | 5.47 | 49.5 | | +| RTMDet-l | P5 | 640 | 8xb32 | 300 | Yes | No | 45.2 | 52.32 | 80.12 | 10.13 | 52.3 | 53.7 | +| YOLOv5-l | P5 | 640 | 8xb16 | 300 | Yes | Yes | 8.1 | 46.56 | 54.65 | 6.8 | 48.8 | 49.9 | +| YOLOv6-v2.0-l | P5 | 640 | 8xb32 | 300 | Yes | 
Yes | 20.86 | 58.53 | 71.43 | 8.78 | 51.0 | |
+| YOLOv7-l | P5 | 640 | 8xb16 | 300 | Yes | Yes | 10.3 | 36.93 | 52.42 | 6.63 | 50.9 | |
+| YOLOv8-l | P5 | 640 | 8xb16 | 500 | Yes | Yes | 9.1 | 43.69 | 82.73 | 8.97 | 53.0 | 54.4 |
+| YOLOX-l | P5 | 640 | 8xb8 | 300 | No | Yes | 8.0 | 54.21 | 77.83 | 9.23 | 50.1 | |
+| PPYOLOE+ -l | P5 | 640 | 8xb8 | 80 | Yes | No | 13.2 | 52.20 | 55.05 | 8.2 | 52.6 | |
+| RTMDet-x | P5 | 640 | 8xb32 | 300 | Yes | No | 63.4 | 94.86 | 145.41 | 17.89 | 52.8 | 54.2 |
+| YOLOv7-x | P5 | 640 | 8xb16 | 300 | Yes | Yes | 13.7 | 71.35 | 95.06 | 11.63 | 52.8 | |
+| YOLOv8-x | P5 | 640 | 8xb16 | 500 | Yes | Yes | 12.4 | 68.23 | 132.10 | 14.22 | 54.0 | 55.0 |
+| YOLOX-x | P5 | 640 | 8xb8 | 300 | No | Yes | 9.8 | 99.07 | 144.39 | 15.35 | 51.4 | |
+| PPYOLOE+ -x | P5 | 640 | 8xb8 | 80 | Yes | No | 19.1 | 98.42 | 105.48 | 14.02 | 54.2 | |
+| YOLOv5-n | P6 | 1280 | 8xb16 | 300 | Yes | Yes | 5.8 | 3.25 | 2.30 | | 35.9 | |
+| YOLOv5-s | P6 | 1280 | 8xb16 | 300 | Yes | Yes | 10.5 | 12.63 | 8.45 | | 44.4 | |
+| YOLOv5-m | P6 | 1280 | 8xb16 | 300 | Yes | Yes | 19.1 | 35.73 | 25.05 | | 51.3 | |
+| YOLOv5-l | P6 | 1280 | 8xb16 | 300 | Yes | Yes | 30.5 | 76.77 | 55.77 | | 53.7 | |
+| YOLOv7-w | P6 | 1280 | 8xb16 | 300 | Yes | Yes | 27.0 | 82.31 | 45.07 | | 54.1 | |
+| YOLOv7-e | P6 | 1280 | 8xb16 | 300 | Yes | Yes | 42.5 | 114.69 | 64.48 | | 55.1 | |

-### YOLOv5
+- All models are trained on the COCO train2017 dataset and evaluated on the val2017 dataset.
+- TRT-FP16-GPU-Latency(ms) is the GPU compute time on an NVIDIA Tesla T4 with TensorRT 8.4, a batch size of 1 and a test shape of 640x640, timing only the model forward pass (the test shape for YOLOX-tiny is 416x416).
+- The numbers of model parameters and FLOPs are obtained with the [get_flops](https://github.com/open-mmlab/mmyolo/blob/dev/tools/analysis_tools/get_flops.py) script; different calculation methods may give slightly different results.
+- The RTMDet results are obtained by training with [MMRazor Knowledge Distillation](https://github.com/open-mmlab/mmyolo/blob/dev/configs/rtmdet/distillation/README.md).
+- Only YOLOv6 version 2.0 is implemented in MMYOLO for now, and its L and M results are reported without knowledge distillation.
+- The YOLOv8 results are optimized with instance mask annotations, while YOLOv5, YOLOv6 and YOLOv7 do not use them.
+- PPYOLOE+ uses Obj365 pre-trained weights, so only 80 epochs are needed for COCO training.
+- YOLOX-tiny, YOLOX-s and YOLOX-m are trained with the optimizer settings proposed in RTMDet and improve to varying degrees over the original implementation.
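+
+For a quick local check of a downloaded checkpoint against the table above, the test loop can also be driven from Python. This is only a rough sketch (the config and checkpoint paths, and the `register_all_modules` helper, are assumptions about a standard MMYOLO checkout); the supported entry point remains `tools/test.py`:
+
+```python
+from mmengine.config import Config
+from mmengine.runner import Runner
+
+from mmyolo.utils import register_all_modules
+
+register_all_modules()  # register MMYOLO models, datasets and transforms
+
+cfg = Config.fromfile('configs/rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco.py')
+cfg.load_from = 'rtmdet_tiny_syncbn_fast_8xb32-300e_coco.pth'  # local checkpoint path
+cfg.work_dir = './work_dirs/model_zoo_check'
+
+runner = Runner.from_cfg(cfg)
+runner.test()  # reports COCO box AP on val2017
+```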
-Please refer to [YOLOv5](https://github.com/open-mmlab/mmyolo/blob/main/configs/yolov5)。 +Please see below items for more details -### YOLOv6 +- [RTMDet](https://github.com/open-mmlab/mmyolo/blob/main/configs/rtmdet) +- [YOLOv5](https://github.com/open-mmlab/mmyolo/blob/main/configs/yolov5) +- [YOLOv6](https://github.com/open-mmlab/mmyolo/blob/main/configs/yolov6) +- [YOLOv7](https://github.com/open-mmlab/mmyolo/blob/main/configs/yolov7) +- [YOLOv8](https://github.com/open-mmlab/mmyolo/blob/main/configs/yolov8) +- [YOLOX](https://github.com/open-mmlab/mmyolo/blob/main/configs/yolox) +- [PPYOLO-E](https://github.com/open-mmlab/mmyolo/blob/main/configs/ppyoloe) -Please refer to [YOLOv6](https://github.com/open-mmlab/mmyolo/blob/main/configs/yolov6)。 +## VOC dataset -### YOLOv7 +| Backbone | size | Batchsize | AMP | Mem (GB) | box AP(COCO metric) | +| :------: | :--: | :-------: | :-: | :------: | :-----------------: | +| YOLOv5-n | 512 | 64 | Yes | 3.5 | 51.2 | +| YOLOv5-s | 512 | 64 | Yes | 6.5 | 62.7 | +| YOLOv5-m | 512 | 64 | Yes | 12.0 | 70.1 | +| YOLOv5-l | 512 | 32 | Yes | 10.0 | 73.1 | -Please refer to [YOLOv7](https://github.com/open-mmlab/mmyolo/blob/main/configs/yolov7)。 +Please see below items for more details -### YOLOv8 +- [YOLOv5](https://github.com/open-mmlab/mmyolo/blob/main/configs/yolov5) -Please refer to [YOLOv8](https://github.com/open-mmlab/mmyolo/blob/main/configs/yolov8)。 +## CrowdHuman dataset -### YOLOX +| Backbone | size | SyncBN | AMP | Mem (GB) | ignore_iof_thr | box AP50(CrowDHuman Metric) | MR | JI | +| :------: | :--: | :----: | :-: | :------: | :------------: | :-------------------------: | :--: | :---: | +| YOLOv5-s | 640 | Yes | Yes | 2.6 | -1 | 85.79 | 48.7 | 75.33 | +| YOLOv5-s | 640 | Yes | Yes | 2.6 | 0.5 | 86.17 | 48.8 | 75.87 | -Please refer to [YOLOX](https://github.com/open-mmlab/mmyolo/blob/main/configs/yolox)。 +Please see below items for more details -### PPYOLO-E +- [YOLOv5](https://github.com/open-mmlab/mmyolo/blob/main/configs/yolov5) -Please refer to [PPYOLO-E](https://github.com/open-mmlab/mmyolo/blob/main/configs/ppyoloe)。 - -### RTMDet - -Please refer to [RTMDet](https://github.com/open-mmlab/mmyolo/blob/main/configs/rtmdet)。 +## DOTA 1.0 dataset diff --git a/docs/zh_cn/get_started/overview.md b/docs/zh_cn/get_started/overview.md index c87454ba..81a9c436 100644 --- a/docs/zh_cn/get_started/overview.md +++ b/docs/zh_cn/get_started/overview.md @@ -12,6 +12,16 @@ MMYOLO 是一个基于 PyTorch 和 MMDetection 的 YOLO 系列算法开源工具 愿景图
+目前支持的任务如下: + +
+支持的任务 + +- 目标检测 +- 旋转框目标检测 + +
+ 目前支持的 YOLO 系列算法如下:
@@ -27,13 +37,15 @@ MMYOLO 是一个基于 PyTorch 和 MMDetection 的 YOLO 系列算法开源工具
-目前支持的任务如下: +目前支持的数据集如下:
-支持的任务 +支持的数据集 -- 目标检测 -- 旋转框目标检测 +- COCO Dataset +- VOC Dataset +- CrowdHuman Dataset +- DOTA 1.0 Dataset
diff --git a/docs/zh_cn/index.rst b/docs/zh_cn/index.rst index da36bb3f..b7b022f7 100644 --- a/docs/zh_cn/index.rst +++ b/docs/zh_cn/index.rst @@ -79,19 +79,14 @@ .. toctree:: - :maxdepth: 1 + :maxdepth: 2 :caption: 进阶教程 advanced_guides/cross-library_application.md -.. toctree:: - :maxdepth: 1 - :caption: 接口文档(英文) - - api.rst .. toctree:: - :maxdepth: 1 + :maxdepth: 2 :caption: 模型仓库 model_zoo.md @@ -105,6 +100,11 @@ notes/conventions.md notes/code_style.md +.. toctree:: + :maxdepth: 1 + :caption: 接口文档(英文) + + api.rst .. toctree:: :caption: 语言切换 diff --git a/docs/zh_cn/model_zoo.md b/docs/zh_cn/model_zoo.md index c42cbf7a..9f52585d 100644 --- a/docs/zh_cn/model_zoo.md +++ b/docs/zh_cn/model_zoo.md @@ -1,36 +1,90 @@ -# 模型库 +# 模型库和评测 -## 共同设置 +本页面用于汇总 MMYOLO 中支持的各类模型性能和相关评测指标,方便用户对比分析。 -- 所有模型都是在 `coco_2017_train` 上训练,在 `coco_2017_val` 上测试。 -- 我们使用分布式训练。 +## COCO 数据集 -## Baselines +| Model | Arch | Size | Batch Size | Epoch | SyncBN | AMP | Mem (GB) | Params(M) | FLOPs(G) | TRT-FP16-GPU-Latency(ms) | Box AP | TTA Box AP | +| :--------------: | :--: | :--: | :--------: | :---: | :----: | :-: | :------: | :-------: | :------: | :----------------------: | :----: | :--------: | +| YOLOv5-n | P5 | 640 | 8xb16 | 300 | Yes | Yes | 1.5 | 1.87 | 2.26 | 1.14 | 28.0 | 30.7 | +| YOLOv6-v2.0-n | P5 | 640 | 8xb32 | 400 | Yes | Yes | 6.04 | 4.32 | 5.52 | 1.37 | 36.2 | | +| YOLOv8-n | P5 | 640 | 8xb16 | 500 | Yes | Yes | 2.5 | 3.16 | 4.4 | 1.53 | 37.4 | 39.9 | +| RTMDet-tiny | P5 | 640 | 8xb32 | 300 | Yes | No | 11.9 | 4.90 | 8.09 | 2.31 | 41.8 | 43.2 | +| YOLOv6-v2.0-tiny | P5 | 640 | 8xb32 | 400 | Yes | Yes | 8.13 | 9.70 | 12.37 | 2.19 | 41.0 | | +| YOLOv7-tiny | P5 | 640 | 8xb16 | 300 | Yes | Yes | 2.7 | 6.23 | 6.89 | 1.88 | 37.5 | | +| YOLOX-tiny | P5 | 416 | 8xb32 | 300 | No | Yes | 4.9 | 5.06 | 7.63 | 1.19 | 34.3 | | +| RTMDet-s | P5 | 640 | 8xb32 | 300 | Yes | No | 16.3 | 8.89 | 14.84 | 2.89 | 45.7 | 47.3 | +| YOLOv5-s | P5 | 640 | 8xb16 | 300 | Yes | Yes | 2.7 | 7.24 | 8.27 | 1.89 | 37.7 | 40.2 | +| YOLOv6-v2.0-s | P5 | 640 | 8xb32 | 400 | Yes | Yes | 8.88 | 17.22 | 21.94 | 2.67 | 44.0 | | +| YOLOv8-s | P5 | 640 | 8xb16 | 500 | Yes | Yes | 4.0 | 11.17 | 14.36 | 2.61 | 45.1 | 46.8 | +| YOLOX-s | P5 | 640 | 8xb32 | 300 | No | Yes | 9.8 | 8.97 | 13.40 | 2.38 | 41.9 | | +| PPYOLOE+ -s | P5 | 640 | 8xb8 | 80 | Yes | No | 4.7 | 7.93 | 8.68 | 2.54 | 43.5 | | +| RTMDet-m | P5 | 640 | 8xb32 | 300 | Yes | No | 29.0 | 24.71 | 39.21 | 6.23 | 50.2 | 51.9 | +| YOLOv5-m | P5 | 640 | 8xb16 | 300 | Yes | Yes | 5.0 | 21.19 | 24.53 | 4.28 | 45.3 | 46.9 | +| YOLOv6-v2.0-m | P5 | 640 | 8xb32 | 300 | Yes | Yes | 16.69 | 34.25 | 40.7 | 5.12 | 48.4 | | +| YOLOv8-m | P5 | 640 | 8xb16 | 500 | Yes | Yes | 7.0 | 25.9 | 39.57 | 5.78 | 50.6 | 52.3 | +| YOLOX-m | P5 | 640 | 8xb32 | 300 | No | Yes | 17.6 | 25.33 | 36.88 | 5.31 | 47.5 | | +| PPYOLOE+ -m | P5 | 640 | 8xb8 | 80 | Yes | No | 8.4 | 23.43 | 24.97 | 5.47 | 49.5 | | +| RTMDet-l | P5 | 640 | 8xb32 | 300 | Yes | No | 45.2 | 52.32 | 80.12 | 10.13 | 52.3 | 53.7 | +| YOLOv5-l | P5 | 640 | 8xb16 | 300 | Yes | Yes | 8.1 | 46.56 | 54.65 | 6.8 | 48.8 | 49.9 | +| YOLOv6-v2.0-l | P5 | 640 | 8xb32 | 300 | Yes | Yes | 20.86 | 58.53 | 71.43 | 8.78 | 51.0 | | +| YOLOv7-l | P5 | 640 | 8xb16 | 300 | Yes | Yes | 10.3 | 36.93 | 52.42 | 6.63 | 50.9 | | +| YOLOv8-l | P5 | 640 | 8xb16 | 500 | Yes | Yes | 9.1 | 43.69 | 82.73 | 8.97 | 53.0 | 54.4 | +| YOLOX-l | P5 | 640 | 8xb8 | 300 | No | Yes | 8.0 | 54.21 | 77.83 | 9.23 | 50.1 | | +| PPYOLOE+ -l | P5 | 640 | 8xb8 | 80 | Yes | No | 13.2 | 52.20 
| 55.05 | 8.2 | 52.6 | | +| RTMDet-x | P5 | 640 | 8xb32 | 300 | Yes | No | 63.4 | 94.86 | 145.41 | 17.89 | 52.8 | 54.2 | +| YOLOv7-x | P5 | 640 | 8xb16 | 300 | Yes | Yes | 13.7 | 71.35 | 95.06 | 11.63 | 52.8 | | +| YOLOv8-x | P5 | 640 | 8xb16 | 500 | Yes | Yes | 12.4 | 68.23 | 132.10 | 14.22 | 54.0 | 55.0 | +| YOLOX-x | P5 | 640 | 8xb8 | 300 | No | Yes | 9.8 | 99.07 | 144.39 | 15.35 | 51.4 | | +| PPYOLOE+ -x | P5 | 640 | 8xb8 | 80 | Yes | No | 19.1 | 98.42 | 105.48 | 14.02 | 54.2 | | +| YOLOv5-n | P6 | 1280 | 8xb16 | 300 | Yes | Yes | 5.8 | 3.25 | 2.30 | | 35.9 | | +| YOLOv5-s | P6 | 1280 | 8xb16 | 300 | Yes | Yes | 10.5 | 12.63 | 8.45 | | 44.4 | | +| YOLOv5-m | P6 | 1280 | 8xb16 | 300 | Yes | Yes | 19.1 | 35.73 | 25.05 | | 51.3 | | +| YOLOv5-l | P6 | 1280 | 8xb16 | 300 | Yes | Yes | 30.5 | 76.77 | 55.77 | | 53.7 | | +| YOLOv7-w | P6 | 1280 | 8xb16 | 300 | Yes | Yes | 27.0 | 82.31 | 45.07 | | 54.1 | | +| YOLOv7-e | P6 | 1280 | 8xb16 | 300 | Yes | Yes | 42.5 | 114.69 | 64.48 | | 55.1 | | -### YOLOv5 +- 所有模型均使用 COCO train2017 作为训练集,在 COCO val2017 上验证精度 +- TRT-FP16-GPU-Latency(ms) 是指在 NVIDIA Tesla T4 设备上采用 TensorRT 8.4,batch size 为 1, 测试 shape 为 640x640 且仅包括模型 forward 的 GPU Compute time (YOLOX-tiny 测试 shape 是 416x416) +- 模型参数量和 FLOPs 是采用 [get_flops](https://github.com/open-mmlab/mmyolo/blob/dev/tools/analysis_tools/get_flops.py) 脚本得到,不同的运算方式可能略有不同 +- RTMDet 性能是通过 [MMRazor 知识蒸馏](https://github.com/open-mmlab/mmyolo/blob/dev/configs/rtmdet/distillation/README.md) 训练后的结果 +- MMYOLO 中暂时只实现了 YOLOv6 2.0 版本,并且 L 和 M 为没有经过知识蒸馏的结果 +- YOLOv8 是引入了实例分割标注优化后的结果,YOLOv5、YOLOv6 和 YOLOv7 没有采用实例分割标注优化 +- PPYOLOE+ 使用 Obj365 作为预训练权重,因此 COCO 训练的 epoch 数只需要 80 +- YOLOX-tiny、YOLOX-s 和 YOLOX-m 为采用了 RTMDet 中所提优化器参数训练所得,性能相比原始实现有不同程度提升 -请参考 [YOLOv5](https://github.com/open-mmlab/mmyolo/blob/main/configs/yolov5)。 +详情见如下内容 -### YOLOv6 +- [RTMDet](https://github.com/open-mmlab/mmyolo/blob/main/configs/rtmdet) +- [YOLOv5](https://github.com/open-mmlab/mmyolo/blob/main/configs/yolov5) +- [YOLOv6](https://github.com/open-mmlab/mmyolo/blob/main/configs/yolov6) +- [YOLOv7](https://github.com/open-mmlab/mmyolo/blob/main/configs/yolov7) +- [YOLOv8](https://github.com/open-mmlab/mmyolo/blob/main/configs/yolov8) +- [YOLOX](https://github.com/open-mmlab/mmyolo/blob/main/configs/yolox) +- [PPYOLO-E](https://github.com/open-mmlab/mmyolo/blob/main/configs/ppyoloe) -请参考 [YOLOv6](https://github.com/open-mmlab/mmyolo/blob/main/configs/yolov6)。 +## VOC 数据集 -### YOLOv7 +| Backbone | size | Batchsize | AMP | Mem (GB) | box AP(COCO metric) | +| :------: | :--: | :-------: | :-: | :------: | :-----------------: | +| YOLOv5-n | 512 | 64 | Yes | 3.5 | 51.2 | +| YOLOv5-s | 512 | 64 | Yes | 6.5 | 62.7 | +| YOLOv5-m | 512 | 64 | Yes | 12.0 | 70.1 | +| YOLOv5-l | 512 | 32 | Yes | 10.0 | 73.1 | -请参考 [YOLOv7](https://github.com/open-mmlab/mmyolo/blob/main/configs/yolov7)。 +详情见如下内容 -### YOLOv8 +- [YOLOv5](https://github.com/open-mmlab/mmyolo/blob/main/configs/yolov5) -请参考 [YOLOv8](https://github.com/open-mmlab/mmyolo/blob/main/configs/yolov8)。 +## CrowdHuman 数据集 -### YOLOX +| Backbone | size | SyncBN | AMP | Mem (GB) | ignore_iof_thr | box AP50(CrowDHuman Metric) | MR | JI | +| :------: | :--: | :----: | :-: | :------: | :------------: | :-------------------------: | :--: | :---: | +| YOLOv5-s | 640 | Yes | Yes | 2.6 | -1 | 85.79 | 48.7 | 75.33 | +| YOLOv5-s | 640 | Yes | Yes | 2.6 | 0.5 | 86.17 | 48.8 | 75.87 | -请参考 [YOLOX](https://github.com/open-mmlab/mmyolo/blob/main/configs/yolox)。 +详情见如下内容 -### PPYOLO-E +- 
[YOLOv5](https://github.com/open-mmlab/mmyolo/blob/main/configs/yolov5) -请参考 [PPYOLO-E](https://github.com/open-mmlab/mmyolo/blob/main/configs/ppyoloe)。 - -### RTMDet - -请参考 [RTMDet](https://github.com/open-mmlab/mmyolo/blob/main/configs/rtmdet)。 +## DOTA 1.0 数据集 diff --git a/tools/analysis_tools/get_flops.py b/tools/analysis_tools/get_flops.py index 3e86bc57..965660f7 100644 --- a/tools/analysis_tools/get_flops.py +++ b/tools/analysis_tools/get_flops.py @@ -11,6 +11,8 @@ from mmengine.logging import MMLogger from mmengine.model import revert_sync_batchnorm from mmengine.registry import init_default_scope +from mmyolo.utils import switch_to_deploy + def parse_args(): parser = argparse.ArgumentParser(description='Get a detector flops') @@ -68,6 +70,7 @@ def inference(args, logger): model.cuda() model = revert_sync_batchnorm(model) model.eval() + switch_to_deploy(model) # input tensor # automatically generate a input tensor with the given input_shape. From 37e5e991c26974eac90af2d023f87f8c35847280 Mon Sep 17 00:00:00 2001 From: GatiLin <2267379130@qq.com> Date: Wed, 1 Mar 2023 17:33:17 +0800 Subject: [PATCH 56/64] add plot image for model-zoo (#608) --- docs/en/model_zoo.md | 4 ++++ docs/zh_cn/model_zoo.md | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/docs/en/model_zoo.md b/docs/en/model_zoo.md index 5934821e..1547bb9d 100644 --- a/docs/en/model_zoo.md +++ b/docs/en/model_zoo.md @@ -4,6 +4,10 @@ This page is used to summarize the performance and related evaluation metrics of ## COCO dataset +
+ +
+ | Model | Arch | Size | Batch Size | Epoch | SyncBN | AMP | Mem (GB) | Params(M) | FLOPs(G) | TRT-FP16-GPU-Latency(ms) | Box AP | TTA Box AP | | :--------------: | :--: | :--: | :--------: | :---: | :----: | :-: | :------: | :-------: | :------: | :----------------------: | :----: | :--------: | | YOLOv5-n | P5 | 640 | 8xb16 | 300 | Yes | Yes | 1.5 | 1.87 | 2.26 | 1.14 | 28.0 | 30.7 | diff --git a/docs/zh_cn/model_zoo.md b/docs/zh_cn/model_zoo.md index 9f52585d..1091f9f5 100644 --- a/docs/zh_cn/model_zoo.md +++ b/docs/zh_cn/model_zoo.md @@ -4,6 +4,10 @@ ## COCO 数据集 +
+ +
+ | Model | Arch | Size | Batch Size | Epoch | SyncBN | AMP | Mem (GB) | Params(M) | FLOPs(G) | TRT-FP16-GPU-Latency(ms) | Box AP | TTA Box AP | | :--------------: | :--: | :--: | :--------: | :---: | :----: | :-: | :------: | :-------: | :------: | :----------------------: | :----: | :--------: | | YOLOv5-n | P5 | 640 | 8xb16 | 300 | Yes | Yes | 1.5 | 1.87 | 2.26 | 1.14 | 28.0 | 30.7 | From 4b996f10a534093b18bb955785e657f24b9848e2 Mon Sep 17 00:00:00 2001 From: Yanyi Liu Date: Thu, 2 Mar 2023 10:27:46 +0800 Subject: [PATCH 57/64] Rotated object detection and RTMDet-R (#513) * init * add cfg * update align * update * fix regularize box * fix comment * update config * remove ckpt * update * make mmrotate optional * fix doc * add mmrotate req * support large_demo with rbbox * add ut * update * add doc v01 * update doc * fix doc * update * update * update readme * update comments * fix * fix doc * fix doc * fix * update * update * fix large * update doc * update readme * fix config * fix configs * inprove * update doc * update assigner * update ut * remove rdsl assigner * rename aug config * speedup ut * add comment * fix data root * remove doc * remove empty folder * add docs * rename configs * fix readme * fix readme * fix configs * revert * fix name * fix table * fix doc link * fix doc link * update * update * update * Refactor dota splits * add shapely * fix typo * fix ci * change * fix type * uppdata link * uppdata link * add some comment * update --------- Co-authored-by: huanghaian --- .dev_scripts/gather_models.py | 1 + README.md | 1 + README_zh-CN.md | 1 + configs/rtmdet/README.md | 29 +- configs/rtmdet/metafile.yml | 82 +++ ...rtmdet-r_l_syncbn_fast_2xb4-36e_dota-ms.py | 30 + .../rtmdet-r_l_syncbn_fast_2xb4-36e_dota.py | 331 +++++++++ ...mdet-r_l_syncbn_fast_2xb4-aug-100e_dota.py | 168 +++++ ...cbn_fast_coco-pretrain_2xb4-36e_dota-ms.py | 20 + ...rtmdet-r_m_syncbn_fast_2xb4-36e_dota-ms.py | 33 + .../rtmdet-r_m_syncbn_fast_2xb4-36e_dota.py | 33 + .../rtmdet-r_s_fast_1xb8-36e_dota-ms.py | 38 + .../rotated/rtmdet-r_s_fast_1xb8-36e_dota.py | 38 + .../rtmdet-r_tiny_fast_1xb8-36e_dota-ms.py | 38 + .../rtmdet-r_tiny_fast_1xb8-36e_dota.py | 38 + demo/large_image_demo.py | 4 +- .../recommended_topics/dataset_preparation.md | 144 ++++ docs/en/tutorials/rotated_detection.md | 3 + docs/zh_cn/index.rst | 1 + .../recommended_topics/dataset_preparation.md | 143 ++++ docs/zh_cn/tutorials/rotated_detection.md | 332 +++++++++ mmyolo/datasets/__init__.py | 3 +- mmyolo/datasets/transforms/__init__.py | 9 +- mmyolo/datasets/transforms/transforms.py | 48 ++ mmyolo/datasets/yolov5_dota.py | 29 + mmyolo/models/dense_heads/__init__.py | 5 +- .../models/dense_heads/rotated_rtmdet_head.py | 649 ++++++++++++++++++ .../assigners/batch_dsl_assigner.py | 111 ++- mmyolo/models/task_modules/coders/__init__.py | 6 +- .../coders/distance_angle_point_coder.py | 94 +++ mmyolo/models/utils/misc.py | 41 +- mmyolo/utils/large_image.py | 29 +- requirements/mmrotate.txt | 1 + requirements/tests.txt | 1 + .../test_rotated_rtmdet_head.py | 264 +++++++ .../test_assigners/test_batch_dsl_assigner.py | 192 ++++++ tests/test_models/test_utils/test_misc.py | 30 +- tools/dataset_converters/dota/README.md | 3 + tools/dataset_converters/dota/dota_split.py | 603 ++++++++++++++++ .../dota/split_config/multi_scale.json | 19 + .../dota/split_config/single_scale.json | 19 + tools/model_converters/rtmdet_to_mmyolo.py | 2 +- 42 files changed, 3608 insertions(+), 58 deletions(-) create mode 100644 
configs/rtmdet/rotated/rtmdet-r_l_syncbn_fast_2xb4-36e_dota-ms.py create mode 100644 configs/rtmdet/rotated/rtmdet-r_l_syncbn_fast_2xb4-36e_dota.py create mode 100644 configs/rtmdet/rotated/rtmdet-r_l_syncbn_fast_2xb4-aug-100e_dota.py create mode 100644 configs/rtmdet/rotated/rtmdet-r_l_syncbn_fast_coco-pretrain_2xb4-36e_dota-ms.py create mode 100644 configs/rtmdet/rotated/rtmdet-r_m_syncbn_fast_2xb4-36e_dota-ms.py create mode 100644 configs/rtmdet/rotated/rtmdet-r_m_syncbn_fast_2xb4-36e_dota.py create mode 100644 configs/rtmdet/rotated/rtmdet-r_s_fast_1xb8-36e_dota-ms.py create mode 100644 configs/rtmdet/rotated/rtmdet-r_s_fast_1xb8-36e_dota.py create mode 100644 configs/rtmdet/rotated/rtmdet-r_tiny_fast_1xb8-36e_dota-ms.py create mode 100644 configs/rtmdet/rotated/rtmdet-r_tiny_fast_1xb8-36e_dota.py create mode 100644 docs/en/tutorials/rotated_detection.md create mode 100644 docs/zh_cn/tutorials/rotated_detection.md create mode 100644 mmyolo/datasets/yolov5_dota.py create mode 100644 mmyolo/models/dense_heads/rotated_rtmdet_head.py create mode 100644 mmyolo/models/task_modules/coders/distance_angle_point_coder.py create mode 100644 requirements/mmrotate.txt create mode 100644 tests/test_models/test_dense_heads/test_rotated_rtmdet_head.py create mode 100644 tests/test_models/test_task_modules/test_assigners/test_batch_dsl_assigner.py create mode 100644 tools/dataset_converters/dota/README.md create mode 100644 tools/dataset_converters/dota/dota_split.py create mode 100644 tools/dataset_converters/dota/split_config/multi_scale.json create mode 100644 tools/dataset_converters/dota/split_config/single_scale.json diff --git a/.dev_scripts/gather_models.py b/.dev_scripts/gather_models.py index 05dd2c2e..ba5039c2 100644 --- a/.dev_scripts/gather_models.py +++ b/.dev_scripts/gather_models.py @@ -110,6 +110,7 @@ def get_dataset_name(config): CocoDataset='COCO', YOLOv5CocoDataset='COCO', CocoPanopticDataset='COCO', + YOLOv5DOTADataset='DOTA 1.0', DeepFashionDataset='Deep Fashion', LVISV05Dataset='LVIS v0.5', LVISV1Dataset='LVIS v1', diff --git a/README.md b/README.md index 5c57ae6e..4caa1b40 100644 --- a/README.md +++ b/README.md @@ -232,6 +232,7 @@ For different parts from MMDetection, we have also prepared user guides and adva - [Learn about configs with YOLOv5](docs/en/tutorials/config.md) - [Data flow](docs/en/tutorials/data_flow.md) +- [Rotated detection](docs/en/tutorials/rotated_detection.md) - [Custom Installation](docs/en/tutorials/custom_installation.md) - [Common Warning Notes](docs/zh_cn/tutorials/warning_notes.md) - [FAQ](docs/en/tutorials/faq.md) diff --git a/README_zh-CN.md b/README_zh-CN.md index 3c5d70bf..c85beefe 100644 --- a/README_zh-CN.md +++ b/README_zh-CN.md @@ -254,6 +254,7 @@ MMYOLO 用法和 MMDetection 几乎一致,所有教程都是通用的,你也 - [学习 YOLOv5 配置文件](docs/zh_cn/tutorials/config.md) - [数据流](docs/zh_cn/tutorials/data_flow.md) +- [旋转目标检测](docs/zh_cn/tutorials/rotated_detection.md) - [自定义安装](docs/zh_cn/tutorials/custom_installation.md) - [常见警告说明](docs/zh_cn/tutorials/warning_notes.md) - [常见问题](docs/zh_cn/tutorials/faq.md) diff --git a/configs/rtmdet/README.md b/configs/rtmdet/README.md index ddcd52ba..94e86546 100644 --- a/configs/rtmdet/README.md +++ b/configs/rtmdet/README.md @@ -21,7 +21,7 @@ RTMDet-l model structure ## Results and Models -## Object Detection +### Object Detection | Model | size | Params(M) | FLOPs(G) | TRT-FP16-Latency(ms) | box AP | TTA box AP | Config | Download | | :------------: | :--: | :-------: | :------: | :------------------: | :---------: | :---------: | 
:---------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | @@ -42,6 +42,33 @@ RTMDet-l model structure 3. `TTA` means that Test Time Augmentation. It's perform 3 multi-scaling transformations on the image, followed by 2 flipping transformations (flipping and not flipping). You only need to specify `--tta` when testing to enable. see [TTA](https://github.com/open-mmlab/mmyolo/blob/dev/docs/en/common_usage/tta.md) for details. 4. \* means checkpoints are trained with knowledge distillation. More details can be found in [RTMDet distillation](./distillation). +### Rotated Object Detection + +RTMDet-R achieves state-of-the-art on various remote sensing datasets. + +| Backbone | pretrain | Epoch | Batch Size | Aug | mmAP | mAP50 | mAP75 | Mem (GB) | Params(M) | FLOPS(G) | TRT-FP16-Latency(ms) | Config | Download | +| :---------: | :------: | :---: | :--------: | :-------------: | :---: | :---: | :---: | :------: | :-------: | :------: | :------------------: | :--------------------------------------------------------------------------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| RTMDet-tiny | IN | 36 | 1xb8 | RR | 46.94 | 75.07 | 50.11 | 12.7 | 4.88 | 20.45 | 4.40 | [config](./rotated/rtmdet-r_tiny_fast_1xb8-36e_dota.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rotated/rtmdet-r_tiny_fast_1xb8-36e_dota/rtmdet-r_tiny_fast_1xb8-36e_dota_20230228_162210-e8ccfb1c.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rotated/rtmdet-r_tiny_fast_1xb8-36e_dota/rtmdet-r_tiny_fast_1xb8-36e_dota_20230228_162210.log.json) | +| RTMDet-s | IN | 36 | 1xb8 | RR | 48.99 | 77.33 | 52.65 | 16.6 | 8.86 | 37.62 | 4.86 | [config](./rotated/rtmdet-r_s_fast_1xb8-36e_dota.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rotated/rtmdet-r_s_fast_1xb8-36e_dota/rtmdet-r_s_fast_1xb8-36e_dota_20230224_110307-3946a5aa.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rotated/rtmdet-r_s_fast_1xb8-36e_dota/rtmdet-r_s_fast_1xb8-36e_dota_20230224_110307.log.json) | +| RTMDet-m | IN | 36 | 2xb4 | RR | 50.38 | 78.43 | 54.28 | 10.9 | 24.67 | 99.76 | 7.82 | [config](./rotated/rtmdet-r_m_syncbn_fast_2xb4-36e_dota.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rotated/rtmdet-r_m_syncbn_fast_2xb4-36e_dota/rtmdet-r_m_syncbn_fast_2xb4-36e_dota_20230224_124237-29ae1619.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rotated/rtmdet-r_m_syncbn_fast_2xb4-36e_dota/rtmdet-r_m_syncbn_fast_2xb4-36e_dota_20230224_124237.log.json) | +| RTMDet-l | IN | 36 | 2xb4 | RR | 50.61 | 78.66 | 54.95 | 16.1 | 52.27 | 204.21 | 10.82 | [config](./rotated/rtmdet-r_l_syncbn_fast_2xb4-36e_dota.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rotated/rtmdet-r_l_syncbn_fast_2xb4-36e_dota/rtmdet-r_l_syncbn_fast_2xb4-36e_dota_20230224_124544-38bc5f08.pth) \| 
[log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rotated/rtmdet-r_l_syncbn_fast_2xb4-36e_dota/rtmdet-r_l_syncbn_fast_2xb4-36e_dota_20230224_124544.log.json) | +| RTMDet-tiny | IN | 36 | 1xb8 | MS+RR | - | - | - | | 4.88 | 20.45 | 4.40 | [config](./rotated/rtmdet-r_tiny_fast_1xb8-36e_dota-ms.py) | \| | +| RTMDet-s | IN | 36 | 1xb8 | MS+RR | - | - | - | | 8.86 | 37.62 | 4.86 | [config](./rotated/rtmdet-r_s_fast_1xb8-36e_dota-ms.py) | \| | +| RTMDet-m | IN | 36 | 2xb4 | MS+RR | - | - | - | | 24.67 | 99.76 | 7.82 | [config](./rotated/rtmdet-r_m_syncbn_fast_2xb4-36e_dota-ms.py) | \| | +| RTMDet-l | IN | 36 | 2xb4 | MS+RR | - | - | - | | 52.27 | 204.21 | 10.82 | [config](./rotated/rtmdet-r_l_syncbn_fast_2xb4-36e_dota-ms.py) | \| | +| RTMDet-l | COCO | 36 | 2xb4 | MS+RR | - | - | - | | 52.27 | 204.21 | 10.82 | [config](./rotated/rtmdet-r_l_syncbn_fast_coco-pretrain_2xb4-36e_dota-ms.py) | \| | +| RTMDet-l | IN | 100 | 2xb4 | Mixup+Mosaic+RR | 55.05 | 80.14 | 61.32 | 19.6 | 52.27 | 204.21 | 10.82 | [config](./rotated/rtmdet-r_l_syncbn_fast_2xb4-aug-100e_dota.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rotated/rtmdet-r_l_syncbn_fast_2xb4-aug-100e_dota/rtmdet-r_l_syncbn_fast_2xb4-aug-100e_dota_20230224_124735-ed4ea966.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rotated/rtmdet-r_l_syncbn_fast_2xb4-aug-100e_dota/rtmdet-r_l_syncbn_fast_2xb4-aug-100e_dota_20230224_124735.log.json) | + +**Note**: + +1. Please follow doc to get start with rotated detection. [Rotated Object Detection](../../docs/zh_cn/tutorials/rotated_detection.md) +2. We follow the latest metrics from the DOTA evaluation server, original voc format mAP is now mAP50. +3. All models trained with image size 1024\*1024. +4. `IN` means ImageNet pretrain, `COCO` means COCO pretrain. +5. For Aug, RR means `RandomRotate`, MS means multi-scale augmentation in data prepare. +6. The inference speed here is measured on an NVIDIA 2080Ti GPU with TensorRT 8.4.3, cuDNN 8.2.0, FP16, batch size=1, and with NMS. +7. Currently, the training process of RTMDet-R tiny is unstable and may have 1% accuracy fluctuation, we will continue to investigate why. 
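+
+The RTMDet-R head also supports regressing horizontal boxes together with a separate angle loss (the `use_hbbox_loss` option documented in the config comments). The snippet below is only an illustrative sketch rather than a released config; the loss types and weights are assumptions:
+
+```python
+_base_ = './rotated/rtmdet-r_l_syncbn_fast_2xb4-36e_dota.py'
+
+model = dict(
+    bbox_head=dict(
+        # regress horizontal boxes and supervise the angle with its own loss
+        use_hbbox_loss=True,
+        loss_bbox=dict(type='mmdet.IoULoss', mode='linear', loss_weight=2.0),
+        loss_angle=dict(type='mmdet.L1Loss', loss_weight=0.2)))
+```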
+ ## Citation ```latex diff --git a/configs/rtmdet/metafile.yml b/configs/rtmdet/metafile.yml index 291cbc99..704a44ba 100644 --- a/configs/rtmdet/metafile.yml +++ b/configs/rtmdet/metafile.yml @@ -13,6 +13,20 @@ Collections: Code: URL: https://github.com/open-mmlab/mmyolo/blob/main/mmyolo/models/detectors/yolo_detector.py#L12 Version: v0.1.1 + - Name: Rotated_RTMDet + Metadata: + Training Data: DOTAv1.0 + Training Techniques: + - AdamW + - Flat Cosine Annealing + Training Resources: 1x A100 GPUs + Architecture: + - CSPNeXt + - CSPNeXtPAFPN + README: configs/rtmdet/README.md + Code: + URL: https://github.com/open-mmlab/mmyolo/blob/main/mmyolo/models/detectors/yolo_detector.py#L12 + Version: v0.1.1 Models: - Name: rtmdet_tiny_syncbn_fast_8xb32-300e_coco @@ -131,3 +145,71 @@ Models: Metrics: box AP: 52.8 Weights: https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_x_syncbn_fast_8xb32-300e_coco/rtmdet_x_syncbn_fast_8xb32-300e_coco_20221231_100345-b85cd476.pth + + - Name: rtmdet-r_tiny_fast_1xb8-36e_dota + In Collection: Rotated_RTMDet + Config: configs/rtmdet/rotated/rtmdet-r_tiny_fast_1xb8-36e_dota.py + Metadata: + Training Memory (GB): 12.7 + Epochs: 36 + Results: + - Task: Oriented Object Detection + Dataset: DOTAv1.0 + Metrics: + mAP: 75.07 + Weights: https://download.openmmlab.com/mmyolo/v0/rtmdet/rotated/rtmdet-r_tiny_fast_1xb8-36e_dota/rtmdet-r_tiny_fast_1xb8-36e_dota_20230228_162210-e8ccfb1c.pth + + - Name: rtmdet-r_s_fast_1xb8-36e_dota + In Collection: Rotated_RTMDet + Config: configs/rtmdet/rotated/rtmdet-r_s_fast_1xb8-36e_dota.py + Metadata: + Training Memory (GB): 16.6 + Epochs: 36 + Results: + - Task: Oriented Object Detection + Dataset: DOTAv1.0 + Metrics: + mAP: 77.33 + Weights: https://download.openmmlab.com/mmyolo/v0/rtmdet/rotated/rtmdet-r_s_fast_1xb8-36e_dota/rtmdet-r_s_fast_1xb8-36e_dota_20230224_110307-3946a5aa.pth + + - Name: rtmdet-r_m_syncbn_fast_2xb4-36e_dota + In Collection: Rotated_RTMDet + Config: configs/rtmdet/rotated/rtmdet-r_m_syncbn_fast_2xb4-36e_dota.py + Metadata: + Training Resources: 2x A100 GPUs + Training Memory (GB): 10.9 + Epochs: 36 + Results: + - Task: Oriented Object Detection + Dataset: DOTAv1.0 + Metrics: + mAP: 78.43 + Weights: https://download.openmmlab.com/mmyolo/v0/rtmdet/rotated/rtmdet-r_m_syncbn_fast_2xb4-36e_dota/rtmdet-r_m_syncbn_fast_2xb4-36e_dota_20230224_124237-29ae1619.pth + + - Name: rtmdet-r_l_syncbn_fast_2xb4-36e_dota + In Collection: Rotated_RTMDet + Config: configs/rtmdet/rotated/rtmdet-r_l_syncbn_fast_2xb4-36e_dota.py + Metadata: + Training Resources: 2x A100 GPUs + Training Memory (GB): 16.1 + Epochs: 36 + Results: + - Task: Oriented Object Detection + Dataset: DOTAv1.0 + Metrics: + mAP: 78.66 + Weights: https://download.openmmlab.com/mmyolo/v0/rtmdet/rotated/rtmdet-r_l_syncbn_fast_2xb4-36e_dota/rtmdet-r_l_syncbn_fast_2xb4-36e_dota_20230224_124544-38bc5f08.pth + + - Name: rtmdet-r_l_syncbn_fast_2xb4-aug-100e_dota + In Collection: Rotated_RTMDet + Config: configs/rtmdet/rotated/rtmdet-r_l_syncbn_fast_2xb4-aug-100e_dota.py + Metadata: + Training Resources: 2x A100 GPUs + Training Memory (GB): 19.6 + Epochs: 100 + Results: + - Task: Oriented Object Detection + Dataset: DOTAv1.0 + Metrics: + mAP: 80.14 + Weights: https://download.openmmlab.com/mmyolo/v0/rtmdet/rotated/rtmdet-r_l_syncbn_fast_2xb4-aug-100e_dota/rtmdet-r_l_syncbn_fast_2xb4-aug-100e_dota_20230224_124735-ed4ea966.pth diff --git a/configs/rtmdet/rotated/rtmdet-r_l_syncbn_fast_2xb4-36e_dota-ms.py b/configs/rtmdet/rotated/rtmdet-r_l_syncbn_fast_2xb4-36e_dota-ms.py new 
file mode 100644 index 00000000..ef29a1d0 --- /dev/null +++ b/configs/rtmdet/rotated/rtmdet-r_l_syncbn_fast_2xb4-36e_dota-ms.py @@ -0,0 +1,30 @@ +_base_ = './rtmdet-r_l_syncbn_fast_2xb4-36e_dota.py' + +# ========================modified parameters====================== +data_root = 'data/split_ms_dota/' +# Path of test images folder +test_data_prefix = 'test/images/' +# Submission dir for result submit +submission_dir = './work_dirs/{{fileBasenameNoExtension}}/submission' + +# =======================Unmodified in most cases================== +train_dataloader = dict(dataset=dict(data_root=data_root)) + +val_dataloader = dict(dataset=dict(data_root=data_root)) + +# Inference on val dataset +test_dataloader = val_dataloader + +# Inference on test dataset and format the output results +# for submission. Note: the test set has no annotation. +# test_dataloader = dict( +# dataset=dict( +# data_root=data_root, +# ann_file='', # test set has no annotation +# data_prefix=dict(img_path=test_data_prefix), +# pipeline=_base_.test_pipeline)) +# test_evaluator = dict( +# type='mmrotate.DOTAMetric', +# format_only=True, +# merge_patches=True, +# outfile_prefix=submission_dir) diff --git a/configs/rtmdet/rotated/rtmdet-r_l_syncbn_fast_2xb4-36e_dota.py b/configs/rtmdet/rotated/rtmdet-r_l_syncbn_fast_2xb4-36e_dota.py new file mode 100644 index 00000000..1f34e137 --- /dev/null +++ b/configs/rtmdet/rotated/rtmdet-r_l_syncbn_fast_2xb4-36e_dota.py @@ -0,0 +1,331 @@ +_base_ = '../../_base_/default_runtime.py' + +checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-l_8xb256-rsb-a1-600e_in1k-6a760974.pth' # noqa + +# ========================Frequently modified parameters====================== +# -----data related----- +data_root = 'data/split_ss_dota/' +# Path of train annotation folder +train_ann_file = 'trainval/annfiles/' +train_data_prefix = 'trainval/images/' # Prefix of train image path +# Path of val annotation folder +val_ann_file = 'trainval/annfiles/' +val_data_prefix = 'trainval/images/' # Prefix of val image path +# Path of test images folder +test_data_prefix = 'test/images/' + +# Submission dir for result submit +submission_dir = './work_dirs/{{fileBasenameNoExtension}}/submission' + +num_classes = 15 # Number of classes for classification +# Batch size of a single GPU during training +train_batch_size_per_gpu = 4 +# Worker to pre-fetch data for each single GPU during training +train_num_workers = 8 +# persistent_workers must be False if num_workers is 0. +persistent_workers = True + +# -----train val related----- +# Base learning rate for optim_wrapper. Corresponding to 1xb8=8 bs +base_lr = 0.00025 # 0.004 / 16 +max_epochs = 36 # Maximum training epochs + +model_test_cfg = dict( + # The config of multi-label for multi-class prediction. + multi_label=True, + # Decode rbox with angle, For RTMDet-R, Defaults to True. + # When set to True, use rbox coder such as DistanceAnglePointCoder + # When set to False, use hbox coder such as DistancePointBBoxCoder + # different setting lead to different AP. + decode_with_angle=True, + # The number of boxes before NMS + nms_pre=30000, + score_thr=0.05, # Threshold to filter out boxes. 
+ nms=dict(type='nms_rotated', iou_threshold=0.1), # NMS type and threshold + max_per_img=2000) # Max number of detections of each image + +# ========================Possible modified parameters======================== +# -----data related----- +img_scale = (1024, 1024) # width, height +# ratio for random rotate +random_rotate_ratio = 0.5 +# label ids for rect objs +rotate_rect_obj_labels = [9, 11] +# Dataset type, this will be used to define the dataset +dataset_type = 'YOLOv5DOTADataset' +# Batch size of a single GPU during validation +val_batch_size_per_gpu = 8 +# Worker to pre-fetch data for each single GPU during validation +val_num_workers = 8 + +# Config of batch shapes. Only on val. Not use in RTMDet-R +batch_shapes_cfg = None + +# -----model related----- +# The scaling factor that controls the depth of the network structure +deepen_factor = 1.0 +# The scaling factor that controls the width of the network structure +widen_factor = 1.0 +# Strides of multi-scale prior box +strides = [8, 16, 32] +# The angle definition for model +angle_version = 'le90' # le90, le135, oc are available options + +norm_cfg = dict(type='BN') # Normalization config + +# -----train val related----- +lr_start_factor = 1.0e-5 +dsl_topk = 13 # Number of bbox selected in each level +loss_cls_weight = 1.0 +loss_bbox_weight = 2.0 +qfl_beta = 2.0 # beta of QualityFocalLoss +weight_decay = 0.05 + +# Save model checkpoint and validation intervals +save_checkpoint_intervals = 1 +# The maximum checkpoints to keep. +max_keep_ckpts = 3 +# single-scale training is recommended to +# be turned on, which can speed up training. +env_cfg = dict(cudnn_benchmark=True) + +# ===============================Unmodified in most cases==================== +model = dict( + type='YOLODetector', + data_preprocessor=dict( + type='YOLOv5DetDataPreprocessor', + mean=[103.53, 116.28, 123.675], + std=[57.375, 57.12, 58.395], + bgr_to_rgb=False), + backbone=dict( + type='CSPNeXt', + arch='P5', + expand_ratio=0.5, + deepen_factor=deepen_factor, + widen_factor=widen_factor, + channel_attention=True, + norm_cfg=norm_cfg, + act_cfg=dict(type='SiLU', inplace=True), + init_cfg=dict( + type='Pretrained', prefix='backbone.', checkpoint=checkpoint)), + neck=dict( + type='CSPNeXtPAFPN', + deepen_factor=deepen_factor, + widen_factor=widen_factor, + in_channels=[256, 512, 1024], + out_channels=256, + num_csp_blocks=3, + expand_ratio=0.5, + norm_cfg=norm_cfg, + act_cfg=dict(type='SiLU', inplace=True)), + bbox_head=dict( + type='RotatedRTMDetHead', + head_module=dict( + type='RotatedRTMDetSepBNHeadModule', + num_classes=num_classes, + widen_factor=widen_factor, + in_channels=256, + stacked_convs=2, + feat_channels=256, + norm_cfg=norm_cfg, + act_cfg=dict(type='SiLU', inplace=True), + share_conv=True, + pred_kernel_size=1, + featmap_strides=strides), + prior_generator=dict( + type='mmdet.MlvlPointGenerator', offset=0, strides=strides), + bbox_coder=dict( + type='DistanceAnglePointCoder', angle_version=angle_version), + loss_cls=dict( + type='mmdet.QualityFocalLoss', + use_sigmoid=True, + beta=qfl_beta, + loss_weight=loss_cls_weight), + loss_bbox=dict( + type='mmrotate.RotatedIoULoss', + mode='linear', + loss_weight=loss_bbox_weight), + angle_version=angle_version, + # Used for angle encode and decode, similar to bbox coder + angle_coder=dict(type='mmrotate.PseudoAngleCoder'), + # If true, it will apply loss_bbox on horizontal box, and angle_loss + # needs to be specified. In this case the loss_bbox should use + # horizontal box loss e.g. IoULoss. 
Arg details can be seen in + # `docs/zh_cn/tutorials/rotated_detection.md` + use_hbbox_loss=False, + loss_angle=None), + train_cfg=dict( + assigner=dict( + type='BatchDynamicSoftLabelAssigner', + num_classes=num_classes, + topk=dsl_topk, + iou_calculator=dict(type='mmrotate.RBboxOverlaps2D'), + # RBboxOverlaps2D doesn't support batch input, use loop instead. + batch_iou=False), + allowed_border=-1, + pos_weight=-1, + debug=False), + test_cfg=model_test_cfg, +) + +train_pipeline = [ + dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args), + dict(type='LoadAnnotations', with_bbox=True, box_type='qbox'), + dict( + type='mmrotate.ConvertBoxType', + box_type_mapping=dict(gt_bboxes='rbox')), + dict(type='mmdet.Resize', scale=img_scale, keep_ratio=True), + dict( + type='mmdet.RandomFlip', + prob=0.75, + direction=['horizontal', 'vertical', 'diagonal']), + dict( + type='mmrotate.RandomRotate', + prob=random_rotate_ratio, + angle_range=180, + rotate_type='mmrotate.Rotate', + rect_obj_labels=rotate_rect_obj_labels), + dict(type='mmdet.Pad', size=img_scale, pad_val=dict(img=(114, 114, 114))), + dict(type='RegularizeRotatedBox', angle_version=angle_version), + dict(type='mmdet.PackDetInputs') +] + +val_pipeline = [ + dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args), + dict(type='mmdet.Resize', scale=img_scale, keep_ratio=True), + dict(type='mmdet.Pad', size=img_scale, pad_val=dict(img=(114, 114, 114))), + dict( + type='LoadAnnotations', + with_bbox=True, + box_type='qbox', + _scope_='mmdet'), + dict( + type='mmrotate.ConvertBoxType', + box_type_mapping=dict(gt_bboxes='rbox')), + dict( + type='mmdet.PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', + 'scale_factor')) +] + +test_pipeline = [ + dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args), + dict(type='mmdet.Resize', scale=img_scale, keep_ratio=True), + dict(type='mmdet.Pad', size=img_scale, pad_val=dict(img=(114, 114, 114))), + dict( + type='mmdet.PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', + 'scale_factor')) +] + +train_dataloader = dict( + batch_size=train_batch_size_per_gpu, + num_workers=train_num_workers, + persistent_workers=persistent_workers, + pin_memory=True, + collate_fn=dict(type='yolov5_collate'), + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( + type=dataset_type, + data_root=data_root, + ann_file=train_ann_file, + data_prefix=dict(img_path=train_data_prefix), + filter_cfg=dict(filter_empty_gt=True), + pipeline=train_pipeline)) + +val_dataloader = dict( + batch_size=val_batch_size_per_gpu, + num_workers=val_num_workers, + persistent_workers=persistent_workers, + pin_memory=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + ann_file=val_ann_file, + data_prefix=dict(img_path=val_data_prefix), + test_mode=True, + batch_shapes_cfg=batch_shapes_cfg, + pipeline=val_pipeline)) + +val_evaluator = dict(type='mmrotate.DOTAMetric', metric='mAP') + +# Inference on val dataset +test_dataloader = val_dataloader +test_evaluator = val_evaluator + +# Inference on test dataset and format the output results +# for submission. Note: the test set has no annotation. 
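+# The commented-out settings below switch the dataloader to the DOTA test
+# images; `DOTAMetric` then only formats the predictions (no metric is
+# computed), merges the patch-level results back to full images and writes
+# the submission files under `submission_dir`.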
+# test_dataloader = dict( +# batch_size=val_batch_size_per_gpu, +# num_workers=val_num_workers, +# persistent_workers=True, +# drop_last=False, +# sampler=dict(type='DefaultSampler', shuffle=False), +# dataset=dict( +# type=dataset_type, +# data_root=data_root, +# data_prefix=dict(img_path=test_data_prefix), +# test_mode=True, +# batch_shapes_cfg=batch_shapes_cfg, +# pipeline=test_pipeline)) +# test_evaluator = dict( +# type='mmrotate.DOTAMetric', +# format_only=True, +# merge_patches=True, +# outfile_prefix=submission_dir) + +# optimizer +optim_wrapper = dict( + type='OptimWrapper', + optimizer=dict(type='AdamW', lr=base_lr, weight_decay=weight_decay), + paramwise_cfg=dict( + norm_decay_mult=0, bias_decay_mult=0, bypass_duplicate=True)) + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', + start_factor=lr_start_factor, + by_epoch=False, + begin=0, + end=1000), + dict( + # use cosine lr from 150 to 300 epoch + type='CosineAnnealingLR', + eta_min=base_lr * 0.05, + begin=max_epochs // 2, + end=max_epochs, + T_max=max_epochs // 2, + by_epoch=True, + convert_to_iter_based=True), +] + +# hooks +default_hooks = dict( + checkpoint=dict( + type='CheckpointHook', + interval=save_checkpoint_intervals, + max_keep_ckpts=max_keep_ckpts, # only keep latest 3 checkpoints + save_best='auto')) + +custom_hooks = [ + dict( + type='EMAHook', + ema_type='ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + strict_load=False, + priority=49) +] + +train_cfg = dict( + type='EpochBasedTrainLoop', + max_epochs=max_epochs, + val_interval=save_checkpoint_intervals) + +val_cfg = dict(type='ValLoop') +test_cfg = dict(type='TestLoop') + +visualizer = dict(type='mmrotate.RotLocalVisualizer') diff --git a/configs/rtmdet/rotated/rtmdet-r_l_syncbn_fast_2xb4-aug-100e_dota.py b/configs/rtmdet/rotated/rtmdet-r_l_syncbn_fast_2xb4-aug-100e_dota.py new file mode 100644 index 00000000..45bbaa41 --- /dev/null +++ b/configs/rtmdet/rotated/rtmdet-r_l_syncbn_fast_2xb4-aug-100e_dota.py @@ -0,0 +1,168 @@ +_base_ = './rtmdet-r_l_syncbn_fast_2xb4-36e_dota.py' + +# This config use longer schedule with Mixup, Mosaic and Random Rotate. + +checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-l_8xb256-rsb-a1-600e_in1k-6a760974.pth' # noqa + +# ========================modified parameters====================== + +# Base learning rate for optim_wrapper. Corresponding to 1xb8=8 bs +base_lr = 0.00025 # 0.004 / 16 +lr_start_factor = 1.0e-5 +max_epochs = 100 # Maximum training epochs +# Change train_pipeline for final 10 epochs (stage 2) +num_epochs_stage2 = 10 + +img_scale = (1024, 1024) # width, height +# ratio range for random resize +random_resize_ratio_range = (0.1, 2.0) +# Cached images number in mosaic +mosaic_max_cached_images = 40 +# Number of cached images in mixup +mixup_max_cached_images = 20 +# ratio for random rotate +random_rotate_ratio = 0.5 +# label ids for rect objs +rotate_rect_obj_labels = [9, 11] + +# Save model checkpoint and validation intervals +save_checkpoint_intervals = 1 +# validation intervals in stage 2 +val_interval_stage2 = 1 +# The maximum checkpoints to keep. 
+max_keep_ckpts = 3 + +# Submission dir for result submit +submission_dir = './work_dirs/{{fileBasenameNoExtension}}/submission' + +# =======================Unmodified in most cases================== + +train_pipeline = [ + dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args), + dict(type='LoadAnnotations', with_bbox=True, box_type='qbox'), + dict( + type='mmrotate.ConvertBoxType', + box_type_mapping=dict(gt_bboxes='rbox')), + dict( + type='Mosaic', + img_scale=img_scale, + use_cached=True, + max_cached_images=mosaic_max_cached_images, + pad_val=114.0), + dict( + type='mmdet.RandomResize', + # img_scale is (width, height) + scale=(img_scale[0] * 2, img_scale[1] * 2), + ratio_range=random_resize_ratio_range, + resize_type='mmdet.Resize', + keep_ratio=True), + dict( + type='mmrotate.RandomRotate', + prob=random_rotate_ratio, + angle_range=180, + rotate_type='mmrotate.Rotate', + rect_obj_labels=rotate_rect_obj_labels), + dict(type='mmdet.RandomCrop', crop_size=img_scale), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='mmdet.RandomFlip', + prob=0.75, + direction=['horizontal', 'vertical', 'diagonal']), + dict(type='mmdet.Pad', size=img_scale, pad_val=dict(img=(114, 114, 114))), + dict( + type='YOLOv5MixUp', + use_cached=True, + max_cached_images=mixup_max_cached_images), + dict(type='mmdet.PackDetInputs') +] + +train_pipeline_stage2 = [ + dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args), + dict(type='LoadAnnotations', with_bbox=True, box_type='qbox'), + dict( + type='mmrotate.ConvertBoxType', + box_type_mapping=dict(gt_bboxes='rbox')), + dict( + type='mmdet.RandomResize', + scale=img_scale, + ratio_range=random_resize_ratio_range, + resize_type='mmdet.Resize', + keep_ratio=True), + dict( + type='mmrotate.RandomRotate', + prob=random_rotate_ratio, + angle_range=180, + rotate_type='mmrotate.Rotate', + rect_obj_labels=rotate_rect_obj_labels), + dict(type='mmdet.RandomCrop', crop_size=img_scale), + dict(type='mmdet.YOLOXHSVRandomAug'), + dict( + type='mmdet.RandomFlip', + prob=0.75, + direction=['horizontal', 'vertical', 'diagonal']), + dict(type='mmdet.Pad', size=img_scale, pad_val=dict(img=(114, 114, 114))), + dict(type='mmdet.PackDetInputs') +] + +train_dataloader = dict(dataset=dict(pipeline=train_pipeline)) + +# learning rate +param_scheduler = [ + dict( + type='LinearLR', + start_factor=lr_start_factor, + by_epoch=False, + begin=0, + end=1000), + dict( + # use cosine lr from 150 to 300 epoch + type='CosineAnnealingLR', + eta_min=base_lr * 0.05, + begin=max_epochs // 2, + end=max_epochs, + T_max=max_epochs // 2, + by_epoch=True, + convert_to_iter_based=True), +] + +# hooks +default_hooks = dict( + checkpoint=dict( + type='CheckpointHook', + interval=save_checkpoint_intervals, + max_keep_ckpts=max_keep_ckpts, # only keep latest 3 checkpoints + save_best='auto')) + +custom_hooks = [ + dict( + type='EMAHook', + ema_type='ExpMomentumEMA', + momentum=0.0002, + update_buffers=True, + strict_load=False, + priority=49), + dict( + type='mmdet.PipelineSwitchHook', + switch_epoch=max_epochs - num_epochs_stage2, + switch_pipeline=train_pipeline_stage2) +] + +train_cfg = dict( + type='EpochBasedTrainLoop', + max_epochs=max_epochs, + val_interval=save_checkpoint_intervals, + dynamic_intervals=[(max_epochs - num_epochs_stage2, val_interval_stage2)]) + +# Inference on test dataset and format the output results +# for submission. Note: the test set has no annotation. 
+# test_dataloader = dict( +# dataset=dict( +# data_root=_base_.data_root, +# ann_file='', # test set has no annotation +# data_prefix=dict(img_path=_base_.test_data_prefix), +# pipeline=_base_.test_pipeline)) +# test_evaluator = dict( +# type='mmrotate.DOTAMetric', +# format_only=True, +# merge_patches=True, +# outfile_prefix=submission_dir) diff --git a/configs/rtmdet/rotated/rtmdet-r_l_syncbn_fast_coco-pretrain_2xb4-36e_dota-ms.py b/configs/rtmdet/rotated/rtmdet-r_l_syncbn_fast_coco-pretrain_2xb4-36e_dota-ms.py new file mode 100644 index 00000000..1a9f50cd --- /dev/null +++ b/configs/rtmdet/rotated/rtmdet-r_l_syncbn_fast_coco-pretrain_2xb4-36e_dota-ms.py @@ -0,0 +1,20 @@ +_base_ = './rtmdet-r_l_syncbn_fast_2xb4-36e_dota-ms.py' + +load_from = 'https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_l_syncbn_fast_8xb32-300e_coco/rtmdet_l_syncbn_fast_8xb32-300e_coco_20230102_135928-ee3abdc4.pth' # noqa + +# Submission dir for result submit +submission_dir = './work_dirs/{{fileBasenameNoExtension}}/submission' + +# Inference on test dataset and format the output results +# for submission. Note: the test set has no annotation. +# test_dataloader = dict( +# dataset=dict( +# data_root=_base_.data_root, +# ann_file='', # test set has no annotation +# data_prefix=dict(img_path=_base_.test_data_prefix), +# pipeline=_base_.test_pipeline)) +# test_evaluator = dict( +# type='mmrotate.DOTAMetric', +# format_only=True, +# merge_patches=True, +# outfile_prefix=submission_dir) diff --git a/configs/rtmdet/rotated/rtmdet-r_m_syncbn_fast_2xb4-36e_dota-ms.py b/configs/rtmdet/rotated/rtmdet-r_m_syncbn_fast_2xb4-36e_dota-ms.py new file mode 100644 index 00000000..4be8605f --- /dev/null +++ b/configs/rtmdet/rotated/rtmdet-r_m_syncbn_fast_2xb4-36e_dota-ms.py @@ -0,0 +1,33 @@ +_base_ = './rtmdet-r_l_syncbn_fast_2xb4-36e_dota-ms.py' + +checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-m_8xb256-rsb-a1-600e_in1k-ecb3bbd9.pth' # noqa + +# ========================modified parameters====================== +deepen_factor = 0.67 +widen_factor = 0.75 + +# Submission dir for result submit +submission_dir = './work_dirs/{{fileBasenameNoExtension}}/submission' + +# =======================Unmodified in most cases================== +model = dict( + backbone=dict( + deepen_factor=deepen_factor, + widen_factor=widen_factor, + init_cfg=dict(checkpoint=checkpoint)), + neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor), + bbox_head=dict(head_module=dict(widen_factor=widen_factor))) + +# Inference on test dataset and format the output results +# for submission. Note: the test set has no annotation. 
+# test_dataloader = dict( +# dataset=dict( +# data_root=_base_.data_root, +# ann_file='', # test set has no annotation +# data_prefix=dict(img_path=_base_.test_data_prefix), +# pipeline=_base_.test_pipeline)) +# test_evaluator = dict( +# type='mmrotate.DOTAMetric', +# format_only=True, +# merge_patches=True, +# outfile_prefix=submission_dir) diff --git a/configs/rtmdet/rotated/rtmdet-r_m_syncbn_fast_2xb4-36e_dota.py b/configs/rtmdet/rotated/rtmdet-r_m_syncbn_fast_2xb4-36e_dota.py new file mode 100644 index 00000000..8df61cff --- /dev/null +++ b/configs/rtmdet/rotated/rtmdet-r_m_syncbn_fast_2xb4-36e_dota.py @@ -0,0 +1,33 @@ +_base_ = './rtmdet-r_l_syncbn_fast_2xb4-36e_dota.py' + +checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-m_8xb256-rsb-a1-600e_in1k-ecb3bbd9.pth' # noqa + +# ========================modified parameters====================== +deepen_factor = 0.67 +widen_factor = 0.75 + +# Submission dir for result submit +submission_dir = './work_dirs/{{fileBasenameNoExtension}}/submission' + +# =======================Unmodified in most cases================== +model = dict( + backbone=dict( + deepen_factor=deepen_factor, + widen_factor=widen_factor, + init_cfg=dict(checkpoint=checkpoint)), + neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor), + bbox_head=dict(head_module=dict(widen_factor=widen_factor))) + +# Inference on test dataset and format the output results +# for submission. Note: the test set has no annotation. +# test_dataloader = dict( +# dataset=dict( +# data_root=_base_.data_root, +# ann_file='', # test set has no annotation +# data_prefix=dict(img_path=_base_.test_data_prefix), +# pipeline=_base_.test_pipeline)) +# test_evaluator = dict( +# type='mmrotate.DOTAMetric', +# format_only=True, +# merge_patches=True, +# outfile_prefix=submission_dir) diff --git a/configs/rtmdet/rotated/rtmdet-r_s_fast_1xb8-36e_dota-ms.py b/configs/rtmdet/rotated/rtmdet-r_s_fast_1xb8-36e_dota-ms.py new file mode 100644 index 00000000..2b7b0b6f --- /dev/null +++ b/configs/rtmdet/rotated/rtmdet-r_s_fast_1xb8-36e_dota-ms.py @@ -0,0 +1,38 @@ +_base_ = './rtmdet-r_l_syncbn_fast_2xb4-36e_dota-ms.py' + +checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-s_imagenet_600e.pth' # noqa + +# ========================modified parameters====================== +deepen_factor = 0.33 +widen_factor = 0.5 + +# Batch size of a single GPU during training +train_batch_size_per_gpu = 8 + +# Submission dir for result submit +submission_dir = './work_dirs/{{fileBasenameNoExtension}}/submission' + +# =======================Unmodified in most cases================== +model = dict( + backbone=dict( + deepen_factor=deepen_factor, + widen_factor=widen_factor, + init_cfg=dict(checkpoint=checkpoint)), + neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor), + bbox_head=dict(head_module=dict(widen_factor=widen_factor))) + +train_dataloader = dict(batch_size=train_batch_size_per_gpu) + +# Inference on test dataset and format the output results +# for submission. Note: the test set has no annotation. 
+# test_dataloader = dict( +# dataset=dict( +# data_root=_base_.data_root, +# ann_file='', # test set has no annotation +# data_prefix=dict(img_path=_base_.test_data_prefix), +# pipeline=_base_.test_pipeline)) +# test_evaluator = dict( +# type='mmrotate.DOTAMetric', +# format_only=True, +# merge_patches=True, +# outfile_prefix=submission_dir) diff --git a/configs/rtmdet/rotated/rtmdet-r_s_fast_1xb8-36e_dota.py b/configs/rtmdet/rotated/rtmdet-r_s_fast_1xb8-36e_dota.py new file mode 100644 index 00000000..d200dd76 --- /dev/null +++ b/configs/rtmdet/rotated/rtmdet-r_s_fast_1xb8-36e_dota.py @@ -0,0 +1,38 @@ +_base_ = './rtmdet-r_l_syncbn_fast_2xb4-36e_dota.py' + +checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-s_imagenet_600e.pth' # noqa + +# ========================modified parameters====================== +deepen_factor = 0.33 +widen_factor = 0.5 + +# Batch size of a single GPU during training +train_batch_size_per_gpu = 8 + +# Submission dir for result submit +submission_dir = './work_dirs/{{fileBasenameNoExtension}}/submission' + +# =======================Unmodified in most cases================== +model = dict( + backbone=dict( + deepen_factor=deepen_factor, + widen_factor=widen_factor, + init_cfg=dict(checkpoint=checkpoint)), + neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor), + bbox_head=dict(head_module=dict(widen_factor=widen_factor))) + +train_dataloader = dict(batch_size=train_batch_size_per_gpu) + +# Inference on test dataset and format the output results +# for submission. Note: the test set has no annotation. +# test_dataloader = dict( +# dataset=dict( +# data_root=_base_.data_root, +# ann_file='', # test set has no annotation +# data_prefix=dict(img_path=_base_.test_data_prefix), +# pipeline=_base_.test_pipeline)) +# test_evaluator = dict( +# type='mmrotate.DOTAMetric', +# format_only=True, +# merge_patches=True, +# outfile_prefix=submission_dir) diff --git a/configs/rtmdet/rotated/rtmdet-r_tiny_fast_1xb8-36e_dota-ms.py b/configs/rtmdet/rotated/rtmdet-r_tiny_fast_1xb8-36e_dota-ms.py new file mode 100644 index 00000000..56bf038b --- /dev/null +++ b/configs/rtmdet/rotated/rtmdet-r_tiny_fast_1xb8-36e_dota-ms.py @@ -0,0 +1,38 @@ +_base_ = './rtmdet-r_l_syncbn_fast_2xb4-36e_dota-ms.py' + +checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-tiny_imagenet_600e.pth' # noqa + +# ========================modified parameters====================== +deepen_factor = 0.167 +widen_factor = 0.375 + +# Batch size of a single GPU during training +train_batch_size_per_gpu = 8 + +# Submission dir for result submit +submission_dir = './work_dirs/{{fileBasenameNoExtension}}/submission' + +# =======================Unmodified in most cases================== +model = dict( + backbone=dict( + deepen_factor=deepen_factor, + widen_factor=widen_factor, + init_cfg=dict(checkpoint=checkpoint)), + neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor), + bbox_head=dict(head_module=dict(widen_factor=widen_factor))) + +train_dataloader = dict(batch_size=train_batch_size_per_gpu) + +# Inference on test dataset and format the output results +# for submission. Note: the test set has no annotation. 
+# test_dataloader = dict( +# dataset=dict( +# data_root=_base_.data_root, +# ann_file='', # test set has no annotation +# data_prefix=dict(img_path=_base_.test_data_prefix), +# pipeline=_base_.test_pipeline)) +# test_evaluator = dict( +# type='mmrotate.DOTAMetric', +# format_only=True, +# merge_patches=True, +# outfile_prefix=submission_dir) diff --git a/configs/rtmdet/rotated/rtmdet-r_tiny_fast_1xb8-36e_dota.py b/configs/rtmdet/rotated/rtmdet-r_tiny_fast_1xb8-36e_dota.py new file mode 100644 index 00000000..739a2de8 --- /dev/null +++ b/configs/rtmdet/rotated/rtmdet-r_tiny_fast_1xb8-36e_dota.py @@ -0,0 +1,38 @@ +_base_ = './rtmdet-r_l_syncbn_fast_2xb4-36e_dota.py' + +checkpoint = 'https://download.openmmlab.com/mmdetection/v3.0/rtmdet/cspnext_rsb_pretrain/cspnext-tiny_imagenet_600e.pth' # noqa + +# ========================modified parameters====================== +deepen_factor = 0.167 +widen_factor = 0.375 + +# Batch size of a single GPU during training +train_batch_size_per_gpu = 8 + +# Submission dir for result submit +submission_dir = './work_dirs/{{fileBasenameNoExtension}}/submission' + +# =======================Unmodified in most cases================== +model = dict( + backbone=dict( + deepen_factor=deepen_factor, + widen_factor=widen_factor, + init_cfg=dict(checkpoint=checkpoint)), + neck=dict(deepen_factor=deepen_factor, widen_factor=widen_factor), + bbox_head=dict(head_module=dict(widen_factor=widen_factor))) + +train_dataloader = dict(batch_size=train_batch_size_per_gpu) + +# Inference on test dataset and format the output results +# for submission. Note: the test set has no annotation. +# test_dataloader = dict( +# dataset=dict( +# data_root=_base_.data_root, +# ann_file='', # test set has no annotation +# data_prefix=dict(img_path=_base_.test_data_prefix), +# pipeline=_base_.test_pipeline)) +# test_evaluator = dict( +# type='mmrotate.DOTAMetric', +# format_only=True, +# merge_patches=True, +# outfile_prefix=submission_dir) diff --git a/demo/large_image_demo.py b/demo/large_image_demo.py index 73106b28..24b540f4 100644 --- a/demo/large_image_demo.py +++ b/demo/large_image_demo.py @@ -56,7 +56,7 @@ def parse_args(): '--patch-size', type=int, default=640, help='The size of patches') parser.add_argument( '--patch-overlap-ratio', - type=int, + type=float, default=0.25, help='Ratio of overlap between two patches') parser.add_argument( @@ -235,7 +235,7 @@ def main(): src_image_shape=(height, width), nms_cfg={ 'type': args.merge_nms_type, - 'iou_thr': args.merge_iou_thr + 'iou_threshold': args.merge_iou_thr }) visualizer.add_datasample( diff --git a/docs/en/recommended_topics/dataset_preparation.md b/docs/en/recommended_topics/dataset_preparation.md index 5c573910..af670d89 100644 --- a/docs/en/recommended_topics/dataset_preparation.md +++ b/docs/en/recommended_topics/dataset_preparation.md @@ -1 +1,145 @@ # Dataset preparation and description + +## DOTA Dataset + +### Download dataset + +The DOTA dataset can be downloaded from [DOTA](https://captain-whu.github.io/DOTA/dataset.html) +or [OpenDataLab](https://opendatalab.org.cn/DOTA_V1.0). + +We recommend using [OpenDataLab](https://opendatalab.org.cn/DOTA_V1.0) to download the dataset, as the folder structure has already been arranged as needed and can be directly extracted without the need to adjust the folder structure. + +Please unzip the file and place it in the following structure. + +```none +${DATA_ROOT} +├── train +│ ├── images +│ │ ├── P0000.png +│ │ ├── ... 
+│   ├── labelTxt-v1.0
+│   │   ├── labelTxt
+│   │   │   ├── P0000.txt
+│   │   │   ├── ...
+│   │   ├── trainset_reclabelTxt
+│   │   │   ├── P0000.txt
+│   │   │   ├── ...
+├── val
+│   ├── images
+│   │   ├── P0003.png
+│   │   ├── ...
+│   ├── labelTxt-v1.0
+│   │   ├── labelTxt
+│   │   │   ├── P0003.txt
+│   │   │   ├── ...
+│   │   ├── valset_reclabelTxt
+│   │   │   ├── P0003.txt
+│   │   │   ├── ...
+├── test
+│   ├── images
+│   │   ├── P0006.png
+│   │   ├── ...
+
+```
+
+The folders ending with `reclabelTxt` store the horizontal box labels and are not used during slicing.
+
+### Split DOTA dataset
+
+The script `tools/dataset_converters/dota/dota_split.py` can split and prepare the DOTA dataset.
+
+```shell
+python tools/dataset_converters/dota/dota_split.py \
+    [--split-config ${SPLIT_CONFIG}] \
+    [--data-root ${DATA_ROOT}] \
+    [--out-dir ${OUT_DIR}] \
+    [--ann-subdir ${ANN_SUBDIR}] \
+    [--phase ${DATASET_PHASE}] \
+    [--nproc ${NPROC}] \
+    [--save-ext ${SAVE_EXT}] \
+    [--overwrite]
+```
+
+`shapely` is required; please install it first with `pip install shapely`.
+
+**Description of all parameters**:
+
+- `--split-config`: The split config for image slicing.
+- `--data-root`: Root dir of the DOTA dataset.
+- `--out-dir`: Output dir for the split result.
+- `--ann-subdir`: The subdir name for annotations. Defaults to `labelTxt-v1.0`.
+- `--phase`: Phase of the dataset to be prepared. Defaults to `trainval test`.
+- `--nproc`: Number of processes. Defaults to 8.
+- `--save-ext`: Extension of the saved images. Defaults to `png`.
+- `--overwrite`: Whether to allow overwriting if the annotation folder exists.
+
+Based on the configuration in the DOTA paper, we provide two commonly used split configs.
+
+- `./split_config/single_scale.json` means single-scale split.
+- `./split_config/multi_scale.json` means multi-scale split.
+
+The DOTA dataset usually uses the trainval set for training and the test set for online evaluation, since most papers
+report online evaluation results. If you want to evaluate the model performance locally first, please also split the
+train set and val set.
+
+Examples:
+
+Split the DOTA trainval set and test set with a single scale:
+
+```shell
+python tools/dataset_converters/dota/dota_split.py \
+    --split-config 'tools/dataset_converters/dota/split_config/single_scale.json' \
+    --data-root ${DATA_ROOT} \
+    --out-dir ${OUT_DIR}
+```
+
+Split the DOTA-v1.5 dataset, which has a different annotation dir `labelTxt-v1.5`:
+
+```shell
+python tools/dataset_converters/dota/dota_split.py \
+    --split-config 'tools/dataset_converters/dota/split_config/single_scale.json' \
+    --data-root ${DATA_ROOT} \
+    --out-dir ${OUT_DIR} \
+    --ann-subdir 'labelTxt-v1.5'
+```
+
+Split the DOTA train set and val set with a single scale:
+
+```shell
+python tools/dataset_converters/dota/dota_split.py \
+    --split-config 'tools/dataset_converters/dota/split_config/single_scale.json' \
+    --data-root ${DATA_ROOT} \
+    --phase train val \
+    --out-dir ${OUT_DIR}
+```
+
+For a multi-scale split:
+
+```shell
+python tools/dataset_converters/dota/dota_split.py \
+    --split-config 'tools/dataset_converters/dota/split_config/multi_scale.json' \
+    --data-root ${DATA_ROOT} \
+    --out-dir ${OUT_DIR}
+```
+
+The new data structure is as follows:
+
+```none
+${OUT_DIR}
+├── trainval
+│   ├── images
+│   │   ├── P0000__1024__0___0.png
+│   │   ├── ...
+│   ├── annfiles
+│   │   ├── P0000__1024__0___0.txt
+│   │   ├── ...
+├── test
+│   ├── images
+│   │   ├── P0006__1024__0___0.png
+│   │   ├── ...
+│   ├── annfiles
+│   │   ├── P0006__1024__0___0.txt
+│   │   ├── ...
+```
+
+Then change `data_root` in the config to ${OUT_DIR}.
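+
+As a minimal sketch (paths are illustrative and assume the split output was written to `data/split_ss_dota/` under the repo root), the corresponding dataset fields in a config would look like this:
+
+```python
+# Point the dataloaders at the sliced dataset produced by dota_split.py.
+data_root = 'data/split_ss_dota/'
+
+train_dataloader = dict(
+    dataset=dict(
+        data_root=data_root,
+        ann_file='trainval/annfiles/',
+        data_prefix=dict(img_path='trainval/images/')))
+
+val_dataloader = dict(
+    dataset=dict(
+        data_root=data_root,
+        ann_file='trainval/annfiles/',
+        data_prefix=dict(img_path='trainval/images/')))
+```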
diff --git a/docs/en/tutorials/rotated_detection.md b/docs/en/tutorials/rotated_detection.md
new file mode 100644
index 00000000..c0addb01
--- /dev/null
+++ b/docs/en/tutorials/rotated_detection.md
@@ -0,0 +1,3 @@
+# Rotated Object Detection
+
+TODO
diff --git a/docs/zh_cn/index.rst b/docs/zh_cn/index.rst
index b7b022f7..e32f76d8 100644
--- a/docs/zh_cn/index.rst
+++ b/docs/zh_cn/index.rst
@@ -73,6 +73,7 @@
    tutorials/config.md
    tutorials/data_flow.md
+   tutorials/rotated_detection.md
    tutorials/custom_installation.md
    tutorials/warning_notes.md
    tutorials/faq.md
diff --git a/docs/zh_cn/recommended_topics/dataset_preparation.md b/docs/zh_cn/recommended_topics/dataset_preparation.md
index 407916b5..25603a5c 100644
--- a/docs/zh_cn/recommended_topics/dataset_preparation.md
+++ b/docs/zh_cn/recommended_topics/dataset_preparation.md
@@ -1 +1,144 @@
 # 数据集格式准备和说明
+
+## DOTA 数据集
+
+### 下载 DOTA 数据集
+
+数据集可以从 DOTA 数据集的主页 [DOTA](https://captain-whu.github.io/DOTA/dataset.html)
+或 [OpenDataLab](https://opendatalab.org.cn/DOTA_V1.0) 下载。
+
+我们推荐使用 [OpenDataLab](https://opendatalab.org.cn/DOTA_V1.0) 下载,其中的文件夹结构已经按照需要排列好了,只需要解压即可,不需要费心去调整文件夹结构。
+
+下载后解压数据集,并按如下文件夹结构存放。
+
+```none
+${DATA_ROOT}
+├── train
+│   ├── images
+│   │   ├── P0000.png
+│   │   ├── ...
+│   ├── labelTxt-v1.0
+│   │   ├── labelTxt
+│   │   │   ├── P0000.txt
+│   │   │   ├── ...
+│   │   ├── trainset_reclabelTxt
+│   │   │   ├── P0000.txt
+│   │   │   ├── ...
+├── val
+│   ├── images
+│   │   ├── P0003.png
+│   │   ├── ...
+│   ├── labelTxt-v1.0
+│   │   ├── labelTxt
+│   │   │   ├── P0003.txt
+│   │   │   ├── ...
+│   │   ├── valset_reclabelTxt
+│   │   │   ├── P0003.txt
+│   │   │   ├── ...
+├── test
+│   ├── images
+│   │   ├── P0006.png
+│   │   ├── ...
+
+```
+
+其中,以 `reclabelTxt` 为结尾的文件夹存放了水平检测框的标注,目前仅使用了 `labelTxt-v1.0` 中旋转框的标注。
+
+### 数据集切片
+
+我们提供了 `tools/dataset_converters/dota/dota_split.py` 脚本用于 DOTA 数据集的准备和切片。
+
+```shell
+python tools/dataset_converters/dota/dota_split.py \
+    [--split-config ${SPLIT_CONFIG}] \
+    [--data-root ${DATA_ROOT}] \
+    [--out-dir ${OUT_DIR}] \
+    [--ann-subdir ${ANN_SUBDIR}] \
+    [--phase ${DATASET_PHASE}] \
+    [--nproc ${NPROC}] \
+    [--save-ext ${SAVE_EXT}] \
+    [--overwrite]
+```
+
+脚本依赖于 shapely 包,请先通过 `pip install shapely` 安装 shapely。
+
+**参数说明**:
+
+- `--split-config`: 切片参数的配置文件。
+- `--data-root`: DOTA 数据集的存放位置。
+- `--out-dir`: 切片后的输出位置。
+- `--ann-subdir`: 标注文件夹的名字。默认为 `labelTxt-v1.0`。
+- `--phase`: 数据集的阶段。默认为 `trainval test`。
+- `--nproc`: 进程数量。默认为 8。
+- `--save-ext`: 输出图像的扩展名,如置空则与原图保持一致。默认为 `None`。
+- `--overwrite`: 如果目标文件夹已存在,是否允许覆盖。
+
+基于 DOTA 数据集论文中提供的配置,我们提供了两种切片配置。
+
+- `./split_config/single_scale.json` 用于单尺度 `single-scale` 切片
+- `./split_config/multi_scale.json` 用于多尺度 `multi-scale` 切片
+
+DOTA 数据集通常使用 `trainval` 集进行训练,然后使用 `test` 集进行在线验证,大多数论文提供的也是在线验证的精度。
+如果你需要进行本地验证,可以准备 `train` 集和 `val` 集进行训练和测试。
+
+示例:
+
+使用单尺度切片配置准备 `trainval` 和 `test` 集:
+
+```shell
+python tools/dataset_converters/dota/dota_split.py \
+    --split-config 'tools/dataset_converters/dota/split_config/single_scale.json' \
+    --data-root ${DATA_ROOT} \
+    --out-dir ${OUT_DIR}
+```
+
+准备 DOTA-v1.5 数据集,它的标注文件夹名字是 `labelTxt-v1.5`:
+
+```shell
+python tools/dataset_converters/dota/dota_split.py \
+    --split-config 'tools/dataset_converters/dota/split_config/single_scale.json' \
+    --data-root ${DATA_ROOT} \
+    --out-dir ${OUT_DIR} \
+    --ann-subdir 'labelTxt-v1.5'
+```
+
+使用单尺度切片配置准备 `train` 和 `val` 集:
+
+```shell
+python tools/dataset_converters/dota/dota_split.py \
+    --split-config 'tools/dataset_converters/dota/split_config/single_scale.json' \
+    --data-root ${DATA_ROOT} \
+    --phase train val \
+    --out-dir ${OUT_DIR}
+```
+
+使用多尺度切片配置准备 `trainval` 和 `test` 集:
+
+```shell
+python tools/dataset_converters/dota/dota_split.py \
+    --split-config 'tools/dataset_converters/dota/split_config/multi_scale.json' \
+    --data-root ${DATA_ROOT} \
+    --out-dir ${OUT_DIR}
+```
+
+在运行完成后,输出的结构如下:
+
+```none
+${OUT_DIR}
+├── trainval
+│   ├── images
+│   │   ├── P0000__1024__0___0.png
+│   │   ├── ...
+│   ├── annfiles
+│   │   ├── P0000__1024__0___0.txt
+│   │   ├── ...
+├── test
+│   ├── images
+│   │   ├── P0006__1024__0___0.png
+│   │   ├── ...
+│   ├── annfiles
+│   │   ├── P0006__1024__0___0.txt
+│   │   ├── ...
+```
+
+此时将配置文件中的 `data_root` 修改为 ${OUT_DIR} 即可开始使用 DOTA 数据集训练。
diff --git a/docs/zh_cn/tutorials/rotated_detection.md b/docs/zh_cn/tutorials/rotated_detection.md
new file mode 100644
index 00000000..b06df9b9
--- /dev/null
+++ b/docs/zh_cn/tutorials/rotated_detection.md
@@ -0,0 +1,332 @@
+# 旋转目标检测
+
+旋转目标检测(Rotated Object Detection),又称为有向目标检测(Oriented Object Detection),试图在检测出目标位置的同时得到目标的方向信息。它通过重新定义目标表示形式,以及增加回归自由度数量的操作,实现旋转矩形、四边形甚至任意形状的目标检测。旋转目标检测在人脸识别、场景文字、遥感影像、自动驾驶、医学图像、机器人抓取等领域都有广泛应用。
+
+关于旋转目标检测的详细介绍请参考文档 [MMRotate 基础知识](https://mmrotate.readthedocs.io/zh_CN/1.x/overview.html)。
+
+MMYOLO 中的旋转目标检测依赖于 MMRotate 1.x,请参考文档 [开始你的第一步](https://mmrotate.readthedocs.io/zh_CN/1.x/get_started.html) 安装 MMRotate 1.x。
+
+本教程将介绍如何在 MMYOLO 中训练和使用旋转目标检测模型,目前支持了 RTMDet-R。
+
+## 数据集准备
+
+对于旋转目标检测数据集,目前最常用的数据集是 DOTA 数据集。由于 DOTA 数据集中的图像分辨率较大,因此需要进行切片处理,数据集准备请参考 [数据集格式准备和说明](../recommended_topics/dataset_preparation.md)。
+
+处理后的数据集结构如下:
+
+```none
+mmyolo
+├── data
+│   ├── split_ss_dota
+│   │   ├── trainval
+│   │   │   ├── images
+│   │   │   ├── annfiles
+│   │   ├── test
+│   │   │   ├── images
+│   │   │   ├── annfiles
+│   ├── split_ms_dota
+│   │   ├── trainval
+│   │   │   ├── images
+│   │   │   ├── annfiles
+│   │   ├── test
+│   │   │   ├── images
+│   │   │   ├── annfiles
+```
+
+其中 `split_ss_dota` 是单尺度切片,`split_ms_dota` 是多尺度切片,可以根据需要选择。
+
+对于自定义数据集,我们建议离线将数据转换为 DOTA 格式,这样只需在转换完成后修改 config 中的数据标注路径和类别即可。
+
+为了方便使用,我们同样提供了基于 COCO 格式的旋转框标注格式,将多边形检测框储存在 COCO 标注的 segmentation 标签中,示例如下:
+
+```json
+{
+  "id": 131,
+  "image_id": 72,
+  "bbox": [123, 167, 11, 37],
+  "area": 271.5,
+  "category_id": 1,
+  "segmentation": [[123, 167, 128, 204, 134, 201, 132, 167]],
+  "iscrowd": 0
+}
+```
+
+## 配置文件
+
+这里以 RTMDet-R 为例介绍旋转目标检测的配置文件,其中大部分和水平检测模型相同,主要介绍它们的差异,包括数据集和评测器配置、检测头、可视化等。
+
+得益于 MMEngine 的配置文件系统,大部分模块都可以调用 MMRotate 中的模块。
+
+### 数据集和评测器配置
+
+关于配置文件的基础请先阅读 [学习 YOLOV5 配置文件](./config.md).
下面介绍旋转目标检测的一些必要设置。 + +```python +dataset_type = 'YOLOv5DOTADataset' # 数据集类型,这将被用来定义数据集 +data_root = 'data/split_ss_dota/' # 数据的根路径 +file_client_args = dict(backend='disk') # 文件读取后端的配置,默认从硬盘读取 + +angle_version = 'le90' # 角度范围的定义,目前支持 oc, le90 和 le135 + +train_pipeline = [ + # 训练数据读取流程 + dict( + type='LoadImageFromFile', # 第 1 个流程,从文件路径里加载图像 + file_client_args=file_client_args), # 文件读取后端的配置,默认从硬盘读取 + dict(type='LoadAnnotations', # 第 2 个流程,对于当前图像,加载它的注释信息 + with_bbox=True, # 是否使用标注框 (bounding box),目标检测需要设置为 True + box_type='qbox'), # 指定读取的标注格式,旋转框数据集默认的数据格式为四边形 + dict(type='mmrotate.ConvertBoxType', # 第 3 个流程,转换标注格式 + box_type_mapping=dict(gt_bboxes='rbox')), # 将四边形标注转化为旋转框标注 + + # 训练数据处理流程 + dict(type='mmdet.Resize', scale=(1024, 1024), keep_ratio=True), + dict(type='mmdet.RandomFlip', + prob=0.75, + direction=['horizontal', 'vertical', 'diagonal']), + dict(type='mmrotate.RandomRotate', # 旋转数据增强 + prob=0.5, # 旋转概率 0.5 + angle_range=180, # 旋转范围 180 + rotate_type='mmrotate.Rotate', # 旋转方法 + rect_obj_labels=[9, 11]), # 由于 DOTA 数据集中标号为 9 的 'storage-tank' 和标号 11 的 'roundabout' 两类为正方形标注,无需角度信息,旋转中将这两类保持为水平 + dict(type='mmdet.Pad', size=img_scale, pad_val=dict(img=(114, 114, 114))), + dict(type='RegularizeRotatedBox', # 统一旋转框表示形式 + angle_version=angle_version), # 根据角度的定义方式进行 + dict(type='mmdet.PackDetInputs') +] + +train_dataloader = dict( + batch_size=train_batch_size_per_gpu, + num_workers=train_num_workers, + persistent_workers=persistent_workers, + pin_memory=True, + collate_fn=dict(type='yolov5_collate'), + sampler=dict(type='DefaultSampler', shuffle=True), + dataset=dict( # 训练数据集的配置 + type=dataset_type, + data_root=data_root, + ann_file='trainval/annfiles/', # 标注文件夹路径 + data_prefix=dict(img_path='trainval/images/'), # 图像路径前缀 + img_shape=(1024, 1024), # 图像大小 + filter_cfg=dict(filter_empty_gt=True), # 标注的过滤配置 + pipeline=train_pipeline)) # 这是由之前创建的 train_pipeline 定义的数据处理流程 +``` + +RTMDet-R 保持论文内的配置,默认仅采用随机旋转增强,得益于 BoxType 设计,在数据增强阶段,大部分增强无需改动代码即可直接支持,例如 MixUp 和 Mosaic 等,可以直接在 pipeline 中使用。 + +```{Warning} +目前已知 Albu 数据增强仅支持水平框,在使用其他的数据增强时建议先使用 可视化数据集脚本 `browse_dataset.py` 验证数据增强是否正确。 +``` + +RTMDet-R 测试阶段仅采用 Resize 和 Pad,在验证和评测时,都采用相同的数据流进行推理。 + +```python +val_pipeline = [ + dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args), + dict(type='mmdet.Resize', scale=(1024, 1024), keep_ratio=True), + dict( + type='mmdet.Pad', size=(1024, 1024), + pad_val=dict(img=(114, 114, 114))), + # 和训练时一致,先读取标注再转换标注格式 + dict( + type='LoadAnnotations', + with_bbox=True, + box_type='qbox', + _scope_='mmdet'), + dict( + type='mmrotate.ConvertBoxType', + box_type_mapping=dict(gt_bboxes='rbox')), + dict( + type='mmdet.PackDetInputs', + meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', + 'scale_factor')) +] + +val_dataloader = dict( + batch_size=val_batch_size_per_gpu, + num_workers=val_num_workers, + persistent_workers=persistent_workers, + pin_memory=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + ann_file='trainval/annfiles/', + data_prefix=dict(img_path='trainval/images/'), + img_shape=(1024, 1024), + test_mode=True, + batch_shapes_cfg=batch_shapes_cfg, + pipeline=val_pipeline)) +``` + +[评测器](https://mmengine.readthedocs.io/zh_CN/latest/tutorials/evaluation.html) 用于计算训练模型在验证和测试数据集上的指标。评测器的配置由一个或一组评价指标(Metric)配置组成: + +```python +val_evaluator = dict( # 验证过程使用的评测器 + type='mmrotate.DOTAMetric', # 用于评估旋转目标检测的 mAP 的 dota 评价指标 + metric='mAP' # 需要计算的评价指标 +) +test_evaluator = val_evaluator # 测试过程使用的评测器 
+``` + +由于 DOTA 测试数据集没有标注文件, 如果要保存在测试数据集上的检测结果,则可以像这样编写配置: + +```python +# 在测试集上推理, +# 并将检测结果转换格式以用于提交结果 +test_dataloader = dict( + batch_size=val_batch_size_per_gpu, + num_workers=val_num_workers, + persistent_workers=True, + drop_last=False, + sampler=dict(type='DefaultSampler', shuffle=False), + dataset=dict( + type=dataset_type, + data_root=data_root, + data_prefix=dict(img_path='test/images/'), + img_shape=(1024, 1024), + test_mode=True, + batch_shapes_cfg=batch_shapes_cfg, + pipeline=test_pipeline)) +test_evaluator = dict( + type='mmrotate.DOTAMetric', + format_only=True, # 只将模型输出转换为 DOTA 的 txt 提交格式并压缩成 zip + merge_patches=True, # 将切片结果合并成大图检测结果 + outfile_prefix='./work_dirs/dota_detection/submission') # 输出测试文件夹的路径 +``` + +如果使用基于 COCO 格式的旋转框标注,只需要修改 pipeline 中数据读取流程和训练数据集的配置,以训练数据为例: + +```python + +dataset_type='YOLOv5CocoDataset' + +train_pipeline = [ + # 训练数据读取流程 + dict( + type='LoadImageFromFile', # 第 1 个流程,从文件路径里加载图像 + file_client_args=file_client_args), # 文件读取后端的配置,默认从硬盘读取 + dict(type='LoadAnnotations', # 第 2 个流程,对于当前图像,加载它的注释信息 + with_bbox=True, # 是否使用标注框 (bounding box),目标检测需要设置为 True + with_mask=True, # 读取储存在 segmentation 标注中的多边形标注 + poly2mask=False) # 不执行 poly2mask,后续会将 poly 转化成检测框 + dict(type='ConvertMask2BoxType', # 第 3 个流程,将 mask 标注转化为 boxtype + box_type='rbox'), # 目标类型是 rbox 旋转框 + # 剩余的其他 pipeline + ... +] + +metainfo = dict( # DOTA 数据集的 metainfo + classes=('plane', 'baseball-diamond', 'bridge', 'ground-track-field', + 'small-vehicle', 'large-vehicle', 'ship', 'tennis-court', + 'basketball-court', 'storage-tank', 'soccer-ball-field', + 'roundabout', 'harbor', 'swimming-pool', 'helicopter')) + +train_dataloader = dict( + dataset=dict( # 训练数据集的配置 + type=dataset_type, + metainfo=metainfo, + data_root=data_root, + ann_file='train/train.json', # 标注文件路径 + data_prefix=dict(img='train/images/'), # 图像路径前缀 + filter_cfg=dict(filter_empty_gt=True), # 标注的过滤配置 + pipeline=train_pipeline), # 数据处理流程 +) +``` + +### 模型配置 + +对于旋转目标检测器,在模型配置中 backbone 和 neck 的配置和其他模型是一致的,主要差异在检测头上。目前仅支持 RTMDet-R 旋转目标检测,下面介绍新增的参数: + +1. `angle_version` 角度范围,用于在训练时限制角度的范围,可选的角度范围有 `le90`, `le135` 和 `oc`。 + +2. `angle_coder` 角度编码器,和 bbox coder 类似,用于编码和解码角度。 + + 默认使用的角度编码器是 `PseudoAngleCoder`,即”伪角度编码器“,并不进行编解码,直接回归角度参数。这样设计的目标是能更好的自定义角度编码方式,而无需重写代码,例如 CSL,DCL,PSC 等方法。 + +3. `use_hbbox_loss` 是否使用水平框 loss。考虑到部分角度编码解码过程不可导,直接使用旋转框的损失函数无法学习角度,因此引入该参数用于将框和角度分开训练。 + +4. `loss_angle` 角度损失函数。在设定`use_hbbox_loss=True` 时必须设定,而使用旋转框损失时可选,此时可以作为回归损失的辅助。 + +通过组合 `use_hbbox_loss` 和 `loss_angle` 可以控制旋转框训练时的回归损失计算方式,共有三种组合方式: + +- `use_hbbox_loss=False` 且 `loss_angle` 为 None. + + 此时框预测和角度预测进行合并,直接对旋转框预测进行回归,此时 `loss_bbox` 应当设定为旋转框损失,例如 `RotatedIoULoss`。 + 这种方案和水平检测模型的回归方式基本一致,只是多了额外的角度编解码过程。 + + ``` + bbox_pred────(tblr)───┐ + ▼ + angle_pred decode──►rbox_pred──(xywha)─►loss_bbox + │ ▲ + └────►decode──(a)─┘ + ``` + +- `use_hbbox_loss=False`,同时设定 `loss_angle`. + + 此时会增加额外的角度回归和分类损失,具体的角度损失类型需要根据角度编码器 `angle_code` 进行选择。 + + ``` + bbox_pred────(tblr)───┐ + ▼ + angle_pred decode──►rbox_pred──(xywha)─►loss_bbox + │ ▲ + ├────►decode──(a)─┘ + │ + └───────────────────────────────────────────►loss_angle + ``` + +- `use_hbbox_loss=True` 且 `loss_angle` 为 None. 
+ + 此时框预测和角度预测完全分离,将两个分支视作两个任务进行训练。 + 此时 `loss_bbox` 要设定为水平框的损失函数,例如 `IoULoss` 。 + + ``` + bbox_pred──(tblr)──►decode──►hbox_pred──(xyxy)──►loss_bbox + + angle_pred──────────────────────────────────────►loss_angle + ``` + +除了检测头中的参数,在test_cfg中还增加了 `decoded_with_angle` 参数用来控制推理时角度的处理逻辑,默认设定为 True 。 +设计这个参数的目标是让训练过程和推理过程的逻辑对齐,该参数会影响最终的精度。 + +当 `decoded_with_angle=True` 时,将框和角度同时送入 `bbox_coder` 中。 +此时要使用旋转框的编解码器,例如`DistanceAnglePointCoder`。 + +``` +bbox_pred────(tblr)───┐ + ▼ +angle_pred decode──(xywha)──►rbox_pred + │ ▲ + └────►decode──(a)─┘ +``` + +当 `decoded_with_angle=False` 时,首先解码出水平检测框,之后将角度 concat 到检测框。 +此时要使用水平框的编解码器,例如`DistancePointBBoxCoder`。 + +``` +bbox_pred──(tblr)─►decode + │ (xyxy) + ▼ + format───(xywh)──►concat──(xywha)──►rbox_pred + ▲ +angle_pred────────►decode────(a)───────┘ +``` + +### 可视化器 + +由于旋转框和水平框的差异,旋转目标检测模型需要使用 MMRotate 中的 `RotLocalVisualizer`,配置如下: + +```python +vis_backends = [dict(type='LocalVisBackend')] # 可视化后端,请参考 https://mmengine.readthedocs.io/zh_CN/latest/advanced_tutorials/visualization.html +visualizer = dict( + type='mmrotate.RotLocalVisualizer', vis_backends=vis_backends, name='visualizer') +``` + +## 实用工具 + +目前测试可用的工具包括: + +[可视化数据集](../useful_tools/browse_dataset.md) diff --git a/mmyolo/datasets/__init__.py b/mmyolo/datasets/__init__.py index 592535eb..b3b6b971 100644 --- a/mmyolo/datasets/__init__.py +++ b/mmyolo/datasets/__init__.py @@ -3,9 +3,10 @@ from .transforms import * # noqa: F401,F403 from .utils import BatchShapePolicy, yolov5_collate from .yolov5_coco import YOLOv5CocoDataset from .yolov5_crowdhuman import YOLOv5CrowdHumanDataset +from .yolov5_dota import YOLOv5DOTADataset from .yolov5_voc import YOLOv5VOCDataset __all__ = [ 'YOLOv5CocoDataset', 'YOLOv5VOCDataset', 'BatchShapePolicy', - 'yolov5_collate', 'YOLOv5CrowdHumanDataset' + 'yolov5_collate', 'YOLOv5CrowdHumanDataset', 'YOLOv5DOTADataset' ] diff --git a/mmyolo/datasets/transforms/__init__.py b/mmyolo/datasets/transforms/__init__.py index 7b2c6a91..58f4e6fd 100644 --- a/mmyolo/datasets/transforms/__init__.py +++ b/mmyolo/datasets/transforms/__init__.py @@ -1,13 +1,14 @@ # Copyright (c) OpenMMLab. All rights reserved. from .mix_img_transforms import Mosaic, Mosaic9, YOLOv5MixUp, YOLOXMixUp from .transforms import (LetterResize, LoadAnnotations, PPYOLOERandomCrop, - PPYOLOERandomDistort, RemoveDataElement, - YOLOv5CopyPaste, YOLOv5HSVRandomAug, - YOLOv5KeepRatioResize, YOLOv5RandomAffine) + PPYOLOERandomDistort, RegularizeRotatedBox, + RemoveDataElement, YOLOv5CopyPaste, + YOLOv5HSVRandomAug, YOLOv5KeepRatioResize, + YOLOv5RandomAffine) __all__ = [ 'YOLOv5KeepRatioResize', 'LetterResize', 'Mosaic', 'YOLOXMixUp', 'YOLOv5MixUp', 'YOLOv5HSVRandomAug', 'LoadAnnotations', 'YOLOv5RandomAffine', 'PPYOLOERandomDistort', 'PPYOLOERandomCrop', - 'Mosaic9', 'YOLOv5CopyPaste', 'RemoveDataElement' + 'Mosaic9', 'YOLOv5CopyPaste', 'RemoveDataElement', 'RegularizeRotatedBox' ] diff --git a/mmyolo/datasets/transforms/transforms.py b/mmyolo/datasets/transforms/transforms.py index a58084f3..d5179fba 100644 --- a/mmyolo/datasets/transforms/transforms.py +++ b/mmyolo/datasets/transforms/transforms.py @@ -1507,3 +1507,51 @@ class RemoveDataElement(BaseTransform): repr_str = self.__class__.__name__ repr_str += f'(keys={self.keys})' return repr_str + + +@TRANSFORMS.register_module() +class RegularizeRotatedBox(BaseTransform): + """Regularize rotated boxes. + + Due to the angle periodicity, one rotated box can be represented in + many different (x, y, w, h, t). 
To make each rotated box unique, + ``regularize_boxes`` will take the remainder of the angle divided by + 180 degrees. + + For convenience, three angle_version can be used here: + + - 'oc': OpenCV Definition. Has the same box representation as + ``cv2.minAreaRect`` the angle ranges in [-90, 0). + - 'le90': Long Edge Definition (90). the angle ranges in [-90, 90). + The width is always longer than the height. + - 'le135': Long Edge Definition (135). the angle ranges in [-45, 135). + The width is always longer than the height. + + Required Keys: + + - gt_bboxes (RotatedBoxes[torch.float32]) + + Modified Keys: + + - gt_bboxes + + Args: + angle_version (str): Angle version. Can only be 'oc', + 'le90', or 'le135'. Defaults to 'le90. + """ + + def __init__(self, angle_version='le90') -> None: + self.angle_version = angle_version + try: + from mmrotate.structures.bbox import RotatedBoxes + self.box_type = RotatedBoxes + except ImportError: + raise ImportError( + 'Please run "mim install -r requirements/mmrotate.txt" ' + 'to install mmrotate first for rotated detection.') + + def transform(self, results: dict) -> dict: + assert isinstance(results['gt_bboxes'], self.box_type) + results['gt_bboxes'] = self.box_type( + results['gt_bboxes'].regularize_boxes(self.angle_version)) + return results diff --git a/mmyolo/datasets/yolov5_dota.py b/mmyolo/datasets/yolov5_dota.py new file mode 100644 index 00000000..a9647981 --- /dev/null +++ b/mmyolo/datasets/yolov5_dota.py @@ -0,0 +1,29 @@ +# Copyright (c) OpenMMLab. All rights reserved. + +from mmyolo.datasets.yolov5_coco import BatchShapePolicyDataset +from ..registry import DATASETS + +try: + from mmrotate.datasets import DOTADataset + MMROTATE_AVAILABLE = True +except ImportError: + from mmengine.dataset import BaseDataset + DOTADataset = BaseDataset + MMROTATE_AVAILABLE = False + + +@DATASETS.register_module() +class YOLOv5DOTADataset(BatchShapePolicyDataset, DOTADataset): + """Dataset for YOLOv5 DOTA Dataset. + + We only add `BatchShapePolicy` function compared with DOTADataset. See + `mmyolo/datasets/utils.py#BatchShapePolicy` for details + """ + + def __init__(self, *args, **kwargs): + if not MMROTATE_AVAILABLE: + raise ImportError( + 'Please run "mim install -r requirements/mmrotate.txt" ' + 'to install mmrotate first for rotated detection.') + + super().__init__(*args, **kwargs) diff --git a/mmyolo/models/dense_heads/__init__.py b/mmyolo/models/dense_heads/__init__.py index 0b29f30b..eaf96209 100644 --- a/mmyolo/models/dense_heads/__init__.py +++ b/mmyolo/models/dense_heads/__init__.py @@ -1,5 +1,7 @@ # Copyright (c) OpenMMLab. All rights reserved. 
from .ppyoloe_head import PPYOLOEHead, PPYOLOEHeadModule +from .rotated_rtmdet_head import (RotatedRTMDetHead, + RotatedRTMDetSepBNHeadModule) from .rtmdet_head import RTMDetHead, RTMDetSepBNHeadModule from .yolov5_head import YOLOv5Head, YOLOv5HeadModule from .yolov6_head import YOLOv6Head, YOLOv6HeadModule @@ -11,5 +13,6 @@ __all__ = [ 'YOLOv5Head', 'YOLOv6Head', 'YOLOXHead', 'YOLOv5HeadModule', 'YOLOv6HeadModule', 'YOLOXHeadModule', 'RTMDetHead', 'RTMDetSepBNHeadModule', 'YOLOv7Head', 'PPYOLOEHead', 'PPYOLOEHeadModule', - 'YOLOv7HeadModule', 'YOLOv7p6HeadModule', 'YOLOv8Head', 'YOLOv8HeadModule' + 'YOLOv7HeadModule', 'YOLOv7p6HeadModule', 'YOLOv8Head', 'YOLOv8HeadModule', + 'RotatedRTMDetHead', 'RotatedRTMDetSepBNHeadModule' ] diff --git a/mmyolo/models/dense_heads/rotated_rtmdet_head.py b/mmyolo/models/dense_heads/rotated_rtmdet_head.py new file mode 100644 index 00000000..2f452ff3 --- /dev/null +++ b/mmyolo/models/dense_heads/rotated_rtmdet_head.py @@ -0,0 +1,649 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import copy +import warnings +from typing import List, Optional, Sequence, Tuple + +import torch +import torch.nn as nn +from mmdet.models.utils import filter_scores_and_topk +from mmdet.structures.bbox import HorizontalBoxes, distance2bbox +from mmdet.structures.bbox.transforms import bbox_cxcywh_to_xyxy, scale_boxes +from mmdet.utils import (ConfigType, InstanceList, OptConfigType, + OptInstanceList, OptMultiConfig, reduce_mean) +from mmengine.config import ConfigDict +from mmengine.model import normal_init +from mmengine.structures import InstanceData +from torch import Tensor + +from mmyolo.registry import MODELS, TASK_UTILS +from ..utils import gt_instances_preprocess +from .rtmdet_head import RTMDetHead, RTMDetSepBNHeadModule + +try: + from mmrotate.structures.bbox import RotatedBoxes, distance2obb + MMROTATE_AVAILABLE = True +except ImportError: + RotatedBoxes = None + distance2obb = None + MMROTATE_AVAILABLE = False + + +@MODELS.register_module() +class RotatedRTMDetSepBNHeadModule(RTMDetSepBNHeadModule): + """Detection Head Module of RTMDet-R. + + Compared with RTMDet Detection Head Module, RTMDet-R adds + a conv for angle prediction. + An `angle_out_dim` arg is added, which is generated by the + angle_coder module and controls the angle pred dim. + + Args: + num_classes (int): Number of categories excluding the background + category. + in_channels (int): Number of channels in the input feature map. + widen_factor (float): Width multiplier, multiply number of + channels in each layer by this amount. Defaults to 1.0. + num_base_priors (int): The number of priors (points) at a point + on the feature grid. Defaults to 1. + feat_channels (int): Number of hidden channels. Used in child classes. + Defaults to 256 + stacked_convs (int): Number of stacking convs of the head. + Defaults to 2. + featmap_strides (Sequence[int]): Downsample factor of each feature map. + Defaults to (8, 16, 32). + share_conv (bool): Whether to share conv layers between stages. + Defaults to True. + pred_kernel_size (int): Kernel size of ``nn.Conv2d``. Defaults to 1. + angle_out_dim (int): Encoded length of angle, will passed by head. + Defaults to 1. + conv_cfg (:obj:`ConfigDict` or dict, optional): Config dict for + convolution layer. Defaults to None. + norm_cfg (:obj:`ConfigDict` or dict): Config dict for normalization + layer. Defaults to ``dict(type='BN')``. + act_cfg (:obj:`ConfigDict` or dict): Config dict for activation layer. + Default: dict(type='SiLU', inplace=True). 
+ init_cfg (:obj:`ConfigDict` or list[:obj:`ConfigDict`] or dict or + list[dict], optional): Initialization config dict. + Defaults to None. + """ + + def __init__( + self, + num_classes: int, + in_channels: int, + widen_factor: float = 1.0, + num_base_priors: int = 1, + feat_channels: int = 256, + stacked_convs: int = 2, + featmap_strides: Sequence[int] = [8, 16, 32], + share_conv: bool = True, + pred_kernel_size: int = 1, + angle_out_dim: int = 1, + conv_cfg: OptConfigType = None, + norm_cfg: ConfigType = dict(type='BN'), + act_cfg: ConfigType = dict(type='SiLU', inplace=True), + init_cfg: OptMultiConfig = None, + ): + self.angle_out_dim = angle_out_dim + super().__init__( + num_classes=num_classes, + in_channels=in_channels, + widen_factor=widen_factor, + num_base_priors=num_base_priors, + feat_channels=feat_channels, + stacked_convs=stacked_convs, + featmap_strides=featmap_strides, + share_conv=share_conv, + pred_kernel_size=pred_kernel_size, + conv_cfg=conv_cfg, + norm_cfg=norm_cfg, + act_cfg=act_cfg, + init_cfg=init_cfg) + + def _init_layers(self): + """Initialize layers of the head.""" + super()._init_layers() + self.rtm_ang = nn.ModuleList() + for _ in range(len(self.featmap_strides)): + self.rtm_ang.append( + nn.Conv2d( + self.feat_channels, + self.num_base_priors * self.angle_out_dim, + self.pred_kernel_size, + padding=self.pred_kernel_size // 2)) + + def init_weights(self) -> None: + """Initialize weights of the head.""" + # Use prior in model initialization to improve stability + super().init_weights() + for rtm_ang in self.rtm_ang: + normal_init(rtm_ang, std=0.01) + + def forward(self, feats: Tuple[Tensor, ...]) -> tuple: + """Forward features from the upstream network. + + Args: + feats (tuple[Tensor]): Features from the upstream network, each is + a 4D-tensor. + + Returns: + tuple: Usually a tuple of classification scores and bbox prediction + - cls_scores (list[Tensor]): Classification scores for all scale + levels, each is a 4D-tensor, the channels number is + num_base_priors * num_classes. + - bbox_preds (list[Tensor]): Box energies / deltas for all scale + levels, each is a 4D-tensor, the channels number is + num_base_priors * 4. + - angle_preds (list[Tensor]): Angle prediction for all scale + levels, each is a 4D-tensor, the channels number is + num_base_priors * angle_out_dim. + """ + + cls_scores = [] + bbox_preds = [] + angle_preds = [] + for idx, x in enumerate(feats): + cls_feat = x + reg_feat = x + + for cls_layer in self.cls_convs[idx]: + cls_feat = cls_layer(cls_feat) + cls_score = self.rtm_cls[idx](cls_feat) + + for reg_layer in self.reg_convs[idx]: + reg_feat = reg_layer(reg_feat) + + reg_dist = self.rtm_reg[idx](reg_feat) + angle_pred = self.rtm_ang[idx](reg_feat) + + cls_scores.append(cls_score) + bbox_preds.append(reg_dist) + angle_preds.append(angle_pred) + return tuple(cls_scores), tuple(bbox_preds), tuple(angle_preds) + + +@MODELS.register_module() +class RotatedRTMDetHead(RTMDetHead): + """RTMDet-R head. + + Compared with RTMDetHead, RotatedRTMDetHead add some args to support + rotated object detection. + + - `angle_version` used to limit angle_range during training. + - `angle_coder` used to encode and decode angle, which is similar + to bbox_coder. + - `use_hbbox_loss` and `loss_angle` allow custom regression loss + calculation for rotated box. + + There are three combination options for regression: + + 1. `use_hbbox_loss=False` and loss_angle is None. + + .. 
code:: text + + bbox_pred────(tblr)───┐ + ▼ + angle_pred decode──►rbox_pred──(xywha)─►loss_bbox + │ ▲ + └────►decode──(a)─┘ + + 2. `use_hbbox_loss=False` and loss_angle is specified. + A angle loss is added on angle_pred. + + .. code:: text + + bbox_pred────(tblr)───┐ + ▼ + angle_pred decode──►rbox_pred──(xywha)─►loss_bbox + │ ▲ + ├────►decode──(a)─┘ + │ + └───────────────────────────────────────────►loss_angle + + 3. `use_hbbox_loss=True` and loss_angle is specified. + In this case the loss_angle must be set. + + .. code:: text + + bbox_pred──(tblr)──►decode──►hbox_pred──(xyxy)──►loss_bbox + + angle_pred──────────────────────────────────────►loss_angle + + - There's a `decoded_with_angle` flag in test_cfg, which is similar + to training process. + + When `decoded_with_angle=True`: + + .. code:: text + + bbox_pred────(tblr)───┐ + ▼ + angle_pred decode──(xywha)──►rbox_pred + │ ▲ + └────►decode──(a)─┘ + + When `decoded_with_angle=False`: + + .. code:: text + + bbox_pred──(tblr)─►decode + │ (xyxy) + ▼ + format───(xywh)──►concat──(xywha)──►rbox_pred + ▲ + angle_pred────────►decode────(a)───────┘ + + Args: + head_module(ConfigType): Base module used for RotatedRTMDetHead. + prior_generator: Points generator feature maps in + 2D points-based detectors. + bbox_coder (:obj:`ConfigDict` or dict): Config of bbox coder. + loss_cls (:obj:`ConfigDict` or dict): Config of classification loss. + loss_bbox (:obj:`ConfigDict` or dict): Config of localization loss. + loss_obj (:obj:`ConfigDict` or dict): Config of objectness loss. + Just for compatibility, not actually required. + angle_version (str): Angle representations. Defaults to 'le90'. + use_hbbox_loss (bool): If true, use horizontal bbox loss and + loss_angle should not be None. Default to False. + angle_coder (:obj:`ConfigDict` or dict): Config of angle coder. + loss_angle (:obj:`ConfigDict` or dict, optional): Config of angle loss. + train_cfg (:obj:`ConfigDict` or dict, optional): Training config of + anchor head. Defaults to None. + test_cfg (:obj:`ConfigDict` or dict, optional): Testing config of + anchor head. Defaults to None. + init_cfg (:obj:`ConfigDict` or list[:obj:`ConfigDict`] or dict or + list[dict], optional): Initialization config dict. + Defaults to None. 
+ """ + + def __init__( + self, + head_module: ConfigType, + prior_generator: ConfigType = dict( + type='mmdet.MlvlPointGenerator', strides=[8, 16, 32], + offset=0), + bbox_coder: ConfigType = dict(type='DistanceAnglePointCoder'), + loss_cls: ConfigType = dict( + type='mmdet.QualityFocalLoss', + use_sigmoid=True, + beta=2.0, + loss_weight=1.0), + loss_bbox: ConfigType = dict( + type='mmrotate.RotatedIoULoss', mode='linear', + loss_weight=2.0), + loss_obj: ConfigType = dict( + type='mmdet.CrossEntropyLoss', + use_sigmoid=True, + reduction='sum', + loss_weight=1.0), + angle_version: str = 'le90', + use_hbbox_loss: bool = False, + angle_coder: ConfigType = dict(type='mmrotate.PseudoAngleCoder'), + loss_angle: OptConfigType = None, + train_cfg: OptConfigType = None, + test_cfg: OptConfigType = None, + init_cfg: OptMultiConfig = None): + if not MMROTATE_AVAILABLE: + raise ImportError( + 'Please run "mim install -r requirements/mmrotate.txt" ' + 'to install mmrotate first for rotated detection.') + + self.angle_version = angle_version + self.use_hbbox_loss = use_hbbox_loss + if self.use_hbbox_loss: + assert loss_angle is not None, \ + ('When use hbbox loss, loss_angle needs to be specified') + self.angle_coder = TASK_UTILS.build(angle_coder) + self.angle_out_dim = self.angle_coder.encode_size + if head_module.get('angle_out_dim') is not None: + warnings.warn('angle_out_dim will be overridden by angle_coder ' + 'and does not need to be set manually') + + head_module['angle_out_dim'] = self.angle_out_dim + super().__init__( + head_module=head_module, + prior_generator=prior_generator, + bbox_coder=bbox_coder, + loss_cls=loss_cls, + loss_bbox=loss_bbox, + loss_obj=loss_obj, + train_cfg=train_cfg, + test_cfg=test_cfg, + init_cfg=init_cfg) + + if loss_angle is not None: + self.loss_angle = MODELS.build(loss_angle) + else: + self.loss_angle = None + + def predict_by_feat(self, + cls_scores: List[Tensor], + bbox_preds: List[Tensor], + angle_preds: List[Tensor], + objectnesses: Optional[List[Tensor]] = None, + batch_img_metas: Optional[List[dict]] = None, + cfg: Optional[ConfigDict] = None, + rescale: bool = True, + with_nms: bool = True) -> List[InstanceData]: + """Transform a batch of output features extracted by the head into bbox + results. + + Args: + cls_scores (list[Tensor]): Classification scores for all + scale levels, each is a 4D-tensor, has shape + (batch_size, num_priors * num_classes, H, W). + bbox_preds (list[Tensor]): Box energies / deltas for all + scale levels, each is a 4D-tensor, has shape + (batch_size, num_priors * 4, H, W). + angle_preds (list[Tensor]): Box angle for each scale level + with shape (N, num_points * angle_dim, H, W) + objectnesses (list[Tensor], Optional): Score factor for + all scale level, each is a 4D-tensor, has shape + (batch_size, 1, H, W). + batch_img_metas (list[dict], Optional): Batch image meta info. + Defaults to None. + cfg (ConfigDict, optional): Test / postprocessing + configuration, if None, test_cfg would be used. + Defaults to None. + rescale (bool): If True, return boxes in original image space. + Defaults to False. + with_nms (bool): If True, do nms before return boxes. + Defaults to True. + + Returns: + list[:obj:`InstanceData`]: Object detection results of each image + after the post process. Each item usually contains following keys. + - scores (Tensor): Classification scores, has a shape + (num_instance, ) + - labels (Tensor): Labels of bboxes, has a shape + (num_instances, ). 
+ - bboxes (Tensor): Has a shape (num_instances, 5), + the last dimension 4 arrange as (x, y, w, h, angle). + """ + assert len(cls_scores) == len(bbox_preds) + if objectnesses is None: + with_objectnesses = False + else: + with_objectnesses = True + assert len(cls_scores) == len(objectnesses) + + cfg = self.test_cfg if cfg is None else cfg + cfg = copy.deepcopy(cfg) + + multi_label = cfg.multi_label + multi_label &= self.num_classes > 1 + cfg.multi_label = multi_label + + # Whether to decode rbox with angle. + # different setting lead to different final results. + # Defaults to True. + decode_with_angle = cfg.get('decode_with_angle', True) + + num_imgs = len(batch_img_metas) + featmap_sizes = [cls_score.shape[2:] for cls_score in cls_scores] + + # If the shape does not change, use the previous mlvl_priors + if featmap_sizes != self.featmap_sizes: + self.mlvl_priors = self.prior_generator.grid_priors( + featmap_sizes, + dtype=cls_scores[0].dtype, + device=cls_scores[0].device) + self.featmap_sizes = featmap_sizes + flatten_priors = torch.cat(self.mlvl_priors) + + mlvl_strides = [ + flatten_priors.new_full( + (featmap_size.numel() * self.num_base_priors, ), stride) for + featmap_size, stride in zip(featmap_sizes, self.featmap_strides) + ] + flatten_stride = torch.cat(mlvl_strides) + + # flatten cls_scores, bbox_preds and objectness + flatten_cls_scores = [ + cls_score.permute(0, 2, 3, 1).reshape(num_imgs, -1, + self.num_classes) + for cls_score in cls_scores + ] + flatten_bbox_preds = [ + bbox_pred.permute(0, 2, 3, 1).reshape(num_imgs, -1, 4) + for bbox_pred in bbox_preds + ] + flatten_angle_preds = [ + angle_pred.permute(0, 2, 3, 1).reshape(num_imgs, -1, + self.angle_out_dim) + for angle_pred in angle_preds + ] + + flatten_cls_scores = torch.cat(flatten_cls_scores, dim=1).sigmoid() + flatten_bbox_preds = torch.cat(flatten_bbox_preds, dim=1) + flatten_angle_preds = torch.cat(flatten_angle_preds, dim=1) + flatten_angle_preds = self.angle_coder.decode( + flatten_angle_preds, keepdim=True) + + if decode_with_angle: + flatten_rbbox_preds = torch.cat( + [flatten_bbox_preds, flatten_angle_preds], dim=-1) + flatten_decoded_bboxes = self.bbox_coder.decode( + flatten_priors[None], flatten_rbbox_preds, flatten_stride) + else: + flatten_decoded_hbboxes = self.bbox_coder.decode( + flatten_priors[None], flatten_bbox_preds, flatten_stride) + flatten_decoded_hbboxes = HorizontalBoxes.xyxy_to_cxcywh( + flatten_decoded_hbboxes) + flatten_decoded_bboxes = torch.cat( + [flatten_decoded_hbboxes, flatten_angle_preds], dim=-1) + + if with_objectnesses: + flatten_objectness = [ + objectness.permute(0, 2, 3, 1).reshape(num_imgs, -1) + for objectness in objectnesses + ] + flatten_objectness = torch.cat(flatten_objectness, dim=1).sigmoid() + else: + flatten_objectness = [None for _ in range(num_imgs)] + + results_list = [] + for (bboxes, scores, objectness, + img_meta) in zip(flatten_decoded_bboxes, flatten_cls_scores, + flatten_objectness, batch_img_metas): + scale_factor = img_meta['scale_factor'] + if 'pad_param' in img_meta: + pad_param = img_meta['pad_param'] + else: + pad_param = None + + score_thr = cfg.get('score_thr', -1) + # yolox_style does not require the following operations + if objectness is not None and score_thr > 0 and not cfg.get( + 'yolox_style', False): + conf_inds = objectness > score_thr + bboxes = bboxes[conf_inds, :] + scores = scores[conf_inds, :] + objectness = objectness[conf_inds] + + if objectness is not None: + # conf = obj_conf * cls_conf + scores *= objectness[:, None] + + if 
scores.shape[0] == 0: + empty_results = InstanceData() + empty_results.bboxes = RotatedBoxes(bboxes) + empty_results.scores = scores[:, 0] + empty_results.labels = scores[:, 0].int() + results_list.append(empty_results) + continue + + nms_pre = cfg.get('nms_pre', 100000) + if cfg.multi_label is False: + scores, labels = scores.max(1, keepdim=True) + scores, _, keep_idxs, results = filter_scores_and_topk( + scores, + score_thr, + nms_pre, + results=dict(labels=labels[:, 0])) + labels = results['labels'] + else: + scores, labels, keep_idxs, _ = filter_scores_and_topk( + scores, score_thr, nms_pre) + + results = InstanceData( + scores=scores, + labels=labels, + bboxes=RotatedBoxes(bboxes[keep_idxs])) + + if rescale: + if pad_param is not None: + results.bboxes.translate_([-pad_param[2], -pad_param[0]]) + + scale_factor = [1 / s for s in img_meta['scale_factor']] + results.bboxes = scale_boxes(results.bboxes, scale_factor) + + if cfg.get('yolox_style', False): + # do not need max_per_img + cfg.max_per_img = len(results) + + results = self._bbox_post_process( + results=results, + cfg=cfg, + rescale=False, + with_nms=with_nms, + img_meta=img_meta) + + results_list.append(results) + return results_list + + def loss_by_feat( + self, + cls_scores: List[Tensor], + bbox_preds: List[Tensor], + angle_preds: List[Tensor], + batch_gt_instances: InstanceList, + batch_img_metas: List[dict], + batch_gt_instances_ignore: OptInstanceList = None) -> dict: + """Compute losses of the head. + + Args: + cls_scores (list[Tensor]): Box scores for each scale level + Has shape (N, num_anchors * num_classes, H, W) + bbox_preds (list[Tensor]): Decoded box for each scale + level with shape (N, num_anchors * 4, H, W) in + [tl_x, tl_y, br_x, br_y] format. + angle_preds (list[Tensor]): Angle prediction for each scale + level with shape (N, num_anchors * angle_out_dim, H, W). + batch_gt_instances (list[:obj:`InstanceData`]): Batch of + gt_instance. It usually includes ``bboxes`` and ``labels`` + attributes. + batch_img_metas (list[dict]): Meta information of each image, e.g., + image size, scaling factor, etc. + batch_gt_instances_ignore (list[:obj:`InstanceData`], Optional): + Batch of gt_instances_ignore. It includes ``bboxes`` attribute + data that is ignored during training and testing. + Defaults to None. + + Returns: + dict[str, Tensor]: A dictionary of loss components. 
+ """ + num_imgs = len(batch_img_metas) + featmap_sizes = [featmap.size()[-2:] for featmap in cls_scores] + assert len(featmap_sizes) == self.prior_generator.num_levels + + gt_info = gt_instances_preprocess(batch_gt_instances, num_imgs) + gt_labels = gt_info[:, :, :1] + gt_bboxes = gt_info[:, :, 1:] # xywha + pad_bbox_flag = (gt_bboxes.sum(-1, keepdim=True) > 0).float() + + device = cls_scores[0].device + + # If the shape does not equal, generate new one + if featmap_sizes != self.featmap_sizes_train: + self.featmap_sizes_train = featmap_sizes + mlvl_priors_with_stride = self.prior_generator.grid_priors( + featmap_sizes, device=device, with_stride=True) + self.flatten_priors_train = torch.cat( + mlvl_priors_with_stride, dim=0) + + flatten_cls_scores = torch.cat([ + cls_score.permute(0, 2, 3, 1).reshape(num_imgs, -1, + self.cls_out_channels) + for cls_score in cls_scores + ], 1).contiguous() + + flatten_tblrs = torch.cat([ + bbox_pred.permute(0, 2, 3, 1).reshape(num_imgs, -1, 4) + for bbox_pred in bbox_preds + ], 1) + flatten_tblrs = flatten_tblrs * self.flatten_priors_train[..., -1, + None] + flatten_angles = torch.cat([ + angle_pred.permute(0, 2, 3, 1).reshape( + num_imgs, -1, self.angle_out_dim) for angle_pred in angle_preds + ], 1) + flatten_decoded_angle = self.angle_coder.decode( + flatten_angles, keepdim=True) + flatten_tblra = torch.cat([flatten_tblrs, flatten_decoded_angle], + dim=-1) + flatten_rbboxes = distance2obb( + self.flatten_priors_train[..., :2], + flatten_tblra, + angle_version=self.angle_version) + if self.use_hbbox_loss: + flatten_hbboxes = distance2bbox(self.flatten_priors_train[..., :2], + flatten_tblrs) + + assigned_result = self.assigner(flatten_rbboxes.detach(), + flatten_cls_scores.detach(), + self.flatten_priors_train, gt_labels, + gt_bboxes, pad_bbox_flag) + + labels = assigned_result['assigned_labels'].reshape(-1) + label_weights = assigned_result['assigned_labels_weights'].reshape(-1) + bbox_targets = assigned_result['assigned_bboxes'].reshape(-1, 5) + assign_metrics = assigned_result['assign_metrics'].reshape(-1) + cls_preds = flatten_cls_scores.reshape(-1, self.num_classes) + + # FG cat_id: [0, num_classes -1], BG cat_id: num_classes + bg_class_ind = self.num_classes + pos_inds = ((labels >= 0) + & (labels < bg_class_ind)).nonzero().squeeze(1) + avg_factor = reduce_mean(assign_metrics.sum()).clamp_(min=1).item() + + loss_cls = self.loss_cls( + cls_preds, (labels, assign_metrics), + label_weights, + avg_factor=avg_factor) + + pos_bbox_targets = bbox_targets[pos_inds] + + if self.use_hbbox_loss: + bbox_preds = flatten_hbboxes.reshape(-1, 4) + pos_bbox_targets = bbox_cxcywh_to_xyxy(pos_bbox_targets[:, :4]) + else: + bbox_preds = flatten_rbboxes.reshape(-1, 5) + angle_preds = flatten_angles.reshape(-1, self.angle_out_dim) + + if len(pos_inds) > 0: + loss_bbox = self.loss_bbox( + bbox_preds[pos_inds], + pos_bbox_targets, + weight=assign_metrics[pos_inds], + avg_factor=avg_factor) + loss_angle = angle_preds.sum() * 0 + if self.loss_angle is not None: + pos_angle_targets = bbox_targets[pos_inds][:, 4:5] + pos_angle_targets = self.angle_coder.encode(pos_angle_targets) + loss_angle = self.loss_angle( + angle_preds[pos_inds], + pos_angle_targets, + weight=assign_metrics[pos_inds], + avg_factor=avg_factor) + else: + loss_bbox = bbox_preds.sum() * 0 + loss_angle = angle_preds.sum() * 0 + + losses = dict() + losses['loss_cls'] = loss_cls + losses['loss_bbox'] = loss_bbox + if self.loss_angle is not None: + losses['loss_angle'] = loss_angle + + return losses diff --git 
a/mmyolo/models/task_modules/assigners/batch_dsl_assigner.py b/mmyolo/models/task_modules/assigners/batch_dsl_assigner.py index 58337d73..5ae0f802 100644 --- a/mmyolo/models/task_modules/assigners/batch_dsl_assigner.py +++ b/mmyolo/models/task_modules/assigners/batch_dsl_assigner.py @@ -14,6 +14,75 @@ INF = 100000000 EPS = 1.0e-7 +def find_inside_points(boxes: Tensor, + points: Tensor, + box_dim: int = 4, + eps: float = 0.01) -> Tensor: + """Find inside box points in batches. Boxes dimension must be 3. + + Args: + boxes (Tensor): Boxes tensor. Must be batch input. + Has shape of (batch_size, n_boxes, box_dim). + points (Tensor): Points coordinates. Has shape of (n_points, 2). + box_dim (int): The dimension of box. 4 means horizontal box and + 5 means rotated box. Defaults to 4. + eps (float): Make sure the points are inside not on the boundary. + Only use in rotated boxes. Defaults to 0.01. + + Returns: + Tensor: A BoolTensor indicating whether a point is inside + boxes. The index has shape of (n_points, batch_size, n_boxes). + """ + if box_dim == 4: + # Horizontal Boxes + lt_ = points[:, None, None] - boxes[..., :2] + rb_ = boxes[..., 2:] - points[:, None, None] + + deltas = torch.cat([lt_, rb_], dim=-1) + is_in_gts = deltas.min(dim=-1).values > 0 + + elif box_dim == 5: + # Rotated Boxes + points = points[:, None, None] + ctrs, wh, t = torch.split(boxes, [2, 2, 1], dim=-1) + cos_value, sin_value = torch.cos(t), torch.sin(t) + matrix = torch.cat([cos_value, sin_value, -sin_value, cos_value], + dim=-1).reshape(*boxes.shape[:-1], 2, 2) + + offset = points - ctrs + offset = torch.matmul(matrix, offset[..., None]) + offset = offset.squeeze(-1) + offset_x, offset_y = offset[..., 0], offset[..., 1] + w, h = wh[..., 0], wh[..., 1] + is_in_gts = (offset_x <= w / 2 - eps) & (offset_x >= - w / 2 + eps) & \ + (offset_y <= h / 2 - eps) & (offset_y >= - h / 2 + eps) + else: + raise NotImplementedError(f'Unsupport box_dim:{box_dim}') + + return is_in_gts + + +def get_box_center(boxes: Tensor, box_dim: int = 4) -> Tensor: + """Return a tensor representing the centers of boxes. + + Args: + boxes (Tensor): Boxes tensor. Has shape of (b, n, box_dim) + box_dim (int): The dimension of box. 4 means horizontal box and + 5 means rotated box. Defaults to 4. + + Returns: + Tensor: Centers have shape of (b, n, 2) + """ + if box_dim == 4: + # Horizontal Boxes, (x1, y1, x2, y2) + return (boxes[..., :2] + boxes[..., 2:]) / 2.0 + elif box_dim == 5: + # Rotated Boxes, (x, y, w, h, a) + return boxes[..., :2] + else: + raise NotImplementedError(f'Unsupported box_dim:{box_dim}') + + @TASK_UTILS.register_module() class BatchDynamicSoftLabelAssigner(nn.Module): """Computes matching between predictions and ground truth with dynamic soft @@ -28,6 +97,8 @@ class BatchDynamicSoftLabelAssigner(nn.Module): iou_weight (float): The scale factor of iou cost. Defaults to 3.0. iou_calculator (ConfigType): Config of overlaps Calculator. Defaults to dict(type='BboxOverlaps2D'). + batch_iou (bool): Use batch input when calculate IoU. + If set to False use loop instead. Defaults to True. 
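The new find_inside_points helper generalizes the previous horizontal-only center-prior check: for box_dim == 5 the point offset is rotated into the box frame before being compared against half the width and height (minus eps). A standalone sketch of that rotated-box branch, same math without the extra batch dimensions:

# illustrative sketch of the box_dim == 5 branch above, not the patched function itself
import torch

def point_in_rbox(points: torch.Tensor, rboxes: torch.Tensor, eps: float = 0.01) -> torch.Tensor:
    # points: (P, 2); rboxes: (N, 5) as (cx, cy, w, h, angle); returns (P, N) bool
    ctrs, wh, theta = rboxes[:, :2], rboxes[:, 2:4], rboxes[:, 4:5]
    cos_t, sin_t = torch.cos(theta), torch.sin(theta)
    rot = torch.cat([cos_t, sin_t, -sin_t, cos_t], dim=-1).reshape(-1, 2, 2)  # (N, 2, 2)
    offset = points[:, None, :] - ctrs[None, :, :]                            # (P, N, 2)
    offset = torch.matmul(rot[None], offset[..., None]).squeeze(-1)           # rotate into box frame
    w, h = wh[None, :, 0], wh[None, :, 1]
    return (offset[..., 0].abs() <= w / 2 - eps) & (offset[..., 1].abs() <= h / 2 - eps)

inside = point_in_rbox(torch.tensor([[23., 23.]]),
                       torch.tensor([[23., 23., 20., 20., 0.078]]))
print(inside)  # tensor([[True]]) - the box center is always inside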
""" def __init__( @@ -36,7 +107,8 @@ class BatchDynamicSoftLabelAssigner(nn.Module): soft_center_radius: float = 3.0, topk: int = 13, iou_weight: float = 3.0, - iou_calculator: ConfigType = dict(type='mmdet.BboxOverlaps2D') + iou_calculator: ConfigType = dict(type='mmdet.BboxOverlaps2D'), + batch_iou: bool = True, ) -> None: super().__init__() self.num_classes = num_classes @@ -44,6 +116,7 @@ class BatchDynamicSoftLabelAssigner(nn.Module): self.topk = topk self.iou_weight = iou_weight self.iou_calculator = TASK_UTILS.build(iou_calculator) + self.batch_iou = batch_iou @torch.no_grad() def forward(self, pred_bboxes: Tensor, pred_scores: Tensor, priors: Tensor, @@ -51,8 +124,7 @@ class BatchDynamicSoftLabelAssigner(nn.Module): pad_bbox_flag: Tensor) -> dict: num_gt = gt_bboxes.size(1) decoded_bboxes = pred_bboxes - num_bboxes = decoded_bboxes.size(1) - batch_size = decoded_bboxes.size(0) + batch_size, num_bboxes, box_dim = decoded_bboxes.size() if num_gt == 0 or num_bboxes == 0: return { @@ -74,18 +146,16 @@ class BatchDynamicSoftLabelAssigner(nn.Module): raise NotImplementedError( f'type of {type(gt_bboxes)} are not implemented !') else: - # Tensor boxes will be treated as horizontal boxes by defaults - lt_ = prior_center[:, None, None] - gt_bboxes[..., :2] - rb_ = gt_bboxes[..., 2:] - prior_center[:, None, None] + is_in_gts = find_inside_points(gt_bboxes, prior_center, box_dim) - deltas = torch.cat([lt_, rb_], dim=-1) - is_in_gts = deltas.min(dim=-1).values > 0 - is_in_gts = is_in_gts * pad_bbox_flag[..., 0][None] - is_in_gts = is_in_gts.permute(1, 0, 2) - valid_mask = is_in_gts.sum(dim=-1) > 0 + # (N_points, B, N_boxes) + is_in_gts = is_in_gts * pad_bbox_flag[..., 0][None] + # (N_points, B, N_boxes) -> (B, N_points, N_boxes) + is_in_gts = is_in_gts.permute(1, 0, 2) + # (B, N_points) + valid_mask = is_in_gts.sum(dim=-1) > 0 - # Tensor boxes will be treated as horizontal boxes by defaults - gt_center = (gt_bboxes[..., :2] + gt_bboxes[..., 2:]) / 2.0 + gt_center = get_box_center(gt_bboxes, box_dim) strides = priors[..., 2] distance = (priors[None].unsqueeze(2)[..., :2] - @@ -96,7 +166,15 @@ class BatchDynamicSoftLabelAssigner(nn.Module): distance = distance * valid_mask.unsqueeze(-1) soft_center_prior = torch.pow(10, distance - self.soft_center_radius) - pairwise_ious = self.iou_calculator(decoded_bboxes, gt_bboxes) + if self.batch_iou: + pairwise_ious = self.iou_calculator(decoded_bboxes, gt_bboxes) + else: + ious = [] + for box, gt in zip(decoded_bboxes, gt_bboxes): + iou = self.iou_calculator(box, gt) + ious.append(iou) + pairwise_ious = torch.stack(ious, dim=0) + iou_cost = -torch.log(pairwise_ious + EPS) * self.iou_weight # select the predicted scores corresponded to the gt_labels @@ -148,8 +226,9 @@ class BatchDynamicSoftLabelAssigner(nn.Module): assigned_bboxes=assigned_bboxes, assign_metrics=assign_metrics) - def dynamic_k_matching(self, cost_matrix: Tensor, pairwise_ious: Tensor, - pad_bbox_flag: int) -> Tuple[Tensor, Tensor]: + def dynamic_k_matching( + self, cost_matrix: Tensor, pairwise_ious: Tensor, + pad_bbox_flag: int) -> Tuple[Tensor, Tensor, Tensor]: """Use IoU and matching cost to calculate the dynamic top-k positive targets. diff --git a/mmyolo/models/task_modules/coders/__init__.py b/mmyolo/models/task_modules/coders/__init__.py index 6346387c..75b6e7d6 100644 --- a/mmyolo/models/task_modules/coders/__init__.py +++ b/mmyolo/models/task_modules/coders/__init__.py @@ -1,6 +1,10 @@ # Copyright (c) OpenMMLab. All rights reserved. 
+from .distance_angle_point_coder import DistanceAnglePointCoder from .distance_point_bbox_coder import DistancePointBBoxCoder from .yolov5_bbox_coder import YOLOv5BBoxCoder from .yolox_bbox_coder import YOLOXBBoxCoder -__all__ = ['YOLOv5BBoxCoder', 'YOLOXBBoxCoder', 'DistancePointBBoxCoder'] +__all__ = [ + 'YOLOv5BBoxCoder', 'YOLOXBBoxCoder', 'DistancePointBBoxCoder', + 'DistanceAnglePointCoder' +] diff --git a/mmyolo/models/task_modules/coders/distance_angle_point_coder.py b/mmyolo/models/task_modules/coders/distance_angle_point_coder.py new file mode 100644 index 00000000..a7e322f9 --- /dev/null +++ b/mmyolo/models/task_modules/coders/distance_angle_point_coder.py @@ -0,0 +1,94 @@ +# Copyright (c) OpenMMLab. All rights reserved. +from typing import Optional, Sequence, Union + +import torch + +from mmyolo.registry import TASK_UTILS + +try: + from mmrotate.models.task_modules.coders import \ + DistanceAnglePointCoder as MMROTATE_DistanceAnglePointCoder + MMROTATE_AVAILABLE = True +except ImportError: + from mmdet.models.task_modules.coders import BaseBBoxCoder + MMROTATE_DistanceAnglePointCoder = BaseBBoxCoder + MMROTATE_AVAILABLE = False + + +@TASK_UTILS.register_module() +class DistanceAnglePointCoder(MMROTATE_DistanceAnglePointCoder): + """Distance Angle Point BBox coder. + + This coder encodes gt bboxes (x, y, w, h, theta) into (top, bottom, left, + right, theta) and decode it back to the original. + """ + + def __init__(self, clip_border=True, angle_version='oc'): + if not MMROTATE_AVAILABLE: + raise ImportError( + 'Please run "mim install -r requirements/mmrotate.txt" ' + 'to install mmrotate first for rotated detection.') + + super().__init__(clip_border=clip_border, angle_version=angle_version) + + def decode( + self, + points: torch.Tensor, + pred_bboxes: torch.Tensor, + stride: torch.Tensor, + max_shape: Optional[Union[Sequence[int], torch.Tensor, + Sequence[Sequence[int]]]] = None, + ) -> torch.Tensor: + """Decode distance prediction to bounding box. + + Args: + points (Tensor): Shape (B, N, 2) or (N, 2). + pred_bboxes (Tensor): Distance from the given point to 4 + boundaries and angle (left, top, right, bottom, angle). + Shape (B, N, 5) or (N, 5) + max_shape (Sequence[int] or torch.Tensor or Sequence[ + Sequence[int]],optional): Maximum bounds for boxes, specifies + (H, W, C) or (H, W). If priors shape is (B, N, 4), then + the max_shape should be a Sequence[Sequence[int]], + and the length of max_shape should also be B. + Default None. + Returns: + Tensor: Boxes with shape (N, 5) or (B, N, 5) + """ + assert points.size(-2) == pred_bboxes.size(-2) + assert points.size(-1) == 2 + assert pred_bboxes.size(-1) == 5 + if self.clip_border is False: + max_shape = None + + if pred_bboxes.dim() == 2: + stride = stride[:, None] + else: + stride = stride[None, :, None] + pred_bboxes[..., :4] = pred_bboxes[..., :4] * stride + + return self.distance2obb(points, pred_bboxes, max_shape, + self.angle_version) + + def encode(self, + points: torch.Tensor, + gt_bboxes: torch.Tensor, + max_dis: float = 16., + eps: float = 0.01) -> torch.Tensor: + """Encode bounding box to distances. + + Args: + points (Tensor): Shape (N, 2), The format is [x, y]. + gt_bboxes (Tensor): Shape (N, 5), The format is "xywha" + max_dis (float): Upper bound of the distance. Default None. + eps (float): a small value to ensure target < max_dis, instead <=. + Default 0.1. + + Returns: + Tensor: Box transformation deltas. The shape is (N, 5). 
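One detail worth noting in the decode method above: only the four distance channels are multiplied by the stride, the angle channel is left unscaled, and the stride is broadcast differently for 2-D (N, 5) and 3-D (B, N, 5) inputs. A quick sketch of that broadcasting with made-up values:

# illustrative sketch of the stride scaling in decode, not the patched coder
import torch

pred = torch.ones(2, 3, 5)             # (B, N, 5): (l, t, r, b, angle)
stride = torch.tensor([8., 8., 16.])   # one stride per prior, shape (N,)

scaled = pred.clone()
scaled[..., :4] = scaled[..., :4] * stride[None, :, None]  # 3-D case; 2-D uses stride[:, None]
print(scaled[0, 2])  # tensor([16., 16., 16., 16.,  1.]) - the angle stays unscaled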
+ """ + + assert points.size(-2) == gt_bboxes.size(-2) + assert points.size(-1) == 2 + assert gt_bboxes.size(-1) == 5 + return self.obb2distance(points, gt_bboxes, max_dis, eps) diff --git a/mmyolo/models/utils/misc.py b/mmyolo/models/utils/misc.py index 150182a1..531558b6 100644 --- a/mmyolo/models/utils/misc.py +++ b/mmyolo/models/utils/misc.py @@ -3,6 +3,7 @@ import math from typing import Sequence, Union import torch +from mmdet.structures.bbox.transforms import get_box_tensor from torch import Tensor @@ -20,28 +21,33 @@ def make_round(x: float, deepen_factor: float = 1.0) -> int: def gt_instances_preprocess(batch_gt_instances: Union[Tensor, Sequence], batch_size: int) -> Tensor: - """Split batch_gt_instances with batch size, from [all_gt_bboxes, 6] to. + """Split batch_gt_instances with batch size. - [batch_size, number_gt, 5]. If some shape of single batch smaller than - gt bbox len, then using [-1., 0., 0., 0., 0.] to fill. + From [all_gt_bboxes, box_dim+2] to [batch_size, number_gt, box_dim+1]. + For horizontal box, box_dim=4, for rotated box, box_dim=5 + + If some shape of single batch smaller than + gt bbox len, then using zeros to fill. Args: batch_gt_instances (Sequence[Tensor]): Ground truth - instances for whole batch, shape [all_gt_bboxes, 6] + instances for whole batch, shape [all_gt_bboxes, box_dim+2] batch_size (int): Batch size. Returns: - Tensor: batch gt instances data, shape [batch_size, number_gt, 5] + Tensor: batch gt instances data, shape + [batch_size, number_gt, box_dim+1] """ if isinstance(batch_gt_instances, Sequence): max_gt_bbox_len = max( [len(gt_instances) for gt_instances in batch_gt_instances]) - # fill [0., 0., 0., 0., 0.] if some shape of + # fill zeros with length box_dim+1 if some shape of # single batch not equal max_gt_bbox_len batch_instance_list = [] for index, gt_instance in enumerate(batch_gt_instances): bboxes = gt_instance.bboxes labels = gt_instance.labels + box_dim = get_box_tensor(bboxes).size(-1) batch_instance_list.append( torch.cat((labels[:, None], bboxes), dim=-1)) @@ -49,28 +55,33 @@ def gt_instances_preprocess(batch_gt_instances: Union[Tensor, Sequence], continue fill_tensor = bboxes.new_full( - [max_gt_bbox_len - bboxes.shape[0], 5], 0) + [max_gt_bbox_len - bboxes.shape[0], box_dim + 1], 0) batch_instance_list[index] = torch.cat( (batch_instance_list[index], fill_tensor), dim=0) return torch.stack(batch_instance_list) else: # faster version - # format of batch_gt_instances: + # format of batch_gt_instances: [img_ind, cls_ind, (box)] + # For example horizontal box should be: # [img_ind, cls_ind, x1, y1, x2, y2] + # Rotated box should be + # [img_ind, cls_ind, x, y, w, h, a] - # sqlit batch gt instance [all_gt_bboxes, 6] -> - # [batch_size, max_gt_bbox_len, 5] + # sqlit batch gt instance [all_gt_bboxes, box_dim+2] -> + # [batch_size, max_gt_bbox_len, box_dim+1] assert isinstance(batch_gt_instances, Tensor) + box_dim = batch_gt_instances.size(-1) - 2 if len(batch_gt_instances) > 0: gt_images_indexes = batch_gt_instances[:, 0] max_gt_bbox_len = gt_images_indexes.unique( return_counts=True)[1].max() - # fill [0., 0., 0., 0., 0.] 
if some shape of + # fill zeros with length box_dim+1 if some shape of # single batch not equal max_gt_bbox_len - batch_instance = torch.zeros((batch_size, max_gt_bbox_len, 5), - dtype=batch_gt_instances.dtype, - device=batch_gt_instances.device) + batch_instance = torch.zeros( + (batch_size, max_gt_bbox_len, box_dim + 1), + dtype=batch_gt_instances.dtype, + device=batch_gt_instances.device) for i in range(batch_size): match_indexes = gt_images_indexes == i @@ -79,7 +90,7 @@ def gt_instances_preprocess(batch_gt_instances: Union[Tensor, Sequence], batch_instance[i, :gt_num] = batch_gt_instances[ match_indexes, 1:] else: - batch_instance = torch.zeros((batch_size, 0, 5), + batch_instance = torch.zeros((batch_size, 0, box_dim + 1), dtype=batch_gt_instances.dtype, device=batch_gt_instances.device) diff --git a/mmyolo/utils/large_image.py b/mmyolo/utils/large_image.py index 68c6938e..86708046 100644 --- a/mmyolo/utils/large_image.py +++ b/mmyolo/utils/large_image.py @@ -1,11 +1,28 @@ # Copyright (c) OpenMMLab. All rights reserved. from typing import Sequence, Tuple +import torch from mmcv.ops import batched_nms from mmdet.structures import DetDataSample, SampleList from mmengine.structures import InstanceData +def shift_rbboxes(bboxes: torch.Tensor, offset: Sequence[int]): + """Shift rotated bboxes with offset. + + Args: + bboxes (Tensor): The rotated bboxes need to be translated. + With shape (n, 5), which means (x, y, w, h, a). + offset (Sequence[int]): The translation offsets with shape of (2, ). + Returns: + Tensor: Shifted rotated bboxes. + """ + offset_tensor = bboxes.new_tensor(offset) + shifted_bboxes = bboxes.clone() + shifted_bboxes[:, 0:2] = shifted_bboxes[:, 0:2] + offset_tensor + return shifted_bboxes + + def shift_predictions(det_data_samples: SampleList, offsets: Sequence[Tuple[int, int]], src_image_shape: Tuple[int, int]) -> SampleList: @@ -32,8 +49,18 @@ def shift_predictions(det_data_samples: SampleList, for det_data_sample, offset in zip(det_data_samples, offsets): pred_inst = det_data_sample.pred_instances.clone() + # Check bbox type + if pred_inst.bboxes.size(-1) == 4: + # Horizontal bboxes + shifted_bboxes = shift_bboxes(pred_inst.bboxes, offset) + elif pred_inst.bboxes.size(-1) == 5: + # Rotated bboxes + shifted_bboxes = shift_rbboxes(pred_inst.bboxes, offset) + else: + raise NotImplementedError + # shift bboxes and masks - pred_inst.bboxes = shift_bboxes(pred_inst.bboxes, offset) + pred_inst.bboxes = shifted_bboxes if 'masks' in det_data_sample: pred_inst.masks = shift_masks(pred_inst.masks, offset, src_image_shape) diff --git a/requirements/mmrotate.txt b/requirements/mmrotate.txt new file mode 100644 index 00000000..15f05d38 --- /dev/null +++ b/requirements/mmrotate.txt @@ -0,0 +1 @@ +mmrotate>=1.0.0rc1 diff --git a/requirements/tests.txt b/requirements/tests.txt index 8de70105..be276ebe 100644 --- a/requirements/tests.txt +++ b/requirements/tests.txt @@ -7,6 +7,7 @@ kwarray memory_profiler mmcls>=1.0.0rc4 mmrazor>=1.0.0rc2 +mmrotate>=1.0.0rc1 parameterized protobuf<=3.20.1 psutil diff --git a/tests/test_models/test_dense_heads/test_rotated_rtmdet_head.py b/tests/test_models/test_dense_heads/test_rotated_rtmdet_head.py new file mode 100644 index 00000000..96d193cd --- /dev/null +++ b/tests/test_models/test_dense_heads/test_rotated_rtmdet_head.py @@ -0,0 +1,264 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
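Before the new head tests: the gt_instances_preprocess change above makes the batching helper box-dimension agnostic. The fast path infers box_dim from the input width (img_ind + cls_ind + box) and pads each image to the largest GT count with zero rows of length box_dim + 1. A small sketch of what the rotated-box fast path produces, assuming mmyolo with this patch is importable:

# illustrative usage sketch; assumes the patched mmyolo is installed
import torch
from mmyolo.models.utils import gt_instances_preprocess

# fast-path input: one row per GT, [img_ind, cls_ind, cx, cy, w, h, angle]
batch_gt = torch.tensor([
    [0., 1., 23., 23., 20., 20., 0.078],
    [1., 0., 40., 40., 10., 30., 0.500],
    [1., 2., 12., 60., 16., 8., -0.300],
])
out = gt_instances_preprocess(batch_gt, batch_size=2)
print(out.shape)  # torch.Size([2, 2, 6]) - image 0 zero-padded to max_gt=2, box_dim+1=6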
+from unittest import TestCase + +import pytest +import torch +from mmengine.config import Config +from mmengine.structures import InstanceData + +from mmyolo.models.dense_heads import RotatedRTMDetHead +from mmyolo.utils import register_all_modules + +register_all_modules() + + +class TestRotatedRTMDetHead(TestCase): + + def setUp(self): + self.head_module = dict( + type='RotatedRTMDetSepBNHeadModule', + num_classes=4, + in_channels=1, + stacked_convs=1, + feat_channels=64, + featmap_strides=[4, 8, 16]) + + def test_init_weights(self): + head = RotatedRTMDetHead(head_module=self.head_module) + head.head_module.init_weights() + + def test_predict_by_feat(self): + s = 256 + img_metas = [{ + 'img_shape': (s, s, 3), + 'ori_shape': (s, s, 3), + 'scale_factor': (1.0, 1.0), + }] + test_cfg = dict( + multi_label=True, + decode_with_angle=True, + nms_pre=2000, + score_thr=0.01, + nms=dict(type='nms_rotated', iou_threshold=0.1), + max_per_img=300) + test_cfg = Config(test_cfg) + + head = RotatedRTMDetHead( + head_module=self.head_module, test_cfg=test_cfg) + feat = [ + torch.rand(1, 1, s // feat_size, s // feat_size) + for feat_size in [4, 8, 16] + ] + cls_scores, bbox_preds, angle_preds = head.forward(feat) + head.predict_by_feat( + cls_scores, + bbox_preds, + angle_preds, + batch_img_metas=img_metas, + cfg=test_cfg, + rescale=True, + with_nms=True) + head.predict_by_feat( + cls_scores, + bbox_preds, + angle_preds, + batch_img_metas=img_metas, + cfg=test_cfg, + rescale=False, + with_nms=False) + + def test_loss_by_feat(self): + if not torch.cuda.is_available(): + pytest.skip('test requires GPU and torch+cuda') + + s = 256 + img_metas = [{ + 'img_shape': (s, s, 3), + 'batch_input_shape': (s, s), + 'scale_factor': 1, + }] + train_cfg = dict( + assigner=dict( + type='BatchDynamicSoftLabelAssigner', + num_classes=80, + topk=13, + iou_calculator=dict(type='mmrotate.RBboxOverlaps2D'), + batch_iou=False), + allowed_border=-1, + pos_weight=-1, + debug=False) + train_cfg = Config(train_cfg) + head = RotatedRTMDetHead( + head_module=self.head_module, train_cfg=train_cfg).cuda() + + feat = [ + torch.rand(1, 1, s // feat_size, s // feat_size).cuda() + for feat_size in [4, 8, 16] + ] + cls_scores, bbox_preds, angle_preds = head.forward(feat) + + # Test that empty ground truth encourages the network to predict + # background + gt_instances = InstanceData( + bboxes=torch.empty((0, 5)).cuda(), + labels=torch.LongTensor([]).cuda()) + + empty_gt_losses = head.loss_by_feat(cls_scores, bbox_preds, + angle_preds, [gt_instances], + img_metas) + # When there is no truth, the cls loss should be nonzero but there + # should be no box loss. 
+ empty_cls_loss = empty_gt_losses['loss_cls'].sum() + empty_box_loss = empty_gt_losses['loss_bbox'].sum() + self.assertGreater(empty_cls_loss.item(), 0, + 'classification loss should be non-zero') + self.assertEqual( + empty_box_loss.item(), 0, + 'there should be no box loss when there are no true boxes') + + # When truth is non-empty then both cls and box loss should be nonzero + # for random inputs + head = RotatedRTMDetHead( + head_module=self.head_module, train_cfg=train_cfg).cuda() + gt_instances = InstanceData( + bboxes=torch.Tensor([[130.6667, 86.8757, 100.6326, 70.8874, + 0.2]]).cuda(), + labels=torch.LongTensor([1]).cuda()) + + one_gt_losses = head.loss_by_feat(cls_scores, bbox_preds, angle_preds, + [gt_instances], img_metas) + onegt_cls_loss = one_gt_losses['loss_cls'].sum() + onegt_box_loss = one_gt_losses['loss_bbox'].sum() + self.assertGreater(onegt_cls_loss.item(), 0, + 'cls loss should be non-zero') + self.assertGreater(onegt_box_loss.item(), 0, + 'box loss should be non-zero') + + # test num_class = 1 + self.head_module['num_classes'] = 1 + head = RotatedRTMDetHead( + head_module=self.head_module, train_cfg=train_cfg).cuda() + gt_instances = InstanceData( + bboxes=torch.Tensor([[130.6667, 86.8757, 100.6326, 70.8874, + 0.2]]).cuda(), + labels=torch.LongTensor([0]).cuda()) + + cls_scores, bbox_preds, angle_preds = head.forward(feat) + + one_gt_losses = head.loss_by_feat(cls_scores, bbox_preds, angle_preds, + [gt_instances], img_metas) + onegt_cls_loss = one_gt_losses['loss_cls'].sum() + onegt_box_loss = one_gt_losses['loss_bbox'].sum() + self.assertGreater(onegt_cls_loss.item(), 0, + 'cls loss should be non-zero') + self.assertGreater(onegt_box_loss.item(), 0, + 'box loss should be non-zero') + + def test_hbb_loss_by_feat(self): + + s = 256 + img_metas = [{ + 'img_shape': (s, s, 3), + 'batch_input_shape': (s, s), + 'scale_factor': 1, + }] + train_cfg = dict( + assigner=dict( + type='BatchDynamicSoftLabelAssigner', + num_classes=80, + topk=13, + iou_calculator=dict(type='mmrotate.RBboxOverlaps2D'), + batch_iou=False), + allowed_border=-1, + pos_weight=-1, + debug=False) + train_cfg = Config(train_cfg) + hbb_cfg = dict( + bbox_coder=dict( + type='DistanceAnglePointCoder', angle_version='le90'), + loss_bbox=dict(type='mmdet.GIoULoss', loss_weight=2.0), + angle_coder=dict( + type='mmrotate.CSLCoder', + angle_version='le90', + omega=1, + window='gaussian', + radius=1), + loss_angle=dict( + type='mmrotate.SmoothFocalLoss', + gamma=2.0, + alpha=0.25, + loss_weight=0.2), + use_hbbox_loss=True, + ) + head = RotatedRTMDetHead( + head_module=self.head_module, **hbb_cfg, train_cfg=train_cfg) + + feat = [ + torch.rand(1, 1, s // feat_size, s // feat_size) + for feat_size in [4, 8, 16] + ] + cls_scores, bbox_preds, angle_preds = head.forward(feat) + + # Test that empty ground truth encourages the network to predict + # background + gt_instances = InstanceData( + bboxes=torch.empty((0, 5)), labels=torch.LongTensor([])) + + empty_gt_losses = head.loss_by_feat(cls_scores, bbox_preds, + angle_preds, [gt_instances], + img_metas) + # When there is no truth, the cls loss should be nonzero but there + # should be no box loss. 
+ empty_cls_loss = empty_gt_losses['loss_cls'].sum() + empty_box_loss = empty_gt_losses['loss_bbox'].sum() + empty_angle_loss = empty_gt_losses['loss_angle'].sum() + self.assertGreater(empty_cls_loss.item(), 0, + 'classification loss should be non-zero') + self.assertEqual( + empty_box_loss.item(), 0, + 'there should be no box loss when there are no true boxes') + self.assertEqual( + empty_angle_loss.item(), 0, + 'there should be no angle loss when there are no true boxes') + + # When truth is non-empty then both cls and box loss should be nonzero + # for random inputs + head = RotatedRTMDetHead( + head_module=self.head_module, **hbb_cfg, train_cfg=train_cfg) + gt_instances = InstanceData( + bboxes=torch.Tensor([[130.6667, 86.8757, 100.6326, 70.8874, 0.2]]), + labels=torch.LongTensor([1])) + + one_gt_losses = head.loss_by_feat(cls_scores, bbox_preds, angle_preds, + [gt_instances], img_metas) + onegt_cls_loss = one_gt_losses['loss_cls'].sum() + onegt_box_loss = one_gt_losses['loss_bbox'].sum() + onegt_angle_loss = one_gt_losses['loss_angle'].sum() + self.assertGreater(onegt_cls_loss.item(), 0, + 'cls loss should be non-zero') + self.assertGreater(onegt_box_loss.item(), 0, + 'box loss should be non-zero') + self.assertGreater(onegt_angle_loss.item(), 0, + 'angle loss should be non-zero') + + # test num_class = 1 + self.head_module['num_classes'] = 1 + head = RotatedRTMDetHead( + head_module=self.head_module, **hbb_cfg, train_cfg=train_cfg) + gt_instances = InstanceData( + bboxes=torch.Tensor([[130.6667, 86.8757, 100.6326, 70.8874, 0.2]]), + labels=torch.LongTensor([0])) + + cls_scores, bbox_preds, angle_preds = head.forward(feat) + + one_gt_losses = head.loss_by_feat(cls_scores, bbox_preds, angle_preds, + [gt_instances], img_metas) + onegt_cls_loss = one_gt_losses['loss_cls'].sum() + onegt_box_loss = one_gt_losses['loss_bbox'].sum() + onegt_angle_loss = one_gt_losses['loss_angle'].sum() + self.assertGreater(onegt_cls_loss.item(), 0, + 'cls loss should be non-zero') + self.assertGreater(onegt_box_loss.item(), 0, + 'box loss should be non-zero') + self.assertGreater(onegt_angle_loss.item(), 0, + 'angle loss should be non-zero') diff --git a/tests/test_models/test_task_modules/test_assigners/test_batch_dsl_assigner.py b/tests/test_models/test_task_modules/test_assigners/test_batch_dsl_assigner.py new file mode 100644 index 00000000..9644896c --- /dev/null +++ b/tests/test_models/test_task_modules/test_assigners/test_batch_dsl_assigner.py @@ -0,0 +1,192 @@ +# Copyright (c) OpenMMLab. All rights reserved. 
+from unittest import TestCase + +import pytest +import torch + +from mmyolo.models.task_modules.assigners import BatchDynamicSoftLabelAssigner + + +class TestBatchDynamicSoftLabelAssigner(TestCase): + + def test_assign(self): + num_classes = 2 + batch_size = 2 + + assigner = BatchDynamicSoftLabelAssigner( + num_classes=num_classes, + soft_center_radius=3.0, + topk=1, + iou_weight=3.0) + + pred_bboxes = torch.FloatTensor([ + [23, 23, 43, 43], + [4, 5, 6, 7], + ]).unsqueeze(0).repeat(batch_size, 10, 1) + + pred_scores = torch.FloatTensor([ + [0.2], + [0.8], + ]).unsqueeze(0).repeat(batch_size, 10, 1) + + priors = torch.FloatTensor([[30, 30, 8, 8], [4, 5, 6, + 7]]).repeat(10, 1) + + gt_bboxes = torch.FloatTensor([[23, 23, 43, 43]]).unsqueeze(0).repeat( + batch_size, 1, 1) + + gt_labels = torch.LongTensor([[0] + ]).unsqueeze(0).repeat(batch_size, 1, 1) + pad_bbox_flag = torch.FloatTensor([[1]]).unsqueeze(0).repeat( + batch_size, 1, 1) + + assign_result = assigner.forward(pred_bboxes, pred_scores, priors, + gt_labels, gt_bboxes, pad_bbox_flag) + + assigned_labels = assign_result['assigned_labels'] + assigned_labels_weights = assign_result['assigned_labels_weights'] + assigned_bboxes = assign_result['assigned_bboxes'] + assign_metrics = assign_result['assign_metrics'] + + self.assertEqual(assigned_labels.shape, torch.Size([batch_size, 20])) + self.assertEqual(assigned_bboxes.shape, torch.Size([batch_size, 20, + 4])) + self.assertEqual(assigned_labels_weights.shape, + torch.Size([batch_size, 20])) + self.assertEqual(assign_metrics.shape, torch.Size([batch_size, 20])) + + def test_assign_with_empty_gt(self): + num_classes = 2 + batch_size = 2 + + assigner = BatchDynamicSoftLabelAssigner( + num_classes=num_classes, + soft_center_radius=3.0, + topk=1, + iou_weight=3.0) + + pred_bboxes = torch.FloatTensor([ + [23, 23, 43, 43], + [4, 5, 6, 7], + ]).unsqueeze(0).repeat(batch_size, 10, 1) + + pred_scores = torch.FloatTensor([ + [0.2], + [0.8], + ]).unsqueeze(0).repeat(batch_size, 10, 1) + + priors = torch.FloatTensor([[30, 30, 8, 8], [4, 5, 6, + 7]]).repeat(10, 1) + + gt_bboxes = torch.zeros(batch_size, 0, 4) + gt_labels = torch.zeros(batch_size, 0, 1) + pad_bbox_flag = torch.zeros(batch_size, 0, 1) + + assign_result = assigner.forward(pred_bboxes, pred_scores, priors, + gt_labels, gt_bboxes, pad_bbox_flag) + + assigned_labels = assign_result['assigned_labels'] + assigned_labels_weights = assign_result['assigned_labels_weights'] + assigned_bboxes = assign_result['assigned_bboxes'] + assign_metrics = assign_result['assign_metrics'] + + self.assertEqual(assigned_labels.shape, torch.Size([batch_size, 20])) + self.assertEqual(assigned_bboxes.shape, torch.Size([batch_size, 20, + 4])) + self.assertEqual(assigned_labels_weights.shape, + torch.Size([batch_size, 20])) + self.assertEqual(assign_metrics.shape, torch.Size([batch_size, 20])) + + def test_assign_with_empty_boxs(self): + num_classes = 2 + batch_size = 2 + + assigner = BatchDynamicSoftLabelAssigner( + num_classes=num_classes, + soft_center_radius=3.0, + topk=1, + iou_weight=3.0) + + pred_bboxes = torch.zeros(batch_size, 0, 4) + + pred_scores = torch.zeros(batch_size, 0, 4) + + priors = torch.zeros(0, 4) + gt_bboxes = torch.FloatTensor([[23, 23, 43, 43]]).unsqueeze(0).repeat( + batch_size, 1, 1) + + gt_labels = torch.LongTensor([[0] + ]).unsqueeze(0).repeat(batch_size, 1, 1) + pad_bbox_flag = torch.FloatTensor([[1]]).unsqueeze(0).repeat( + batch_size, 1, 1) + + assign_result = assigner.forward(pred_bboxes, pred_scores, priors, + gt_labels, gt_bboxes, 
pad_bbox_flag) + + assigned_labels = assign_result['assigned_labels'] + assigned_labels_weights = assign_result['assigned_labels_weights'] + assigned_bboxes = assign_result['assigned_bboxes'] + assign_metrics = assign_result['assign_metrics'] + + self.assertEqual(assigned_labels.shape, torch.Size([batch_size, 0])) + self.assertEqual(assigned_bboxes.shape, torch.Size([batch_size, 0, 4])) + self.assertEqual(assigned_labels_weights.shape, + torch.Size([batch_size, 0])) + self.assertEqual(assign_metrics.shape, torch.Size([batch_size, 0])) + + def test_assign_rotate_box(self): + try: + import importlib + importlib.import_module('mmrotate') + except ImportError: + pytest.skip('mmrotate is not installed.', allow_module_level=True) + + num_classes = 2 + batch_size = 2 + + assigner = BatchDynamicSoftLabelAssigner( + num_classes=num_classes, + soft_center_radius=3.0, + topk=1, + iou_weight=3.0, + iou_calculator=dict(type='mmrotate.RBboxOverlaps2D'), + # RBboxOverlaps2D doesn't support batch input, use loop instead. + batch_iou=False, + ) + + pred_bboxes = torch.FloatTensor([ + [23, 23, 20, 20, 0.078], + [4, 5, 2, 2, 0.078], + ]).unsqueeze(0).repeat(batch_size, 10, 1) + + pred_scores = torch.FloatTensor([ + [0.2], + [0.8], + ]).unsqueeze(0).repeat(batch_size, 10, 1) + + priors = torch.FloatTensor([[30, 30, 8, 8], [4, 5, 6, + 7]]).repeat(10, 1) + + gt_bboxes = torch.FloatTensor([[23, 23, 20, 20, + 0.078]]).unsqueeze(0).repeat( + batch_size, 1, 1) + + gt_labels = torch.LongTensor([[0] + ]).unsqueeze(0).repeat(batch_size, 1, 1) + pad_bbox_flag = torch.FloatTensor([[1]]).unsqueeze(0).repeat( + batch_size, 1, 1) + + assign_result = assigner.forward(pred_bboxes, pred_scores, priors, + gt_labels, gt_bboxes, pad_bbox_flag) + + assigned_labels = assign_result['assigned_labels'] + assigned_labels_weights = assign_result['assigned_labels_weights'] + assigned_bboxes = assign_result['assigned_bboxes'] + assign_metrics = assign_result['assign_metrics'] + + self.assertEqual(assigned_labels.shape, torch.Size([batch_size, 20])) + self.assertEqual(assigned_bboxes.shape, torch.Size([batch_size, 20, + 5])) + self.assertEqual(assigned_labels_weights.shape, + torch.Size([batch_size, 20])) + self.assertEqual(assign_metrics.shape, torch.Size([batch_size, 20])) diff --git a/tests/test_models/test_utils/test_misc.py b/tests/test_models/test_utils/test_misc.py index 0d2fa0c7..dce95025 100644 --- a/tests/test_models/test_utils/test_misc.py +++ b/tests/test_models/test_utils/test_misc.py @@ -1,7 +1,6 @@ # Copyright (c) OpenMMLab. All rights reserved. -from unittest import TestCase - import numpy as np +import pytest import torch from mmengine.structures import InstanceData from torch import Tensor @@ -12,24 +11,25 @@ from mmyolo.utils import register_all_modules register_all_modules() -class TestGtInstancesPreprocess(TestCase): +class TestGtInstancesPreprocess: - def test(self): + @pytest.mark.parametrize('box_dim', [4, 5]) + def test(self, box_dim): gt_instances = InstanceData( - bboxes=torch.empty((0, 4)), labels=torch.LongTensor([])) + bboxes=torch.empty((0, box_dim)), labels=torch.LongTensor([])) batch_size = 1 batch_instance = gt_instances_preprocess([gt_instances], batch_size) - self.assertIsInstance(batch_instance, Tensor) - self.assertEqual( - len(batch_instance.shape), 3, 'the len of result must be 3.') + assert isinstance(batch_instance, Tensor) + assert len(batch_instance.shape) == 3, 'the len of result must be 3.' 
+ assert batch_instance.size(-1) == box_dim + 1 - def test_fast_version(self): + @pytest.mark.parametrize('box_dim', [4, 5]) + def test_fast_version(self, box_dim: int): gt_instances = torch.from_numpy( - np.array([[0., 1., 0., 0., 0., 0.]], dtype=np.float32)) + np.array([[0., 1., *(0., ) * box_dim]], dtype=np.float32)) batch_size = 1 batch_instance = gt_instances_preprocess(gt_instances, batch_size) - self.assertIsInstance(batch_instance, Tensor) - self.assertEqual( - len(batch_instance.shape), 3, 'the len of result must be 3.') - self.assertEqual(batch_instance.shape[1], 1) - self.assertEqual(batch_instance.shape[2], 5) + assert isinstance(batch_instance, Tensor) + assert len(batch_instance.shape) == 3, 'the len of result must be 3.' + assert batch_instance.shape[1] == 1 + assert batch_instance.shape[2] == box_dim + 1 diff --git a/tools/dataset_converters/dota/README.md b/tools/dataset_converters/dota/README.md new file mode 100644 index 00000000..a166e279 --- /dev/null +++ b/tools/dataset_converters/dota/README.md @@ -0,0 +1,3 @@ +# Preparing DOTA Dataset + +Please refer to [Dataset preparation and description](../../../docs/en/recommended_topics/dataset_preparation.md) diff --git a/tools/dataset_converters/dota/dota_split.py b/tools/dataset_converters/dota/dota_split.py new file mode 100644 index 00000000..0418e9d3 --- /dev/null +++ b/tools/dataset_converters/dota/dota_split.py @@ -0,0 +1,603 @@ +# Copyright (c) OpenMMLab. All rights reserved. +# Reference: https://github.com/jbwang1997/BboxToolkit + +import argparse +import codecs +import datetime +import itertools +import os +import os.path as osp +import time +from functools import partial, reduce +from math import ceil +from multiprocessing import Manager, Pool +from typing import List, Sequence + +import cv2 +import numpy as np +from mmengine import Config, MMLogger, mkdir_or_exist, print_log +from PIL import Image + +Image.MAX_IMAGE_PIXELS = None + +try: + import shapely.geometry as shgeo +except ImportError: + raise ImportError('Please run "pip install shapely" ' + 'to install shapely first.') + +PHASE_REQUIRE_SETS = dict( + trainval=['train', 'val'], + train=[ + 'train', + ], + val=[ + 'val', + ], + test=[ + 'test', + ], +) + + +def parse_args(): + """Parse arguments.""" + parser = argparse.ArgumentParser() + parser.add_argument( + 'split_config', type=str, help='The split config for image slicing.') + parser.add_argument( + 'data_root', type=str, help='Root dir of DOTA dataset.') + parser.add_argument( + 'out_dir', type=str, help='Output dir for split result.') + parser.add_argument( + '--ann-subdir', + default='labelTxt-v1.0', + type=str, + help='output directory') + parser.add_argument( + '--phase', + '-p', + nargs='+', + default=['trainval', 'test'], + type=str, + choices=['trainval', 'train', 'val', 'test'], + help='Phase of the data set to be prepared.') + parser.add_argument( + '--nproc', default=8, type=int, help='Number of processes.') + parser.add_argument( + '--save-ext', + default=None, + type=str, + help='Extension of the saved image.') + parser.add_argument( + '--overwrite', + action='store_true', + help='Whether to allow overwrite if annotation folder exist.') + args = parser.parse_args() + + assert args.split_config is not None, "argument split_config can't be None" + split_cfg = Config.fromfile(args.split_config) + + # assert arguments + assert args.data_root is not None, "argument data_root can't be None" + if args.save_ext: + assert args.save_ext in ['png', 'jpg', 'bmp', 'tif'] + + assert 
len(split_cfg.patch_sizes) == len(split_cfg.patch_overlap_sizes) + assert 0 <= split_cfg.iof_thr <= 1 + if split_cfg.get('padding'): + padding_value = split_cfg.get('padding_value') + assert padding_value is not None, \ + "padding_value can't be None when padding is True." + padding_value = padding_value[0] \ + if len(padding_value) == 1 else padding_value + split_cfg.padding_value = padding_value + else: + split_cfg.padding = False + split_cfg.padding_value = None + return args, split_cfg + + +def _make_dirs(out_dir: str, phase: List[str], allow_overwrite: bool): + """Prepare folder for DOTA dataset. + + Args: + out_dir (str): The output dir for DOTA split. + phase (List[str]): The phase to prepare. + allow_overwrite (bool): Whether to allow overwrite when folder exist. + """ + logger = MMLogger.get_current_instance() + for p in phase: + phase_dir = osp.join(out_dir, p) + if not allow_overwrite: + assert not osp.exists(phase_dir), \ + f'{osp.join(phase_dir)} already exists,' \ + 'If you want to ignore existing files, set --overwrite' + else: + if osp.exists(phase_dir): + logger.warning( + f'{p} set in {osp.join(phase_dir)} will be overwritten') + mkdir_or_exist(phase_dir) + mkdir_or_exist(osp.join(phase_dir, 'images')) + mkdir_or_exist(osp.join(phase_dir, 'annfiles')) + + +def load_original_annotations(data_root: str, + ann_subdir: str = 'labelTxt-v1.0', + phase: str = 'train', + nproc: int = 8): + img_dir = osp.join(data_root, phase, 'images') + assert osp.isdir(img_dir), f'The {img_dir} is not an existing dir!' + + if phase == 'test': + ann_dir = None + else: + ann_dir = osp.join(data_root, phase, ann_subdir, 'labelTxt') + assert osp.isdir(ann_dir), f'The {ann_dir} is not an existing dir!' + + _load_func = partial(_load_dota_single, img_dir=img_dir, ann_dir=ann_dir) + if nproc > 1: + pool = Pool(nproc) + contents = pool.map(_load_func, os.listdir(img_dir)) + pool.close() + else: + contents = list(map(_load_func, os.listdir(img_dir))) + infos = [c for c in contents if c is not None] + return infos + + +def _load_dota_single(imgfile: str, img_dir: str, ann_dir: str): + """Load DOTA's single image. + + Args: + imgfile (str): Filename of single image. + img_dir (str): Path of images. + ann_dir (str): Path of annotations. + + Returns: + result (dict): Information of a single image. + + - ``id``: Image id. + - ``filename``: Filename of single image. + - ``filepath``: Filepath of single image. + - ``width``: The width of image. + - ``height``: The height of image. + - ``annotations``: The annotation of single image. + - ``gsd``: The ground sampling distance. + """ + img_id, ext = osp.splitext(imgfile) + if ext not in ['.jpg', '.JPG', '.png', '.tif', '.bmp']: + return None + + imgpath = osp.join(img_dir, imgfile) + size = Image.open(imgpath).size + txtfile = None if ann_dir is None else osp.join(ann_dir, img_id + '.txt') + content = _load_dota_txt(txtfile) + + content.update( + dict( + width=size[0], + height=size[1], + filename=imgfile, + filepath=imgpath, + id=img_id)) + return content + + +def _load_dota_txt(txtfile): + """Load DOTA's txt annotation. + + Args: + txtfile (str): Filename of single Dota txt annotation. + + Returns: + result (dict): Annotation of single image. + + - ``annotations``: The annotation of single image. + - ``gsd``: The ground sampling distance. 
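For readers unfamiliar with the raw DOTA layout that _load_dota_txt (directly below) parses: each annotation line carries eight polygon coordinates, a category name and an optional difficulty flag, and a file may begin with metadata lines such as gsd:. A short parsing sketch over a made-up line in that layout:

# illustrative sketch of one DOTA annotation line; the coordinates are invented
import numpy as np

line = '939.0 511.0 1005.0 511.0 1005.0 576.0 939.0 576.0 small-vehicle 0'
items = line.split(' ')
poly = np.array(items[:8], dtype=np.float32)    # x1 y1 x2 y2 x3 y3 x4 y4
label = items[8]                                # category name
difficulty = int(items[9]) if len(items) == 10 else 0
print(poly.reshape(4, 2), label, difficulty)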
+ """ + gsd, bboxes, labels, diffs = None, [], [], [] + if txtfile is None: + pass + elif not osp.isfile(txtfile): + print(f"Can't find {txtfile}, treated as empty txtfile") + else: + with open(txtfile) as f: + for line in f: + if line.startswith('gsd'): + num = line.split(':')[-1] + try: + gsd = float(num) + except ValueError: + gsd = None + continue + + items = line.split(' ') + if len(items) >= 9: + bboxes.append([float(i) for i in items[:8]]) + labels.append(items[8]) + diffs.append(int(items[9]) if len(items) == 10 else 0) + + bboxes = np.array(bboxes, dtype=np.float32) if bboxes else \ + np.zeros((0, 8), dtype=np.float32) + diffs = np.array(diffs, dtype=np.int64) if diffs else \ + np.zeros((0,), dtype=np.int64) + ann = dict(bboxes=bboxes, labels=labels, diffs=diffs) + return dict(gsd=gsd, annotations=ann) + + +def poly2hbb(polys): + """Convert polygons to horizontal bboxes. + + Args: + polys (np.array): Polygons with shape (N, 8) + + Returns: + np.array: Horizontal bboxes. + """ + shape = polys.shape + polys = polys.reshape(*shape[:-1], shape[-1] // 2, 2) + lt_point = np.min(polys, axis=-2) + rb_point = np.max(polys, axis=-2) + return np.concatenate([lt_point, rb_point], axis=-1) + + +def get_sliding_window(info, patch_settings, img_rate_thr): + """Get sliding windows. + + Args: + info (dict): Dict of image's width and height. + patch_settings (list): List of patch settings, + each in format (patch_size, patch_overlap). + img_rate_thr (float): Threshold of window area divided by image area. + + Returns: + list[np.array]: Information of valid windows. + """ + eps = 0.01 + windows = [] + width, height = info['width'], info['height'] + for (size, gap) in patch_settings: + assert size > gap, f'invaild size gap pair [{size} {gap}]' + step = size - gap + + x_num = 1 if width <= size else ceil((width - size) / step + 1) + x_start = [step * i for i in range(x_num)] + if len(x_start) > 1 and x_start[-1] + size > width: + x_start[-1] = width - size + + y_num = 1 if height <= size else ceil((height - size) / step + 1) + y_start = [step * i for i in range(y_num)] + if len(y_start) > 1 and y_start[-1] + size > height: + y_start[-1] = height - size + + start = np.array( + list(itertools.product(x_start, y_start)), dtype=np.int64) + stop = start + size + windows.append(np.concatenate([start, stop], axis=1)) + windows = np.concatenate(windows, axis=0) + + img_in_wins = windows.copy() + img_in_wins[:, 0::2] = np.clip(img_in_wins[:, 0::2], 0, width) + img_in_wins[:, 1::2] = np.clip(img_in_wins[:, 1::2], 0, height) + img_areas = (img_in_wins[:, 2] - img_in_wins[:, 0]) * \ + (img_in_wins[:, 3] - img_in_wins[:, 1]) + win_areas = (windows[:, 2] - windows[:, 0]) * \ + (windows[:, 3] - windows[:, 1]) + img_rates = img_areas / win_areas + if not (img_rates > img_rate_thr).any(): + max_rate = img_rates.max() + img_rates[abs(img_rates - max_rate) < eps] = 1 + return windows[img_rates > img_rate_thr] + + +def get_window_annotation(info, windows, iof_thr): + """Get annotation by sliding windows. + + Args: + info (dict): Dict of bbox annotations. + windows (np.array): information of sliding windows. + iof_thr (float): Threshold of overlaps between bbox and window. + + Returns: + list[dict]: List of bbox annotations of every window. 
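The sliding-window generation in get_sliding_window above reduces to simple per-axis arithmetic: window starts advance in steps of patch_size - overlap, and the last window is clamped so it ends exactly at the image border. A toy illustration using the single-scale defaults from the split configs added later in this patch (1024 patch, 200 overlap):

# illustrative sketch of the start-position computation above
from math import ceil

def window_starts(length: int, size: int = 1024, gap: int = 200) -> list:
    step = size - gap
    num = 1 if length <= size else ceil((length - size) / step + 1)
    starts = [step * i for i in range(num)]
    if len(starts) > 1 and starts[-1] + size > length:
        starts[-1] = length - size  # clamp the last window to the image border
    return starts

print(window_starts(3000))  # [0, 824, 1648, 1976]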
+ """ + bboxes = info['annotations']['bboxes'] + iofs = ann_window_iof(bboxes, windows) + + window_anns = [] + for i in range(windows.shape[0]): + win_iofs = iofs[:, i] + pos_inds = np.nonzero(win_iofs >= iof_thr)[0].tolist() + + win_ann = dict() + for k, v in info['annotations'].items(): + try: + win_ann[k] = v[pos_inds] + except TypeError: + win_ann[k] = [v[i] for i in pos_inds] + win_ann['trunc'] = win_iofs[pos_inds] < 1 + window_anns.append(win_ann) + return window_anns + + +def ann_window_iof(anns, window, eps=1e-6): + """Compute overlaps (iof) between annotations (poly) and window (hbox). + + Args: + anns (np.array): quadri annotations with shape (n, 8). + window (np.array): slide windows with shape (m, 4). + eps (float, optional): Defaults to 1e-6. + + Returns: + np.array: iof between box and window. + """ + rows = anns.shape[0] + cols = window.shape[0] + + if rows * cols == 0: + return np.zeros((rows, cols), dtype=np.float32) + + hbboxes_ann = poly2hbb(anns) + hbboxes_win = window + hbboxes_ann = hbboxes_ann[:, None, :] + lt = np.maximum(hbboxes_ann[..., :2], hbboxes_win[..., :2]) + rb = np.minimum(hbboxes_ann[..., 2:], hbboxes_win[..., 2:]) + wh = np.clip(rb - lt, 0, np.inf) + h_overlaps = wh[..., 0] * wh[..., 1] + + l, t, r, b = (window[..., i] for i in range(4)) + polys_win = np.stack([l, t, r, t, r, b, l, b], axis=-1) + sg_polys_ann = [shgeo.Polygon(p) for p in anns.reshape(rows, -1, 2)] + sg_polys_win = [shgeo.Polygon(p) for p in polys_win.reshape(cols, -1, 2)] + overlaps = np.zeros(h_overlaps.shape) + for p in zip(*np.nonzero(h_overlaps)): + overlaps[p] = sg_polys_ann[p[0]].intersection(sg_polys_win[p[-1]]).area + unions = np.array([p.area for p in sg_polys_ann], dtype=np.float32) + unions = unions[..., None] + + unions = np.clip(unions, eps, np.inf) + outputs = overlaps / unions + if outputs.ndim == 1: + outputs = outputs[..., None] + return outputs + + +def crop_and_save_img(info, windows, window_anns, padding, padding_value, + save_dir, anno_dir, img_ext): + """Crop the image and save. + + Args: + info (dict): Image's information. + windows (np.array): information of sliding windows. + window_anns (list[dict]): List of bbox annotations of every window. + padding (bool): If True, with padding. + padding_value (tuple[int|float]): Padding value. + save_dir (str): Save filename. + anno_dir (str): Annotation filename. + img_ext (str): Picture suffix. + + Returns: + list[dict]: Information of paths. + """ + img = cv2.imread(info['filepath']) + patch_infos = [] + for window, ann in zip(windows, window_anns): + patch_info = dict() + for k, v in info.items(): + if k not in [ + 'id', 'filename', 'filepath', 'width', 'height', + 'annotations' + ]: + patch_info[k] = v + + x_start, y_start, x_stop, y_stop = window.tolist() + patch_info['x_start'] = x_start + patch_info['y_start'] = y_start + patch_info['id'] = \ + info['id'] + '__' + str(x_stop - x_start) + \ + '__' + str(x_start) + '___' + str(y_start) + patch_info['ori_id'] = info['id'] + + ann['bboxes'] = shift_qbboxes(ann['bboxes'], [-x_start, -y_start]) + patch_info['ann'] = ann + + patch = img[y_start:y_stop, x_start:x_stop] + if padding: + height = y_stop - y_start + width = x_stop - x_start + if height > patch.shape[0] or width > patch.shape[1]: + padding_patch = np.empty((height, width, patch.shape[-1]), + dtype=np.uint8) + if not isinstance(padding_value, (int, float)): + assert len(padding_value) == patch.shape[-1] + padding_patch[...] = padding_value + padding_patch[:patch.shape[0], :patch.shape[1], ...] 
= patch + patch = padding_patch + patch_info['height'] = patch.shape[0] + patch_info['width'] = patch.shape[1] + + cv2.imwrite( + osp.join(save_dir, patch_info['id'] + '.' + img_ext), patch) + patch_info['filename'] = patch_info['id'] + '.' + img_ext + patch_infos.append(patch_info) + + bboxes_num = patch_info['ann']['bboxes'].shape[0] + outdir = os.path.join(anno_dir, patch_info['id'] + '.txt') + + with codecs.open(outdir, 'w', 'utf-8') as f_out: + if bboxes_num == 0: + pass + else: + for idx in range(bboxes_num): + obj = patch_info['ann'] + outline = ' '.join(list(map(str, obj['bboxes'][idx]))) + diffs = str( + obj['diffs'][idx]) if not obj['trunc'][idx] else '2' + outline = outline + ' ' + obj['labels'][idx] + ' ' + diffs + f_out.write(outline + '\n') + + return patch_infos + + +def shift_qbboxes(bboxes, offset: Sequence[float]): + """Map bboxes from window coordinate back to original coordinate. TODO + Refactor and move to `mmyolo/utils/large_image.py` + + Args: + bboxes (np.array): quadrilateral boxes with window coordinate. + offset (Sequence[float]): The translation offsets with shape of (2, ). + + Returns: + np.array: bboxes with original coordinate. + """ + dim = bboxes.shape[-1] + translated = bboxes + np.array(offset * int(dim / 2), dtype=np.float32) + return translated + + +def single_split(info, patch_settings, min_img_ratio, iof_thr, padding, + padding_value, save_dir, anno_dir, img_ext, lock, prog, + total): + """Single image split. TODO Refactoring to make it more generic. + + Args: + info (dict): Image info and annotations. + patch_settings (list): List of patch settings, + each in format (patch_size, patch_overlap). + min_img_ratio (float): Threshold of window area divided by image area. + iof_thr (float): Threshold of overlaps between bbox and window. + padding (bool): If True, with padding. + padding_value (tuple[int|float]): Padding value. + save_dir (str): Save filename. + anno_dir (str): Annotation filename. + img_ext (str): Picture suffix. + lock (Lock): Lock of Manager. + prog (object): Progress of Manager. + total (int): Length of infos. + + Returns: + list[dict]: Information of paths. 
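shift_qbboxes above maps quadrilateral boxes from window coordinates back to the original image simply by repeating the (dx, dy) offset over every vertex. A quick check with one quadrilateral and an invented offset:

# illustrative usage of the shift_qbboxes logic shown above
import numpy as np

def shift_qbboxes(bboxes: np.ndarray, offset) -> np.ndarray:
    dim = bboxes.shape[-1]
    return bboxes + np.array(offset * int(dim / 2), dtype=np.float32)

quad = np.array([[10., 10., 60., 10., 60., 40., 10., 40.]], dtype=np.float32)
print(shift_qbboxes(quad, [-100, -200]))  # every x shifts by -100, every y by -200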
+ """ + img_ext = img_ext if img_ext is not None else info['filename'].split( + '.')[-1] + windows = get_sliding_window(info, patch_settings, min_img_ratio) + window_anns = get_window_annotation(info, windows, iof_thr) + patch_infos = crop_and_save_img(info, windows, window_anns, padding, + padding_value, save_dir, anno_dir, img_ext) + assert patch_infos + + lock.acquire() + prog.value += 1 + msg = f'({prog.value / total:3.1%} {prog.value}:{total})' + msg += ' - ' + f"Filename: {info['filename']}" + msg += ' - ' + f"width: {info['width']:<5d}" + msg += ' - ' + f"height: {info['height']:<5d}" + msg += ' - ' + f"Objects: {len(info['annotations']['bboxes']):<5d}" + msg += ' - ' + f'Patches: {len(patch_infos)}' + print_log(msg, 'current') + lock.release() + + return patch_infos + + +def main(): + args, split_cfg = parse_args() + + mkdir_or_exist(args.out_dir) + + # init logger + log_file_name = datetime.datetime.now().strftime('%Y%m%d_%H%M%S') + '.log' + logger: MMLogger = MMLogger.get_instance( + 'mmyolo', + log_file=osp.join(args.out_dir, log_file_name), + log_level='INFO') + + # print configs + arg_str = '' + for arg in args._get_kwargs(): + arg_str += arg[0] + ' = ' + str(arg[1]) + '\n' + + logger.info('Base Settings:\n' + arg_str) + logger.info('Split Settings:\n' + split_cfg.pretty_text) + + # make dirs + _make_dirs(args.out_dir, args.phase, args.overwrite) + + # Load original dota data + required_sets = [] + for p in args.phase: + required_sets.extend(PHASE_REQUIRE_SETS[p]) + required_sets = set(required_sets) + + loaded_data_set = dict() + for req_set in required_sets: + logger.info(f'Starting loading DOTA {req_set} set information.') + start_time = time.time() + + infos = load_original_annotations( + data_root=args.data_root, + ann_subdir=args.ann_subdir, + phase=req_set) + + end_time = time.time() + result_log = f'Finishing loading {req_set} set, ' + result_log += f'get {len(infos)} images, ' + result_log += f'using {end_time - start_time:.3f}s.' + logger.info(result_log) + + loaded_data_set[req_set] = infos + + # Preprocess patch settings + patch_settings = [] + for ratio in split_cfg.img_resize_ratio: + for size, gap in zip(split_cfg.patch_sizes, + split_cfg.patch_overlap_sizes): + size_gap = (int(size / ratio), int(gap / ratio)) + if size_gap not in patch_settings: + patch_settings.append(size_gap) + + # Split data + for p in args.phase: + save_imgs_dir = osp.join(args.out_dir, p, 'images') + save_anns_dir = osp.join(args.out_dir, p, 'annfiles') + + logger.info(f'Start splitting {p} set images!') + start = time.time() + manager = Manager() + + data_infos = [] + for req_set in PHASE_REQUIRE_SETS[p]: + data_infos.extend(loaded_data_set[req_set]) + + worker = partial( + single_split, + patch_settings=patch_settings, + min_img_ratio=split_cfg.min_img_ratio, + iof_thr=split_cfg.iof_thr, + padding=split_cfg.padding, + padding_value=split_cfg.padding_value, + save_dir=save_imgs_dir, + anno_dir=save_anns_dir, + img_ext=args.save_ext, + lock=manager.Lock(), + prog=manager.Value('i', 0), + total=len(data_infos)) + + if args.nproc > 1: + pool = Pool(args.nproc) + patch_infos = pool.map(worker, data_infos) + pool.close() + else: + patch_infos = list(map(worker, data_infos)) + + patch_infos = reduce(lambda x, y: x + y, patch_infos) + stop = time.time() + logger.info( + f'Finish splitting {p} set images in {int(stop - start)} second!!!' 
+ ) + logger.info(f'Total images number: {len(patch_infos)}') + + +if __name__ == '__main__': + main() diff --git a/tools/dataset_converters/dota/split_config/multi_scale.json b/tools/dataset_converters/dota/split_config/multi_scale.json new file mode 100644 index 00000000..8cbdc93a --- /dev/null +++ b/tools/dataset_converters/dota/split_config/multi_scale.json @@ -0,0 +1,19 @@ +{ + "patch_sizes": [ + 1024 + ], + "patch_overlap_sizes": [ + 500 + ], + "img_resize_ratio": [ + 0.5, 1.0, 1.5 + ], + "min_img_ratio": 0.6, + "iof_thr": 0.7, + "padding": true, + "padding_value": [ + 104, + 116, + 124 + ] +} diff --git a/tools/dataset_converters/dota/split_config/single_scale.json b/tools/dataset_converters/dota/split_config/single_scale.json new file mode 100644 index 00000000..8c65c40a --- /dev/null +++ b/tools/dataset_converters/dota/split_config/single_scale.json @@ -0,0 +1,19 @@ +{ + "patch_sizes": [ + 1024 + ], + "patch_overlap_sizes": [ + 200 + ], + "img_resize_ratio": [ + 1.0 + ], + "min_img_ratio": 0.6, + "iof_thr": 0.7, + "padding": true, + "padding_value": [ + 104, + 116, + 124 + ] +} diff --git a/tools/model_converters/rtmdet_to_mmyolo.py b/tools/model_converters/rtmdet_to_mmyolo.py index 35f24dbf..9c6f237d 100644 --- a/tools/model_converters/rtmdet_to_mmyolo.py +++ b/tools/model_converters/rtmdet_to_mmyolo.py @@ -45,7 +45,7 @@ def convert(src, dst): # save checkpoint checkpoint = dict() checkpoint['state_dict'] = state_dict - checkpoint['meta'] = blobs['meta'] + checkpoint['meta'] = blobs.get('meta') torch.save(checkpoint, dst) From 4faea33ab552bae58b7b456aaaad61da284da0fb Mon Sep 17 00:00:00 2001 From: Yanyi Liu Date: Thu, 2 Mar 2023 12:44:17 +0800 Subject: [PATCH 58/64] change name and remove loss_obj (#613) --- .../rtmdet-r_l_syncbn_fast_2xb4-36e_dota.py | 4 ++-- mmyolo/models/dense_heads/__init__.py | 6 ++--- mmyolo/models/dense_heads/rtmdet_head.py | 9 ++------ ..._rtmdet_head.py => rtmdet_rotated_head.py} | 16 ++++---------- .../test_rotated_rtmdet_head.py | 22 +++++++++---------- 5 files changed, 22 insertions(+), 35 deletions(-) rename mmyolo/models/dense_heads/{rotated_rtmdet_head.py => rtmdet_rotated_head.py} (97%) diff --git a/configs/rtmdet/rotated/rtmdet-r_l_syncbn_fast_2xb4-36e_dota.py b/configs/rtmdet/rotated/rtmdet-r_l_syncbn_fast_2xb4-36e_dota.py index 1f34e137..50f58e50 100644 --- a/configs/rtmdet/rotated/rtmdet-r_l_syncbn_fast_2xb4-36e_dota.py +++ b/configs/rtmdet/rotated/rtmdet-r_l_syncbn_fast_2xb4-36e_dota.py @@ -119,9 +119,9 @@ model = dict( norm_cfg=norm_cfg, act_cfg=dict(type='SiLU', inplace=True)), bbox_head=dict( - type='RotatedRTMDetHead', + type='RTMDetRotatedHead', head_module=dict( - type='RotatedRTMDetSepBNHeadModule', + type='RTMDetRotatedSepBNHeadModule', num_classes=num_classes, widen_factor=widen_factor, in_channels=256, diff --git a/mmyolo/models/dense_heads/__init__.py b/mmyolo/models/dense_heads/__init__.py index eaf96209..9fb8fbda 100644 --- a/mmyolo/models/dense_heads/__init__.py +++ b/mmyolo/models/dense_heads/__init__.py @@ -1,8 +1,8 @@ # Copyright (c) OpenMMLab. All rights reserved. 
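Connecting the JSON split configs above to the preprocessing step in main(): every (patch_size, overlap) pair is divided by each img_resize_ratio and de-duplicated, so the multi-scale config yields three effective patch settings. A quick reproduction of that expansion:

# illustrative reproduction of the patch-setting expansion in main() above
patch_sizes = [1024]
patch_overlap_sizes = [500]
img_resize_ratio = [0.5, 1.0, 1.5]   # values from multi_scale.json

patch_settings = []
for ratio in img_resize_ratio:
    for size, gap in zip(patch_sizes, patch_overlap_sizes):
        size_gap = (int(size / ratio), int(gap / ratio))
        if size_gap not in patch_settings:
            patch_settings.append(size_gap)
print(patch_settings)  # [(2048, 1000), (1024, 500), (682, 333)]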
from .ppyoloe_head import PPYOLOEHead, PPYOLOEHeadModule -from .rotated_rtmdet_head import (RotatedRTMDetHead, - RotatedRTMDetSepBNHeadModule) from .rtmdet_head import RTMDetHead, RTMDetSepBNHeadModule +from .rtmdet_rotated_head import (RTMDetRotatedHead, + RTMDetRotatedSepBNHeadModule) from .yolov5_head import YOLOv5Head, YOLOv5HeadModule from .yolov6_head import YOLOv6Head, YOLOv6HeadModule from .yolov7_head import YOLOv7Head, YOLOv7HeadModule, YOLOv7p6HeadModule @@ -14,5 +14,5 @@ __all__ = [ 'YOLOv6HeadModule', 'YOLOXHeadModule', 'RTMDetHead', 'RTMDetSepBNHeadModule', 'YOLOv7Head', 'PPYOLOEHead', 'PPYOLOEHeadModule', 'YOLOv7HeadModule', 'YOLOv7p6HeadModule', 'YOLOv8Head', 'YOLOv8HeadModule', - 'RotatedRTMDetHead', 'RotatedRTMDetSepBNHeadModule' + 'RTMDetRotatedHead', 'RTMDetRotatedSepBNHeadModule' ] diff --git a/mmyolo/models/dense_heads/rtmdet_head.py b/mmyolo/models/dense_heads/rtmdet_head.py index 42b2948e..54245a97 100644 --- a/mmyolo/models/dense_heads/rtmdet_head.py +++ b/mmyolo/models/dense_heads/rtmdet_head.py @@ -197,7 +197,6 @@ class RTMDetHead(YOLOv5Head): bbox_coder (:obj:`ConfigDict` or dict): Config of bbox coder. loss_cls (:obj:`ConfigDict` or dict): Config of classification loss. loss_bbox (:obj:`ConfigDict` or dict): Config of localization loss. - loss_obj (:obj:`ConfigDict` or dict): Config of objectness loss. train_cfg (:obj:`ConfigDict` or dict, optional): Training config of anchor head. Defaults to None. test_cfg (:obj:`ConfigDict` or dict, optional): Testing config of @@ -221,11 +220,6 @@ class RTMDetHead(YOLOv5Head): loss_weight=1.0), loss_bbox: ConfigType = dict( type='mmdet.GIoULoss', loss_weight=2.0), - loss_obj: ConfigType = dict( - type='mmdet.CrossEntropyLoss', - use_sigmoid=True, - reduction='sum', - loss_weight=1.0), train_cfg: OptConfigType = None, test_cfg: OptConfigType = None, init_cfg: OptMultiConfig = None): @@ -236,7 +230,6 @@ class RTMDetHead(YOLOv5Head): bbox_coder=bbox_coder, loss_cls=loss_cls, loss_bbox=loss_bbox, - loss_obj=loss_obj, train_cfg=train_cfg, test_cfg=test_cfg, init_cfg=init_cfg) @@ -246,6 +239,8 @@ class RTMDetHead(YOLOv5Head): self.cls_out_channels = self.num_classes else: self.cls_out_channels = self.num_classes + 1 + # rtmdet doesn't need loss_obj + self.loss_obj = None def special_init(self): """Since YOLO series algorithms will inherit from YOLOv5Head, but diff --git a/mmyolo/models/dense_heads/rotated_rtmdet_head.py b/mmyolo/models/dense_heads/rtmdet_rotated_head.py similarity index 97% rename from mmyolo/models/dense_heads/rotated_rtmdet_head.py rename to mmyolo/models/dense_heads/rtmdet_rotated_head.py index 2f452ff3..1428b4fd 100644 --- a/mmyolo/models/dense_heads/rotated_rtmdet_head.py +++ b/mmyolo/models/dense_heads/rtmdet_rotated_head.py @@ -29,7 +29,7 @@ except ImportError: @MODELS.register_module() -class RotatedRTMDetSepBNHeadModule(RTMDetSepBNHeadModule): +class RTMDetRotatedSepBNHeadModule(RTMDetSepBNHeadModule): """Detection Head Module of RTMDet-R. Compared with RTMDet Detection Head Module, RTMDet-R adds @@ -163,10 +163,10 @@ class RotatedRTMDetSepBNHeadModule(RTMDetSepBNHeadModule): @MODELS.register_module() -class RotatedRTMDetHead(RTMDetHead): +class RTMDetRotatedHead(RTMDetHead): """RTMDet-R head. - Compared with RTMDetHead, RotatedRTMDetHead add some args to support + Compared with RTMDetHead, RTMDetRotatedHead add some args to support rotated object detection. - `angle_version` used to limit angle_range during training. 
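
A minimal, hypothetical config sketch of how downstream configs refer to the rotated head after this rename. Only the `type` strings come from the diffs above (and the `rtmdet-r_l` config earlier in this patch); every numeric value is a placeholder, not copied from any shipped config.

```python
# Hypothetical config fragment showing the registry names after the rename.
# Only the `type` strings are taken from this patch; the numeric values are
# placeholders for illustration.
bbox_head = dict(
    type='RTMDetRotatedHead',                 # was 'RotatedRTMDetHead'
    head_module=dict(
        type='RTMDetRotatedSepBNHeadModule',  # was 'RotatedRTMDetSepBNHeadModule'
        num_classes=15,                       # e.g. the 15 DOTA categories
        in_channels=256,
        feat_channels=256,
        featmap_strides=[8, 16, 32]),
    # `loss_obj` is no longer accepted as an argument: RTMDet-style heads now
    # set `self.loss_obj = None` internally, so the key is simply omitted.
)
```
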
@@ -234,14 +234,12 @@ class RotatedRTMDetHead(RTMDetHead): angle_pred────────►decode────(a)───────┘ Args: - head_module(ConfigType): Base module used for RotatedRTMDetHead. + head_module(ConfigType): Base module used for RTMDetRotatedHead. prior_generator: Points generator feature maps in 2D points-based detectors. bbox_coder (:obj:`ConfigDict` or dict): Config of bbox coder. loss_cls (:obj:`ConfigDict` or dict): Config of classification loss. loss_bbox (:obj:`ConfigDict` or dict): Config of localization loss. - loss_obj (:obj:`ConfigDict` or dict): Config of objectness loss. - Just for compatibility, not actually required. angle_version (str): Angle representations. Defaults to 'le90'. use_hbbox_loss (bool): If true, use horizontal bbox loss and loss_angle should not be None. Default to False. @@ -271,11 +269,6 @@ class RotatedRTMDetHead(RTMDetHead): loss_bbox: ConfigType = dict( type='mmrotate.RotatedIoULoss', mode='linear', loss_weight=2.0), - loss_obj: ConfigType = dict( - type='mmdet.CrossEntropyLoss', - use_sigmoid=True, - reduction='sum', - loss_weight=1.0), angle_version: str = 'le90', use_hbbox_loss: bool = False, angle_coder: ConfigType = dict(type='mmrotate.PseudoAngleCoder'), @@ -306,7 +299,6 @@ class RotatedRTMDetHead(RTMDetHead): bbox_coder=bbox_coder, loss_cls=loss_cls, loss_bbox=loss_bbox, - loss_obj=loss_obj, train_cfg=train_cfg, test_cfg=test_cfg, init_cfg=init_cfg) diff --git a/tests/test_models/test_dense_heads/test_rotated_rtmdet_head.py b/tests/test_models/test_dense_heads/test_rotated_rtmdet_head.py index 96d193cd..21e1d4d1 100644 --- a/tests/test_models/test_dense_heads/test_rotated_rtmdet_head.py +++ b/tests/test_models/test_dense_heads/test_rotated_rtmdet_head.py @@ -6,17 +6,17 @@ import torch from mmengine.config import Config from mmengine.structures import InstanceData -from mmyolo.models.dense_heads import RotatedRTMDetHead +from mmyolo.models.dense_heads import RTMDetRotatedHead from mmyolo.utils import register_all_modules register_all_modules() -class TestRotatedRTMDetHead(TestCase): +class TestRTMDetRotatedHead(TestCase): def setUp(self): self.head_module = dict( - type='RotatedRTMDetSepBNHeadModule', + type='RTMDetRotatedSepBNHeadModule', num_classes=4, in_channels=1, stacked_convs=1, @@ -24,7 +24,7 @@ class TestRotatedRTMDetHead(TestCase): featmap_strides=[4, 8, 16]) def test_init_weights(self): - head = RotatedRTMDetHead(head_module=self.head_module) + head = RTMDetRotatedHead(head_module=self.head_module) head.head_module.init_weights() def test_predict_by_feat(self): @@ -43,7 +43,7 @@ class TestRotatedRTMDetHead(TestCase): max_per_img=300) test_cfg = Config(test_cfg) - head = RotatedRTMDetHead( + head = RTMDetRotatedHead( head_module=self.head_module, test_cfg=test_cfg) feat = [ torch.rand(1, 1, s // feat_size, s // feat_size) @@ -88,7 +88,7 @@ class TestRotatedRTMDetHead(TestCase): pos_weight=-1, debug=False) train_cfg = Config(train_cfg) - head = RotatedRTMDetHead( + head = RTMDetRotatedHead( head_module=self.head_module, train_cfg=train_cfg).cuda() feat = [ @@ -118,7 +118,7 @@ class TestRotatedRTMDetHead(TestCase): # When truth is non-empty then both cls and box loss should be nonzero # for random inputs - head = RotatedRTMDetHead( + head = RTMDetRotatedHead( head_module=self.head_module, train_cfg=train_cfg).cuda() gt_instances = InstanceData( bboxes=torch.Tensor([[130.6667, 86.8757, 100.6326, 70.8874, @@ -136,7 +136,7 @@ class TestRotatedRTMDetHead(TestCase): # test num_class = 1 self.head_module['num_classes'] = 1 - head = RotatedRTMDetHead( + 
head = RTMDetRotatedHead( head_module=self.head_module, train_cfg=train_cfg).cuda() gt_instances = InstanceData( bboxes=torch.Tensor([[130.6667, 86.8757, 100.6326, 70.8874, @@ -190,7 +190,7 @@ class TestRotatedRTMDetHead(TestCase): loss_weight=0.2), use_hbbox_loss=True, ) - head = RotatedRTMDetHead( + head = RTMDetRotatedHead( head_module=self.head_module, **hbb_cfg, train_cfg=train_cfg) feat = [ @@ -223,7 +223,7 @@ class TestRotatedRTMDetHead(TestCase): # When truth is non-empty then both cls and box loss should be nonzero # for random inputs - head = RotatedRTMDetHead( + head = RTMDetRotatedHead( head_module=self.head_module, **hbb_cfg, train_cfg=train_cfg) gt_instances = InstanceData( bboxes=torch.Tensor([[130.6667, 86.8757, 100.6326, 70.8874, 0.2]]), @@ -243,7 +243,7 @@ class TestRotatedRTMDetHead(TestCase): # test num_class = 1 self.head_module['num_classes'] = 1 - head = RotatedRTMDetHead( + head = RTMDetRotatedHead( head_module=self.head_module, **hbb_cfg, train_cfg=train_cfg) gt_instances = InstanceData( bboxes=torch.Tensor([[130.6667, 86.8757, 100.6326, 70.8874, 0.2]]), From 414deaea9acd55f04c649e3a550f7bc31e9f6cd8 Mon Sep 17 00:00:00 2001 From: Tianlong Ai <50650583+AI-Tianlong@users.noreply.github.com> Date: Thu, 2 Mar 2023 14:17:38 +0800 Subject: [PATCH 59/64] Update yolov6_description.md (#611) * Update yolov6_description.md * Fix lint * update --------- Co-authored-by: huanghaian --- .../algorithm_descriptions/yolov6_description.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/zh_cn/recommended_topics/algorithm_descriptions/yolov6_description.md b/docs/zh_cn/recommended_topics/algorithm_descriptions/yolov6_description.md index 60d59f89..29bede36 100644 --- a/docs/zh_cn/recommended_topics/algorithm_descriptions/yolov6_description.md +++ b/docs/zh_cn/recommended_topics/algorithm_descriptions/yolov6_description.md @@ -63,7 +63,7 @@ YOLOv6 目标检测算法中使用的数据增强与 YOLOv5 基本一致,唯 关于每一个增强的详细解释,详情请看 [YOLOv5 数据增强模块](yolov5_description.md) -另外,YOLOv6 参考了 YOLOX 的数据增强方式,分为 2 钟增强方法组,一开始和 YOLOv5 一致,但是在最后 15 个 epoch 的时候将 `Mosaic` 使用 `YOLOv5KeepRatioResize` + `LetterResize` 替代了,个人感觉是为了拟合真实情况。 +另外,YOLOv6 参考了 YOLOX 的数据增强方式,分为 2 种增强方法组,一开始和 YOLOv5 一致,但是在最后 15 个 epoch 的时候将 `Mosaic` 使用 `YOLOv5KeepRatioResize` + `LetterResize` 替代了,个人感觉是为了拟合真实情况。 ### 1.2 网络结构 @@ -334,7 +334,7 @@ def varifocal_loss(pred, target, alpha=0.75, gamma=2.0, iou_weighted=True): ##### SIou Loss SIoU 损失函数是 [SIoU Loss: More Powerful Learning for Bounding Box Regression](https://arxiv.org/pdf/2205.12740.pdf) -中提出的度量预测框与 `GT` 的匹配度的指标,由之前的`GIoU`, `CIoU`, `DIoU` 都没有考虑预测框向 `GT` +中提出的度量预测框与 `GT` 的匹配度的指标,由于之前的`GIoU`, `CIoU`, `DIoU` 都没有考虑预测框向 `GT` 框回归的角度,然而角度也确实是回归中一个重要的影响因素,因此提出了全新的`SIoU`。 SIoU 损失主要由四个度量方面组成: @@ -429,7 +429,7 @@ def siou_loss(pred, target, eps=1e-7): #### Object Loss -在 YOLOv6 中,由于额外的置信度预测头可能与 `Aligned Head` 有所冲突,经实验验证也在不同大小的模型上都有掉点, +在 YOLOv6 中,由于额外的置信度预测头可能与 `Aligned Head` 有所冲突,经实验验证在不同大小的模型上也都有掉点, 所以最后选择弃用 `Objectness` 分支。 ### 1.5 优化策略和训练过程 From 146cd930c58efd0d6876cb01cbcc949be747d261 Mon Sep 17 00:00:00 2001 From: Nioolek <40284075+Nioolek@users.noreply.github.com> Date: Thu, 2 Mar 2023 15:11:09 +0800 Subject: [PATCH 60/64] [Feature] Support RTMDet Ins Segmentation Inference (#583) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * update config * reproduce map in mmyolo * reproduce map in mmyolo * collate fn、process加mask * reproduce result * beauty code * beauty code * beauty code * del yolov5_seg_head.py * beauty config * add doc and typehint * 
del objectness * fix ut; add empty res --- ...tmdet-ins_s_syncbn_fast_8xb32-300e_coco.py | 31 + mmyolo/datasets/utils.py | 7 + .../data_preprocessors/data_preprocessor.py | 6 +- mmyolo/models/dense_heads/__init__.py | 4 +- mmyolo/models/dense_heads/rtmdet_ins_head.py | 725 ++++++++++++++++++ .../test_dense_heads/test_rtmdet_head.py | 84 ++ 6 files changed, 854 insertions(+), 3 deletions(-) create mode 100644 configs/rtmdet/rtmdet-ins_s_syncbn_fast_8xb32-300e_coco.py create mode 100644 mmyolo/models/dense_heads/rtmdet_ins_head.py diff --git a/configs/rtmdet/rtmdet-ins_s_syncbn_fast_8xb32-300e_coco.py b/configs/rtmdet/rtmdet-ins_s_syncbn_fast_8xb32-300e_coco.py new file mode 100644 index 00000000..279a7990 --- /dev/null +++ b/configs/rtmdet/rtmdet-ins_s_syncbn_fast_8xb32-300e_coco.py @@ -0,0 +1,31 @@ +_base_ = './rtmdet_s_syncbn_fast_8xb32-300e_coco.py' + +widen_factor = 0.5 + +model = dict( + bbox_head=dict( + type='RTMDetInsSepBNHead', + head_module=dict( + type='RTMDetInsSepBNHeadModule', + use_sigmoid_cls=True, + widen_factor=widen_factor), + loss_mask=dict( + type='mmdet.DiceLoss', loss_weight=2.0, eps=5e-6, + reduction='mean')), + test_cfg=dict( + multi_label=True, + nms_pre=1000, + min_bbox_size=0, + score_thr=0.05, + nms=dict(type='nms', iou_threshold=0.6), + max_per_img=100, + mask_thr_binary=0.5)) + +_base_.test_pipeline[-2] = dict( + type='LoadAnnotations', with_bbox=True, with_mask=True, _scope_='mmdet') + +val_dataloader = dict(dataset=dict(pipeline=_base_.test_pipeline)) +test_dataloader = val_dataloader + +val_evaluator = dict(metric=['bbox', 'segm']) +test_evaluator = val_evaluator diff --git a/mmyolo/datasets/utils.py b/mmyolo/datasets/utils.py index 1c056200..62fe5484 100644 --- a/mmyolo/datasets/utils.py +++ b/mmyolo/datasets/utils.py @@ -19,6 +19,7 @@ def yolov5_collate(data_batch: Sequence, """ batch_imgs = [] batch_bboxes_labels = [] + batch_masks = [] for i in range(len(data_batch)): datasamples = data_batch[i]['data_samples'] inputs = data_batch[i]['inputs'] @@ -26,6 +27,10 @@ def yolov5_collate(data_batch: Sequence, gt_bboxes = datasamples.gt_instances.bboxes.tensor gt_labels = datasamples.gt_instances.labels + if 'masks' in datasamples.gt_instances: + masks = datasamples.gt_instances.masks.to_tensor( + dtype=torch.bool, device=gt_bboxes.device) + batch_masks.append(masks) batch_idx = gt_labels.new_full((len(gt_labels), 1), i) bboxes_labels = torch.cat((batch_idx, gt_labels[:, None], gt_bboxes), dim=1) @@ -36,6 +41,8 @@ def yolov5_collate(data_batch: Sequence, 'bboxes_labels': torch.cat(batch_bboxes_labels, 0) } } + if len(batch_masks) > 0: + collated_results['data_samples']['masks'] = torch.cat(batch_masks, 0) if use_ms_training: collated_results['inputs'] = batch_imgs diff --git a/mmyolo/models/data_preprocessors/data_preprocessor.py b/mmyolo/models/data_preprocessors/data_preprocessor.py index 611ecb22..f09fd8e7 100644 --- a/mmyolo/models/data_preprocessors/data_preprocessor.py +++ b/mmyolo/models/data_preprocessors/data_preprocessor.py @@ -96,12 +96,14 @@ class YOLOv5DetDataPreprocessor(DetDataPreprocessor): inputs, data_samples = batch_aug(inputs, data_samples) img_metas = [{'batch_input_shape': inputs.shape[2:]}] * len(inputs) - data_samples = { + data_samples_output = { 'bboxes_labels': data_samples['bboxes_labels'], 'img_metas': img_metas } + if 'masks' in data_samples: + data_samples_output['masks'] = data_samples['masks'] - return {'inputs': inputs, 'data_samples': data_samples} + return {'inputs': inputs, 'data_samples': data_samples_output} 
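
A rough, illustrative sketch (not part of this patch) of the batch layout that the updated `yolov5_collate` and `YOLOv5DetDataPreprocessor` hand to the model when instance masks are annotated. The key names follow the diffs above; the shapes and instance counts are made-up examples.

```python
import torch

# Illustrative batch after collation and preprocessing: 2 images with a total
# of 7 ground-truth instances (all shapes are examples only).
example_batch = {
    'inputs': torch.zeros(2, 3, 640, 640),  # stacked, normalized images
    'data_samples': {
        # one row per instance: [batch_idx, class_label, x1, y1, x2, y2]
        'bboxes_labels': torch.zeros(7, 6),
        # present only when 'masks' was found in gt_instances
        'masks': torch.zeros(7, 640, 640, dtype=torch.bool),
        # added by the data preprocessor, one entry per image
        'img_metas': [{'batch_input_shape': (640, 640)}] * 2,
    },
}
```
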
@MODELS.register_module() diff --git a/mmyolo/models/dense_heads/__init__.py b/mmyolo/models/dense_heads/__init__.py index 9fb8fbda..a95abd61 100644 --- a/mmyolo/models/dense_heads/__init__.py +++ b/mmyolo/models/dense_heads/__init__.py @@ -1,6 +1,7 @@ # Copyright (c) OpenMMLab. All rights reserved. from .ppyoloe_head import PPYOLOEHead, PPYOLOEHeadModule from .rtmdet_head import RTMDetHead, RTMDetSepBNHeadModule +from .rtmdet_ins_head import RTMDetInsSepBNHead, RTMDetInsSepBNHeadModule from .rtmdet_rotated_head import (RTMDetRotatedHead, RTMDetRotatedSepBNHeadModule) from .yolov5_head import YOLOv5Head, YOLOv5HeadModule @@ -14,5 +15,6 @@ __all__ = [ 'YOLOv6HeadModule', 'YOLOXHeadModule', 'RTMDetHead', 'RTMDetSepBNHeadModule', 'YOLOv7Head', 'PPYOLOEHead', 'PPYOLOEHeadModule', 'YOLOv7HeadModule', 'YOLOv7p6HeadModule', 'YOLOv8Head', 'YOLOv8HeadModule', - 'RTMDetRotatedHead', 'RTMDetRotatedSepBNHeadModule' + 'RTMDetRotatedHead', 'RTMDetRotatedSepBNHeadModule', 'RTMDetInsSepBNHead', + 'RTMDetInsSepBNHeadModule' ] diff --git a/mmyolo/models/dense_heads/rtmdet_ins_head.py b/mmyolo/models/dense_heads/rtmdet_ins_head.py new file mode 100644 index 00000000..1d0562aa --- /dev/null +++ b/mmyolo/models/dense_heads/rtmdet_ins_head.py @@ -0,0 +1,725 @@ +# Copyright (c) OpenMMLab. All rights reserved. +import copy +from typing import List, Optional, Tuple + +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +from mmcv.cnn import ConvModule, is_norm +from mmcv.ops import batched_nms +from mmdet.models.utils import filter_scores_and_topk +from mmdet.structures.bbox import get_box_tensor, get_box_wh, scale_boxes +from mmdet.utils import (ConfigType, InstanceList, OptConfigType, + OptInstanceList, OptMultiConfig) +from mmengine import ConfigDict +from mmengine.model import (BaseModule, bias_init_with_prob, constant_init, + normal_init) +from mmengine.structures import InstanceData +from torch import Tensor + +from mmyolo.registry import MODELS +from .rtmdet_head import RTMDetHead, RTMDetSepBNHeadModule + + +class MaskFeatModule(BaseModule): + """Mask feature head used in RTMDet-Ins. Copy from mmdet. + + Args: + in_channels (int): Number of channels in the input feature map. + feat_channels (int): Number of hidden channels of the mask feature + map branch. + stacked_convs (int): Number of convs in mask feature branch. + num_levels (int): The starting feature map level from RPN that + will be used to predict the mask feature map. + num_prototypes (int): Number of output channel of the mask feature + map branch. This is the channel count of the mask + feature map that to be dynamically convolved with the predicted + kernel. + act_cfg (:obj:`ConfigDict` or dict): Config dict for activation layer. + Default: dict(type='ReLU', inplace=True) + norm_cfg (dict): Config dict for normalization layer. Default: None. 
+ """ + + def __init__( + self, + in_channels: int, + feat_channels: int = 256, + stacked_convs: int = 4, + num_levels: int = 3, + num_prototypes: int = 8, + act_cfg: ConfigType = dict(type='ReLU', inplace=True), + norm_cfg: ConfigType = dict(type='BN') + ) -> None: + super().__init__(init_cfg=None) + self.num_levels = num_levels + self.fusion_conv = nn.Conv2d(num_levels * in_channels, in_channels, 1) + convs = [] + for i in range(stacked_convs): + in_c = in_channels if i == 0 else feat_channels + convs.append( + ConvModule( + in_c, + feat_channels, + 3, + padding=1, + act_cfg=act_cfg, + norm_cfg=norm_cfg)) + self.stacked_convs = nn.Sequential(*convs) + self.projection = nn.Conv2d( + feat_channels, num_prototypes, kernel_size=1) + + def forward(self, features: Tuple[Tensor, ...]) -> Tensor: + # multi-level feature fusion + fusion_feats = [features[0]] + size = features[0].shape[-2:] + for i in range(1, self.num_levels): + f = F.interpolate(features[i], size=size, mode='bilinear') + fusion_feats.append(f) + fusion_feats = torch.cat(fusion_feats, dim=1) + fusion_feats = self.fusion_conv(fusion_feats) + # pred mask feats + mask_features = self.stacked_convs(fusion_feats) + mask_features = self.projection(mask_features) + return mask_features + + +@MODELS.register_module() +class RTMDetInsSepBNHeadModule(RTMDetSepBNHeadModule): + """Detection and Instance Segmentation Head of RTMDet. + + Args: + num_classes (int): Number of categories excluding the background + category. + num_prototypes (int): Number of mask prototype features extracted + from the mask head. Defaults to 8. + dyconv_channels (int): Channel of the dynamic conv layers. + Defaults to 8. + num_dyconvs (int): Number of the dynamic convolution layers. + Defaults to 3. + use_sigmoid_cls (bool): Use sigmoid for class prediction. + Defaults to True. 
+ """ + + def __init__(self, + num_classes: int, + *args, + num_prototypes: int = 8, + dyconv_channels: int = 8, + num_dyconvs: int = 3, + use_sigmoid_cls: bool = True, + **kwargs): + self.num_prototypes = num_prototypes + self.num_dyconvs = num_dyconvs + self.dyconv_channels = dyconv_channels + self.use_sigmoid_cls = use_sigmoid_cls + if self.use_sigmoid_cls: + self.cls_out_channels = num_classes + else: + self.cls_out_channels = num_classes + 1 + super().__init__(num_classes=num_classes, *args, **kwargs) + + def _init_layers(self): + """Initialize layers of the head.""" + self.cls_convs = nn.ModuleList() + self.reg_convs = nn.ModuleList() + self.kernel_convs = nn.ModuleList() + + self.rtm_cls = nn.ModuleList() + self.rtm_reg = nn.ModuleList() + self.rtm_kernel = nn.ModuleList() + self.rtm_obj = nn.ModuleList() + + # calculate num dynamic parameters + weight_nums, bias_nums = [], [] + for i in range(self.num_dyconvs): + if i == 0: + weight_nums.append( + (self.num_prototypes + 2) * self.dyconv_channels) + bias_nums.append(self.dyconv_channels) + elif i == self.num_dyconvs - 1: + weight_nums.append(self.dyconv_channels) + bias_nums.append(1) + else: + weight_nums.append(self.dyconv_channels * self.dyconv_channels) + bias_nums.append(self.dyconv_channels) + self.weight_nums = weight_nums + self.bias_nums = bias_nums + self.num_gen_params = sum(weight_nums) + sum(bias_nums) + pred_pad_size = self.pred_kernel_size // 2 + + for n in range(len(self.featmap_strides)): + cls_convs = nn.ModuleList() + reg_convs = nn.ModuleList() + kernel_convs = nn.ModuleList() + for i in range(self.stacked_convs): + chn = self.in_channels if i == 0 else self.feat_channels + cls_convs.append( + ConvModule( + chn, + self.feat_channels, + 3, + stride=1, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg)) + reg_convs.append( + ConvModule( + chn, + self.feat_channels, + 3, + stride=1, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg)) + kernel_convs.append( + ConvModule( + chn, + self.feat_channels, + 3, + stride=1, + padding=1, + conv_cfg=self.conv_cfg, + norm_cfg=self.norm_cfg, + act_cfg=self.act_cfg)) + self.cls_convs.append(cls_convs) + self.reg_convs.append(cls_convs) + self.kernel_convs.append(kernel_convs) + + self.rtm_cls.append( + nn.Conv2d( + self.feat_channels, + self.num_base_priors * self.cls_out_channels, + self.pred_kernel_size, + padding=pred_pad_size)) + self.rtm_reg.append( + nn.Conv2d( + self.feat_channels, + self.num_base_priors * 4, + self.pred_kernel_size, + padding=pred_pad_size)) + self.rtm_kernel.append( + nn.Conv2d( + self.feat_channels, + self.num_gen_params, + self.pred_kernel_size, + padding=pred_pad_size)) + + if self.share_conv: + for n in range(len(self.featmap_strides)): + for i in range(self.stacked_convs): + self.cls_convs[n][i].conv = self.cls_convs[0][i].conv + self.reg_convs[n][i].conv = self.reg_convs[0][i].conv + + self.mask_head = MaskFeatModule( + in_channels=self.in_channels, + feat_channels=self.feat_channels, + stacked_convs=4, + num_levels=len(self.featmap_strides), + num_prototypes=self.num_prototypes, + act_cfg=self.act_cfg, + norm_cfg=self.norm_cfg) + + def init_weights(self) -> None: + """Initialize weights of the head.""" + for m in self.modules(): + if isinstance(m, nn.Conv2d): + normal_init(m, mean=0, std=0.01) + if is_norm(m): + constant_init(m, 1) + bias_cls = bias_init_with_prob(0.01) + for rtm_cls, rtm_reg, rtm_kernel in zip(self.rtm_cls, self.rtm_reg, + self.rtm_kernel): + 
normal_init(rtm_cls, std=0.01, bias=bias_cls) + normal_init(rtm_reg, std=0.01, bias=1) + + def forward(self, feats: Tuple[Tensor, ...]) -> tuple: + """Forward features from the upstream network. + + Args: + feats (tuple[Tensor]): Features from the upstream network, each is + a 4D-tensor. + + Returns: + tuple: Usually a tuple of classification scores and bbox prediction + - cls_scores (list[Tensor]): Classification scores for all scale + levels, each is a 4D-tensor, the channels number is + num_base_priors * num_classes. + - bbox_preds (list[Tensor]): Box energies / deltas for all scale + levels, each is a 4D-tensor, the channels number is + num_base_priors * 4. + - kernel_preds (list[Tensor]): Dynamic conv kernels for all scale + levels, each is a 4D-tensor, the channels number is + num_gen_params. + - mask_feat (Tensor): Mask prototype features. + Has shape (batch_size, num_prototypes, H, W). + """ + mask_feat = self.mask_head(feats) + + cls_scores = [] + bbox_preds = [] + kernel_preds = [] + for idx, (x, stride) in enumerate(zip(feats, self.featmap_strides)): + cls_feat = x + reg_feat = x + kernel_feat = x + + for cls_layer in self.cls_convs[idx]: + cls_feat = cls_layer(cls_feat) + cls_score = self.rtm_cls[idx](cls_feat) + + for kernel_layer in self.kernel_convs[idx]: + kernel_feat = kernel_layer(kernel_feat) + kernel_pred = self.rtm_kernel[idx](kernel_feat) + + for reg_layer in self.reg_convs[idx]: + reg_feat = reg_layer(reg_feat) + reg_dist = self.rtm_reg[idx](reg_feat) + + cls_scores.append(cls_score) + bbox_preds.append(reg_dist) + kernel_preds.append(kernel_pred) + return tuple(cls_scores), tuple(bbox_preds), tuple( + kernel_preds), mask_feat + + +@MODELS.register_module() +class RTMDetInsSepBNHead(RTMDetHead): + """RTMDet Instance Segmentation head. + + Args: + head_module(ConfigType): Base module used for RTMDetInsSepBNHead + prior_generator: Points generator feature maps in + 2D points-based detectors. + bbox_coder (:obj:`ConfigDict` or dict): Config of bbox coder. + loss_cls (:obj:`ConfigDict` or dict): Config of classification loss. + loss_bbox (:obj:`ConfigDict` or dict): Config of localization loss. + loss_mask (:obj:`ConfigDict` or dict): Config of mask loss. + train_cfg (:obj:`ConfigDict` or dict, optional): Training config of + anchor head. Defaults to None. + test_cfg (:obj:`ConfigDict` or dict, optional): Testing config of + anchor head. Defaults to None. + init_cfg (:obj:`ConfigDict` or list[:obj:`ConfigDict`] or dict or + list[dict], optional): Initialization config dict. + Defaults to None. 
+ """ + + def __init__(self, + head_module: ConfigType, + prior_generator: ConfigType = dict( + type='mmdet.MlvlPointGenerator', + offset=0, + strides=[8, 16, 32]), + bbox_coder: ConfigType = dict(type='DistancePointBBoxCoder'), + loss_cls: ConfigType = dict( + type='mmdet.QualityFocalLoss', + use_sigmoid=True, + beta=2.0, + loss_weight=1.0), + loss_bbox: ConfigType = dict( + type='mmdet.GIoULoss', loss_weight=2.0), + loss_mask=dict( + type='mmdet.DiceLoss', + loss_weight=2.0, + eps=5e-6, + reduction='mean'), + train_cfg: OptConfigType = None, + test_cfg: OptConfigType = None, + init_cfg: OptMultiConfig = None): + + super().__init__( + head_module=head_module, + prior_generator=prior_generator, + bbox_coder=bbox_coder, + loss_cls=loss_cls, + loss_bbox=loss_bbox, + train_cfg=train_cfg, + test_cfg=test_cfg, + init_cfg=init_cfg) + + self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False) + if isinstance(self.head_module, RTMDetInsSepBNHeadModule): + assert self.use_sigmoid_cls == self.head_module.use_sigmoid_cls + self.loss_mask = MODELS.build(loss_mask) + + def predict_by_feat(self, + cls_scores: List[Tensor], + bbox_preds: List[Tensor], + kernel_preds: List[Tensor], + mask_feats: Tensor, + score_factors: Optional[List[Tensor]] = None, + batch_img_metas: Optional[List[dict]] = None, + cfg: Optional[ConfigDict] = None, + rescale: bool = True, + with_nms: bool = True) -> List[InstanceData]: + """Transform a batch of output features extracted from the head into + bbox results. + + Note: When score_factors is not None, the cls_scores are + usually multiplied by it then obtain the real score used in NMS. + + Args: + cls_scores (list[Tensor]): Classification scores for all + scale levels, each is a 4D-tensor, has shape + (batch_size, num_priors * num_classes, H, W). + bbox_preds (list[Tensor]): Box energies / deltas for all + scale levels, each is a 4D-tensor, has shape + (batch_size, num_priors * 4, H, W). + kernel_preds (list[Tensor]): Kernel predictions of dynamic + convs for all scale levels, each is a 4D-tensor, has shape + (batch_size, num_params, H, W). + mask_feats (Tensor): Mask prototype features extracted from the + mask head, has shape (batch_size, num_prototypes, H, W). + score_factors (list[Tensor], optional): Score factor for + all scale level, each is a 4D-tensor, has shape + (batch_size, num_priors * 1, H, W). Defaults to None. + batch_img_metas (list[dict], Optional): Batch image meta info. + Defaults to None. + cfg (ConfigDict, optional): Test / postprocessing + configuration, if None, test_cfg would be used. + Defaults to None. + rescale (bool): If True, return boxes in original image space. + Defaults to False. + with_nms (bool): If True, do nms before return boxes. + Defaults to True. + + Returns: + list[:obj:`InstanceData`]: Object detection and instance + segmentation results of each image after the post process. + Each item usually contains following keys. + + - scores (Tensor): Classification scores, has a shape + (num_instance, ) + - labels (Tensor): Labels of bboxes, has a shape + (num_instances, ). + - bboxes (Tensor): Has a shape (num_instances, 4), + the last dimension 4 arrange as (x1, y1, x2, y2). + - masks (Tensor): Has a shape (num_instances, h, w). 
+ """ + cfg = self.test_cfg if cfg is None else cfg + cfg = copy.deepcopy(cfg) + + multi_label = cfg.multi_label + multi_label &= self.num_classes > 1 + cfg.multi_label = multi_label + + num_imgs = len(batch_img_metas) + featmap_sizes = [cls_score.shape[2:] for cls_score in cls_scores] + + # If the shape does not change, use the previous mlvl_priors + if featmap_sizes != self.featmap_sizes: + self.mlvl_priors = self.prior_generator.grid_priors( + featmap_sizes, + dtype=cls_scores[0].dtype, + device=cls_scores[0].device, + with_stride=True) + self.featmap_sizes = featmap_sizes + flatten_priors = torch.cat(self.mlvl_priors) + + mlvl_strides = [ + flatten_priors.new_full( + (featmap_size.numel() * self.num_base_priors, ), stride) for + featmap_size, stride in zip(featmap_sizes, self.featmap_strides) + ] + flatten_stride = torch.cat(mlvl_strides) + + # flatten cls_scores, bbox_preds + flatten_cls_scores = [ + cls_score.permute(0, 2, 3, 1).reshape(num_imgs, -1, + self.num_classes) + for cls_score in cls_scores + ] + flatten_bbox_preds = [ + bbox_pred.permute(0, 2, 3, 1).reshape(num_imgs, -1, 4) + for bbox_pred in bbox_preds + ] + flatten_kernel_preds = [ + kernel_pred.permute(0, 2, 3, + 1).reshape(num_imgs, -1, + self.head_module.num_gen_params) + for kernel_pred in kernel_preds + ] + + flatten_cls_scores = torch.cat(flatten_cls_scores, dim=1).sigmoid() + flatten_bbox_preds = torch.cat(flatten_bbox_preds, dim=1) + flatten_decoded_bboxes = self.bbox_coder.decode( + flatten_priors[..., :2].unsqueeze(0), flatten_bbox_preds, + flatten_stride) + + flatten_kernel_preds = torch.cat(flatten_kernel_preds, dim=1) + + results_list = [] + for (bboxes, scores, kernel_pred, mask_feat, + img_meta) in zip(flatten_decoded_bboxes, flatten_cls_scores, + flatten_kernel_preds, mask_feats, + batch_img_metas): + ori_shape = img_meta['ori_shape'] + scale_factor = img_meta['scale_factor'] + if 'pad_param' in img_meta: + pad_param = img_meta['pad_param'] + else: + pad_param = None + + score_thr = cfg.get('score_thr', -1) + if scores.shape[0] == 0: + empty_results = InstanceData() + empty_results.bboxes = bboxes + empty_results.scores = scores[:, 0] + empty_results.labels = scores[:, 0].int() + h, w = ori_shape[:2] if rescale else img_meta['img_shape'][:2] + empty_results.masks = torch.zeros( + size=(0, h, w), dtype=torch.bool, device=bboxes.device) + results_list.append(empty_results) + continue + + nms_pre = cfg.get('nms_pre', 100000) + if cfg.multi_label is False: + scores, labels = scores.max(1, keepdim=True) + scores, _, keep_idxs, results = filter_scores_and_topk( + scores, + score_thr, + nms_pre, + results=dict( + labels=labels[:, 0], + kernel_pred=kernel_pred, + priors=flatten_priors)) + labels = results['labels'] + kernel_pred = results['kernel_pred'] + priors = results['priors'] + else: + out = filter_scores_and_topk( + scores, + score_thr, + nms_pre, + results=dict( + kernel_pred=kernel_pred, priors=flatten_priors)) + scores, labels, keep_idxs, filtered_results = out + kernel_pred = filtered_results['kernel_pred'] + priors = filtered_results['priors'] + + results = InstanceData( + scores=scores, + labels=labels, + bboxes=bboxes[keep_idxs], + kernels=kernel_pred, + priors=priors) + + if rescale: + if pad_param is not None: + results.bboxes -= results.bboxes.new_tensor([ + pad_param[2], pad_param[0], pad_param[2], pad_param[0] + ]) + results.bboxes /= results.bboxes.new_tensor( + scale_factor).repeat((1, 2)) + + if cfg.get('yolox_style', False): + # do not need max_per_img + cfg.max_per_img = len(results) + + 
results = self._bbox_mask_post_process( + results=results, + mask_feat=mask_feat, + cfg=cfg, + rescale_bbox=False, + rescale_mask=rescale, + with_nms=with_nms, + pad_param=pad_param, + img_meta=img_meta) + results.bboxes[:, 0::2].clamp_(0, ori_shape[1]) + results.bboxes[:, 1::2].clamp_(0, ori_shape[0]) + + results_list.append(results) + return results_list + + def _bbox_mask_post_process( + self, + results: InstanceData, + mask_feat: Tensor, + cfg: ConfigDict, + rescale_bbox: bool = False, + rescale_mask: bool = True, + with_nms: bool = True, + pad_param: Optional[np.ndarray] = None, + img_meta: Optional[dict] = None) -> InstanceData: + """bbox and mask post-processing method. + + The boxes would be rescaled to the original image scale and do + the nms operation. Usually `with_nms` is False is used for aug test. + + Args: + results (:obj:`InstaceData`): Detection instance results, + each item has shape (num_bboxes, ). + mask_feat (Tensor): Mask prototype features extracted from the + mask head, has shape (batch_size, num_prototypes, H, W). + cfg (ConfigDict): Test / postprocessing configuration, + if None, test_cfg would be used. + rescale_bbox (bool): If True, return boxes in original image space. + Default to False. + rescale_mask (bool): If True, return masks in original image space. + Default to True. + with_nms (bool): If True, do nms before return boxes. + Default to True. + img_meta (dict, optional): Image meta info. Defaults to None. + + Returns: + :obj:`InstanceData`: Detection results of each image + after the post process. + Each item usually contains following keys. + + - scores (Tensor): Classification scores, has a shape + (num_instance, ) + - labels (Tensor): Labels of bboxes, has a shape + (num_instances, ). + - bboxes (Tensor): Has a shape (num_instances, 4), + the last dimension 4 arrange as (x1, y1, x2, y2). + - masks (Tensor): Has a shape (num_instances, h, w). + """ + if rescale_bbox: + assert img_meta.get('scale_factor') is not None + scale_factor = [1 / s for s in img_meta['scale_factor']] + results.bboxes = scale_boxes(results.bboxes, scale_factor) + + if hasattr(results, 'score_factors'): + # TODO: Add sqrt operation in order to be consistent with + # the paper. 
+ score_factors = results.pop('score_factors') + results.scores = results.scores * score_factors + + # filter small size bboxes + if cfg.get('min_bbox_size', -1) >= 0: + w, h = get_box_wh(results.bboxes) + valid_mask = (w > cfg.min_bbox_size) & (h > cfg.min_bbox_size) + if not valid_mask.all(): + results = results[valid_mask] + + # TODO: deal with `with_nms` and `nms_cfg=None` in test_cfg + assert with_nms, 'with_nms must be True for RTMDet-Ins' + if results.bboxes.numel() > 0: + bboxes = get_box_tensor(results.bboxes) + det_bboxes, keep_idxs = batched_nms(bboxes, results.scores, + results.labels, cfg.nms) + results = results[keep_idxs] + # some nms would reweight the score, such as softnms + results.scores = det_bboxes[:, -1] + results = results[:cfg.max_per_img] + + # process masks + mask_logits = self._mask_predict_by_feat(mask_feat, + results.kernels, + results.priors) + + stride = self.prior_generator.strides[0][0] + mask_logits = F.interpolate( + mask_logits.unsqueeze(0), scale_factor=stride, mode='bilinear') + if rescale_mask: + # TODO: When use mmdet.Resize or mmdet.Pad, will meet bug + # Use img_meta to crop and resize + ori_h, ori_w = img_meta['ori_shape'][:2] + if isinstance(pad_param, np.ndarray): + pad_param = pad_param.astype(np.int32) + crop_y1, crop_y2 = pad_param[ + 0], mask_logits.shape[-2] - pad_param[1] + crop_x1, crop_x2 = pad_param[ + 2], mask_logits.shape[-1] - pad_param[3] + mask_logits = mask_logits[..., crop_y1:crop_y2, + crop_x1:crop_x2] + mask_logits = F.interpolate( + mask_logits, + size=[ori_h, ori_w], + mode='bilinear', + align_corners=False) + + masks = mask_logits.sigmoid().squeeze(0) + masks = masks > cfg.mask_thr_binary + results.masks = masks + else: + h, w = img_meta['ori_shape'][:2] if rescale_mask else img_meta[ + 'img_shape'][:2] + results.masks = torch.zeros( + size=(results.bboxes.shape[0], h, w), + dtype=torch.bool, + device=results.bboxes.device) + return results + + def _mask_predict_by_feat(self, mask_feat: Tensor, kernels: Tensor, + priors: Tensor) -> Tensor: + """Generate mask logits from mask features with dynamic convs. + + Args: + mask_feat (Tensor): Mask prototype features. + Has shape (num_prototypes, H, W). + kernels (Tensor): Kernel parameters for each instance. + Has shape (num_instance, num_params) + priors (Tensor): Center priors for each instance. + Has shape (num_instance, 4). + Returns: + Tensor: Instance segmentation masks for each instance. + Has shape (num_instance, H, W). 
+ """ + num_inst = kernels.shape[0] + h, w = mask_feat.size()[-2:] + if num_inst < 1: + return torch.empty( + size=(num_inst, h, w), + dtype=mask_feat.dtype, + device=mask_feat.device) + if len(mask_feat.shape) < 4: + mask_feat.unsqueeze(0) + + coord = self.prior_generator.single_level_grid_priors( + (h, w), level_idx=0, device=mask_feat.device).reshape(1, -1, 2) + num_inst = priors.shape[0] + points = priors[:, :2].reshape(-1, 1, 2) + strides = priors[:, 2:].reshape(-1, 1, 2) + relative_coord = (points - coord).permute(0, 2, 1) / ( + strides[..., 0].reshape(-1, 1, 1) * 8) + relative_coord = relative_coord.reshape(num_inst, 2, h, w) + + mask_feat = torch.cat( + [relative_coord, + mask_feat.repeat(num_inst, 1, 1, 1)], dim=1) + weights, biases = self.parse_dynamic_params(kernels) + + n_layers = len(weights) + x = mask_feat.reshape(1, -1, h, w) + for i, (weight, bias) in enumerate(zip(weights, biases)): + x = F.conv2d( + x, weight, bias=bias, stride=1, padding=0, groups=num_inst) + if i < n_layers - 1: + x = F.relu(x) + x = x.reshape(num_inst, h, w) + return x + + def parse_dynamic_params(self, flatten_kernels: Tensor) -> tuple: + """split kernel head prediction to conv weight and bias.""" + n_inst = flatten_kernels.size(0) + n_layers = len(self.head_module.weight_nums) + params_splits = list( + torch.split_with_sizes( + flatten_kernels, + self.head_module.weight_nums + self.head_module.bias_nums, + dim=1)) + weight_splits = params_splits[:n_layers] + bias_splits = params_splits[n_layers:] + for i in range(n_layers): + if i < n_layers - 1: + weight_splits[i] = weight_splits[i].reshape( + n_inst * self.head_module.dyconv_channels, -1, 1, 1) + bias_splits[i] = bias_splits[i].reshape( + n_inst * self.head_module.dyconv_channels) + else: + weight_splits[i] = weight_splits[i].reshape(n_inst, -1, 1, 1) + bias_splits[i] = bias_splits[i].reshape(n_inst) + + return weight_splits, bias_splits + + def loss_by_feat( + self, + cls_scores: List[Tensor], + bbox_preds: List[Tensor], + batch_gt_instances: InstanceList, + batch_img_metas: List[dict], + batch_gt_instances_ignore: OptInstanceList = None) -> dict: + raise NotImplementedError diff --git a/tests/test_models/test_dense_heads/test_rtmdet_head.py b/tests/test_models/test_dense_heads/test_rtmdet_head.py index 3f3fccfa..cce5ee6f 100644 --- a/tests/test_models/test_dense_heads/test_rtmdet_head.py +++ b/tests/test_models/test_dense_heads/test_rtmdet_head.py @@ -1,10 +1,12 @@ # Copyright (c) OpenMMLab. All rights reserved. 
from unittest import TestCase +import numpy as np import torch from mmengine.config import Config from mmengine.structures import InstanceData +from mmyolo.models import RTMDetInsSepBNHead from mmyolo.models.dense_heads import RTMDetHead from mmyolo.utils import register_all_modules @@ -137,3 +139,85 @@ class TestRTMDetHead(TestCase): 'cls loss should be non-zero') self.assertGreater(onegt_box_loss.item(), 0, 'box loss should be non-zero') + + +class TestRTMDetInsHead(TestCase): + + def setUp(self): + self.head_module = dict( + type='RTMDetInsSepBNHeadModule', + num_classes=4, + in_channels=1, + stacked_convs=1, + feat_channels=64, + featmap_strides=[4, 8, 16], + num_prototypes=8, + dyconv_channels=8, + num_dyconvs=3, + share_conv=True, + use_sigmoid_cls=True) + + def test_init_weights(self): + head = RTMDetInsSepBNHead(head_module=self.head_module) + head.head_module.init_weights() + + def test_predict_by_feat(self): + s = 256 + img_metas = [{ + 'img_shape': (s, s, 3), + 'ori_shape': (s, s, 3), + 'scale_factor': (1.0, 1.0), + 'pad_param': np.array([0., 0., 0., 0.]) + }] + test_cfg = dict( + multi_label=False, + nms_pre=1000, + min_bbox_size=0, + score_thr=0.05, + nms=dict(type='nms', iou_threshold=0.6), + max_per_img=100, + mask_thr_binary=0.5) + test_cfg = Config(test_cfg) + + head = RTMDetInsSepBNHead( + head_module=self.head_module, test_cfg=test_cfg) + feat = [ + torch.rand(1, 1, s // feat_size, s // feat_size) + for feat_size in [4, 8, 16] + ] + cls_scores, bbox_preds, kernel_preds, mask_feat = head.forward(feat) + head.predict_by_feat( + cls_scores, + bbox_preds, + kernel_preds, + mask_feat, + batch_img_metas=img_metas, + cfg=test_cfg, + rescale=True, + with_nms=True) + + img_metas_without_pad_param = [{ + 'img_shape': (s, s, 3), + 'ori_shape': (s, s, 3), + 'scale_factor': (1.0, 1.0) + }] + head.predict_by_feat( + cls_scores, + bbox_preds, + kernel_preds, + mask_feat, + batch_img_metas=img_metas_without_pad_param, + cfg=test_cfg, + rescale=True, + with_nms=True) + + with self.assertRaises(AssertionError): + head.predict_by_feat( + cls_scores, + bbox_preds, + kernel_preds, + mask_feat, + batch_img_metas=img_metas, + cfg=test_cfg, + rescale=False, + with_nms=False) From 9e8f764399471688bb2ca5a71e48faa346d94b1b Mon Sep 17 00:00:00 2001 From: Xin Li <7219519+xin-li-67@users.noreply.github.com> Date: Thu, 2 Mar 2023 15:14:51 +0800 Subject: [PATCH 61/64] fix image-demo.py empty class_name fatal (#615) --- projects/easydeploy/tools/image-demo.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/projects/easydeploy/tools/image-demo.py b/projects/easydeploy/tools/image-demo.py index 197ad070..c85f31a0 100644 --- a/projects/easydeploy/tools/image-demo.py +++ b/projects/easydeploy/tools/image-demo.py @@ -74,6 +74,7 @@ def main(): model.to(args.device) cfg = Config.fromfile(args.config) + class_names = cfg.get('class_name') test_pipeline = get_test_pipeline_cfg(cfg) test_pipeline[0] = ConfigDict({'type': 'mmdet.LoadImageFromNDArray'}) @@ -125,8 +126,12 @@ def main(): for (bbox, score, label) in zip(bboxes, scores, labels): bbox = bbox.tolist() color = colors[label] - label_name = cfg.get('class_name', {})[label] - name = f'cls:{label_name}_score:{score:0.4f}' + + if class_names is not None: + label_name = class_names[label] + name = f'cls:{label_name}_score:{score:0.4f}' + else: + name = f'cls:{label}_score:{score:0.4f}' cv2.rectangle(bgr, bbox[:2], bbox[2:], color, 2) cv2.putText( From 30cc772524b0a0d505618db10975806741c04500 Mon Sep 17 00:00:00 2001 From: Yue Sun 
<36404164+aptsunny@users.noreply.github.com> Date: Thu, 2 Mar 2023 17:01:41 +0800 Subject: [PATCH 62/64] Razor backbone cfg & log update (#610) * razor cfg update * formatter * update readme * readme format * fix test ci * remove log --------- Co-authored-by: sunyue1 --- configs/razor/subnets/README.md | 20 ++++----- ..._tiny_ofa_lat31_syncbn_16xb16-300e_coco.py | 44 +++++++++++-------- ...pos_shufflenetv2_syncbn_8xb16-300e_coco.py | 1 - ...nas_a6_d12_syncbn_fast_8xb32-300e_coco.py} | 0 tests/test_downstream/test_mmrazor.py | 4 +- 5 files changed, 37 insertions(+), 32 deletions(-) rename configs/razor/subnets/{yolov6_l_attentivenas_a6_d12_syncbn_fast_16xb16-300e_coco.py => yolov6_l_attentivenas_a6_d12_syncbn_fast_8xb32-300e_coco.py} (100%) diff --git a/configs/razor/subnets/README.md b/configs/razor/subnets/README.md index ad7a716b..093dedd7 100644 --- a/configs/razor/subnets/README.md +++ b/configs/razor/subnets/README.md @@ -62,18 +62,18 @@ CUDA_VISIBLE_DEVICES=0 PORT=29500 ./tools/dist_test.sh configs/razor/subnets/yol Here we provide the baseline version of YOLO Series with NAS backbone. -| Model | size | box AP | Params(M) | FLOPs(G) | Config | Download | -| :------------------------: | :--: | :----: | :----------: | :------: | :----------------------------------------------------------------------------------------------------------------------------------------: | :---------------------------------------------------------------------------------------------------------------------------------------------------------------------: | -| yolov5-s | 640 | 37.7 | 7.235 | 8.265 | [config](https://github.com/open-mmlab/mmyolo/blob/main/configs/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth) | -| yolov5_s_spos_shufflenetv2 | 640 | 37.9 | 7.04(-2.7%) | 7.03 | [config](https://github.com/open-mmlab/mmyolo/tree/dev/configs/razor/subnets/yolov5_s_spos_shufflenetv2_syncbn_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmrazor/v1/spos/yolov5/yolov5_s_spos_shufflenetv2_syncbn_8xb16-300e_coco_20230109_155302-777fd6f1.pth) | -| yolov6-s | 640 | 44.0 | 18.869 | 24.253 | [config](https://github.com/open-mmlab/mmyolo/blob/main/configs/yolov6/yolov6_s_syncbn_fast_8xb32-400e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov6/yolov6_s_syncbn_fast_8xb32-400e_coco/yolov6_s_syncbn_fast_8xb32-400e_coco_20221102_203035-932e1d91.pth) | -| yolov6_l_attentivenas_a6 | 640 | 44.5 | 18.38(-2.6%) | 8.49 | [config](https://github.com/open-mmlab/mmyolo/tree/dev/configs/razor/subnets/yolov6_l_attentivenas_a6_d12_syncbn_fast_16xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmrazor/v1/attentivenas/yolov6/yolov6_l_attentivenas_a6_d12_syncbn_fast_16xb16-300e_coco_20230108_174944-4970f0b7.pth) | -| RTMDet-tiny | 640 | 41.0 | 4.8 | 8.1 | [config](./rtmdet_l_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco/rtmdet_tiny_syncbn_fast_8xb32-300e_coco_20230102_140117-dbb1dc83.pth) | -| rtmdet_tiny_ofa_lat31 | 960 | 41.1 | 3.91(-18.5%) | 6.09 | [config](https://github.com/open-mmlab/mmyolo/tree/dev/configs/razor/subnets/rtmdet_tiny_ofa_lat31_syncbn_16xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmrazor/v1/ofa/rtmdet/rtmdet_tiny_ofa_lat31_syncbn_16xb16-300e_coco_20230108_222141-24ff87dex.pth) | +| Model | size | box AP | 
Params(M) | FLOPs(G) | Config | Download | +| :------------------------: | :--: | :----: | :----------: | :------: | :---------------------------------------------------------------------------------------------------------------------------------------: | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------: | +| yolov5-s | 640 | 37.7 | 7.235 | 8.265 | [config](https://github.com/open-mmlab/mmyolo/blob/main/configs/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700-86e02187.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco/yolov5_s-v61_syncbn_fast_8xb16-300e_coco_20220918_084700.log.json) | +| yolov5_s_spos_shufflenetv2 | 640 | 38.0 | 7.04(-2.7%) | 7.03 | [config](https://github.com/open-mmlab/mmyolo/tree/dev/configs/razor/subnets/yolov5_s_spos_shufflenetv2_syncbn_8xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmrazor/v1/yolo_nas_backbone/yolov5_s_spos_shufflenetv2_syncbn_8xb16-300e_coco_20230211_220635-578be9a9.pth) \| log | +| yolov6-s | 640 | 44.0 | 18.869 | 24.253 | [config](https://github.com/open-mmlab/mmyolo/blob/main/configs/yolov6/yolov6_s_syncbn_fast_8xb32-400e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/yolov6/yolov6_s_syncbn_fast_8xb32-400e_coco/yolov6_s_syncbn_fast_8xb32-400e_coco_20221102_203035-932e1d91.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/yolov6/yolov6_s_syncbn_fast_8xb32-400e_coco/yolov6_s_syncbn_fast_8xb32-400e_coco_20221102_203035.log.json) | +| yolov6_l_attentivenas_a6 | 640 | 45.3 | 18.38(-2.6%) | 8.49 | [config](https://github.com/open-mmlab/mmyolo/tree/dev/configs/razor/subnets/yolov6_l_attentivenas_a6_d12_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmrazor/v1/yolo_nas_backbone/yolov6_l_attentivenas_a6_d12_syncbn_fast_8xb32-300e_coco_20230211_222409-dcc72668.pth) \| log | +| RTMDet-tiny | 640 | 41.0 | 4.8 | 8.1 | [config](./rtmdet_l_syncbn_fast_8xb32-300e_coco.py) | [model](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco/rtmdet_tiny_syncbn_fast_8xb32-300e_coco_20230102_140117-dbb1dc83.pth) \| [log](https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco/rtmdet_tiny_syncbn_fast_8xb32-300e_coco_20230102_140117.log.json) | +| rtmdet_tiny_ofa_lat31 | 960 | 41.3 | 3.91(-18.5%) | 6.09 | [config](https://github.com/open-mmlab/mmyolo/tree/dev/configs/razor/subnets/rtmdet_tiny_ofa_lat31_syncbn_16xb16-300e_coco.py) | [model](https://download.openmmlab.com/mmrazor/v1/yolo_nas_backbone/rtmdet_tiny_ofa_lat31_syncbn_16xb16-300e_coco_20230214_210623-449bb2a0.pth) \| log | **Note**: 1. For fair comparison, the training configuration is consistent with the original configuration and results in an improvement of about 0.2-0.5% AP. -2. `yolov5_s_spos_shufflenetv2` achieves 37.9% AP with only 7.042M parameters, directly instead of the backbone, and outperforms `yolov5_s` with a similar size by more than 0.2% AP. +2. 
`yolov5_s_spos_shufflenetv2` achieves 38.0% AP with only 7.042M parameters, directly instead of the backbone, and outperforms `yolov5_s` with a similar size by more than 0.3% AP. 3. With the efficient backbone of `yolov6_l_attentivenas_a6`, the input channels of `YOLOv6RepPAFPN` are reduced. Meanwhile, modify the **deepen_factor** and the neck is made deeper to restore the AP. -4. with the `rtmdet_tiny_ofa_lat31` backbone with only 3.315M parameters and 3.634G flops, we can modify the input resolution to 960, with a similar model size compared to `rtmdet_tiny` and exceeds `rtmdet_tiny` by 0.1% AP, reducing the size of the whole model to 3.91 MB. +4. with the `rtmdet_tiny_ofa_lat31` backbone with only 3.315M parameters and 3.634G flops, we can modify the input resolution to 960, with a similar model size compared to `rtmdet_tiny` and exceeds `rtmdet_tiny` by 0.4% AP, reducing the size of the whole model to 3.91 MB. diff --git a/configs/razor/subnets/rtmdet_tiny_ofa_lat31_syncbn_16xb16-300e_coco.py b/configs/razor/subnets/rtmdet_tiny_ofa_lat31_syncbn_16xb16-300e_coco.py index 82d696be..04d8c2d8 100644 --- a/configs/razor/subnets/rtmdet_tiny_ofa_lat31_syncbn_16xb16-300e_coco.py +++ b/configs/razor/subnets/rtmdet_tiny_ofa_lat31_syncbn_16xb16-300e_coco.py @@ -4,17 +4,13 @@ _base_ = [ ] checkpoint_file = 'https://download.openmmlab.com/mmrazor/v1/ofa/ofa_mobilenet_subnet_8xb256_in1k_note8_lat%4031ms_top1%4072.8_finetune%4025.py_20221214_0939-981a8b2a.pth' # noqa -fix_subnet = 'https://download.openmmlab.com/mmrazor/v1/ofa/rtmdet/OFA_SUBNET_NOTE8_LAT31.yaml' # noqa +fix_subnet = 'https://download.openmmlab.com/mmrazor/v1/yolo_nas_backbone/OFA_SUBNET_NOTE8_LAT31.yaml' # noqa deepen_factor = 0.167 widen_factor = 1.0 channels = [40, 112, 160] train_batch_size_per_gpu = 16 img_scale = (960, 960) -_base_.base_lr = 0.002 -_base_.optim_wrapper.optimizer.lr = 0.002 -_base_.param_scheduler[1].eta_min = 0.002 * 0.05 - _base_.nas_backbone.out_indices = (2, 4, 5) _base_.nas_backbone.conv_cfg = dict(type='mmrazor.OFAConv2d') _base_.nas_backbone.init_cfg = dict( @@ -36,6 +32,14 @@ _base_.model.bbox_head.head_module.in_channels = channels[0] _base_.model.bbox_head.head_module.feat_channels = channels[0] _base_.model.bbox_head.head_module.widen_factor = widen_factor +_base_.model.test_cfg = dict( + multi_label=True, + nms_pre=1000, + min_bbox_size=0, + score_thr=0.05, + nms=dict(type='nms', iou_threshold=0.6), + max_per_img=100) + train_pipeline = [ dict(type='LoadImageFromFile', file_client_args=_base_.file_client_args), dict(type='LoadAnnotations', with_bbox=True), @@ -43,12 +47,12 @@ train_pipeline = [ type='Mosaic', img_scale=img_scale, use_cached=True, - max_cached_images=40, + max_cached_images=20, + random_pop=False, pad_val=114.0), dict( type='mmdet.RandomResize', - # img_scale is (width, height) - scale=(img_scale[0] * 2, img_scale[1] * 2), + scale=(1280, 1280), ratio_range=(0.5, 2.0), # note resize_type='mmdet.Resize', keep_ratio=True), @@ -56,7 +60,15 @@ train_pipeline = [ dict(type='mmdet.YOLOXHSVRandomAug'), dict(type='mmdet.RandomFlip', prob=0.5), dict(type='mmdet.Pad', size=img_scale, pad_val=dict(img=(114, 114, 114))), - dict(type='YOLOv5MixUp', use_cached=True, max_cached_images=20), + dict( + type='YOLOXMixUp', + img_scale=(960, 960), + ratio_range=(1.0, 1.0), + max_cached_images=10, + use_cached=True, + random_pop=False, + pad_val=(114, 114, 114), + prob=0.5), dict(type='mmdet.PackDetInputs') ] @@ -81,23 +93,17 @@ train_dataloader = dict( test_pipeline = [ dict(type='LoadImageFromFile', 
file_client_args=_base_.file_client_args), - dict(type='YOLOv5KeepRatioResize', scale=img_scale), - dict( - type='LetterResize', - scale=img_scale, - allow_scale_up=False, - pad_val=dict(img=114)), + dict(type='mmdet.Resize', scale=(960, 960), keep_ratio=True), + dict(type='mmdet.Pad', size=(960, 960), pad_val=dict(img=(114, 114, 114))), dict(type='LoadAnnotations', with_bbox=True, _scope_='mmdet'), dict( type='mmdet.PackDetInputs', meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', - 'scale_factor', 'pad_param')) + 'scale_factor')) ] -batch_shapes_cfg = dict(img_size=img_scale[0]) - val_dataloader = dict( - dataset=dict(pipeline=test_pipeline, batch_shapes_cfg=batch_shapes_cfg)) + dataset=dict(pipeline=test_pipeline, batch_shapes_cfg=None)) test_dataloader = val_dataloader diff --git a/configs/razor/subnets/yolov5_s_spos_shufflenetv2_syncbn_8xb16-300e_coco.py b/configs/razor/subnets/yolov5_s_spos_shufflenetv2_syncbn_8xb16-300e_coco.py index 39884047..beb4941c 100644 --- a/configs/razor/subnets/yolov5_s_spos_shufflenetv2_syncbn_8xb16-300e_coco.py +++ b/configs/razor/subnets/yolov5_s_spos_shufflenetv2_syncbn_8xb16-300e_coco.py @@ -5,7 +5,6 @@ _base_ = [ checkpoint_file = 'https://download.openmmlab.com/mmrazor/v1/spos/spos_shufflenetv2_subnet_8xb128_in1k_flops_0.33M_acc_73.87_20211222-1f0a0b4d_v3.pth' # noqa fix_subnet = 'https://download.openmmlab.com/mmrazor/v1/spos/spos_shufflenetv2_subnet_8xb128_in1k_flops_0.33M_acc_73.87_20211222-1f0a0b4d_subnet_cfg_v3.yaml' # noqa - widen_factor = 1.0 channels = [160, 320, 640] diff --git a/configs/razor/subnets/yolov6_l_attentivenas_a6_d12_syncbn_fast_16xb16-300e_coco.py b/configs/razor/subnets/yolov6_l_attentivenas_a6_d12_syncbn_fast_8xb32-300e_coco.py similarity index 100% rename from configs/razor/subnets/yolov6_l_attentivenas_a6_d12_syncbn_fast_16xb16-300e_coco.py rename to configs/razor/subnets/yolov6_l_attentivenas_a6_d12_syncbn_fast_8xb32-300e_coco.py diff --git a/tests/test_downstream/test_mmrazor.py b/tests/test_downstream/test_mmrazor.py index ebf6806e..dc3090d2 100644 --- a/tests/test_downstream/test_mmrazor.py +++ b/tests/test_downstream/test_mmrazor.py @@ -11,9 +11,9 @@ from mmyolo.testing import get_detector_cfg 'razor/subnets/' 'yolov5_s_spos_shufflenetv2_syncbn_8xb16-300e_coco.py', 'razor/subnets/' 'rtmdet_tiny_ofa_lat31_syncbn_16xb16-300e_coco.py', 'razor/subnets/' - 'yolov6_l_attentivenas_a6_d12_syncbn_fast_16xb16-300e_coco.py' + 'yolov6_l_attentivenas_a6_d12_syncbn_fast_8xb32-300e_coco.py' ]) -def test_razor_backbone_forward(cfg_file): +def test_razor_backbone_init(cfg_file): model = get_detector_cfg(cfg_file) model_cfg = copy.deepcopy(model.backbone) from mmrazor.registry import MODELS From e32838abe1eb246d5d35463a9e5a307e05e5392b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Haian=20Huang=28=E6=B7=B1=E5=BA=A6=E7=9C=B8=29?= <1286304229@qq.com> Date: Thu, 2 Mar 2023 17:09:50 +0800 Subject: [PATCH 63/64] Add change log of v0.5.0 (#612) * update * update * update * update * add configs * update * add tta * update --- README.md | 24 ++++-- README_zh-CN.md | 24 ++++-- .../ppyoloe_plus_s_fast_1xb12-40e_cat.py | 56 ++++++++++++++ .../rtmdet/rtmdet_tiny_fast_1xb12-40e_cat.py | 70 +++++++++++++++++ .../yolov5/yolov5_s-v61_fast_1xb12-40e_cat.py | 10 ++- configs/yolov6/yolov6_s_fast_1xb12-40e_cat.py | 56 ++++++++++++++ .../yolov7/yolov7_tiny_fast_1xb12-40e_cat.py | 56 ++++++++++++++ configs/yolov8/yolov8_s_fast_1xb12-40e_cat.py | 52 +++++++++++++ .../yolox_s_fast_1xb12-40e-rtmdet-hyp_cat.py | 76 +++++++++++++++++++ demo/image_demo.py | 36 
++++++++- demo/large_image_demo.py | 37 ++++++++- .../15_minutes_object_detection.md | 46 ++++++----- docs/en/get_started/overview.md | 5 +- docs/en/notes/changelog.md | 55 ++++++++++++++ .../15_minutes_object_detection.md | 47 +++++++----- docs/zh_cn/get_started/article.md | 1 + docs/zh_cn/get_started/overview.md | 1 + docs/zh_cn/notes/changelog.md | 55 ++++++++++++++ 18 files changed, 647 insertions(+), 60 deletions(-) create mode 100644 configs/ppyoloe/ppyoloe_plus_s_fast_1xb12-40e_cat.py create mode 100644 configs/rtmdet/rtmdet_tiny_fast_1xb12-40e_cat.py create mode 100644 configs/yolov6/yolov6_s_fast_1xb12-40e_cat.py create mode 100644 configs/yolov7/yolov7_tiny_fast_1xb12-40e_cat.py create mode 100644 configs/yolov8/yolov8_s_fast_1xb12-40e_cat.py create mode 100644 configs/yolox/yolox_s_fast_1xb12-40e-rtmdet-hyp_cat.py diff --git a/README.md b/README.md index 4caa1b40..c6dee0a5 100644 --- a/README.md +++ b/README.md @@ -77,10 +77,17 @@ English | [简体中文](README_zh-CN.md) ## 🥳 🚀 What's New [🔝](#-table-of-contents) -💎 **v0.4.0** was released on 18/1/2023: +💎 **v0.5.0** was released on 2/3/2023: -1. Implemented [YOLOv8](https://github.com/open-mmlab/mmyolo/blob/dev/configs/yolov8/README.md) object detection model, and supports model deployment in [projects/easydeploy](https://github.com/open-mmlab/mmyolo/blob/dev/projects/easydeploy) -2. Added Chinese and English versions of [Algorithm principles and implementation with YOLOv8](https://github.com/open-mmlab/mmyolo/blob/dev/docs/en/algorithm_descriptions/yolov8_description.md) +1. Support [RTMDet-R](https://github.com/open-mmlab/mmyolo/blob/dev/configs/rtmdet/README.md#rotated-object-detection) rotated object detection +2. Support for using mask annotation to improve [YOLOv8](https://github.com/open-mmlab/mmyolo/blob/dev/configs/yolov8/README.md) object detection performance +3. Support [MMRazor](https://github.com/open-mmlab/mmyolo/blob/dev/configs/razor/subnets/README.md) searchable NAS sub-network as the backbone of YOLO series algorithm +4. Support calling [MMRazor](https://github.com/open-mmlab/mmyolo/blob/dev/configs/rtmdet/distillation/README.md) to distill the knowledge of RTMDet +5. [MMYOLO](https://mmyolo.readthedocs.io/zh_CN/dev/) document structure optimization, comprehensive content upgrade +6. Improve YOLOX mAP and training speed based on RTMDet training hyperparameters +7. Support calculation of model parameters and FLOPs, provide GPU latency data on T4 devices, and update [Model Zoo](https://github.com/open-mmlab/mmyolo/blob/dev/docs/en/model_zoo.md) +8. Support test-time augmentation (TTA) +9. Support RTMDet, YOLOv8 and YOLOv7 assigner visualization For release history and update details, please refer to [changelog](https://mmyolo.readthedocs.io/en/latest/notes/changelog.html). @@ -102,7 +109,7 @@ We are excited to announce our latest work on real-time object recognition tasks
-MMYOLO currently only implements the object detection algorithm, but it has a significant training acceleration compared to the MMDeteciton version. The training speed is 2.6 times faster than the previous version.
+MMYOLO currently implements object detection and rotated object detection algorithms, but it has significant training acceleration compared to the MMDetection version. The training speed is 2.6 times faster than the previous version.

 ## 📖 Introduction [🔝](#-table-of-contents)

@@ -138,8 +145,8 @@ And the figure of P6 model is in [model_design.md](docs/en/recommended_topics/mo
 MMYOLO relies on PyTorch, MMCV, MMEngine, and MMDetection. Below are quick steps for installation. Please refer to the [Install Guide](docs/en/get_started/installation.md) for more detailed instructions.

 ```shell
-conda create -n open-mmlab python=3.8 pytorch==1.10.1 torchvision==0.11.2 cudatoolkit=11.3 -c pytorch -y
-conda activate open-mmlab
+conda create -n mmyolo python=3.8 pytorch==1.10.1 torchvision==0.11.2 cudatoolkit=11.3 -c pytorch -y
+conda activate mmyolo
 pip install openmim
 mim install "mmengine>=0.6.0"
 mim install "mmcv>=2.0.0rc4,<2.1.0"
@@ -258,6 +265,10 @@ For different parts from MMDetection, we have also prepared user guides and adva

 ## 📊 Overview of Benchmark and Model Zoo [🔝](#-table-of-contents)

+
+ +
+ Results and models are available in the [model zoo](docs/en/model_zoo.md).
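The test-time augmentation (TTA) support listed in the v0.5.0 highlights above is wired up by the `demo/image_demo.py` and `demo/large_image_demo.py` changes later in this patch series. A minimal sketch of that wiring, assuming an MMYOLO config that already defines `tta_model` and `tta_pipeline` (the config path below is only illustrative):

```python
from mmengine.config import Config, ConfigDict

# Illustrative path; any config that defines `tta_model` and `tta_pipeline` works.
cfg = Config.fromfile('configs/yolov5/yolov5_s-v61_syncbn_fast_8xb16-300e_coco.py')
assert 'tta_model' in cfg and 'tta_pipeline' in cfg

# Wrap the detector with the TTA wrapper and switch to the TTA test pipeline,
# mirroring what the updated demo scripts do when `--tta` is passed.
cfg.model = ConfigDict(**cfg.tta_model, module=cfg.model)
test_data_cfg = cfg.test_dataloader.dataset
while 'dataset' in test_data_cfg:
    test_data_cfg = test_data_cfg['dataset']
if 'batch_shapes_cfg' in test_data_cfg:
    # batch_shapes_cfg forces the output image size and is incompatible with TTA.
    test_data_cfg.batch_shapes_cfg = None
test_data_cfg.pipeline = cfg.tta_pipeline
```

From the command line, the same behaviour is exposed through the new `--tta` flag added to both demo scripts.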
@@ -274,6 +285,7 @@ Results and models are available in the [model zoo](docs/en/model_zoo.md). - [x] [YOLOv5](configs/yolov5) - [x] [YOLOX](configs/yolox) - [x] [RTMDet](configs/rtmdet) +- [x] [RTMDet-Rotated](configs/rtmdet) - [x] [YOLOv6](configs/yolov6) - [x] [YOLOv7](configs/yolov7) - [x] [PPYOLOE](configs/ppyoloe) diff --git a/README_zh-CN.md b/README_zh-CN.md index c85beefe..e1d27924 100644 --- a/README_zh-CN.md +++ b/README_zh-CN.md @@ -78,10 +78,17 @@ ## 🥳 🚀 最新进展 [🔝](#-table-of-contents) -💎 **v0.4.0** 版本已经在 2023.1.18 发布: +💎 **v0.5.0** 版本已经在 2023.3.2 发布: -1. 实现了 [YOLOv8](https://github.com/open-mmlab/mmyolo/blob/dev/configs/yolov8/README.md) 目标检测模型,并通过 [projects/easydeploy](https://github.com/open-mmlab/mmyolo/blob/dev/projects/easydeploy) 支持了模型部署 -2. 新增了中英文版本的 [YOLOv8 原理和实现全解析文档](https://github.com/open-mmlab/mmyolo/blob/dev/docs/zh_cn/algorithm_descriptions/yolov8_description.md) +1. 支持了 [RTMDet-R](https://github.com/open-mmlab/mmyolo/blob/dev/configs/rtmdet/README.md#rotated-object-detection) 旋转框目标检测任务和算法 +2. [YOLOv8](https://github.com/open-mmlab/mmyolo/blob/dev/configs/yolov8/README.md) 支持使用 mask 标注提升目标检测模型性能 +3. 支持 [MMRazor](https://github.com/open-mmlab/mmyolo/blob/dev/configs/razor/subnets/README.md) 搜索的 NAS 子网络作为 YOLO 系列算法的 backbone +4. 支持调用 [MMRazor](https://github.com/open-mmlab/mmyolo/blob/dev/configs/rtmdet/distillation/README.md) 对 RTMDet 进行知识蒸馏 +5. [MMYOLO](https://mmyolo.readthedocs.io/zh_CN/dev/) 文档结构优化,内容全面升级 +6. 基于 RTMDet 训练超参提升 YOLOX 精度和训练速度 +7. 支持模型参数量、FLOPs 计算和提供 T4 设备上 GPU 延时数据,并更新了 [Model Zoo](https://github.com/open-mmlab/mmyolo/blob/dev/docs/zh_cn/model_zoo.md) +8. 支持测试时增强 TTA +9. 支持 RTMDet、YOLOv8 和 YOLOv7 assigner 可视化 我们提供了实用的**脚本命令速查表** @@ -123,7 +130,7 @@
-MMYOLO 中目前仅仅实现了目标检测算法,但是相比 MMDeteciton 版本有显著训练加速,训练速度相比原先版本提升 2.6 倍。
+MMYOLO 中目前实现了目标检测和旋转框目标检测算法,但是相比 MMDetection 版本有显著训练加速,训练速度相比原先版本提升 2.6 倍。

 ## 📖 简介 [🔝](#-table-of-contents)

@@ -159,8 +166,8 @@ P6 模型图详见 [model_design.md](docs/zh_cn/recommended_topics/model_design.
 MMYOLO 依赖 PyTorch, MMCV, MMEngine 和 MMDetection,以下是安装的简要步骤。 更详细的安装指南请参考[安装文档](docs/zh_cn/get_started/installation.md)。

 ```shell
-conda create -n open-mmlab python=3.8 pytorch==1.10.1 torchvision==0.11.2 cudatoolkit=11.3 -c pytorch -y
-conda activate open-mmlab
+conda create -n mmyolo python=3.8 pytorch==1.10.1 torchvision==0.11.2 cudatoolkit=11.3 -c pytorch -y
+conda activate mmyolo
 pip install openmim
 mim install "mmengine>=0.6.0"
 mim install "mmcv>=2.0.0rc4,<2.1.0"
@@ -280,6 +287,10 @@ MMYOLO 用法和 MMDetection 几乎一致,所有教程都是通用的,你也

 ## 📊 基准测试和模型库 [🔝](#-table-of-contents)

+
+ +
+ 测试结果和模型可以在 [模型库](docs/zh_cn/model_zoo.md) 中找到。
@@ -296,6 +307,7 @@ MMYOLO 用法和 MMDetection 几乎一致,所有教程都是通用的,你也 - [x] [YOLOv5](configs/yolov5) - [x] [YOLOX](configs/yolox) - [x] [RTMDet](configs/rtmdet) +- [x] [RTMDet-Rotated](configs/rtmdet) - [x] [YOLOv6](configs/yolov6) - [x] [YOLOv7](configs/yolov7) - [x] [PPYOLOE](configs/ppyoloe) diff --git a/configs/ppyoloe/ppyoloe_plus_s_fast_1xb12-40e_cat.py b/configs/ppyoloe/ppyoloe_plus_s_fast_1xb12-40e_cat.py new file mode 100644 index 00000000..752ff633 --- /dev/null +++ b/configs/ppyoloe/ppyoloe_plus_s_fast_1xb12-40e_cat.py @@ -0,0 +1,56 @@ +# Compared to other same scale models, this configuration consumes too much +# GPU memory and is not validated for now +_base_ = 'ppyoloe_plus_s_fast_8xb8-80e_coco.py' + +data_root = './data/cat/' +class_name = ('cat', ) +num_classes = len(class_name) +metainfo = dict(classes=class_name, palette=[(20, 220, 60)]) + +num_last_epochs = 5 + +max_epochs = 40 +train_batch_size_per_gpu = 12 +train_num_workers = 2 + +load_from = 'https://download.openmmlab.com/mmyolo/v0/ppyoloe/ppyoloe_plus_s_fast_8xb8-80e_coco/ppyoloe_plus_s_fast_8xb8-80e_coco_20230101_154052-9fee7619.pth' # noqa + +model = dict( + backbone=dict(frozen_stages=4), + bbox_head=dict(head_module=dict(num_classes=num_classes)), + train_cfg=dict( + initial_assigner=dict(num_classes=num_classes), + assigner=dict(num_classes=num_classes))) + +train_dataloader = dict( + batch_size=train_batch_size_per_gpu, + num_workers=train_num_workers, + dataset=dict( + data_root=data_root, + metainfo=metainfo, + ann_file='annotations/trainval.json', + data_prefix=dict(img='images/'))) + +val_dataloader = dict( + dataset=dict( + metainfo=metainfo, + data_root=data_root, + ann_file='annotations/test.json', + data_prefix=dict(img='images/'))) + +test_dataloader = val_dataloader + +default_hooks = dict( + param_scheduler=dict( + warmup_min_iter=10, + warmup_epochs=3, + total_epochs=int(max_epochs * 1.2))) + +val_evaluator = dict(ann_file=data_root + 'annotations/test.json') +test_evaluator = val_evaluator + +default_hooks = dict( + checkpoint=dict(interval=10, max_keep_ckpts=2, save_best='auto'), + logger=dict(type='LoggerHook', interval=5)) +train_cfg = dict(max_epochs=max_epochs, val_interval=10) +# visualizer = dict(vis_backends = [dict(type='LocalVisBackend'), dict(type='WandbVisBackend')]) # noqa diff --git a/configs/rtmdet/rtmdet_tiny_fast_1xb12-40e_cat.py b/configs/rtmdet/rtmdet_tiny_fast_1xb12-40e_cat.py new file mode 100644 index 00000000..8d1182c5 --- /dev/null +++ b/configs/rtmdet/rtmdet_tiny_fast_1xb12-40e_cat.py @@ -0,0 +1,70 @@ +_base_ = 'rtmdet_tiny_syncbn_fast_8xb32-300e_coco.py' + +data_root = './data/cat/' +class_name = ('cat', ) +num_classes = len(class_name) +metainfo = dict(classes=class_name, palette=[(20, 220, 60)]) + +num_epochs_stage2 = 5 + +max_epochs = 40 +train_batch_size_per_gpu = 12 +train_num_workers = 4 +val_batch_size_per_gpu = 1 +val_num_workers = 2 + +load_from = 'https://download.openmmlab.com/mmyolo/v0/rtmdet/rtmdet_tiny_syncbn_fast_8xb32-300e_coco/rtmdet_tiny_syncbn_fast_8xb32-300e_coco_20230102_140117-dbb1dc83.pth' # noqa + +model = dict( + backbone=dict(frozen_stages=4), + bbox_head=dict(head_module=dict(num_classes=num_classes)), + train_cfg=dict(assigner=dict(num_classes=num_classes))) + +train_dataloader = dict( + batch_size=train_batch_size_per_gpu, + num_workers=train_num_workers, + dataset=dict( + data_root=data_root, + metainfo=metainfo, + ann_file='annotations/trainval.json', + data_prefix=dict(img='images/'))) + +val_dataloader = dict( + batch_size=val_batch_size_per_gpu, 
+ num_workers=val_num_workers, + dataset=dict( + metainfo=metainfo, + data_root=data_root, + ann_file='annotations/test.json', + data_prefix=dict(img='images/'))) + +test_dataloader = val_dataloader + +param_scheduler = [ + dict( + type='LinearLR', + start_factor=_base_.lr_start_factor, + by_epoch=False, + begin=0, + end=30), + dict( + # use cosine lr from 150 to 300 epoch + type='CosineAnnealingLR', + eta_min=_base_.base_lr * 0.05, + begin=max_epochs // 2, + end=max_epochs, + T_max=max_epochs // 2, + by_epoch=True, + convert_to_iter_based=True), +] + +_base_.custom_hooks[1].switch_epoch = max_epochs - num_epochs_stage2 + +val_evaluator = dict(ann_file=data_root + 'annotations/test.json') +test_evaluator = val_evaluator + +default_hooks = dict( + checkpoint=dict(interval=10, max_keep_ckpts=2, save_best='auto'), + logger=dict(type='LoggerHook', interval=5)) +train_cfg = dict(max_epochs=max_epochs, val_interval=10) +# visualizer = dict(vis_backends = [dict(type='LocalVisBackend'), dict(type='WandbVisBackend')]) # noqa diff --git a/configs/yolov5/yolov5_s-v61_fast_1xb12-40e_cat.py b/configs/yolov5/yolov5_s-v61_fast_1xb12-40e_cat.py index 61c24356..7b7e4f22 100644 --- a/configs/yolov5/yolov5_s-v61_fast_1xb12-40e_cat.py +++ b/configs/yolov5/yolov5_s-v61_fast_1xb12-40e_cat.py @@ -36,17 +36,21 @@ val_dataloader = dict( dataset=dict( metainfo=metainfo, data_root=data_root, - ann_file='annotations/trainval.json', + ann_file='annotations/test.json', data_prefix=dict(img='images/'))) test_dataloader = val_dataloader -val_evaluator = dict(ann_file=data_root + 'annotations/trainval.json') +_base_.optim_wrapper.optimizer.batch_size_per_gpu = train_batch_size_per_gpu + +val_evaluator = dict(ann_file=data_root + 'annotations/test.json') test_evaluator = val_evaluator default_hooks = dict( checkpoint=dict(interval=10, max_keep_ckpts=2, save_best='auto'), - param_scheduler=dict(max_epochs=max_epochs), + # The warmup_mim_iter parameter is critical. + # The default value is 1000 which is not suitable for cat datasets. 
+ param_scheduler=dict(max_epochs=max_epochs, warmup_mim_iter=10), logger=dict(type='LoggerHook', interval=5)) train_cfg = dict(max_epochs=max_epochs, val_interval=10) # visualizer = dict(vis_backends = [dict(type='LocalVisBackend'), dict(type='WandbVisBackend')]) # noqa diff --git a/configs/yolov6/yolov6_s_fast_1xb12-40e_cat.py b/configs/yolov6/yolov6_s_fast_1xb12-40e_cat.py new file mode 100644 index 00000000..82578fcc --- /dev/null +++ b/configs/yolov6/yolov6_s_fast_1xb12-40e_cat.py @@ -0,0 +1,56 @@ +_base_ = './yolov6_s_syncbn_fast_8xb32-400e_coco.py' + +data_root = './data/cat/' +class_name = ('cat', ) +num_classes = len(class_name) +metainfo = dict(classes=class_name, palette=[(20, 220, 60)]) + +max_epochs = 40 +train_batch_size_per_gpu = 12 +train_num_workers = 4 +num_last_epochs = 5 + +load_from = 'https://download.openmmlab.com/mmyolo/v0/yolov6/yolov6_s_syncbn_fast_8xb32-400e_coco/yolov6_s_syncbn_fast_8xb32-400e_coco_20221102_203035-932e1d91.pth' # noqa + +model = dict( + backbone=dict(frozen_stages=4), + bbox_head=dict(head_module=dict(num_classes=num_classes)), + train_cfg=dict( + initial_assigner=dict(num_classes=num_classes), + assigner=dict(num_classes=num_classes))) + +train_dataloader = dict( + batch_size=train_batch_size_per_gpu, + num_workers=train_num_workers, + dataset=dict( + data_root=data_root, + metainfo=metainfo, + ann_file='annotations/trainval.json', + data_prefix=dict(img='images/'))) + +val_dataloader = dict( + dataset=dict( + metainfo=metainfo, + data_root=data_root, + ann_file='annotations/test.json', + data_prefix=dict(img='images/'))) + +test_dataloader = val_dataloader + +val_evaluator = dict(ann_file=data_root + 'annotations/test.json') +test_evaluator = val_evaluator + +_base_.optim_wrapper.optimizer.batch_size_per_gpu = train_batch_size_per_gpu +_base_.custom_hooks[1].switch_epoch = max_epochs - num_last_epochs + +default_hooks = dict( + checkpoint=dict(interval=10, max_keep_ckpts=2, save_best='auto'), + # The warmup_mim_iter parameter is critical. + # The default value is 1000 which is not suitable for cat datasets. 
+ param_scheduler=dict(max_epochs=max_epochs, warmup_mim_iter=10), + logger=dict(type='LoggerHook', interval=5)) +train_cfg = dict( + max_epochs=max_epochs, + val_interval=10, + dynamic_intervals=[(max_epochs - num_last_epochs, 1)]) +# visualizer = dict(vis_backends = [dict(type='LocalVisBackend'), dict(type='WandbVisBackend')]) # noqa diff --git a/configs/yolov7/yolov7_tiny_fast_1xb12-40e_cat.py b/configs/yolov7/yolov7_tiny_fast_1xb12-40e_cat.py new file mode 100644 index 00000000..eb044676 --- /dev/null +++ b/configs/yolov7/yolov7_tiny_fast_1xb12-40e_cat.py @@ -0,0 +1,56 @@ +_base_ = 'yolov7_tiny_syncbn_fast_8x16b-300e_coco.py' + +data_root = './data/cat/' +class_name = ('cat', ) +num_classes = len(class_name) +metainfo = dict(classes=class_name, palette=[(20, 220, 60)]) + +anchors = [ + [(68, 69), (154, 91), (143, 162)], # P3/8 + [(242, 160), (189, 287), (391, 207)], # P4/16 + [(353, 337), (539, 341), (443, 432)] # P5/32 +] + +max_epochs = 40 +train_batch_size_per_gpu = 12 +train_num_workers = 4 + +load_from = 'https://download.openmmlab.com/mmyolo/v0/yolov7/yolov7_tiny_syncbn_fast_8x16b-300e_coco/yolov7_tiny_syncbn_fast_8x16b-300e_coco_20221126_102719-0ee5bbdf.pth' # noqa + +model = dict( + backbone=dict(frozen_stages=4), + bbox_head=dict( + head_module=dict(num_classes=num_classes), + prior_generator=dict(base_sizes=anchors))) + +train_dataloader = dict( + batch_size=train_batch_size_per_gpu, + num_workers=train_num_workers, + dataset=dict( + data_root=data_root, + metainfo=metainfo, + ann_file='annotations/trainval.json', + data_prefix=dict(img='images/'))) + +val_dataloader = dict( + dataset=dict( + metainfo=metainfo, + data_root=data_root, + ann_file='annotations/test.json', + data_prefix=dict(img='images/'))) + +test_dataloader = val_dataloader + +_base_.optim_wrapper.optimizer.batch_size_per_gpu = train_batch_size_per_gpu + +val_evaluator = dict(ann_file=data_root + 'annotations/test.json') +test_evaluator = val_evaluator + +default_hooks = dict( + checkpoint=dict(interval=10, max_keep_ckpts=2, save_best='auto'), + # The warmup_mim_iter parameter is critical. + # The default value is 1000 which is not suitable for cat datasets. 
+ param_scheduler=dict(max_epochs=max_epochs, warmup_mim_iter=10), + logger=dict(type='LoggerHook', interval=5)) +train_cfg = dict(max_epochs=max_epochs, val_interval=10) +# visualizer = dict(vis_backends = [dict(type='LocalVisBackend'), dict(type='WandbVisBackend')]) # noqa diff --git a/configs/yolov8/yolov8_s_fast_1xb12-40e_cat.py b/configs/yolov8/yolov8_s_fast_1xb12-40e_cat.py new file mode 100644 index 00000000..e54bff03 --- /dev/null +++ b/configs/yolov8/yolov8_s_fast_1xb12-40e_cat.py @@ -0,0 +1,52 @@ +_base_ = 'yolov8_s_syncbn_fast_8xb16-500e_coco.py' + +data_root = './data/cat/' +class_name = ('cat', ) +num_classes = len(class_name) +metainfo = dict(classes=class_name, palette=[(20, 220, 60)]) + +close_mosaic_epochs = 5 + +max_epochs = 40 +train_batch_size_per_gpu = 12 +train_num_workers = 4 + +load_from = 'https://download.openmmlab.com/mmyolo/v0/yolov8/yolov8_s_syncbn_fast_8xb16-500e_coco/yolov8_s_syncbn_fast_8xb16-500e_coco_20230117_180101-5aa5f0f1.pth' # noqa + +model = dict( + backbone=dict(frozen_stages=4), + bbox_head=dict(head_module=dict(num_classes=num_classes)), + train_cfg=dict(assigner=dict(num_classes=num_classes))) + +train_dataloader = dict( + batch_size=train_batch_size_per_gpu, + num_workers=train_num_workers, + dataset=dict( + data_root=data_root, + metainfo=metainfo, + ann_file='annotations/trainval.json', + data_prefix=dict(img='images/'))) + +val_dataloader = dict( + dataset=dict( + metainfo=metainfo, + data_root=data_root, + ann_file='annotations/test.json', + data_prefix=dict(img='images/'))) + +test_dataloader = val_dataloader + +_base_.optim_wrapper.optimizer.batch_size_per_gpu = train_batch_size_per_gpu +_base_.custom_hooks[1].switch_epoch = max_epochs - close_mosaic_epochs + +val_evaluator = dict(ann_file=data_root + 'annotations/test.json') +test_evaluator = val_evaluator + +default_hooks = dict( + checkpoint=dict(interval=10, max_keep_ckpts=2, save_best='auto'), + # The warmup_mim_iter parameter is critical. + # The default value is 1000 which is not suitable for cat datasets. 
+ param_scheduler=dict(max_epochs=max_epochs, warmup_mim_iter=10), + logger=dict(type='LoggerHook', interval=5)) +train_cfg = dict(max_epochs=max_epochs, val_interval=10) +# visualizer = dict(vis_backends = [dict(type='LocalVisBackend'), dict(type='WandbVisBackend')]) # noqa diff --git a/configs/yolox/yolox_s_fast_1xb12-40e-rtmdet-hyp_cat.py b/configs/yolox/yolox_s_fast_1xb12-40e-rtmdet-hyp_cat.py new file mode 100644 index 00000000..f7eac58f --- /dev/null +++ b/configs/yolox/yolox_s_fast_1xb12-40e-rtmdet-hyp_cat.py @@ -0,0 +1,76 @@ +_base_ = './yolox_s_fast_8xb32-300e-rtmdet-hyp_coco.py' + +data_root = './data/cat/' +class_name = ('cat', ) +num_classes = len(class_name) +metainfo = dict(classes=class_name, palette=[(20, 220, 60)]) + +num_last_epochs = 5 + +max_epochs = 40 +train_batch_size_per_gpu = 12 +train_num_workers = 4 + +load_from = 'https://download.openmmlab.com/mmyolo/v0/yolox/yolox_s_fast_8xb32-300e-rtmdet-hyp_coco/yolox_s_fast_8xb32-300e-rtmdet-hyp_coco_20230210_134645-3a8dfbd7.pth' # noqa + +model = dict( + backbone=dict(frozen_stages=4), + bbox_head=dict(head_module=dict(num_classes=num_classes))) + +train_dataloader = dict( + batch_size=train_batch_size_per_gpu, + num_workers=train_num_workers, + dataset=dict( + data_root=data_root, + metainfo=metainfo, + ann_file='annotations/trainval.json', + data_prefix=dict(img='images/'))) + +val_dataloader = dict( + dataset=dict( + metainfo=metainfo, + data_root=data_root, + ann_file='annotations/test.json', + data_prefix=dict(img='images/'))) + +test_dataloader = val_dataloader + +param_scheduler = [ + dict( + # use quadratic formula to warm up 3 epochs + # and lr is updated by iteration + # TODO: fix default scope in get function + type='mmdet.QuadraticWarmupLR', + by_epoch=True, + begin=0, + end=3, + convert_to_iter_based=True), + dict( + # use cosine lr from 5 to 35 epoch + type='CosineAnnealingLR', + eta_min=_base_.base_lr * 0.05, + begin=5, + T_max=max_epochs - num_last_epochs, + end=max_epochs - num_last_epochs, + by_epoch=True, + convert_to_iter_based=True), + dict( + # use fixed lr during last num_last_epochs epochs + type='ConstantLR', + by_epoch=True, + factor=1, + begin=max_epochs - num_last_epochs, + end=max_epochs, + ) +] + +_base_.custom_hooks[0].num_last_epochs = num_last_epochs + +val_evaluator = dict(ann_file=data_root + 'annotations/test.json') +test_evaluator = val_evaluator + +default_hooks = dict( + checkpoint=dict(interval=10, max_keep_ckpts=2, save_best='auto'), + logger=dict(type='LoggerHook', interval=5)) +train_cfg = dict(max_epochs=max_epochs, val_interval=10) +# visualizer = dict(vis_backends = [dict(type='LocalVisBackend'), dict(type='WandbVisBackend')]) # noqa diff --git a/demo/image_demo.py b/demo/image_demo.py index be2217c9..fa2cfb2a 100644 --- a/demo/image_demo.py +++ b/demo/image_demo.py @@ -1,9 +1,11 @@ # Copyright (c) OpenMMLab. All rights reserved. 
import os from argparse import ArgumentParser +from pathlib import Path import mmcv from mmdet.apis import inference_detector, init_detector +from mmengine.config import Config, ConfigDict from mmengine.logging import print_log from mmengine.utils import ProgressBar, path @@ -29,6 +31,10 @@ def parse_args(): '--deploy', action='store_true', help='Switch model to deployment mode') + parser.add_argument( + '--tta', + action='store_true', + help='Whether to use test time augmentation') parser.add_argument( '--score-thr', type=float, default=0.3, help='Bbox score threshold') parser.add_argument( @@ -50,9 +56,37 @@ def main(): if args.to_labelme and args.show: raise RuntimeError('`--to-labelme` or `--show` only ' 'can choose one at the same time.') + config = args.config + if isinstance(config, (str, Path)): + config = Config.fromfile(config) + elif not isinstance(config, Config): + raise TypeError('config must be a filename or Config object, ' + f'but got {type(config)}') + if 'init_cfg' in config.model.backbone: + config.model.backbone.init_cfg = None + + if args.tta: + assert 'tta_model' in config, 'Cannot find ``tta_model`` in config.' \ + " Can't use tta !" + assert 'tta_pipeline' in config, 'Cannot find ``tta_pipeline`` ' \ + "in config. Can't use tta !" + config.model = ConfigDict(**config.tta_model, module=config.model) + test_data_cfg = config.test_dataloader.dataset + while 'dataset' in test_data_cfg: + test_data_cfg = test_data_cfg['dataset'] + + # batch_shapes_cfg will force control the size of the output image, + # it is not compatible with tta. + if 'batch_shapes_cfg' in test_data_cfg: + test_data_cfg.batch_shapes_cfg = None + test_data_cfg.pipeline = config.tta_pipeline + + # TODO: TTA mode will error if cfg_options is not set. + # This is an mmdet issue and needs to be fixed later. # build the model from a config file and a checkpoint file - model = init_detector(args.config, args.checkpoint, device=args.device) + model = init_detector( + config, args.checkpoint, device=args.device, cfg_options={}) if args.deploy: switch_to_deploy(model) diff --git a/demo/large_image_demo.py b/demo/large_image_demo.py index 24b540f4..bdbc3a56 100644 --- a/demo/large_image_demo.py +++ b/demo/large_image_demo.py @@ -14,10 +14,12 @@ python demo/large_image_demo.py \ import os import random from argparse import ArgumentParser +from pathlib import Path import mmcv import numpy as np from mmdet.apis import inference_detector, init_detector +from mmengine.config import Config, ConfigDict from mmengine.logging import print_log from mmengine.utils import ProgressBar @@ -50,6 +52,10 @@ def parse_args(): '--deploy', action='store_true', help='Switch model to deployment mode') + parser.add_argument( + '--tta', + action='store_true', + help='Whether to use test time augmentation') parser.add_argument( '--score-thr', type=float, default=0.3, help='Bbox score threshold') parser.add_argument( @@ -90,8 +96,37 @@ def parse_args(): def main(): args = parse_args() + config = args.config + + if isinstance(config, (str, Path)): + config = Config.fromfile(config) + elif not isinstance(config, Config): + raise TypeError('config must be a filename or Config object, ' + f'but got {type(config)}') + if 'init_cfg' in config.model.backbone: + config.model.backbone.init_cfg = None + + if args.tta: + assert 'tta_model' in config, 'Cannot find ``tta_model`` in config.' \ + " Can't use tta !" + assert 'tta_pipeline' in config, 'Cannot find ``tta_pipeline`` ' \ + "in config. Can't use tta !" 
+ config.model = ConfigDict(**config.tta_model, module=config.model) + test_data_cfg = config.test_dataloader.dataset + while 'dataset' in test_data_cfg: + test_data_cfg = test_data_cfg['dataset'] + + # batch_shapes_cfg will force control the size of the output image, + # it is not compatible with tta. + if 'batch_shapes_cfg' in test_data_cfg: + test_data_cfg.batch_shapes_cfg = None + test_data_cfg.pipeline = config.tta_pipeline + + # TODO: TTA mode will error if cfg_options is not set. + # This is an mmdet issue and needs to be fixed later. # build the model from a config file and a checkpoint file - model = init_detector(args.config, args.checkpoint, device=args.device) + model = init_detector( + config, args.checkpoint, device=args.device, cfg_options={}) if args.deploy: switch_to_deploy(model) diff --git a/docs/en/get_started/15_minutes_object_detection.md b/docs/en/get_started/15_minutes_object_detection.md index 470ed919..e34a48ee 100644 --- a/docs/en/get_started/15_minutes_object_detection.md +++ b/docs/en/get_started/15_minutes_object_detection.md @@ -15,6 +15,8 @@ Take the small dataset of cat as an example, you can easily learn MMYOLO object - [Testing](#testing) - [EasyDeploy](#easydeploy-deployment) +In this article, we take YOLOv5-s as an example. For the rest of the YOLO series algorithms, please see the corresponding algorithm configuration folder. + ## Installation Assuming you've already installed Conda in advance, install PyTorch @@ -135,19 +137,23 @@ val_dataloader = dict( dataset=dict( metainfo=metainfo, data_root=data_root, - ann_file='annotations/trainval.json', + ann_file='annotations/test.json', data_prefix=dict(img='images/'))) test_dataloader = val_dataloader -val_evaluator = dict(ann_file=data_root + 'annotations/trainval.json') +_base_.optim_wrapper.optimizer.batch_size_per_gpu = train_batch_size_per_gpu + +val_evaluator = dict(ann_file=data_root + 'annotations/test.json') test_evaluator = val_evaluator default_hooks = dict( # Save weights every 10 epochs and a maximum of two weights can be saved. # The best model is saved automatically during model evaluation checkpoint=dict(interval=10, max_keep_ckpts=2, save_best='auto'), - param_scheduler=dict(max_epochs=max_epochs), + # The warmup_mim_iter parameter is critical. + # The default value is 1000 which is not suitable for cat datasets. + param_scheduler=dict(max_epochs=max_epochs, warmup_mim_iter=10), # The log printing interval is 5 logger=dict(type='LoggerHook', interval=5)) # The evaluation interval is 10 @@ -168,21 +174,21 @@ Run the above training command, `work_dirs/yolov5_s-v61_fast_1xb12-40e_cat` fold image
-The performance on `trainval.json` is as follows: +The performance on `test.json` is as follows: ```text - Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.685 - Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.953 - Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.852 + Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.631 + Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.909 + Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.747 Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000 Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = -1.000 - Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.685 - Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.664 - Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.749 - Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.761 + Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.631 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.627 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.703 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.703 Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000 Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = -1.000 - Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.761 + Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.703 ``` The above properties are printed via the COCO API, where -1 indicates that no object exists for the scale. According to the rules defined by COCO, the Cat dataset contains all large sized objects, and there are no small or medium-sized objects. @@ -244,10 +250,10 @@ python tools/train.py configs/yolov5/yolov5_s-v61_fast_1xb12-40e_cat.py ```
-image +image
-image +image
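A short worked example of why the `warmup_mim_iter=10` override highlighted in the cat configs above matters on such a small dataset (the image count below is an assumed order of magnitude, not the exact number):

```python
# With roughly a hundred-odd training images and train_batch_size_per_gpu = 12,
# one epoch is only about 10 iterations, so the default minimum warmup of
# 1000 iterations would cover most of the whole 40-epoch schedule.
# Lowering it to 10 keeps the warmup to roughly one epoch.
approx_train_images = 120          # assumption for illustration only
iters_per_epoch = approx_train_images // 12
total_iters = iters_per_epoch * 40
print(iters_per_epoch, total_iters)  # -> 10 400; even the full run is below the default 1000-iteration warmup
```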
#### 2 Tensorboard @@ -335,7 +341,7 @@ Let's choose the `data/cat/images/IMG_20221020_112705.jpg` image as an example t ```shell python demo/featmap_vis_demo.py data/cat/images/IMG_20221020_112705.jpg \ configs/yolov5/yolov5_s-v61_fast_1xb12-40e_cat.py \ - work_dirs/yolov5_s-v61_fast_1xb8-40e_cat/epoch_40.pth \ + work_dirs/yolov5_s-v61_fast_1xb12-40e_cat/epoch_40.pth \ --target-layers backbone \ --channel-reduction squeeze_mean ``` @@ -351,7 +357,7 @@ The result will be saved to the output folder in current path. Three output feat ```shell python demo/featmap_vis_demo.py data/cat/images/IMG_20221020_112705.jpg \ configs/yolov5/yolov5_s-v61_fast_1xb12-40e_cat.py \ - work_dirs/yolov5_s-v61_fast_1xb8-40e_cat/epoch_40.pth \ + work_dirs/yolov5_s-v61_fast_1xb12-40e_cat/epoch_40.pth \ --target-layers neck \ --channel-reduction squeeze_mean ``` @@ -371,7 +377,7 @@ Based on the above feature map visualization, we can analyze Grad CAM at the fea ```shell python demo/boxam_vis_demo.py data/cat/images/IMG_20221020_112705.jpg \ configs/yolov5/yolov5_s-v61_fast_1xb12-40e_cat.py \ - work_dirs/yolov5_s-v61_fast_1xb8-40e_cat/epoch_40.pth \ + work_dirs/yolov5_s-v61_fast_1xb12-40e_cat/epoch_40.pth \ --target-layer neck.out_layers[2] ``` @@ -384,7 +390,7 @@ python demo/boxam_vis_demo.py data/cat/images/IMG_20221020_112705.jpg \ ```shell python demo/boxam_vis_demo.py data/cat/images/IMG_20221020_112705.jpg \ configs/yolov5/yolov5_s-v61_fast_1xb12-40e_cat.py \ - work_dirs/yolov5_s-v61_fast_1xb8-40e_cat/epoch_40.pth \ + work_dirs/yolov5_s-v61_fast_1xb12-40e_cat/epoch_40.pth \ --target-layer neck.out_layers[1] ``` @@ -397,7 +403,7 @@ python demo/boxam_vis_demo.py data/cat/images/IMG_20221020_112705.jpg \ ```shell python demo/boxam_vis_demo.py data/cat/images/IMG_20221020_112705.jpg \ configs/yolov5/yolov5_s-v61_fast_1xb12-40e_cat.py \ - work_dirs/yolov5_s-v61_fast_1xb8-40e_cat/epoch_40.pth \ + work_dirs/yolov5_s-v61_fast_1xb12-40e_cat/epoch_40.pth \ --target-layer neck.out_layers[0] ``` @@ -520,4 +526,4 @@ Here we choose to save the inference results under `output` instead of displayin This completes the transformation deployment of the trained model and checks the inference results. This is the end of the tutorial. -The full content above can be viewed: [15_minutes_object_detection.ipynb](<>) +The full content above can be viewed: [15_minutes_object_detection.ipynb](<>). If you encounter problems during training or testing, please check the \[common troubleshooting steps\](... /recommended_topics/troubleshooting_steps.md) first and feel free to raise an issue if you still can't solve it. diff --git a/docs/en/get_started/overview.md b/docs/en/get_started/overview.md index 0af5ab62..52bcbd17 100644 --- a/docs/en/get_started/overview.md +++ b/docs/en/get_started/overview.md @@ -17,8 +17,8 @@ The following tasks are currently supported:
Tasks currently supported -- object detection -- rotated object detection +- Object detection +- Rotated object detection
@@ -30,6 +30,7 @@ The YOLO series of algorithms currently supported are as follows: - YOLOv5 - YOLOX - RTMDet +- RTMDet-Rotated - YOLOv6 - YOLOv7 - PPYOLOE diff --git a/docs/en/notes/changelog.md b/docs/en/notes/changelog.md index 047ebcf4..310b930b 100644 --- a/docs/en/notes/changelog.md +++ b/docs/en/notes/changelog.md @@ -1,5 +1,60 @@ # Changelog +## v0.5.0 (2/3/2023) + +### Highlights + +1. Support [RTMDet-R](https://github.com/open-mmlab/mmyolo/blob/dev/configs/rtmdet/README.md#rotated-object-detection) rotated object detection +2. Support for using mask annotation to improve [YOLOv8](https://github.com/open-mmlab/mmyolo/blob/dev/configs/yolov8/README.md) object detection performance +3. Support [MMRazor](https://github.com/open-mmlab/mmyolo/blob/dev/configs/razor/subnets/README.md) searchable NAS sub-network as the backbone of YOLO series algorithm +4. Support calling [MMRazor](https://github.com/open-mmlab/mmyolo/blob/dev/configs/rtmdet/distillation/README.md) to distill the knowledge of RTMDet +5. [MMYOLO](https://mmyolo.readthedocs.io/zh_CN/dev/) document structure optimization, comprehensive content upgrade +6. Improve YOLOX mAP and training speed based on RTMDet training hyperparameters +7. Support calculation of model parameters and FLOPs, provide GPU latency data on T4 devices, and update [Model Zoo](https://github.com/open-mmlab/mmyolo/blob/dev/docs/en/model_zoo.md) +8. Support test-time augmentation (TTA) +9. Support RTMDet, YOLOv8 and YOLOv7 assigner visualization + +### New Features + +01. Support inference for RTMDet instance segmentation tasks (#583) +02. Beautify the configuration file in MMYOLO and add more comments (#501, #506, #516, #529, #531, #539) +03. Refactor and optimize documentation (#568, #573, #579, #584, #587, #589, #596, #599, #600) +04. Support fast version of YOLOX (#518) +05. Support DeepStream in EasyDeploy and add documentation (#485, #545, #571) +06. Add confusion matrix drawing script (#572) +07. Add single channel application case (#460) +08. Support auto registration (#597) +09. Support Box CAM of YOLOv7, YOLOv8 and PPYOLOE (#601) +10. Add automated generation of MM series repo registration information and tools scripts (#559) +11. Added YOLOv7 model structure diagram (#504) +12. Add how to specify specific GPU training and inference files (#503) +13. Add check if `metainfo` is all lowercase when training or testing (#535) +14. Add links to Twitter, Discord, Medium, YouTube, etc. (#555) + +### Bug Fixes + +1. Fix isort version issue (#492, #497) +2. Fix type error of assigner visualization (#509) +3. Fix YOLOv8 documentation link error (#517) +4. Fix RTMDet Decoder error in EasyDeploy (#519) +5. Fix some document linking errors (#537) +6. Fix RTMDet-Tiny weight path error (#580) + +### Improvements + +1. Update `contributing.md` +2. Optimize `DetDataPreprocessor` branch to support multitasking (#511) +3. Optimize `gt_instances_preprocess` so it can be used for other YOLO algorithms (#532) +4. Add `yolov7-e6e` weight conversion script (#570) +5. Reference YOLOv8 inference code modification PPYOLOE + +### Contributors + +A total of 22 developers contributed to this release. 
+ +Thank @triple-Mu, @isLinXu, @Audrey528, @TianWen580, @yechenzhi, @RangeKing, @lyviva, @Nioolek, @PeterH0323, @tianleiSHI, @aptsunny, @satuoqaq, @vansin, @xin-li-67, @VoyagerXvoyagerx, +@landhill, @kitecats, @tang576225574, @HIT-cwh, @AI-Tianlong, @RangiLyu, @hhaAndroid + ## v0.4.0 (18/1/2023) ### Highlights diff --git a/docs/zh_cn/get_started/15_minutes_object_detection.md b/docs/zh_cn/get_started/15_minutes_object_detection.md index 38cec820..aeac5e59 100644 --- a/docs/zh_cn/get_started/15_minutes_object_detection.md +++ b/docs/zh_cn/get_started/15_minutes_object_detection.md @@ -15,6 +15,8 @@ - [模型测试](#模型测试) - [EasyDeploy 模型部署](#easydeploy-模型部署) +本文以 YOLOv5-s 为例,其余 YOLO 系列算法的猫 cat 小数据集 demo 配置请查看对应的算法配置文件夹下。 + ## 环境安装 假设你已经提前安装好了 Conda,接下来安装 PyTorch @@ -134,19 +136,22 @@ val_dataloader = dict( dataset=dict( metainfo=metainfo, data_root=data_root, - ann_file='annotations/trainval.json', + ann_file='annotations/test.json', data_prefix=dict(img='images/'))) test_dataloader = val_dataloader -val_evaluator = dict(ann_file=data_root + 'annotations/trainval.json') +_base_.optim_wrapper.optimizer.batch_size_per_gpu = train_batch_size_per_gpu + +val_evaluator = dict(ann_file=data_root + 'annotations/test.json') test_evaluator = val_evaluator default_hooks = dict( # 每隔 10 个 epoch 保存一次权重,并且最多保存 2 个权重 # 模型评估时候自动保存最佳模型 checkpoint=dict(interval=10, max_keep_ckpts=2, save_best='auto'), - param_scheduler=dict(max_epochs=max_epochs), + # warmup_mim_iter 参数非常关键,因为 cat 数据集非常小,默认的最小 warmup_mim_iter 是 1000,导致训练过程学习率偏小 + param_scheduler=dict(max_epochs=max_epochs, warmup_mim_iter=10), # 日志打印间隔为 5 logger=dict(type='LoggerHook', interval=5)) # 评估间隔为 10 @@ -167,21 +172,21 @@ python tools/train.py configs/yolov5/yolov5_s-v61_fast_1xb12-40e_cat.py image -在 `trainval.json` 上性能如下所示: +在 `test.json` 上性能如下所示: ```text - Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.685 - Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.953 - Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.852 + Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.631 + Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.909 + Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.747 Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000 Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = -1.000 - Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.685 - Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.664 - Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.749 - Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.761 + Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.631 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.627 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.703 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.703 Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000 Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = -1.000 - Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.761 + Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.703 ``` 上述性能是通过 COCO API 打印,其中 -1 表示不存在对于尺度的物体。根据 COCO 定义的规则,Cat 数据集里面全部是大物体,不存在小和中等规模物体。 @@ -243,10 +248,10 @@ python tools/train.py configs/yolov5/yolov5_s-v61_fast_1xb12-40e_cat.py ```
-image +image
-image +image
#### 2 Tensorboard 可视化使用 @@ -260,7 +265,7 @@ pip install tensorboard 同上述在配置文件 `configs/yolov5/yolov5_s-v61_fast_1xb12-40e_cat.py`配置的最后添加 `tensorboard` 配置 ```python -visualizer = dict(vis_backends=[dict(type='LocalVisBackend'),dict(type='TensorboardVisBackend')]) +visualizer = dict(vis_backends=[dict(type='LocalVisBackend'), dict(type='TensorboardVisBackend')]) ``` 重新运行训练命令后,Tensorboard 文件会生成在可视化文件夹 `work_dirs/yolov5_s-v61_fast_1xb12-40e_cat.py/{timestamp}/vis_data` 下, @@ -334,7 +339,7 @@ test_pipeline = [ ```shell python demo/featmap_vis_demo.py data/cat/images/IMG_20221020_112705.jpg \ configs/yolov5/yolov5_s-v61_fast_1xb12-40e_cat.py \ - work_dirs/yolov5_s-v61_fast_1xb8-40e_cat/epoch_40.pth \ + work_dirs/yolov5_s-v61_fast_1xb12-40e_cat/epoch_40.pth \ --target-layers backbone \ --channel-reduction squeeze_mean ``` @@ -350,7 +355,7 @@ python demo/featmap_vis_demo.py data/cat/images/IMG_20221020_112705.jpg \ ```shell python demo/featmap_vis_demo.py data/cat/images/IMG_20221020_112705.jpg \ configs/yolov5/yolov5_s-v61_fast_1xb12-40e_cat.py \ - work_dirs/yolov5_s-v61_fast_1xb8-40e_cat/epoch_40.pth \ + work_dirs/yolov5_s-v61_fast_1xb12-40e_cat/epoch_40.pth \ --target-layers neck \ --channel-reduction squeeze_mean ``` @@ -370,7 +375,7 @@ python demo/featmap_vis_demo.py data/cat/images/IMG_20221020_112705.jpg \ ```shell python demo/boxam_vis_demo.py data/cat/images/IMG_20221020_112705.jpg \ configs/yolov5/yolov5_s-v61_fast_1xb12-40e_cat.py \ - work_dirs/yolov5_s-v61_fast_1xb8-40e_cat/epoch_40.pth \ + work_dirs/yolov5_s-v61_fast_1xb12-40e_cat/epoch_40.pth \ --target-layer neck.out_layers[2] ``` @@ -383,7 +388,7 @@ python demo/boxam_vis_demo.py data/cat/images/IMG_20221020_112705.jpg \ ```shell python demo/boxam_vis_demo.py data/cat/images/IMG_20221020_112705.jpg \ configs/yolov5/yolov5_s-v61_fast_1xb12-40e_cat.py \ - work_dirs/yolov5_s-v61_fast_1xb8-40e_cat/epoch_40.pth \ + work_dirs/yolov5_s-v61_fast_1xb12-40e_cat/epoch_40.pth \ --target-layer neck.out_layers[1] ``` @@ -396,7 +401,7 @@ python demo/boxam_vis_demo.py data/cat/images/IMG_20221020_112705.jpg \ ```shell python demo/boxam_vis_demo.py data/cat/images/IMG_20221020_112705.jpg \ configs/yolov5/yolov5_s-v61_fast_1xb12-40e_cat.py \ - work_dirs/yolov5_s-v61_fast_1xb8-40e_cat/epoch_40.pth \ + work_dirs/yolov5_s-v61_fast_1xb12-40e_cat/epoch_40.pth \ --target-layer neck.out_layers[0] ``` @@ -519,4 +524,4 @@ python projects/easydeploy/tools/image-demo.py \ 这样我们就完成了将训练完成的模型进行转换部署并且检查推理结果的工作。至此本教程结束。 -以上完整内容可以查看 [15_minutes_object_detection.ipynb](<>) +以上完整内容可以查看 [15_minutes_object_detection.ipynb](<>)。 如果你在训练或者测试过程中碰到问题,请先查看 [常见错误排除步骤](../recommended_topics/troubleshooting_steps.md), 如果依然无法解决欢迎提 issue。 diff --git a/docs/zh_cn/get_started/article.md b/docs/zh_cn/get_started/article.md index 43cd0f8e..0ec160c8 100644 --- a/docs/zh_cn/get_started/article.md +++ b/docs/zh_cn/get_started/article.md @@ -18,6 +18,7 @@ - [MMYOLO 社区倾情贡献,RTMDet 原理社区开发者解读来啦!](https://zhuanlan.zhihu.com/p/569777684) - [MMYOLO 自定义数据集从标注到部署保姆级教程](https://zhuanlan.zhihu.com/p/595497726) - [满足一切需求的 MMYOLO 可视化:测试过程可视化](https://zhuanlan.zhihu.com/p/593179372) +- [MMYOLO 想你所想: 训练过程可视化](https://zhuanlan.zhihu.com/p/608586878) - [YOLOv8 深度详解!一文看懂,快速上手](https://zhuanlan.zhihu.com/p/598566644) - [玩转 MMYOLO 基础类第一期: 配置文件太复杂?继承用法看不懂?配置全解读来了](https://zhuanlan.zhihu.com/p/577715188) - [玩转 MMYOLO 工具类第一期: 特征图可视化](https://zhuanlan.zhihu.com/p/578141381?) 
diff --git a/docs/zh_cn/get_started/overview.md b/docs/zh_cn/get_started/overview.md index 81a9c436..a6adc417 100644 --- a/docs/zh_cn/get_started/overview.md +++ b/docs/zh_cn/get_started/overview.md @@ -30,6 +30,7 @@ MMYOLO 是一个基于 PyTorch 和 MMDetection 的 YOLO 系列算法开源工具 - YOLOv5 - YOLOX - RTMDet +- RTMDet-Rotated - YOLOv6 - YOLOv7 - PPYOLOE diff --git a/docs/zh_cn/notes/changelog.md b/docs/zh_cn/notes/changelog.md index b6eb86c9..bd511071 100644 --- a/docs/zh_cn/notes/changelog.md +++ b/docs/zh_cn/notes/changelog.md @@ -1,5 +1,60 @@ # 更新日志 +## v0.5.0 (2/3/2023) + +### 亮点 + +1. 支持了 [RTMDet-R](https://github.com/open-mmlab/mmyolo/blob/dev/configs/rtmdet/README.md#rotated-object-detection) 旋转框目标检测任务和算法 +2. [YOLOv8](https://github.com/open-mmlab/mmyolo/blob/dev/configs/yolov8/README.md) 支持使用 mask 标注提升目标检测模型性能 +3. 支持 [MMRazor](https://github.com/open-mmlab/mmyolo/blob/dev/configs/razor/subnets/README.md) 搜索的 NAS 子网络作为 YOLO 系列算法的 backbone +4. 支持调用 [MMRazor](https://github.com/open-mmlab/mmyolo/blob/dev/configs/rtmdet/distillation/README.md) 对 RTMDet 进行知识蒸馏 +5. [MMYOLO](https://mmyolo.readthedocs.io/zh_CN/dev/) 文档结构优化,内容全面升级 +6. 基于 RTMDet 训练超参提升 YOLOX 精度和训练速度 +7. 支持模型参数量、FLOPs 计算和提供 T4 设备上 GPU 延时数据,并更新了 [Model Zoo](https://github.com/open-mmlab/mmyolo/blob/dev/docs/zh_cn/model_zoo.md) +8. 支持测试时增强 TTA +9. 支持 RTMDet、YOLOv8 和 YOLOv7 assigner 可视化 + +### 新特性 + +01. 支持 RTMDet 实例分割任务的推理 (#583) +02. 美化 MMYOLO 中配置文件并增加更多注释 (#501, #506, #516, #529, #531, #539) +03. 重构并优化中英文文档 (#568, #573, #579, #584, #587, #589, #596, #599, #600) +04. 支持 fast 版本的 YOLOX (#518) +05. EasyDeploy 中支持 DeepStream,并添加说明文档 (#485, #545, #571) +06. 新增混淆矩阵绘制脚本 (#572) +07. 新增单通道应用案例 (#460) +08. 支持 auto registration (#597) +09. Box CAM 支持 YOLOv7、YOLOv8 和 PPYOLOE (#601) +10. 新增自动化生成 MM 系列 repo 注册信息和 tools 脚本 (#559) +11. 新增 YOLOv7 模型结构图 (#504) +12. 新增如何指定特定 GPU 训练和推理文档 (#503) +13. 新增训练或者测试时检查 `metainfo` 是否全为小写 (#535) +14. 增加 Twitter、Discord、Medium 和 YouTube 等链接 (#555) + +### Bug 修复 + +1. 修复 isort 版本问题 (#492, #497) +2. 修复 assigner 可视化模块的 type 错误 (#509) +3. 修复 YOLOv8 文档链接错误 (#517) +4. 修复 EasyDeploy 中的 RTMDet Decoder 错误 (#519) +5. 修复一些文档链接错误 (#537) +6. 修复 RTMDet-Tiny 权重路径错误 (#580) + +### 完善 + +1. 完善更新 `contributing.md` +2. 优化 `DetDataPreprocessor` 支使其支持多任务 (#511) +3. 优化 `gt_instances_preprocess` 使其可以用于其他 YOLO 算法 (#532) +4. 新增 `yolov7-e6e` 权重转换脚本 (#570) +5. 参考 YOLOv8 推理代码修改 PPYOLOE (#614) + +### 贡献者 + +总共 22 位开发者参与了本次版本 + +@triple-Mu, @isLinXu, @Audrey528, @TianWen580, @yechenzhi, @RangeKing, @lyviva, @Nioolek, @PeterH0323, @tianleiSHI, @aptsunny, @satuoqaq, @vansin, @xin-li-67, @VoyagerXvoyagerx, +@landhill, @kitecats, @tang576225574, @HIT-cwh, @AI-Tianlong, @RangiLyu, @hhaAndroid + ## v0.4.0 (18/1/2023) ### 亮点 From 6506405970844165090fe9953917c492f46081a3 Mon Sep 17 00:00:00 2001 From: Range King Date: Thu, 2 Mar 2023 17:25:14 +0800 Subject: [PATCH 64/64] update banner (#617) --- README.md | 2 +- README_zh-CN.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index c6dee0a5..96e0c08f 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@
- +
 
OpenMMLab website diff --git a/README_zh-CN.md b/README_zh-CN.md index e1d27924..16c55a65 100644 --- a/README_zh-CN.md +++ b/README_zh-CN.md @@ -1,5 +1,5 @@
- +
 
OpenMMLab 官网
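As a quick sanity check of the cat fine-tuning configs introduced earlier in this series, one might load one of them through MMYOLO's testing helper (a sketch; it assumes MMYOLO is installed from source so the relative config path resolves):

```python
from mmyolo.testing import get_detector_cfg

# Load the model section of the new YOLOv8 cat config added by this series.
model = get_detector_cfg('yolov8/yolov8_s_fast_1xb12-40e_cat.py')

# The head should be configured for the single 'cat' class and the
# backbone fully frozen for fine-tuning, as set in the config.
print(model.bbox_head.head_module.num_classes)  # expected: 1
print(model.backbone.frozen_stages)             # expected: 4
```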