[Docs] Update user guides docs and tools for MMPretrain. (#1429)

* [Docs] Update user guides docs and tools for MMPretrain.
* Fix UT
* Fix Chinese docs.
* Improve according to comments.
* Fix windows CI.

pull/1445/head
parent a50d96f7f1
commit c4ccae40db
@@ -63,7 +63,7 @@ jobs:
   strategy:
     matrix:
       python: [3.7]
-      platform: [cu111]
+      platform: [cpu]
   steps:
   - uses: actions/checkout@v2
   - name: Set up Python ${{ matrix.python-version }}

@@ -76,8 +76,8 @@ jobs:
       run: pip install torch==1.8.2+${{matrix.platform}} torchvision==0.9.2+${{matrix.platform}} -f https://download.pytorch.org/whl/lts/1.8/torch_lts.html
     - name: Install mmpretrain dependencies
       run: |
-        pip install git+https://github.com/open-mmlab/mmengine.git@main
         pip install -U openmim
+        mim install mmengine
        mim install 'mmcv >= 2.0.0rc1'
        pip install -r requirements.txt
    - name: Build and install
@@ -0,0 +1,46 @@
_base_ = 'mmdet::mask_rcnn/mask-rcnn_r50-caffe-c4_1x_coco.py'
# https://github.com/open-mmlab/mmdetection/blob/dev-3.x/configs/mask_rcnn/mask-rcnn_r50-caffe-c4_1x_coco.py

data_preprocessor = dict(
    type='DetDataPreprocessor',
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    bgr_to_rgb=True,
    pad_mask=True,
    pad_size_divisor=32)

norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    data_preprocessor=data_preprocessor,
    backbone=dict(
        frozen_stages=-1,
        norm_cfg=norm_cfg,
        norm_eval=False,
        style='pytorch',
        init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
    roi_head=dict(
        shared_head=dict(
            type='ResLayerExtraNorm',
            norm_cfg=norm_cfg,
            norm_eval=False,
            style='pytorch')))

train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
    dict(
        type='RandomChoiceResize',
        scales=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),
                (1333, 768), (1333, 800)],
        keep_ratio=True),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PackDetInputs')
]

train_dataloader = dict(dataset=dict(pipeline=train_pipeline))

train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=12, val_interval=1)

custom_imports = dict(
    imports=['mmpretrain.models.utils.res_layer_extra_norm'],
    allow_failed_imports=False)
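These benchmark configs (this one and the FPN variant below) inherit from MMDetection via the `mmdet::` config scope, so they are meant to be launched through MIM, which resolves configs across OpenMMLab repositories. A minimal usage sketch, assuming MIM and MMDetection are installed — the config path and GPU count below are placeholders, not paths from this commit:

```shell
# Sketch only: substitute the real path of the benchmark config in this commit.
pip install -U openmim && mim install "mmdet>=3.0.0rc0"
mim train mmdet path/to/the/benchmark_config.py --gpus 8 --launcher pytorch
```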
@@ -0,0 +1,24 @@
_base_ = 'mmdet::mask_rcnn/mask-rcnn_r50_fpn_1x_coco.py'
# https://github.com/open-mmlab/mmdetection/blob/dev-3.x/configs/mask_rcnn/mask-rcnn_r50_fpn_1x_coco.py

norm_cfg = dict(type='SyncBN', requires_grad=True)
model = dict(
    backbone=dict(frozen_stages=-1, norm_cfg=norm_cfg, norm_eval=False),
    neck=dict(norm_cfg=norm_cfg),
    roi_head=dict(
        bbox_head=dict(type='Shared4Conv1FCBBoxHead', norm_cfg=norm_cfg),
        mask_head=dict(norm_cfg=norm_cfg)))

train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations', with_bbox=True, with_mask=True),
    dict(
        type='RandomChoiceResize',
        scales=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),
                (1333, 768), (1333, 800)],
        keep_ratio=True),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PackDetInputs')
]

train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
@@ -22,7 +22,7 @@ Models:
       Metrics:
         Top 1 Accuracy: 70.35
         Top 5 Accuracy: 89.98
-      Task: Image Classification
+      Task: Image Classification
     Weights: https://download.openmmlab.com/mmclassification/v0/xcit/xcit-nano-12-p16_3rdparty_in1k_20230213-ed776c38.pth
     Config: configs/xcit/xcit-nano-12-p16_8xb128_in1k.py
     Converted From:

@@ -39,7 +39,7 @@ Models:
       Metrics:
         Top 1 Accuracy: 72.36
         Top 5 Accuracy: 91.02
-      Task: Image Classification
+      Task: Image Classification
     Weights: https://download.openmmlab.com/mmclassification/v0/xcit/xcit-nano-12-p16_3rdparty-dist_in1k_20230213-fb247f7b.pth
     Config: configs/xcit/xcit-nano-12-p16_8xb128_in1k.py
     Converted From:

@@ -56,7 +56,7 @@ Models:
       Metrics:
         Top 1 Accuracy: 77.21
         Top 5 Accuracy: 93.62
-      Task: Image Classification
+      Task: Image Classification
     Weights: https://download.openmmlab.com/mmclassification/v0/xcit/xcit-tiny-12-p16_3rdparty_in1k_20230213-82c547ca.pth
     Config: configs/xcit/xcit-tiny-12-p16_8xb128_in1k.py
     Converted From:

@@ -73,7 +73,7 @@ Models:
       Metrics:
         Top 1 Accuracy: 78.7
         Top 5 Accuracy: 94.12
-      Task: Image Classification
+      Task: Image Classification
     Weights: https://download.openmmlab.com/mmclassification/v0/xcit/xcit-tiny-12-p16_3rdparty-dist_in1k_20230213-d5fde0a3.pth
     Config: configs/xcit/xcit-tiny-12-p16_8xb128_in1k.py
     Converted From:

@@ -90,7 +90,7 @@ Models:
       Metrics:
         Top 1 Accuracy: 74.93
         Top 5 Accuracy: 92.42
-      Task: Image Classification
+      Task: Image Classification
     Weights: https://download.openmmlab.com/mmclassification/v0/xcit/xcit-nano-12-p16_3rdparty-dist_in1k-384px_20230213-712db4d4.pth
     Config: configs/xcit/xcit-nano-12-p16_8xb128_in1k-384px.py
     Converted From:

@@ -107,7 +107,7 @@ Models:
       Metrics:
         Top 1 Accuracy: 73.8
         Top 5 Accuracy: 92.08
-      Task: Image Classification
+      Task: Image Classification
     Weights: https://download.openmmlab.com/mmclassification/v0/xcit/xcit-nano-12-p8_3rdparty_in1k_20230213-3370c293.pth
     Config: configs/xcit/xcit-nano-12-p8_8xb128_in1k.py
     Converted From:

@@ -124,7 +124,7 @@ Models:
       Metrics:
         Top 1 Accuracy: 76.17
         Top 5 Accuracy: 93.08
-      Task: Image Classification
+      Task: Image Classification
     Weights: https://download.openmmlab.com/mmclassification/v0/xcit/xcit-nano-12-p8_3rdparty-dist_in1k_20230213-2f87d2b3.pth
     Config: configs/xcit/xcit-nano-12-p8_8xb128_in1k.py
     Converted From:

@@ -141,7 +141,7 @@ Models:
       Metrics:
         Top 1 Accuracy: 79.47
         Top 5 Accuracy: 94.85
-      Task: Image Classification
+      Task: Image Classification
     Weights: https://download.openmmlab.com/mmclassification/v0/xcit/xcit-tiny-24-p16_3rdparty_in1k_20230213-366c1cd0.pth
     Config: configs/xcit/xcit-tiny-24-p16_8xb128_in1k.py
     Converted From:

@@ -158,7 +158,7 @@ Models:
       Metrics:
         Top 1 Accuracy: 80.51
         Top 5 Accuracy: 95.17
-      Task: Image Classification
+      Task: Image Classification
     Weights: https://download.openmmlab.com/mmclassification/v0/xcit/xcit-tiny-24-p16_3rdparty-dist_in1k_20230213-b472e80a.pth
     Config: configs/xcit/xcit-tiny-24-p16_8xb128_in1k.py
     Converted From:

@@ -175,7 +175,7 @@ Models:
       Metrics:
         Top 1 Accuracy: 80.58
         Top 5 Accuracy: 95.38
-      Task: Image Classification
+      Task: Image Classification
     Weights: https://download.openmmlab.com/mmclassification/v0/xcit/xcit-tiny-12-p16_3rdparty-dist_in1k-384px_20230213-00a20023.pth
     Config: configs/xcit/xcit-tiny-12-p16_8xb128_in1k-384px.py
     Converted From:

@@ -192,7 +192,7 @@ Models:
       Metrics:
         Top 1 Accuracy: 79.75
         Top 5 Accuracy: 94.88
-      Task: Image Classification
+      Task: Image Classification
     Weights: https://download.openmmlab.com/mmclassification/v0/xcit/xcit-tiny-12-p8_3rdparty_in1k_20230213-8b02f8f5.pth
     Config: configs/xcit/xcit-tiny-12-p8_8xb128_in1k.py
     Converted From:

@@ -209,7 +209,7 @@ Models:
       Metrics:
         Top 1 Accuracy: 81.26
         Top 5 Accuracy: 95.46
-      Task: Image Classification
+      Task: Image Classification
     Weights: https://download.openmmlab.com/mmclassification/v0/xcit/xcit-tiny-12-p8_3rdparty-dist_in1k_20230213-f3f9b44f.pth
     Config: configs/xcit/xcit-tiny-12-p8_8xb128_in1k.py
     Converted From:

@@ -226,7 +226,7 @@ Models:
       Metrics:
         Top 1 Accuracy: 81.87
         Top 5 Accuracy: 95.77
-      Task: Image Classification
+      Task: Image Classification
     Weights: https://download.openmmlab.com/mmclassification/v0/xcit/xcit-small-12-p16_3rdparty_in1k_20230213-d36779d2.pth
     Config: configs/xcit/xcit-small-12-p16_8xb128_in1k.py
     Converted From:

@@ -243,7 +243,7 @@ Models:
       Metrics:
         Top 1 Accuracy: 83.12
         Top 5 Accuracy: 96.41
-      Task: Image Classification
+      Task: Image Classification
     Weights: https://download.openmmlab.com/mmclassification/v0/xcit/xcit-small-12-p16_3rdparty-dist_in1k_20230213-c95bbae1.pth
     Config: configs/xcit/xcit-small-12-p16_8xb128_in1k.py
     Converted From:

@@ -260,7 +260,7 @@ Models:
       Metrics:
         Top 1 Accuracy: 77.69
         Top 5 Accuracy: 94.09
-      Task: Image Classification
+      Task: Image Classification
     Weights: https://download.openmmlab.com/mmclassification/v0/xcit/xcit-nano-12-p8_3rdparty-dist_in1k-384px_20230213-09d925ef.pth
     Config: configs/xcit/xcit-nano-12-p8_8xb128_in1k-384px.py
     Converted From:

@@ -277,7 +277,7 @@ Models:
       Metrics:
         Top 1 Accuracy: 82.43
         Top 5 Accuracy: 96.2
-      Task: Image Classification
+      Task: Image Classification
     Weights: https://download.openmmlab.com/mmclassification/v0/xcit/xcit-tiny-24-p16_3rdparty-dist_in1k-384px_20230213-20e13917.pth
     Config: configs/xcit/xcit-tiny-24-p16_8xb128_in1k-384px.py
     Converted From:

@@ -294,7 +294,7 @@ Models:
       Metrics:
         Top 1 Accuracy: 82.38
         Top 5 Accuracy: 95.93
-      Task: Image Classification
+      Task: Image Classification
     Weights: https://download.openmmlab.com/mmclassification/v0/xcit/xcit-small-24-p16_3rdparty_in1k_20230213-40febe38.pth
     Config: configs/xcit/xcit-small-24-p16_8xb128_in1k.py
     Converted From:

@@ -311,7 +311,7 @@ Models:
       Metrics:
         Top 1 Accuracy: 83.7
         Top 5 Accuracy: 96.61
-      Task: Image Classification
+      Task: Image Classification
     Weights: https://download.openmmlab.com/mmclassification/v0/xcit/xcit-small-24-p16_3rdparty-dist_in1k_20230213-130d7262.pth
     Config: configs/xcit/xcit-small-24-p16_8xb128_in1k.py
     Converted From:

@@ -328,7 +328,7 @@ Models:
       Metrics:
         Top 1 Accuracy: 81.7
         Top 5 Accuracy: 95.9
-      Task: Image Classification
+      Task: Image Classification
     Weights: https://download.openmmlab.com/mmclassification/v0/xcit/xcit-tiny-24-p8_3rdparty_in1k_20230213-4b9ba392.pth
     Config: configs/xcit/xcit-tiny-24-p8_8xb128_in1k.py
     Converted From:

@@ -345,7 +345,7 @@ Models:
       Metrics:
         Top 1 Accuracy: 82.62
         Top 5 Accuracy: 96.16
-      Task: Image Classification
+      Task: Image Classification
     Weights: https://download.openmmlab.com/mmclassification/v0/xcit/xcit-tiny-24-p8_3rdparty-dist_in1k_20230213-ad9c44b0.pth
     Config: configs/xcit/xcit-tiny-24-p8_8xb128_in1k.py
     Converted From:

@@ -362,7 +362,7 @@ Models:
       Metrics:
         Top 1 Accuracy: 82.46
         Top 5 Accuracy: 96.22
-      Task: Image Classification
+      Task: Image Classification
     Weights: https://download.openmmlab.com/mmclassification/v0/xcit/xcit-tiny-12-p8_3rdparty-dist_in1k-384px_20230213-a072174a.pth
     Config: configs/xcit/xcit-tiny-12-p8_8xb128_in1k-384px.py
     Converted From:

@@ -379,7 +379,7 @@ Models:
       Metrics:
         Top 1 Accuracy: 84.74
         Top 5 Accuracy: 97.19
-      Task: Image Classification
+      Task: Image Classification
     Weights: https://download.openmmlab.com/mmclassification/v0/xcit/xcit-small-12-p16_3rdparty-dist_in1k-384px_20230213-ba36c982.pth
     Config: configs/xcit/xcit-small-12-p16_8xb128_in1k-384px.py
     Converted From:

@@ -396,7 +396,7 @@ Models:
       Metrics:
         Top 1 Accuracy: 82.56
         Top 5 Accuracy: 95.82
-      Task: Image Classification
+      Task: Image Classification
     Weights: https://download.openmmlab.com/mmclassification/v0/xcit/xcit-medium-24-p16_3rdparty_in1k_20230213-ad0aa92e.pth
     Config: configs/xcit/xcit-medium-24-p16_8xb128_in1k.py
     Converted From:

@@ -413,7 +413,7 @@ Models:
       Metrics:
         Top 1 Accuracy: 84.15
         Top 5 Accuracy: 96.82
-      Task: Image Classification
+      Task: Image Classification
     Weights: https://download.openmmlab.com/mmclassification/v0/xcit/xcit-medium-24-p16_3rdparty-dist_in1k_20230213-aca5cd0c.pth
     Config: configs/xcit/xcit-medium-24-p16_8xb128_in1k.py
     Converted From:

@@ -430,7 +430,7 @@ Models:
       Metrics:
         Top 1 Accuracy: 83.21
         Top 5 Accuracy: 96.41
-      Task: Image Classification
+      Task: Image Classification
     Weights: https://download.openmmlab.com/mmclassification/v0/xcit/xcit-small-12-p8_3rdparty_in1k_20230213-9e364ce3.pth
     Config: configs/xcit/xcit-small-12-p8_8xb128_in1k.py
     Converted From:

@@ -447,7 +447,7 @@ Models:
       Metrics:
         Top 1 Accuracy: 83.97
         Top 5 Accuracy: 96.81
-      Task: Image Classification
+      Task: Image Classification
     Weights: https://download.openmmlab.com/mmclassification/v0/xcit/xcit-small-12-p8_3rdparty-dist_in1k_20230213-71886580.pth
     Config: configs/xcit/xcit-small-12-p8_8xb128_in1k.py
     Converted From:

@@ -464,7 +464,7 @@ Models:
       Metrics:
         Top 1 Accuracy: 85.1
         Top 5 Accuracy: 97.32
-      Task: Image Classification
+      Task: Image Classification
     Weights: https://download.openmmlab.com/mmclassification/v0/xcit/xcit-small-24-p16_3rdparty-dist_in1k-384px_20230213-28fa2d0e.pth
     Config: configs/xcit/xcit-small-24-p16_8xb128_in1k-384px.py
     Converted From:

@@ -481,7 +481,7 @@ Models:
       Metrics:
         Top 1 Accuracy: 83.77
         Top 5 Accuracy: 96.72
-      Task: Image Classification
+      Task: Image Classification
     Weights: https://download.openmmlab.com/mmclassification/v0/xcit/xcit-tiny-24-p8_3rdparty-dist_in1k-384px_20230213-30d5e5ec.pth
     Config: configs/xcit/xcit-tiny-24-p8_8xb128_in1k-384px.py
     Converted From:

@@ -498,7 +498,7 @@ Models:
       Metrics:
         Top 1 Accuracy: 83.62
         Top 5 Accuracy: 96.51
-      Task: Image Classification
+      Task: Image Classification
     Weights: https://download.openmmlab.com/mmclassification/v0/xcit/xcit-small-24-p8_3rdparty_in1k_20230213-280ebcc7.pth
     Config: configs/xcit/xcit-small-24-p8_8xb128_in1k.py
     Converted From:

@@ -515,7 +515,7 @@ Models:
       Metrics:
         Top 1 Accuracy: 84.68
         Top 5 Accuracy: 97.07
-      Task: Image Classification
+      Task: Image Classification
     Weights: https://download.openmmlab.com/mmclassification/v0/xcit/xcit-small-24-p8_3rdparty-dist_in1k_20230213-f2773c78.pth
     Config: configs/xcit/xcit-small-24-p8_8xb128_in1k.py
     Converted From:

@@ -532,7 +532,7 @@ Models:
       Metrics:
         Top 1 Accuracy: 82.97
         Top 5 Accuracy: 95.86
-      Task: Image Classification
+      Task: Image Classification
     Weights: https://download.openmmlab.com/mmclassification/v0/xcit/xcit-large-24-p16_3rdparty_in1k_20230214-d29d2529.pth
     Config: configs/xcit/xcit-large-24-p16_8xb128_in1k.py
     Converted From:

@@ -549,7 +549,7 @@ Models:
       Metrics:
         Top 1 Accuracy: 84.61
         Top 5 Accuracy: 97.07
-      Task: Image Classification
+      Task: Image Classification
     Weights: https://download.openmmlab.com/mmclassification/v0/xcit/xcit-large-24-p16_3rdparty-dist_in1k_20230214-4fea599c.pth
     Config: configs/xcit/xcit-large-24-p16_8xb128_in1k.py
     Converted From:

@@ -566,7 +566,7 @@ Models:
       Metrics:
         Top 1 Accuracy: 85.47
         Top 5 Accuracy: 97.49
-      Task: Image Classification
+      Task: Image Classification
     Weights: https://download.openmmlab.com/mmclassification/v0/xcit/xcit-medium-24-p16_3rdparty-dist_in1k-384px_20230214-6c23a201.pth
     Config: configs/xcit/xcit-medium-24-p16_8xb128_in1k-384px.py
     Converted From:

@@ -583,7 +583,7 @@ Models:
       Metrics:
         Top 1 Accuracy: 85.12
         Top 5 Accuracy: 97.31
-      Task: Image Classification
+      Task: Image Classification
     Weights: https://download.openmmlab.com/mmclassification/v0/xcit/xcit-small-12-p8_3rdparty-dist_in1k-384px_20230214-9f2178bc.pth
     Config: configs/xcit/xcit-small-12-p8_8xb128_in1k-384px.py
     Converted From:

@@ -600,7 +600,7 @@ Models:
       Metrics:
         Top 1 Accuracy: 83.61
         Top 5 Accuracy: 96.23
-      Task: Image Classification
+      Task: Image Classification
     Weights: https://download.openmmlab.com/mmclassification/v0/xcit/xcit-medium-24-p8_3rdparty_in1k_20230214-c362850b.pth
     Config: configs/xcit/xcit-medium-24-p8_8xb128_in1k.py
     Converted From:

@@ -617,7 +617,7 @@ Models:
       Metrics:
         Top 1 Accuracy: 85.0
         Top 5 Accuracy: 97.16
-      Task: Image Classification
+      Task: Image Classification
     Weights: https://download.openmmlab.com/mmclassification/v0/xcit/xcit-medium-24-p8_3rdparty-dist_in1k_20230214-625c953b.pth
     Config: configs/xcit/xcit-medium-24-p8_8xb128_in1k.py
     Converted From:

@@ -634,7 +634,7 @@ Models:
       Metrics:
         Top 1 Accuracy: 85.57
         Top 5 Accuracy: 97.6
-      Task: Image Classification
+      Task: Image Classification
     Weights: https://download.openmmlab.com/mmclassification/v0/xcit/xcit-small-24-p8_3rdparty-dist_in1k-384px_20230214-57298eca.pth
     Config: configs/xcit/xcit-small-24-p8_8xb128_in1k-384px.py
     Converted From:

@@ -651,7 +651,7 @@ Models:
       Metrics:
         Top 1 Accuracy: 85.78
         Top 5 Accuracy: 97.6
-      Task: Image Classification
+      Task: Image Classification
     Weights: https://download.openmmlab.com/mmclassification/v0/xcit/xcit-large-24-p16_3rdparty-dist_in1k-384px_20230214-bd515a34.pth
     Config: configs/xcit/xcit-large-24-p16_8xb128_in1k-384px.py
     Converted From:

@@ -668,7 +668,7 @@ Models:
       Metrics:
         Top 1 Accuracy: 84.23
         Top 5 Accuracy: 96.58
-      Task: Image Classification
+      Task: Image Classification
     Weights: https://download.openmmlab.com/mmclassification/v0/xcit/xcit-large-24-p8_3rdparty_in1k_20230214-08f2f664.pth
     Config: configs/xcit/xcit-large-24-p8_8xb128_in1k.py
     Converted From:

@@ -685,7 +685,7 @@ Models:
       Metrics:
         Top 1 Accuracy: 85.14
         Top 5 Accuracy: 97.32
-      Task: Image Classification
+      Task: Image Classification
     Weights: https://download.openmmlab.com/mmclassification/v0/xcit/xcit-large-24-p8_3rdparty-dist_in1k_20230214-8c092b34.pth
     Config: configs/xcit/xcit-large-24-p8_8xb128_in1k.py
     Converted From:

@@ -702,7 +702,7 @@ Models:
       Metrics:
         Top 1 Accuracy: 85.87
         Top 5 Accuracy: 97.61
-      Task: Image Classification
+      Task: Image Classification
     Weights: https://download.openmmlab.com/mmclassification/v0/xcit/xcit-medium-24-p8_3rdparty-dist_in1k-384px_20230214-5db925e0.pth
     Config: configs/xcit/xcit-medium-24-p8_8xb128_in1k-384px.py
     Converted From:

@@ -719,7 +719,7 @@ Models:
       Metrics:
         Top 1 Accuracy: 86.13
         Top 5 Accuracy: 97.75
-      Task: Image Classification
+      Task: Image Classification
     Weights: https://download.openmmlab.com/mmclassification/v0/xcit/xcit-large-24-p8_3rdparty-dist_in1k-384px_20230214-9f718b1a.pth
     Config: configs/xcit/xcit-large-24-p8_8xb128_in1k-384px.py
     Converted From:
@@ -1,56 +1,37 @@
-ARG PYTORCH="1.8.1"
-ARG CUDA="10.2"
-ARG CUDNN="7"
-FROM pytorch/pytorch:${PYTORCH}-cuda${CUDA}-cudnn${CUDNN}-devel
+ARG PYTORCH="1.12.1"
+ARG CUDA="11.3"
+ARG CUDNN="8"
+FROM pytorch/torchserve:latest-gpu
 
-# fetch the key refer to https://forums.developer.nvidia.com/t/18-04-cuda-docker-image-is-broken/212892/9
-RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/3bf863cc.pub
-RUN apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/machine-learning/repos/ubuntu1804/x86_64/7fa2af80.pub
-
-ARG MMENGINE="0.3.2"
-ARG MMCV="2.0.0rc1"
-ARG MMCLS="1.0.0rc4"
+ARG MMPRE="1.0.0rc5"
 
 ENV PYTHONUNBUFFERED TRUE
 
-RUN apt-get update && \
-    DEBIAN_FRONTEND=noninteractive apt-get install --no-install-recommends -y \
-    ca-certificates \
-    g++ \
-    openjdk-11-jre-headless \
-    # MMDet Requirements
-    ffmpeg libsm6 libxext6 git ninja-build libglib2.0-0 libsm6 libxrender-dev libxext6 \
-    && rm -rf /var/lib/apt/lists/*
-
-ENV PATH="/opt/conda/bin:$PATH"
+ENV HOME="/home/model-server"
+ENV PATH="/opt/conda/bin:$HOME/.local/bin:$PATH"
 RUN export FORCE_CUDA=1
 
-# TORCHSEVER
-RUN pip install torchserve torch-model-archiver
-RUN pip install nvgpu
-
-# MMLAB
+# OPEN-MMLAB
-ARG PYTORCH
-ARG CUDA
-RUN pip install mmengine==${MMENGINE}
-RUN ["/bin/bash", "-c", "pip install mmcv==${MMCV} -f https://download.openmmlab.com/mmcv/dist/cu${CUDA//./}/torch${PYTORCH}/index.html"]
-RUN pip install mmcls==${MMCLS}
+RUN pip install openmim
+RUN mim install mmpretrain==${MMPRE}
+RUN mkdir -p $HOME/tmp
 
-RUN useradd -m model-server \
-    && mkdir -p /home/model-server/tmp
+COPY --chown=model-server entrypoint.sh $HOME/.local/bin/entrypoint.sh
-COPY entrypoint.sh /usr/local/bin/entrypoint.sh
+RUN chmod +x $HOME/.local/bin/entrypoint.sh
-RUN chmod +x /usr/local/bin/entrypoint.sh \
-    && chown -R model-server /home/model-server
-COPY config.properties /home/model-server/config.properties
-RUN mkdir /home/model-server/model-store && chown -R model-server /home/model-server/model-store
+COPY --chown=model-server config.properties $HOME/config.properties
 
 EXPOSE 8080 8081 8082
 
 USER model-server
-WORKDIR /home/model-server
-ENV TEMP=/home/model-server/tmp
-ENTRYPOINT ["/usr/local/bin/entrypoint.sh"]
+WORKDIR $HOME
+ENV TEMP=$HOME/tmp
+ENTRYPOINT ["/home/model-server/.local/bin/entrypoint.sh"]
 CMD ["serve"]
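To sanity-check the rewritten serving image, a hedged usage sketch — the image tag and build context are assumptions, while the ports match the `EXPOSE` line above:

```shell
# Assumed tag; run from the directory containing this Dockerfile.
docker build -t mmpretrain-serve .
docker run --rm --gpus all -p 8080:8080 -p 8081:8081 -p 8082:8082 mmpretrain-serve
```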
@@ -1,33 +1,52 @@
-# Convention in MMCLS
+# Convention in MMPretrain
 
-## Config files Naming Convention
+## Model Naming Convention
 
-We follow the below convention to name config files. Contributors are advised to follow the same style. The config file names are divided into four parts: algorithm info, module information, training information and data information. Logically, different parts are concatenated by underscores `'_'`, and words in the same part are concatenated by dashes `'-'`.
+We follow the convention below to name models. Contributors are advised to follow the same style. The model names are divided into five parts: algorithm information, module information, pretrain information, training information and data information. Logically, different parts are concatenated by underscores `'_'`, and words in the same part are concatenated by dashes `'-'`.
 
 ```text
-{algorithm info}_{module info}_{training info}_{data info}.py
+{algorithm info}_{module info}_{pretrain info}_{training info}_{data info}
 ```
 
-- `algorithm info`:algorithm information, model name and neural network architecture, such as resnet, etc.;
-- `module info`: module information is used to represent some special neck, head and pretrain information;
-- `training info`:Training information, some training schedule, including batch size, lr schedule, data augment and the like;
-- `data info`:Data information, dataset name, input size and so on, such as imagenet, cifar, etc.;
+- `algorithm info` (optional): The main algorithm information; it includes the main training algorithm, like MAE, BEiT, etc.
+- `module info`: The module information; it usually includes the backbone name, such as resnet, vit, etc.
+- `pretrain info` (optional): The pre-trained model information, e.g. the pre-trained model is trained on ImageNet-21k.
+- `training info`: The training information, i.e. the training schedule, including batch size, lr schedule, data augmentation and the like.
+- `data info`: The data information; it usually includes the dataset name, input size and so on, such as imagenet, cifar, etc.
 
 ### Algorithm information
 
-The main algorithm name and the corresponding branch architecture information. E.g:
+The main algorithm name used to train the model. For example:
 
-- `resnet50`
-- `mobilenet-v3-large`
-- `vit-small-patch32` : `patch32` represents the size of the partition in `ViT` algorithm;
-- `seresnext101-32x4d` : `SeResNet101` network structure, `32x4d` means that `groups` and `width_per_group` are 32 and 4 respectively in `Bottleneck`;
+- `simclr`
+- `mocov2`
+- `eva-mae-style`
+
+Models trained by supervised image classification can omit this field.
 
 ### Module information
 
-Some special `neck`, `head` and `pretrain` information. In classification tasks, `pretrain` information is the most commonly used:
+The modules of the model. Usually the backbone must be included in this field, while the neck and head
+information can be omitted. For example:
 
-- `in21k-pre` : pre-trained on ImageNet21k;
-- `in21k-pre-3rd-party` : pre-trained on ImageNet21k and the checkpoint is converted from a third-party repository;
+- `resnet50`
+- `vit-base-p16`
+- `swin-base`
+
+### Pretrain information
+
+If the model is fine-tuned from a pre-trained model, we need to record some information about the
+pre-trained model. For example:
+
+- The source of the pre-trained model: `fb`, `openai`, etc.
+- The method used to train the pre-trained model: `clip`, `mae`, `distill`, etc.
+- The dataset used for pre-training: `in21k`, `laion2b`, etc. (`in1k` can be omitted.)
+- The training duration: `300e`, `1600e`, etc.
+
+Not all of this information is necessary; select only what is needed to distinguish different pre-trained
+models.
+
+At the end of this field, append `-pre` as an identifier, like `mae-in21k-pre`.

@@ -45,6 +64,9 @@ Training recipe. Usually, only the part that is different from the original paper
 - `coslr-200e` : use cosine scheduler to train 200 epochs
 - `autoaug-mixup-lbs-coslr-50e` : use `autoaug`, `mixup`, `label smooth`, `cosine scheduler` to train 50 epochs
 
+If the model is converted from a third-party repository, like the official repository, the training information
+can be omitted, and `3rdparty` is used as the identifier.
+
 ### Data information
 
 - `in1k` : `ImageNet1k` dataset, default to use the input image size of 224x224;

@@ -52,29 +74,47 @@ Training recipe. Usually, only the part that is different from the original paper
 - `in1k-384px` : Indicates that the input image size is 384x384;
 - `cifar100`
 
-### Config File Name Example
+### Model Name Example
 
 ```text
-repvgg-D2se_deploy_4xb64-autoaug-lbs-mixup-coslr-200e_in1k.py
+vit-base-p32_clip-openai-pre_3rdparty_in1k
 ```
 
-- `repvgg-D2se`: Algorithm information
-  - `repvgg`: The main algorithm.
-  - `D2se`: The architecture.
-- `deploy`: Module information, means the backbone is in the deploy state.
-- `4xb64-autoaug-lbs-mixup-coslr-200e`: Training information.
-  - `4xb64`: Use 4 GPUs and the size of batches per GPU is 64.
-  - `autoaug`: Use `AutoAugment` in training pipeline.
-  - `lbs`: Use label smoothing loss.
-  - `mixup`: Use `mixup` training augment method.
-  - `coslr`: Use cosine learning rate scheduler.
-  - `200e`: Train the model for 200 epochs.
-- `in1k`: Dataset information. The config is for `ImageNet1k` dataset and the input size is `224x224`.
+- `vit-base-p32`: The module information.
+- `clip-openai-pre`: The pretrain information.
+  - `clip`: The pre-training method is CLIP.
+  - `openai`: The pre-trained model comes from OpenAI.
+  - `pre`: The pretrain identifier.
+- `3rdparty`: The model is converted from a third-party repository.
+- `in1k`: Dataset information. The model is trained on the ImageNet-1k dataset and the input size is `224x224`.
+
+```text
+beit_beit-base-p16_8xb256-amp-coslr-300e_in1k
+```
+
+- `beit`: The algorithm information.
+- `beit-base`: The module information; since the backbone is a modified ViT from BEiT, the backbone name is
+  also `beit`.
+- `8xb256-amp-coslr-300e`: The training information.
+  - `8xb256`: Use 8 GPUs, and the batch size on each GPU is 256.
+  - `amp`: Use automatic mixed-precision training.
+  - `coslr`: Use the cosine annealing learning rate scheduler.
+  - `300e`: Train for 300 epochs.
+- `in1k`: Dataset information. The model is trained on the ImageNet-1k dataset and the input size is `224x224`.
+
+## Config File Naming Convention
+
+The naming of config files is almost the same as the model names, with several differences:
+
+- The training information is necessary and cannot be `3rdparty`.
+- If the config file only includes backbone settings, with neither head settings nor dataset settings, we
+  name it `{module info}_headless.py`. This kind of config file is usually used for third-party
+  pre-trained models on large datasets.
 
 ## Checkpoint Naming Convention
 
-The naming of the weight mainly includes the configuration file name, date and hash value.
+The naming of the weight mainly includes the model name, date and hash value.
 
 ```text
-{config_name}_{date}-{hash}.pth
+{model_name}_{date}-{hash}.pth
 ```
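For illustration, a checkpoint name following this convention could look like the line below; the date and hash are invented for the example:

```text
vit-base-p32_clip-openai-pre_3rdparty_in1k_20230225-0123abcd.pth
```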
@@ -1 +0,0 @@
-# Data Flow (TODO)
@@ -245,7 +245,7 @@ env_cfg = dict(
    )
    ```
 
-   See the [Fine-tune Models](../user_guides/finetune.md) for more details about fine-tuning.
+   See the [Fine-tune Models](../notes/finetune_custom_dataset.md) for more details about fine-tuning.
 
 2. **What's the difference between `default_hooks` and `custom_hooks`?**
@@ -22,7 +22,6 @@ Model
 
    list_models
    get_model
    init_model
 
 Inference
 ------------------
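For orientation, a minimal sketch of how the `list_models` and `get_model` APIs listed above are typically used; the model name is an example, not taken from this commit:

```python
# Sketch only: assumes mmpretrain is installed; the model name is an example.
from mmpretrain import get_model, list_models

print(list_models('resnet*')[:3])  # discover matching model names
model = get_model('resnet50_8xb32_in1k', pretrained=True)  # build and load weights
```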
@@ -60,6 +60,7 @@ Loading and Formatting
 
    LoadImageFromFile
    PackInputs
+   PackMultiTaskInputs
    ToNumpy
    ToPIL
    Transpose

@@ -88,6 +89,8 @@ Processing and Augmentation
    RandomResizedCrop
    Resize
    ResizeEdge
+   BEiTMaskGenerator
+   SimMIMMaskGenerator
 
 Composed Augmentation
 """""""""""""""""""""
@@ -193,6 +196,9 @@ Note that the ``model.data_preprocessor`` has higher priority than ``data_preprocessor``
    :nosignatures:
 
    ClsDataPreprocessor
+   SelfSupDataPreprocessor
+   TwoNormDataPreprocessor
+   VideoDataPreprocessor
 
 .. module:: mmpretrain.models.utils.batch_augments
@@ -14,3 +14,6 @@ This package includes some useful helper functions for developing.
 
    collect_env
    register_all_modules
+   load_json_log
+   track_on_main_process
+   get_ori_model
@@ -16,7 +16,6 @@ Welcome to MMPretrain's documentation!
    user_guides/inference.md
    user_guides/train.md
    user_guides/test.md
-   user_guides/finetune.md
    user_guides/downstream.md
 
 .. toctree::

@@ -29,7 +28,6 @@ Welcome to MMPretrain's documentation!
    advanced_guides/schedule.md
    advanced_guides/runtime.md
    advanced_guides/evaluation.md
-   advanced_guides/data_flow.md
    advanced_guides/convention.md
 
 .. toctree::

@@ -92,6 +90,7 @@ Welcome to MMPretrain's documentation!
    notes/changelog.md
    notes/faq.md
    notes/pretrain_custom_dataset.md
+   notes/finetune_custom_dataset.md
 
 Indices and tables
 ==================
@@ -0,0 +1,340 @@
# How to Fine-tune with Custom Dataset

In most scenarios, we want to apply a pre-trained model without training from scratch: training from scratch introduces extra uncertainty about model convergence and is therefore time-consuming.
The common practice is to learn from a previous model trained on a large dataset, which can hopefully provide better knowledge than a random beginner. Roughly speaking, this process is known as fine-tuning.

Models pre-trained on the ImageNet dataset have been demonstrated to be effective for other datasets and other downstream tasks.
Hence, this tutorial provides instructions for using the models in the [Model Zoo](../modelzoo_statistics.md) on other datasets to obtain better performance.

In this tutorial, we provide a practice example and some tips on how to fine-tune a model on your own dataset.

## Step-1: Prepare your dataset

Prepare your dataset following [Prepare Dataset](../user_guides/dataset_prepare.md).
The root folder of the dataset can be like `data/custom_dataset/`.

Here, we assume you want to do supervised image-classification training, and use the sub-folder format
`CustomDataset` to organize your dataset as:

```text
data/custom_dataset/
├── train
│   ├── class_x
│   │   ├── x_1.png
│   │   ├── x_2.png
│   │   ├── x_3.png
│   │   └── ...
│   ├── class_y
│   └── ...
└── test
    ├── class_x
    │   ├── test_x_1.png
    │   ├── test_x_2.png
    │   ├── test_x_3.png
    │   └── ...
    ├── class_y
    └── ...
```

## Step-2: Choose one config as template

Here, we would like to use `configs/resnet/resnet50_8xb32_in1k.py` as the example. We first copy this config
file to the same folder and rename it as `resnet50_8xb32-ft_custom.py`.

```{tip}
As a convention, the last field of the config name is the dataset, e.g., `in1k` for the ImageNet dataset and `coco` for the COCO dataset.
```

The content of this config is:

```python
_base_ = [
    '../_base_/models/resnet50.py',           # model settings
    '../_base_/datasets/imagenet_bs32.py',    # data settings
    '../_base_/schedules/imagenet_bs256.py',  # schedule settings
    '../_base_/default_runtime.py',           # runtime settings
]
```

## Step-3: Edit the model settings

When fine-tuning a model, usually we want to load the pre-trained backbone
weights and train a new classification head from scratch.

To load the pre-trained backbone, we need to change the initialization config
of the backbone and use the `Pretrained` initialization function. Besides, in the
`init_cfg`, we use `prefix='backbone'` to tell the initialization function
the prefix of the submodule to load from the checkpoint.

For example, `backbone` here means to load the backbone submodule. Here we
use an online checkpoint; it will be downloaded automatically during training,
but you can also download the model manually and use a local path.
Then we need to modify the head according to the number of classes in the new
dataset, by just changing `num_classes` in the head.

When the new dataset is small and shares its domain with the pre-training dataset,
we might want to freeze the parameters of the first several stages of the
backbone, which helps the network keep its ability to extract the low-level
information learned from the pre-trained model. In MMPretrain, you can simply
specify how many stages to freeze with the `frozen_stages` argument. For example, to
freeze the parameters of the first two stages, just use the following configs:

```{note}
Not all backbones support the `frozen_stages` argument by now. Please check
[the docs](https://mmpretrain.readthedocs.io/en/main/api.html#module-mmpretrain.models.backbones)
to confirm if your backbone supports it.
```

```python
_base_ = [
    '../_base_/models/resnet50.py',           # model settings
    '../_base_/datasets/imagenet_bs32.py',    # data settings
    '../_base_/schedules/imagenet_bs256.py',  # schedule settings
    '../_base_/default_runtime.py',           # runtime settings
]

# >>>>>>>>>>>>>>> Override model settings here >>>>>>>>>>>>>>>>>>>
model = dict(
    backbone=dict(
        frozen_stages=2,
        init_cfg=dict(
            type='Pretrained',
            checkpoint='https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_8xb32_in1k_20210831-ea4938fc.pth',
            prefix='backbone',
        )),
    head=dict(num_classes=10),
)
# <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
```

```{tip}
Here we only need to set the part of the configs we want to modify, because the
inherited configs will be merged to obtain the entire config.
```

## Step-4: Edit the dataset settings

To fine-tune on a new dataset, we need to override some dataset settings, like the dataset type, the data
pipeline, etc.

```python
_base_ = [
    '../_base_/models/resnet50.py',           # model settings
    '../_base_/datasets/imagenet_bs32.py',    # data settings
    '../_base_/schedules/imagenet_bs256.py',  # schedule settings
    '../_base_/default_runtime.py',           # runtime settings
]

# model settings
model = dict(
    backbone=dict(
        frozen_stages=2,
        init_cfg=dict(
            type='Pretrained',
            checkpoint='https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_8xb32_in1k_20210831-ea4938fc.pth',
            prefix='backbone',
        )),
    head=dict(num_classes=10),
)

# >>>>>>>>>>>>>>> Override data settings here >>>>>>>>>>>>>>>>>>>
data_root = 'data/custom_dataset'
train_dataloader = dict(
    dataset=dict(
        type='CustomDataset',
        data_root=data_root,
        ann_file='',  # We assume you are using the sub-folder format without ann_file
        data_prefix='train',
    ))
val_dataloader = dict(
    dataset=dict(
        type='CustomDataset',
        data_root=data_root,
        ann_file='',  # We assume you are using the sub-folder format without ann_file
        data_prefix='test',
    ))
test_dataloader = val_dataloader
# <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
```

## Step-5: Edit the schedule settings (optional)

The fine-tuning hyper-parameters differ from the default schedule: fine-tuning
usually requires a smaller learning rate and a faster-decaying learning rate schedule.

```python
_base_ = [
    '../_base_/models/resnet50.py',           # model settings
    '../_base_/datasets/imagenet_bs32.py',    # data settings
    '../_base_/schedules/imagenet_bs256.py',  # schedule settings
    '../_base_/default_runtime.py',           # runtime settings
]

# model settings
model = dict(
    backbone=dict(
        frozen_stages=2,
        init_cfg=dict(
            type='Pretrained',
            checkpoint='https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_8xb32_in1k_20210831-ea4938fc.pth',
            prefix='backbone',
        )),
    head=dict(num_classes=10),
)

# data settings
data_root = 'data/custom_dataset'
train_dataloader = dict(
    dataset=dict(
        type='CustomDataset',
        data_root=data_root,
        ann_file='',  # We assume you are using the sub-folder format without ann_file
        data_prefix='train',
    ))
val_dataloader = dict(
    dataset=dict(
        type='CustomDataset',
        data_root=data_root,
        ann_file='',  # We assume you are using the sub-folder format without ann_file
        data_prefix='test',
    ))
test_dataloader = val_dataloader

# >>>>>>>>>>>>>>> Override schedule settings here >>>>>>>>>>>>>>>>>>>
# optimizer hyper-parameters
optim_wrapper = dict(
    optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001))
# learning policy
param_scheduler = dict(
    type='MultiStepLR', by_epoch=True, milestones=[15], gamma=0.1)
# <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
```

```{tip}
Refer to [Learn about Configs](../user_guides/config.md) for more detailed configurations.
```

## Start Training

Now, we have finished the fine-tuning config file, as follows:

```python
_base_ = [
    '../_base_/models/resnet50.py',           # model settings
    '../_base_/datasets/imagenet_bs32.py',    # data settings
    '../_base_/schedules/imagenet_bs256.py',  # schedule settings
    '../_base_/default_runtime.py',           # runtime settings
]

# model settings
model = dict(
    backbone=dict(
        frozen_stages=2,
        init_cfg=dict(
            type='Pretrained',
            checkpoint='https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_8xb32_in1k_20210831-ea4938fc.pth',
            prefix='backbone',
        )),
    head=dict(num_classes=10),
)

# data settings
data_root = 'data/custom_dataset'
train_dataloader = dict(
    dataset=dict(
        type='CustomDataset',
        data_root=data_root,
        ann_file='',  # We assume you are using the sub-folder format without ann_file
        data_prefix='train',
    ))
val_dataloader = dict(
    dataset=dict(
        type='CustomDataset',
        data_root=data_root,
        ann_file='',  # We assume you are using the sub-folder format without ann_file
        data_prefix='test',
    ))
test_dataloader = val_dataloader

# schedule settings
optim_wrapper = dict(
    optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001))
param_scheduler = dict(
    type='MultiStepLR', by_epoch=True, milestones=[15], gamma=0.1)
```

Here we use 8 GPUs to train the model, with the following command:

```shell
bash tools/dist_train.sh configs/resnet/resnet50_8xb32-ft_custom.py 8
```

Also, you can use only one GPU to train the model, with the following command:

```shell
python tools/train.py configs/resnet/resnet50_8xb32-ft_custom.py
```

But wait, an important config needs to be changed when using one GPU. We need to
change the dataset config as follows:

```python
data_root = 'data/custom_dataset'
train_dataloader = dict(
    batch_size=256,
    dataset=dict(
        type='CustomDataset',
        data_root=data_root,
        ann_file='',  # We assume you are using the sub-folder format without ann_file
        data_prefix='train',
    ))
val_dataloader = dict(
    dataset=dict(
        type='CustomDataset',
        data_root=data_root,
        ann_file='',  # We assume you are using the sub-folder format without ann_file
        data_prefix='test',
    ))
test_dataloader = val_dataloader
```

This is because our training schedule assumes a batch size of 256. When using 8 GPUs,
just use the `batch_size=32` setting from the base config file for every GPU, and the total batch
size will be 256. But when using one GPU, you need to change it to 256 manually to
match the training schedule.

However, a larger batch size requires more GPU memory, and here are several simple tricks to save GPU
memory:

1. Enable Automatic-Mixed-Precision training.

   ```shell
   python tools/train.py configs/resnet/resnet50_8xb32-ft_custom.py --amp
   ```

2. Use a smaller batch size, like `batch_size=32` instead of 256, and enable auto learning rate scaling.

   ```shell
   python tools/train.py configs/resnet/resnet50_8xb32-ft_custom.py --auto-scale-lr
   ```

   Auto learning rate scaling adjusts the learning rate according to the actual batch size and
   `auto_scale_lr.base_batch_size` (you can find it in the base config
   `configs/_base_/schedules/imagenet_bs256.py`).
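   A minimal sketch of the linear scaling rule behind `--auto-scale-lr`, using the values discussed above (`base_batch_size` comes from the base schedule config):

   ```python
   base_lr = 0.01           # lr in the schedule config, tuned for the base batch size
   base_batch_size = 256    # auto_scale_lr.base_batch_size
   actual_batch_size = 32   # e.g. one GPU with batch_size=32
   scaled_lr = base_lr * actual_batch_size / base_batch_size
   print(scaled_lr)  # 0.00125
   ```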
```{note}
Most of these tricks may influence the training performance slightly.
```

### Apply pre-trained model with command line

If you don't want to modify the configs, you can use `--cfg-options` to add your pre-trained model path to `init_cfg`.

For example, the command below will also load a pre-trained model:

```shell
bash tools/dist_train.sh configs/resnet/resnet50_8xb32-ft_custom.py 8 \
    --cfg-options model.backbone.init_cfg.type='Pretrained' \
    model.backbone.init_cfg.checkpoint='https://download.openmmlab.com/mmselfsup/1.x/mocov3/mocov3_resnet50_8xb512-amp-coslr-100e_in1k/mocov3_resnet50_8xb512-amp-coslr-100e_in1k_20220927-f1144efa.pth' \
    model.backbone.init_cfg.prefix='backbone'
```
@@ -1,27 +1,35 @@
 # How to Pretrain with Custom Dataset
 
-- [How to Pretrain with Custom Dataset](#how-to-pretrain-with-custom-dataset)
-  - [Train MAE on Custom Dataset](#train-mae-on-custom-dataset)
-    - [Step-1: Get the path of custom dataset](#step-1-get-the-path-of-custom-dataset)
-    - [Step-2: Choose one config as template](#step-2-choose-one-config-as-template)
-    - [Step-3: Edit the dataset related config](#step-3-edit-the-dataset-related-config)
-  - [Train MAE on COCO Dataset](#train-mae-on-coco-dataset)
-
-In this tutorial, we provide some tips on how to conduct self-supervised learning on your own dataset (without the need of labels).
-
-## Train MAE on Custom Dataset
+In this tutorial, we provide a practice example and some tips on how to train on your own dataset.
 
 In MMPretrain, we support `CustomDataset` (similar to `ImageFolder` in `torchvision`), which is able to read the images within the specified folder directly. You only need to prepare the path information of the custom dataset and edit the config.
 
-### Step-1: Get the path of custom dataset
+## Step-1: Prepare your dataset
 
-It should be like `data/custom_dataset/`
+Prepare your dataset following [Prepare Dataset](../user_guides/dataset_prepare.md).
+The root folder of the dataset can be like `data/custom_dataset/`.
 
-### Step-2: Choose one config as template
+Here, we assume you want to do unsupervised training, and use the sub-folder format `CustomDataset` to
+organize your dataset as:
 
-Here, we would like to use `configs/mae/mae_vit-base-p16_8xb512-amp-coslr-300e_in1k.py` as the example. We first copy this config file and rename it as `mae_vit-base-p16_8xb512-amp-coslr-300e_${custom_dataset}.py`.
+```text
+data/custom_dataset/
+├── sample1.png
+├── sample2.png
+├── sample3.png
+├── sample4.png
+└── ...
+```
 
-- `custom_dataset`: indicate which dataset you used, e.g., `in1k` for ImageNet dataset, `coco` for COCO dataset
+## Step-2: Choose one config as template
+
+Here, we would like to use `configs/mae/mae_vit-base-p16_8xb512-amp-coslr-300e_in1k.py` as the example. We
+first copy this config file to the same folder and rename it as
+`mae_vit-base-p16_8xb512-amp-coslr-300e_custom.py`.
+
+```{tip}
+As a convention, the last field of the config name is the dataset, e.g., `in1k` for the ImageNet dataset and `coco` for the COCO dataset.
+```
 
 The content of this config is:

@@ -84,34 +92,35 @@ resume = True
 auto_scale_lr = dict(base_batch_size=4096)
 ```
 
-### Step-3: Edit the dataset related config
+## Step-3: Edit the dataset related config
 
-The dataset related config is defined in `'../_base_/datasets/imagenet_bs512_mae.py'` in `_base_`. We then copy the content of the dataset config file into our created file `mae_vit-base-p16_8xb512-coslr-400e_${custom_dataset}.py`.
-
-- Set the `dataset_type = 'CustomDataset'`, and the path of the custom dataset `data_root = /dataset/my_custom_dataset`.
-- Remove the `ann_file` in `train_dataloader`, and edit the `data_prefix` if needed.
+- Override the `type` of the dataset settings as `'CustomDataset'`.
+- Override the `data_root` of the dataset settings as `data/custom_dataset`.
+- Override the `ann_file` of the dataset settings as an empty string, since we assume you are using the sub-folder
+  format `CustomDataset`.
+- Override the `data_prefix` of the dataset settings as an empty string, since we are using the whole dataset under
+  `data_root`, and you don't need to split samples into different subsets or set the `data_prefix`.
 
-And the edited config will be like this:
+The modified config will be like:
 
 ```python
-# >>>>>>>>>>>>>>>>>>>>> Start of Changed >>>>>>>>>>>>>>>>>>>>>>>>>
 _base_ = [
     '../_base_/models/mae_vit-base-p16.py',
-    '../_base_/datasets/imagenet_mae.py',
+    '../_base_/datasets/imagenet_bs512_mae.py',
     '../_base_/default_runtime.py',
 ]
 
-# custom dataset
-dataset_type = 'CustomDataset'
-data_root = 'data/custom_dataset/'
-
+# >>>>>>>>>>>>>>> Override dataset settings here >>>>>>>>>>>>>>>>>>>
 train_dataloader = dict(
     dataset=dict(
-        type=dataset_type,
-        data_root=data_root,
-        # ann_file='meta/train.txt', # removed if you don't have the annotation file
-        data_prefix=dict(img_path='./'))
-# <<<<<<<<<<<<<<<<<<<<<< End of Changed <<<<<<<<<<<<<<<<<<<<<<<<<<<
+        type='CustomDataset',
+        data_root='data/custom_dataset/',
+        ann_file='',  # We assume you are using the sub-folder format without ann_file
+        data_prefix='',  # The `data_root` is the data_prefix directly.
+        with_label=False,
+    )
+)
+# <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
 
 # optimizer wrapper
 optim_wrapper = dict(

@@ -167,7 +176,7 @@ auto_scale_lr = dict(base_batch_size=4096)
 
 By using the edited config file, you are able to train a self-supervised model with the MAE algorithm on the custom dataset.
 
-## Train MAE on COCO Dataset
+## Another example: Train MAE on COCO Dataset
 
 ```{note}
 You need to install MMDetection to use `mmdet.CocoDataset`, following this [documentation](https://github.com/open-mmlab/mmdetection/blob/3.x/docs/en/get_started.md)

@@ -176,25 +185,22 @@ You need to install MMDetection to use the `mmdet.CocoDataset` following this [documentation]
 Following the aforementioned idea, we also present an example of how to train MAE on the COCO dataset. The edited file will be like this:
 
 ```python
-# >>>>>>>>>>>>>>>>>>>>> Start of Changed >>>>>>>>>>>>>>>>>>>>>>>>>
 _base_ = [
     '../_base_/models/mae_vit-base-p16.py',
     '../_base_/datasets/imagenet_mae.py',
     '../_base_/default_runtime.py',
 ]
 
-# custom dataset
-dataset_type = 'mmdet.CocoDataset'
-data_root = 'data/coco/'
-
+# >>>>>>>>>>>>>>> Override dataset settings here >>>>>>>>>>>>>>>>>>>
 train_dataloader = dict(
     dataset=dict(
-        type=dataset_type,
-        data_root=data_root,
-        ann_file='annotations/instances_train2017.json',
-        data_prefix=dict(img='train2017/')))
-
-# <<<<<<<<<<<<<<<<<<<<<< End of Changed <<<<<<<<<<<<<<<<<<<<<<<<<<<
+        type='mmdet.CocoDataset',
+        data_root='data/coco/',
+        ann_file='annotations/instances_train2017.json',  # Only for loading images; the labels won't be used.
+        data_prefix=dict(img='train2017/'),
+    )
+)
+# <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
 
 # optimizer wrapper
 optim_wrapper = dict(
docs/en/stat.py

@@ -5,17 +5,18 @@ from collections import defaultdict
 from pathlib import Path
 
 from modelindex.load_model_index import load
+from modelindex.models.Result import Result
 from tabulate import tabulate
 
 MMPT_ROOT = Path(__file__).absolute().parents[2]
 PAPERS_ROOT = Path('papers')  # Path to save generated paper pages.
 GITHUB_PREFIX = 'https://github.com/open-mmlab/mmpretrain/blob/1.x/'
-MODELZOO_TEMPLATE = """
+MODELZOO_TEMPLATE = """\
 # Model Zoo Summary
 
 In this page, we list [all algorithms](#all-supported-algorithms) we support. You can click the link to jump to the corresponding model pages.
 
-And we also list [all checkpoints](#all-checkpoints) we provide. You can sort or search checkpoints in the table and click the corresponding link to model pages for more details.
+And we also list all checkpoints for different tasks we provide. You can sort or search checkpoints in the table and click the corresponding link to model pages for more details.
 
 ## All supported algorithms

@@ -25,9 +26,13 @@ And we also list all checkpoints for different tasks we provide.
 * Number of checkpoints: {num_ckpts}
 {paper_msg}
-
-## All checkpoints
 """  # noqa: E501
 
+METRIC_ALIAS = {
+    'Top 1 Accuracy': 'Top-1 (%)',
+    'Top 5 Accuracy': 'Top-5 (%)',
+}
+
 model_index = load(str(MMPT_ROOT / 'model-index.yml'))

@@ -41,6 +46,10 @@ def build_collections(model_index):
         col = col_by_name[model.in_collection]
         col.models.append(model)
         setattr(model, 'collection', col)
+        if model.results is None:
+            setattr(model, 'tasks', [])
+        else:
+            setattr(model, 'tasks', [result.task for result in model.results])
 
 
 build_collections(model_index)

@@ -139,41 +148,99 @@ for collection in model_index.collections:
     generate_paper_page(collection)
 
 
-def generate_summary_table(models):
-    dataset_rows = defaultdict(list)
+def scatter_results(models):
+    model_result_pairs = []
     for model in models:
         if model.results is None:
+            result = Result(task=None, dataset=None, metrics={})
+            model_result_pairs.append((model, result))
+        else:
+            for result in model.results:
+                model_result_pairs.append((model, result))
+    return model_result_pairs
+
+
+def generate_summary_table(task, model_result_pairs, title=None):
+    metrics = set()
+    for model, result in model_result_pairs:
+        if result.task == task:
+            metrics = metrics.union(result.metrics.keys())
+    metrics = sorted(list(metrics))
+
+    rows = []
+    for model, result in model_result_pairs:
+        if result.task != task:
+            continue
         name = model.name
         params = f'{model.metadata.parameters / 1e6:.2f}'  # Params
         flops = f'{model.metadata.flops / 1e9:.2f}'  # Flops
-        result = model.results[0]
-        top1 = result.metrics.get('Top 1 Accuracy')
-        top5 = result.metrics.get('Top 5 Accuracy')
         readme = Path(model.collection.filepath).parent.with_suffix('.md').name
         page = f'[link]({PAPERS_ROOT / readme})'
-        row = [name, params, flops, top1, top5, page]
-        dataset_rows[result.dataset].append(row)
+        model_metrics = []
+        for metric in metrics:
+            model_metrics.append(str(result.metrics.get(metric, '')))
+
+        rows.append([name, params, flops, *model_metrics, page])
 
     with open('modelzoo_statistics.md', 'a') as f:
-        for dataset, rows in dataset_rows.items():
-            f.write(f'\n### {dataset}\n')
-            f.write("""```{table}\n:class: model-summary\n""")
-            header = [
-                'Model',
-                'Params (M)',
-                'Flops (G)',
-                'Top-1 (%)',
-                'Top-5 (%)',
-                'Readme',
-            ]
-            table_cfg = dict(
-                tablefmt='pipe',
-                floatfmt='.2f',
-                numalign='right',
-                stralign='center')
-            f.write(tabulate(rows, header, **table_cfg))
-            f.write('\n```\n')
+        if title is not None:
+            f.write(f'\n{title}')
+        f.write("""\n```{table}\n:class: model-summary\n""")
+        header = [
+            'Model',
+            'Params (M)',
+            'Flops (G)',
+            *[METRIC_ALIAS.get(metric, metric) for metric in metrics],
+            'Readme',
+        ]
+        table_cfg = dict(
+            tablefmt='pipe',
+            floatfmt='.2f',
+            numalign='right',
+            stralign='center')
+        f.write(tabulate(rows, header, **table_cfg))
+        f.write('\n```\n')
 
 
-generate_summary_table(model_index.models)
+def generate_dataset_wise_table(task, model_result_pairs, title=None):
+    dataset_rows = defaultdict(list)
+    for model, result in model_result_pairs:
+        if result.task == task:
+            dataset_rows[result.dataset].append((model, result))
+
+    if title is not None:
+        with open('modelzoo_statistics.md', 'a') as f:
+            f.write(f'\n{title}')
+    for dataset, pairs in dataset_rows.items():
+        generate_summary_table(task, pairs, title=f'### {dataset}')
+
+
+model_result_pairs = scatter_results(model_index.models)
+
+# Generate Pretrain Summary
+generate_summary_table(
+    task=None,
+    model_result_pairs=model_result_pairs,
+    title='## Pretrained Models',
+)
+
+# Generate Image Classification Summary
+generate_dataset_wise_table(
+    task='Image Classification',
+    model_result_pairs=model_result_pairs,
+    title='## Image Classification',
+)
+
+# Generate Multi-Label Classification Summary
+generate_dataset_wise_table(
+    task='Multi-Label Classification',
+    model_result_pairs=model_result_pairs,
+    title='## Multi-Label Classification',
+)
+
+# Generate Image Retrieval Summary
+generate_dataset_wise_table(
+    task='Image Retrieval',
+    model_result_pairs=model_result_pairs,
+    title='## Image Retrieval',
)
|
@ -1,4 +1,4 @@
# Class Activation Map(CAM) Visualization
# Class Activation Map (CAM) Visualization

## Introduction of the CAM visualization tool

@ -15,6 +15,8 @@ The supported methods are as follows:
| EigenGradCAM | Like EigenCAM but with class discrimination: First principal component of Activations\*Grad. Looks like GradCAM, but cleaner |
| LayerCAM | Spatially weight the activations by positive gradients. Works better especially in lower layers |

More CAM methods supported by the new version of `pytorch-grad-cam` can also be used, but we haven't verified their availability.

**Command**:

```bash

@ -37,26 +39,26 @@ python tools/visualization/vis_cam.py \

**Description of all arguments**:

- `img` : The target picture path.
- `config` : The path of the model config file.
- `checkpoint` : The path of the checkpoint.
- `--target-layers` : The target layers to get activation maps, one or more network layers can be specified. If not set, use the norm layer of the last block.
- `--preview-model` : Whether to print all network layer names in the model.
- `--method` : Visualization method, supports `GradCAM`, `GradCAM++`, `XGradCAM`, `EigenCAM`, `EigenGradCAM`, `LayerCAM`, which is case insensitive. Defaults to `GradCAM`.
- `--target-category` : Target category, if not set, use the category detected by the given model.
- `--save-path` : The path to save the CAM visualization image. If not set, the CAM image will not be saved.
- `--vit-like` : Whether the network is ViT-like network.
- `--num-extra-tokens` : The number of extra tokens in ViT-like backbones. If not set, use num_extra_tokens the backbone.
- `--aug_smooth` : Whether to use TTA(Test Time Augment) to get CAM.
- `--eigen_smooth` : Whether to use the principal component to reduce noise.
- `--device` : The computing device used. Default to 'cpu'.
- `--cfg-options` : Modifications to the configuration file, refer to [Learn about Configs](../user_guides/config.md).
- `img`: The target picture path.
- `config`: The path of the model config file.
- `checkpoint`: The path of the checkpoint.
- `--target-layers`: The target layers to get activation maps; one or more network layers can be specified. If not set, use the norm layer of the last block.
- `--preview-model`: Whether to print all network layer names in the model.
- `--method`: Visualization method, supports `GradCAM`, `GradCAM++`, `XGradCAM`, `EigenCAM`, `EigenGradCAM`, `LayerCAM`, which is case insensitive. Defaults to `GradCAM`.
- `--target-category`: Target category; if not set, use the category detected by the given model.
- `--eigen-smooth`: Whether to use the principal component to reduce noise.
- `--aug-smooth`: Whether to use TTA (Test Time Augmentation) to get the CAM.
- `--save-path`: The path to save the CAM visualization image. If not set, the CAM image will not be saved.
- `--vit-like`: Whether the network is a ViT-like network.
- `--num-extra-tokens`: The number of extra tokens in ViT-like backbones. If not set, use the `num_extra_tokens` of the backbone.
- `--device`: The computing device used. Defaults to 'cpu'.
- `--cfg-options`: Modifications to the configuration file, refer to [Learn about Configs](../user_guides/config.md).

```{note}
The argument `--preview-model` can view all network layer names in the given model. It will be helpful if you know nothing about the model layers when setting `--target-layers`.
```
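
If you prefer to explore layer names in Python rather than with `--preview-model`, a minimal sketch (assuming the `get_model` API also described in the inference guide; the model name is only an example) is:

```python
from mmpretrain import get_model

# Build the model by name; weights are not needed just to inspect layer names.
model = get_model('resnet50_8xb32_in1k', pretrained=False)

# Print candidate target layers, e.g. every sub-module of the last ResLayer.
for name, _ in model.named_modules():
    if name.startswith('backbone.layer4'):
        print(name)  # backbone.layer4, backbone.layer4.0, ..., backbone.layer4.2.conv1, ...
```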

## How to visualize the CAM of CNN(ResNet-50)
## How to visualize the CAM of CNN (ResNet-50)

Here are some examples of `target-layers` in ResNet-50, which can be any module or layer:

@ -64,12 +66,6 @@ Here are some examples of `target-layers` in ResNet-50, which can be any module
- `'backbone.layer4.2'` means the output of the third BottleNeck block in the fourth ResLayer.
- `'backbone.layer4.2.conv1'` means the output of the `conv1` layer in the above BottleNeck block.

```{note}
For `ModuleList` or `Sequential`, you can also use the index to specify which sub-module is the target layer.

For example, the `backbone.layer4[-1]` is the same as `backbone.layer4.2` since `layer4` is a `Sequential` with three sub-modules.
```

1. Use different methods to visualize CAM for `ResNet50`, the `target-category` is the predicted result by the given checkpoint, using the default `target-layers`.

   ```shell

@ -123,7 +119,7 @@ For example, the `backbone.layer4[-1]` is the same as `backbone.layer4.2` since
Here are some examples:

- `'backbone.norm3'` for Swin-Transformer;
- `'backbone.layers[-1].ln1'` for ViT;
- `'backbone.layers.11.ln1'` for ViT;

For ViT-like networks, such as ViT, T2T-ViT and Swin-Transformer, the features are flattened. And for drawing the CAM, we need to specify the `--vit-like` argument to reshape the features into square feature maps.

@ -146,10 +142,10 @@ To exclude these extra tokens, we need know the number of extra tokens. Almost a
```shell
python tools/visualization/vis_cam.py \
    demo/bird.JPEG \
    configs/vision_transformer/vit-base-p16_ft-64xb64_in1k-384.py \
    configs/vision_transformer/vit-base-p16_64xb64_in1k-384px.py \
    https://download.openmmlab.com/mmclassification/v0/vit/finetune/vit-base-p16_in21k-pre-3rdparty_ft-64xb64_in1k-384_20210928-98e8652b.pth \
    --vit-like \
    --target-layers 'backbone.layers[-1].ln1'
    --target-layers 'backbone.layers.11.ln1'
```

3. Visualize CAM for `T2T-ViT`:

@ -160,7 +156,7 @@ To exclude these extra tokens, we need know the number of extra tokens. Almost a
    configs/t2t_vit/t2t-vit-t-14_8xb64_in1k.py \
    https://download.openmmlab.com/mmclassification/v0/t2t-vit/t2t-vit-t-14_3rdparty_8xb64_in1k_20210928-b7c09b62.pth \
    --vit-like \
    --target-layers 'backbone.encoder[-1].ln1'
    --target-layers 'backbone.encoder.12.ln1'
```

| Image | ResNet50 | ViT | Swin | T2T-ViT |
@ -2,7 +2,7 @@

## Get the FLOPs and params (experimental)

We provide a script adapted from [fvcore](https://github.com/facebookresearch/fvcore/blob/main/fvcore/nn/flop_count.py) to compute the FLOPs and params of a given model.
We provide a script adapted from [MMEngine](https://github.com/open-mmlab/mmengine/blob/main/mmengine/analysis/complexity_analysis.py) to compute the FLOPs and params of a given model.

```shell
python tools/analysis_tools/get_flops.py ${CONFIG_FILE} [--shape ${INPUT_SHAPE}]

@ -10,59 +10,68 @@ python tools/analysis_tools/get_flops.py ${CONFIG_FILE} [--shape ${INPUT_SHAPE}]

Description of all arguments:

- `config` : The path of the model config file.
- `config`: The path of the model config file.
- `--shape`: Input size, supports a single value or two values, such as `--shape 256` or `--shape 224 256`. If not set, it defaults to `224 224`.

Example:

```shell
python tools/analysis_tools/get_flops.py configs/resnet/resnet50_8xb32_in1k.py
```

You will get the final result like this:

```text
==============================
Input shape: (3, 224, 224)
Flops: 17.582G
Params: 91.234M
Activation: 23.895M
Flops: 4.109G
Params: 25.557M
Activation: 11.114M
==============================
```

Also, you will get the detailed complexity information of each layer like this:

```text
| module | #parameters or shape | #flops | #activations |
|:------------------------------------------|:-----------------------|:----------|:---------------|
| model | 91.234M | 17.582G | 23.895M |
| backbone | 85.799M | 17.582G | 23.895M |
| backbone.cls_token | (1, 1, 768) | | |
| backbone.pos_embed | (1, 197, 768) | | |
| backbone.patch_embed.projection | 0.591M | 0.116G | 0.151M |
| backbone.patch_embed.projection.weight | (768, 3, 16, 16) | | |
| backbone.patch_embed.projection.bias | (768,) | | |
| backbone.layers | 85.054M | 17.466G | 23.744M |
| backbone.layers.0 | 7.088M | 1.455G | 1.979M |
| backbone.layers.1 | 7.088M | 1.455G | 1.979M |
| backbone.layers.2 | 7.088M | 1.455G | 1.979M |
| backbone.layers.3 | 7.088M | 1.455G | 1.979M |
| backbone.layers.4 | 7.088M | 1.455G | 1.979M |
| backbone.layers.5 | 7.088M | 1.455G | 1.979M |
| backbone.layers.6 | 7.088M | 1.455G | 1.979M |
| backbone.layers.7 | 7.088M | 1.455G | 1.979M |
| backbone.layers.8 | 7.088M | 1.455G | 1.979M |
| backbone.layers.9 | 7.088M | 1.455G | 1.979M |
| backbone.layers.10 | 7.088M | 1.455G | 1.979M |
| backbone.layers.11 | 7.088M | 1.455G | 1.979M |
| backbone.ln1 | 1.536K | 0.756M | 0 |
| backbone.ln1.weight | (768,) | | |
| backbone.ln1.bias | (768,) | | |
| head.layers | 5.435M | | |
| head.layers.pre_logits | 2.362M | | |
| head.layers.pre_logits.weight | (3072, 768) | | |
| head.layers.pre_logits.bias | (3072,) | | |
| head.layers.head | 3.073M | | |
| head.layers.head.weight | (1000, 3072) | | |
| head.layers.head.bias | (1000,) | | |
+--------------------------+----------------------+-----------+--------------+
| module | #parameters or shape | #flops | #activations |
+--------------------------+----------------------+-----------+--------------+
| model | 25.557M | 4.109G | 11.114M |
| backbone | 23.508M | 4.109G | 11.114M |
| backbone.conv1 | 9.408K | 0.118G | 0.803M |
| backbone.conv1.weight | (64, 3, 7, 7) | | |
| backbone.bn1 | 0.128K | 1.606M | 0 |
| backbone.bn1.weight | (64,) | | |
| backbone.bn1.bias | (64,) | | |
| backbone.layer1 | 0.216M | 0.677G | 4.415M |
| backbone.layer1.0 | 75.008K | 0.235G | 2.007M |
| backbone.layer1.1 | 70.4K | 0.221G | 1.204M |
| backbone.layer1.2 | 70.4K | 0.221G | 1.204M |
| backbone.layer2 | 1.22M | 1.034G | 3.111M |
| backbone.layer2.0 | 0.379M | 0.375G | 1.305M |
| backbone.layer2.1 | 0.28M | 0.22G | 0.602M |
| backbone.layer2.2 | 0.28M | 0.22G | 0.602M |
| backbone.layer2.3 | 0.28M | 0.22G | 0.602M |
| backbone.layer3 | 7.098M | 1.469G | 2.158M |
| backbone.layer3.0 | 1.512M | 0.374G | 0.652M |
| backbone.layer3.1 | 1.117M | 0.219G | 0.301M |
| backbone.layer3.2 | 1.117M | 0.219G | 0.301M |
| backbone.layer3.3 | 1.117M | 0.219G | 0.301M |
| backbone.layer3.4 | 1.117M | 0.219G | 0.301M |
| backbone.layer3.5 | 1.117M | 0.219G | 0.301M |
| backbone.layer4 | 14.965M | 0.81G | 0.627M |
| backbone.layer4.0 | 6.04M | 0.373G | 0.326M |
| backbone.layer4.1 | 4.463M | 0.219G | 0.151M |
| backbone.layer4.2 | 4.463M | 0.219G | 0.151M |
| head.fc | 2.049M | | |
| head.fc.weight | (1000, 2048) | | |
| head.fc.bias | (1000,) | | |
| neck.gap | | 0.1M | 0 |
+--------------------------+----------------------+-----------+--------------+
```

```{warning}
This tool is still experimental and we do not guarantee that the number is correct. You may well use the result for simple comparisons, but double-check it before you adopt it in technical reports or papers.

- FLOPs are related to the input shape while parameters are not. The default input shape is (1, 3, 224, 224).
- Some operators are not counted into FLOPs like custom operators. Refer to [`fvcore.nn.flop_count._DEFAULT_SUPPORTED_OPS`](https://github.com/facebookresearch/fvcore/blob/main/fvcore/nn/flop_count.py) for details.
- Some operators are not counted into FLOPs like custom operators. Refer to [`mmengine.analysis.complexity_analysis._DEFAULT_SUPPORTED_FLOP_OPS`](https://github.com/open-mmlab/mmengine/blob/main/mmengine/analysis/complexity_analysis.py) for details.
```
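
If you want these numbers programmatically, a rough sketch of what the script does internally (assuming the `get_model_complexity_info` helper in `mmengine.analysis` and the `get_model` API; the model name is only an example):

```python
from mmengine.analysis import get_model_complexity_info
from mmpretrain import get_model

model = get_model('resnet50_8xb32_in1k', pretrained=False)
# `input_shape` excludes the batch dimension, matching the tool's default 224x224.
analysis = get_model_complexity_info(model, input_shape=(3, 224, 224))
print(analysis['flops_str'], analysis['params_str'])
```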
@ -23,7 +23,8 @@ python tools/visualization/browse_dataset.py \
- **`-n, --show-number`**: The number of samples to visualize. If not specified, display all images in the dataset.
- `--show-interval`: The time interval between displayed samples (in seconds).
- **`-m, --mode`**: The display mode, can be one of `['original', 'transformed', 'concat', 'pipeline']`. If not specified, it will be set to `'transformed'`.
- **`-r, --rescale-factor`**: The image rescale factor, which is useful if the output is too large or too small.
- `-r, --rescale-factor`: The image rescale factor, which is useful if the output is too large or too small
  in the `original` mode.
- `-c, --channel-order`: The channel order of the displayed images, can be "BGR" or "RGB". If not specified, it will be set to 'BGR'.
- `--cfg-options`: Modifications to the configuration file, refer to [Learn about Configs](../user_guides/config.md).

@ -59,7 +60,7 @@ python ./tools/visualization/browse_dataset.py ./configs/resnet/resnet101_8xb16_
In **'transformed'** mode:

```shell
python ./tools/visualization/browse_dataset.py ./configs/resnet/resnet50_8xb32_in1k.py -n 100 -r 2
python ./tools/visualization/browse_dataset.py ./configs/resnet/resnet50_8xb32_in1k.py -n 100
```

<div align=center><img src="https://user-images.githubusercontent.com/18586273/190994696-737b09d9-d0fb-4593-94a2-4487121e0286.JPEG" style=" width: auto; height: 40%; "></div>

@ -81,3 +82,9 @@ python ./tools/visualization/browse_dataset.py configs/swin_transformer/swin-sma
```

<div align=center><img src="https://user-images.githubusercontent.com/18586273/190995525-fac0220f-6630-4013-b94a-bc6de4fdff7a.JPEG" style=" width: auto; height: 40%; "></div>

```shell
python ./tools/visualization/browse_dataset.py configs/beit/beit_beit-base-p16_8xb256-amp-coslr-300e_in1k.py -m pipeline
```

<div align=center><img src="https://user-images.githubusercontent.com/26739999/226542300-74216187-e3d0-4a6e-8731-342abe719721.png" style=" width: auto; height: 40%; "></div>
@ -54,7 +54,7 @@ python tools/analysis_tools/analyze_logs.py plot_curve your_log_json --keys accu
#### Compare the top-1 accuracy of two log files in the same figure.

```shell
python tools/analysis_tools/analyze_logs.py plot_curve log1.json log2.json --keys accuracy_top-1 --legend exp1 exp2
python tools/analysis_tools/analyze_logs.py plot_curve log1.json log2.json --keys accuracy/top1 --legend exp1 exp2
```

### How to calculate training time

@ -100,7 +100,7 @@ We provide `tools/analysis_tools/eval_metric.py` to enable the user evaluate the
```shell
python tools/analysis_tools/eval_metric.py \
    ${RESULT} \
    [--metric ${METRIC_OPTIONS} ...] \
    [--metric ${METRIC_OPTIONS} ...]
```

Description of all arguments:

@ -127,9 +127,56 @@ python tools/test.py configs/resnet/resnet18_8xb16_cifar10.py \
# Eval the top-1 and top-5 accuracy
python tools/analysis_tools/eval_metric.py results.pkl --metric type=Accuracy topk=1,5

# Eval accuracy, precision, recall and f1-score
# Eval the overall accuracy and the class-wise precision, recall, f1-score
python tools/analysis_tools/eval_metric.py results.pkl --metric type=Accuracy \
    --metric type=SingleLabelMetric items=precision,recall,f1-score
    --metric type=SingleLabelMetric items=precision,recall,f1-score average=None
```

### How to plot the confusion matrix for the test result

We provide `tools/analysis_tools/confusion_matrix.py` to enable the user to plot the confusion matrix from the prediction files.

```shell
python tools/analysis_tools/confusion_matrix.py \
    ${CONFIG} \
    ${RESULT} \
    [--out ${OUT}] \
    [--show] \
    [--show-path ${SHOW_PATH}] \
    [--include-values] \
    [--cmap] \
    [--cfg-options ${CFG_OPTIONS} ...]
```

Description of all arguments:

- `config`: The config file path.
- `result`: The output result file in pickle format from `tools/test.py`, or a checkpoint file.
- `--out`: The path to save the confusion matrix in pickle format.
- `--show`: Whether to show the confusion matrix plot.
- `--show-path`: The path to save the confusion matrix plot.
- `--include-values`: Whether to show the values in the confusion matrix plot.
- `--cmap`: The color map used to plot the confusion matrix.
- `--cfg-options`: If specified, the key-value pair config will be merged into the config file. For more details, please refer to [Learn about Configs](../user_guides/config.md).

```{note}
In `tools/test.py`, we support using the `--out-item` option to select which kind of results will be saved.
Please ensure `--out-item` is not specified or `--out-item=pred` to use this tool.
```

**Examples**:

```shell
# Get the prediction results
python tools/test.py configs/resnet/resnet18_8xb16_cifar10.py \
    https://download.openmmlab.com/mmclassification/v0/resnet/resnet18_b16x8_cifar10_20210528-bd6371c8.pth \
    --out results.pkl

# Save the confusion matrix in a pickle file
python tools/analysis_tools/confusion_matrix.py configs/resnet/resnet18_8xb16_cifar10.py results.pkl --out cm.pkl

# Show the confusion matrix plot in a graphical window.
python tools/analysis_tools/confusion_matrix.py configs/resnet/resnet18_8xb16_cifar10.py results.pkl --show
```
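
You can also compute the matrix directly in Python. A minimal sketch, assuming the `ConfusionMatrix` metric class in `mmpretrain.evaluation` exposes the `calculate` helper that the tool above uses internally:

```python
import torch
from mmpretrain.evaluation import ConfusionMatrix

# Toy labels: four classes, one misclassified sample.
pred = torch.tensor([0, 1, 1, 3])    # predicted labels
target = torch.tensor([0, 1, 2, 3])  # ground-truth labels
cm = ConfusionMatrix.calculate(pred, target, num_classes=4)
print(cm)  # a 4x4 tensor of counts
```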

### How to visualize the prediction results
@ -40,6 +40,7 @@ Example:

```shell
docker run --rm \
    --name mar \
    --cpus 8 \
    --gpus device=0 \
    -p8080:8080 -p8081:8081 -p8082:8082 \
@ -1,6 +1,6 @@
# Hyper-parameter Scheduler Visualization

This tool aims to help the user to check the hyper-parameter scheduler of the optimizer(without training), which support the "learning rate" or "momentum"
This tool aims to help the user to check the hyper-parameter scheduler of the optimizer (without training), which supports the "learning rate" or "momentum"

## Introduce the scheduler visualization tool

@ -35,18 +35,10 @@ Loading annotations maybe consume much time, you can directly specify the size o

## How to plot the learning rate curve without training

You can use the following command to plot the step learning rate schedule used in the config `configs/resnet/resnet50_b16x8_cifar100.py`:
You can use the following command to plot the learning rate schedule used in the config `configs/swin_transformer/swin-base_16xb64_in1k.py`:

```bash
python tools/visualization/vis_scheduler.py configs/resnet/resnet50_b16x8_cifar100.py
python tools/visualization/vis_scheduler.py configs/swin_transformer/swin-base_16xb64_in1k.py --dataset-size 1281167 --ngpus 16
```

<div align=center><img src="https://user-images.githubusercontent.com/18586273/191006713-023f065d-d366-4165-a52e-36176367506e.png" style=" width: auto; height: 40%; "></div>

When using ImageNet, directly specify the size of ImageNet, as below:

```bash
python tools/visualization/vis_scheduler.py configs/repvgg/repvgg-B3g4_4xb64-autoaug-lbs-mixup-coslr-200e_in1k.py --dataset-size 1281167 --ngpus 4 --save-path ./repvgg-B3g4_4xb64-lr.jpg
```

<div align=center><img src="https://user-images.githubusercontent.com/18586273/191006721-0f680e07-355e-4cd6-889c-86c0cad9acb7.png" style=" width: auto; height: 40%; "></div>
<div align=center><img src="https://user-images.githubusercontent.com/26739999/226544329-cf3a3d45-6ab3-48aa-8972-2c2a58c35e62.png" style=" width: auto; height: 40%; "></div>
@ -1,17 +1,5 @@
# Learn about Configs

- [Learn about Configs](#learn-about-configs)
  - [Config Structure](#config-structure)
    - [Model settings](#model-settings)
    - [Data settings](#data-settings)
    - [Schedule settings](#schedule-settings)
    - [Runtime settings](#runtime-settings)
  - [Inherit and Modify Config File](#inherit-and-modify-config-file)
    - [Use intermediate variables in configs](#use-intermediate-variables-in-configs)
    - [Ignore some fields in the base configs](#ignore-some-fields-in-the-base-configs)
    - [Use some fields in the base configs](#use-some-fields-in-the-base-configs)
  - [Modify config in command](#modify-config-in-command)

To manage various configurations in a deep-learning experiment, we use a kind of config file to record all of
these configurations. This config system has a modular and inheritance design, and more details can be found in
{external+mmengine:doc}`the tutorial in MMEngine <advanced_tutorials/config>`.

@ -73,11 +61,11 @@ This primitive config file includes a dict variable `model`, which mainly includ
- For self-supervised learning, there are several `SelfSupervisors`, such as `MoCoV2`, `BEiT`, `MAE`, etc. You can find more details in the [API documentation](mmpretrain.models.selfsup).
- For image retrieval tasks, it's usually `ImageToImageRetriever`. You can find more details in the [API documentation](mmpretrain.models.retrievers).

Usually, we use the `type` field to specify the class of the component and use other fields to pass
Usually, we use the **`type` field** to specify the class of the component and use other fields to pass
the initialization arguments of the class. The {external+mmengine:doc}`registry tutorial <advanced_tutorials/registry>` describes it in detail.

Here, we use the config fields of [`ImageClassifier`](mmpretrain.models.ImageClassifier) as an example to
describe the below initialization arguments:
Here, we use the config fields of [`ImageClassifier`](mmpretrain.models.classifiers.ImageClassifier) as an example to
describe the initialization arguments as below:

- `backbone`: The settings of the backbone. The backbone is the main network to extract features of the inputs, like `ResNet`, `Swin Transformer`, `Vision Transformer` etc. All available backbones can be found in the [API documentation](mmpretrain.models.backbones).
  - For self-supervised learning, some of the backbones are re-implemented; you can find more details in the [API documentation](mmpretrain.models.selfsup).

@ -188,7 +176,7 @@ test_evaluator = val_evaluator # The settings of the evaluation metrics for t
```

```{note}
'model.data_preprocessor' can be defined either in `model=dict(data_preprocessor=dict())` or using the `data_preprocessor` definition here, if both of them exist, use the `model.data_preprocessor` configuration.
The data preprocessor can be defined either in the subfield of `model`, or using the `data_preprocessor` definition here. If both of them exist, the `model.data_preprocessor` configuration will be used.
```
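
As a small illustration of that precedence (all field values here are placeholders):

```python
# A top-level definition ...
data_preprocessor = dict(
    mean=[123.675, 116.28, 103.53],
    std=[58.395, 57.12, 57.375],
    to_rgb=True,
)

# ... is ignored if `model` also carries one; this definition wins.
model = dict(
    type='ImageClassifier',
    data_preprocessor=dict(
        mean=[127.5, 127.5, 127.5],
        std=[127.5, 127.5, 127.5],
        to_rgb=True,
    ),
)
```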

### Schedule settings
@ -1,53 +1,42 @@
# Prepare Dataset

MMPretrain supports the following datasets:

- [Prepare Dataset](#prepare-dataset)
  - [CustomDataset](#customdataset)
    - [Subfolder Format](#subfolder-format)
    - [Text Annotation File Format](#text-annotation-file-format)
  - [ImageNet](#imagenet)
  - [CIFAR](#cifar)
  - [MNIST](#mnist)
  - [OpenMMLab 2.0 Standard Dataset](#openmmlab-20-standard-dataset)
  - [Other Datasets](#other-datasets)
  - [Dataset Wrappers](#dataset-wrappers)

If your dataset is not in the above list, you could reorganize the format of your dataset to adapt to **`CustomDataset`**.

## CustomDataset

[`CustomDataset`](mmpretrain.datasets.CustomDataset) is a general dataset class for you to use your own datasets. To use `CustomDataset`, you need to organize your dataset files according to the following two formats:

### Subfolder Format

Place all samples in one folder as below:
In this format, you only need to re-organize your dataset folder and place all samples in one folder without
creating any annotation files.

For supervised tasks (with `with_label=True`), we use the names of the sub-folders as the category names; as
shown in the example below, `class_x` and `class_y` will be recognized as the category names.

```text
Sample files (for `with_label=True`, supervised tasks, we use the name of sub-folders as the categories names):
As follows, class_x and class_y represent different categories.):
data_prefix/
├── class_x
│   ├── xxx.png
│   ├── xxy.png
│   └── ...
│   └── xxz.png
└── class_y
    ├── 123.png
    ├── nsdf3.png
    ├── ...
    └── asd932_.png


Sample files (for `with_label=False`, unsupervised tasks, we use all sample files under the specified folder):
data_prefix/
├── folder_1
│   ├── xxx.png
│   ├── xxy.png
│   └── ...
data_prefix/
├── class_x
│   ├── xxx.png
│   ├── xxy.png
│   ├── ...
│   └── xxz.png
└── class_y
    ├── 123.png
    ├── nsdf3.png
    └── ...
    ├── ...
    └── asd932_.png
```

For unsupervised tasks (with `with_label=False`), we directly load all sample files under the specified folder:

```text
data_prefix/
├── folder_1
│   ├── xxx.png
│   ├── xxy.png
│   └── ...
├── 123.png
├── nsdf3.png
└── ...
```

Assume you want to use it as the training dataset; below are the configurations in your config file.

@ -59,48 +48,28 @@ train_dataloader = dict(
    dataset=dict(
        type='CustomDataset',
        data_prefix='path/to/data_prefix',
        with_label=True,  # or False for unsupervised tasks
        pipeline=...
    )
)
```

```{note}
Do not specify `ann_file`, or specify `ann_file=None` if you want to use this method.
If you want to use this format, do not specify `ann_file`, or specify `ann_file=''`.

And please note that the subfolder format requires a folder scan, which may cause a slower initialization,
especially for large datasets or slow file IO.
```

### Text Annotation File Format

The text annotation file format uses text files to store path and category information. All the images are placed in the folder of `data_prefix`, and `ann_file` contains all the ground-truth annotations.
In this format, we use a text annotation file to store image file paths and the corresponding category
indices.

In the following case, the dataset directory is as follows:
For supervised tasks (with `with_label=True`), the annotation file should include the file path and the
category index of one sample in one line, split by a space, as below:

```text
The annotation file (for ``with_label=True``, supervised tasks):
folder_1/xxx.png 0
folder_1/xxy.png 1
123.png 4
nsdf3.png 3
...

The annotation file (for ``with_label=False``, unsupervised tasks):
folder_1/xxx.png
folder_1/xxy.png
123.png
nsdf3.png
...

Sample files:
data_prefix/
├── folder_1
│   ├── xxx.png
│   ├── xxy.png
│   └── ...
├── 123.png
├── nsdf3.png
└── ...
```

Assume you want to use the training dataset, and the annotation file is `train_annfile.txt` as above. The annotation file contains ordinary text, which is divided into two columns: the first column is the image path, and the second column is the **index number** of its category:
All these file paths can be absolute paths, or paths relative to the `data_prefix`.

```text
folder_1/xxx.png 0

@ -112,27 +81,59 @@ nsdf3.png 3

```{note}
The index numbers of categories start from 0. And the value of ground-truth labels should fall in range `[0, num_classes - 1]`.

In addition, please use the `classes` field in the dataset settings to specify the name of every category.
```

In the annotation file, we only specified the category index of every sample; you also need to specify the `classes` field in the dataset config to record the name of every category:
For unsupervised tasks (with `with_label=False`), the annotation file only needs to include the file path of
one sample in one line, as below:

```text
folder_1/xxx.png
folder_1/xxy.png
123.png
nsdf3.png
...
```
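
If you need to produce such annotation files, a small hypothetical helper (not part of MMPretrain; the paths and the `.png` suffix are assumptions) could convert a subfolder-format dataset:

```python
from pathlib import Path

data_prefix = Path('path/to/data_prefix')  # subfolder-format dataset root
classes = sorted(d.name for d in data_prefix.iterdir() if d.is_dir())

with open('train.txt', 'w') as f:
    for idx, cls in enumerate(classes):
        for img in sorted((data_prefix / cls).glob('*.png')):
            # Paths are written relative to `data_prefix`; indices start from 0.
            f.write(f'{img.relative_to(data_prefix)} {idx}\n')
```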

Assume the entire dataset folder is as below:

```text
data_root
├── meta
│   ├── test.txt     # The annotation file for the test dataset
│   ├── train.txt    # The annotation file for the training dataset
│   └── val.txt      # The annotation file for the validation dataset.
├── train
│   ├── 123.png
│   ├── folder_1
│   │   ├── xxx.png
│   │   └── xxy.png
│   └── nsdf3.png
├── test
└── val
```

Here is an example of dataset settings in config files:

```python
# Training dataloader configurations
train_dataloader = dict(
    ...
    # Training dataset configurations
    dataset=dict(
        type='CustomDataset',
        data_root='path/to/data_root',
        ann_file='meta/train_annfile.txt',
        data_prefix='train',
        classes=['A', 'B', 'C', 'D', ...],
        pipeline=...,
        data_root='path/to/data_root',  # The common prefix of both `ann_file` and `data_prefix`.
        ann_file='meta/train.txt',  # The path of the annotation file relative to the data_root.
        data_prefix='train',  # The prefix of the file paths in the `ann_file`, relative to the data_root.
        with_label=True,  # or False for unsupervised tasks
        classes=['A', 'B', 'C', 'D', ...],  # The name of every category.
        pipeline=...,  # The transformations to process the dataset samples.
    )
    ...
)
```

```{note}
If the `ann_file` is specified, the dataset will be generated by the ``ann_file``. Otherwise, try the first way.
For a complete example about how to use the `CustomDataset`, please see [How to Pretrain with Custom Dataset](../notes/pretrain_custom_dataset.md)
```

## ImageNet

@ -144,31 +145,94 @@ ImageNet has multiple versions, but the most commonly used one is [ILSVRC 2012](
- ILSVRC2012_img_train.tar (~138GB)
- ILSVRC2012_img_val.tar (~6.3GB)
3. Untar the downloaded files
4. Download and untar the meta data from this [link](https://download.openmmlab.com/mmclassification/datasets/imagenet/meta/caffe_ilsvrc12.tar.gz).
5. Re-organize the image files according to the path in the meta data, and it should be like:
4. Re-organize the image files according to the format convention of [CustomDataset](#customdataset)

```{note}
In MMPretrain, we use the text annotation file format of ImageNet in all provided config files. Therefore, to use
the subfolder format, please set `ann_file=''` in these config files.
```

### Subfolder Format

Re-organize the dataset as below:

```text
imagenet/
├── meta/
│   ├── train.txt
│   ├── test.txt
│   └── val.txt
├── train/
│   ├── n01440764
│   │   ├── n01440764_10026.JPEG
│   │   ├── n01440764_10027.JPEG
│   │   ├── n01440764_10029.JPEG
│   │   ├── n01440764_10040.JPEG
│   │   ├── n01440764_10042.JPEG
│   │   ├── n01440764_10043.JPEG
│   │   └── n01440764_10048.JPEG
│   ├── ...
├── val/
│   ├── ILSVRC2012_val_00000001.JPEG
│   ├── ILSVRC2012_val_00000002.JPEG
│   ├── ILSVRC2012_val_00000003.JPEG
│   ├── ILSVRC2012_val_00000004.JPEG
│   ├── ...
data/imagenet/
├── train/
│   ├── n01440764
│   │   ├── n01440764_10026.JPEG
│   │   ├── n01440764_10027.JPEG
│   │   ├── n01440764_10029.JPEG
│   │   ├── n01440764_10040.JPEG
│   │   ├── n01440764_10042.JPEG
│   │   ├── n01440764_10043.JPEG
│   │   └── n01440764_10048.JPEG
│   ├── ...
├── val/
│   ├── n01440764
│   │   ├── ILSVRC2012_val_00000293.JPEG
│   │   ├── ILSVRC2012_val_00002138.JPEG
│   │   ├── ILSVRC2012_val_00003014.JPEG
│   │   └── ...
│   ├── ...
```

And then, you can use the [`ImageNet`](mmpretrain.datasets.ImageNet) dataset with the below configurations:

```python
train_dataloader = dict(
    ...
    # Training dataset configurations
    dataset=dict(
        type='ImageNet',
        data_root='data/imagenet',
        data_prefix='train',
        ann_file='',
        pipeline=...,
    )
)

val_dataloader = dict(
    ...
    # Validation dataset configurations
    dataset=dict(
        type='ImageNet',
        data_root='data/imagenet',
        data_prefix='val',
        ann_file='',
        pipeline=...,
    )
)

test_dataloader = val_dataloader
```

### Text Annotation File Format

You can download and untar the meta data from this [link](https://download.openmmlab.com/mmclassification/datasets/imagenet/meta/caffe_ilsvrc12.tar.gz). And re-organize the dataset as below:

```text
data/imagenet/
├── meta/
│   ├── train.txt
│   ├── test.txt
│   └── val.txt
├── train/
│   ├── n01440764
│   │   ├── n01440764_10026.JPEG
│   │   ├── n01440764_10027.JPEG
│   │   ├── n01440764_10029.JPEG
│   │   ├── n01440764_10040.JPEG
│   │   ├── n01440764_10042.JPEG
│   │   ├── n01440764_10043.JPEG
│   │   └── n01440764_10048.JPEG
│   ├── ...
├── val/
│   ├── ILSVRC2012_val_00000001.JPEG
│   ├── ILSVRC2012_val_00000002.JPEG
│   ├── ILSVRC2012_val_00000003.JPEG
│   ├── ILSVRC2012_val_00000004.JPEG
│   ├── ...
```

And then, you can use the [`ImageNet`](mmpretrain.datasets.ImageNet) dataset with the below configurations:

@ -180,8 +244,8 @@ train_dataloader = dict(
    dataset=dict(
        type='ImageNet',
        data_root='imagenet_folder',
        ann_file='meta/train.txt',
        data_prefix='train/',
        ann_file='meta/train.txt',
        pipeline=...,
    )
)

@ -192,8 +256,8 @@ val_dataloader = dict(
    dataset=dict(
        type='ImageNet',
        data_root='imagenet_folder',
        ann_file='meta/val.txt',
        data_prefix='val/',
        ann_file='meta/val.txt',
        pipeline=...,
    )
)

@ -328,6 +392,8 @@ train_dataloader = dict(

To find more datasets supported by MMPretrain, and get more configurations of the above datasets, please see the [dataset documentation](mmpretrain.datasets).

To implement your own dataset class for some special formats, please see [Adding New Dataset](../advanced_guides/datasets.md).

## Dataset Wrappers

The following dataset wrappers are supported in MMEngine; you can refer to the {external+mmengine:doc}`MMEngine tutorial <advanced_tutorials/basedataset>` to learn how to use them.
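
For instance, a sketch of repeating a dataset with MMEngine's `RepeatDataset` wrapper (the field values are placeholders):

```python
train_dataloader = dict(
    ...
    dataset=dict(
        type='RepeatDataset',
        times=10,  # iterate the wrapped dataset 10 times per epoch
        dataset=dict(
            type='CustomDataset',
            data_prefix='path/to/data_prefix',
            pipeline=...,
        ),
    ),
)
```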
@ -1,24 +1,14 @@
# Downstream tasks

- [Downstream tasks](#downstream-tasks)
  - [Detection](#detection)
    - [Train](#train)
    - [Test](#test)
  - [Segmentation](#segmentation)
    - [Train](#train-1)
    - [Test](#test-1)

## Detection

Here, we prefer to use MMDetection to do the detection task. First, make sure you have installed [MIM](https://github.com/open-mmlab/mim), which is also a project of OpenMMLab.
For detection tasks, please use MMDetection. First, make sure you have installed [MIM](https://github.com/open-mmlab/mim), which is also a project of OpenMMLab.

```shell
pip install openmim
mim install 'mmdet>=3.0.0rc0'
```

It is very easy to install the package.

Besides, please refer to MMDet for [installation](https://mmdetection.readthedocs.io/en/dev-3.x/get_started.html) and [data preparation](https://mmdetection.readthedocs.io/en/dev-3.x/user_guides/dataset_prepare.html).

### Train

@ -35,16 +25,9 @@ bash tools/benchmarks/mmdetection/mim_slurm_train_c4.sh ${PARTITION} ${CONFIG} $
bash tools/benchmarks/mmdetection/mim_slurm_train_fpn.sh ${PARTITION} ${CONFIG} ${PRETRAIN}
```

Remarks:

- `${CONFIG}`: Use config files under `configs/benchmarks/mmdetection/`. Since repositories of OpenMMLab support referring to config files across different repositories, we can easily leverage the configs from MMDetection like:

  ```shell
  _base_ = 'mmdet::mask_rcnn/mask-rcnn_r50-caffe-c4_1x_coco.py'
  ```

  Writing your config files from scratch is also supported.

- `${CONFIG}`: Use the config file path in MMDetection directly. And for some algorithms, we also have some
  modified config files, which can be found in the `benchmarks` folder under the corresponding algorithm
  folder. You can also write your config file from scratch.
- `${PRETRAIN}`: the pre-trained model file.
- `${GPUS}`: The number of GPUs that you want to use to train. We adopt 8 GPUs for detection tasks by default.

@ -52,18 +35,8 @@ Example:

```shell
bash ./tools/benchmarks/mmdetection/mim_dist_train_c4.sh \
    configs/benchmarks/mmdetection/coco/mask-rcnn_r50-c4_ms-1x_coco.py \
    configs/byol/benchmarks/mask-rcnn_r50-c4_ms-1x_coco.py \
    https://download.openmmlab.com/mmselfsup/1.x/byol/byol_resnet50_16xb256-coslr-200e_in1k/byol_resnet50_16xb256-coslr-200e_in1k_20220825-de817331.pth 8
```

Or if you want to do the detection task with [detectron2](https://github.com/facebookresearch/detectron2), we also provide some config files.
Please refer to [INSTALL.md](https://github.com/facebookresearch/detectron2/blob/main/INSTALL.md) for installation and follow the [directory structure](https://github.com/facebookresearch/detectron2/tree/main/datasets) to prepare your datasets required by detectron2.

```shell
conda activate detectron2  # use the detectron2 environment here, otherwise use the open-mmlab environment
cd tools/benchmarks/detectron2
python convert-pretrain-to-detectron2.py ${WEIGHT_FILE} ${OUTPUT_FILE}  # must use .pkl as the output extension.
bash run.sh ${DET_CFG} ${OUTPUT_FILE}
```

### Test

@ -78,15 +51,17 @@ bash tools/benchmarks/mmdetection/mim_dist_test.sh ${CONFIG} ${CHECKPOINT} ${GPU
bash tools/benchmarks/mmdetection/mim_slurm_test.sh ${PARTITION} ${CONFIG} ${CHECKPOINT}
```

Remarks:

- `${CHECKPOINT}`: The well-trained detection model that you want to test.
- `${CONFIG}`: Use the config file name in MMDetection directly. And for some algorithms, we also have some
  modified config files, which can be found in the `benchmarks` folder under the corresponding algorithm
  folder. You can also write your config file from scratch.
- `${CHECKPOINT}`: The fine-tuned detection model that you want to test.
- `${GPUS}`: The number of GPUs that you want to use to test. We adopt 8 GPUs for detection tasks by default.

Example:

```shell
bash ./tools/benchmarks/mmdetection/mim_dist_test.sh \
    configs/benchmarks/mmdetection/coco/mask-rcnn_r50_fpn_ms-1x_coco.py \
    configs/byol/benchmarks/mask-rcnn_r50_fpn_ms-1x_coco.py \
    https://download.openmmlab.com/mmselfsup/1.x/byol/byol_resnet50_16xb256-coslr-200e_in1k/byol_resnet50_16xb256-coslr-200e_in1k_20220825-de817331.pth 8
```

@ -99,13 +74,11 @@ pip install openmim
mim install 'mmsegmentation>=1.0.0rc0'
```

It is very easy to install the package.

Besides, please refer to MMSegmentation for [installation](https://mmsegmentation.readthedocs.io/en/dev-1.x/get_started.html) and [data preparation](https://mmsegmentation.readthedocs.io/en/dev-1.x/user_guides/2_dataset_prepare.html).

### Train

After installation, you can run MMSeg with simple command.
After installation, you can run MMSegmentation with a simple command.

```shell
# distributed version

@ -115,17 +88,9 @@ bash tools/benchmarks/mmsegmentation/mim_dist_train.sh ${CONFIG} ${PRETRAIN} ${G
bash tools/benchmarks/mmsegmentation/mim_slurm_train.sh ${PARTITION} ${CONFIG} ${PRETRAIN}
```

Remarks:

- `${CONFIG}`: Use config files under `configs/benchmarks/mmsegmentation/`. Since repositories of OpenMMLab support referring to config files across different
  repositories, we can easily leverage the configs from MMSegmentation like:

  ```shell
  _base_ = 'mmseg::fcn/fcn_r50-d8_4xb2-40k_cityscapes-769x769.py'
  ```

  Writing your config files from scratch is also supported.

- `${CONFIG}`: Use the config file path in MMSegmentation directly. And for some algorithms, we also have some
  modified config files, which can be found in the `benchmarks` folder under the corresponding algorithm
  folder. You can also write your config file from scratch.
- `${PRETRAIN}`: the pre-trained model file.
- `${GPUS}`: The number of GPUs that you want to use to train. We adopt 4 GPUs for segmentation tasks by default.

@ -149,14 +114,15 @@ bash tools/benchmarks/mmsegmentation/mim_dist_test.sh ${CONFIG} ${CHECKPOINT} ${
bash tools/benchmarks/mmsegmentation/mim_slurm_test.sh ${PARTITION} ${CONFIG} ${CHECKPOINT}
```

Remarks:

- `${CHECKPOINT}`: The well-trained segmentation model that you want to test.
- `${CONFIG}`: Use the config file name in MMSegmentation directly. And for some algorithms, we also have some
  modified config files, which can be found in the `benchmarks` folder under the corresponding algorithm
  folder. You can also write your config file from scratch.
- `${CHECKPOINT}`: The fine-tuned segmentation model that you want to test.
- `${GPUS}`: The number of GPUs that you want to use to test. We adopt 4 GPUs for segmentation tasks by default.

Example:

```shell
bash ./tools/benchmarks/mmsegmentation/mim_dist_test.sh \
    configs/benchmarks/mmsegmentation/voc12aug/fcn_r50-d8_4xb4-20k_voc12aug-512x512.py \
bash ./tools/benchmarks/mmsegmentation/mim_dist_test.sh fcn_r50-d8_4xb4-20k_voc12aug-512x512.py \
    https://download.openmmlab.com/mmselfsup/1.x/byol/byol_resnet50_16xb256-coslr-200e_in1k/byol_resnet50_16xb256-coslr-200e_in1k_20220825-de817331.pth 4
```
@ -1,252 +0,0 @@
|
|||
# Fine-tune Models
|
||||
|
||||
- [Fine-tune Models](#fine-tune-models)
|
||||
- [Inherit base configs](#inherit-base-configs)
|
||||
- [Specify pre-trained model in configs](#specify-pre-trained-model-in-configs)
|
||||
- [Modify dataset configs](#modify-dataset-configs)
|
||||
- [Modify training schedule configs](#modify-training-schedule-configs)
|
||||
- [Start Training](#start-training)
|
||||
- [Apply pre-trained model with command line](#apply-pre-trained-model-with-command-line)
|
||||
|
||||
In most scenarios, we want to apply a pre-trained model without training from scratch, which might possibly introduce extra uncertainties about the model convergency and therefore, is time-consuming.
|
||||
The common sense is to learn from previous models trained on large dataset, which can hopefully provide better knowledge than a random beginner. Roughly speaking, this process is as known as fine-tuning.
|
||||
|
||||
Models pre-trained on the ImageNet dataset have been demonstrated to be effective for other datasets and other downstream tasks.
|
||||
Hence, this tutorial provides instructions for users to use the models provided in the [Model Zoo](../modelzoo_statistics.md) for other datasets to obtain better performance.
|
||||
|
||||
There are two steps to fine-tune a model on a new dataset.
|
||||
|
||||
- Add support for the new dataset following [Prepare Dataset](dataset_prepare.md).
|
||||
- Modify the configs as will be discussed in this tutorial.
|
||||
|
||||
Assume we have a ResNet-50 model pre-trained on the ImageNet-2012 dataset and want
|
||||
to fine-tune on the CIFAR-10 dataset, we need to modify five parts in the config.
|
||||
|
||||
## Inherit base configs
|
||||
|
||||
At first, create a new config file
|
||||
`configs/tutorial/resnet50_finetune_cifar.py` to store our fine-tune configs. Of course,
|
||||
the path can be customized by yourself.
|
||||
|
||||
To reuse the common parts among different base configs, we support inheriting
|
||||
configs from multiple existing configs.Including following four parts:
|
||||
|
||||
- Model configs: To fine-tune a ResNet-50 model, the new
|
||||
config needs to inherit `configs/_base_/models/resnet50.py` to build the basic structure of the model.
|
||||
- Dataset configs: To use the CIFAR-10 dataset, the new config can simply
|
||||
inherit `configs/_base_/datasets/cifar10_bs16.py`.
|
||||
- Schedule configs: The new config can inherit `_base_/schedules/cifar10_bs128.py`
|
||||
for CIFAR-10 dataset with a batch size of 128.
|
||||
- Runtime configs: For runtime settings such as basic hooks, etc.,
|
||||
the new config needs to inherit `configs/_base_/default_runtime.py`.
|
||||
|
||||
To inherit all configs above, put the following code at the config file.
|
||||
|
||||
```python
|
||||
_base_ = [
|
||||
'../_base_/models/resnet50.py',
|
||||
'../_base_/datasets/cifar10_bs16.py',
|
||||
'../_base_/schedules/cifar10_bs128.py',
|
||||
'../_base_/default_runtime.py',
|
||||
]
|
||||
```
|
||||
|
||||
Besides, you can also choose to write the whole contents rather than use inheritance.
|
||||
Refers to [`configs/lenet/lenet5_mnist.py`](https://github.com/open-mmlab/mmpretrain/blob/main/configs/lenet/lenet5_mnist.py) for more details.
|
||||
|
||||
## Specify pre-trained model in configs
|
||||
|
||||
When fine-tuning a model, usually we want to load the pre-trained backbone
|
||||
weights and train a new classification head from scratch.
|
||||
|
||||
To load the pre-trained backbone, we need to change the initialization config
|
||||
of the backbone and use `Pretrained` initialization function. Besides, in the
|
||||
`init_cfg`, we use `prefix='backbone'` to tell the initialization function
|
||||
the prefix of the submodule that needs to be loaded in the checkpoint.
|
||||
|
||||
For example, `backbone` here means to load the backbone submodule. And here we
|
||||
use an online checkpoint, it will be downloaded automatically during training,
|
||||
you can also download the model manually and use a local path.
|
||||
And then we need to modify the head according to the class numbers of the new
|
||||
datasets by just changing `num_classes` in the head.
|
||||
|
||||
```python
|
||||
model = dict(
|
||||
backbone=dict(
|
||||
init_cfg=dict(
|
||||
type='Pretrained',
|
||||
checkpoint='https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_8xb32_in1k_20210831-ea4938fc.pth',
|
||||
prefix='backbone',
|
||||
)),
|
||||
head=dict(num_classes=10),
|
||||
)
|
||||
```
|
||||
|
||||
```{tip}
|
||||
Here we only need to set the part of configs we want to modify, because the
|
||||
inherited configs will be merged and get the entire configs.
|
||||
```
|
||||
|
||||
When new dataset is small and shares the domain with the pre-trained dataset,
|
||||
we might want to freeze the first several stages' parameters of the
|
||||
backbone, that will help the network to keep ability to extract low-level
|
||||
information learnt from pre-trained model. In MMPretrain, you can simply
|
||||
specify how many stages to freeze by `frozen_stages` argument. For example, to
|
||||
freeze the first two stages' parameters, just use the following configs:
|
||||
|
||||
```python
|
||||
model = dict(
|
||||
backbone=dict(
|
||||
frozen_stages=2,
|
||||
init_cfg=dict(
|
||||
type='Pretrained',
|
||||
checkpoint='https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_8xb32_in1k_20210831-ea4938fc.pth',
|
||||
prefix='backbone',
|
||||
)),
|
||||
head=dict(num_classes=10),
|
||||
)
|
||||
```
|
||||
|
||||
```{note}
|
||||
Not all backbones support the `frozen_stages` argument by now. Please check
|
||||
[the docs](https://mmpretrain.readthedocs.io/en/main/api.html#module-mmpretrain.models.backbones)
|
||||
to confirm if your backbone supports it.
|
||||
```
|
||||
|
||||
## Modify dataset configs
|
||||
|
||||
When fine-tuning on a new dataset, usually we need to modify some dataset
|
||||
configs. Here, we need to modify the pipeline to resize the image from 32 to
|
||||
224 to fit the input size of the model pre-trained on ImageNet, and modify
|
||||
dataloaders correspondingly.
|
||||
|
||||
```python
|
||||
# data pipeline settings
|
||||
train_pipeline = [
|
||||
dict(type='RandomCrop', crop_size=32, padding=4),
|
||||
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
|
||||
dict(type='Resize', scale=224),
|
||||
dict(type='PackInputs'),
|
||||
]
|
||||
test_pipeline = [
|
||||
dict(type='Resize', scale=224),
|
||||
dict(type='PackInputs'),
|
||||
]
|
||||
# dataloader settings
|
||||
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
|
||||
val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
|
||||
test_dataloader = val_dataloader
|
||||
```
|
||||
|
||||
## Modify training schedule configs
|
||||
|
||||
The fine-tuning hyper parameters vary from the default schedule. It usually
|
||||
requires smaller learning rate and quicker decaying scheduler epochs.
|
||||
|
||||
```python
|
||||
# lr is set for a batch size of 128
|
||||
optim_wrapper = dict(
|
||||
optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001))
|
||||
# learning policy
|
||||
param_scheduler = dict(
|
||||
type='MultiStepLR', by_epoch=True, milestones=[15], gamma=0.1)
|
||||
```
|
||||
|
||||
```{tip}
|
||||
Refers to [Learn about Configs](config.md) for more detailed configurations.
|
||||
```
|
||||
|
||||
## Start Training
|
||||
|
||||
Now, we have finished the fine-tuning config file as following:
|
||||
|
||||
```python
|
||||
_base_ = [
|
||||
'../_base_/models/resnet50.py',
|
||||
'../_base_/datasets/cifar10_bs16.py',
|
||||
'../_base_/schedules/cifar10_bs128.py',
|
||||
'../_base_/default_runtime.py',
|
||||
]
|
||||
|
||||
# Model config
|
||||
model = dict(
|
||||
backbone=dict(
|
||||
frozen_stages=2,
|
||||
init_cfg=dict(
|
||||
type='Pretrained',
|
||||
checkpoint='https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_8xb32_in1k_20210831-ea4938fc.pth',
|
||||
prefix='backbone',
|
||||
)),
|
||||
head=dict(num_classes=10),
|
||||
)
|
||||
|
||||
# Dataset config
|
||||
# data pipeline settings
|
||||
train_pipeline = [
|
||||
dict(type='RandomCrop', crop_size=32, padding=4),
|
||||
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
|
||||
dict(type='Resize', scale=224),
|
||||
dict(type='PackInputs'),
|
||||
]
|
||||
test_pipeline = [
|
||||
dict(type='Resize', scale=224),
|
||||
dict(type='PackInputs'),
|
||||
]
|
||||
# dataloader settings
|
||||
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
|
||||
val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
# Training schedule config
|
||||
# lr is set for a batch size of 128
|
||||
optim_wrapper = dict(
|
||||
optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001))
|
||||
# learning policy
|
||||
param_scheduler = dict(
|
||||
type='MultiStepLR', by_epoch=True, milestones=[15], gamma=0.1)
|
||||
```
|
||||
|
||||
Here we use 8 GPUs on your computer to train the model with the following command:
|
||||
|
||||
```shell
|
||||
bash tools/dist_train.sh configs/tutorial/resnet50_finetune_cifar.py 8
|
||||
```
|
||||
|
||||
Also, you can use only one GPU to train the model with the following command:
|
||||
|
||||
```shell
|
||||
python tools/train.py configs/tutorial/resnet50_finetune_cifar.py
|
||||
```
|
||||
|
||||
But wait, an important config needs to be changed when using a single GPU. We need to
|
||||
change the dataset config as follows:
|
||||
|
||||
```python
|
||||
train_dataloader = dict(
|
||||
batch_size=128,
|
||||
dataset=dict(pipeline=train_pipeline),
|
||||
)
|
||||
val_dataloader = dict(
|
||||
batch_size=128,
|
||||
dataset=dict(pipeline=test_pipeline),
|
||||
)
|
||||
test_dataloader = val_dataloader
|
||||
```
|
||||
|
||||
This is because our training schedule assumes a total batch size of 128. When using 8 GPUs,
|
||||
the `batch_size=16` setting in the base config file applies to each GPU, and the total batch
|
||||
size is 128. But when using a single GPU, you need to change it to 128 manually to
|
||||
match the training schedule.
|
||||
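The relation between the learning rate and the total batch size follows the linear scaling rule. A rough sketch of that rule with the numbers used here (illustrative only, not MMEngine's actual `--auto-scale-lr` implementation):

```python
# Linear scaling rule relating the learning rate to the total batch size.
# base_lr and base_batch_size match the training schedule config above.
base_lr = 0.01
base_batch_size = 128

def scaled_lr(total_batch_size: int) -> float:
    """Linearly rescale the learning rate for a different total batch size."""
    return base_lr * total_batch_size / base_batch_size

assert scaled_lr(128) == 0.01   # 8 GPUs x 16, or one GPU with batch_size=128
print(scaled_lr(16))            # one GPU keeping batch_size=16 -> 0.00125
```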
|
||||
### Apply pre-trained model with command line
|
||||
|
||||
If you don't want to modify the configs, you could use `--cfg-options` to add your pre-trained model path to `init_cfg`.
|
||||
|
||||
For example, the command below will also load the pre-trained model.
|
||||
|
||||
```shell
|
||||
bash tools/dist_train.sh configs/tutorial/resnet50_finetune_cifar.py 8 \
|
||||
--cfg-options model.backbone.init_cfg.type='Pretrained' \
|
||||
model.backbone.init_cfg.checkpoint='https://download.openmmlab.com/mmselfsup/1.x/mocov3/mocov3_resnet50_8xb512-amp-coslr-100e_in1k/mocov3_resnet50_8xb512-amp-coslr-100e_in1k_20220927-f1144efa.pth' \
|
||||
model.backbone.init_cfg.prefix='backbone'
|
||||
```
|
|
@ -1,19 +1,15 @@
|
|||
# Inference with existing models
|
||||
|
||||
- [Inference with existing models](#inference-with-existing-models)
|
||||
- [Inference on a given image](#inference-on-a-given-image)
|
||||
|
||||
MMPretrain provides pre-trained models in [Model Zoo](../modelzoo_statistics.md).
|
||||
This note will show **how to use existing models to run inference on given images**.
|
||||
|
||||
As for how to test existing models on standard datasets, please see this [guide](./train_test.md#test)
|
||||
As for how to test existing models on standard datasets, please see this [guide](./test.md)
|
||||
|
||||
## Inference on a given image
|
||||
|
||||
MMPretrain provides high-level Python APIs for inference on a given image:
|
||||
|
||||
- [`get_model`](mmpretrain.apis.get_model): Get a model with the model name.
|
||||
- [`init_model`](mmpretrain.apis.init_model): Initialize a model with a config and a checkpoint.
|
||||
- [`inference_model`](mmpretrain.apis.inference_model): Run inference on a given image.
|
||||
|
||||
Here is an example of building the model and running inference on a given image using an ImageNet-1k pre-trained checkpoint, as sketched below.
|
||||
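A minimal sketch of such an inference call (the model name and image path below are placeholders for illustration; `list_models` from `mmpretrain` can be used to find the available names):

```python
from mmpretrain import get_model, inference_model

# Build a model by name and load its ImageNet-1k pre-trained weights.
model = get_model('resnet50_8xb32_in1k', pretrained=True)

# Run inference on a single image; the result is a dict of predictions
# (assuming the classification keys 'pred_class' and 'pred_score').
result = inference_model(model, 'demo/demo.JPEG')
print(result['pred_class'], result['pred_score'])
```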
|
|
|
@ -1,12 +1,5 @@
|
|||
# Test
|
||||
|
||||
- [Test](#test)
|
||||
- [Test with your PC](#test-with-your-pc)
|
||||
- [Test with multiple GPUs](#test-with-multiple-gpus)
|
||||
- [Test with multiple machines](#test-with-multiple-machines)
|
||||
- [Multiple machines in the same network](#multiple-machines-in-the-same-network)
|
||||
- [Multiple machines managed with slurm](#multiple-machines-managed-with-slurm)
|
||||
|
||||
For image classification and image retrieval tasks, you can test your model after training.
|
||||
|
||||
## Test with your PC
|
||||
|
@ -39,7 +32,7 @@ CUDA_VISIBLE_DEVICES=-1 python tools/test.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [
|
|||
| `--show` | Visualize the prediction result in a window. |
|
||||
| `--interval INTERVAL` | The interval of samples to visualize. |
|
||||
| `--wait-time WAIT_TIME` | The display time of every window (in seconds). Defaults to 1. |
|
||||
| `--no-pin-memory` | Whether to disable the pin_memory option in dataloaders. |
|
||||
| `--no-pin-memory` | Whether to disable the `pin_memory` option in dataloaders. |
|
||||
| `--tta` | Whether to enable the Test-Time-Aug (TTA). If the config file has `tta_pipeline` and `tta_model` fields, use them to determine the TTA transforms and how to merge the TTA results. Otherwise, use flip TTA by averaging classification score. |
|
||||
| `--launcher {none,pytorch,slurm,mpi}` | Options for job launcher. |
|
||||
|
||||
|
|
|
@ -1,11 +1,8 @@
|
|||
# Train
|
||||
|
||||
- [Train](#train)
|
||||
- [Train with your PC](#train-with-your-pc)
|
||||
- [Train with multiple GPUs](#train-with-multiple-gpus)
|
||||
- [Train with multiple machines](#train-with-multiple-machines)
|
||||
- [Multiple machines in the same network](#multiple-machines-in-the-same-network)
|
||||
- [Multiple machines managed with slurm](#multiple-machines-managed-with-slurm)
|
||||
In this tutorial, we will introduce how to use the scripts provided in MMPretrain to start a training task. If
|
||||
you need them, we also have some practical examples on [how to pretrain with custom dataset](../notes/pretrain_custom_dataset.md)
|
||||
and [how to finetune with custom dataset](../notes/finetune_custom_dataset.md).
|
||||
|
||||
## Train with your PC
|
||||
|
||||
|
@ -33,8 +30,8 @@ CUDA_VISIBLE_DEVICES=-1 python tools/train.py ${CONFIG_FILE} [ARGS]
|
|||
| `--amp` | Enable automatic-mixed-precision training. |
|
||||
| `--no-validate` | **Not suggested**. Disable checkpoint evaluation during training. |
|
||||
| `--auto-scale-lr` | Auto scale the learning rate according to the actual batch size and the original batch size. |
|
||||
| `--no-pin-memory` | Whether to disable the pin_memory option in dataloaders. |
|
||||
| `--no-persistent-workers` | Whether to disable the persistent_workers option in dataloaders. |
|
||||
| `--no-pin-memory` | Whether to disable the `pin_memory` option in dataloaders. |
|
||||
| `--no-persistent-workers` | Whether to disable the `persistent_workers` option in dataloaders. |
|
||||
| `--cfg-options CFG_OPTIONS` | Override some settings in the used config, the key-value pair in xxx=yyy format will be merged into the config file. If the value to be overwritten is a list, it should be of the form of either `key="[a,b]"` or `key=a,b`. The argument also allows nested list/tuple values, e.g. `key="[(a,b),(c,d)]"`. Note that the quotation marks are necessary and that no white space is allowed. |
|
||||
| `--launcher {none,pytorch,slurm,mpi}` | Options for job launcher. |
|
||||
|
||||
|
|
|
@ -1,33 +1,49 @@
|
|||
# MMCLS 中的约定
|
||||
# MMPretrain 中的约定
|
||||
|
||||
## 配置文件命名规则
|
||||
## 模型命名规则
|
||||
|
||||
MMClassification 按照以下风格进行配置文件命名,代码库的贡献者需要遵循相同的命名规则。文件名总体分为四部分:算法信息,模块信息,训练信息和数据信息。逻辑上属于不同部分的单词之间用下划线 `'_'` 连接,同一部分有多个单词用短横线 `'-'` 连接。
|
||||
MMPretrain 按照以下风格进行模型命名,代码库的贡献者需要遵循相同的命名规则。模型名总体分为五个部分:算法信息,模块信息,预训练信息,训练信息和数据信息。逻辑上属于不同部分的单词之间用下划线 `'_'` 连接,同一部分有多个单词用短横线 `'-'` 连接。
|
||||
|
||||
```text
|
||||
{algorithm info}_{module info}_{training info}_{data info}.py
|
||||
{algorithm info}_{module info}_{pretrain info}_{training info}_{data info}
|
||||
```
|
||||
|
||||
- `algorithm info`:算法信息,算法名称或者网络架构,如 resnet 等;
|
||||
- `module info`: 模块信息,因任务而异,用以表示一些特殊的 neck、head 和 pretrain 信息;
|
||||
- `training info`:一些训练信息,训练策略设置,包括 batch size,schedule 以及数据增强等;
|
||||
- `algorithm info`(可选):算法信息,表示用以训练该模型的主要算法,如 MAE、BEiT 等
|
||||
- `module info`:模块信息,主要包含模型的主干网络名称,如 resnet、vit 等
|
||||
- `pretrain info`(可选):预训练信息,比如预训练模型是在 ImageNet-21k 数据集上训练的等信息
|
||||
- `training info`:训练信息,训练策略设置,包括 batch size,schedule 以及数据增强等;
|
||||
- `data info`:数据信息,数据集名称、模态、输入尺寸等,如 imagenet, cifar 等;
|
||||
|
||||
### 算法信息
|
||||
|
||||
指论文中的算法名称缩写,以及相应的分支架构信息。例如:
|
||||
指用以训练该模型的算法名称,例如:
|
||||
|
||||
- `resnet50`
|
||||
- `mobilenet-v3-large`
|
||||
- `vit-small-patch32` : `patch32` 表示 `ViT` 切分的分块大小
|
||||
- `seresnext101-32x4d` : `SeResNet101` 基本网络结构,`32x4d` 表示在 `Bottleneck` 中 `groups` 和 `width_per_group` 分别为32和4
|
||||
- `simclr`
|
||||
- `mocov2`
|
||||
- `eva-mae-style`
|
||||
|
||||
使用监督图像分类任务训练的模型可以省略这个字段。
|
||||
|
||||
### 模块信息
|
||||
|
||||
指一些特殊的 `neck` 、`head` 或者 `pretrain` 的信息, 在分类中常见为预训练信息,比如:
|
||||
指模型的结构信息,一般主要包含模型的主干网络结构,`neck` 和 `head` 信息一般被省略。例如:
|
||||
|
||||
- `in21k-pre` : 在 `ImageNet21k` 上预训练
|
||||
- `in21k-pre-3rd-party` : 在 `ImageNet21k` 上预训练,其权重来自其他仓库
|
||||
- `resnet50`
|
||||
- `vit-base-p16`
|
||||
- `swin-base`
|
||||
|
||||
### 预训练信息
|
||||
|
||||
如果该模型是在预训练模型基础上,通过微调获得的,我们需要记录预训练模型的一些信息。例如:
|
||||
|
||||
- 预训练模型的来源:`fb`、`openai`等。
|
||||
- 训练预训练模型的方法:`clip`、`mae`、`distill` 等。
|
||||
- 用于预训练的数据集:`in21k`、`laion2b`等(`in1k`可以省略)
|
||||
- 训练时长:`300e`、`1600e` 等。
|
||||
|
||||
并非所有信息都是必要的,只需要选择用以区分不同的预训练模型的信息即可。
|
||||
|
||||
在此字段的末尾,使用 `-pre` 作为标识符,例如 `mae-in21k-pre`。
|
||||
|
||||
### 训练信息
|
||||
|
||||
|
@ -46,6 +62,8 @@ Batch size 信息:
|
|||
- `coslr-200e` : 使用 cosine scheduler, 训练 200 个 epoch
|
||||
- `autoaug-mixup-lbs-coslr-50e` : 使用了 `autoaug`、`mixup`、`label smooth`、`cosine scheduler`, 训练了 50 个轮次
|
||||
|
||||
如果模型是从官方仓库等第三方仓库转换过来的,训练信息可以省略,使用 `3rdparty` 作为标识符。
|
||||
|
||||
### 数据信息
|
||||
|
||||
- `in1k` : `ImageNet1k` 数据集,默认使用 `224x224` 大小的图片
|
||||
|
@ -53,29 +71,44 @@ Batch size 信息:
|
|||
- `in1k-384px` : 表示训练的输入图片大小为 `384x384`
|
||||
- `cifar100`
|
||||
|
||||
### 配置文件命名案例
|
||||
### 模型命名案例
|
||||
|
||||
```text
|
||||
repvgg-D2se_deploy_4xb64-autoaug-lbs-mixup-coslr-200e_in1k.py
|
||||
vit-base-p32_clip-openai-pre_3rdparty_in1k
|
||||
```
|
||||
|
||||
- `repvgg-D2se`: 算法信息
|
||||
- `repvgg`: 主要算法名称。
|
||||
- `D2se`: 模型的结构。
|
||||
- `deploy`:模块信息,该模型为推理状态。
|
||||
- `4xb64-autoaug-lbs-mixup-coslr-200e`: 训练信息
|
||||
- `4xb64`: 使用4块 GPU 并且 每块 GPU 的批大小为64。
|
||||
- `autoaug`: 使用 `AutoAugment` 数据增强方法。
|
||||
- `lbs`: 使用 `label smoothing` 损失函数。
|
||||
- `mixup`: 使用 `mixup` 训练增强方法。
|
||||
- `coslr`: 使用 `cosine scheduler` 优化策略。
|
||||
- `200e`: 训练 200 轮次。
|
||||
- `in1k`: 数据信息。 配置文件用于 `ImageNet1k` 数据集上使用 `224x224` 大小图片训练。
|
||||
- `vit-base-p32`: 模块信息
|
||||
- `clip-openai-pre`:预训练信息
|
||||
- `clip`:预训练方法是 clip
|
||||
- `openai`:预训练模型来自 OpenAI
|
||||
- `pre`:预训练标识符
|
||||
- `3rdparty`:模型是从第三方仓库转换而来的
|
||||
- `in1k`:数据集信息。该模型是在 ImageNet-1k 数据集上训练的,输入大小为 `224x224`
|
||||
|
||||
```text
|
||||
beit_beit-base-p16_8xb256-amp-coslr-300e_in1k
|
||||
```
|
||||
|
||||
- `beit`: 算法信息
|
||||
- `beit-base`:模块信息,由于主干网络来自 BEiT 中提出的修改版 ViT,主干网络名称也是 `beit`
|
||||
- `8xb256-amp-coslr-300e`:训练信息
|
||||
- `8xb256`:使用 8 个 GPU,每个 GPU 的批量大小为 256
|
||||
- `amp`:使用自动混合精度训练
|
||||
- `coslr`:使用余弦退火学习率调度器
|
||||
- `300e`:训练 300 个 epoch
|
||||
- `in1k`:数据集信息。该模型是在 ImageNet-1k 数据集上训练的,输入大小为 `224x224`
|
||||
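下面用一小段 Python 演示按上述规则拆分模型名(仅作示意,并非官方解析工具):

```python
# 模型名的各部分用 '_' 连接,同一部分内的多个单词用 '-' 连接。
# 注意:可选字段(如算法信息、预训练信息)会改变拆分后的段数。
name = 'beit_beit-base-p16_8xb256-amp-coslr-300e_in1k'
algorithm, module, training, data = name.split('_')
print(algorithm, module, training, data)
# beit beit-base-p16 8xb256-amp-coslr-300e in1k
```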
|
||||
## 配置文件命名规则
|
||||
|
||||
配置文件的命名与模型名称几乎相同,有几点不同:
|
||||
|
||||
- 训练信息是必要的,不能是 `3rdparty`
|
||||
- 如果配置文件只包含主干网络设置,既没有头部设置也没有数据集设置,我们将其命名为`{module info}_headless.py`。这种配置文件通常用于大型数据集上的第三方预训练模型。
|
||||
|
||||
### 权重命名规则
|
||||
|
||||
权重的命名主要包括配置文件名,日期和哈希值。
|
||||
权重的命名主要包括模型名称,日期和哈希值。
|
||||
|
||||
```text
|
||||
{config_name}_{date}-{hash}.pth
|
||||
{model_name}_{date}-{hash}.pth
|
||||
```
|
||||
|
|
|
@ -1 +0,0 @@
|
|||
# 数据流(待更新)
|
|
@ -235,7 +235,7 @@ env_cfg = dict(
|
|||
)
|
||||
```
|
||||
|
||||
参见 [微调模型](../user_guides/finetune.md) 以了解更多关于模型微调的细节。
|
||||
参见 [微调模型](../notes/finetune_custom_dataset.md) 以了解更多关于模型微调的细节。
|
||||
|
||||
2. **`default_hooks` 和 `custom_hooks` 之间有什么区别?**
|
||||
|
||||
|
|
|
@ -68,7 +68,7 @@ optim_wrapper = dict(
|
|||
optim_wrapper = dict(type='AmpOptimWrapper', optimizer=...)
|
||||
```
|
||||
|
||||
另外,为了方便,我们同时在启动训练脚本 `tools/train.py` 中提供了 `--amp` 参数作为开启混合精度训练的开关,更多细节可以参考[训练与测试](../user_guides/train_test.md)教程。
|
||||
另外,为了方便,我们同时在启动训练脚本 `tools/train.py` 中提供了 `--amp` 参数作为开启混合精度训练的开关,更多细节可以参考[训练教程](../user_guides/train.md)。
|
||||
|
||||
### 参数化精细配置
|
||||
|
||||
|
|
|
@ -18,7 +18,7 @@ import sys
|
|||
import pytorch_sphinx_theme
|
||||
from sphinx.builders.html import StandaloneHTMLBuilder
|
||||
|
||||
sys.path.insert(0, os.path.abspath('../..'))
|
||||
sys.path.insert(0, os.path.abspath('../../'))
|
||||
|
||||
# -- Project information -----------------------------------------------------
|
||||
|
||||
|
@ -233,7 +233,7 @@ napoleon_custom_sections = [
|
|||
# Disable docstring inheritance
|
||||
autodoc_inherit_docstrings = False
|
||||
# Mock some imports during generate API docs.
|
||||
autodoc_mock_imports = ['mmcv._ext', 'matplotlib', 'rich']
|
||||
autodoc_mock_imports = ['rich', 'attr', 'einops']
|
||||
# Disable displaying type annotations, these can be very verbose
|
||||
autodoc_typehints = 'none'
|
||||
|
||||
|
|
|
@ -16,7 +16,6 @@
|
|||
user_guides/inference.md
|
||||
user_guides/train.md
|
||||
user_guides/test.md
|
||||
user_guides/finetune.md
|
||||
user_guides/downstream.md
|
||||
|
||||
.. toctree::
|
||||
|
@ -29,7 +28,6 @@
|
|||
advanced_guides/schedule.md
|
||||
advanced_guides/runtime.md
|
||||
advanced_guides/evaluation.md
|
||||
advanced_guides/data_flow.md
|
||||
advanced_guides/convention.md
|
||||
|
||||
.. toctree::
|
||||
|
@ -92,6 +90,7 @@
|
|||
notes/changelog.md
|
||||
notes/faq.md
|
||||
notes/pretrain_custom_dataset.md
|
||||
notes/finetune_custom_dataset.md
|
||||
|
||||
.. toctree::
|
||||
:caption: 切换语言
|
||||
|
|
|
@ -0,0 +1,328 @@
|
|||
# 如何在自定义数据集上微调模型
|
||||
|
||||
在很多场景下,我们需要快速地将模型应用到新的数据集上,但从头训练模型通常很难快速收敛,这种不确定性会浪费额外的时间。
|
||||
通常,已有的、在大数据集上训练好的模型会比随机初始化提供更为有效的先验信息,粗略来讲,在此基础上的学习我们称之为模型微调。
|
||||
|
||||
已经证明,在 ImageNet 数据集上预训练的模型对于其他数据集和其他下游任务有很好的效果。
|
||||
因此,该教程介绍了如何将 [Model Zoo](../modelzoo_statistics.md) 中提供的预训练模型用于其他数据集,以获得更好的效果。
|
||||
|
||||
在本教程中,我们提供了一个实践示例和一些关于如何在自己的数据集上微调模型的技巧。
|
||||
|
||||
## 第一步:准备你的数据集
|
||||
|
||||
按照 [准备数据集](../user_guides/dataset_prepare.md) 准备你的数据集。
|
||||
假设我们的数据集根文件夹路径为 `data/custom_dataset/`
|
||||
|
||||
假设我们想进行有监督图像分类训练,并使用子文件夹格式的 `CustomDataset` 来组织数据集:
|
||||
|
||||
```text
|
||||
data/custom_dataset/
|
||||
├── train
|
||||
│ ├── class_x
|
||||
│ │ ├── x_1.png
|
||||
│ │ ├── x_2.png
|
||||
│ │ ├── x_3.png
|
||||
│ │ └── ...
|
||||
│ ├── class_y
|
||||
│ └── ...
|
||||
└── test
|
||||
├── class_x
|
||||
│ ├── test_x_1.png
|
||||
│ ├── test_x_2.png
|
||||
│ ├── test_x_3.png
|
||||
│ └── ...
|
||||
├── class_y
|
||||
└── ...
|
||||
```
|
||||
|
||||
## 第二步:选择一个配置文件作为模板
|
||||
|
||||
在这里,我们使用 `configs/resnet/resnet50_8xb32_in1k.py` 作为示例。
|
||||
首先在同一文件夹下复制一份配置文件,并将其重命名为 `resnet50_8xb32-ft_custom.py`。
|
||||
|
||||
```{tip}
|
||||
按照惯例,配置名称的最后一个字段是数据集,例如,`in1k` 表示 ImageNet-1k,`coco` 表示 coco 数据集
|
||||
```
|
||||
|
||||
这个配置的内容是:
|
||||
|
||||
```python
|
||||
_base_ = [
|
||||
'../_base_/models/resnet50.py', # 模型设置
|
||||
'../_base_/datasets/imagenet_bs32.py', # 数据设置
|
||||
'../_base_/schedules/imagenet_bs256.py', # 训练策略设置
|
||||
'../_base_/default_runtime.py', # 运行设置
|
||||
]
|
||||
```
|
||||
|
||||
## 第三步:修改模型设置
|
||||
|
||||
在进行模型微调时,我们通常希望在主干网络(backbone)加载预训练模型,再用我们的数据集训练一个新的分类头(head)。
|
||||
|
||||
为了在主干网络加载预训练模型,我们需要修改主干网络的初始化设置,使用
|
||||
`Pretrained` 类型的初始化函数。另外,在初始化设置中,我们使用 `prefix='backbone'`
|
||||
来告诉初始化函数需要加载的子模块的前缀,`backbone`即指加载模型中的主干网络。
|
||||
方便起见,我们这里使用一个在线的权重文件链接,它
|
||||
会在训练前自动下载对应的文件,你也可以提前下载这个模型,然后使用本地路径。
|
||||
|
||||
接下来,新的配置文件需要按照新数据集的类别数目来修改分类头的配置。只需要修改分
|
||||
类头中的 `num_classes` 设置即可。
|
||||
|
||||
另外,当新的小数据集和原本预训练的大数据集中的数据分布较为类似的话,我们在进行微调时会希望
|
||||
冻结主干网络前面几层的参数,只训练后面层以及分类头的参数,这么做有助于在后续训练中,
|
||||
保持网络从预训练权重中获得的提取低阶特征的能力。在 MMPretrain 中,
|
||||
这一功能可以通过简单的一个 `frozen_stages` 参数来实现。比如我们需要冻结前两层网
|
||||
络的参数,只需要在上面的配置中添加一行:
|
||||
|
||||
```{note}
|
||||
注意,目前并非所有的主干网络都支持 `frozen_stages` 参数。请检查[文档](https://mmpretrain.readthedocs.io/en/main/api.html#module-mmpretrain.models.backbones)
|
||||
确认使用的主干网络是否支持这一参数。
|
||||
```
|
||||
|
||||
```python
|
||||
_base_ = [
|
||||
'../_base_/models/resnet50.py', # 模型设置
|
||||
'../_base_/datasets/imagenet_bs32.py', # 数据设置
|
||||
'../_base_/schedules/imagenet_bs256.py', # 训练策略设置
|
||||
'../_base_/default_runtime.py', # 运行设置
|
||||
]
|
||||
|
||||
# >>>>>>>>>>>>>>> 在这里重载模型相关配置 >>>>>>>>>>>>>>>>>>>
|
||||
model = dict(
|
||||
backbone=dict(
|
||||
frozen_stages=2,
|
||||
init_cfg=dict(
|
||||
type='Pretrained',
|
||||
checkpoint='https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_8xb32_in1k_20210831-ea4938fc.pth',
|
||||
prefix='backbone',
|
||||
)),
|
||||
head=dict(num_classes=10),
|
||||
)
|
||||
# <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
|
||||
```
|
||||
|
||||
```{tip}
|
||||
这里我们只需要设定我们想要修改的部分配置,其他配置将会自动从我们的基配置文件中获取。
|
||||
```
|
||||
|
||||
## 第四步:修改数据集设置
|
||||
|
||||
为了在新数据集上进行微调,我们需要覆盖一些数据集设置,例如数据集类型、数据流水线等。
|
||||
|
||||
```python
|
||||
_base_ = [
|
||||
'../_base_/models/resnet50.py', # 模型设置
|
||||
'../_base_/datasets/imagenet_bs32.py', # 数据设置
|
||||
'../_base_/schedules/imagenet_bs256.py', # 训练策略设置
|
||||
'../_base_/default_runtime.py', # 运行设置
|
||||
]
|
||||
|
||||
# 模型设置
|
||||
model = dict(
|
||||
backbone=dict(
|
||||
frozen_stages=2,
|
||||
init_cfg=dict(
|
||||
type='Pretrained',
|
||||
checkpoint='https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_8xb32_in1k_20210831-ea4938fc.pth',
|
||||
prefix='backbone',
|
||||
)),
|
||||
head=dict(num_classes=10),
|
||||
)
|
||||
|
||||
# >>>>>>>>>>>>>>> 在这里重载数据配置 >>>>>>>>>>>>>>>>>>>
|
||||
data_root = 'data/custom_dataset'
|
||||
train_dataloader = dict(
|
||||
dataset=dict(
|
||||
type='CustomDataset',
|
||||
data_root=data_root,
|
||||
ann_file='', # 我们假定使用子文件夹格式,因此需要将标注文件置空
|
||||
data_prefix='train',
|
||||
))
|
||||
val_dataloader = dict(
|
||||
dataset=dict(
|
||||
type='CustomDataset',
|
||||
data_root=data_root,
|
||||
ann_file='', # 我们假定使用子文件夹格式,因此需要将标注文件置空
|
||||
data_prefix='test',
|
||||
))
|
||||
test_dataloader = val_dataloader
|
||||
# <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
|
||||
```
|
||||
|
||||
## 第五步:修改训练策略设置(可选)
|
||||
|
||||
微调所使用的训练超参数一般与默认的超参数不同,它通常需要更小的学习率和更快的学习率衰减。
|
||||
|
||||
```python
|
||||
_base_ = [
|
||||
'../_base_/models/resnet50.py', # 模型设置
|
||||
'../_base_/datasets/imagenet_bs32.py', # 数据设置
|
||||
'../_base_/schedules/imagenet_bs256.py', # 训练策略设置
|
||||
'../_base_/default_runtime.py', # 运行设置
|
||||
]
|
||||
|
||||
# 模型设置
|
||||
model = dict(
|
||||
backbone=dict(
|
||||
frozen_stages=2,
|
||||
init_cfg=dict(
|
||||
type='Pretrained',
|
||||
checkpoint='https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_8xb32_in1k_20210831-ea4938fc.pth',
|
||||
prefix='backbone',
|
||||
)),
|
||||
head=dict(num_classes=10),
|
||||
)
|
||||
|
||||
# 数据设置
|
||||
data_root = 'data/custom_dataset'
|
||||
train_dataloader = dict(
|
||||
dataset=dict(
|
||||
type='CustomDataset',
|
||||
data_root=data_root,
|
||||
ann_file='',
|
||||
data_prefix='train',
|
||||
))
|
||||
val_dataloader = dict(
|
||||
dataset=dict(
|
||||
type='CustomDataset',
|
||||
data_root=data_root,
|
||||
ann_file='',
|
||||
data_prefix='test',
|
||||
))
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
# >>>>>>>>>>>>>>> 在这里重载训练策略设置 >>>>>>>>>>>>>>>>>>>
|
||||
# 优化器超参数
|
||||
optim_wrapper = dict(
|
||||
optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001))
|
||||
# 学习率策略
|
||||
param_scheduler = dict(
|
||||
type='MultiStepLR', by_epoch=True, milestones=[15], gamma=0.1)
|
||||
# <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
|
||||
```
|
||||
|
||||
```{tip}
|
||||
更多关于配置文件的信息,请参阅[学习配置文件](../user_guides/config.md)
|
||||
```
|
||||
|
||||
## 开始训练
|
||||
|
||||
现在,我们完成了用于微调的配置文件,完整的文件如下:
|
||||
|
||||
```python
|
||||
_base_ = [
|
||||
'../_base_/models/resnet50.py', # 模型设置
|
||||
'../_base_/datasets/imagenet_bs32.py', # 数据设置
|
||||
'../_base_/schedules/imagenet_bs256.py', # 训练策略设置
|
||||
'../_base_/default_runtime.py', # 运行设置
|
||||
]
|
||||
|
||||
# 模型设置
|
||||
model = dict(
|
||||
backbone=dict(
|
||||
frozen_stages=2,
|
||||
init_cfg=dict(
|
||||
type='Pretrained',
|
||||
checkpoint='https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_8xb32_in1k_20210831-ea4938fc.pth',
|
||||
prefix='backbone',
|
||||
)),
|
||||
head=dict(num_classes=10),
|
||||
)
|
||||
|
||||
# 数据设置
|
||||
data_root = 'data/custom_dataset'
|
||||
train_dataloader = dict(
|
||||
dataset=dict(
|
||||
type='CustomDataset',
|
||||
data_root=data_root,
|
||||
ann_file='',
|
||||
data_prefix='train',
|
||||
))
|
||||
val_dataloader = dict(
|
||||
dataset=dict(
|
||||
type='CustomDataset',
|
||||
data_root=data_root,
|
||||
ann_file='',
|
||||
data_prefix='test',
|
||||
))
|
||||
test_dataloader = val_dataloader
|
||||
|
||||
# 训练策略设置
|
||||
optim_wrapper = dict(
|
||||
optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001))
|
||||
param_scheduler = dict(
|
||||
type='MultiStepLR', by_epoch=True, milestones=[15], gamma=0.1)
|
||||
```
|
||||
|
||||
接下来,我们使用一台 8 张 GPU 的电脑来训练我们的模型,指令如下:
|
||||
|
||||
```shell
|
||||
bash tools/dist_train.sh configs/resnet/resnet50_8xb32-ft_custom.py 8
|
||||
```
|
||||
|
||||
当然,我们也可以使用单张 GPU 来进行训练,使用如下命令:
|
||||
|
||||
```shell
|
||||
python tools/train.py configs/resnet/resnet50_8xb32-ft_custom.py
|
||||
```
|
||||
|
||||
但是如果我们使用单张 GPU 进行训练的话,需要在数据集设置部分作如下修改:
|
||||
|
||||
```python
|
||||
data_root = 'data/custom_dataset'
|
||||
train_dataloader = dict(
|
||||
batch_size=256,
|
||||
dataset=dict(
|
||||
type='CustomDataset',
|
||||
data_root=data_root,
|
||||
ann_file='',
|
||||
data_prefix='train',
|
||||
))
|
||||
val_dataloader = dict(
|
||||
dataset=dict(
|
||||
type='CustomDataset',
|
||||
data_root=data_root,
|
||||
ann_file='',
|
||||
data_prefix='test',
|
||||
))
|
||||
test_dataloader = val_dataloader
|
||||
```
|
||||
|
||||
这是因为我们的训练策略是针对批次大小(batch size)为 256 设置的。在父配置文件中,
|
||||
设置了单张 `batch_size=32`,如果使用 8 张 GPU,总的批次大小就是 256。而如果使
|
||||
用单张 GPU,就必须手动修改 `batch_size=256` 来匹配训练策略。
|
||||
|
||||
然而,更大的批次大小需要更大的 GPU 显存,这里有几个简单的技巧来节省显存:
|
||||
|
||||
1. 启用自动混合精度训练
|
||||
|
||||
```shell
|
||||
python tools/train.py configs/resnet/resnet50_8xb32-ft_custom.py --amp
|
||||
```
|
||||
|
||||
2. 使用较小的批次大小,例如仍然使用 `batch_size=32`,而不是 256,并启用学习率自动缩放
|
||||
|
||||
```shell
|
||||
python tools/train.py configs/resnet/resnet50_8xb32-ft_custom.py --auto-scale-lr
|
||||
```
|
||||
|
||||
学习率自动缩放功能会根据实际的 batch size 和配置文件中的 `auto_scale_lr.base_batch_size`
|
||||
字段对学习率进行线性调整(你可以在基配置文件 `configs/_base_/schedules/imagenet_bs256.py`
|
||||
中找到这一字段)。
|
||||
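学习率线性缩放的规则可以用下面的 Python 片段示意(仅作说明,并非 MMEngine 的实际实现;`base_lr=0.01` 与 `base_batch_size=256` 对应上面的配置):

```python
# 按线性缩放规则,学习率与实际总批次大小成正比。
base_lr = 0.01
base_batch_size = 256

def scaled_lr(total_batch_size: int) -> float:
    """根据实际总批次大小线性缩放学习率。"""
    return base_lr * total_batch_size / base_batch_size

print(scaled_lr(32))   # 单卡 batch_size=32 -> 0.00125
print(scaled_lr(256))  # 8 卡 x 32,或单卡 batch_size=256 -> 0.01
```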
|
||||
```{note}
|
||||
以上技巧都有可能对训练效果造成轻微影响。
|
||||
```
|
||||
|
||||
### 在命令行指定预训练模型
|
||||
|
||||
如果您不想修改配置文件,您可以使用 `--cfg-options` 将您的预训练模型文件添加到 `init_cfg` 中。
|
||||
|
||||
例如,以下命令也会加载预训练模型:
|
||||
|
||||
```shell
|
||||
bash tools/dist_train.sh configs/tutorial/resnet50_finetune_cifar.py 8 \
|
||||
--cfg-options model.backbone.init_cfg.type='Pretrained' \
|
||||
model.backbone.init_cfg.checkpoint='https://download.openmmlab.com/mmselfsup/1.x/mocov3/mocov3_resnet50_8xb512-amp-coslr-100e_in1k/mocov3_resnet50_8xb512-amp-coslr-100e_in1k_20220927-f1144efa.pth' \
|
||||
model.backbone.init_cfg.prefix='backbone'
|
||||
```
|
|
@ -1,27 +1,33 @@
|
|||
# 教程 4: 使用自定义数据集进行预训练
|
||||
# 如何在自定义数据集上进行模型预训练
|
||||
|
||||
- [教程 4: 使用自定义数据集进行预训练](#教程-4-使用自定义数据集进行预训练)
|
||||
- [在自定义数据集上使用 MAE 算法进行预训练](#在自定义数据集上使用-mae-算法进行预训练)
|
||||
- [第一步:获取自定义数据路径](#第一步获取自定义数据路径)
|
||||
- [第二步:选择一个配置文件作为模板](#第二步选择一个配置文件作为模板)
|
||||
- [第三步:修改数据集相关的配置](#第三步修改数据集相关的配置)
|
||||
- [在 COCO 数据集上使用 MAE 算法进行预训练](#在-coco-数据集上使用-mae-算法进行预训练)
|
||||
在本教程中,我们提供了一个实践示例和一些有关如何在您自己的数据集上进行训练的技巧。
|
||||
|
||||
在本教程中,我们将介绍如何使用自定义数据集(无需标注)进行自监督预训练。
|
||||
在 MMPretrain 中,我们支持用户直接调用 MMPretrain 的 `CustomDataset`(类似于 `torchvision` 的 `ImageFolder`),该数据集能自动地读取给定路径下的图片。你只需要准备你的数据集路径,并修改配置文件,即可轻松使用 MMPretrain 进行预训练。
|
||||
|
||||
## 在自定义数据集上使用 MAE 算法进行预训练
|
||||
## 第一步:准备你的数据集
|
||||
|
||||
在 MMPretrain 中, 我们支持用户直接调用 MMPretrain 的 `CustomDataset` (类似于 `torchvision` 的 `ImageFolder`), 该数据集能自动的读取给的路径下的图片。你只需要准备你的数据集路径,并修改配置文件,即可轻松使用 MMPretrain 进行预训练。
|
||||
按照 [准备数据集](../user_guides/dataset_prepare.md) 准备你的数据集。
|
||||
假设我们的数据集根文件夹路径为 `data/custom_dataset/`
|
||||
|
||||
### 第一步:获取自定义数据路径
|
||||
假设我们想使用 MAE 算法进行图像自监督训练,并使用子文件夹格式的 `CustomDataset` 来组织数据集:
|
||||
|
||||
路径应类似这种形式: `data/custom_dataset/`
|
||||
```text
|
||||
data/custom_dataset/
|
||||
├── sample1.png
|
||||
├── sample2.png
|
||||
├── sample3.png
|
||||
├── sample4.png
|
||||
└── ...
|
||||
```
|
||||
|
||||
### 第二步:选择一个配置文件作为模板
|
||||
## 第二步:选择一个配置文件作为模板
|
||||
|
||||
在本教程中,我们使用 `configs/selfsup/mae/mae_vit-base-p16_8xb512-coslr-400e_in1k.py`作为一个示例进行讲解。我们首先复制这个配置文件,将新复制的文件命名为`mae_vit-base-p16_8xb512-coslr-400e_${custom_dataset}.py`.
|
||||
在本教程中,我们使用 `configs/mae/mae_vit-base-p16_8xb512-amp-coslr-300e_in1k.py` 作为一个示例进行介绍。
|
||||
首先在同一文件夹下复制一份配置文件,并将其重命名为 `mae_vit-base-p16_8xb512-amp-coslr-300e_custom.py`。
|
||||
|
||||
- `custom_dataset`: 表明你用的那个数据集。例如,用 `in1k` 代表ImageNet 数据集,`coco` 代表COCO数据集。
|
||||
```{tip}
|
||||
按照惯例,配置名称的最后一个字段是数据集,例如,`in1k` 表示 ImageNet-1k,`coco` 表示 coco 数据集
|
||||
```
|
||||
|
||||
这个配置文件的内容如下:
|
||||
|
||||
|
@ -31,6 +37,7 @@ _base_ = [
|
|||
'../_base_/datasets/imagenet_bs512_mae.py',
|
||||
'../_base_/default_runtime.py',
|
||||
]
|
||||
|
||||
# optimizer wrapper
|
||||
optim_wrapper = dict(
|
||||
type='AmpOptimWrapper',
|
||||
|
@ -48,6 +55,7 @@ optim_wrapper = dict(
|
|||
'mask_token': dict(decay_mult=0.),
|
||||
'cls_token': dict(decay_mult=0.)
|
||||
}))
|
||||
|
||||
# learning rate scheduler
|
||||
param_scheduler = [
|
||||
dict(
|
||||
|
@ -65,49 +73,51 @@ param_scheduler = [
|
|||
end=300,
|
||||
convert_to_iter_based=True)
|
||||
]
|
||||
|
||||
# runtime settings
|
||||
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=300)
|
||||
default_hooks = dict(
|
||||
# only keeps the latest 3 checkpoints
|
||||
checkpoint=dict(type='CheckpointHook', interval=1, max_keep_ckpts=3))
|
||||
|
||||
randomness = dict(seed=0, diff_rank_seed=True)
|
||||
|
||||
# auto resume
|
||||
resume = True
|
||||
|
||||
# NOTE: `auto_scale_lr` is for automatically scaling LR
|
||||
# based on the actual training batch size.
|
||||
auto_scale_lr = dict(base_batch_size=4096)
|
||||
```
|
||||
|
||||
### 第三步:修改数据集相关的配置
|
||||
## 第三步:修改数据集设置
|
||||
|
||||
数据集相关的配置是定义在 `_base_`的`'../_base_/datasets/imagenet_mae.py'` 文件内。我们直接将其内容复制到刚刚创建的新的配置文件 `mae_vit-base-p16_8xb512-coslr-400e_${custom_dataset}.py` 中.
|
||||
- 重载数据集设置中的 `type` 为 `'CustomDataset'`
|
||||
- 重载数据集设置中的 `data_root` 为 `data/custom_dataset`
|
||||
- 重载数据集设置中的 `ann_file` 为空字符串,这是因为我们使用子文件夹格式的 `CustomDataset`,需要将标注文件置空
|
||||
- 重载数据集设置中的 `data_prefix` 为空字符串,这是因为我们希望使用数据集根目录下的所有数据进行训练,并不需要将其拆分为不同子集。
|
||||
|
||||
- 修改`dataset_type = 'CustomDataset'`和` data_root = /dataset/my_custom_dataset`.
|
||||
- 删除 `train_dataloader`中的 `ann_file` ,同时根据自己的实际情况决定是否需要设定 `data_prefix`。
|
||||
|
||||
```{note}
|
||||
`CustomDataset` 是在 MMPretrain 实现的, 因此我们使用这种方式 `dataset_type=CustomDataset` 来使用这个类。
|
||||
```
|
||||
|
||||
此时,修改后的文件应如下:
|
||||
修改后的文件应如下:
|
||||
|
||||
```python
|
||||
# >>>>>>>>>>>>>>>>>>>>> Start of Changed >>>>>>>>>>>>>>>>>>>>>>>>>
|
||||
_base_ = [
|
||||
'../_base_/models/mae_vit-base-p16.py',
|
||||
'../_base_/datasets/imagenet_mae.py',
|
||||
'../_base_/datasets/imagenet_bs512_mae.py',
|
||||
'../_base_/default_runtime.py',
|
||||
]
|
||||
# custom dataset
|
||||
dataset_type = 'CustomDataset'
|
||||
data_root = 'data/custom_dataset/'
|
||||
|
||||
# >>>>>>>>>>>>>>> 在此重载数据设置 >>>>>>>>>>>>>>>>>>>
|
||||
train_dataloader = dict(
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root=data_root,
|
||||
# ann_file='meta/train.txt', # removed if you don't have the annotation file
|
||||
data_prefix=dict(img_path='./'))
|
||||
# <<<<<<<<<<<<<<<<<<<<<< End of Changed <<<<<<<<<<<<<<<<<<<<<<<<<<<
|
||||
type='CustomDataset',
|
||||
data_root='data/custom_dataset/',
|
||||
ann_file='', # 我们假定使用子文件夹格式,因此需要将标注文件置空
|
||||
data_prefix='', # 使用 `data_root` 路径下所有数据
|
||||
with_label=False,
|
||||
)
|
||||
)
|
||||
# <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
|
||||
|
||||
# optimizer wrapper
|
||||
optim_wrapper = dict(
|
||||
type='AmpOptimWrapper',
|
||||
|
@ -125,6 +135,7 @@ optim_wrapper = dict(
|
|||
'mask_token': dict(decay_mult=0.),
|
||||
'cls_token': dict(decay_mult=0.)
|
||||
}))
|
||||
|
||||
# learning rate scheduler
|
||||
param_scheduler = [
|
||||
dict(
|
||||
|
@ -142,14 +153,18 @@ param_scheduler = [
|
|||
end=300,
|
||||
convert_to_iter_based=True)
|
||||
]
|
||||
|
||||
# runtime settings
|
||||
train_cfg = dict(type='EpochBasedTrainLoop', max_epochs=300)
|
||||
default_hooks = dict(
|
||||
# only keeps the latest 3 checkpoints
|
||||
checkpoint=dict(type='CheckpointHook', interval=1, max_keep_ckpts=3))
|
||||
|
||||
randomness = dict(seed=0, diff_rank_seed=True)
|
||||
|
||||
# auto resume
|
||||
resume = True
|
||||
|
||||
# NOTE: `auto_scale_lr` is for automatically scaling LR
|
||||
# based on the actual training batch size.
|
||||
auto_scale_lr = dict(base_batch_size=4096)
|
||||
|
@ -157,7 +172,7 @@ auto_scale_lr = dict(base_batch_size=4096)
|
|||
|
||||
使用上述配置文件,你就能够轻松地在自定义数据集上使用 `MAE` 算法来进行预训练了。
|
||||
|
||||
## 在 COCO 数据集上使用 MAE 算法进行预训练
|
||||
## 另一个例子:在 COCO 数据集上训练 MAE
|
||||
|
||||
```{note}
|
||||
你可能需要参考[文档](https://github.com/open-mmlab/mmdetection/blob/3.x/docs/en/get_started.md)安装 MMDetection 来使用 `mmdet.CocoDataset`。
|
||||
|
@ -172,16 +187,18 @@ _base_ = [
|
|||
'../_base_/datasets/imagenet_mae.py',
|
||||
'../_base_/default_runtime.py',
|
||||
]
|
||||
# custom dataset
|
||||
dataset_type = 'mmdet.CocoDataset'
|
||||
data_root = 'data/coco/'
|
||||
|
||||
# >>>>>>>>>>>>>>> 在这里重载数据配置 >>>>>>>>>>>>>>>>>>>
|
||||
train_dataloader = dict(
|
||||
dataset=dict(
|
||||
type=dataset_type,
|
||||
data_root=data_root,
|
||||
ann_file='annotations/instances_train2017.json',
|
||||
data_prefix=dict(img='train2017/')))
|
||||
# <<<<<<<<<<<<<<<<<<<<<< End of Changed <<<<<<<<<<<<<<<<<<<<<<<<<<<
|
||||
type='mmdet.CocoDataset',
|
||||
data_root='data/coco/',
|
||||
ann_file='annotations/instances_train2017.json', # 仅用于加载图片,不会使用标签
|
||||
data_prefix=dict(img='train2017/'),
|
||||
)
|
||||
)
|
||||
# <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<
|
||||
|
||||
# optimizer wrapper
|
||||
optim_wrapper = dict(
|
||||
type='AmpOptimWrapper',
|
||||
|
|
|
@ -5,17 +5,18 @@ from collections import defaultdict
|
|||
from pathlib import Path
|
||||
|
||||
from modelindex.load_model_index import load
|
||||
from modelindex.models.Result import Result
|
||||
from tabulate import tabulate
|
||||
|
||||
MMPT_ROOT = Path(__file__).absolute().parents[2]
|
||||
PAPERS_ROOT = Path('papers') # Path to save generated paper pages.
|
||||
GITHUB_PREFIX = 'https://github.com/open-mmlab/mmpretrain/blob/1.x/'
|
||||
MODELZOO_TEMPLATE = """
|
||||
MODELZOO_TEMPLATE = """\
|
||||
# 模型库统计
|
||||
|
||||
在本页面中,我们列举了我们支持的[所有算法](#所有已支持的算法)。你可以点击链接跳转至对应的模型详情页面。
|
||||
|
||||
另外,我们还列出了我们提供的[所有模型权重文件](#所有模型权重文件)。你可以使用排序和搜索功能找到需要的模型权重,并使用链接跳转至模型详情页面。
|
||||
另外,我们还列出了我们提供的所有模型权重文件。你可以使用排序和搜索功能找到需要的模型权重,并使用链接跳转至模型详情页面。
|
||||
|
||||
## 所有已支持的算法
|
||||
|
||||
|
@ -25,9 +26,13 @@ MODELZOO_TEMPLATE = """
|
|||
* 模型权重文件数量:{num_ckpts}
|
||||
{paper_msg}
|
||||
|
||||
## 所有模型权重文件
|
||||
""" # noqa: E501
|
||||
|
||||
METRIC_ALIAS = {
|
||||
'Top 1 Accuracy': 'Top-1 (%)',
|
||||
'Top 5 Accuracy': 'Top-5 (%)',
|
||||
}
|
||||
|
||||
model_index = load(str(MMPT_ROOT / 'model-index.yml'))
|
||||
|
||||
|
||||
|
@ -41,6 +46,10 @@ def build_collections(model_index):
|
|||
col = col_by_name[model.in_collection]
|
||||
col.models.append(model)
|
||||
setattr(model, 'collection', col)
|
||||
if model.results is None:
|
||||
setattr(model, 'tasks', [])
|
||||
else:
|
||||
setattr(model, 'tasks', [result.task for result in model.results])
|
||||
|
||||
|
||||
build_collections(model_index)
|
||||
|
@ -139,41 +148,99 @@ for collection in model_index.collections:
|
|||
generate_paper_page(collection)
|
||||
|
||||
|
||||
def generate_summary_table(models):
|
||||
dataset_rows = defaultdict(list)
|
||||
def scatter_results(models):
|
||||
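# Flatten each model into (model, result) pairs; a model without any
# results contributes one pair with an empty placeholder Result.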
model_result_pairs = []
|
||||
for model in models:
|
||||
if model.results is None:
|
||||
result = Result(task=None, dataset=None, metrics={})
|
||||
model_result_pairs.append((model, result))
|
||||
else:
|
||||
for result in model.results:
|
||||
model_result_pairs.append((model, result))
|
||||
return model_result_pairs
|
||||
|
||||
|
||||
def generate_summary_table(task, model_result_pairs, title=None):
|
||||
metrics = set()
|
||||
for model, result in model_result_pairs:
|
||||
if result.task == task:
|
||||
metrics = metrics.union(result.metrics.keys())
|
||||
metrics = sorted(list(metrics))
|
||||
|
||||
rows = []
|
||||
for model, result in model_result_pairs:
|
||||
if result.task != task:
|
||||
continue
|
||||
name = model.name
|
||||
params = f'{model.metadata.parameters / 1e6:.2f}' # Params
|
||||
flops = f'{model.metadata.flops / 1e9:.2f}'  # FLOPs
|
||||
result = model.results[0]
|
||||
top1 = result.metrics.get('Top 1 Accuracy')
|
||||
top5 = result.metrics.get('Top 5 Accuracy')
|
||||
readme = Path(model.collection.filepath).parent.with_suffix('.md').name
|
||||
page = f'[链接]({PAPERS_ROOT / readme})'
|
||||
row = [name, params, flops, top1, top5, page]
|
||||
dataset_rows[result.dataset].append(row)
|
||||
model_metrics = []
|
||||
for metric in metrics:
|
||||
model_metrics.append(str(result.metrics.get(metric, '')))
|
||||
|
||||
rows.append([name, params, flops, *model_metrics, page])
|
||||
|
||||
with open('modelzoo_statistics.md', 'a') as f:
|
||||
for dataset, rows in dataset_rows.items():
|
||||
f.write(f'\n### {dataset}\n')
|
||||
f.write("""```{table}\n:class: model-summary\n""")
|
||||
header = [
|
||||
'模型',
|
||||
'参数量 (M)',
|
||||
'Flops (G)',
|
||||
'Top-1 (%)',
|
||||
'Top-5 (%)',
|
||||
'Readme',
|
||||
]
|
||||
table_cfg = dict(
|
||||
tablefmt='pipe',
|
||||
floatfmt='.2f',
|
||||
numalign='right',
|
||||
stralign='center')
|
||||
f.write(tabulate(rows, header, **table_cfg))
|
||||
f.write('\n```\n')
|
||||
if title is not None:
|
||||
f.write(f'\n{title}')
|
||||
f.write("""\n```{table}\n:class: model-summary\n""")
|
||||
header = [
|
||||
'模型',
|
||||
'参数量 (M)',
|
||||
'Flops (G)',
|
||||
*[METRIC_ALIAS.get(metric, metric) for metric in metrics],
|
||||
'Readme',
|
||||
]
|
||||
table_cfg = dict(
|
||||
tablefmt='pipe',
|
||||
floatfmt='.2f',
|
||||
numalign='right',
|
||||
stralign='center')
|
||||
f.write(tabulate(rows, header, **table_cfg))
|
||||
f.write('\n```\n')
|
||||
|
||||
|
||||
generate_summary_table(model_index.models)
|
||||
def generate_dataset_wise_table(task, model_result_pairs, title=None):
|
||||
dataset_rows = defaultdict(list)
|
||||
for model, result in model_result_pairs:
|
||||
if result.task == task:
|
||||
dataset_rows[result.dataset].append((model, result))
|
||||
|
||||
if title is not None:
|
||||
with open('modelzoo_statistics.md', 'a') as f:
|
||||
f.write(f'\n{title}')
|
||||
for dataset, pairs in dataset_rows.items():
|
||||
generate_summary_table(task, pairs, title=f'### {dataset}')
|
||||
|
||||
|
||||
model_result_pairs = scatter_results(model_index.models)
|
||||
|
||||
# Generate Pretrain Summary
|
||||
generate_summary_table(
|
||||
task=None,
|
||||
model_result_pairs=model_result_pairs,
|
||||
title='## 预训练模型',
|
||||
)
|
||||
|
||||
# Generate Image Classification Summary
|
||||
generate_dataset_wise_table(
|
||||
task='Image Classification',
|
||||
model_result_pairs=model_result_pairs,
|
||||
title='## 图像分类',
|
||||
)
|
||||
|
||||
# Generate Multi-Label Classification Summary
|
||||
generate_dataset_wise_table(
|
||||
task='Multi-Label Classification',
|
||||
model_result_pairs=model_result_pairs,
|
||||
title='## 图像多标签分类',
|
||||
)
|
||||
|
||||
# Generate Image Retrieval Summary
|
||||
generate_dataset_wise_table(
|
||||
task='Image Retrieval',
|
||||
model_result_pairs=model_result_pairs,
|
||||
title='## 图像检索',
|
||||
)
|
||||
|
|
|
@ -15,6 +15,8 @@ MMPretrain 提供 `tools/visualization/vis_cam.py` 工具来可视化类别激
|
|||
| EigenGradCAM | 类似 EigenCAM,但支持类别区分,使用了激活 * 梯度的第一主成分,看起来和 GradCAM 差不多,但是更干净 |
|
||||
| LayerCAM | 使用正梯度对激活进行空间加权,对于浅层有更好的效果 |
|
||||
|
||||
也可以使用新版本 `pytorch-grad-cam` 支持的更多 CAM 方法,但我们尚未验证可用性。
|
||||
|
||||
**命令行**:
|
||||
|
||||
```bash
|
||||
|
@ -40,7 +42,7 @@ python tools/visualization/vis_cam.py \
|
|||
- `img`:目标图片路径。
|
||||
- `config`:模型配置文件的路径。
|
||||
- `checkpoint`:权重路径。
|
||||
- `--target-layers`:所查看的网络层名称,可输入一个或者多个网络层, 如果不设置,将使用最后一个`block`中的`norm`层。
|
||||
- `--target-layers`:所查看的网络层名称,可输入一个或者多个网络层,如果不设置,将使用最后一个`block`中的`norm`层。
|
||||
- `--preview-model`:是否查看模型所有网络层。
|
||||
- `--method`:类别激活图可视化的方法,目前支持 `GradCAM`, `GradCAM++`, `XGradCAM`, `EigenCAM`, `EigenGradCAM`, `LayerCAM`,不区分大小写。如果不设置,默认为 `GradCAM`。
|
||||
- `--target-category`:查看的目标类别,如果不设置,使用模型检测出来的类别作为目标类别。
|
||||
|
@ -58,16 +60,12 @@ python tools/visualization/vis_cam.py \
|
|||
|
||||
## 如何可视化 CNN 网络的类别激活图(如 ResNet-50)
|
||||
|
||||
`--target-layers` 在 `Resnet-50` 中的一些示例如下:
|
||||
`--target-layers` 在 `Resnet-50` 中的一些示例如下:
|
||||
|
||||
- `'backbone.layer4'`,表示第四个 `ResLayer` 层的输出。
|
||||
- `'backbone.layer4.2'` 表示第四个 `ResLayer` 层中第三个 `BottleNeck` 块的输出。
|
||||
- `'backbone.layer4.2.conv1'` 表示上述 `BottleNeck` 块中 `conv1` 层的输出。
|
||||
|
||||
```{note}
|
||||
对于 `ModuleList` 或者 `Sequential` 类型的网络层,可以直接使用索引的方式指定子模块。比如 `backbone.layer4[-1]` 和 `backbone.layer4.2` 是相同的,因为 `layer4` 是一个拥有三个子模块的 `Sequential`。
|
||||
```
|
||||
|
||||
1. 使用不同方法可视化 `ResNet50`,默认 `target-category` 为模型检测的结果,使用默认推导的 `target-layers`。
|
||||
|
||||
```shell
|
||||
|
@ -83,7 +81,7 @@ python tools/visualization/vis_cam.py \
|
|||
| ------------------------------------ | --------------------------------------- | ----------------------------------------- | -------------------------------------------- | ---------------------------------------- |
|
||||
| <div align=center><img src='https://user-images.githubusercontent.com/18586273/144429496-628d3fb3-1f6e-41ff-aa5c-1b08c60c32a9.JPEG' height="auto" width="160" ></div> | <div align=center><img src='https://user-images.githubusercontent.com/18586273/147065002-f1c86516-38b2-47ba-90c1-e00b49556c70.jpg' height="auto" width="150" ></div> | <div align=center><img src='https://user-images.githubusercontent.com/18586273/147065119-82581fa1-3414-4d6c-a849-804e1503c74b.jpg' height="auto" width="150"></div> | <div align=center><img src='https://user-images.githubusercontent.com/18586273/147065096-75a6a2c1-6c57-4789-ad64-ebe5e38765f4.jpg' height="auto" width="150"></div> | <div align=center><img src='https://user-images.githubusercontent.com/18586273/147065129-814d20fb-98be-4106-8c5e-420adcc85295.jpg' height="auto" width="150"></div> |
|
||||
|
||||
2. 同一张图不同类别的激活图效果图,在 `ImageNet` 数据集中,类别238为 'Greater Swiss Mountain dog',类别281为 'tabby, tabby cat'。
|
||||
2. 同一张图不同类别的激活图效果图,在 `ImageNet` 数据集中,类别 238 为 'Greater Swiss Mountain dog',类别 281 为 'tabby, tabby cat'。
|
||||
|
||||
```shell
|
||||
python tools/visualization/vis_cam.py \
|
||||
|
@ -118,10 +116,10 @@ python tools/visualization/vis_cam.py \
|
|||
|
||||
## 如何可视化 Transformer 类型网络的类别激活图
|
||||
|
||||
`--target-layers` 在 Transformer-based 网络中的一些示例如下:
|
||||
`--target-layers` 在 Transformer-based 网络中的一些示例如下:
|
||||
|
||||
- Swin-Transformer 中:`'backbone.norm3'`
|
||||
- ViT 中:`'backbone.layers[-1].ln1'`
|
||||
- ViT 中:`'backbone.layers.11.ln1'`
|
||||
|
||||
对于 Transformer-based 的网络,比如 ViT、T2T-ViT 和 Swin-Transformer,特征是被展平的。为了绘制 CAM 图,我们需要指定 `--vit-like` 选项,从而让被展平的特征恢复方形的特征图。
|
||||
|
||||
|
@ -144,10 +142,10 @@ python tools/visualization/vis_cam.py \
|
|||
```shell
|
||||
python tools/visualization/vis_cam.py \
|
||||
demo/bird.JPEG \
|
||||
configs/vision_transformer/vit-base-p16_ft-64xb64_in1k-384.py \
|
||||
configs/vision_transformer/vit-base-p16_64xb64_in1k-384px.py \
|
||||
https://download.openmmlab.com/mmclassification/v0/vit/finetune/vit-base-p16_in21k-pre-3rdparty_ft-64xb64_in1k-384_20210928-98e8652b.pth \
|
||||
--vit-like \
|
||||
--target-layers 'backbone.layers[-1].ln1'
|
||||
--target-layers 'backbone.layers.11.ln1'
|
||||
```
|
||||
|
||||
3. 对 `T2T-ViT` 进行 CAM 可视化:
|
||||
|
@ -158,7 +156,7 @@ python tools/visualization/vis_cam.py \
|
|||
configs/t2t_vit/t2t-vit-t-14_8xb64_in1k.py \
|
||||
https://download.openmmlab.com/mmclassification/v0/t2t-vit/t2t-vit-t-14_3rdparty_8xb64_in1k_20210928-b7c09b62.pth \
|
||||
--vit-like \
|
||||
--target-layers 'backbone.encoder[-1].ln1'
|
||||
--target-layers 'backbone.encoder.12.ln1'
|
||||
```
|
||||
|
||||
| Image | ResNet50 | ViT | Swin | T2T-ViT |
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
# 模型复杂度分析
|
||||
|
||||
## 计算FLOPs 和参数数量(实验性的)
|
||||
## 计算 FLOPs 和参数数量(实验性的)
|
||||
|
||||
我们根据 [fvcore](https://github.com/facebookresearch/fvcore/blob/main/fvcore/nn/flop_count.py) 提供了一个脚本用于计算给定模型的 FLOPs 和参数量。
|
||||
我们基于 [MMEngine](https://github.com/open-mmlab/mmengine/blob/main/mmengine/analysis/complexity_analysis.py) 提供了一个脚本用于计算给定模型的 FLOPs 和参数量。
|
||||
|
||||
```shell
|
||||
python tools/analysis_tools/get_flops.py ${CONFIG_FILE} [--shape ${INPUT_SHAPE}]
|
||||
|
@ -13,52 +13,61 @@ python tools/analysis_tools/get_flops.py ${CONFIG_FILE} [--shape ${INPUT_SHAPE}]
|
|||
- `config` : 配置文件的路径。
|
||||
- `--shape`: 输入尺寸,支持单值或者双值, 如: `--shape 256`、`--shape 224 256`。默认为`224 224`。
|
||||
|
||||
示例:
|
||||
|
||||
```shell
|
||||
python tools/analysis_tools/get_flops.py configs/resnet/resnet50_8xb32_in1k.py
|
||||
```
|
||||
|
||||
你将获得如下结果:
|
||||
|
||||
```text
|
||||
==============================
|
||||
Input shape: (3, 224, 224)
|
||||
Flops: 17.582G
|
||||
Params: 91.234M
|
||||
Activation: 23.895M
|
||||
Flops: 4.109G
|
||||
Params: 25.557M
|
||||
Activation: 11.114M
|
||||
==============================
|
||||
```
|
||||
|
||||
同时,你会得到每层的详细复杂度信息,如下所示:
|
||||
同时,你会得到每层的详细复杂度信息,如下所示:
|
||||
|
||||
```text
|
||||
| module | #parameters or shape | #flops | #activations |
|
||||
|:------------------------------------------|:-----------------------|:----------|:---------------|
|
||||
| model | 91.234M | 17.582G | 23.895M |
|
||||
| backbone | 85.799M | 17.582G | 23.895M |
|
||||
| backbone.cls_token | (1, 1, 768) | | |
|
||||
| backbone.pos_embed | (1, 197, 768) | | |
|
||||
| backbone.patch_embed.projection | 0.591M | 0.116G | 0.151M |
|
||||
| backbone.patch_embed.projection.weight | (768, 3, 16, 16) | | |
|
||||
| backbone.patch_embed.projection.bias | (768,) | | |
|
||||
| backbone.layers | 85.054M | 17.466G | 23.744M |
|
||||
| backbone.layers.0 | 7.088M | 1.455G | 1.979M |
|
||||
| backbone.layers.1 | 7.088M | 1.455G | 1.979M |
|
||||
| backbone.layers.2 | 7.088M | 1.455G | 1.979M |
|
||||
| backbone.layers.3 | 7.088M | 1.455G | 1.979M |
|
||||
| backbone.layers.4 | 7.088M | 1.455G | 1.979M |
|
||||
| backbone.layers.5 | 7.088M | 1.455G | 1.979M |
|
||||
| backbone.layers.6 | 7.088M | 1.455G | 1.979M |
|
||||
| backbone.layers.7 | 7.088M | 1.455G | 1.979M |
|
||||
| backbone.layers.8 | 7.088M | 1.455G | 1.979M |
|
||||
| backbone.layers.9 | 7.088M | 1.455G | 1.979M |
|
||||
| backbone.layers.10 | 7.088M | 1.455G | 1.979M |
|
||||
| backbone.layers.11 | 7.088M | 1.455G | 1.979M |
|
||||
| backbone.ln1 | 1.536K | 0.756M | 0 |
|
||||
| backbone.ln1.weight | (768,) | | |
|
||||
| backbone.ln1.bias | (768,) | | |
|
||||
| head.layers | 5.435M | | |
|
||||
| head.layers.pre_logits | 2.362M | | |
|
||||
| head.layers.pre_logits.weight | (3072, 768) | | |
|
||||
| head.layers.pre_logits.bias | (3072,) | | |
|
||||
| head.layers.head | 3.073M | | |
|
||||
| head.layers.head.weight | (1000, 3072) | | |
|
||||
| head.layers.head.bias | (1000,) | | |
|
||||
+--------------------------+----------------------+-----------+--------------+
|
||||
| module | #parameters or shape | #flops | #activations |
|
||||
+--------------------------+----------------------+-----------+--------------+
|
||||
| model | 25.557M | 4.109G | 11.114M |
|
||||
| backbone | 23.508M | 4.109G | 11.114M |
|
||||
| backbone.conv1 | 9.408K | 0.118G | 0.803M |
|
||||
| backbone.conv1.weight | (64, 3, 7, 7) | | |
|
||||
| backbone.bn1 | 0.128K | 1.606M | 0 |
|
||||
| backbone.bn1.weight | (64,) | | |
|
||||
| backbone.bn1.bias | (64,) | | |
|
||||
| backbone.layer1 | 0.216M | 0.677G | 4.415M |
|
||||
| backbone.layer1.0 | 75.008K | 0.235G | 2.007M |
|
||||
| backbone.layer1.1 | 70.4K | 0.221G | 1.204M |
|
||||
| backbone.layer1.2 | 70.4K | 0.221G | 1.204M |
|
||||
| backbone.layer2 | 1.22M | 1.034G | 3.111M |
|
||||
| backbone.layer2.0 | 0.379M | 0.375G | 1.305M |
|
||||
| backbone.layer2.1 | 0.28M | 0.22G | 0.602M |
|
||||
| backbone.layer2.2 | 0.28M | 0.22G | 0.602M |
|
||||
| backbone.layer2.3 | 0.28M | 0.22G | 0.602M |
|
||||
| backbone.layer3 | 7.098M | 1.469G | 2.158M |
|
||||
| backbone.layer3.0 | 1.512M | 0.374G | 0.652M |
|
||||
| backbone.layer3.1 | 1.117M | 0.219G | 0.301M |
|
||||
| backbone.layer3.2 | 1.117M | 0.219G | 0.301M |
|
||||
| backbone.layer3.3 | 1.117M | 0.219G | 0.301M |
|
||||
| backbone.layer3.4 | 1.117M | 0.219G | 0.301M |
|
||||
| backbone.layer3.5 | 1.117M | 0.219G | 0.301M |
|
||||
| backbone.layer4 | 14.965M | 0.81G | 0.627M |
|
||||
| backbone.layer4.0 | 6.04M | 0.373G | 0.326M |
|
||||
| backbone.layer4.1 | 4.463M | 0.219G | 0.151M |
|
||||
| backbone.layer4.2 | 4.463M | 0.219G | 0.151M |
|
||||
| head.fc | 2.049M | | |
|
||||
| head.fc.weight | (1000, 2048) | | |
|
||||
| head.fc.bias | (1000,) | | |
|
||||
| neck.gap | | 0.1M | 0 |
|
||||
+--------------------------+----------------------+-----------+--------------+
|
||||
```
|
||||
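如果希望在 Python 代码中直接进行复杂度分析,也可以调用 MMEngine 的分析接口。下面是一个简单示意(假设接口为 `mmengine.analysis.get_model_complexity_info`,模型名仅作示例,具体以 MMEngine 文档为准):

```python
from mmengine.analysis import get_model_complexity_info
from mmpretrain import get_model

model = get_model('resnet50_8xb32_in1k')  # 按模型名构建模型(仅作示例)
info = get_model_complexity_info(model, input_shape=(3, 224, 224))
print(info['flops_str'], info['params_str'])  # 例如:4.109G 25.557M
```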
|
||||
```{warning}
|
||||
|
@ -67,5 +76,5 @@ Activation: 23.895M
|
|||
此工具仍处于试验阶段,我们不保证该数字正确无误。您最好将结果用于简单比较,但在技术报告或论文中采用该结果之前,请仔细检查。
|
||||
|
||||
- FLOPs 与输入的尺寸有关,而参数量与输入尺寸无关。默认输入尺寸为 (1, 3, 224, 224)
|
||||
- 一些运算不会被计入 FLOPs 的统计中,例如某些自定义运算。详细信息请参考 [`fvcore.nn.flop_count._DEFAULT_SUPPORTED_OPS`](https://github.com/facebookresearch/fvcore/blob/main/fvcore/nn/flop_count.py)。
|
||||
- 一些运算不会被计入 FLOPs 的统计中,例如某些自定义运算。详细信息请参考 [`mmengine.analysis.complexity_analysis._DEFAULT_SUPPORTED_FLOP_OPS`](https://github.com/open-mmlab/mmengine/blob/main/mmengine/analysis/complexity_analysis.py)。
|
||||
```
|
||||
|
|
|
@ -23,7 +23,7 @@ python tools/visualization/browse_dataset.py \
|
|||
- **`-n, --show-number`**: 可视化样本数量。如果没有指定,默认展示数据集的所有图片。
|
||||
- `-i, --show-interval`: 浏览时,每张图片的停留间隔,单位为秒。
|
||||
- **`-m, --mode`**: 可视化的模式,只能为 `['original', 'transformed', 'concat', 'pipeline']` 之一。 默认为`'transformed'`.
|
||||
- **`-r, --rescale-factor`**: 对可视化图片的放缩倍数,在图片过大或过小时设置。
|
||||
- `-r, --rescale-factor`: 在 `mode='original'` 下,可视化图片的放缩倍数,在图片过大或过小时设置。
|
||||
- `-c, --channel-order`: 图片的通道顺序,为 `['BGR', 'RGB']` 之一,默认为 `'BGR'`。
|
||||
- `--cfg-options` : 对配置文件的修改,参考[学习配置文件](../user_guides/config.md)。
|
||||
|
||||
|
@ -46,11 +46,11 @@ python tools/visualization/browse_dataset.py \
|
|||
python ./tools/visualization/browse_dataset.py ./configs/resnet/resnet101_8xb16_cifar10.py --phase val --output-dir tmp --mode original --show-number 100 --rescale-factor 10 --channel-order RGB
|
||||
```
|
||||
|
||||
- `--phase val`: 可视化验证集, 可简化为 `-p val`;
|
||||
- `--output-dir tmp`: 可视化结果保存在 "tmp" 文件夹, 可简化为 `-o tmp`;
|
||||
- `--mode original`: 可视化原图, 可简化为 `-m original`;
|
||||
- `--show-number 100`: 可视化100张图,可简化为 `-n 100`;
|
||||
- `--rescale-factor`: 图像放大10倍,可简化为 `-r 10`;
|
||||
- `--phase val`: 可视化验证集,可简化为 `-p val`;
|
||||
- `--output-dir tmp`: 可视化结果保存在 "tmp" 文件夹,可简化为 `-o tmp`;
|
||||
- `--mode original`: 可视化原图,可简化为 `-m original`;
|
||||
- `--show-number 100`: 可视化 100 张图,可简化为 `-n 100`;
|
||||
- `--rescale-factor`: 图像放大 10 倍,可简化为 `-r 10`;
|
||||
- `--channel-order RGB`: 可视化图像的通道顺序为 "RGB", 可简化为 `-c RGB`。
|
||||
|
||||
<div align=center><img src="https://user-images.githubusercontent.com/18586273/190993839-216a7a1e-590e-47b9-92ae-08f87a7d58df.jpg" style=" width: auto; height: 40%; "></div>
|
||||
|
@ -60,7 +60,7 @@ python ./tools/visualization/browse_dataset.py ./configs/resnet/resnet101_8xb16_
|
|||
使用 **'transformed'** 模式:
|
||||
|
||||
```shell
|
||||
python ./tools/visualization/browse_dataset.py ./configs/resnet/resnet50_8xb32_in1k.py -n 100 -r 2
|
||||
python ./tools/visualization/browse_dataset.py ./configs/resnet/resnet50_8xb32_in1k.py -n 100
|
||||
```
|
||||
|
||||
<div align=center><img src="https://user-images.githubusercontent.com/18586273/190994696-737b09d9-d0fb-4593-94a2-4487121e0286.JPEG" style=" width: auto; height: 40%; "></div>
|
||||
|
@ -82,3 +82,9 @@ python ./tools/visualization/browse_dataset.py configs/swin_transformer/swin-sma
|
|||
```
|
||||
|
||||
<div align=center><img src="https://user-images.githubusercontent.com/18586273/190995525-fac0220f-6630-4013-b94a-bc6de4fdff7a.JPEG" style=" width: auto; height: 40%; "></div>
|
||||
|
||||
```shell
|
||||
python ./tools/visualization/browse_dataset.py configs/beit/beit_beit-base-p16_8xb256-amp-coslr-300e_in1k.py -m pipeline
|
||||
```
|
||||
|
||||
<div align=center><img src="https://user-images.githubusercontent.com/26739999/226542300-74216187-e3d0-4a6e-8731-342abe719721.png" style=" width: auto; height: 40%; "></div>
|
||||
|
|
|
@ -54,7 +54,7 @@ python tools/analysis_tools/analyze_logs.py plot_curve your_log_json --keys accu
|
|||
#### 在同一图像内绘制两份日志文件对应的 top-1 准确率曲线图。
|
||||
|
||||
```shell
|
||||
python tools/analysis_tools/analyze_logs.py plot_curve log1.json log2.json --keys accuracy_top-1 --legend exp1 exp2
|
||||
python tools/analysis_tools/analyze_logs.py plot_curve log1.json log2.json --keys accuracy/top1 --legend exp1 exp2
|
||||
```
|
||||
|
||||
### 如何统计训练时间
|
||||
|
@ -99,7 +99,7 @@ average iter time: 0.3777 s/iter
|
|||
```shell
|
||||
python tools/analysis_tools/eval_metric.py \
|
||||
${RESULT} \
|
||||
[--metric ${METRIC_OPTIONS} ...] \
|
||||
[--metric ${METRIC_OPTIONS} ...]
|
||||
```
|
||||
|
||||
**所有参数说明**:
|
||||
|
@ -125,9 +125,56 @@ python tools/test.py configs/resnet/resnet18_8xb16_cifar10.py \
|
|||
# 计算 top-1 和 top-5 准确率
|
||||
python tools/analysis_tools/eval_metric.py results.pkl --metric type=Accuracy topk=1,5
|
||||
|
||||
# 计算准确率、精确度、召回率、F1-score
|
||||
# 计算总体准确率,各个类别上的精确度、召回率、F1-score
|
||||
python tools/analysis_tools/eval_metric.py results.pkl --metric type=Accuracy \
|
||||
--metric type=SingleLabelMetric items=precision,recall,f1-score
|
||||
--metric type=SingleLabelMetric items=precision,recall,f1-score average=None
|
||||
```
|
||||
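这一流程也可以在 Python 中完成。下面是一个简单示意(假设直接用 `mmpretrain.evaluation.Accuracy` 处理 `tools/test.py` 保存的预测结果,细节以实际 API 为准):

```python
import pickle

from mmpretrain.evaluation import Accuracy

# 读取 tools/test.py 通过 `--out results.pkl` 保存的预测结果
with open('results.pkl', 'rb') as f:
    results = pickle.load(f)

metric = Accuracy(topk=(1, 5))
metric.process(None, results)         # 送入全部预测结果
print(metric.evaluate(len(results)))  # 例如 {'accuracy/top1': ..., 'accuracy/top5': ...}
```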
|
||||
### 如何绘制测试结果的混淆矩阵
|
||||
|
||||
我们提供 `tools/analysis_tools/confusion_matrix.py`,帮助用户能够从测试输出文件中绘制混淆矩阵。
|
||||
|
||||
```shell
|
||||
python tools/analysis_tools/confusion_matrix.py \
|
||||
${CONFIG} \
|
||||
${RESULT} \
|
||||
[--out ${OUT}] \
|
||||
[--show] \
|
||||
[--show-path ${SHOW_PATH}] \
|
||||
[--include-values] \
|
||||
[--cmap] \
|
||||
[--cfg-options ${CFG_OPTIONS} ...]
|
||||
```
|
||||
|
||||
**所有参数说明**:
|
||||
|
||||
- `config`:配置文件的路径。
|
||||
- `result`:`tools/test.py`的输出结果文件,或是模型权重文件。
|
||||
- `--out`:将混淆矩阵保存到指定路径下的 pickle 文件中。
|
||||
- `--show`:是否可视化混淆矩阵图。
|
||||
- `--show-path`:将可视化混淆矩阵图保存到指定路径下。
|
||||
- `--include-values`:是否在可视化混淆矩阵图中显示具体值。
|
||||
- `--cmap`:用以可视化混淆矩阵的颜色配置。
|
||||
- `--cfg-options`:额外的配置选项,会被合入配置文件,参考[学习配置文件](../user_guides/config.md)。
|
||||
|
||||
```{note}
|
||||
在 `tools/test.py` 中,我们支持使用 `--out-item` 选项来选择保存何种结果至输出文件。
|
||||
请确保没有额外指定 `--out-item`,或指定了 `--out-item=pred`。
|
||||
```
|
||||
|
||||
**Examples**:
|
||||
|
||||
```shell
|
||||
# 获取结果文件
|
||||
python tools/test.py configs/resnet/resnet18_8xb16_cifar10.py \
|
||||
https://download.openmmlab.com/mmclassification/v0/resnet/resnet18_b16x8_cifar10_20210528-bd6371c8.pth \
|
||||
--out results.pkl
|
||||
|
||||
# 将混淆矩阵计算结果保存至 cm.pkl 中
|
||||
python tools/analysis_tools/confusion_matrix.py configs/resnet/resnet18_8xb16_cifar10.py results.pkl --out cm.pkl
|
||||
|
||||
# 可视化混淆矩阵图,并在图形窗口显示
|
||||
python tools/analysis_tools/confusion_matrix.py configs/resnet/resnet18_8xb16_cifar10.py results.pkl --show
|
||||
```
|
||||
|
||||
### 如何将预测结果可视化
|
||||
|
|
|
@ -40,6 +40,7 @@ docker build -t mmpretrain-serve:latest docker/serve/
|
|||
|
||||
```shell
|
||||
docker run --rm \
|
||||
--name mar \
|
||||
--cpus 8 \
|
||||
--gpus device=0 \
|
||||
-p8080:8080 -p8081:8081 -p8082:8082 \
|
||||
|
|
|
@ -12,8 +12,8 @@ python tools/misc/print_config.py ${CONFIG} [--cfg-options ${CFG_OPTIONS}]
|
|||
|
||||
所有参数的说明:
|
||||
|
||||
- `config` : 模型配置文件的路径。
|
||||
- `--cfg-options`::额外的配置选项,会被合入配置文件,参考[学习配置文件](../user_guides/config.md)。
|
||||
- `config`:模型配置文件的路径。
|
||||
- `--cfg-options`:额外的配置选项,会被合入配置文件,参考[学习配置文件](../user_guides/config.md)。
|
||||
|
||||
## 示例:
|
||||
|
||||
|
|
|
@ -22,7 +22,7 @@ python tools/visualization/vis_scheduler.py \
|
|||
- `config` : 模型配置文件的路径。
|
||||
- **`-p, --parameter`**: 可视化参数名,只能为 `["lr", "momentum"]` 之一,默认为 `"lr"`。
|
||||
- **`-d, --dataset-size`**: 数据集的大小。如果指定,`build_dataset` 将被跳过并使用这个大小作为数据集大小,默认使用 `build_dataset` 所得数据集的大小。
|
||||
- **`-n, --ngpus`**: 使用 GPU 的数量, 默认为1。
|
||||
- **`-n, --ngpus`**: 使用 GPU 的数量,默认为 1。
|
||||
- **`-s, --save-path`**: 保存的可视化图片的路径,默认不保存。
|
||||
- `--title`: 可视化图片的标题,默认为配置文件名。
|
||||
- `--style`: 可视化图片的风格,默认为 `whitegrid`。
|
||||
|
@ -35,18 +35,10 @@ python tools/visualization/vis_scheduler.py \
|
|||
|
||||
## 如何在开始训练前可视化学习率曲线
|
||||
|
||||
你可以使用如下命令来绘制配置文件 `configs/resnet/resnet50_b16x8_cifar100.py` 将会使用的变化率曲线:
|
||||
你可以使用如下命令来绘制配置文件 `configs/swin_transformer/swin-base_16xb64_in1k.py` 将会使用的变化率曲线:
|
||||
|
||||
```bash
|
||||
python tools/visualization/vis_scheduler.py configs/resnet/resnet50_b16x8_cifar100.py
|
||||
python tools/visualization/vis_scheduler.py configs/swin_transformer/swin-base_16xb64_in1k.py --dataset-size 1281167 --ngpus 16
|
||||
```
|
||||
|
||||
<div align=center><img src="https://user-images.githubusercontent.com/18586273/191006713-023f065d-d366-4165-a52e-36176367506e.png" style=" width: auto; height: 40%; "></div>
|
||||
|
||||
当数据集为 ImageNet 时,通过直接指定数据集大小来节约时间,并保存图片:
|
||||
|
||||
```bash
|
||||
python tools/visualization/vis_scheduler.py configs/repvgg/repvgg-B3g4_4xb64-autoaug-lbs-mixup-coslr-200e_in1k.py --dataset-size 1281167 --ngpus 4 --save-path ./repvgg-B3g4_4xb64-lr.jpg
|
||||
```
|
||||
|
||||
<div align=center><img src="https://user-images.githubusercontent.com/18586273/191006721-0f680e07-355e-4cd6-889c-86c0cad9acb7.png" style=" width: auto; height: 40%; "></div>
|
||||
<div align=center><img src="https://user-images.githubusercontent.com/26739999/226544329-cf3a3d45-6ab3-48aa-8972-2c2a58c35e62.png" style=" width: auto; height: 40%; "></div>
|
||||
|
|
|
@ -18,7 +18,7 @@ python tools/print_config.py \
|
|||
- `config` : 配置文件的路径。
|
||||
- `--out-path` : 输出结果路径,默认为 `brokenfiles.log`。
|
||||
- `--phase` : 检查哪个阶段的数据集,可用值为 `train`、`test` 或者 `val`,默认为 `train`。
|
||||
- `--num-process` : 指定的进程数,默认为1。
|
||||
- `--num-process` : 指定的进程数,默认为 1。
|
||||
- `--cfg-options`: 额外的配置选项,会被合入配置文件,参考[教程 1:如何编写配置文件](https://mmclassification.readthedocs.io/zh_CN/latest/tutorials/config.html)。
|
||||
|
||||
## 示例:
|
||||
|
|
|
@ -1,20 +1,8 @@
|
|||
# 学习配置文件
|
||||
|
||||
- [学习配置文件](#学习配置文件)
|
||||
- [配置文件结构](#配置文件结构)
|
||||
- [模型配置](#模型配置)
|
||||
- [数据](#数据)
|
||||
- [训练策略](#训练策略)
|
||||
- [运行设置](#运行设置)
|
||||
- [继承并修改配置文件](#继承并修改配置文件)
|
||||
- [使用配置文件里的中间变量](#使用配置文件里的中间变量)
|
||||
- [忽略基础配置文件里的部分内容](#忽略基础配置文件里的部分内容)
|
||||
- [引用基础配置文件里的变量](#引用基础配置文件里的变量)
|
||||
- [通过命令行参数修改配置信息](#通过命令行参数修改配置信息)
|
||||
|
||||
为了管理深度学习实验的各种设置,我们使用配置文件来记录所有这些配置。这种配置文件系统具有模块化和继承特性,更多细节可以参考 {external+mmengine:doc}`MMEngine 中的教程 <advanced_tutorials/config>`。
|
||||
|
||||
MMPretrain 主要使用 python 文件作为配置文件,所有配置文件都放置在 [`configs`](https://github.com/open-mmlab/mmpretrain/tree/main/configs) 文件夹下,目录结构如下所示:
|
||||
MMPretrain 主要使用 python 文件作为配置文件,所有配置文件都放置在 [`configs`](https://github.com/open-mmlab/mmclassification/tree/pretrain/configs) 文件夹下,目录结构如下所示:
|
||||
|
||||
```text
|
||||
MMPretrain/
|
||||
|
@ -36,20 +24,20 @@ MMPretrain/
|
|||
|
||||
可以使用 `python tools/misc/print_config.py /PATH/TO/CONFIG` 命令来查看完整的配置信息,从而方便检查所对应的配置文件。
|
||||
|
||||
本文主要讲解 MMPretrain 配置文件的命名和结构,以及如何基于已有的配置文件修改,并以 [ResNet50 配置文件](https://github.com/open-mmlab/mmpretrain/blob/main/configs/resnet/resnet50_8xb32_in1k.py) 逐行解释。
|
||||
本文主要讲解 MMPretrain 配置文件的命名和结构,以及如何基于已有的配置文件修改,并以 [ResNet50 配置文件](https://github.com/open-mmlab/mmclassification/blob/main/configs/resnet/resnet50_8xb32_in1k.py) 逐行解释。
|
||||
|
||||
## 配置文件结构
|
||||
|
||||
在 `configs/_base_` 文件夹下有 4 个基本组件类型,分别是:
|
||||
|
||||
- [模型(model)](https://github.com/open-mmlab/mmpretrain/tree/main/configs/_base_/models)
|
||||
- [数据(data)](https://github.com/open-mmlab/mmpretrain/tree/main/configs/_base_/datasets)
|
||||
- [训练策略(schedule)](https://github.com/open-mmlab/mmpretrain/tree/main/configs/_base_/schedules)
|
||||
- [运行设置(runtime)](https://github.com/open-mmlab/mmpretrain/blob/main/configs/_base_/default_runtime.py)
|
||||
- [模型(model)](https://github.com/open-mmlab/mmclassification/tree/pretrain/configs/_base_/models)
|
||||
- [数据(data)](https://github.com/open-mmlab/mmclassification/tree/pretrain/configs/_base_/datasets)
|
||||
- [训练策略(schedule)](https://github.com/open-mmlab/mmclassification/tree/pretrain/configs/_base_/schedules)
|
||||
- [运行设置(runtime)](https://github.com/open-mmlab/mmclassification/blob/pretrain/configs/_base_/default_runtime.py)
|
||||
|
||||
你可以通过继承一些基本配置文件轻松构建自己的训练配置文件。我们称这些被继承的配置文件为 _原始配置文件_,如 `_base_` 文件夹中的文件一般仅作为原始配置文件。
|
||||
|
||||
下面使用 [ResNet50 配置文件](https://github.com/open-mmlab/mmpretrain/blob/main/configs/resnet/resnet50_8xb32_in1k.py) 作为案例进行说明并注释每一行含义。
|
||||
下面使用 [ResNet50 配置文件](https://github.com/open-mmlab/mmclassification/blob/pretrain/configs/resnet/resnet50_8xb32_in1k.py) 作为案例进行说明并注释每一行含义。
|
||||
|
||||
```python
|
||||
_base_ = [ # 此配置文件将继承所有 `_base_` 中的配置
|
||||
|
@ -66,31 +54,33 @@ _base_ = [ # 此配置文件将继承所有 `
|
|||
|
||||
模型原始配置文件包含一个 `model` 字典数据结构,主要包括网络结构、损失函数等信息:
|
||||
|
||||
- `type`:模型类型,我们支持了多种任务
|
||||
- `type`:算法类型,我们支持了多种任务
|
||||
- 对于图像分类任务,通常为 `ImageClassifier`,更多细节请参考 [API 文档](mmpretrain.models.classifiers)。
|
||||
- 对于自监督任务,有多中类型的 `SelfSupervisors`, 例如 `MoCoV2`, `BEiT`, `MAE` 等。更多细节请参考 [API 文档](mmpretrain.models.selfsup)。
|
||||
- 对于自监督任务,有多种类型的算法,例如 `MoCoV2`, `BEiT`, `MAE` 等。更多细节请参考 [API 文档](mmpretrain.models.selfsup)。
|
||||
- 对于图像检索任务,通常为 `ImageToImageRetriever`,更多细节请参考 [API 文档](mmpretrain.models.retrievers).
|
||||
|
||||
通常,我们使用 **`type`字段** 来指定组件的类,并使用其他字段来传递类的初始化参数。{external+mmengine:doc}`注册器教程 <advanced_tutorials/registry>` 对其进行了详细描述。
|
||||
|
||||
这里我们以 [`ImageClassifier`](mmpretrain.models.classifiers.ImageClassifier) 的配置字段为例,对初始化参数进行说明:
|
||||
|
||||
- `backbone`: 主干网络设置,主干网络为主要的特征提取网络,比如 `ResNet`, `Swin Transformer`, `Vision Transformer` 等等。更多可用选项请参考 [API 文档](mmpretrain.models.backbones)。
|
||||
- 对于自监督学习,有些主干网络需要重新实现,您可以在 [API 文档](mmpretrain.models.selfsup) 中获取更多细节。
|
||||
- `neck`: 颈网络设置,颈网络主要是连接主干网和头网络的中间部分,比如 `GlobalAveragePooling` 等,更多可用选项请参考 [API 文档](mmpretrain.models.necks)。
|
||||
- `head`: 头网络设置,头网络主要是最后关联任务的部件,更多可用选项请参考 [API 文档](mmpretrain.models.heads)。
|
||||
- `head`: 头网络设置,头网络主要是与具体任务关联的部件,如图像分类、自监督训练等,更多可用选项请参考 [API 文档](mmpretrain.models.heads)。
|
||||
- `loss`: 损失函数设置, 支持 `CrossEntropyLoss`, `LabelSmoothLoss`, `PixelReconstructionLoss` 等,更多可用选项参考 [API 文档](mmpretrain.models.losses)。
|
||||
- `data_preprocessor`: 图像输入的预处理模块,输入在进入模型前的预处理操作,例如 `ClsDataPreprocessor`, 有关详细信息,请参阅 [API 文档](mmpretrain.models.utils.data_preprocessor)。
|
||||
- `train_cfg`:训练模型时的额外设置。在 MMCLS 中,我们主要使用它来配置批量增强,例如 `Mixup` 和 `CutMix`。有关详细信息,请参阅 [文档](mmpretrain.models.utils.batch_augments)。
|
||||
- `train_cfg`: `ImageClassifier` 的额外训练配置。在 `ImageClassifier` 中,我们使用这一参数指定批数据增强设置,比如 `Mixup` 和 `CutMix`。详见[文档](mmpretrain.models.utils.batch_augments)。
|
||||
|
||||
```{note}
|
||||
配置文件中的 'type' 不是构造时的参数,而是类名。
|
||||
```
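
To make the registry mechanism concrete, here is a minimal sketch of how such a dict becomes an object. It assumes the standard `mmpretrain.registry.MODELS` entry point and is illustrative rather than the canonical build path used by the runner.

```python
# A minimal sketch of how a config dict becomes a model instance.
# Assumes mmpretrain is installed and MODELS is its model registry.
from mmpretrain.registry import MODELS

cfg = dict(
    type='ImageClassifier',  # the class name looked up in the registry
    backbone=dict(type='ResNet', depth=50, num_stages=4, out_indices=(3, )),
    neck=dict(type='GlobalAveragePooling'),
    head=dict(
        type='LinearClsHead',
        num_classes=1000,
        in_channels=2048,
        loss=dict(type='CrossEntropyLoss', loss_weight=1.0)),
)
model = MODELS.build(cfg)  # roughly ImageClassifier(backbone=..., neck=..., head=...)
```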

Following is the model config of ResNet50 ['configs/_base_/models/resnet50.py'](https://github.com/open-mmlab/mmpretrain/blob/main/configs/_base_/models/resnet50.py):
Following is the model config of ResNet50 ['configs/_base_/models/resnet50.py'](https://github.com/open-mmlab/mmclassification/blob/pretrain/configs/_base_/models/resnet50.py):

```python
model = dict(
    type='ImageClassifier',  # the classifier type; currently only 'ImageClassifier'
    type='ImageClassifier',  # the type of the main model (for image classification tasks, use `ImageClassifier`)
    backbone=dict(
        type='ResNet',  # the backbone type
        # All fields except `type` come from the __init__ method of the `ResNet` class.
        # See https://mmpretrain.readthedocs.io/zh_CN/main/api/generated/mmpretrain.models.ResNet.html
        # See https://mmclassification.readthedocs.io/zh_CN/pretrain/api/generated/mmpretrain.models.backbones.ResNet.html
        depth=50,
        num_stages=4,  # the number of backbone stages; the feature maps they produce are the inputs of the following head
        out_indices=(3, ),  # the indices of the output feature maps

@ -100,7 +90,7 @@ model = dict(

    head=dict(
        type='LinearClsHead',  # the classification head type
        # All fields except `type` come from the __init__ method of the `LinearClsHead` class.
        # See https://mmpretrain.readthedocs.io/zh_CN/main/api/generated/mmpretrain.models.LinearClsHead.html
        # See https://mmclassification.readthedocs.io/zh_CN/pretrain/api/generated/mmpretrain.models.heads.LinearClsHead.html
        num_classes=1000,
        in_channels=2048,
        loss=dict(type='CrossEntropyLoss', loss_weight=1.0),  # the loss function settings

@ -112,7 +102,7 @@ model = dict(

The data primitive configs mainly include the preprocessing settings, the dataloaders, and the evaluators:

- `data_preprocessor`: The model input preprocessing config; the same as `model.data_preprocessor`, but with lower priority.
- `train_evaluator | val_evaluator | test_evaluator`: Build the evaluators; see the [API documentation](mmpretrain.evaluation).
- `train_dataloader | val_dataloader | test_dataloader`: Build the dataloaders.
  - `samples_per_gpu`: The batch size per GPU.

@ -120,9 +110,9 @@ model = dict(

  - `sampler`: The sampler config.
  - `dataset`: The dataset config.
    - `type`: The dataset type; MMPretrain supports datasets such as `ImageNet`, `Cifar`, etc. See the [API documentation](mmpretrain.datasets).
    - `pipeline`: The data processing pipeline; see the tutorial [How to design a data pipeline](https://mmpretrain.readthedocs.io/zh_CN/main/api/generated/tutorials/data_pipeline.html).
    - `pipeline`: The data processing pipeline; see the tutorial [How to design a data pipeline](../advanced_guides/pipeline.md).

Following is the data config of ResNet50 ['configs/_base_/datasets/imagenet_bs32.py'](https://github.com/open-mmlab/mmpretrain/blob/main/configs/_base_/datasets/imagenet_bs32.py):
Following is the data config of ResNet50 ['configs/_base_/datasets/imagenet_bs32.py'](https://github.com/open-mmlab/mmclassification/blob/pretrain/configs/_base_/datasets/imagenet_bs32.py):

```python
dataset_type = 'ImageNet'

@ -143,15 +133,15 @@ train_pipeline = [

test_pipeline = [
    dict(type='LoadImageFromFile'),  # read the image
    dict(type='ResizeEdge', scale=256, edge='short'),  # resize the short edge to 256px
    dict(type='CenterCrop', crop_size=224),  # center crop
    dict(type='PackInputs'),  # prepare the image and the labels
]

# build the train dataloader
train_dataloader = dict(
    batch_size=32,  # the batch size per GPU
    num_workers=5,  # the number of workers per GPU
    dataset=dict(  # the training dataset
        type=dataset_type,
        data_root='data/imagenet',

@ -159,7 +149,7 @@ train_dataloader = dict(

        data_prefix='train',
        pipeline=train_pipeline),
    sampler=dict(type='DefaultSampler', shuffle=True),  # the default sampler
    persistent_workers=True,  # keep worker processes alive to shorten the preparation time of every epoch
)

# build the validation dataloader

@ -178,12 +168,13 @@ val_dataloader = dict(

# validation evaluation settings; use accuracy as the metric, here top-1 and top-5 accuracy
val_evaluator = dict(type='Accuracy', topk=(1, 5))

test_dataloader = val_dataloader  # the test dataloader config, here identical to val_dataloader
test_evaluator = val_evaluator  # the test evaluation config, here identical to val_evaluator
```

```{note}
The preprocessing config (`data_preprocessor`) can be defined either as a sub-field of `model` or in the outer `data_preprocessor` field;
if both are configured, the `model.data_preprocessor` config takes precedence.
```
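
If you want to check how the outer field and the `model` sub-field interact in a concrete config, you can load it with MMEngine and inspect the parsed result; a minimal sketch, assuming you run it from a repository checkout so the relative path resolves:

```python
# Minimal sketch: load a config file and inspect the merged fields.
from mmengine.config import Config

cfg = Config.fromfile('configs/resnet/resnet50_8xb32_in1k.py')
print(cfg.model.type)                    # 'ImageClassifier'
print(cfg.train_dataloader.batch_size)   # 32
# The outer `data_preprocessor` is only a fallback; if
# `model.data_preprocessor` is also set, the latter takes precedence.
print(cfg.get('data_preprocessor'))
```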

### Schedule

@ -197,7 +188,7 @@ test_evaluator = val_evaluator

- `param_scheduler`: The learning-rate policy; you can specify the learning-rate and momentum curves during training. See the {external+mmengine:doc}`documentation <tutorials/param_scheduler>` in MMEngine for details.
- `train_cfg | val_cfg | test_cfg`: The loop configs for training, validation and testing; see the related {external+mmengine:doc}`MMEngine documentation <design/runner>`.

Following is the schedule config of ResNet50 ['configs/_base_/schedules/imagenet_bs256.py'](https://github.com/open-mmlab/mmpretrain/blob/main/configs/_base_/schedules/imagenet_bs256.py):
Following is the schedule config of ResNet50 ['configs/_base_/schedules/imagenet_bs256.py'](https://github.com/open-mmlab/mmclassification/blob/pretrain/configs/_base_/schedules/imagenet_bs256.py):

```python
optim_wrapper = dict(

@ -209,7 +200,7 @@ optim_wrapper = dict(

param_scheduler = dict(
    type='MultiStepLR', by_epoch=True, milestones=[30, 60, 90], gamma=0.1)

# training config: train for 100 epochs and evaluate on the validation set after every training epoch
# 'by_epoch=True' uses `EpochBasedTrainLoop` by default, 'by_epoch=False' uses `IterBasedTrainLoop` by default
train_cfg = dict(by_epoch=True, max_epochs=100, val_interval=1)
# use the default validation loop

@ -227,13 +218,13 @@ auto_scale_lr = dict(base_batch_size=256)

This part mainly covers the checkpoint-saving policy, the logging config, the training parameters, the resume checkpoint path, the working directory, and so on.

Following is the runtime config used by almost all algorithms ['configs/_base_/default_runtime.py'](https://github.com/open-mmlab/mmpretrain/blob/main//configs/_base_/default_runtime.py):
Following is the runtime config used by almost all algorithms ['configs/_base_/default_runtime.py'](https://github.com/open-mmlab/mmclassification/blob/pretrain//configs/_base_/default_runtime.py):

```python
# the default scope used by all registries
default_scope = 'mmpretrain'

# configure the default hooks
default_hooks = dict(
    # record the time of every iteration
    timer=dict(type='IterTimerHook'),

@ -241,10 +232,10 @@ default_hooks = dict(

    # print logs every 100 iterations
    logger=dict(type='LoggerHook', interval=100),

    # enable the default parameter scheduler hook
    param_scheduler=dict(type='ParamSchedulerHook'),

    # save checkpoints every epoch
    checkpoint=dict(type='CheckpointHook', interval=1),

    # set the sampler seed in distributed environments

@ -256,7 +247,7 @@ default_hooks = dict(

# configure the environment
env_cfg = dict(
    # whether to enable cudnn benchmark
    cudnn_benchmark=False,

    # set multi-process parameters

@ -288,7 +279,7 @@ resume = False

For all config files under the same algorithm folder, MMPretrain recommends having only **one** corresponding _primitive config_ file.
All other config files should inherit from that _primitive config_ file, which keeps the maximum inheritance depth at 3.

For example, if you make some modifications on top of ResNet, you can first inherit the basic ResNet structure, dataset, and other training settings by specifying `_base_ = './resnet50_8xb32_in1k.py'` (relative to your config file), and then modify the necessary parameters. Say you want to train with the `CutMix` augmentation on top of the basic resnet50, change the number of training epochs from 100 to 300, change the learning-rate decay milestones, and change the dataset path; you can create a new config file `configs/resnet/resnet50_8xb32-300e_in1k.py` with the following content:

```python
# create this file under 'configs/resnet/'

@ -302,7 +293,7 @@ model = dict(

)

# the schedule trains for more epochs than before
train_cfg = dict(max_epochs=300, val_interval=10)  # train for 300 epochs, evaluate every 10 epochs
param_scheduler = dict(step=[150, 200, 250])  # the learning-rate schedule also changes

# use your own dataset directory

@ -358,7 +349,7 @@ test_dataloader = dict(dataset=dict(pipeline=val_pipeline))

Sometimes you need to set `_delete_=True` to ignore some fields in the base configs. See the {external+mmengine:doc}`MMEngine documentation <advanced_tutorials/config>` for more about this design.

Here is a simple example. If you want to use a cosine schedule in the ResNet50 case above, inheriting and modifying directly will raise a `get unexpected keyword 'step'` error, because the `'step'` field of the base config's `param_scheduler` is kept. You need to add `_delete_=True` to ignore the `param_scheduler` contents in the base config:

```python
_base_ = '../../configs/resnet/resnet50_8xb32_in1k.py'

@ -371,7 +362,7 @@ param_scheduler = dict(type='CosineAnnealingLR', by_epoch=True, _delete_=True)

Sometimes you can reference fields of the `_base_` config to avoid duplicated definitions. See the {external+mmengine:doc}`MMEngine documentation <advanced_tutorials/config>` for more about this design.

Here is a simple example of using `auto augment` in the training data pipeline; see the config file [`configs/resnest/resnest50_32xb64_in1k.py`](https://github.com/open-mmlab/mmpretrain/blob/main/configs/resnest/resnest50_32xb64_in1k.py). When defining `train_pipeline`, just add the file that defines the auto augment policies to `_base_`, and then reference the variable via `{{_base_.auto_increasing_policies}}`:
Here is a simple example of using `auto augment` in the training data pipeline; see the config file [`configs/resnest/resnest50_32xb64_in1k.py`](https://github.com/open-mmlab/mmclassification/blob/pretrain/configs/resnest/resnest50_32xb64_in1k.py). When defining `train_pipeline`, just add the file that defines the auto augment policies to `_base_`, and then reference the variable via `{{_base_.auto_increasing_policies}}`:

```python
_base_ = [
@ -1,52 +1,40 @@

# Prepare Dataset

The datasets currently supported by MMPretrain are listed below:

- [Prepare Dataset](#prepare-dataset)
  - [CustomDataset](#customdataset)
    - [Subfolder Format](#subfolder-format)
    - [Annotation File Format](#annotation-file-format)
  - [ImageNet](#imagenet)
  - [CIFAR](#cifar)
  - [MNIST](#mnist)
  - [OpenMMLab 2.0 Standard Dataset](#openmmlab-20-standard-dataset)
  - [Other Datasets](#other-datasets)
  - [Dataset Wrappers](#dataset-wrappers)

If the dataset you use is not in the public datasets listed above, you need to convert its format to fit **`CustomDataset`**.

## CustomDataset

[`CustomDataset`](mmpretrain.datasets.CustomDataset) is a general dataset class for you to use your own datasets. Currently `CustomDataset` supports the following two ways of organizing your dataset files:

### Subfolder Format

As shown below, place all samples in one folder:
In this format, you only need to reorganize your dataset folder and place all samples in one folder, without creating any annotation files.

For supervised tasks (`with_label=True`), we use the name of each subfolder as the class name; in the following example, `class_x` and `class_y` will be recognized as class names.

```text
data_prefix/
├── class_x
│   ├── xxx.png
│   ├── xxy.png
│   ├── ...
│   └── xxz.png
└── class_y
    ├── 123.png
    ├── nsdf3.png
    ├── ...
    └── asd932_.png
```

For unsupervised tasks (`with_label=False`), we directly load all sample files under the specified folder:

```
data_prefix/
├── folder_1
│   ├── xxx.png
│   ├── xxy.png
│   └── ...
├── 123.png
├── nsdf3.png
└── ...
```

If you want to use it for training, add the following configuration to your config file:

@ -58,44 +46,29 @@ train_dataloader = dict(

    dataset=dict(
        type='CustomDataset',
        data_prefix='path/to/data_prefix',
        with_label=True,  # use False for unsupervised tasks
        pipeline=...
    )
)
```

```{note}
If you want to use this format, do not specify `ann_file`, or specify `ann_file=''`.

Note that the subfolder format requires scanning the folders, which may slow down initialization, especially for large datasets or slow file IO.
```
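
To sanity-check such a folder layout, you can instantiate the dataset class directly; a minimal sketch, assuming `CustomDataset`'s `data_prefix`/`with_label`/`pipeline` arguments as described above and a placeholder path:

```python
# Minimal sketch: verify that a subfolder-format dataset is discovered.
# 'path/to/data_prefix' is a placeholder; point it at your own folder.
from mmpretrain.datasets import CustomDataset

dataset = CustomDataset(data_prefix='path/to/data_prefix', with_label=True, pipeline=[])
print(len(dataset))                     # the number of discovered samples
print(dataset.metainfo.get('classes'))  # class names inferred from subfolder names
print(dataset.get_data_info(0))         # the path and label of the first sample
```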

### Annotation File Format

The annotation file format mainly uses text files to store category information: `data_prefix` stores the images, and `ann_file` stores the annotated category information.
In this format, we use a text annotation file to store the image file paths and the corresponding category indices.

For supervised tasks (`with_label=True`), the annotation file should contain the file path and the category index of one sample per line, separated by a space, as below:

All of these file paths can be absolute paths, or paths relative to `data_prefix`.

```text
folder_1/xxx.png 0

@ -106,27 +79,59 @@ nsdf3.png 3

```

```{note}
The value of the category indices should be integers in the range `[0, num_classes - 1]`.
The index of categories starts from 0, and the value of ground-truth labels should be in the range `[0, num_classes - 1]`.

In addition, please use the `classes` field in the dataset settings to specify the name of every category.
```

For unsupervised tasks (`with_label=False`), the annotation file only needs the file path of one sample per line, as below:

```text
folder_1/xxx.png
folder_1/xxy.png
123.png
nsdf3.png
...
```

Assume the entire dataset folder is as below:

```text
data_root
├── meta
│   ├── test.txt     # the annotation file of the test dataset
│   ├── train.txt    # the annotation file of the train dataset
│   └── val.txt      # the annotation file of the validation dataset
├── train
│   ├── 123.png
│   ├── folder_1
│   │   ├── xxx.png
│   │   └── xxy.png
│   └── nsdf3.png
├── test
└── val
```

Here is an example of the dataset settings in a config file:

```python
# training data settings
train_dataloader = dict(
    ...
    # training dataset config
    dataset=dict(
        type='CustomDataset',
        data_root='path/to/data_root',  # the common path prefix of `ann_file` and `data_prefix`
        ann_file='meta/train.txt',      # the annotation file path, relative to `data_root`
        data_prefix='train',            # the prefix of the file paths in `ann_file`, relative to `data_root`
        classes=['A', 'B', 'C', 'D', ...],  # the name of every category
        pipeline=...,                   # the transforms to process the dataset samples
    )
    ...
)
```

```{note}
If 'ann_file' is specified, the annotations are read from 'ann_file'; otherwise, the subfolder format is used.
For a complete example of how to use `CustomDataset`, see [How to pretrain with custom datasets](../notes/pretrain_custom_dataset.md).
```
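
Annotation files like `meta/train.txt` are plain text and easy to generate yourself; a small sketch using only the standard library, assuming class subfolders under `data_root/train` (all paths here are placeholders):

```python
# Minimal sketch: generate meta/train.txt from class subfolders under train/.
import os

data_root = 'path/to/data_root'  # placeholder
classes = sorted(os.listdir(os.path.join(data_root, 'train')))
with open(os.path.join(data_root, 'meta', 'train.txt'), 'w') as f:
    for idx, name in enumerate(classes):
        class_dir = os.path.join(data_root, 'train', name)
        for fname in sorted(os.listdir(class_dir)):
            # a path relative to `data_prefix`, followed by the category index
            f.write(f'{name}/{fname} {idx}\n')
```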

## ImageNet

@ -134,35 +139,97 @@ train_dataloader = dict(

ImageNet has multiple versions, but the most commonly used one is [ILSVRC 2012](http://www.image-net.org/challenges/LSVRC/2012/). It can be used by the following steps.

1. Register an account and log in to the [download page](http://www.image-net.org/download-images).
2. Find the download links of ILSVRC2012 and download the following two files:
   - ILSVRC2012_img_train.tar (~138GB)
   - ILSVRC2012_img_val.tar (~6.3GB)
3. Untar the downloaded files.
4. Download and untar the annotation files from this [link](https://download.openmmlab.com/mmclassification/datasets/imagenet/meta/caffe_ilsvrc12.tar.gz).
5. Re-organize the image files according to the paths in the annotations; it should look like:
4. Re-organize the image files according to the [CustomDataset](#customdataset) format convention.

```{note}
In MMPretrain, all config files use datasets with text annotation files by default. Therefore, if you want to use the subfolder format, set `ann_file=''` in the config file.
```

### Subfolder Format

Re-organize the dataset as below:

```text
data/imagenet/
├── train/
│   ├── n01440764
│   │   ├── n01440764_10026.JPEG
│   │   ├── n01440764_10027.JPEG
│   │   ├── n01440764_10029.JPEG
│   │   ├── n01440764_10040.JPEG
│   │   ├── n01440764_10042.JPEG
│   │   ├── n01440764_10043.JPEG
│   │   └── n01440764_10048.JPEG
│   ├── ...
├── val/
│   ├── n01440764
│   │   ├── ILSVRC2012_val_00000293.JPEG
│   │   ├── ILSVRC2012_val_00002138.JPEG
│   │   ├── ILSVRC2012_val_00003014.JPEG
│   │   └── ...
│   ├── ...
```

Then, you can use the [`ImageNet`](mmpretrain.datasets.ImageNet) dataset with the following configuration:

```python
train_dataloader = dict(
    ...
    # training dataset config
    dataset=dict(
        type='ImageNet',
        data_root='data/imagenet',
        data_prefix='train',
        ann_file='',
        pipeline=...,
    )
)

val_dataloader = dict(
    ...
    # validation dataset config
    dataset=dict(
        type='ImageNet',
        data_root='data/imagenet',
        data_prefix='val',
        ann_file='',
        pipeline=...,
    )
)

test_dataloader = val_dataloader
```
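
A quick way to confirm that the folders are discovered is to build the dataset class directly; a minimal sketch based on the layout above (the empty `ann_file=''` selects the subfolder mode):

```python
# Minimal sketch: check that the subfolder-format ImageNet is discovered.
from mmpretrain.datasets import ImageNet

ds = ImageNet(data_root='data/imagenet', data_prefix='val', ann_file='', pipeline=[])
print(len(ds))              # expect 50000 for the full ILSVRC2012 val split
print(ds.get_data_info(0))  # the path and label of the first sample
```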

### Text Annotation File Format

You can download and untar the meta data from [this link](https://download.openmmlab.com/mmclassification/datasets/imagenet/meta/caffe_ilsvrc12.tar.gz), and then organize the folders as below:

```text
data/imagenet/
├── meta/
│   ├── train.txt
│   ├── test.txt
│   └── val.txt
├── train/
│   ├── n01440764
│   │   ├── n01440764_10026.JPEG
│   │   ├── n01440764_10027.JPEG
│   │   ├── n01440764_10029.JPEG
│   │   ├── n01440764_10040.JPEG
│   │   ├── n01440764_10042.JPEG
│   │   ├── n01440764_10043.JPEG
│   │   └── n01440764_10048.JPEG
│   ├── ...
├── val/
│   ├── ILSVRC2012_val_00000001.JPEG
│   ├── ILSVRC2012_val_00000002.JPEG
│   ├── ILSVRC2012_val_00000003.JPEG
│   ├── ILSVRC2012_val_00000004.JPEG
│   ├── ...
```

Then, you can use the [`ImageNet`](mmpretrain.datasets.ImageNet) dataset with the following configuration:

@ -174,22 +241,24 @@ train_dataloader = dict(

    dataset=dict(
        type='ImageNet',
        data_root='imagenet_folder',
        data_prefix='train/',
        ann_file='meta/train.txt',
        pipeline=...,
    )
)

val_dataloader = dict(
    ...
    # validation dataset config
    dataset=dict(
        type='ImageNet',
        data_root='imagenet_folder',
        data_prefix='val/',
        ann_file='meta/val.txt',
        pipeline=...,
    )
)

test_dataloader = val_dataloader
```

@ -307,7 +376,9 @@ dataset_cfg=dict(

## Other Datasets

MMPretrain also supports many more datasets; you can find their configuration information in the [dataset documentation](mmpretrain.datasets).

If you need to use a dataset with a special format, you need to implement your own dataset class; see [Adding New Dataset](../advanced_guides/datasets.md).

## Dataset Wrappers
@ -1,24 +1,14 @@

# Downstream Tasks

- [Downstream Tasks](#downstream-tasks)
  - [Detection](#detection)
    - [Train](#train)
    - [Test](#test)
  - [Segmentation](#segmentation)
    - [Train](#train-1)
    - [Test](#test-1)

## Detection

Here, we prefer to use MMDetection for detection tasks. First, make sure you have installed [MIM](https://github.com/open-mmlab/mim), which is also a project of OpenMMLab.
We use MMDetection for image detection. First, make sure you have installed [MIM](https://github.com/open-mmlab/mim), which is also a project of OpenMMLab.

```shell
pip install openmim
mim install 'mmdet>=3.0.0rc0'
```

It is very easy to install the package.

Besides, please refer to MMDetection for [installation](https://mmdetection.readthedocs.io/en/dev-3.x/get_started.html) and [data preparation](https://mmdetection.readthedocs.io/en/dev-3.x/user_guides/dataset_prepare.html).

### Train

@ -35,35 +25,17 @@ bash tools/benchmarks/mmdetection/mim_slurm_train_c4.sh ${PARTITION} ${CONFIG} $

bash tools/benchmarks/mmdetection/mim_slurm_train_fpn.sh ${PARTITION} ${CONFIG} ${PRETRAIN}
```

Remarks:

- `${CONFIG}`: Use config files under `configs/benchmarks/mmdetection/`. Since the OpenMMLab codebases support referring to config files across different repositories, we can easily use the MMDetection config files, for example:

  ```shell
  _base_ = 'mmdet::mask_rcnn/mask-rcnn_r50-caffe-c4_1x_coco.py'
  ```

  Writing your config files from scratch is also supported.

- `${CONFIG}`: Use the config file path in MMDetection directly. For some algorithms, we have modified config files,
  which can be found in the `benchmarks` folder under the corresponding algorithm folder. You can also write your config file from scratch.
- `${PRETRAIN}`: the pre-trained model file
- `${GPUS}`: The number of GPUs you want to use for training; we adopt 8 GPUs for detection tasks by default.

Example:

```shell
bash ./tools/benchmarks/mmdetection/mim_dist_train_c4.sh \
configs/benchmarks/mmdetection/coco/mask-rcnn_r50-c4_ms-1x_coco.py \
configs/byol/benchmarks/mask-rcnn_r50-c4_ms-1x_coco.py \
https://download.openmmlab.com/mmselfsup/1.x/byol/byol_resnet50_16xb256-coslr-200e_in1k/byol_resnet50_16xb256-coslr-200e_in1k_20220825-de817331.pth 8
```

Or if you want to do detection tasks with [detectron2](https://github.com/facebookresearch/detectron2), we also provide some config files.
Please refer to [INSTALL.md](https://github.com/facebookresearch/detectron2/blob/main/INSTALL.md) for installation, and follow the [directory structure](https://github.com/facebookresearch/detectron2/tree/main/datasets) required by detectron2 to prepare your datasets.

```shell
conda activate detectron2  # use detectron2 environment here, otherwise use open-mmlab environment
cd tools/benchmarks/detectron2
python convert-pretrain-to-detectron2.py ${WEIGHT_FILE} ${OUTPUT_FILE}  # must use .pkl as the output extension.
bash run.sh ${DET_CFG} ${OUTPUT_FILE}
```

### Test

@ -92,15 +64,13 @@ https://download.openmmlab.com/mmselfsup/1.x/byol/byol_resnet50_16xb256-coslr-20

## Segmentation

For semantic segmentation tasks, we use MMSegmentation. First, make sure you have installed [MIM](https://github.com/open-mmlab/mim), which is also a project of OpenMMLab.
We use MMSegmentation for image segmentation. First, make sure you have installed [MIM](https://github.com/open-mmlab/mim), which is also a project of OpenMMLab.

```shell
pip install openmim
mim install 'mmsegmentation>=1.0.0rc0'
```

It is very easy to install the package.

Besides, please refer to MMSegmentation for [installation](https://mmsegmentation.readthedocs.io/en/dev-1.x/get_started.html) and [data preparation](https://mmsegmentation.readthedocs.io/en/dev-1.x/user_guides/2_dataset_prepare.html).

### Train

@ -115,18 +85,12 @@ bash tools/benchmarks/mmsegmentation/mim_dist_train.sh ${CONFIG} ${PRETRAIN} ${G

bash tools/benchmarks/mmsegmentation/mim_slurm_train.sh ${PARTITION} ${CONFIG} ${PRETRAIN}
```

Remarks:

- `${CONFIG}`: Use config files under `configs/benchmarks/mmsegmentation/`. Since the OpenMMLab codebases support referring to config files across different repositories, we can easily use the MMSegmentation config files, for example:

  ```shell
  _base_ = 'mmseg::fcn/fcn_r50-d8_4xb2-40k_cityscapes-769x769.py'
  ```

  Writing your config files from scratch is also supported.

- `${PARTITION}`: the pre-trained model file
- `${CONFIG}`: Use the config file path in MMSegmentation directly. For some algorithms, we have modified config files,
  which can be found in the `benchmarks` folder under the corresponding algorithm folder. You can also write your config file from scratch.
- `${PRETRAIN}`: the pre-trained model file
- `${GPUS}`: The number of GPUs you want to use for training; we adopt 4 GPUs for segmentation tasks by default.

Example:
@ -1,237 +0,0 @@

# How to Fine-tune Models

- [How to Fine-tune Models](#how-to-fine-tune-models)
  - [Inherit base configs](#inherit-base-configs)
  - [Specify the pre-trained model in configs](#specify-the-pre-trained-model-in-configs)
  - [Modify the dataset](#modify-the-dataset)
  - [Modify the schedule settings](#modify-the-schedule-settings)
  - [Start training](#start-training)
  - [Specify the pre-trained model in the command line](#specify-the-pre-trained-model-in-the-command-line)

In many scenarios, we need to quickly apply a model to a new dataset, but training from scratch usually converges slowly, and this uncertainty wastes extra time.
Usually, an existing model trained on a large dataset provides a more effective prior than random initialization; roughly speaking, learning on top of it is what we call fine-tuning.

It has been proven that models pre-trained on the ImageNet dataset work well on other datasets and other downstream tasks.
Hence, this tutorial shows how to use the pre-trained models provided in the [Model Zoo](../modelzoo_statistics.md) on other datasets to obtain better performance.

There are two steps to fine-tune a model on a new dataset:

- Add support for the new dataset following [Prepare Dataset](dataset_prepare.md).
- Modify the config file as discussed in this tutorial.

Assume we now have a ResNet-50 model trained on the ImageNet-2012 dataset and want to fine-tune it on
the CIFAR-10 dataset; we need to modify five parts of the config file.

## Inherit base configs

First, create a new config file `configs/tutorial/resnet50_finetune_cifar.py` to store our config; of course, the file name can be chosen freely.

To reuse the common parts of different base configs, we support inheriting from multiple existing configs, including:

- Model config: to fine-tune a ResNet-50 model, inherit `_base_/models/resnet50.py` to build the basic structure of the model.
- Dataset config: to use the CIFAR-10 dataset, inherit `_base_/datasets/cifar10_bs16.py`.
- Schedule config: inherit the basic CIFAR-10 schedule config for batch size 128, `_base_/schedules/cifar10_bs128.py`.
- Runtime config: to keep the runtime-related settings, such as the default training hooks and the environment config, inherit `_base_/default_runtime.py`.

To inherit these config files, put the following code at the beginning of our config file.

```python
_base_ = [
    '../_base_/models/resnet50.py',
    '../_base_/datasets/cifar10_bs16.py',
    '../_base_/schedules/cifar10_bs128.py',
    '../_base_/default_runtime.py',
]
```

Besides, you can also write the whole config file without inheritance, like
[`configs/lenet/lenet5_mnist.py`](https://github.com/open-mmlab/mmpretrain/blob/main/configs/lenet/lenet5_mnist.py).

## Specify the pre-trained model in configs

When fine-tuning a model, we usually want to load the pre-trained weights in the backbone and train a new classification head on our dataset.

To load the pre-trained backbone, we need to modify the backbone's initialization settings and use the
`Pretrained` initialization type. In addition, we use `prefix='backbone'` in the initialization settings
to tell the initialization function the prefix of the submodule to load; `backbone` here means loading the backbone part of the checkpoint.
For convenience, we use an online weight-file link here, which
will be downloaded automatically before training; you can also download the model beforehand and use a local path.

Next, the new config needs to modify the head's config according to the number of classes of the new dataset. Only
the `num_classes` of the head needs to be changed.

```python
model = dict(
    backbone=dict(
        init_cfg=dict(
            type='Pretrained',
            checkpoint='https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_8xb32_in1k_20210831-ea4938fc.pth',
            prefix='backbone',
        )),
    head=dict(num_classes=10),
)
```

```{tip}
Here we only need to set the parts of the config we want to modify; the other settings are inherited automatically from our parent configs.
```

Besides, when the new, small dataset has a data distribution similar to the large pre-training dataset, we may want to
freeze the parameters of the first few stages of the backbone and train only the later layers and the classification head. This helps the network
keep the ability to extract low-level features learned from the pre-trained weights during the subsequent training. In MMPretrain,
this can be done with a single `frozen_stages` argument. For example, to freeze the parameters of the first two stages,
just add the following line to the config above:

```python
model = dict(
    backbone=dict(
        frozen_stages=2,
        init_cfg=dict(
            type='Pretrained',
            checkpoint='https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_8xb32_in1k_20210831-ea4938fc.pth',
            prefix='backbone',
        )),
    head=dict(num_classes=10),
)
```

```{note}
Not all backbones support the `frozen_stages` argument yet. Before using it, please check the
[documentation](https://mmpretrain.readthedocs.io/zh_CN/main/api.html#module-mmpretrain.models.backbones)
to confirm whether your backbone supports it.
```
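
To double-check that the freezing takes effect, you can build the model from the config and count the parameters that no longer require gradients; a minimal sketch, assuming `init_model` accepts a config path without a checkpoint:

```python
# Minimal sketch: verify which backbone parameters are frozen.
from mmpretrain.apis import init_model

model = init_model('configs/tutorial/resnet50_finetune_cifar.py')
frozen = [n for n, p in model.backbone.named_parameters() if not p.requires_grad]
print(len(frozen))  # parameters of the stem and the first two stages should be listed
```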

## Modify the dataset

When fine-tuning on a new dataset, we usually need to modify some dataset configs. Here,
we need to resize the images of the CIFAR-10 dataset from 32 to 224 to match the input of the model
pre-trained on ImageNet. This can be done by modifying the dataset's pipeline and overriding the dataloaders.

```python
# dataset pipeline settings
train_pipeline = [
    dict(type='RandomCrop', crop_size=32, padding=4),
    dict(type='RandomFlip', prob=0.5, direction='horizontal'),
    dict(type='Resize', scale=224),
    dict(type='PackInputs'),
]
test_pipeline = [
    dict(type='Resize', scale=224),
    dict(type='PackInputs'),
]
# dataloader settings
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
test_dataloader = val_dataloader
```
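
To see what the modified pipeline produces, you can run it on a dummy CIFAR-sized image; a rough sketch, assuming the transforms are registered (e.g. after `register_all_modules`) and that `PackInputs` emits an `inputs` tensor:

```python
# Rough sketch: run the modified test pipeline on a dummy 32x32 image.
import numpy as np
from mmcv.transforms import Compose
from mmpretrain.utils import register_all_modules

register_all_modules()  # register mmpretrain transforms such as PackInputs
pipe = Compose(test_pipeline)
sample = {'img': np.zeros((32, 32, 3), dtype=np.uint8)}  # a fake CIFAR-sized image
out = pipe(sample)
print(out['inputs'].shape)  # expect a 3x224x224 tensor after Resize + PackInputs
```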

## Modify the schedule settings

The hyper-parameters for fine-tuning differ from the default schedule; it usually only requires a smaller learning rate and faster decay.

```python
# optimizer learning rate for batch size 128
optim_wrapper = dict(
    optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001))
# learning rate decay policy
param_scheduler = dict(
    type='MultiStepLR', by_epoch=True, milestones=[15], gamma=0.1)
```

```{tip}
For more modifiable details, see [How to write a config file](config.md).
```

## Start training

Now we have finished the fine-tuning config file, and the complete file is as follows:

```python
_base_ = [
    '../_base_/models/resnet50.py',
    '../_base_/datasets/cifar10_bs16.py',
    '../_base_/schedules/cifar10_bs128.py',
    '../_base_/default_runtime.py',
]

# model settings
model = dict(
    backbone=dict(
        frozen_stages=2,
        init_cfg=dict(
            type='Pretrained',
            checkpoint='https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_8xb32_in1k_20210831-ea4938fc.pth',
            prefix='backbone',
        )),
    head=dict(num_classes=10),
)

# dataset settings
# dataset pipeline settings
train_pipeline = [
    dict(type='RandomCrop', crop_size=32, padding=4),
    dict(type='RandomFlip', prob=0.5, direction='horizontal'),
    dict(type='Resize', scale=224),
    dict(type='PackInputs'),
]
test_pipeline = [
    dict(type='Resize', scale=224),
    dict(type='PackInputs'),
]
# dataloader settings
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
test_dataloader = val_dataloader

# schedule settings
# optimizer learning rate for batch size 128
optim_wrapper = dict(
    optimizer=dict(type='SGD', lr=0.01, momentum=0.9, weight_decay=0.0001))
# learning rate decay policy
param_scheduler = dict(
    type='MultiStepLR', by_epoch=True, milestones=[15], gamma=0.1)
```

Next, we train our model on a machine with 8 GPUs with the following command:

```shell
bash tools/dist_train.sh configs/tutorial/resnet50_finetune_cifar.py 8
```

Of course, we can also train the model on a single GPU with the following command:

```shell
python tools/train.py configs/tutorial/resnet50_finetune_cifar.py
```

But if we train with a single GPU, the dataset settings need to be modified as follows:

```python
train_dataloader = dict(
    batch_size=128,
    dataset=dict(pipeline=train_pipeline),
)
val_dataloader = dict(
    batch_size=128,
    dataset=dict(pipeline=test_pipeline),
)
test_dataloader = val_dataloader
```

This is because our schedule is set for a batch size of 128. In the parent config,
`batch_size=16` is set per GPU; with 8 GPUs the total batch size is 128. With a single
GPU, you have to change it to `batch_size=128` manually to match the schedule.

### Specify the pre-trained model in the command line

If you don't want to modify the config file, you can use `--cfg-options` to add your pre-trained model file to `init_cfg`.

For example, the following command also loads a pre-trained model:

```shell
bash tools/dist_train.sh configs/tutorial/resnet50_finetune_cifar.py 8 \
    --cfg-options model.backbone.init_cfg.type='Pretrained' \
    model.backbone.init_cfg.checkpoint='https://download.openmmlab.com/mmselfsup/1.x/mocov3/mocov3_resnet50_8xb512-amp-coslr-100e_in1k/mocov3_resnet50_8xb512-amp-coslr-100e_in1k_20220927-f1144efa.pth' \
    model.backbone.init_cfg.prefix='backbone'
```
@ -1,19 +1,15 @@

# Inference with existing models

- [Inference with existing models](#inference-with-existing-models)
  - [Inference on a single image](#inference-on-a-single-image)

MMPretrain provides pre-trained models in the [Model Zoo](../modelzoo_statistics.md).
This note shows **how to use existing models to inference on given images**.

As for how to test existing models on standard datasets, please see this [guide](./train_test.md#test).
As for how to test existing models on standard datasets, please see this [guide](./test.md).

## Inference on a single image

MMPretrain provides high-level Python APIs for image inference:

- [`get_model`](mmpretrain.apis.get_model): Get a model by its name.
- [`init_model`](mmpretrain.apis.init_model): Initialize a model with a config file and a checkpoint.
- [`inference_model`](mmpretrain.apis.inference_model): Inference on a given image.

Here is an example of initializing a model with ImageNet-1k pre-trained weights and inferencing on a given image.

@ -33,7 +29,7 @@ model = get_model('resnet50_8xb32_in1k', pretrained=True, device="cpu") # `devi

result = inference_model(model, img_path)
```

`result` is a dict containing `pred_label`, `pred_score`, `pred_scores` and `pred_class`, like this:

```text
{"pred_label":65,"pred_score":0.6649366617202759,"pred_class":"sea snake", "pred_scores": [..., 0.6649366617202759, ...]}
```
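
Building on the snippet above, the fields of `result` can be consumed directly; a small sketch (the confidence threshold is an arbitrary illustration):

```python
# Minimal sketch: use the prediction dict returned by `inference_model`.
pred_class = result['pred_class']
pred_score = result['pred_score']
if pred_score >= 0.5:  # arbitrary confidence threshold, for illustration only
    print(f'{pred_class}: {pred_score:.3f}')
else:
    print('low-confidence prediction, label index:', result['pred_label'])
```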
@ -1,12 +1,5 @@

# Test

- [Test](#test)
  - [Test with a single GPU on a single machine](#test-with-a-single-gpu-on-a-single-machine)
  - [Test with multiple GPUs on a single machine](#test-with-multiple-gpus-on-a-single-machine)
  - [Test with multiple machines](#test-with-multiple-machines)
    - [Multiple machines in the same network](#multiple-machines-in-the-same-network)
    - [Multiple machines managed with slurm](#multiple-machines-managed-with-slurm)

## Test with a single GPU on a single machine

You can use `tools/test.py` to test a model on a machine with a CPU or a GPU.

@ -37,7 +30,7 @@ CUDA_VISIBLE_DEVICES=-1 python tools/test.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [

| `--show`                              | Show the prediction result images in a window. |
| `--interval INTERVAL`                 | The interval of samples to visualize. |
| `--wait-time WAIT_TIME`               | The display time of every window, in seconds. |
| `--no-pin-memory`                     | Whether to disable the `pin_memory` option in dataloaders. |
| `--tta`                               | Whether to enable Test-Time Augmentation (TTA). If the config file has `tta_pipeline` and `tta_model`, they are used to specify the TTA transforms and how to merge the TTA results. Otherwise, flip TTA is applied by averaging the classification scores. |
| `--launcher {none,pytorch,slurm,mpi}` | Options for the launcher, defaults to "none". |
@ -1,11 +1,7 @@

# Train

- [Train](#train)
  - [Train with a single GPU on a single machine](#train-with-a-single-gpu-on-a-single-machine)
  - [Train with multiple GPUs on a single machine](#train-with-multiple-gpus-on-a-single-machine)
  - [Train with multiple machines](#train-with-multiple-machines)
    - [Multiple machines in the same network](#multiple-machines-in-the-same-network)
    - [Multiple machines managed with slurm](#multiple-machines-managed-with-slurm)
In this tutorial, we introduce how to launch training jobs with the scripts provided in MMPretrain.
For some concrete training examples, see [How to pretrain with custom datasets](../notes/pretrain_custom_dataset.md) and [How to fine-tune with custom datasets](../notes/finetune_custom_dataset.md).

## Train with a single GPU on a single machine

@ -33,8 +29,8 @@ CUDA_VISIBLE_DEVICES=-1 python tools/train.py ${CONFIG_FILE} [ARGS]

| `--amp`                               | Enable automatic-mixed-precision training. |
| `--no-validate`                       | **Not suggested.** Disable accuracy validation on the validation set during training. |
| `--auto-scale-lr`                     | Automatically scale the learning rate according to the actual batch size and the preset base batch size. |
| `--no-pin-memory`                     | Whether to disable the `pin_memory` option in dataloaders. |
| `--no-persistent-workers`             | Whether to disable the `persistent_workers` option in dataloaders. |
| `--cfg-options CFG_OPTIONS`           | Override some settings in the config file. Key-value pairs in the form `xxx=yyy` are merged into the config read from the file. You can specify list values as `key="[a,b]"` or `key=a,b`; nesting is supported, e.g. `key="[(a,b),(c,d)]"`, where the quotation marks are necessary. No whitespace is allowed inside each override item. |
| `--launcher {none,pytorch,slurm,mpi}` | Options for the launcher, defaults to "none". |
@ -6,7 +6,7 @@ from typing import Optional, Sequence

import mmengine
from mmcv.transforms import Compose
from mmengine.fileio import FileClient
from mmengine.fileio import get_file_backend

from .builder import DATASETS

@ -136,10 +136,6 @@ class MultiTaskDataset:

            represents an operation defined in
            :mod:`mmpretrain.datasets.pipelines`. Defaults to an empty tuple.
        test_mode (bool): in train mode or test mode. Defaults to False.
        file_client_args (dict, optional): Arguments to instantiate a
            FileClient. See :class:`mmengine.fileio.FileClient` for details.
            If None, automatically inference from the ``data_root``.
            Defaults to None.
    """
    METAINFO = dict()

@ -149,18 +145,15 @@ class MultiTaskDataset:

                 data_root: Optional[str] = None,
                 data_prefix: Optional[str] = None,
                 pipeline: Sequence = (),
                 test_mode: bool = False,
                 file_client_args: Optional[dict] = None):
                 test_mode: bool = False):

        self.data_root = expanduser(data_root)

        # Inference the file client
        if self.data_root is not None:
            file_client = FileClient.infer_client(
                file_client_args, uri=self.data_root)
            self.file_backend = get_file_backend(uri=self.data_root)
        else:
            file_client = FileClient(file_client_args)
        self.file_client: FileClient = file_client
            self.file_backend = None

        self.ann_file = self._join_root(expanduser(ann_file))
        self.data_prefix = self._join_root(data_prefix)

@ -189,7 +182,7 @@ class MultiTaskDataset:

        if isabs(path):
            return path

        joined_path = self.file_client.join_path(self.data_root, path)
        joined_path = self.file_backend.join_path(self.data_root, path)
        return joined_path

    @classmethod
@ -20,6 +20,7 @@ from .norm import GRN, LayerNorm2d, build_norm_layer

from .position_encoding import (ConditionalPositionEncoding,
                                PositionEncodingFourier,
                                build_2d_sincos_position_embedding)
from .res_layer_extra_norm import ResLayerExtraNorm
from .se_layer import SELayer
from .vector_quantizer import NormEMAVectorQuantizer

@ -67,4 +68,5 @@ __all__ = [

    'TwoNormDataPreprocessor',
    'VideoDataPreprocessor',
    'CosineEMA',
    'ResLayerExtraNorm',
]
@ -116,8 +116,18 @@ def build_norm_layer(cfg: dict, num_features: int) -> nn.Module:

        raise KeyError(f'Cannot find {layer_type} in registry under scope '
                       f'name {MODELS.scope}')

    layer = norm_layer(num_features, **cfg_)
    requires_grad = cfg_.pop('requires_grad', True)
    cfg_.setdefault('eps', 1e-5)

    if layer_type != 'GN':
        layer = norm_layer(num_features, **cfg_)
    else:
        layer = norm_layer(num_channels=num_features, **cfg_)

    if layer_type == 'SyncBN' and hasattr(layer, '_specify_ddp_gpu_num'):
        layer._specify_ddp_gpu_num(1)

    for param in layer.parameters():
        param.requires_grad = requires_grad

    return layer
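
For context, a quick usage sketch of the updated helper, assuming it is exported as `mmpretrain.models.utils.build_norm_layer` (as the `__init__.py` change above suggests) and that `'BN'`/`'GN'` are registered norm layers:

```python
# Minimal sketch: build norm layers from config dicts.
from mmpretrain.models.utils import build_norm_layer

bn = build_norm_layer(dict(type='BN', requires_grad=False), 64)
assert all(not p.requires_grad for p in bn.parameters())

# GroupNorm takes `num_channels`, which the new GN branch handles.
gn = build_norm_layer(dict(type='GN', num_groups=4), 64)
print(type(bn).__name__, type(gn).__name__)
```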
@ -0,0 +1,31 @@

# Copyright (c) OpenMMLab. All rights reserved.
from .norm import build_norm_layer

try:
    from mmdet.models.backbones import ResNet
    from mmdet.models.roi_heads.shared_heads.res_layer import ResLayer
    from mmdet.registry import MODELS

    @MODELS.register_module()
    class ResLayerExtraNorm(ResLayer):
        """Add extra norm to original ``ResLayer``."""

        def __init__(self, *args, **kwargs):
            super(ResLayerExtraNorm, self).__init__(*args, **kwargs)

            block = ResNet.arch_settings[kwargs['depth']][0]
            self.add_module(
                'norm',
                build_norm_layer(self.norm_cfg,
                                 64 * 2**self.stage * block.expansion))

        def forward(self, x):
            """Forward function."""
            res_layer = getattr(self, f'layer{self.stage + 1}')
            norm = getattr(self, 'norm')
            x = res_layer(x)
            out = norm(x)
            return out

except ImportError:
    ResLayerExtraNorm = None
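
For context, a sketch of constructing this shared head on its own (requires MMDetection; the arguments mirror `ResLayer`'s usual signature and the values are illustrative):

```python
# Sketch: instantiate the extra-norm shared head directly.
from mmpretrain.models.utils import ResLayerExtraNorm

head = ResLayerExtraNorm(
    depth=50,   # selects the Bottleneck block via ResNet.arch_settings
    stage=3,    # builds `layer4` plus a norm over 64 * 2**3 * 4 = 2048 channels
    stride=2,
    norm_cfg=dict(type='BN'),
)
```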
@ -98,8 +98,9 @@ def plot_curve_helper(log_dicts, metrics, args, legend):

        elif key in train_keys:
            plot_phase_train(key, log_dict['train'], curve_label)
        else:
            raise ValueError(f'Invalid key "{key}", please choose from '
                             f'{set.union(train_keys, val_keys)}.')
            raise ValueError(
                f'Invalid key "{key}", please choose from '
                f'{set.union(set(train_keys), set(val_keys))}.')
    plt.legend()
@ -45,8 +45,8 @@ def main():

    activations = analysis_results['activations_str']
    out_table = analysis_results['out_table']
    out_arch = analysis_results['out_arch']
    print(out_table)
    print(out_arch)
    print(out_table)
    split_line = '=' * 30
    print(f'{split_line}\nInput shape: {input_shape}\n'
          f'Flops: {flops}\nParams: {params}\n'
@ -13,4 +13,5 @@ mim train mmseg $CFG \

    --cfg-options model.backbone.init_cfg.type=Pretrained \
    model.backbone.init_cfg.checkpoint=$PRETRAIN \
    model.backbone.init_cfg.prefix="backbone." \
    model.pretrained=None \
    $PY_ARGS
@ -21,4 +21,5 @@ mim train mmseg $CFG \

    --cfg-options model.backbone.init_cfg.type=Pretrained \
    model.backbone.init_cfg.checkpoint=$PRETRAIN \
    model.backbone.init_cfg.prefix="backbone." \
    model.pretrained=None \
    $PY_ARGS
@ -3,13 +3,16 @@ import base64

import os

import mmcv
import numpy as np
import torch
from ts.torch_handler.base_handler import BaseHandler

from mmpretrain.apis import inference_model, init_model
import mmpretrain.models
from mmpretrain.apis import (ImageClassificationInferencer,
                             ImageRetrievalInferencer, get_model)


class MMclsHandler(BaseHandler):
class MMPreHandler(BaseHandler):

    def initialize(self, context):
        properties = context.system_properties

@ -24,7 +27,14 @@ class MMclsHandler(BaseHandler):

        checkpoint = os.path.join(model_dir, serialized_file)
        self.config_file = os.path.join(model_dir, 'config.py')

        self.model = init_model(self.config_file, checkpoint, self.device)
        model = get_model(self.config_file, checkpoint, self.device)
        if isinstance(model, mmpretrain.models.ImageClassifier):
            self.inferencer = ImageClassificationInferencer(model)
        elif isinstance(model, mmpretrain.models.ImageToImageRetriever):
            self.inferencer = ImageRetrievalInferencer(model)
        else:
            raise NotImplementedError(
                f'No available inferencer for {type(model)}')
        self.initialized = True

    def preprocess(self, data):

@ -42,10 +52,17 @@ class MMclsHandler(BaseHandler):

    def inference(self, data, *args, **kwargs):
        results = []
        for image in data:
            results.append(inference_model(self.model, image))
            results.append(self.inferencer(image)[0])
        return results

    def postprocess(self, data):
        processed_data = []
        for result in data:
            result['pred_label'] = int(result['pred_label'])
        return data
            processed_result = {}
            for k, v in result.items():
                if isinstance(v, (torch.Tensor, np.ndarray)):
                    processed_result[k] = v.tolist()
                else:
                    processed_result[k] = v
            processed_data.append(processed_result)
        return processed_data
@ -4,7 +4,7 @@ from argparse import ArgumentParser

import numpy as np
import requests

from mmpretrain.apis import inference_model, init_model
from mmpretrain.apis import get_model, inference_model


def parse_args():

@ -25,7 +25,7 @@ def parse_args():

def main(args):
    # Inference single image by native apis.
    model = init_model(args.config, args.checkpoint, device=args.device)
    model = get_model(args.config, args.checkpoint, device=args.device)
    model_result = inference_model(model, args.img)

    # Inference single image by torchserve engine.