[Feature] Support RepLKnet backbone. (#1129)

* update replknet configs

* update replknet test

* update replknet model

* update replknet model

* update replknet model

* update replknet model

* Fix docs and config names

Co-authored-by: mzr1996 <mzr1996@163.com>
pull/1177/head
Jiahao Wang 2022-11-21 10:18:58 +08:00 committed by GitHub
parent c3c1cb93aa
commit 72c6bc4864
24 changed files with 1496 additions and 0 deletions

View File

@ -151,6 +151,7 @@ Results and models are available in the [model zoo](https://mmclassification.rea
- [x] [HorNet](https://github.com/open-mmlab/mmclassification/tree/master/configs/hornet)
- [x] [MobileViT](https://github.com/open-mmlab/mmclassification/tree/1.x/configs/mobilevit)
- [x] [DaViT](https://github.com/open-mmlab/mmclassification/tree/1.x/configs/davit)
- [x] [RepLKNet](https://github.com/open-mmlab/mmclassification/tree/1.x/configs/replknet)
</details>

View File

@ -150,6 +150,7 @@ mim install -e .
- [x] [HorNet](https://github.com/open-mmlab/mmclassification/tree/master/configs/hornet)
- [x] [MobileViT](https://github.com/open-mmlab/mmclassification/tree/1.x/configs/mobilevit)
- [x] [DaViT](https://github.com/open-mmlab/mmclassification/tree/1.x/configs/davit)
- [x] [RepLKNet](https://github.com/open-mmlab/mmclassification/tree/1.x/configs/replknet)
</details>

View File

@ -0,0 +1,57 @@
# dataset settings
dataset_type = 'ImageNet'
data_preprocessor = dict(
# RGB format normalization parameters
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
# convert image from BGR to RGB
to_rgb=True,
)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='RandomResizedCrop',
scale=384,
backend='pillow',
interpolation='bicubic'),
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
dict(type='PackClsInputs'),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='Resize', scale=384, backend='pillow', interpolation='bicubic'),
dict(type='PackClsInputs'),
]
train_dataloader = dict(
batch_size=16,
num_workers=5,
dataset=dict(
type=dataset_type,
data_root='data/imagenet',
ann_file='meta/train.txt',
data_prefix='train',
pipeline=train_pipeline),
sampler=dict(type='DefaultSampler', shuffle=True),
persistent_workers=True,
)
val_dataloader = dict(
batch_size=16,
num_workers=5,
dataset=dict(
type=dataset_type,
data_root='data/imagenet',
ann_file='meta/val.txt',
data_prefix='val',
pipeline=test_pipeline),
sampler=dict(type='DefaultSampler', shuffle=False),
persistent_workers=True,
)
val_evaluator = dict(type='Accuracy', topk=(1, 5))
# If you want standard test, please manually configure the test dataset
test_dataloader = val_dataloader
test_evaluator = val_evaluator

View File

@ -0,0 +1,63 @@
# dataset settings
dataset_type = 'ImageNet'
data_preprocessor = dict(
# RGB format normalization parameters
mean=[122.5, 122.5, 122.5],
std=[122.5, 122.5, 122.5],
# convert image from BGR to RGB
to_rgb=True,
)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='RandomResizedCrop',
scale=320,
backend='pillow',
interpolation='bicubic'),
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
dict(type='PackClsInputs'),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='ResizeEdge',
scale=int(320 / 224 * 256),
edge='short',
backend='pillow',
interpolation='bicubic'),
dict(type='CenterCrop', crop_size=320),
dict(type='PackClsInputs'),
]
train_dataloader = dict(
batch_size=8,
num_workers=5,
dataset=dict(
type=dataset_type,
data_root='data/imagenet',
ann_file='meta/train.txt',
data_prefix='train',
pipeline=train_pipeline),
sampler=dict(type='DefaultSampler', shuffle=True),
persistent_workers=True,
)
val_dataloader = dict(
batch_size=8,
num_workers=5,
dataset=dict(
type=dataset_type,
data_root='data/imagenet',
ann_file='meta/val.txt',
data_prefix='val',
pipeline=test_pipeline),
sampler=dict(type='DefaultSampler', shuffle=False),
persistent_workers=True,
)
val_evaluator = dict(type='Accuracy', topk=(1, 5))
# If you want standard test, please manually configure the test dataset
test_dataloader = val_dataloader
test_evaluator = val_evaluator

View File

@ -0,0 +1,15 @@
model = dict(
type='ImageClassifier',
backbone=dict(
type='RepLKNet',
arch='31B',
out_indices=(3, ),
),
neck=dict(type='GlobalAveragePooling'),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=1024,
loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
topk=(1, 5),
))

View File

@ -0,0 +1,15 @@
model = dict(
type='ImageClassifier',
backbone=dict(
type='RepLKNet',
arch='31L',
out_indices=(3, ),
),
neck=dict(type='GlobalAveragePooling'),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=1536,
loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
topk=(1, 5),
))

View File

@ -0,0 +1,15 @@
model = dict(
type='ImageClassifier',
backbone=dict(
type='RepLKNet',
arch='XL',
out_indices=(3, ),
),
neck=dict(type='GlobalAveragePooling'),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=2048,
loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
topk=(1, 5),
))

View File

@ -0,0 +1,95 @@
# RepLKNet
> [Scaling Up Your Kernels to 31x31: Revisiting Large Kernel Design in CNNs](https://arxiv.org/abs/2203.06717)
<!-- [ALGORITHM] -->
## Abstract
We revisit large kernel design in modern convolutional neural networks (CNNs). Inspired by recent advances in vision transformers (ViTs), in this paper, we demonstrate that using a few large convolutional kernels instead of a stack of small kernels could be a more powerful paradigm. We suggested five guidelines, e.g., applying re-parameterized large depth-wise convolutions, to design efficient high-performance large-kernel CNNs. Following the guidelines, we propose RepLKNet, a pure CNN architecture whose kernel size is as large as 31×31, in contrast to the commonly used 3×3. RepLKNet greatly closes the performance gap between CNNs and ViTs, e.g., achieving results comparable or superior to Swin Transformer on ImageNet and a few typical downstream tasks, with lower latency. RepLKNet also shows nice scalability to big data and large models, obtaining 87.8% top-1 accuracy on ImageNet and 56.0% mIoU on ADE20K, which is very competitive among the state of the art with similar model sizes. Our study further reveals that, in contrast to small-kernel CNNs, large-kernel CNNs have much larger effective receptive fields and higher shape bias rather than texture bias.
<div align=center>
<img src="https://user-images.githubusercontent.com/48375204/197546040-cdf078c3-7fbd-400f-8b27-01668c8dfebf.png" width="60%"/>
</div>
## Results and models
### ImageNet-1k
| Model | Resolution | Pretrained Dataset | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download |
| :------------: | :--------: | :----------------: | :-----------------------------: | :-----------------------------: | :-------: | :-------: | :------------------------------------: | :--------------------------------------: |
| RepLKNet-31B\* | 224x224 | From Scratch | 79.9 (train) \| 79.5 (deploy) | 15.6 (train) \| 15.4 (deploy) | 83.48 | 96.57 | [config (train)](./replknet-31B_32xb64_in1k.py) \| [config (deploy)](./deploy/replknet-31B-deploy_32xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/replknet/replknet-31B_3rdparty_in1k_20221118-fd08e268.pth) |
| RepLKNet-31B\* | 384x384 | From Scratch | 79.9 (train) \| 79.5 (deploy) | 46.0 (train) \| 45.3 (deploy) | 84.84 | 97.34 | [config (train)](./replknet-31B_32xb64_in1k-384px.py) \| [config (deploy)](./deploy/replknet-31B-deploy_32xb64_in1k-384px.py) | [model](https://download.openmmlab.com/mmclassification/v0/replknet/replknet-31B_3rdparty_in1k-384px_20221118-03a170ce.pth) |
| RepLKNet-31B\* | 224x224 | ImageNet-21K | 79.9 (train) \| 79.5 (deploy) | 15.6 (train) \| 15.4 (deploy) | 85.20 | 97.56 | [config (train)](./replknet-31B_32xb64_in1k.py) \| [config (deploy)](./deploy/replknet-31B-deploy_32xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/replknet/replknet-31B_in21k-pre_3rdparty_in1k_20221118-54ed5c46.pth) |
| RepLKNet-31B\* | 384x384 | ImageNet-21K | 79.9 (train) \| 79.5 (deploy) | 46.0 (train) \| 45.3 (deploy) | 85.99 | 97.75 | [config (train)](./replknet-31B_32xb64_in1k-384px.py) \| [config (deploy)](./deploy/replknet-31B-deploy_32xb64_in1k-384px.py) | [model](https://download.openmmlab.com/mmclassification/v0/replknet/replknet-31B_in21k-pre_3rdparty_in1k-384px_20221118-76c92b24.pth) |
| RepLKNet-31L\* | 384x384 | ImageNet-21K | 172.7 (train) \| 172.0 (deploy) | 97.2 (train) \| 97.0 (deploy) | 86.63 | 98.00 | [config (train)](./replknet-31L_32xb64_in1k-384px.py) \| [config (deploy)](./deploy/replknet-31L-deploy_32xb64_in1k-384px.py) | [model](https://download.openmmlab.com/mmclassification/v0/replknet/replknet-31L_in21k-pre_3rdparty_in1k-384px_20221118-dc3fc07c.pth) |
| RepLKNet-XL\* | 320x320 | MegData-73M | 335.4 (train) \| 335.0 (deploy) | 129.6 (train) \| 129.0 (deploy) | 87.57 | 98.39 | [config (train)](./replknet-XL_32xb64_in1k-320px.py) \| [config (deploy)](./deploy/replknet-XL-deploy_32xb64_in1k-320px.py) | [model](https://download.openmmlab.com/mmclassification/v0/replknet/replknet-XL_meg73m-pre_3rdparty_in1k-320px_20221118-88259b1d.pth) |
*Models with \* are converted from the [official repo](https://github.com/DingXiaoH/RepLKNet-pytorch). The config files of these models are only for validation. We don't guarantee the training accuracy of these configs and welcome you to contribute your reproduction results.*
## How to use
The checkpoints provided are all `training-time` models. Use the reparameterize tool to switch them to the more efficient `inference-time` architecture, which not only has fewer parameters but also requires less computation.
### Use tool
Use the provided tool to reparameterize the given model and save the checkpoint:
```bash
python tools/convert_models/reparameterize_model.py ${CFG_PATH} ${SRC_CKPT_PATH} ${TARGET_CKPT_PATH}
```
`${CFG_PATH}` is the config file path, `${SRC_CKPT_PATH}` is the source checkpoint file path, and `${TARGET_CKPT_PATH}` is the target deploy weight file path.
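For example, to reparameterize the RepLKNet-31B checkpoint (the target file name below is illustrative):
```bash
python tools/convert_models/reparameterize_model.py \
    configs/replknet/replknet-31B_32xb64_in1k.py \
    replknet-31B_3rdparty_in1k_20221118-fd08e268.pth \
    replknet-31B-deploy_in1k.pth
```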
To use the reparameterized weights, you must switch to the corresponding deploy config file.
```bash
python tools/test.py ${Deploy_CFG} ${Deploy_Checkpoint} --metrics accuracy
```
### In the code
Use `backbone.switch_to_deploy()` or `classifier.backbone.switch_to_deploy()` to switch to the deploy mode. For example:
```python
from mmcls.models import build_backbone
backbone_cfg = dict(type='RepLKNet', arch='31B')
backbone = build_backbone(backbone_cfg)
backbone.switch_to_deploy()
```
or
```python
from mmcls.models import build_classifier
cfg = dict(
type='ImageClassifier',
backbone=dict(
type='RepLKNet',
arch='31B'),
neck=dict(type='GlobalAveragePooling'),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=1024,
loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
topk=(1, 5),
))
classifier = build_classifier(cfg)
classifier.backbone.switch_to_deploy()
```
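You can also verify that the switch preserves the model's function (a minimal sketch; the tolerances are illustrative):
```python
import torch
from mmcls.models import build_backbone

backbone = build_backbone(dict(type='RepLKNet', arch='31B'))
backbone.eval()
x = torch.randn(1, 3, 224, 224)
with torch.no_grad():
    feat_train = backbone(x)[0]   # training-time (multi-branch) output
backbone.switch_to_deploy()
with torch.no_grad():
    feat_deploy = backbone(x)[0]  # deploy-time (merged) output
# Both structures compute the same function up to numerical precision.
assert torch.allclose(feat_train, feat_deploy, atol=1e-5, rtol=1e-4)
```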
## Citation
```
@inproceedings{ding2022scaling,
title={Scaling up your kernels to 31x31: Revisiting large kernel design in cnns},
author={Ding, Xiaohan and Zhang, Xiangyu and Han, Jungong and Ding, Guiguang},
booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
pages={11963--11975},
year={2022}
}
```

View File

@ -0,0 +1,3 @@
_base_ = '../replknet-31B_32xb64_in1k-384px.py'
model = dict(backbone=dict(small_kernel_merged=True))

View File

@ -0,0 +1,3 @@
_base_ = '../replknet-31B_32xb64_in1k.py'
model = dict(backbone=dict(small_kernel_merged=True))

View File

@ -0,0 +1,3 @@
_base_ = '../replknet-31L_32xb64_in1k-384px.py'
model = dict(backbone=dict(small_kernel_merged=True))

View File

@ -0,0 +1,3 @@
_base_ = '../replknet-XL_32xb64_in1k-320px.py'
model = dict(backbone=dict(small_kernel_merged=True))

View File

@ -0,0 +1,129 @@
Collections:
- Name: RepLKNet
Metadata:
Training Data: ImageNet-1k
Architecture:
- Large-Kernel Convolution
- VGG-style Neural Network
Paper:
URL: https://arxiv.org/abs/2203.06717
Title: 'Scaling Up Your Kernels to 31x31: Revisiting Large Kernel Design in CNNs'
README: configs/replknet/README.md
Code:
URL: https://github.com/open-mmlab/mmclassification/blob/v1.0.0rc3/mmcls/models/backbones/replknet.py
Version: v1.0.0rc3
Models:
- Name: replknet-31B_3rdparty_in1k
In Collection: RepLKNet
Config: configs/replknet/replknet-31B_32xb64_in1k.py
Metadata:
FLOPs: 15636547584
Parameters: 79864168
Results:
- Dataset: ImageNet-1k
Task: Image Classification
Metrics:
Top 1 Accuracy: 83.48
Top 5 Accuracy: 96.57
Weights: https://download.openmmlab.com/mmclassification/v0/replknet/replknet-31B_3rdparty_in1k_20221118-fd08e268.pth
Converted From:
Weights: https://drive.google.com/u/0/uc?id=1azQUiCxK9feYVkkrPqwVPBtNsTzDrX7S&export=download
Code: https://github.com/DingXiaoH/RepLKNet-pytorch/blob/main/replknet.py
- Name: replknet-31B_3rdparty_in1k-384px
In Collection: RepLKNet
Config: configs/replknet/replknet-31B_32xb64_in1k-384px.py
Metadata:
FLOPs: 45952303104
Parameters: 79864168
Results:
- Dataset: ImageNet-1k
Task: Image Classification
Metrics:
Top 1 Accuracy: 84.84
Top 5 Accuracy: 97.34
Weights: https://download.openmmlab.com/mmclassification/v0/replknet/replknet-31B_3rdparty_in1k-384px_20221118-03a170ce.pth
Converted From:
Weights: https://drive.google.com/u/0/uc?id=1vo-P3XB6mRLUeDzmgv90dOu73uCeLfZN&export=download
Code: https://github.com/DingXiaoH/RepLKNet-pytorch/blob/main/replknet.py
- Name: replknet-31B_in21k-pre_3rdparty_in1k
In Collection: RepLKNet
Config: configs/replknet/replknet-31B_32xb64_in1k.py
Metadata:
Training Data:
- ImageNet-21k
- ImageNet-1k
FLOPs: 15636547584
Parameters: 79864168
Results:
- Dataset: ImageNet-1k
Task: Image Classification
Metrics:
Top 1 Accuracy: 85.20
Top 5 Accuracy: 97.56
Weights: https://download.openmmlab.com/mmclassification/v0/replknet/replknet-31B_in21k-pre_3rdparty_in1k_20221118-54ed5c46.pth
Converted From:
Weights: https://drive.google.com/u/0/uc?id=1DslZ2voXZQR1QoFY9KnbsHAeF84hzS0s&export=download
Code: https://github.com/DingXiaoH/RepLKNet-pytorch/blob/main/replknet.py
- Name: replknet-31B_in21k-pre_3rdparty_in1k-384px
In Collection: RepLKNet
Config: configs/replknet/replknet-31B_32xb64_in1k-384px.py
Metadata:
Training Data:
- ImageNet-21k
- ImageNet-1k
FLOPs: 45952303104
Parameters: 79864168
Results:
- Dataset: ImageNet-1k
Task: Image Classification
Metrics:
Top 1 Accuracy: 85.99
Top 5 Accuracy: 97.75
Weights: https://download.openmmlab.com/mmclassification/v0/replknet/replknet-31B_in21k-pre_3rdparty_in1k-384px_20221118-76c92b24.pth
Converted From:
Weights: https://drive.google.com/u/0/uc?id=1Sc46BWdXXm2fVP-K_hKKU_W8vAB-0duX&export=download
Code: https://github.com/DingXiaoH/RepLKNet-pytorch/blob/main/replknet.py
- Name: replknet-31L_in21k-pre_3rdparty_in1k-384px
In Collection: RepLKNet
Config: configs/replknet/replknet-31L_32xb64_in1k-384px.py
Metadata:
Training Data:
- ImageNet-21k
- ImageNet-1k
FLOPs: 97240006656
Parameters: 172671016
Results:
- Dataset: ImageNet-1k
Task: Image Classification
Metrics:
Top 1 Accuracy: 86.63
Top 5 Accuracy: 98.00
Weights: https://download.openmmlab.com/mmclassification/v0/replknet/replknet-31L_in21k-pre_3rdparty_in1k-384px_20221118-dc3fc07c.pth
Converted From:
Weights: https://drive.google.com/u/0/uc?id=1JYXoNHuRvC33QV1pmpzMTKEni1hpWfBl&export=download
Code: https://github.com/DingXiaoH/RepLKNet-pytorch/blob/main/replknet.py
- Name: replknet-XL_meg73m-pre_3rdparty_in1k-320px
In Collection: RepLKNet
Config: configs/replknet/replknet-XL_32xb64_in1k-320px.py
Metadata:
Training Data:
- MegData-73M
- ImageNet-1k
FLOPs: 129570201600
Parameters: 335435752
Results:
- Dataset: ImageNet-1k
Task: Image Classification
Metrics:
Top 1 Accuracy: 87.57
Top 5 Accuracy: 98.39
Weights: https://download.openmmlab.com/mmclassification/v0/replknet/replknet-XL_meg73m-pre_3rdparty_in1k-320px_20221118-88259b1d.pth
Converted From:
Weights: https://drive.google.com/u/0/uc?id=1tPC60El34GntXByIRHb-z-Apm4Y5LX1T&export=download
Code: https://github.com/DingXiaoH/RepLKNet-pytorch/blob/main/replknet.py

View File

@ -0,0 +1,12 @@
_base_ = [
'../_base_/models/replknet-31B_in1k.py',
'../_base_/datasets/imagenet_bs16_pil_bicubic_384.py',
'../_base_/schedules/imagenet_bs256_coslr.py',
'../_base_/default_runtime.py'
]
# schedule settings
param_scheduler = dict(
type='CosineAnnealingLR', T_max=300, by_epoch=True, begin=0, end=300)
train_cfg = dict(by_epoch=True, max_epochs=300)

View File

@ -0,0 +1,12 @@
_base_ = [
'../_base_/models/replknet-31B_in1k.py',
'../_base_/datasets/imagenet_bs32_pil_bicubic.py',
'../_base_/schedules/imagenet_bs256_coslr.py',
'../_base_/default_runtime.py'
]
# schedule settings
param_scheduler = dict(
type='CosineAnnealingLR', T_max=300, by_epoch=True, begin=0, end=300)
train_cfg = dict(by_epoch=True, max_epochs=300)

View File

@ -0,0 +1,12 @@
_base_ = [
'../_base_/models/replknet-31L_in1k.py',
'../_base_/datasets/imagenet_bs16_pil_bicubic_384.py',
'../_base_/schedules/imagenet_bs256_coslr.py',
'../_base_/default_runtime.py'
]
# schedule settings
param_scheduler = dict(
type='CosineAnnealingLR', T_max=300, by_epoch=True, begin=0, end=300)
train_cfg = dict(by_epoch=True, max_epochs=300)

View File

@ -0,0 +1,12 @@
_base_ = [
'../_base_/models/replknet-XL_in1k.py',
'../_base_/datasets/imagenet_bs8_pil_bicubic_320.py',
'../_base_/schedules/imagenet_bs256_coslr.py',
'../_base_/default_runtime.py'
]
# schedule settings
param_scheduler = dict(
type='CosineAnnealingLR', T_max=300, by_epoch=True, begin=0, end=300)
train_cfg = dict(by_epoch=True, max_epochs=300)

View File

@ -85,6 +85,7 @@ Backbones
PCPVT
PoolFormer
RegNet
RepLKNet
RepMLPNet
RepVGG
Res2Net

View File

@ -23,6 +23,7 @@ from .mobilevit import MobileViT
from .mvit import MViT
from .poolformer import PoolFormer
from .regnet import RegNet
from .replknet import RepLKNet
from .repmlp import RepMLPNet
from .repvgg import RepVGG
from .res2net import Res2Net
@ -82,6 +83,7 @@ __all__ = [
'CSPResNet',
'CSPResNeXt',
'CSPNet',
'RepLKNet',
'RepMLPNet',
'PoolFormer',
'DenseNet',

View File

@ -0,0 +1,668 @@
# Copyright (c) OpenMMLab. All rights reserved.
import torch
import torch.nn as nn
import torch.utils.checkpoint as checkpoint
from mmcv.cnn import build_activation_layer, build_norm_layer
from mmcv.cnn.bricks import DropPath
from mmengine.model import BaseModule
from mmengine.utils.dl_utils.parrots_wrapper import _BatchNorm
from mmcls.registry import MODELS
from .base_backbone import BaseBackbone
def conv_bn(in_channels,
out_channels,
kernel_size,
stride,
padding,
groups,
dilation=1,
norm_cfg=dict(type='BN')):
"""Construct a sequential conv and bn.
Args:
in_channels (int): Dimension of input features.
out_channels (int): Dimension of output features.
kernel_size (int): kernel_size of the convolution.
stride (int): stride of the convolution.
padding (int): padding of the convolution.
groups (int): groups of the convolution.
dilation (int): dilation of the convolution. Default to 1.
norm_cfg (dict): dictionary to construct and config norm layer.
Default to ``dict(type='BN', requires_grad=True)``.
Returns:
nn.Sequential(): A conv layer and a batch norm layer.
"""
if padding is None:
padding = kernel_size // 2
result = nn.Sequential()
result.add_module(
'conv',
nn.Conv2d(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=kernel_size,
stride=stride,
padding=padding,
dilation=dilation,
groups=groups,
bias=False))
result.add_module('bn', build_norm_layer(norm_cfg, out_channels)[1])
return result
def conv_bn_relu(in_channels,
out_channels,
kernel_size,
stride,
padding,
groups,
dilation=1):
"""Construct a sequential conv, bn and relu.
Args:
in_channels (int): Dimension of input features.
out_channels (int): Dimension of output features.
kernel_size (int): kernel_size of the convolution.
stride (int): stride of the convolution.
padding (int): padding of the convolution.
groups (int): groups of the convolution.
dilation (int): dilation of the convolution. Default to 1.
Returns:
nn.Sequential(): A conv layer, batch norm layer and a relu function.
"""
if padding is None:
padding = kernel_size // 2
result = conv_bn(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=kernel_size,
stride=stride,
padding=padding,
groups=groups,
dilation=dilation)
result.add_module('nonlinear', nn.ReLU())
return result
def fuse_bn(conv, bn):
"""Fuse the parameters in a branch with a conv and bn.
Args:
conv (nn.Conv2d): The convolution module to fuse.
bn (nn.BatchNorm2d): The batch normalization to fuse.
Returns:
tuple[torch.Tensor, torch.Tensor]: The parameters obtained after
fusing the parameters of conv and bn in one branch.
The first element is the weight and the second is the bias.
"""
kernel = conv.weight
running_mean = bn.running_mean
running_var = bn.running_var
gamma = bn.weight
beta = bn.bias
eps = bn.eps
std = (running_var + eps).sqrt()
t = (gamma / std).reshape(-1, 1, 1, 1)
return kernel * t, beta - running_mean * gamma / std
class ReparamLargeKernelConv(BaseModule):
"""Super large kernel implemented by with large convolutions.
Input: Tensor with shape [B, C, H, W].
Output: Tensor with shape [B, C, H, W].
Args:
in_channels (int): Dimension of input features.
out_channels (int): Dimension of output features.
kernel_size (int): kernel_size of the large convolution.
stride (int): stride of the large convolution.
groups (int): groups of the large convolution.
small_kernel (int): kernel_size of the small convolution.
small_kernel_merged (bool): Whether to switch the model structure to
deployment mode (merge the small kernel to the large kernel).
Default to False.
init_cfg (dict or list[dict], optional): Initialization config dict.
Defaults to None
"""
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride,
groups,
small_kernel,
small_kernel_merged=False,
init_cfg=None):
super(ReparamLargeKernelConv, self).__init__(init_cfg)
self.kernel_size = kernel_size
self.small_kernel = small_kernel
self.small_kernel_merged = small_kernel_merged
# We assume the conv does not change the feature map size,
# so padding = k//2.
# Otherwise, you may configure padding as you wish,
# and change the padding of small_conv accordingly.
padding = kernel_size // 2
if small_kernel_merged:
self.lkb_reparam = nn.Conv2d(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=kernel_size,
stride=stride,
padding=padding,
dilation=1,
groups=groups,
bias=True)
else:
self.lkb_origin = conv_bn(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=kernel_size,
stride=stride,
padding=padding,
dilation=1,
groups=groups)
if small_kernel is not None:
assert small_kernel <= kernel_size
self.small_conv = conv_bn(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=small_kernel,
stride=stride,
padding=small_kernel // 2,
groups=groups,
dilation=1)
def forward(self, inputs):
if hasattr(self, 'lkb_reparam'):
out = self.lkb_reparam(inputs)
else:
out = self.lkb_origin(inputs)
if hasattr(self, 'small_conv'):
out += self.small_conv(inputs)
return out
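# Both branches are convolutions of the same input, so their outputs add
# linearly; zero-padding the fused small kernel up to the large kernel size
# lets the sum be expressed as a single convolution.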
def get_equivalent_kernel_bias(self):
eq_k, eq_b = fuse_bn(self.lkb_origin.conv, self.lkb_origin.bn)
if hasattr(self, 'small_conv'):
small_k, small_b = fuse_bn(self.small_conv.conv,
self.small_conv.bn)
eq_b += small_b
# add to the central part
eq_k += nn.functional.pad(
small_k, [(self.kernel_size - self.small_kernel) // 2] * 4)
return eq_k, eq_b
def merge_kernel(self):
"""Switch the model structure from training mode to deployment mode."""
if self.small_kernel_merged:
return
eq_k, eq_b = self.get_equivalent_kernel_bias()
self.lkb_reparam = nn.Conv2d(
in_channels=self.lkb_origin.conv.in_channels,
out_channels=self.lkb_origin.conv.out_channels,
kernel_size=self.lkb_origin.conv.kernel_size,
stride=self.lkb_origin.conv.stride,
padding=self.lkb_origin.conv.padding,
dilation=self.lkb_origin.conv.dilation,
groups=self.lkb_origin.conv.groups,
bias=True)
self.lkb_reparam.weight.data = eq_k
self.lkb_reparam.bias.data = eq_b
self.__delattr__('lkb_origin')
if hasattr(self, 'small_conv'):
self.__delattr__('small_conv')
self.small_kernel_merged = True
class ConvFFN(BaseModule):
"""Mlp implemented by with 1*1 convolutions.
Input: Tensor with shape [B, C, H, W].
Output: Tensor with shape [B, C, H, W].
Args:
in_channels (int): Dimension of input features.
internal_channels (int): Dimension of hidden features.
out_channels (int): Dimension of output features.
drop_path (float): Stochastic depth rate. Defaults to 0.
norm_cfg (dict): dictionary to construct and config norm layer.
Default to ``dict(type='BN', requires_grad=True)``.
act_cfg (dict): The config dict for activation between pointwise
convolution. Defaults to ``dict(type='GELU')``.
init_cfg (dict or list[dict], optional): Initialization config dict.
Defaults to None.
"""
def __init__(self,
in_channels,
internal_channels,
out_channels,
drop_path,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='GELU'),
init_cfg=None):
super(ConvFFN, self).__init__(init_cfg)
self.drop_path = DropPath(
drop_prob=drop_path) if drop_path > 0. else nn.Identity()
self.preffn_bn = build_norm_layer(norm_cfg, in_channels)[1]
self.pw1 = conv_bn(
in_channels=in_channels,
out_channels=internal_channels,
kernel_size=1,
stride=1,
padding=0,
groups=1)
self.pw2 = conv_bn(
in_channels=internal_channels,
out_channels=out_channels,
kernel_size=1,
stride=1,
padding=0,
groups=1)
self.nonlinear = build_activation_layer(act_cfg)
def forward(self, x):
out = self.preffn_bn(x)
out = self.pw1(out)
out = self.nonlinear(out)
out = self.pw2(out)
return x + self.drop_path(out)
class RepLKBlock(BaseModule):
"""RepLKBlock for RepLKNet backbone.
Args:
in_channels (int): The input channels of the block.
dw_channels (int): The intermediate channels of the block,
i.e., input channels of the large kernel convolution.
block_lk_size (int): size of the super large kernel. Defaults: 31.
small_kernel (int): size of the parallel small kernel. Defaults: 5.
drop_path (float): Stochastic depth rate. Defaults: 0.
small_kernel_merged (bool): Whether to switch the model structure to
deployment mode (merge the small kernel to the large kernel).
Default to False.
norm_cfg (dict): dictionary to construct and config norm layer.
Default to ``dict(type='BN', requires_grad=True)``.
act_cfg (dict): Config dict for activation layer.
Default to ``dict(type='ReLU')``.
init_cfg (dict or list[dict], optional): Initialization config dict.
Default to None
"""
def __init__(self,
in_channels,
dw_channels,
block_lk_size,
small_kernel,
drop_path,
small_kernel_merged=False,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='ReLU'),
init_cfg=None):
super(RepLKBlock, self).__init__(init_cfg)
self.pw1 = conv_bn_relu(in_channels, dw_channels, 1, 1, 0, groups=1)
self.pw2 = conv_bn(dw_channels, in_channels, 1, 1, 0, groups=1)
self.large_kernel = ReparamLargeKernelConv(
in_channels=dw_channels,
out_channels=dw_channels,
kernel_size=block_lk_size,
stride=1,
groups=dw_channels,
small_kernel=small_kernel,
small_kernel_merged=small_kernel_merged)
self.lk_nonlinear = build_activation_layer(act_cfg)
self.prelkb_bn = build_norm_layer(norm_cfg, in_channels)[1]
self.drop_path = DropPath(
drop_prob=drop_path) if drop_path > 0. else nn.Identity()
def forward(self, x):
out = self.prelkb_bn(x)
out = self.pw1(out)
out = self.large_kernel(out)
out = self.lk_nonlinear(out)
out = self.pw2(out)
return x + self.drop_path(out)
class RepLKNetStage(BaseModule):
"""
generate RepLKNet blocks for a stage
return: RepLKNet blocks
Args:
channels (int): The input channels of the stage.
num_blocks (int): The number of blocks of the stage.
stage_lk_size (int): size of the super large kernel. Defaults: 31.
drop_path (float): Stochastic depth rate. Defaults: 0.
small_kernel (int): size of the parallel small kernel. Defaults: 5.
dw_ratio (float): The intermediate channels
expansion ratio of the block. Defaults: 1.
ffn_ratio (float): Mlp expansion ratio. Defaults to 4.
with_cp (bool): Use checkpoint or not. Using checkpoint will save some
memory while slowing down the training speed. Default to False.
small_kernel_merged (bool): Whether to switch the model structure to
deployment mode (merge the small kernel to the large kernel).
Default to False.
norm_intermediate_features (bool): Construct and config norm layer
or not.
Using True will normalize the intermediate features for
downstream dense prediction tasks.
norm_cfg (dict): dictionary to construct and config norm layer.
Default to ``dict(type='BN', requires_grad=True)``.
init_cfg (dict or list[dict], optional): Initialization config dict.
Default to None
"""
def __init__(
self,
channels,
num_blocks,
stage_lk_size,
drop_path,
small_kernel,
dw_ratio=1,
ffn_ratio=4,
with_cp=False, # train with torch.utils.checkpoint to save memory
small_kernel_merged=False,
norm_intermediate_features=False,
norm_cfg=dict(type='BN'),
init_cfg=None):
super(RepLKNetStage, self).__init__(init_cfg)
self.with_cp = with_cp
blks = []
for i in range(num_blocks):
block_drop_path = drop_path[i] if isinstance(drop_path,
list) else drop_path
# Assume all RepLK Blocks within a stage share the same lk_size.
# You may tune it on your own model.
replk_block = RepLKBlock(
in_channels=channels,
dw_channels=int(channels * dw_ratio),
block_lk_size=stage_lk_size,
small_kernel=small_kernel,
drop_path=block_drop_path,
small_kernel_merged=small_kernel_merged)
convffn_block = ConvFFN(
in_channels=channels,
internal_channels=int(channels * ffn_ratio),
out_channels=channels,
drop_path=block_drop_path)
blks.append(replk_block)
blks.append(convffn_block)
self.blocks = nn.ModuleList(blks)
if norm_intermediate_features:
self.norm = build_norm_layer(norm_cfg, channels)[1]
else:
self.norm = nn.Identity()
def forward(self, x):
for blk in self.blocks:
if self.with_cp:
x = checkpoint.checkpoint(blk, x) # Save training memory
else:
x = blk(x)
return x
@MODELS.register_module()
class RepLKNet(BaseBackbone):
"""RepLKNet backbone.
A PyTorch impl of :
`Scaling Up Your Kernels to 31x31: Revisiting Large Kernel Design in CNNs
<https://arxiv.org/abs/2203.06717>`_
Args:
arch (str | dict): The architecture of RepLKNet.
If it's a dict, it should contain the following keys:
- large_kernel_sizes (Sequence[int]):
Large kernel size in each stage.
- layers (Sequence[int]): Number of blocks in each stage.
- channels (Sequence[int]): Number of channels in each stage.
- small_kernel (int): size of the parallel small kernel.
- dw_ratio (float): The intermediate channels
expansion ratio of the block.
in_channels (int): Number of input image channels. Default to 3.
ffn_ratio (float): Mlp expansion ratio. Defaults to 4.
out_indices (Sequence[int]): Output from which stages.
Default to (3, ).
strides (Sequence[int]): Strides of the first block of each stage.
Default to (2, 2, 2, 2).
dilations (Sequence[int]): Dilation of each stage.
Default to (1, 1, 1, 1).
frozen_stages (int): Stages to be frozen
(all param fixed). -1 means not freezing any parameters.
Default to -1.
conv_cfg (dict | None): The config dict for conv layers.
Default to None.
norm_cfg (dict): The config dict for norm layers.
Default to ``dict(type='BN')``.
act_cfg (dict): Config dict for activation layer.
Default to ``dict(type='ReLU')``.
with_cp (bool): Use checkpoint or not. Using checkpoint will save some
memory while slowing down the training speed. Default to False.
drop_path_rate (float): Stochastic depth rate. Default to 0.3.
small_kernel_merged (bool): Whether to switch the model structure to
deployment mode (merge the small kernel into the large kernel).
Default to False.
norm_intermediate_features (bool): Construct and
config norm layer or not.
Using True will normalize the intermediate features
for downstream dense prediction tasks.
norm_eval (bool): Whether to set norm layers to eval mode, namely,
freeze running stats (mean and var). Note: Effect on Batch Norm
and its variants only. Default to False.
init_cfg (dict or list[dict], optional): Initialization config dict.
"""
arch_settings = {
'31B':
dict(
large_kernel_sizes=[31, 29, 27, 13],
layers=[2, 2, 18, 2],
channels=[128, 256, 512, 1024],
small_kernel=5,
dw_ratio=1),
'31L':
dict(
large_kernel_sizes=[31, 29, 27, 13],
layers=[2, 2, 18, 2],
channels=[192, 384, 768, 1536],
small_kernel=5,
dw_ratio=1),
'XL':
dict(
large_kernel_sizes=[27, 27, 27, 13],
layers=[2, 2, 18, 2],
channels=[256, 512, 1024, 2048],
small_kernel=None,
dw_ratio=1.5),
}
def __init__(self,
arch,
in_channels=3,
ffn_ratio=4,
out_indices=(3, ),
strides=(2, 2, 2, 2),
dilations=(1, 1, 1, 1),
frozen_stages=-1,
conv_cfg=None,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='ReLU'),
with_cp=False,
drop_path_rate=0.3,
small_kernel_merged=False,
norm_intermediate_features=False,
norm_eval=False,
init_cfg=[
dict(type='Kaiming', layer=['Conv2d']),
dict(
type='Constant',
val=1,
layer=['_BatchNorm', 'GroupNorm'])
]):
super(RepLKNet, self).__init__(init_cfg)
if isinstance(arch, str):
assert arch in self.arch_settings, \
f'"arch": "{arch}" is not one of the arch_settings'
arch = self.arch_settings[arch]
elif not isinstance(arch, dict):
raise TypeError('Expect "arch" to be either a string '
f'or a dict, got {type(arch)}')
assert len(arch['layers']) == len(
arch['channels']) == len(strides) == len(dilations)
assert max(out_indices) < len(arch['layers'])
self.arch = arch
self.in_channels = in_channels
self.out_indices = out_indices
self.strides = strides
self.dilations = dilations
self.frozen_stages = frozen_stages
self.conv_cfg = conv_cfg
self.norm_cfg = norm_cfg
self.act_cfg = act_cfg
self.with_cp = with_cp
self.drop_path_rate = drop_path_rate
self.small_kernel_merged = small_kernel_merged
self.norm_eval = norm_eval
self.norm_intermediate_features = norm_intermediate_features
base_width = self.arch['channels'][0]
self.num_stages = len(self.arch['layers'])
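# 4-layer stem (overall stride 4): a stride-2 3x3 conv, a depth-wise 3x3
# conv, a 1x1 channel mixer, then another stride-2 depth-wise 3x3 conv.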
self.stem = nn.ModuleList([
conv_bn_relu(
in_channels=in_channels,
out_channels=base_width,
kernel_size=3,
stride=2,
padding=1,
groups=1),
conv_bn_relu(
in_channels=base_width,
out_channels=base_width,
kernel_size=3,
stride=1,
padding=1,
groups=base_width),
conv_bn_relu(
in_channels=base_width,
out_channels=base_width,
kernel_size=1,
stride=1,
padding=0,
groups=1),
conv_bn_relu(
in_channels=base_width,
out_channels=base_width,
kernel_size=3,
stride=2,
padding=1,
groups=base_width)
])
# stochastic depth. We set block-wise drop-path rate.
# The higher level blocks are more likely to be dropped.
# This implementation follows Swin.
dpr = [
x.item() for x in torch.linspace(0, drop_path_rate,
sum(self.arch['layers']))
]
self.stages = nn.ModuleList()
self.transitions = nn.ModuleList()
for stage_idx in range(self.num_stages):
layer = RepLKNetStage(
channels=self.arch['channels'][stage_idx],
num_blocks=self.arch['layers'][stage_idx],
stage_lk_size=self.arch['large_kernel_sizes'][stage_idx],
drop_path=dpr[sum(self.arch['layers'][:stage_idx]
):sum(self.arch['layers'][:stage_idx + 1])],
small_kernel=self.arch['small_kernel'],
dw_ratio=self.arch['dw_ratio'],
ffn_ratio=ffn_ratio,
with_cp=with_cp,
small_kernel_merged=small_kernel_merged,
norm_intermediate_features=(stage_idx in out_indices))
self.stages.append(layer)
if stage_idx < len(self.arch['layers']) - 1:
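# Transition between stages: a 1x1 conv expands the channels, then a
# stride-2 depth-wise 3x3 conv halves the spatial resolution.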
transition = nn.Sequential(
conv_bn_relu(
self.arch['channels'][stage_idx],
self.arch['channels'][stage_idx + 1],
1,
1,
0,
groups=1),
conv_bn_relu(
self.arch['channels'][stage_idx + 1],
self.arch['channels'][stage_idx + 1],
3,
stride=2,
padding=1,
groups=self.arch['channels'][stage_idx + 1]))
self.transitions.append(transition)
def forward_features(self, x):
x = self.stem[0](x)
for stem_layer in self.stem[1:]:
if self.with_cp:
x = checkpoint.checkpoint(stem_layer, x) # save memory
else:
x = stem_layer(x)
# Need the intermediate feature maps
outs = []
for stage_idx in range(self.num_stages):
x = self.stages[stage_idx](x)
if stage_idx in self.out_indices:
outs.append(self.stages[stage_idx].norm(x))
# For RepLKNet-XL normalize the features
# before feeding them into the heads
if stage_idx < self.num_stages - 1:
x = self.transitions[stage_idx](x)
return outs
def forward(self, x):
x = self.forward_features(x)
return tuple(x)
def _freeze_stages(self):
if self.frozen_stages >= 0:
self.stem.eval()
for param in self.stem.parameters():
param.requires_grad = False
for i in range(self.frozen_stages):
stage = self.stages[i]
stage.eval()
for param in stage.parameters():
param.requires_grad = False
def train(self, mode=True):
super(RepLKNet, self).train(mode)
self._freeze_stages()
if mode and self.norm_eval:
for m in self.modules():
if isinstance(m, _BatchNorm):
m.eval()
def switch_to_deploy(self):
for m in self.modules():
if hasattr(m, 'merge_kernel'):
m.merge_kernel()
self.small_kernel_merged = True

View File

@ -38,3 +38,4 @@ Import:
- configs/hornet/metafile.yml
- configs/mobilevit/metafile.yml
- configs/davit/metafile.yml
- configs/replknet/metafile.yml

View File

@ -0,0 +1,304 @@
# Copyright (c) OpenMMLab. All rights reserved.
import os
import tempfile
import pytest
import torch
from mmengine.runner import load_checkpoint, save_checkpoint
from torch import nn
from torch.nn.modules import GroupNorm
from torch.nn.modules.batchnorm import _BatchNorm
from mmcls.models.backbones import RepLKNet
from mmcls.models.backbones.replknet import ReparamLargeKernelConv
def check_norm_state(modules, train_state):
"""Check if norm layer is in correct train state."""
for mod in modules:
if isinstance(mod, _BatchNorm):
if mod.training != train_state:
return False
return True
def is_norm(modules):
"""Check if is one of the norms."""
if isinstance(modules, (GroupNorm, _BatchNorm)):
return True
return False
def is_replk_block(modules):
if isinstance(modules, ReparamLargeKernelConv):
return True
return False
def test_replknet_replkblock():
# Test ReparamLargeKernelConv with in_channels != out_channels,
# kernel_size = 31, stride = 1, groups=in_channels, small_kernel = 5
block = ReparamLargeKernelConv(
5, 10, kernel_size=31, stride=1, groups=5, small_kernel=5)
block.eval()
x = torch.randn(1, 5, 64, 64)
x_out_not_deploy = block(x)
assert block.small_kernel <= block.kernel_size
assert not hasattr(block, 'lkb_reparam')
assert hasattr(block, 'lkb_origin')
assert hasattr(block, 'small_conv')
assert x_out_not_deploy.shape == torch.Size((1, 10, 64, 64))
block.merge_kernel()
assert block.small_kernel_merged is True
x_out_deploy = block(x)
assert x_out_deploy.shape == torch.Size((1, 10, 64, 64))
assert torch.allclose(x_out_not_deploy, x_out_deploy, atol=1e-5, rtol=1e-4)
# Test ReparamLargeKernelConv with in_channels == out_channels,
# kernel_size = 31, stride = 1, groups=in_channels, small_kernel = 5
block = ReparamLargeKernelConv(
12, 12, kernel_size=31, stride=1, groups=12, small_kernel=5)
block.eval()
x = torch.randn(1, 12, 64, 64)
x_out_not_deploy = block(x)
assert block.small_kernel <= block.kernel_size
assert not hasattr(block, 'lkb_reparam')
assert hasattr(block, 'lkb_origin')
assert hasattr(block, 'small_conv')
assert x_out_not_deploy.shape == torch.Size((1, 12, 64, 64))
block.merge_kernel()
assert block.small_kernel_merged is True
x_out_deploy = block(x)
assert x_out_deploy.shape == torch.Size((1, 12, 64, 64))
assert torch.allclose(x_out_not_deploy, x_out_deploy, atol=1e-5, rtol=1e-4)
# Test ReparamLargeKernelConv with in_channels == out_channels,
# kernel_size = 31, stride = 2, groups=in_channels, small_kernel = 5
block = ReparamLargeKernelConv(
16, 16, kernel_size=31, stride=2, groups=16, small_kernel=5)
block.eval()
x = torch.randn(1, 16, 64, 64)
x_out_not_deploy = block(x)
assert block.small_kernel <= block.kernel_size
assert not hasattr(block, 'lkb_reparam')
assert hasattr(block, 'lkb_origin')
assert hasattr(block, 'small_conv')
assert x_out_not_deploy.shape == torch.Size((1, 16, 32, 32))
block.merge_kernel()
assert block.small_kernel_merged is True
x_out_deploy = block(x)
assert x_out_deploy.shape == torch.Size((1, 16, 32, 32))
assert torch.allclose(x_out_not_deploy, x_out_deploy, atol=1e-5, rtol=1e-4)
# Test ReparamLargeKernelConv with in_channels == out_channels,
# kernel_size = 27, stride = 1, groups=in_channels, small_kernel = 5
block = ReparamLargeKernelConv(
12, 12, kernel_size=27, stride=1, groups=12, small_kernel=5)
block.eval()
x = torch.randn(1, 12, 48, 48)
x_out_not_deploy = block(x)
assert block.small_kernel <= block.kernel_size
assert not hasattr(block, 'lkb_reparam')
assert hasattr(block, 'lkb_origin')
assert hasattr(block, 'small_conv')
assert x_out_not_deploy.shape == torch.Size((1, 12, 48, 48))
block.merge_kernel()
assert block.small_kernel_merged is True
x_out_deploy = block(x)
assert x_out_deploy.shape == torch.Size((1, 12, 48, 48))
assert torch.allclose(x_out_not_deploy, x_out_deploy, atol=1e-5, rtol=1e-4)
# Test ReparamLargeKernelConv with in_channels == out_channels,
# kernel_size = 31, stride = 1, groups=in_channels, small_kernel = 7
block = ReparamLargeKernelConv(
12, 12, kernel_size=31, stride=1, groups=12, small_kernel=7)
block.eval()
x = torch.randn(1, 12, 64, 64)
x_out_not_deploy = block(x)
assert block.small_kernel <= block.kernel_size
assert not hasattr(block, 'lkb_reparam')
assert hasattr(block, 'lkb_origin')
assert hasattr(block, 'small_conv')
assert x_out_not_deploy.shape == torch.Size((1, 12, 64, 64))
block.merge_kernel()
assert block.small_kernel_merged is True
x_out_deploy = block(x)
assert x_out_deploy.shape == torch.Size((1, 12, 64, 64))
assert torch.allclose(x_out_not_deploy, x_out_deploy, atol=1e-5, rtol=1e-4)
# Test ReparamLargeKernelConv with small_kernel_merged == True
block = ReparamLargeKernelConv(
8,
8,
kernel_size=31,
stride=1,
groups=8,
small_kernel=5,
small_kernel_merged=True)
assert isinstance(block.lkb_reparam, nn.Conv2d)
assert not hasattr(block, 'lkb_origin')
assert not hasattr(block, 'small_conv')
x = torch.randn(1, 8, 48, 48)
x_out = block(x)
assert x_out.shape == torch.Size((1, 8, 48, 48))
def test_replknet_backbone():
with pytest.raises(TypeError):
# arch must be str or dict
RepLKNet(arch=[4, 6, 16, 1])
with pytest.raises(AssertionError):
# arch must in arch_settings
RepLKNet(arch='31C')
with pytest.raises(KeyError):
# arch dict must have 'layers'
arch = dict(large_kernel_sizes=[31, 29, 27, 13])
RepLKNet(arch=arch)
with pytest.raises(KeyError):
# arch dict must have 'channels'
arch = dict(large_kernel_sizes=[31, 29, 27, 13], layers=[2, 2, 18, 2])
RepLKNet(arch=arch)
with pytest.raises(KeyError):
# arch dict must have 'small_kernel' and 'dw_ratio'
arch = dict(
large_kernel_sizes=[31, 29, 27, 13],
layers=[2, 2, 18, 2],
channels=[128, 256, 512, 1024])
RepLKNet(arch=arch)
# len(arch['large_kernel_sizes']) == len(arch['layers'])
# == len(arch['channels']) == len(strides) == len(dilations)
with pytest.raises(AssertionError):
arch = dict(
large_kernel_sizes=[31, 29, 27, 13],
layers=[2, 2, 18, 2],
channels=[128, 256, 1024],
small_kernel=5,
dw_ratio=1)
RepLKNet(arch=arch)
# len(strides) must equal to 4
with pytest.raises(AssertionError):
RepLKNet('31B', strides=(2, 2, 2))
# len(dilations) must equal to 4
with pytest.raises(AssertionError):
RepLKNet('31B', strides=(2, 2, 2, 2), dilations=(1, 1, 1))
# max(out_indices) must be less than len(arch['layers'])
with pytest.raises(AssertionError):
RepLKNet('31B', out_indices=(5, ))
# Test RepLKNet norm state
model = RepLKNet('31B')
model.train()
assert check_norm_state(model.modules(), True)
# Test RepLKNet with first stage frozen
frozen_stages = 1
model = RepLKNet('31B', frozen_stages=frozen_stages)
model.train()
for param in model.stem.parameters():
assert param.requires_grad is False
for i in range(0, frozen_stages):
stage = model.stages[i]
for mod in stage.modules():
if isinstance(mod, _BatchNorm):
assert mod.training is False
for param in stage.parameters():
assert param.requires_grad is False
# Test RepLKNet with norm_eval
model = RepLKNet('31B', norm_eval=True)
model.train()
assert check_norm_state(model.modules(), False)
# Test RepLKNet forward with layer 3 forward
model = RepLKNet('31B', out_indices=(3, ))
model.init_weights()
model.train()
for m in model.modules():
if is_norm(m):
assert isinstance(m, _BatchNorm)
imgs = torch.randn(1, 3, 224, 224)
feat = model(imgs)
assert isinstance(feat, tuple)
assert len(feat) == 1
assert isinstance(feat[0], torch.Tensor)
assert feat[0].shape == torch.Size((1, 1024, 7, 7))
# Test RepLKNet forward
model_test_settings = [
dict(model_name='31B', out_sizes=(128, 256, 512, 1024)),
# dict(model_name='31L', out_sizes=(192, 384, 768, 1536)),
# dict(model_name='XL', out_sizes=(256, 512, 1024, 2048))
]
choose_models = ['31B']
# Test RepLKNet model forward
for model_test_setting in model_test_settings:
if model_test_setting['model_name'] not in choose_models:
continue
model = RepLKNet(
model_test_setting['model_name'], out_indices=(0, 1, 2, 3))
model.init_weights()
# Test Norm
for m in model.modules():
if is_norm(m):
assert isinstance(m, _BatchNorm)
model.train()
imgs = torch.randn(1, 3, 224, 224)
feat = model(imgs)
assert feat[0].shape == torch.Size(
(1, model_test_setting['out_sizes'][0], 56, 56))
assert feat[1].shape == torch.Size(
(1, model_test_setting['out_sizes'][1], 28, 28))
assert feat[2].shape == torch.Size(
(1, model_test_setting['out_sizes'][2], 14, 14))
assert feat[3].shape == torch.Size(
(1, model_test_setting['out_sizes'][3], 7, 7))
# Test eval of "train" mode and "deploy" mode
gap = nn.AdaptiveAvgPool2d(output_size=(1))
fc = nn.Linear(model_test_setting['out_sizes'][3], 10)
model.eval()
feat = model(imgs)
pred = fc(gap(feat[3]).flatten(1))
model.switch_to_deploy()
for m in model.modules():
if isinstance(m, ReparamLargeKernelConv):
assert m.small_kernel_merged is True
feat_deploy = model(imgs)
pred_deploy = fc(gap(feat_deploy[3]).flatten(1))
for i in range(4):
assert torch.allclose(feat[i], feat_deploy[i], atol=1e-5, rtol=1e-4)
assert torch.allclose(pred, pred_deploy, atol=1e-5, rtol=1e-4)
def test_replknet_load():
# Test that outputs match before and after loading a deploy checkpoint
model = RepLKNet('31B', out_indices=(0, 1, 2, 3))
inputs = torch.randn((1, 3, 224, 224))
ckpt_path = os.path.join(tempfile.gettempdir(), 'ckpt.pth')
model.switch_to_deploy()
model.eval()
outputs = model(inputs)
model_deploy = RepLKNet(
'31B', out_indices=(0, 1, 2, 3), small_kernel_merged=True)
model_deploy.eval()
save_checkpoint(model.state_dict(), ckpt_path)
load_checkpoint(model_deploy, ckpt_path, strict=True)
outputs_load = model_deploy(inputs)
for feat, feat_load in zip(outputs, outputs_load):
assert torch.allclose(feat, feat_load)

View File

@ -342,6 +342,7 @@ def test_repvgg_load():
outputs = model(inputs)
model_deploy = RepVGG('A1', out_indices=(0, 1, 2, 3), deploy=True)
model_deploy.eval()
save_checkpoint(model.state_dict(), ckpt_path)
load_checkpoint(model_deploy, ckpt_path, strict=True)

View File

@ -0,0 +1,58 @@
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
from collections import OrderedDict
from pathlib import Path
import torch
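# Usage sketch (the script and output file names here are illustrative):
#   python replknet_to_mmcls.py RepLKNet-31B_official.pth replknet-31B_3rdparty_in1k.pth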
def convert(src, dst):
print('Converting...')
blobs = torch.load(src, map_location='cpu')
converted_state_dict = OrderedDict()
for key in blobs:
splited_key = key.split('.')
splited_key = [
'backbone.stem' if i[:4] == 'stem' else i for i in splited_key
]
splited_key = [
'backbone.stages' if i[:6] == 'stages' else i for i in splited_key
]
splited_key = [
'backbone.transitions' if i[:11] == 'transitions' else i
for i in splited_key
]
splited_key = [
'backbone.stages.3.norm' if i[:4] == 'norm' else i
for i in splited_key
]
splited_key = [
'head.fc' if i[:4] == 'head' else i for i in splited_key
]
new_key = '.'.join(splited_key)
converted_state_dict[new_key] = blobs[key]
torch.save(converted_state_dict, dst)
print('Done!')
def main():
parser = argparse.ArgumentParser(description='Convert model keys')
parser.add_argument('src', help='src official RepLKNet model path')
parser.add_argument('dst', help='save path')
args = parser.parse_args()
dst = Path(args.dst)
if dst.suffix != '.pth':
print('The destination path must be a .pth file.')
exit(1)
dst.parent.mkdir(parents=True, exist_ok=True)
convert(args.src, args.dst)
if __name__ == '__main__':
main()