[Feature] Support RepLKnet backbone. (#1129)

* update replknet configs

* update replknet test

* update replknet model

* update replknet model

* update replknet model

* update replknet model

* Fix docs and config names

Co-authored-by: mzr1996 <mzr1996@163.com>
pull/1177/head
Jiahao Wang 2022-11-21 10:18:58 +08:00 committed by GitHub
parent c3c1cb93aa
commit 72c6bc4864
24 changed files with 1496 additions and 0 deletions

View File

@ -151,6 +151,7 @@ Results and models are available in the [model zoo](https://mmclassification.rea
- [x] [HorNet](https://github.com/open-mmlab/mmclassification/tree/master/configs/hornet)
- [x] [MobileViT](https://github.com/open-mmlab/mmclassification/tree/1.x/configs/mobilevit)
- [x] [DaViT](https://github.com/open-mmlab/mmclassification/tree/1.x/configs/davit)
- [x] [RepLKNet](https://github.com/open-mmlab/mmclassification/tree/1.x/configs/replknet)
</details>

View File

@ -150,6 +150,7 @@ mim install -e .
- [x] [HorNet](https://github.com/open-mmlab/mmclassification/tree/master/configs/hornet)
- [x] [MobileViT](https://github.com/open-mmlab/mmclassification/tree/1.x/configs/mobilevit)
- [x] [DaViT](https://github.com/open-mmlab/mmclassification/tree/1.x/configs/davit)
- [x] [RepLKNet](https://github.com/open-mmlab/mmclassification/tree/1.x/configs/replknet)
</details>

View File

@ -0,0 +1,57 @@
# dataset settings
dataset_type = 'ImageNet'
data_preprocessor = dict(
# RGB format normalization parameters
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
# convert image from BGR to RGB
to_rgb=True,
)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='RandomResizedCrop',
scale=384,
backend='pillow',
interpolation='bicubic'),
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
dict(type='PackClsInputs'),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='Resize', scale=384, backend='pillow', interpolation='bicubic'),
dict(type='PackClsInputs'),
]
train_dataloader = dict(
batch_size=16,
num_workers=5,
dataset=dict(
type=dataset_type,
data_root='data/imagenet',
ann_file='meta/train.txt',
data_prefix='train',
pipeline=train_pipeline),
sampler=dict(type='DefaultSampler', shuffle=True),
persistent_workers=True,
)
val_dataloader = dict(
batch_size=16,
num_workers=5,
dataset=dict(
type=dataset_type,
data_root='data/imagenet',
ann_file='meta/val.txt',
data_prefix='val',
pipeline=test_pipeline),
sampler=dict(type='DefaultSampler', shuffle=False),
persistent_workers=True,
)
val_evaluator = dict(type='Accuracy', topk=(1, 5))
# If you want standard test, please manually configure the test dataset
test_dataloader = val_dataloader
test_evaluator = val_evaluator

View File

@ -0,0 +1,63 @@
# dataset settings
dataset_type = 'ImageNet'
data_preprocessor = dict(
# RGB format normalization parameters
mean=[122.5, 122.5, 122.5],
std=[122.5, 122.5, 122.5],
# convert image from BGR to RGB
to_rgb=True,
)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='RandomResizedCrop',
scale=320,
backend='pillow',
interpolation='bicubic'),
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
dict(type='PackClsInputs'),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='ResizeEdge',
scale=int(320 / 224 * 256),
edge='short',
backend='pillow',
interpolation='bicubic'),
dict(type='CenterCrop', crop_size=320),
dict(type='PackClsInputs'),
]
train_dataloader = dict(
batch_size=8,
num_workers=5,
dataset=dict(
type=dataset_type,
data_root='data/imagenet',
ann_file='meta/train.txt',
data_prefix='train',
pipeline=train_pipeline),
sampler=dict(type='DefaultSampler', shuffle=True),
persistent_workers=True,
)
val_dataloader = dict(
batch_size=8,
num_workers=5,
dataset=dict(
type=dataset_type,
data_root='data/imagenet',
ann_file='meta/val.txt',
data_prefix='val',
pipeline=test_pipeline),
sampler=dict(type='DefaultSampler', shuffle=False),
persistent_workers=True,
)
val_evaluator = dict(type='Accuracy', topk=(1, 5))
# If you want standard test, please manually configure the test dataset
test_dataloader = val_dataloader
test_evaluator = val_evaluator

View File

@ -0,0 +1,15 @@
model = dict(
type='ImageClassifier',
backbone=dict(
type='RepLKNet',
arch='31B',
out_indices=(3, ),
),
neck=dict(type='GlobalAveragePooling'),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=1024,
loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
topk=(1, 5),
))

View File

@ -0,0 +1,15 @@
model = dict(
type='ImageClassifier',
backbone=dict(
type='RepLKNet',
arch='31L',
out_indices=(3, ),
),
neck=dict(type='GlobalAveragePooling'),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=1536,
loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
topk=(1, 5),
))

View File

@ -0,0 +1,15 @@
model = dict(
type='ImageClassifier',
backbone=dict(
type='RepLKNet',
arch='XL',
out_indices=(3, ),
),
neck=dict(type='GlobalAveragePooling'),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=2048,
loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
topk=(1, 5),
))

View File

@ -0,0 +1,95 @@
# RepLKNet
> [Scaling Up Your Kernels to 31x31: Revisiting Large Kernel Design in CNNs](https://arxiv.org/abs/2203.06717)
<!-- [ALGORITHM] -->
## Abstract
We revisit large kernel design in modern convolutional neural networks (CNNs). Inspired by recent advances in vision transformers (ViTs), in this paper, we demonstrate that using a few large convolutional kernels instead of a stack of small kernels could be a more powerful paradigm. We suggested five guidelines, e.g., applying re-parameterized large depth-wise convolutions, to design efficient high-performance large-kernel CNNs. Following the guidelines, we propose RepLKNet, a pure CNN architecture whose kernel size is as large as 31×31, in contrast to the commonly used 3×3. RepLKNet greatly closes the performance gap between CNNs and ViTs, e.g., achieving results comparable or superior to Swin Transformer on ImageNet and a few typical downstream tasks, with lower latency. RepLKNet also shows nice scalability to big data and large models, obtaining 87.8% top-1 accuracy on ImageNet and 56.0% mIoU on ADE20K, which is very competitive among the state of the art with similar model sizes. Our study further reveals that, in contrast to small-kernel CNNs, large-kernel CNNs have much larger effective receptive fields and higher shape bias rather than texture bias.
<div align=center>
<img src="https://user-images.githubusercontent.com/48375204/197546040-cdf078c3-7fbd-400f-8b27-01668c8dfebf.png" width="60%"/>
</div>
## Results and models
### ImageNet-1k
| Model | Resolution | Pretrained Dataset | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download |
| :------------: | :--------: | :----------------: | :-----------------------------: | :-----------------------------: | :-------: | :-------: | :------------------------------------: | :--------------------------------------: |
| RepLKNet-31B\* | 224x224 | From Scratch | 79.9 (train) \| 79.5 (deploy) | 15.6 (train) \| 15.4 (deploy) | 83.48 | 96.57 | [config (train)](./replknet-31B_32xb64_in1k.py) \| [config (deploy)](./deploy/replknet-31B-deploy_32xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/replknet/replknet-31B_3rdparty_in1k_20221118-fd08e268.pth) |
| RepLKNet-31B\* | 384x384 | From Scratch | 79.9 (train) \| 79.5 (deploy) | 46.0 (train) \| 45.3 (deploy) | 84.84 | 97.34 | [config (train)](./replknet-31B_32xb64_in1k-384px.py) \| [config (deploy)](./deploy/replknet-31B-deploy_32xb64_in1k-384px.py) | [model](https://download.openmmlab.com/mmclassification/v0/replknet/replknet-31B_3rdparty_in1k-384px_20221118-03a170ce.pth) |
| RepLKNet-31B\* | 224x224 | ImageNet-21K | 79.9 (train) \| 79.5 (deploy) | 15.6 (train) \| 15.4 (deploy) | 85.20 | 97.56 | [config (train)](./replknet-31B_32xb64_in1k.py) \| [config (deploy)](./deploy/replknet-31B-deploy_32xb64_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/replknet/replknet-31B_in21k-pre_3rdparty_in1k_20221118-54ed5c46.pth) |
| RepLKNet-31B\* | 384x384 | ImageNet-21K | 79.9 (train) \| 79.5 (deploy) | 46.0 (train) \| 45.3 (deploy) | 85.99 | 97.75 | [config (train)](./replknet-31B_32xb64_in1k-384px.py) \| [config (deploy)](./deploy/replknet-31B-deploy_32xb64_in1k-384px.py) | [model](https://download.openmmlab.com/mmclassification/v0/replknet/replknet-31B_in21k-pre_3rdparty_in1k-384px_20221118-76c92b24.pth) |
| RepLKNet-31L\* | 384x384 | ImageNet-21K | 172.7 (train) \| 172.0 (deploy) | 97.2 (train) \| 97.0 (deploy) | 86.63 | 98.00 | [config (train)](./replknet-31L_32xb64_in1k-384px.py) \| [config (deploy)](./deploy/replknet-31L-deploy_32xb64_in1k-384px.py) | [model](https://download.openmmlab.com/mmclassification/v0/replknet/replknet-31L_in21k-pre_3rdparty_in1k-384px_20221118-dc3fc07c.pth) |
| RepLKNet-XL\* | 320x320 | MegData-73M | 335.4 (train) \| 335.0 (deploy) | 129.6 (train) \| 129.0 (deploy) | 87.57 | 98.39 | [config (train)](./replknet-XL_32xb64_in1k-320px.py) \| [config (deploy)](./deploy/replknet-XL-deploy_32xb64_in1k-320px.py) | [model](https://download.openmmlab.com/mmclassification/v0/replknet/replknet-XL_meg73m-pre_3rdparty_in1k-320px_20221118-88259b1d.pth) |
*Models with \* are converted from the [official repo](https://github.com/DingXiaoH/RepLKNet-pytorch). The config files of these models are only for validation. We don't guarantee the training accuracy of these configs and welcome you to contribute your reproduction results.*
## How to use
The checkpoints provided are all `training-time` models. Use the reparameterize tool to switch them to the more efficient `inference-time` architecture, which not only has fewer parameters but also requires less computation.
### Use tool
Use the provided tool to reparameterize the given model and save the checkpoint:
```bash
python tools/convert_models/reparameterize_model.py ${CFG_PATH} ${SRC_CKPT_PATH} ${TARGET_CKPT_PATH}
```
`${CFG_PATH}` is the config file path, `${SRC_CKPT_PATH}` is the source checkpoint file path, and `${TARGET_CKPT_PATH}` is the target deploy weight file path.
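For example, to reparameterize the RepLKNet-31B checkpoint (the target file name below is illustrative):
```bash
python tools/convert_models/reparameterize_model.py \
    configs/replknet/replknet-31B_32xb64_in1k.py \
    replknet-31B_3rdparty_in1k_20221118-fd08e268.pth \
    replknet-31B-deploy_in1k.pth
```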
To use the reparameterized weights, you must switch to the corresponding deploy config file.
```bash
python tools/test.py ${Deploy_CFG} ${Deploy_Checkpoint} --metrics accuracy
```
### In the code
Use `backbone.switch_to_deploy()` or `classifier.backbone.switch_to_deploy()` to switch to the deploy mode. For example:
```python
from mmcls.models import build_backbone
backbone_cfg = dict(type='RepLKNet', arch='31B')
backbone = build_backbone(backbone_cfg)
backbone.switch_to_deploy()
```
or
```python
from mmcls.models import build_classifier
cfg = dict(
type='ImageClassifier',
backbone=dict(
type='RepLKNet',
arch='31B'),
neck=dict(type='GlobalAveragePooling'),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=1024,
loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
topk=(1, 5),
))
classifier = build_classifier(cfg)
classifier.backbone.switch_to_deploy()
```
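You can also verify that the switch preserves the model's function (a minimal sketch; the tolerances are illustrative):
```python
import torch
from mmcls.models import build_backbone

backbone = build_backbone(dict(type='RepLKNet', arch='31B'))
backbone.eval()
x = torch.randn(1, 3, 224, 224)
with torch.no_grad():
    feat_train = backbone(x)[0]   # training-time (multi-branch) output
backbone.switch_to_deploy()
with torch.no_grad():
    feat_deploy = backbone(x)[0]  # deploy-time (merged) output
# Both structures compute the same function up to numerical precision.
assert torch.allclose(feat_train, feat_deploy, atol=1e-5, rtol=1e-4)
```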
## Citation
```
@inproceedings{ding2022scaling,
title={Scaling up your kernels to 31x31: Revisiting large kernel design in cnns},
author={Ding, Xiaohan and Zhang, Xiangyu and Han, Jungong and Ding, Guiguang},
booktitle={Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
pages={11963--11975},
year={2022}
}
```

View File

@ -0,0 +1,3 @@
_base_ = '../replknet-31B_32xb64_in1k-384px.py'
model = dict(backbone=dict(small_kernel_merged=True))

View File

@ -0,0 +1,3 @@
_base_ = '../replknet-31B_32xb64_in1k.py'
model = dict(backbone=dict(small_kernel_merged=True))

View File

@ -0,0 +1,3 @@
_base_ = '../replknet-31L_32xb64_in1k-384px.py'
model = dict(backbone=dict(small_kernel_merged=True))

View File

@ -0,0 +1,3 @@
_base_ = '../replknet-XL_32xb64_in1k-320px.py'
model = dict(backbone=dict(small_kernel_merged=True))

View File

@ -0,0 +1,129 @@
Collections:
- Name: RepLKNet
Metadata:
Training Data: ImageNet-1k
Architecture:
- Large-Kernel Convolution
- VGG-style Neural Network
Paper:
URL: https://arxiv.org/abs/2203.06717
Title: 'Scaling Up Your Kernels to 31x31: Revisiting Large Kernel Design in CNNs'
README: configs/replknet/README.md
Code:
URL: https://github.com/open-mmlab/mmclassification/blob/v1.0.0rc3/mmcls/models/backbones/replknet.py
Version: v1.0.0rc3
Models:
- Name: replknet-31B_3rdparty_in1k
In Collection: RepLKNet
Config: configs/replknet/replknet-31B_32xb64_in1k.py
Metadata:
FLOPs: 15636547584
Parameters: 79864168
Results:
- Dataset: ImageNet-1k
Task: Image Classification
Metrics:
Top 1 Accuracy: 83.48
Top 5 Accuracy: 96.57
Weights: https://download.openmmlab.com/mmclassification/v0/replknet/replknet-31B_3rdparty_in1k_20221118-fd08e268.pth
Converted From:
Weights: https://drive.google.com/u/0/uc?id=1azQUiCxK9feYVkkrPqwVPBtNsTzDrX7S&export=download
Code: https://github.com/DingXiaoH/RepLKNet-pytorch/blob/main/replknet.py
- Name: replknet-31B_3rdparty_in1k-384px
In Collection: RepLKNet
Config: configs/replknet/replknet-31B_32xb64_in1k-384px.py
Metadata:
FLOPs: 45952303104
Parameters: 79864168
Results:
- Dataset: ImageNet-1k
Task: Image Classification
Metrics:
Top 1 Accuracy: 84.84
Top 5 Accuracy: 97.34
Weights: https://download.openmmlab.com/mmclassification/v0/replknet/replknet-31B_3rdparty_in1k-384px_20221118-03a170ce.pth
Converted From:
Weights: https://drive.google.com/u/0/uc?id=1vo-P3XB6mRLUeDzmgv90dOu73uCeLfZN&export=download
Code: https://github.com/DingXiaoH/RepLKNet-pytorch/blob/main/replknet.py
- Name: replknet-31B_in21k-pre_3rdparty_in1k
In Collection: RepLKNet
Config: configs/replknet/replknet-31B_32xb64_in1k.py
Metadata:
Training Data:
- ImageNet-21k
- ImageNet-1k
FLOPs: 15636547584
Parameters: 79864168
Results:
- Dataset: ImageNet-1k
Task: Image Classification
Metrics:
Top 1 Accuracy: 85.20
Top 5 Accuracy: 97.56
Weights: https://download.openmmlab.com/mmclassification/v0/replknet/replknet-31B_in21k-pre_3rdparty_in1k_20221118-54ed5c46.pth
Converted From:
Weights: https://drive.google.com/u/0/uc?id=1DslZ2voXZQR1QoFY9KnbsHAeF84hzS0s&export=download
Code: https://github.com/DingXiaoH/RepLKNet-pytorch/blob/main/replknet.py
- Name: replknet-31B_in21k-pre_3rdparty_in1k-384px
In Collection: RepLKNet
Config: configs/replknet/replknet-31B_32xb64_in1k-384px.py
Metadata:
Training Data:
- ImageNet-21k
- ImageNet-1k
FLOPs: 45952303104
Parameters: 79864168
Results:
- Dataset: ImageNet-1k
Task: Image Classification
Metrics:
Top 1 Accuracy: 85.99
Top 5 Accuracy: 97.75
Weights: https://download.openmmlab.com/mmclassification/v0/replknet/replknet-31B_in21k-pre_3rdparty_in1k-384px_20221118-76c92b24.pth
Converted From:
Weights: https://drive.google.com/u/0/uc?id=1Sc46BWdXXm2fVP-K_hKKU_W8vAB-0duX&export=download
Code: https://github.com/DingXiaoH/RepLKNet-pytorch/blob/main/replknet.py
- Name: replknet-31L_in21k-pre_3rdparty_in1k-384px
In Collection: RepLKNet
Config: configs/replknet/replknet-31L_32xb64_in1k-384px.py
Metadata:
Training Data:
- ImageNet-21k
- ImageNet-1k
FLOPs: 97240006656
Parameters: 172671016
Results:
- Dataset: ImageNet-1k
Task: Image Classification
Metrics:
Top 1 Accuracy: 86.63
Top 5 Accuracy: 98.00
Weights: https://download.openmmlab.com/mmclassification/v0/replknet/replknet-31L_in21k-pre_3rdparty_in1k-384px_20221118-dc3fc07c.pth
Converted From:
Weights: https://drive.google.com/u/0/uc?id=1JYXoNHuRvC33QV1pmpzMTKEni1hpWfBl&export=download
Code: https://github.com/DingXiaoH/RepLKNet-pytorch/blob/main/replknet.py
- Name: replknet-XL_meg73m-pre_3rdparty_in1k-320px
In Collection: RepLKNet
Config: configs/replknet/replknet-XL_32xb64_in1k-320px.py
Metadata:
Training Data:
- MegData-73M
- ImageNet-1k
FLOPs: 129570201600
Parameters: 335435752
Results:
- Dataset: ImageNet-1k
Task: Image Classification
Metrics:
Top 1 Accuracy: 87.57
Top 5 Accuracy: 98.39
Weights: https://download.openmmlab.com/mmclassification/v0/replknet/replknet-XL_meg73m-pre_3rdparty_in1k-320px_20221118-88259b1d.pth
Converted From:
Weights: https://drive.google.com/u/0/uc?id=1tPC60El34GntXByIRHb-z-Apm4Y5LX1T&export=download
Code: https://github.com/DingXiaoH/RepLKNet-pytorch/blob/main/replknet.py

View File

@ -0,0 +1,12 @@
_base_ = [
'../_base_/models/replknet-31B_in1k.py',
'../_base_/datasets/imagenet_bs16_pil_bicubic_384.py',
'../_base_/schedules/imagenet_bs256_coslr.py',
'../_base_/default_runtime.py'
]
# schedule settings
param_scheduler = dict(
type='CosineAnnealingLR', T_max=300, by_epoch=True, begin=0, end=300)
train_cfg = dict(by_epoch=True, max_epochs=300)

View File

@ -0,0 +1,12 @@
_base_ = [
'../_base_/models/replknet-31B_in1k.py',
'../_base_/datasets/imagenet_bs32_pil_bicubic.py',
'../_base_/schedules/imagenet_bs256_coslr.py',
'../_base_/default_runtime.py'
]
# schedule settings
param_scheduler = dict(
type='CosineAnnealingLR', T_max=300, by_epoch=True, begin=0, end=300)
train_cfg = dict(by_epoch=True, max_epochs=300)

View File

@ -0,0 +1,12 @@
_base_ = [
'../_base_/models/replknet-31L_in1k.py',
'../_base_/datasets/imagenet_bs16_pil_bicubic_384.py',
'../_base_/schedules/imagenet_bs256_coslr.py',
'../_base_/default_runtime.py'
]
# schedule settings
param_scheduler = dict(
type='CosineAnnealingLR', T_max=300, by_epoch=True, begin=0, end=300)
train_cfg = dict(by_epoch=True, max_epochs=300)

View File

@ -0,0 +1,12 @@
_base_ = [
'../_base_/models/replknet-XL_in1k.py',
'../_base_/datasets/imagenet_bs8_pil_bicubic_320.py',
'../_base_/schedules/imagenet_bs256_coslr.py',
'../_base_/default_runtime.py'
]
# schedule settings
param_scheduler = dict(
type='CosineAnnealingLR', T_max=300, by_epoch=True, begin=0, end=300)
train_cfg = dict(by_epoch=True, max_epochs=300)

View File

@ -85,6 +85,7 @@ Backbones
PCPVT
PoolFormer
RegNet
RepLKNet
RepMLPNet
RepVGG
Res2Net

View File

@ -23,6 +23,7 @@ from .mobilevit import MobileViT
from .mvit import MViT
from .poolformer import PoolFormer
from .regnet import RegNet
from .replknet import RepLKNet
from .repmlp import RepMLPNet
from .repvgg import RepVGG
from .res2net import Res2Net
@ -82,6 +83,7 @@ __all__ = [
'CSPResNet',
'CSPResNeXt',
'CSPNet',
'RepLKNet',
'RepMLPNet',
'PoolFormer',
'DenseNet',

View File

@ -0,0 +1,668 @@
# Copyright (c) OpenMMLab. All rights reserved.
import torch
import torch.nn as nn
import torch.utils.checkpoint as checkpoint
from mmcv.cnn import build_activation_layer, build_norm_layer
from mmcv.cnn.bricks import DropPath
from mmengine.model import BaseModule
from mmengine.utils.dl_utils.parrots_wrapper import _BatchNorm
from mmcls.registry import MODELS
from .base_backbone import BaseBackbone
def conv_bn(in_channels,
out_channels,
kernel_size,
stride,
padding,
groups,
dilation=1,
norm_cfg=dict(type='BN')):
"""Construct a sequential conv and bn.
Args:
in_channels (int): Dimension of input features.
out_channels (int): Dimension of output features.
kernel_size (int): kernel_size of the convolution.
stride (int): stride of the convolution.
padding (int): padding of the convolution.
groups (int): groups of the convolution.
dilation (int): dilation of the convolution. Default to 1.
norm_cfg (dict): dictionary to construct and config norm layer.
Default to ``dict(type='BN', requires_grad=True)``.
Returns:
nn.Sequential(): A conv layer and a batch norm layer.
"""
if padding is None:
padding = kernel_size // 2
result = nn.Sequential()
result.add_module(
'conv',
nn.Conv2d(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=kernel_size,
stride=stride,
padding=padding,
dilation=dilation,
groups=groups,
bias=False))
result.add_module('bn', build_norm_layer(norm_cfg, out_channels)[1])
return result
def conv_bn_relu(in_channels,
out_channels,
kernel_size,
stride,
padding,
groups,
dilation=1):
"""Construct a sequential conv, bn and relu.
Args:
in_channels (int): Dimension of input features.
out_channels (int): Dimension of output features.
kernel_size (int): kernel_size of the convolution.
stride (int): stride of the convolution.
padding (int): padding of the convolution.
groups (int): groups of the convolution.
dilation (int): dilation of the convolution. Default to 1.
Returns:
nn.Sequential(): A conv layer, batch norm layer and a relu function.
"""
if padding is None:
padding = kernel_size // 2
result = conv_bn(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=kernel_size,
stride=stride,
padding=padding,
groups=groups,
dilation=dilation)
result.add_module('nonlinear', nn.ReLU())
return result
def fuse_bn(conv, bn):
"""Fuse the parameters in a branch with a conv and bn.
Args:
conv (nn.Conv2d): The convolution module to fuse.
bn (nn.BatchNorm2d): The batch normalization to fuse.
Returns:
tuple[torch.Tensor, torch.Tensor]: The parameters obtained after
fusing the parameters of conv and bn in one branch.
The first element is the weight and the second is the bias.
"""
kernel = conv.weight
running_mean = bn.running_mean
running_var = bn.running_var
gamma = bn.weight
beta = bn.bias
eps = bn.eps
std = (running_var + eps).sqrt()
t = (gamma / std).reshape(-1, 1, 1, 1)
return kernel * t, beta - running_mean * gamma / std
class ReparamLargeKernelConv(BaseModule):
"""Super large kernel implemented by with large convolutions.
Input: Tensor with shape [B, C, H, W].
Output: Tensor with shape [B, C, H, W].
Args:
in_channels (int): Dimension of input features.
out_channels (int): Dimension of output features.
kernel_size (int): kernel_size of the large convolution.
stride (int): stride of the large convolution.
groups (int): groups of the large convolution.
small_kernel (int): kernel_size of the small convolution.
small_kernel_merged (bool): Whether to switch the model structure to
deployment mode (merge the small kernel to the large kernel).
Default to False.
init_cfg (dict or list[dict], optional): Initialization config dict.
Defaults to None
"""
def __init__(self,
in_channels,
out_channels,
kernel_size,
stride,
groups,
small_kernel,
small_kernel_merged=False,
init_cfg=None):
super(ReparamLargeKernelConv, self).__init__(init_cfg)
self.kernel_size = kernel_size
self.small_kernel = small_kernel
self.small_kernel_merged = small_kernel_merged
# We assume the conv does not change the feature map size,
# so padding = k//2.
# Otherwise, you may configure padding as you wish,
# and change the padding of small_conv accordingly.
padding = kernel_size // 2
if small_kernel_merged:
self.lkb_reparam = nn.Conv2d(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=kernel_size,
stride=stride,
padding=padding,
dilation=1,
groups=groups,
bias=True)
else:
self.lkb_origin = conv_bn(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=kernel_size,
stride=stride,
padding=padding,
dilation=1,
groups=groups)
if small_kernel is not None:
assert small_kernel <= kernel_size
self.small_conv = conv_bn(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=small_kernel,
stride=stride,
padding=small_kernel // 2,
groups=groups,
dilation=1)
def forward(self, inputs):
if hasattr(self, 'lkb_reparam'):
out = self.lkb_reparam(inputs)
else:
out = self.lkb_origin(inputs)
if hasattr(self, 'small_conv'):
out += self.small_conv(inputs)
return out
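# Both branches are convolutions of the same input, so their outputs add
# linearly; zero-padding the fused small kernel up to the large kernel size
# lets the sum be expressed as a single convolution.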
def get_equivalent_kernel_bias(self):
eq_k, eq_b = fuse_bn(self.lkb_origin.conv, self.lkb_origin.bn)
if hasattr(self, 'small_conv'):
small_k, small_b = fuse_bn(self.small_conv.conv,
self.small_conv.bn)
eq_b += small_b
# add to the central part
eq_k += nn.functional.pad(
small_k, [(self.kernel_size - self.small_kernel) // 2] * 4)
return eq_k, eq_b
def merge_kernel(self):
"""Switch the model structure from training mode to deployment mode."""
if self.small_kernel_merged:
return
eq_k, eq_b = self.get_equivalent_kernel_bias()
self.lkb_reparam = nn.Conv2d(
in_channels=self.lkb_origin.conv.in_channels,
out_channels=self.lkb_origin.conv.out_channels,
kernel_size=self.lkb_origin.conv.kernel_size,
stride=self.lkb_origin.conv.stride,
padding=self.lkb_origin.conv.padding,
dilation=self.lkb_origin.conv.dilation,
groups=self.lkb_origin.conv.groups,
bias=True)
self.lkb_reparam.weight.data = eq_k
self.lkb_reparam.bias.data = eq_b
self.__delattr__('lkb_origin')
if hasattr(self, 'small_conv'):
self.__delattr__('small_conv')
self.small_kernel_merged = True
class ConvFFN(BaseModule):
"""Mlp implemented by with 1*1 convolutions.
Input: Tensor with shape [B, C, H, W].
Output: Tensor with shape [B, C, H, W].
Args:
in_channels (int): Dimension of input features.
internal_channels (int): Dimension of hidden features.
out_channels (int): Dimension of output features.
drop_path (float): Stochastic depth rate. Defaults to 0.
norm_cfg (dict): dictionary to construct and config norm layer.
Default to ``dict(type='BN', requires_grad=True)``.
act_cfg (dict): The config dict for activation between pointwise
convolution. Defaults to ``dict(type='GELU')``.
init_cfg (dict or list[dict], optional): Initialization config dict.
Defaults to None.
"""
def __init__(self,
in_channels,
internal_channels,
out_channels,
drop_path,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='GELU'),
init_cfg=None):
super(ConvFFN, self).__init__(init_cfg)
self.drop_path = DropPath(
drop_prob=drop_path) if drop_path > 0. else nn.Identity()
self.preffn_bn = build_norm_layer(norm_cfg, in_channels)[1]
self.pw1 = conv_bn(
in_channels=in_channels,
out_channels=internal_channels,
kernel_size=1,
stride=1,
padding=0,
groups=1)
self.pw2 = conv_bn(
in_channels=internal_channels,
out_channels=out_channels,
kernel_size=1,
stride=1,
padding=0,
groups=1)
self.nonlinear = build_activation_layer(act_cfg)
def forward(self, x):
out = self.preffn_bn(x)
out = self.pw1(out)
out = self.nonlinear(out)
out = self.pw2(out)
return x + self.drop_path(out)
class RepLKBlock(BaseModule):
"""RepLKBlock for RepLKNet backbone.
Args:
in_channels (int): The input channels of the block.
dw_channels (int): The intermediate channels of the block,
i.e., input channels of the large kernel convolution.
block_lk_size (int): size of the super large kernel. Defaults: 31.
small_kernel (int): size of the parallel small kernel. Defaults: 5.
drop_path (float): Stochastic depth rate. Defaults: 0.
small_kernel_merged (bool): Whether to switch the model structure to
deployment mode (merge the small kernel to the large kernel).
Default to False.
norm_cfg (dict): dictionary to construct and config norm layer.
Default to ``dict(type='BN', requires_grad=True)``.
act_cfg (dict): Config dict for activation layer.
Default to ``dict(type='ReLU')``.
init_cfg (dict or list[dict], optional): Initialization config dict.
Default to None
"""
def __init__(self,
in_channels,
dw_channels,
block_lk_size,
small_kernel,
drop_path,
small_kernel_merged=False,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='ReLU'),
init_cfg=None):
super(RepLKBlock, self).__init__(init_cfg)
self.pw1 = conv_bn_relu(in_channels, dw_channels, 1, 1, 0, groups=1)
self.pw2 = conv_bn(dw_channels, in_channels, 1, 1, 0, groups=1)
self.large_kernel = ReparamLargeKernelConv(
in_channels=dw_channels,
out_channels=dw_channels,
kernel_size=block_lk_size,
stride=1,
groups=dw_channels,
small_kernel=small_kernel,
small_kernel_merged=small_kernel_merged)
self.lk_nonlinear = build_activation_layer(act_cfg)
self.prelkb_bn = build_norm_layer(norm_cfg, in_channels)[1]
self.drop_path = DropPath(
drop_prob=drop_path) if drop_path > 0. else nn.Identity()
def forward(self, x):
out = self.prelkb_bn(x)
out = self.pw1(out)
out = self.large_kernel(out)
out = self.lk_nonlinear(out)
out = self.pw2(out)
return x + self.drop_path(out)
class RepLKNetStage(BaseModule):
"""
generate RepLKNet blocks for a stage
return: RepLKNet blocks
Args:
channels (int): The input channels of the stage.
num_blocks (int): The number of blocks of the stage.
stage_lk_size (int): size of the super large kernel. Defaults: 31.
drop_path (float): Stochastic depth rate. Defaults: 0.
small_kernel (int): size of the parallel small kernel. Defaults: 5.
dw_ratio (float): The intermediate channels
expansion ratio of the block. Defaults: 1.
ffn_ratio (float): Mlp expansion ratio. Defaults to 4.
with_cp (bool): Use checkpoint or not. Using checkpoint will save some
memory while slowing down the training speed. Default to False.
small_kernel_merged (bool): Whether to switch the model structure to
deployment mode (merge the small kernel to the large kernel).
Default to False.
norm_intermediate_features (bool): Construct and config norm layer
or not.
Using True will normalize the intermediate features for
downstream dense prediction tasks.
norm_cfg (dict): dictionary to construct and config norm layer.
Default to ``dict(type='BN', requires_grad=True)``.
init_cfg (dict or list[dict], optional): Initialization config dict.
Default to None
"""
def __init__(
self,
channels,
num_blocks,
stage_lk_size,
drop_path,
small_kernel,
dw_ratio=1,
ffn_ratio=4,
with_cp=False, # train with torch.utils.checkpoint to save memory
small_kernel_merged=False,
norm_intermediate_features=False,
norm_cfg=dict(type='BN'),
init_cfg=None):
super(RepLKNetStage, self).__init__(init_cfg)
self.with_cp = with_cp
blks = []
for i in range(num_blocks):
block_drop_path = drop_path[i] if isinstance(drop_path,
list) else drop_path
# Assume all RepLK Blocks within a stage share the same lk_size.
# You may tune it on your own model.
replk_block = RepLKBlock(
in_channels=channels,
dw_channels=int(channels * dw_ratio),
block_lk_size=stage_lk_size,
small_kernel=small_kernel,
drop_path=block_drop_path,
small_kernel_merged=small_kernel_merged)
convffn_block = ConvFFN(
in_channels=channels,
internal_channels=int(channels * ffn_ratio),
out_channels=channels,
drop_path=block_drop_path)
blks.append(replk_block)
blks.append(convffn_block)
self.blocks = nn.ModuleList(blks)
if norm_intermediate_features:
self.norm = build_norm_layer(norm_cfg, channels)[1]
else:
self.norm = nn.Identity()
def forward(self, x):
for blk in self.blocks:
if self.with_cp:
x = checkpoint.checkpoint(blk, x) # Save training memory
else:
x = blk(x)
return x
@MODELS.register_module()
class RepLKNet(BaseBackbone):
"""RepLKNet backbone.
A PyTorch impl of :
`Scaling Up Your Kernels to 31x31: Revisiting Large Kernel Design in CNNs
<https://arxiv.org/abs/2203.06717>`_
Args:
arch (str | dict): The architecture of RepLKNet.
If it's a dict, it should contain the following keys:
- large_kernel_sizes (Sequence[int]):
Large kernel size in each stage.
- layers (Sequence[int]): Number of blocks in each stage.
- channels (Sequence[int]): Number of channels in each stage.
- small_kernel (int): size of the parallel small kernel.
- dw_ratio (float): The intermediate channels
expansion ratio of the block.
in_channels (int): Number of input image channels. Default to 3.
ffn_ratio (float): Mlp expansion ratio. Defaults to 4.
out_indices (Sequence[int]): Output from which stages.
Default to (3, ).
strides (Sequence[int]): Strides of the first block of each stage.
Default to (2, 2, 2, 2).
dilations (Sequence[int]): Dilation of each stage.
Default to (1, 1, 1, 1).
frozen_stages (int): Stages to be frozen
(all param fixed). -1 means not freezing any parameters.
Default to -1.
conv_cfg (dict | None): The config dict for conv layers.
Default to None.
norm_cfg (dict): The config dict for norm layers.
Default to ``dict(type='BN')``.
act_cfg (dict): Config dict for activation layer.
Default to ``dict(type='ReLU')``.
with_cp (bool): Use checkpoint or not. Using checkpoint will save some
memory while slowing down the training speed. Default to False.
drop_path_rate (float): Stochastic depth rate. Default to 0.3.
small_kernel_merged (bool): Whether to switch the model structure to
deployment mode (merge the small kernel into the large kernel).
Default to False.
norm_intermediate_features (bool): Construct and
config norm layer or not.
Using True will normalize the intermediate features
for downstream dense prediction tasks.
norm_eval (bool): Whether to set norm layers to eval mode, namely,
freeze running stats (mean and var). Note: Effect on Batch Norm
and its variants only. Default to False.
init_cfg (dict or list[dict], optional): Initialization config dict.
"""
arch_settings = {
'31B':
dict(
large_kernel_sizes=[31, 29, 27, 13],
layers=[2, 2, 18, 2],
channels=[128, 256, 512, 1024],
small_kernel=5,
dw_ratio=1),
'31L':
dict(
large_kernel_sizes=[31, 29, 27, 13],
layers=[2, 2, 18, 2],
channels=[192, 384, 768, 1536],
small_kernel=5,
dw_ratio=1),
'XL':
dict(
large_kernel_sizes=[27, 27, 27, 13],
layers=[2, 2, 18, 2],
channels=[256, 512, 1024, 2048],
small_kernel=None,
dw_ratio=1.5),
}
def __init__(self,
arch,
in_channels=3,
ffn_ratio=4,
out_indices=(3, ),
strides=(2, 2, 2, 2),
dilations=(1, 1, 1, 1),
frozen_stages=-1,
conv_cfg=None,
norm_cfg=dict(type='BN'),
act_cfg=dict(type='ReLU'),
with_cp=False,
drop_path_rate=0.3,
small_kernel_merged=False,
norm_intermediate_features=False,
norm_eval=False,
init_cfg=[
dict(type='Kaiming', layer=['Conv2d']),
dict(
type='Constant',
val=1,
layer=['_BatchNorm', 'GroupNorm'])
]):
super(RepLKNet, self).__init__(init_cfg)
if isinstance(arch, str):
assert arch in self.arch_settings, \
f'"arch": "{arch}" is not one of the arch_settings'
arch = self.arch_settings[arch]
elif not isinstance(arch, dict):
raise TypeError('Expect "arch" to be either a string '
f'or a dict, got {type(arch)}')
assert len(arch['layers']) == len(
arch['channels']) == len(strides) == len(dilations)
assert max(out_indices) < len(arch['layers'])
self.arch = arch
self.in_channels = in_channels
self.out_indices = out_indices
self.strides = strides
self.dilations = dilations
self.frozen_stages = frozen_stages
self.conv_cfg = conv_cfg
self.norm_cfg = norm_cfg
self.act_cfg = act_cfg
self.with_cp = with_cp
self.drop_path_rate = drop_path_rate
self.small_kernel_merged = small_kernel_merged
self.norm_eval = norm_eval
self.norm_intermediate_features = norm_intermediate_features
base_width = self.arch['channels'][0]
self.num_stages = len(self.arch['layers'])
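# 4-layer stem (overall stride 4): a stride-2 3x3 conv, a depth-wise 3x3
# conv, a 1x1 channel mixer, then another stride-2 depth-wise 3x3 conv.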
self.stem = nn.ModuleList([
conv_bn_relu(
in_channels=in_channels,
out_channels=base_width,
kernel_size=3,
stride=2,
padding=1,
groups=1),
conv_bn_relu(
in_channels=base_width,
out_channels=base_width,
kernel_size=3,
stride=1,
padding=1,
groups=base_width),
conv_bn_relu(
in_channels=base_width,
out_channels=base_width,
kernel_size=1,
stride=1,
padding=0,
groups=1),
conv_bn_relu(
in_channels=base_width,
out_channels=base_width,
kernel_size=3,
stride=2,
padding=1,
groups=base_width)
])
# stochastic depth. We set block-wise drop-path rate.
# The higher level blocks are more likely to be dropped.
# This implementation follows Swin.
dpr = [
x.item() for x in torch.linspace(0, drop_path_rate,
sum(self.arch['layers']))
]
self.stages = nn.ModuleList()
self.transitions = nn.ModuleList()
for stage_idx in range(self.num_stages):
layer = RepLKNetStage(
channels=self.arch['channels'][stage_idx],
num_blocks=self.arch['layers'][stage_idx],
stage_lk_size=self.arch['large_kernel_sizes'][stage_idx],
drop_path=dpr[sum(self.arch['layers'][:stage_idx]
):sum(self.arch['layers'][:stage_idx + 1])],
small_kernel=self.arch['small_kernel'],
dw_ratio=self.arch['dw_ratio'],
ffn_ratio=ffn_ratio,
with_cp=with_cp,
small_kernel_merged=small_kernel_merged,
norm_intermediate_features=(stage_idx in out_indices))
self.stages.append(layer)
if stage_idx < len(self.arch['layers']) - 1:
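# Transition between stages: a 1x1 conv expands the channels, then a
# stride-2 depth-wise 3x3 conv halves the spatial resolution.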
transition = nn.Sequential(
conv_bn_relu(
self.arch['channels'][stage_idx],
self.arch['channels'][stage_idx + 1],
1,
1,
0,
groups=1),
conv_bn_relu(
self.arch['channels'][stage_idx + 1],
self.arch['channels'][stage_idx + 1],
3,
stride=2,
padding=1,
groups=self.arch['channels'][stage_idx + 1]))
self.transitions.append(transition)
def forward_features(self, x):
x = self.stem[0](x)
for stem_layer in self.stem[1:]:
if self.with_cp:
x = checkpoint.checkpoint(stem_layer, x) # save memory
else:
x = stem_layer(x)
# Need the intermediate feature maps
outs = []
for stage_idx in range(self.num_stages):
x = self.stages[stage_idx](x)
if stage_idx in self.out_indices:
outs.append(self.stages[stage_idx].norm(x))
# For RepLKNet-XL normalize the features
# before feeding them into the heads
if stage_idx < self.num_stages - 1:
x = self.transitions[stage_idx](x)
return outs
def forward(self, x):
x = self.forward_features(x)
return tuple(x)
def _freeze_stages(self):
if self.frozen_stages >= 0:
self.stem.eval()
for param in self.stem.parameters():
param.requires_grad = False
for i in range(self.frozen_stages):
stage = self.stages[i]
stage.eval()
for param in stage.parameters():
param.requires_grad = False
def train(self, mode=True):
super(RepLKNet, self).train(mode)
self._freeze_stages()
if mode and self.norm_eval:
for m in self.modules():
if isinstance(m, _BatchNorm):
m.eval()
def switch_to_deploy(self):
for m in self.modules():
if hasattr(m, 'merge_kernel'):
m.merge_kernel()
self.small_kernel_merged = True

View File

@ -38,3 +38,4 @@ Import:
- configs/hornet/metafile.yml
- configs/mobilevit/metafile.yml
- configs/davit/metafile.yml
- configs/replknet/metafile.yml

View File

@ -0,0 +1,304 @@
# Copyright (c) OpenMMLab. All rights reserved.
import os
import tempfile
import pytest
import torch
from mmengine.runner import load_checkpoint, save_checkpoint
from torch import nn
from torch.nn.modules import GroupNorm
from torch.nn.modules.batchnorm import _BatchNorm
from mmcls.models.backbones import RepLKNet
from mmcls.models.backbones.replknet import ReparamLargeKernelConv
def check_norm_state(modules, train_state):
"""Check if norm layer is in correct train state."""
for mod in modules:
if isinstance(mod, _BatchNorm):
if mod.training != train_state:
return False
return True
def is_norm(modules):
"""Check if is one of the norms."""
if isinstance(modules, (GroupNorm, _BatchNorm)):
return True
return False
def is_replk_block(modules):
if isinstance(modules, ReparamLargeKernelConv):
return True
return False
def test_replknet_replkblock():
# Test ReparamLargeKernelConv with in_channels != out_channels,
# kernel_size = 31, stride = 1, groups=in_channels, small_kernel = 5
block = ReparamLargeKernelConv(
5, 10, kernel_size=31, stride=1, groups=5, small_kernel=5)
block.eval()
x = torch.randn(1, 5, 64, 64)
x_out_not_deploy = block(x)
assert block.small_kernel <= block.kernel_size
assert not hasattr(block, 'lkb_reparam')
assert hasattr(block, 'lkb_origin')
assert hasattr(block, 'small_conv')
assert x_out_not_deploy.shape == torch.Size((1, 10, 64, 64))
block.merge_kernel()
assert block.small_kernel_merged is True
x_out_deploy = block(x)
assert x_out_deploy.shape == torch.Size((1, 10, 64, 64))
assert torch.allclose(x_out_not_deploy, x_out_deploy, atol=1e-5, rtol=1e-4)
# Test ReparamLargeKernelConv with in_channels == out_channels,
# kernel_size = 31, stride = 1, groups=in_channels, small_kernel = 5
block = ReparamLargeKernelConv(
12, 12, kernel_size=31, stride=1, groups=12, small_kernel=5)
block.eval()
x = torch.randn(1, 12, 64, 64)
x_out_not_deploy = block(x)
assert block.small_kernel <= block.kernel_size
assert not hasattr(block, 'lkb_reparam')
assert hasattr(block, 'lkb_origin')
assert hasattr(block, 'small_conv')
assert x_out_not_deploy.shape == torch.Size((1, 12, 64, 64))
block.merge_kernel()
assert block.small_kernel_merged is True
x_out_deploy = block(x)
assert x_out_deploy.shape == torch.Size((1, 12, 64, 64))
assert torch.allclose(x_out_not_deploy, x_out_deploy, atol=1e-5, rtol=1e-4)
# Test ReparamLargeKernelConv with in_channels == out_channels,
# kernel_size = 31, stride = 2, groups=in_channels, small_kernel = 5
block = ReparamLargeKernelConv(
16, 16, kernel_size=31, stride=2, groups=16, small_kernel=5)
block.eval()
x = torch.randn(1, 16, 64, 64)
x_out_not_deploy = block(x)
assert block.small_kernel <= block.kernel_size
assert not hasattr(block, 'lkb_reparam')
assert hasattr(block, 'lkb_origin')
assert hasattr(block, 'small_conv')
assert x_out_not_deploy.shape == torch.Size((1, 16, 32, 32))
block.merge_kernel()
assert block.small_kernel_merged is True
x_out_deploy = block(x)
assert x_out_deploy.shape == torch.Size((1, 16, 32, 32))
assert torch.allclose(x_out_not_deploy, x_out_deploy, atol=1e-5, rtol=1e-4)
# Test ReparamLargeKernelConv with in_channels == out_channels,
# kernel_size = 27, stride = 1, groups=in_channels, small_kernel = 5
block = ReparamLargeKernelConv(
12, 12, kernel_size=27, stride=1, groups=12, small_kernel=5)
block.eval()
x = torch.randn(1, 12, 48, 48)
x_out_not_deploy = block(x)
assert block.small_kernel <= block.kernel_size
assert not hasattr(block, 'lkb_reparam')
assert hasattr(block, 'lkb_origin')
assert hasattr(block, 'small_conv')
assert x_out_not_deploy.shape == torch.Size((1, 12, 48, 48))
block.merge_kernel()
assert block.small_kernel_merged is True
x_out_deploy = block(x)
assert x_out_deploy.shape == torch.Size((1, 12, 48, 48))
assert torch.allclose(x_out_not_deploy, x_out_deploy, atol=1e-5, rtol=1e-4)
# Test ReparamLargeKernelConv with in_channels == out_channels,
# kernel_size = 31, stride = 1, groups=in_channels, small_kernel = 7
block = ReparamLargeKernelConv(
12, 12, kernel_size=31, stride=1, groups=12, small_kernel=7)
block.eval()
x = torch.randn(1, 12, 64, 64)
x_out_not_deploy = block(x)
assert block.small_kernel <= block.kernel_size
assert not hasattr(block, 'lkb_reparam')
assert hasattr(block, 'lkb_origin')
assert hasattr(block, 'small_conv')
assert x_out_not_deploy.shape == torch.Size((1, 12, 64, 64))
block.merge_kernel()
assert block.small_kernel_merged is True
x_out_deploy = block(x)
assert x_out_deploy.shape == torch.Size((1, 12, 64, 64))
assert torch.allclose(x_out_not_deploy, x_out_deploy, atol=1e-5, rtol=1e-4)
# Test ReparamLargeKernelConv with small_kernel_merged == True
block = ReparamLargeKernelConv(
8,
8,
kernel_size=31,
stride=1,
groups=8,
small_kernel=5,
small_kernel_merged=True)
assert isinstance(block.lkb_reparam, nn.Conv2d)
assert not hasattr(block, 'lkb_origin')
assert not hasattr(block, 'small_conv')
x = torch.randn(1, 8, 48, 48)
x_out = block(x)
assert x_out.shape == torch.Size((1, 8, 48, 48))
def test_replknet_backbone():
with pytest.raises(TypeError):
# arch must be str or dict
RepLKNet(arch=[4, 6, 16, 1])
with pytest.raises(AssertionError):
# arch must in arch_settings
RepLKNet(arch='31C')
with pytest.raises(KeyError):
# arch dict must have 'layers'
arch = dict(large_kernel_sizes=[31, 29, 27, 13])
RepLKNet(arch=arch)
with pytest.raises(KeyError):
# arch dict must have 'channels'
arch = dict(large_kernel_sizes=[31, 29, 27, 13], layers=[2, 2, 18, 2])
RepLKNet(arch=arch)
with pytest.raises(KeyError):
# arch dict must have 'small_kernel' and 'dw_ratio'
arch = dict(
large_kernel_sizes=[31, 29, 27, 13],
layers=[2, 2, 18, 2],
channels=[128, 256, 512, 1024])
RepLKNet(arch=arch)
# len(arch['large_kernel_sizes']) == len(arch['layers'])
# == len(arch['channels']) == len(strides) == len(dilations)
with pytest.raises(AssertionError):
arch = dict(
large_kernel_sizes=[31, 29, 27, 13],
layers=[2, 2, 18, 2],
channels=[128, 256, 1024],
small_kernel=5,
dw_ratio=1)
RepLKNet(arch=arch)
# len(strides) must equal to 4
with pytest.raises(AssertionError):
RepLKNet('31B', strides=(2, 2, 2))
# len(dilations) must equal to 4
with pytest.raises(AssertionError):
RepLKNet('31B', strides=(2, 2, 2, 2), dilations=(1, 1, 1))
# max(out_indices) must be less than len(arch['layers'])
with pytest.raises(AssertionError):
RepLKNet('31B', out_indices=(5, ))
# Test RepLKNet norm state
model = RepLKNet('31B')
model.train()
assert check_norm_state(model.modules(), True)
# Test RepLKNet with first stage frozen
frozen_stages = 1
model = RepLKNet('31B', frozen_stages=frozen_stages)
model.train()
for param in model.stem.parameters():
assert param.requires_grad is False
for i in range(0, frozen_stages):
stage = model.stages[i]
for mod in stage.modules():
if isinstance(mod, _BatchNorm):
assert mod.training is False
for param in stage.parameters():
assert param.requires_grad is False
# Test RepLKNet with norm_eval
model = RepLKNet('31B', norm_eval=True)
model.train()
assert check_norm_state(model.modules(), False)
# Test RepLKNet forward with layer 3 forward
model = RepLKNet('31B', out_indices=(3, ))
model.init_weights()
model.train()
for m in model.modules():
if is_norm(m):
assert isinstance(m, _BatchNorm)
imgs = torch.randn(1, 3, 224, 224)
feat = model(imgs)
assert isinstance(feat, tuple)
assert len(feat) == 1
assert isinstance(feat[0], torch.Tensor)
assert feat[0].shape == torch.Size((1, 1024, 7, 7))
# Test RepLKNet forward
model_test_settings = [
dict(model_name='31B', out_sizes=(128, 256, 512, 1024)),
# dict(model_name='31L', out_sizes=(192, 384, 768, 1536)),
# dict(model_name='XL', out_sizes=(256, 512, 1024, 2048))
]
choose_models = ['31B']
# Test RepLKNet model forward
for model_test_setting in model_test_settings:
if model_test_setting['model_name'] not in choose_models:
continue
model = RepLKNet(
model_test_setting['model_name'], out_indices=(0, 1, 2, 3))
model.init_weights()
# Test Norm
for m in model.modules():
if is_norm(m):
assert isinstance(m, _BatchNorm)
model.train()
imgs = torch.randn(1, 3, 224, 224)
feat = model(imgs)
assert feat[0].shape == torch.Size(
(1, model_test_setting['out_sizes'][0], 56, 56))
assert feat[1].shape == torch.Size(
(1, model_test_setting['out_sizes'][1], 28, 28))
assert feat[2].shape == torch.Size(
(1, model_test_setting['out_sizes'][2], 14, 14))
assert feat[3].shape == torch.Size(
(1, model_test_setting['out_sizes'][3], 7, 7))
# Test eval of "train" mode and "deploy" mode
gap = nn.AdaptiveAvgPool2d(output_size=(1))
fc = nn.Linear(model_test_setting['out_sizes'][3], 10)
model.eval()
feat = model(imgs)
pred = fc(gap(feat[3]).flatten(1))
model.switch_to_deploy()
for m in model.modules():
if isinstance(m, ReparamLargeKernelConv):
assert m.small_kernel_merged is True
feat_deploy = model(imgs)
pred_deploy = fc(gap(feat_deploy[3]).flatten(1))
for i in range(4):
assert torch.allclose(feat[i], feat_deploy[i], atol=1e-5, rtol=1e-4)
assert torch.allclose(pred, pred_deploy, atol=1e-5, rtol=1e-4)
def test_replknet_load():
# Test that outputs match before and after loading a deploy checkpoint
model = RepLKNet('31B', out_indices=(0, 1, 2, 3))
inputs = torch.randn((1, 3, 224, 224))
ckpt_path = os.path.join(tempfile.gettempdir(), 'ckpt.pth')
model.switch_to_deploy()
model.eval()
outputs = model(inputs)
model_deploy = RepLKNet(
'31B', out_indices=(0, 1, 2, 3), small_kernel_merged=True)
model_deploy.eval()
save_checkpoint(model.state_dict(), ckpt_path)
load_checkpoint(model_deploy, ckpt_path, strict=True)
outputs_load = model_deploy(inputs)
for feat, feat_load in zip(outputs, outputs_load):
assert torch.allclose(feat, feat_load)

View File

@ -342,6 +342,7 @@ def test_repvgg_load():
outputs = model(inputs)
model_deploy = RepVGG('A1', out_indices=(0, 1, 2, 3), deploy=True)
model_deploy.eval()
save_checkpoint(model.state_dict(), ckpt_path)
load_checkpoint(model_deploy, ckpt_path, strict=True)

View File

@ -0,0 +1,58 @@
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
from collections import OrderedDict
from pathlib import Path
import torch
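# Usage sketch (the script and output file names here are illustrative):
#   python replknet_to_mmcls.py RepLKNet-31B_official.pth replknet-31B_3rdparty_in1k.pth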
def convert(src, dst):
print('Converting...')
blobs = torch.load(src, map_location='cpu')
converted_state_dict = OrderedDict()
for key in blobs:
splited_key = key.split('.')
splited_key = [
'backbone.stem' if i[:4] == 'stem' else i for i in splited_key
]
splited_key = [
'backbone.stages' if i[:6] == 'stages' else i for i in splited_key
]
splited_key = [
'backbone.transitions' if i[:11] == 'transitions' else i
for i in splited_key
]
splited_key = [
'backbone.stages.3.norm' if i[:4] == 'norm' else i
for i in splited_key
]
splited_key = [
'head.fc' if i[:4] == 'head' else i for i in splited_key
]
new_key = '.'.join(splited_key)
converted_state_dict[new_key] = blobs[key]
torch.save(converted_state_dict, dst)
print('Done!')
def main():
parser = argparse.ArgumentParser(description='Convert model keys')
parser.add_argument('src', help='src official RepLKNet model path')
parser.add_argument('dst', help='save path')
args = parser.parse_args()
dst = Path(args.dst)
if dst.suffix != '.pth':
print('The destination path must be a .pth file.')
exit(1)
dst.parent.mkdir(parents=True, exist_ok=True)
convert(args.src, args.dst)
if __name__ == '__main__':
main()