[Feature] Support ConvNeXt-V2 backbone. (#1294)

* [Feature] Support ConvNeXt-V2.

* Use registry of mmcls instead of mmengine.

* Add README.

* Add unit tests and docs.
Ma Zerun 2023-01-06 16:13:41 +08:00 committed by GitHub
parent e880451a54
commit 060b0ed3b5
41 changed files with 1432 additions and 137 deletions


@ -96,6 +96,7 @@ def inference(config_file, checkpoint, work_dir, args, exp_name):
data = default_collate([data] * args.batch_size)
resolution = tuple(data['inputs'].shape[-2:])
model = Runner.from_cfg(cfg).model
load_checkpoint(model, checkpoint, map_location='cpu')
forward = model.val_step
else:
# For configs only for get model.


@ -1,27 +1,17 @@
# Model settings
model = dict(
type='ImageClassifier',
backbone=dict(
type='ConvNeXt',
arch='base',
out_indices=(3, ),
drop_path_rate=0.5,
gap_before_final_norm=True,
init_cfg=[
dict(
type='TruncNormal',
layer=['Conv2d', 'Linear'],
std=.02,
bias=0.),
dict(type='Constant', layer=['LayerNorm'], val=1., bias=0.),
]),
backbone=dict(type='ConvNeXt', arch='base', drop_path_rate=0.5),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=1024,
loss=dict(
type='LabelSmoothLoss', label_smooth_val=0.1, mode='original'),
init_cfg=None,
),
init_cfg=dict(
type='TruncNormal', layer=['Conv2d', 'Linear'], std=.02, bias=0.),
train_cfg=dict(augments=[
dict(type='Mixup', alpha=0.8),
dict(type='CutMix', alpha=1.0),


@ -1,27 +1,17 @@
# Model settings
model = dict(
type='ImageClassifier',
backbone=dict(
type='ConvNeXt',
arch='large',
out_indices=(3, ),
drop_path_rate=0.5,
gap_before_final_norm=True,
init_cfg=[
dict(
type='TruncNormal',
layer=['Conv2d', 'Linear'],
std=.02,
bias=0.),
dict(type='Constant', layer=['LayerNorm'], val=1., bias=0.),
]),
backbone=dict(type='ConvNeXt', arch='large', drop_path_rate=0.5),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=1536,
loss=dict(
type='LabelSmoothLoss', label_smooth_val=0.1, mode='original'),
init_cfg=None,
),
init_cfg=dict(
type='TruncNormal', layer=['Conv2d', 'Linear'], std=.02, bias=0.),
train_cfg=dict(augments=[
dict(type='Mixup', alpha=0.8),
dict(type='CutMix', alpha=1.0),


@ -1,27 +1,17 @@
# Model settings
model = dict(
type='ImageClassifier',
backbone=dict(
type='ConvNeXt',
arch='small',
out_indices=(3, ),
drop_path_rate=0.4,
gap_before_final_norm=True,
init_cfg=[
dict(
type='TruncNormal',
layer=['Conv2d', 'Linear'],
std=.02,
bias=0.),
dict(type='Constant', layer=['LayerNorm'], val=1., bias=0.),
]),
backbone=dict(type='ConvNeXt', arch='small', drop_path_rate=0.4),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=768,
loss=dict(
type='LabelSmoothLoss', label_smooth_val=0.1, mode='original'),
init_cfg=None,
),
init_cfg=dict(
type='TruncNormal', layer=['Conv2d', 'Linear'], std=.02, bias=0.),
train_cfg=dict(augments=[
dict(type='Mixup', alpha=0.8),
dict(type='CutMix', alpha=1.0),


@ -1,27 +1,17 @@
# Model settings
model = dict(
type='ImageClassifier',
backbone=dict(
type='ConvNeXt',
arch='tiny',
out_indices=(3, ),
drop_path_rate=0.1,
gap_before_final_norm=True,
init_cfg=[
dict(
type='TruncNormal',
layer=['Conv2d', 'Linear'],
std=.02,
bias=0.),
dict(type='Constant', layer=['LayerNorm'], val=1., bias=0.),
]),
backbone=dict(type='ConvNeXt', arch='tiny', drop_path_rate=0.1),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=768,
loss=dict(
type='LabelSmoothLoss', label_smooth_val=0.1, mode='original'),
init_cfg=None,
),
init_cfg=dict(
type='TruncNormal', layer=['Conv2d', 'Linear'], std=.02, bias=0.),
train_cfg=dict(augments=[
dict(type='Mixup', alpha=0.8),
dict(type='CutMix', alpha=1.0),


@ -1,27 +1,17 @@
# Model settings
model = dict(
type='ImageClassifier',
backbone=dict(
type='ConvNeXt',
arch='xlarge',
out_indices=(3, ),
drop_path_rate=0.5,
gap_before_final_norm=True,
init_cfg=[
dict(
type='TruncNormal',
layer=['Conv2d', 'Linear'],
std=.02,
bias=0.),
dict(type='Constant', layer=['LayerNorm'], val=1., bias=0.),
]),
backbone=dict(type='ConvNeXt', arch='xlarge', drop_path_rate=0.5),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=2048,
loss=dict(
type='LabelSmoothLoss', label_smooth_val=0.1, mode='original'),
init_cfg=None,
),
init_cfg=dict(
type='TruncNormal', layer=['Conv2d', 'Linear'], std=.02, bias=0.),
train_cfg=dict(augments=[
dict(type='Mixup', alpha=0.8),
dict(type='CutMix', alpha=1.0),


@ -0,0 +1,20 @@
# Model settings
model = dict(
type='ImageClassifier',
backbone=dict(
type='ConvNeXt',
arch='atto',
drop_path_rate=0.1,
layer_scale_init_value=0.,
use_grn=True,
),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=320,
loss=dict(type='LabelSmoothLoss', label_smooth_val=0.2),
init_cfg=None,
),
init_cfg=dict(
type='TruncNormal', layer=['Conv2d', 'Linear'], std=.02, bias=0.),
)


@ -0,0 +1,24 @@
# Model settings
model = dict(
type='ImageClassifier',
backbone=dict(
type='ConvNeXt',
arch='base',
drop_path_rate=0.1,
layer_scale_init_value=0.,
use_grn=True,
),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=1024,
loss=dict(type='LabelSmoothLoss', label_smooth_val=0.1),
init_cfg=None,
),
init_cfg=dict(
type='TruncNormal', layer=['Conv2d', 'Linear'], std=.02, bias=0.),
train_cfg=dict(augments=[
dict(type='Mixup', alpha=0.8),
dict(type='CutMix', alpha=1.0),
]),
)


@ -0,0 +1,20 @@
# Model settings
model = dict(
type='ImageClassifier',
backbone=dict(
type='ConvNeXt',
arch='femto',
drop_path_rate=0.1,
layer_scale_init_value=0.,
use_grn=True,
),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=384,
loss=dict(type='LabelSmoothLoss', label_smooth_val=0.1),
init_cfg=None,
),
init_cfg=dict(
type='TruncNormal', layer=['Conv2d', 'Linear'], std=.02, bias=0.),
)


@ -0,0 +1,24 @@
# Model settings
model = dict(
type='ImageClassifier',
backbone=dict(
type='ConvNeXt',
arch='huge',
drop_path_rate=0.1,
layer_scale_init_value=0.,
use_grn=True,
),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=2816,
loss=dict(type='LabelSmoothLoss', label_smooth_val=0.1),
init_cfg=None,
),
init_cfg=dict(
type='TruncNormal', layer=['Conv2d', 'Linear'], std=.02, bias=0.),
train_cfg=dict(augments=[
dict(type='Mixup', alpha=0.8),
dict(type='CutMix', alpha=1.0),
]),
)


@ -0,0 +1,24 @@
# Model settings
model = dict(
type='ImageClassifier',
backbone=dict(
type='ConvNeXt',
arch='large',
drop_path_rate=0.1,
layer_scale_init_value=0.,
use_grn=True,
),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=1536,
loss=dict(type='LabelSmoothLoss', label_smooth_val=0.1),
init_cfg=None,
),
init_cfg=dict(
type='TruncNormal', layer=['Conv2d', 'Linear'], std=.02, bias=0.),
train_cfg=dict(augments=[
dict(type='Mixup', alpha=0.8),
dict(type='CutMix', alpha=1.0),
]),
)


@ -0,0 +1,20 @@
# Model settings
model = dict(
type='ImageClassifier',
backbone=dict(
type='ConvNeXt',
arch='nano',
drop_path_rate=0.1,
layer_scale_init_value=0.,
use_grn=True,
),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=640,
loss=dict(type='LabelSmoothLoss', label_smooth_val=0.2),
init_cfg=None,
),
init_cfg=dict(
type='TruncNormal', layer=['Conv2d', 'Linear'], std=.02, bias=0.),
)


@ -0,0 +1,20 @@
# Model settings
model = dict(
type='ImageClassifier',
backbone=dict(
type='ConvNeXt',
arch='pico',
drop_path_rate=0.1,
layer_scale_init_value=0.,
use_grn=True,
),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=512,
loss=dict(type='LabelSmoothLoss', label_smooth_val=0.1),
init_cfg=None,
),
init_cfg=dict(
type='TruncNormal', layer=['Conv2d', 'Linear'], std=.02, bias=0.),
)


@ -0,0 +1,24 @@
# Model settings
model = dict(
type='ImageClassifier',
backbone=dict(
type='ConvNeXt',
arch='tiny',
drop_path_rate=0.2,
layer_scale_init_value=0.,
use_grn=True,
),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=768,
loss=dict(type='LabelSmoothLoss', label_smooth_val=0.2),
init_cfg=None,
),
init_cfg=dict(
type='TruncNormal', layer=['Conv2d', 'Linear'], std=.02, bias=0.),
train_cfg=dict(augments=[
dict(type='Mixup', alpha=0.8),
dict(type='CutMix', alpha=1.0),
]),
)


@ -0,0 +1,68 @@
# ConvNeXt V2
> [Co-designing and Scaling ConvNets with Masked Autoencoders](http://arxiv.org/abs/2301.00808)
<!-- [ALGORITHM] -->
## Abstract
Driven by improved architectures and better representation learning frameworks, the field of visual recognition has enjoyed rapid modernization and performance boost in the early 2020s. For example, modern ConvNets, represented by ConvNeXt, have demonstrated strong performance in various scenarios. While these models were originally designed for supervised learning with ImageNet labels, they can also potentially benefit from self-supervised learning techniques such as masked autoencoders (MAE). However, we found that simply combining these two approaches leads to subpar performance. In this paper, we propose a fully convolutional masked autoencoder framework and a new Global Response Normalization (GRN) layer that can be added to the ConvNeXt architecture to enhance inter-channel feature competition. This co-design of self-supervised learning techniques and architectural improvement results in a new model family called ConvNeXt V2, which significantly improves the performance of pure ConvNets on various recognition benchmarks, including ImageNet classification, COCO detection, and ADE20K segmentation. We also provide pre-trained ConvNeXt V2 models of various sizes, ranging from an efficient 3.7M-parameter Atto model with 76.7% top-1 accuracy on ImageNet, to a 650M Huge model that achieves a state-of-the-art 88.9% accuracy using only public training data.
<div align=center>
<img src="https://user-images.githubusercontent.com/26739999/210496285-f235083f-218f-4153-8e21-c8a64481a2f5.png" width="50%"/>
</div>
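The core addition is the GRN layer. As a sketch of what it computes (matching the `GRN` module added in `mmcls/models/utils/norm.py` in this PR), for a channel-last input $X \in \mathbb{R}^{H \times W \times C}$:

```math
G(X)_c = \lVert X_{:,:,c} \rVert_2, \qquad
N(G(X))_c = \frac{G(X)_c}{\tfrac{1}{C}\sum_{c'=1}^{C} G(X)_{c'} + \epsilon}, \qquad
\mathrm{GRN}(X)_{:,:,c} = \gamma_c \, X_{:,:,c} \, N(G(X))_c + \beta_c + X_{:,:,c}
```

where $\gamma$ and $\beta$ are learnable per-channel parameters initialized to zero and $\epsilon$ defaults to `1e-6`.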
## Results and models
### Pre-trained Models
The pre-trained models are only used for fine-tuning; they cannot be trained with the configs here and have no evaluation results.
| Model | Params(M) | Flops(G) | Config | Download |
| :-------------------------------------: | :-------: | :------: | :------------------------------------------: | :--------------------------------------------------------------------------------------------------: |
| convnext-v2-atto_3rdparty-fcmae_in1k\* | 3.71 | 0.55 | [config](./convnext-v2-atto_32xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-atto_3rdparty-fcmae_in1k_20230104-07514db4.pth) |
| convnext-v2-femto_3rdparty-fcmae_in1k\* | 5.23 | 0.78 | [config](./convnext-v2-femto_32xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-femto_3rdparty-fcmae_in1k_20230104-adbe2082.pth) |
| convnext-v2-pico_3rdparty-fcmae_in1k\* | 9.07 | 1.37 | [config](./convnext-v2-pico_32xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-pico_3rdparty-fcmae_in1k_20230104-147b1b59.pth) |
| convnext-v2-nano_3rdparty-fcmae_in1k\* | 15.62 | 2.45 | [config](./convnext-v2-nano_32xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-nano_3rdparty-fcmae_in1k_20230104-3dd1f29e.pth) |
| convnext-v2-tiny_3rdparty-fcmae_in1k\* | 28.64 | 4.47 | [config](./convnext-v2-tiny_32xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-tiny_3rdparty-fcmae_in1k_20230104-80513adc.pth) |
| convnext-v2-base_3rdparty-fcmae_in1k\* | 88.72 | 15.38 | [config](./convnext-v2-base_32xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-base_3rdparty-fcmae_in1k_20230104-8a798eaf.pth) |
| convnext-v2-large_3rdparty-fcmae_in1k\* | 197.96 | 34.40 | [config](./convnext-v2-large_32xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-large_3rdparty-fcmae_in1k_20230104-bf38df92.pth) |
| convnext-v2-huge_3rdparty-fcmae_in1k\* | 660.29 | 115.00 | [config](./convnext-v2-huge_32xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-huge_3rdparty-fcmae_in1k_20230104-fe43ae6c.pth) |
*Models with * are converted from the [official repo](https://github.com/facebookresearch/ConvNeXt-V2).*
### ImageNet-1k
| Model | Pretrain | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download |
| :---------------------------------------------: | :------------------: | :-------: | :------: | :-------: | :-------: | :----------------------------------------------: | :------------------------------------------------: |
| convnext-v2-atto_fcmae-pre_3rdparty_in1k\* | FCMAE | 3.71 | 0.55 | 76.64 | 93.04 | [config](./convnext-v2-atto_32xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-atto_fcmae-pre_3rdparty_in1k_20230104-23765f83.pth) |
| convnext-v2-femto_fcmae-pre_3rdparty_in1k\* | FCMAE | 5.23 | 0.78 | 78.48 | 93.98 | [config](./convnext-v2-femto_32xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-femto_fcmae-pre_3rdparty_in1k_20230104-92a75d75.pth) |
| convnext-v2-pico_fcmae-pre_3rdparty_in1k\* | FCMAE | 9.07 | 1.37 | 80.31 | 95.08 | [config](./convnext-v2-pico_32xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-pico_fcmae-pre_3rdparty_in1k_20230104-d20263ca.pth) |
| convnext-v2-nano_fcmae-pre_3rdparty_in1k\* | FCMAE | 15.62 | 2.45 | 81.86 | 95.75 | [config](./convnext-v2-nano_32xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-nano_fcmae-pre_3rdparty_in1k_20230104-fe1aaaf2.pth) |
| convnext-v2-nano_fcmae-in21k-pre_3rdparty_in1k\* | FCMAE + ImageNet 21k | 15.62 | 2.45 | 82.04 | 96.16 | [config](./convnext-v2-nano_32xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-nano_fcmae-in21k-pre_3rdparty_in1k_20230104-91fa8ae2.pth) |
| convnext-v2-tiny_fcmae-pre_3rdparty_in1k\* | FCMAE | 28.64 | 4.47 | 82.94 | 96.29 | [config](./convnext-v2-tiny_32xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-tiny_fcmae-pre_3rdparty_in1k_20230104-471a86de.pth) |
| convnext-v2-tiny_fcmae-in21k-pre_3rdparty_in1k\* | FCMAE + ImageNet 21k | 28.64 | 4.47 | 83.89 | 96.96 | [config](./convnext-v2-tiny_32xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-tiny_fcmae-in21k-pre_3rdparty_in1k_20230104-8cc8b8f2.pth) |
| convnext-v2-nano_fcmae-in21k-pre_3rdparty_in1k-384px\* | FCMAE + ImageNet 21k | 15.62 | 7.21 | 83.36 | 96.75 | [config](./convnext-v2-nano_32xb32_in1k-384px.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-nano_fcmae-in21k-pre_3rdparty_in1k-384px_20230104-f951ae87.pth) |
| convnext-v2-tiny_fcmae-in21k-pre_3rdparty_in1k-384px\* | FCMAE + ImageNet 21k | 28.64 | 13.14 | 85.09 | 97.63 | [config](./convnext-v2-tiny_32xb32_in1k-384px.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-tiny_fcmae-in21k-pre_3rdparty_in1k-384px_20230104-d8579f84.pth) |
| convnext-v2-base_fcmae-pre_3rdparty_in1k\* | FCMAE | 88.72 | 15.38 | 84.87 | 97.08 | [config](./convnext-v2-base_32xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-base_fcmae-pre_3rdparty_in1k_20230104-00a70fa4.pth) |
| convnext-v2-base_fcmae-in21k-pre_3rdparty_in1k\* | FCMAE + ImageNet 21k | 88.72 | 15.38 | 86.74 | 98.02 | [config](./convnext-v2-base_32xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-base_fcmae-in21k-pre_3rdparty_in1k_20230104-c48d16a5.pth) |
| convnext-v2-large_fcmae-pre_3rdparty_in1k\* | FCMAE | 197.96 | 34.40 | 85.76 | 97.59 | [config](./convnext-v2-large_32xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-large_fcmae-pre_3rdparty_in1k_20230104-ef393013.pth) |
| convnext-v2-large_fcmae-in21k-pre_3rdparty_in1k\* | FCMAE + ImageNet 21k | 197.96 | 34.40 | 87.26 | 98.24 | [config](./convnext-v2-large_32xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-large_fcmae-in21k-pre_3rdparty_in1k_20230104-d9c4dc0c.pth) |
| convnext-v2-base_fcmae-in21k-pre_3rdparty_in1k-384px\* | FCMAE + ImageNet 21k | 88.72 | 45.21 | 87.63 | 98.42 | [config](./convnext-v2-base_32xb32_in1k-384px.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-base_fcmae-in21k-pre_3rdparty_in1k-384px_20230104-379425cc.pth) |
| convnext-v2-large_fcmae-in21k-pre_3rdparty_in1k-384px\* | FCMAE + ImageNet 21k | 197.96 | 101.10 | 88.18 | 98.52 | [config](./convnext-v2-large_32xb32_in1k-384px.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-large_fcmae-in21k-pre_3rdparty_in1k-384px_20230104-9139a1f3.pth) |
| convnext-v2-huge_fcmae-pre_3rdparty_in1k\* | FCMAE | 660.29 | 115.00 | 86.25 | 97.75 | [config](./convnext-v2-huge_32xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-huge_fcmae-pre_3rdparty_in1k_20230104-f795e5b8.pth) |
| convnext-v2-huge_fcmae-in21k-pre_3rdparty_in1k-384px\* | FCMAE + ImageNet 21k | 660.29 | 337.96 | 88.68 | 98.73 | [config](./convnext-v2-huge_32xb32_in1k-384px.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-huge_fcmae-in21k-pre_3rdparty_in1k-384px_20230104-02a4eb35.pth) |
| convnext-v2-huge_fcmae-in21k-pre_3rdparty_in1k-512px\* | FCMAE + ImageNet 21k | 660.29 | 600.81 | 88.86 | 98.74 | [config](./convnext-v2-huge_32xb32_in1k-512px.py) | [model](https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-huge_fcmae-in21k-pre_3rdparty_in1k-512px_20230104-ce32e63c.pth) |
*Models with * are converted from the [official repo](https://github.com/facebookresearch/ConvNeXt-V2). The config files of these models are only for inference. We don't guarantee the training accuracy of these configs, and you are welcome to contribute your reproduction results.*
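To try one of the checkpoints above locally, a minimal inference sketch (assuming the `mmcls.apis` Python interface of the 1.x series; the paths are illustrative):

```python
from mmcls.apis import inference_model, init_model

# Illustrative paths: use any config/checkpoint pair from the tables above.
config = 'configs/convnext_v2/convnext-v2-atto_32xb32_in1k.py'
checkpoint = 'convnext-v2-atto_fcmae-pre_3rdparty_in1k_20230104-23765f83.pth'

model = init_model(config, checkpoint, device='cpu')
result = inference_model(model, 'demo/demo.JPEG')  # predicted label and score
print(result)
```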
## Citation
```bibtex
@article{Woo2023ConvNeXtV2,
title={ConvNeXt V2: Co-designing and Scaling ConvNets with Masked Autoencoders},
author={Sanghyun Woo and Shoubhik Debnath and Ronghang Hu and Xinlei Chen and Zhuang Liu and In So Kweon and Saining Xie},
year={2023},
journal={arXiv preprint arXiv:2301.00808},
}
```


@ -0,0 +1,24 @@
_base_ = [
'../_base_/models/convnext_v2/atto.py',
'../_base_/datasets/imagenet_bs64_swin_224.py',
'../_base_/schedules/imagenet_bs1024_adamw_swin.py',
'../_base_/default_runtime.py',
]
# dataset setting
train_dataloader = dict(batch_size=32)
# schedule setting
optim_wrapper = dict(
optimizer=dict(lr=8e-4, weight_decay=0.3),
clip_grad=None,
)
# learning policy
param_scheduler = [dict(type='CosineAnnealingLR', eta_min=1e-5, by_epoch=True)]
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=600, val_interval=1)
# runtime setting
custom_hooks = [dict(type='EMAHook', momentum=1e-4, priority='ABOVE_NORMAL')]


@ -0,0 +1,35 @@
_base_ = [
'../_base_/models/convnext_v2/base.py',
'../_base_/datasets/imagenet_bs64_swin_384.py',
'../_base_/schedules/imagenet_bs1024_adamw_swin.py',
'../_base_/default_runtime.py',
]
# dataset setting
train_dataloader = dict(batch_size=32)
# schedule setting
optim_wrapper = dict(
optimizer=dict(lr=2.5e-3),
clip_grad=None,
)
# learning policy
param_scheduler = [
# warm up learning rate scheduler
dict(
type='LinearLR',
start_factor=1e-3,
by_epoch=True,
end=20,
# update by iter
convert_to_iter_based=True),
# main learning rate scheduler
dict(type='CosineAnnealingLR', eta_min=1e-5, by_epoch=True, begin=20)
]
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=100, val_interval=1)
# runtime setting
custom_hooks = [dict(type='EMAHook', momentum=1e-4, priority='ABOVE_NORMAL')]


@ -0,0 +1,35 @@
_base_ = [
'../_base_/models/convnext_v2/base.py',
'../_base_/datasets/imagenet_bs64_swin_224.py',
'../_base_/schedules/imagenet_bs1024_adamw_swin.py',
'../_base_/default_runtime.py',
]
# dataset setting
train_dataloader = dict(batch_size=32)
# schedule setting
optim_wrapper = dict(
optimizer=dict(lr=2.5e-3),
clip_grad=None,
)
# learning policy
param_scheduler = [
# warm up learning rate scheduler
dict(
type='LinearLR',
start_factor=1e-3,
by_epoch=True,
end=20,
# update by iter
convert_to_iter_based=True),
# main learning rate scheduler
dict(type='CosineAnnealingLR', eta_min=1e-5, by_epoch=True, begin=20)
]
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=100, val_interval=1)
# runtime setting
custom_hooks = [dict(type='EMAHook', momentum=1e-4, priority='ABOVE_NORMAL')]


@ -0,0 +1,24 @@
_base_ = [
'../_base_/models/convnext_v2/femto.py',
'../_base_/datasets/imagenet_bs64_swin_224.py',
'../_base_/schedules/imagenet_bs1024_adamw_swin.py',
'../_base_/default_runtime.py',
]
# dataset setting
train_dataloader = dict(batch_size=32)
# schedule setting
optim_wrapper = dict(
optimizer=dict(lr=8e-4, weight_decay=0.3),
clip_grad=None,
)
# learning policy
param_scheduler = [dict(type='CosineAnnealingLR', eta_min=1e-5, by_epoch=True)]
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=600, val_interval=1)
# runtime setting
custom_hooks = [dict(type='EMAHook', momentum=1e-4, priority='ABOVE_NORMAL')]


@ -0,0 +1,35 @@
_base_ = [
'../_base_/models/convnext_v2/huge.py',
'../_base_/datasets/imagenet_bs64_swin_384.py',
'../_base_/schedules/imagenet_bs1024_adamw_swin.py',
'../_base_/default_runtime.py',
]
# dataset setting
train_dataloader = dict(batch_size=32)
# schedule setting
optim_wrapper = dict(
optimizer=dict(lr=2.5e-3),
clip_grad=None,
)
# learning policy
param_scheduler = [
# warm up learning rate scheduler
dict(
type='LinearLR',
start_factor=1e-3,
by_epoch=True,
end=20,
# update by iter
convert_to_iter_based=True),
# main learning rate scheduler
dict(type='CosineAnnealingLR', eta_min=1e-5, by_epoch=True, begin=20)
]
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=100, val_interval=1)
# runtime setting
custom_hooks = [dict(type='EMAHook', momentum=1e-4, priority='ABOVE_NORMAL')]


@ -0,0 +1,54 @@
_base_ = [
'../_base_/models/convnext_v2/huge.py',
'../_base_/datasets/imagenet_bs64_swin_384.py',
'../_base_/schedules/imagenet_bs1024_adamw_swin.py',
'../_base_/default_runtime.py',
]
# dataset setting
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='RandomResizedCrop',
scale=512,
backend='pillow',
interpolation='bicubic'),
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
dict(type='PackClsInputs'),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='Resize', scale=512, backend='pillow', interpolation='bicubic'),
dict(type='PackClsInputs'),
]
train_dataloader = dict(batch_size=32, dataset=dict(pipeline=train_pipeline))
val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
test_dataloader = dict(dataset=dict(pipeline=test_pipeline))
# schedule setting
optim_wrapper = dict(
optimizer=dict(lr=2.5e-3),
clip_grad=None,
)
# learning policy
param_scheduler = [
# warm up learning rate scheduler
dict(
type='LinearLR',
start_factor=1e-3,
by_epoch=True,
end=20,
# update by iter
convert_to_iter_based=True),
# main learning rate scheduler
dict(type='CosineAnnealingLR', eta_min=1e-5, by_epoch=True, begin=20)
]
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=100, val_interval=1)
# runtime setting
custom_hooks = [dict(type='EMAHook', momentum=1e-4, priority='ABOVE_NORMAL')]


@ -0,0 +1,35 @@
_base_ = [
'../_base_/models/convnext_v2/huge.py',
'../_base_/datasets/imagenet_bs64_swin_224.py',
'../_base_/schedules/imagenet_bs1024_adamw_swin.py',
'../_base_/default_runtime.py',
]
# dataset setting
train_dataloader = dict(batch_size=32)
# schedule setting
optim_wrapper = dict(
optimizer=dict(lr=2.5e-3),
clip_grad=None,
)
# learning policy
param_scheduler = [
# warm up learning rate scheduler
dict(
type='LinearLR',
start_factor=1e-3,
by_epoch=True,
end=20,
# update by iter
convert_to_iter_based=True),
# main learning rate scheduler
dict(type='CosineAnnealingLR', eta_min=1e-5, by_epoch=True, begin=20)
]
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=100, val_interval=1)
# runtime setting
custom_hooks = [dict(type='EMAHook', momentum=1e-4, priority='ABOVE_NORMAL')]


@ -0,0 +1,35 @@
_base_ = [
'../_base_/models/convnext_v2/large.py',
'../_base_/datasets/imagenet_bs64_swin_384.py',
'../_base_/schedules/imagenet_bs1024_adamw_swin.py',
'../_base_/default_runtime.py',
]
# dataset setting
train_dataloader = dict(batch_size=32)
# schedule setting
optim_wrapper = dict(
optimizer=dict(lr=2.5e-3),
clip_grad=None,
)
# learning policy
param_scheduler = [
# warm up learning rate scheduler
dict(
type='LinearLR',
start_factor=1e-3,
by_epoch=True,
end=20,
# update by iter
convert_to_iter_based=True),
# main learning rate scheduler
dict(type='CosineAnnealingLR', eta_min=1e-5, by_epoch=True, begin=20)
]
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=100, val_interval=1)
# runtime setting
custom_hooks = [dict(type='EMAHook', momentum=1e-4, priority='ABOVE_NORMAL')]


@ -0,0 +1,35 @@
_base_ = [
'../_base_/models/convnext_v2/large.py',
'../_base_/datasets/imagenet_bs64_swin_224.py',
'../_base_/schedules/imagenet_bs1024_adamw_swin.py',
'../_base_/default_runtime.py',
]
# dataset setting
train_dataloader = dict(batch_size=32)
# schedule setting
optim_wrapper = dict(
optimizer=dict(lr=2.5e-3),
clip_grad=None,
)
# learning policy
param_scheduler = [
# warm up learning rate scheduler
dict(
type='LinearLR',
start_factor=1e-3,
by_epoch=True,
end=20,
# update by iter
convert_to_iter_based=True),
# main learning rate scheduler
dict(type='CosineAnnealingLR', eta_min=1e-5, by_epoch=True, begin=20)
]
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=100, val_interval=1)
# runtime setting
custom_hooks = [dict(type='EMAHook', momentum=1e-4, priority='ABOVE_NORMAL')]


@ -0,0 +1,24 @@
_base_ = [
'../_base_/models/convnext_v2/nano.py',
'../_base_/datasets/imagenet_bs64_swin_384.py',
'../_base_/schedules/imagenet_bs1024_adamw_swin.py',
'../_base_/default_runtime.py',
]
# dataset setting
train_dataloader = dict(batch_size=32)
# schedule setting
optim_wrapper = dict(
optimizer=dict(lr=8e-4, weight_decay=0.3),
clip_grad=None,
)
# learning policy
param_scheduler = [dict(type='CosineAnnealingLR', eta_min=1e-5, by_epoch=True)]
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=600, val_interval=1)
# runtime setting
custom_hooks = [dict(type='EMAHook', momentum=1e-4, priority='ABOVE_NORMAL')]


@ -0,0 +1,24 @@
_base_ = [
'../_base_/models/convnext_v2/nano.py',
'../_base_/datasets/imagenet_bs64_swin_224.py',
'../_base_/schedules/imagenet_bs1024_adamw_swin.py',
'../_base_/default_runtime.py',
]
# dataset setting
train_dataloader = dict(batch_size=32)
# schedule setting
optim_wrapper = dict(
optimizer=dict(lr=8e-4, weight_decay=0.3),
clip_grad=None,
)
# learning policy
param_scheduler = [dict(type='CosineAnnealingLR', eta_min=1e-5, by_epoch=True)]
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=600, val_interval=1)
# runtime setting
custom_hooks = [dict(type='EMAHook', momentum=1e-4, priority='ABOVE_NORMAL')]


@ -0,0 +1,24 @@
_base_ = [
'../_base_/models/convnext_v2/pico.py',
'../_base_/datasets/imagenet_bs64_swin_224.py',
'../_base_/schedules/imagenet_bs1024_adamw_swin.py',
'../_base_/default_runtime.py',
]
# dataset setting
train_dataloader = dict(batch_size=32)
# schedule setting
optim_wrapper = dict(
optimizer=dict(lr=8e-4, weight_decay=0.3),
clip_grad=None,
)
# learning policy
param_scheduler = [dict(type='CosineAnnealingLR', eta_min=1e-5, by_epoch=True)]
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=600, val_interval=1)
# runtime setting
custom_hooks = [dict(type='EMAHook', momentum=1e-4, priority='ABOVE_NORMAL')]


@ -0,0 +1,35 @@
_base_ = [
'../_base_/models/convnext_v2/tiny.py',
'../_base_/datasets/imagenet_bs64_swin_384.py',
'../_base_/schedules/imagenet_bs1024_adamw_swin.py',
'../_base_/default_runtime.py',
]
# dataset setting
train_dataloader = dict(batch_size=32)
# schedule setting
optim_wrapper = dict(
optimizer=dict(lr=3.2e-3),
clip_grad=None,
)
# learning policy
param_scheduler = [
# warm up learning rate scheduler
dict(
type='LinearLR',
start_factor=1e-3,
by_epoch=True,
end=40,
# update by iter
convert_to_iter_based=True),
# main learning rate scheduler
dict(type='CosineAnnealingLR', eta_min=1e-5, by_epoch=True, begin=40)
]
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=300, val_interval=1)
# runtime setting
custom_hooks = [dict(type='EMAHook', momentum=1e-4, priority='ABOVE_NORMAL')]


@ -0,0 +1,35 @@
_base_ = [
'../_base_/models/convnext_v2/tiny.py',
'../_base_/datasets/imagenet_bs64_swin_224.py',
'../_base_/schedules/imagenet_bs1024_adamw_swin.py',
'../_base_/default_runtime.py',
]
# dataset setting
train_dataloader = dict(batch_size=32)
# schedule setting
optim_wrapper = dict(
optimizer=dict(lr=3.2e-3),
clip_grad=None,
)
# learning policy
param_scheduler = [
# warm up learning rate scheduler
dict(
type='LinearLR',
start_factor=1e-3,
by_epoch=True,
end=40,
# update by iter
convert_to_iter_based=True),
# main learning rate scheduler
dict(type='CosineAnnealingLR', eta_min=1e-5, by_epoch=True, begin=40)
]
# train, val, test setting
train_cfg = dict(by_epoch=True, max_epochs=300, val_interval=1)
# runtime setting
custom_hooks = [dict(type='EMAHook', momentum=1e-4, priority='ABOVE_NORMAL')]


@ -0,0 +1,433 @@
Collections:
- Name: ConvNeXt V2
Metadata:
Architecture:
- Global Response Normalization
Paper:
Title: Co-designing and Scaling ConvNets with Masked Autoencoders
URL: http://arxiv.org/abs/2301.00808
README: configs/convnext_v2/README.md
Models:
- Name: convnext-v2-atto_3rdparty-fcmae_in1k
Metadata:
Training Data: ImageNet-1k
FLOPs: 551718080
Parameters: 3708400
In Collection: ConvNeXt V2
Results: null
Weights: https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-atto_3rdparty-fcmae_in1k_20230104-07514db4.pth
Config: configs/convnext_v2/convnext-v2-atto_32xb32_in1k.py
Converted From:
Weights: https://dl.fbaipublicfiles.com/convnext/convnextv2/pt_only/convnextv2_atto_1k_224_fcmae.pt
Code: https://github.com/facebookresearch/ConvNeXt-V2
- Name: convnext-v2-atto_fcmae-pre_3rdparty_in1k
Metadata:
Training Data: ImageNet-1k
FLOPs: 551718080
Parameters: 3708400
In Collection: ConvNeXt V2
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 76.64
Top 5 Accuracy: 93.04
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-atto_fcmae-pre_3rdparty_in1k_20230104-23765f83.pth
Config: configs/convnext_v2/convnext-v2-atto_32xb32_in1k.py
Converted From:
Weights: https://dl.fbaipublicfiles.com/convnext/convnextv2/im1k/convnextv2_atto_1k_224_ema.pt
Code: https://github.com/facebookresearch/ConvNeXt-V2
- Name: convnext-v2-femto_3rdparty-fcmae_in1k
Metadata:
Training Data: ImageNet-1k
FLOPs: 784892544
Parameters: 5233240
In Collection: ConvNeXt V2
Results: null
Weights: https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-femto_3rdparty-fcmae_in1k_20230104-adbe2082.pth
Config: configs/convnext_v2/convnext-v2-femto_32xb32_in1k.py
Converted From:
Weights: https://dl.fbaipublicfiles.com/convnext/convnextv2/pt_only/convnextv2_femto_1k_224_fcmae.pt
Code: https://github.com/facebookresearch/ConvNeXt-V2
- Name: convnext-v2-femto_fcmae-pre_3rdparty_in1k
Metadata:
Training Data: ImageNet-1k
FLOPs: 784892544
Parameters: 5233240
In Collection: ConvNeXt V2
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 78.48
Top 5 Accuracy: 93.98
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-femto_fcmae-pre_3rdparty_in1k_20230104-92a75d75.pth
Config: configs/convnext_v2/convnext-v2-femto_32xb32_in1k.py
Converted From:
Weights: https://dl.fbaipublicfiles.com/convnext/convnextv2/im1k/convnextv2_femto_1k_224_ema.pt
Code: https://github.com/facebookresearch/ConvNeXt-V2
- Name: convnext-v2-pico_3rdparty-fcmae_in1k
Metadata:
Training Data: ImageNet-1k
FLOPs: 1374072320
Parameters: 9066280
In Collection: ConvNeXt V2
Results: null
Weights: https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-pico_3rdparty-fcmae_in1k_20230104-147b1b59.pth
Config: configs/convnext_v2/convnext-v2-pico_32xb32_in1k.py
Converted From:
Weights: https://dl.fbaipublicfiles.com/convnext/convnextv2/pt_only/convnextv2_pico_1k_224_fcmae.pt
Code: https://github.com/facebookresearch/ConvNeXt-V2
- Name: convnext-v2-pico_fcmae-pre_3rdparty_in1k
Metadata:
Training Data: ImageNet-1k
FLOPs: 1374072320
Parameters: 9066280
In Collection: ConvNeXt V2
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 80.31
Top 5 Accuracy: 95.08
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-pico_fcmae-pre_3rdparty_in1k_20230104-d20263ca.pth
Config: configs/convnext_v2/convnext-v2-pico_32xb32_in1k.py
Converted From:
Weights: https://dl.fbaipublicfiles.com/convnext/convnextv2/im1k/convnextv2_pico_1k_224_ema.pt
Code: https://github.com/facebookresearch/ConvNeXt-V2
- Name: convnext-v2-nano_3rdparty-fcmae_in1k
Metadata:
Training Data: ImageNet-1k
FLOPs: 2454926720
Parameters: 15623800
In Collection: ConvNeXt V2
Results: null
Weights: https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-nano_3rdparty-fcmae_in1k_20230104-3dd1f29e.pth
Config: configs/convnext_v2/convnext-v2-nano_32xb32_in1k.py
Converted From:
Weights: https://dl.fbaipublicfiles.com/convnext/convnextv2/pt_only/convnextv2_nano_1k_224_fcmae.pt
Code: https://github.com/facebookresearch/ConvNeXt-V2
- Name: convnext-v2-nano_fcmae-pre_3rdparty_in1k
Metadata:
Training Data: ImageNet-1k
FLOPs: 2454926720
Parameters: 15623800
In Collection: ConvNeXt V2
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 81.86
Top 5 Accuracy: 95.75
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-nano_fcmae-pre_3rdparty_in1k_20230104-fe1aaaf2.pth
Config: configs/convnext_v2/convnext-v2-nano_32xb32_in1k.py
Converted From:
Weights: https://dl.fbaipublicfiles.com/convnext/convnextv2/im1k/convnextv2_nano_1k_224_ema.pt
Code: https://github.com/facebookresearch/ConvNeXt-V2
- Name: convnext-v2-nano_fcmae-in21k-pre_3rdparty_in1k
Metadata:
Training Data:
- ImageNet-21k
- ImageNet-1k
FLOPs: 2454926720
Parameters: 15623800
In Collection: ConvNeXt V2
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 82.04
Top 5 Accuracy: 96.16
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-nano_fcmae-in21k-pre_3rdparty_in1k_20230104-91fa8ae2.pth
Config: configs/convnext_v2/convnext-v2-nano_32xb32_in1k.py
Converted From:
Weights: https://dl.fbaipublicfiles.com/convnext/convnextv2/im22k/convnextv2_nano_22k_224_ema.pt
Code: https://github.com/facebookresearch/ConvNeXt-V2
- Name: convnext-v2-tiny_3rdparty-fcmae_in1k
Metadata:
Training Data: ImageNet-1k
FLOPs: 4469631744
Parameters: 28635496
In Collection: ConvNeXt V2
Results: null
Weights: https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-tiny_3rdparty-fcmae_in1k_20230104-80513adc.pth
Config: configs/convnext_v2/convnext-v2-tiny_32xb32_in1k.py
Converted From:
Weights: https://dl.fbaipublicfiles.com/convnext/convnextv2/pt_only/convnextv2_tiny_1k_224_fcmae.pt
Code: https://github.com/facebookresearch/ConvNeXt-V2
- Name: convnext-v2-tiny_fcmae-pre_3rdparty_in1k
Metadata:
Training Data: ImageNet-1k
FLOPs: 4469631744
Parameters: 28635496
In Collection: ConvNeXt V2
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 82.94
Top 5 Accuracy: 96.29
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-tiny_fcmae-pre_3rdparty_in1k_20230104-471a86de.pth
Config: configs/convnext_v2/convnext-v2-tiny_32xb32_in1k.py
Converted From:
Weights: https://dl.fbaipublicfiles.com/convnext/convnextv2/im1k/convnextv2_tiny_1k_224_ema.pt
Code: https://github.com/facebookresearch/ConvNeXt-V2
- Name: convnext-v2-tiny_fcmae-in21k-pre_3rdparty_in1k
Metadata:
Training Data:
- ImageNet-21k
- ImageNet-1k
FLOPs: 4469631744
Parameters: 28635496
In Collection: ConvNeXt V2
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 83.89
Top 5 Accuracy: 96.96
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-tiny_fcmae-in21k-pre_3rdparty_in1k_20230104-8cc8b8f2.pth
Config: configs/convnext_v2/convnext-v2-tiny_32xb32_in1k.py
Converted From:
Weights: https://dl.fbaipublicfiles.com/convnext/convnextv2/im22k/convnextv2_tiny_22k_224_ema.pt
Code: https://github.com/facebookresearch/ConvNeXt-V2
- Name: convnext-v2-nano_fcmae-in21k-pre_3rdparty_in1k-384px
Metadata:
Training Data:
- ImageNet-21k
- ImageNet-1k
FLOPs: 7214472320
Parameters: 15623800
In Collection: ConvNeXt V2
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 83.36
Top 5 Accuracy: 96.75
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-nano_fcmae-in21k-pre_3rdparty_in1k-384px_20230104-f951ae87.pth
Config: configs/convnext_v2/convnext-v2-nano_32xb32_in1k-384px.py
Converted From:
Weights: https://dl.fbaipublicfiles.com/convnext/convnextv2/im22k/convnextv2_nano_22k_384_ema.pt
Code: https://github.com/facebookresearch/ConvNeXt-V2
- Name: convnext-v2-tiny_fcmae-in21k-pre_3rdparty_in1k-384px
Metadata:
Training Data:
- ImageNet-21k
- ImageNet-1k
FLOPs: 13135236864
Parameters: 28635496
In Collection: ConvNeXt V2
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 85.09
Top 5 Accuracy: 97.63
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-tiny_fcmae-in21k-pre_3rdparty_in1k-384px_20230104-d8579f84.pth
Config: configs/convnext_v2/convnext-v2-tiny_32xb32_in1k-384px.py
Converted From:
Weights: https://dl.fbaipublicfiles.com/convnext/convnextv2/im22k/convnextv2_tiny_22k_384_ema.pt
Code: https://github.com/facebookresearch/ConvNeXt-V2
- Name: convnext-v2-base_3rdparty-fcmae_in1k
Metadata:
Training Data: ImageNet-1k
FLOPs: 15382561792
Parameters: 88717800
In Collection: ConvNeXt V2
Results: null
Weights: https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-base_3rdparty-fcmae_in1k_20230104-8a798eaf.pth
Config: configs/convnext_v2/convnext-v2-base_32xb32_in1k.py
Converted From:
Weights: https://dl.fbaipublicfiles.com/convnext/convnextv2/pt_only/convnextv2_base_1k_224_fcmae.pt
Code: https://github.com/facebookresearch/ConvNeXt-V2
- Name: convnext-v2-base_fcmae-pre_3rdparty_in1k
Metadata:
Training Data: ImageNet-1k
FLOPs: 15382561792
Parameters: 88717800
In Collection: ConvNeXt V2
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 84.87
Top 5 Accuracy: 97.08
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-base_fcmae-pre_3rdparty_in1k_20230104-00a70fa4.pth
Config: configs/convnext_v2/convnext-v2-base_32xb32_in1k.py
Converted From:
Weights: https://dl.fbaipublicfiles.com/convnext/convnextv2/im1k/convnextv2_base_1k_224_ema.pt
Code: https://github.com/facebookresearch/ConvNeXt-V2
- Name: convnext-v2-base_fcmae-in21k-pre_3rdparty_in1k
Metadata:
Training Data:
- ImageNet-21k
- ImageNet-1k
FLOPs: 15382561792
Parameters: 88717800
In Collection: ConvNeXt V2
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 86.74
Top 5 Accuracy: 98.02
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-base_fcmae-in21k-pre_3rdparty_in1k_20230104-c48d16a5.pth
Config: configs/convnext_v2/convnext-v2-base_32xb32_in1k.py
Converted From:
Weights: https://dl.fbaipublicfiles.com/convnext/convnextv2/im22k/convnextv2_base_22k_224_ema.pt
Code: https://github.com/facebookresearch/ConvNeXt-V2
- Name: convnext-v2-large_3rdparty-fcmae_in1k
Metadata:
Training Data: ImageNet-1k
FLOPs: 34403182080
Parameters: 197956840
In Collection: ConvNeXt V2
Results: null
Weights: https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-large_3rdparty-fcmae_in1k_20230104-bf38df92.pth
Config: configs/convnext_v2/convnext-v2-large_32xb32_in1k.py
Converted From:
Weights: https://dl.fbaipublicfiles.com/convnext/convnextv2/pt_only/convnextv2_large_1k_224_fcmae.pt
Code: https://github.com/facebookresearch/ConvNeXt-V2
- Name: convnext-v2-large_fcmae-pre_3rdparty_in1k
Metadata:
Training Data: ImageNet-1k
FLOPs: 34403182080
Parameters: 197956840
In Collection: ConvNeXt V2
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 85.76
Top 5 Accuracy: 97.59
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-large_fcmae-pre_3rdparty_in1k_20230104-ef393013.pth
Config: configs/convnext_v2/convnext-v2-large_32xb32_in1k.py
Converted From:
Weights: https://dl.fbaipublicfiles.com/convnext/convnextv2/im1k/convnextv2_large_1k_224_ema.pt
Code: https://github.com/facebookresearch/ConvNeXt-V2
- Name: convnext-v2-large_fcmae-in21k-pre_3rdparty_in1k
Metadata:
Training Data:
- ImageNet-21k
- ImageNet-1k
FLOPs: 34403182080
Parameters: 197956840
In Collection: ConvNeXt V2
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 87.26
Top 5 Accuracy: 98.24
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-large_fcmae-in21k-pre_3rdparty_in1k_20230104-d9c4dc0c.pth
Config: configs/convnext_v2/convnext-v2-large_32xb32_in1k.py
Converted From:
Weights: https://dl.fbaipublicfiles.com/convnext/convnextv2/im22k/convnextv2_large_22k_224_ema.pt
Code: https://github.com/facebookresearch/ConvNeXt-V2
- Name: convnext-v2-base_fcmae-in21k-pre_3rdparty_in1k-384px
Metadata:
Training Data:
- ImageNet-21k
- ImageNet-1k
FLOPs: 45205885952
Parameters: 88717800
In Collection: ConvNeXt V2
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 87.63
Top 5 Accuracy: 98.42
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-base_fcmae-in21k-pre_3rdparty_in1k-384px_20230104-379425cc.pth
Config: configs/convnext_v2/convnext-v2-base_32xb32_in1k-384px.py
Converted From:
Weights: https://dl.fbaipublicfiles.com/convnext/convnextv2/im22k/convnextv2_base_22k_384_ema.pt
Code: https://github.com/facebookresearch/ConvNeXt-V2
- Name: convnext-v2-large_fcmae-in21k-pre_3rdparty_in1k-384px
Metadata:
Training Data:
- ImageNet-21k
- ImageNet-1k
FLOPs: 101103214080
Parameters: 197956840
In Collection: ConvNeXt V2
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 88.18
Top 5 Accuracy: 98.52
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-large_fcmae-in21k-pre_3rdparty_in1k-384px_20230104-9139a1f3.pth
Config: configs/convnext_v2/convnext-v2-large_32xb32_in1k-384px.py
Converted From:
Weights: https://dl.fbaipublicfiles.com/convnext/convnextv2/im22k/convnextv2_large_22k_384_ema.pt
Code: https://github.com/facebookresearch/ConvNeXt-V2
- Name: convnext-v2-huge_3rdparty-fcmae_in1k
Metadata:
Training Data: ImageNet-1k
FLOPs: 114998639360
Parameters: 660289640
In Collection: ConvNeXt V2
Results: null
Weights: https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-huge_3rdparty-fcmae_in1k_20230104-fe43ae6c.pth
Config: configs/convnext_v2/convnext-v2-huge_32xb32_in1k.py
Converted From:
Weights: https://dl.fbaipublicfiles.com/convnext/convnextv2/pt_only/convnextv2_huge_1k_224_fcmae.pt
Code: https://github.com/facebookresearch/ConvNeXt-V2
- Name: convnext-v2-huge_fcmae-pre_3rdparty_in1k
Metadata:
Training Data: ImageNet-1k
FLOPs: 114998639360
Parameters: 660289640
In Collection: ConvNeXt V2
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 86.25
Top 5 Accuracy: 97.75
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-huge_fcmae-pre_3rdparty_in1k_20230104-f795e5b8.pth
Config: configs/convnext_v2/convnext-v2-huge_32xb32_in1k.py
Converted From:
Weights: https://dl.fbaipublicfiles.com/convnext/convnextv2/im1k/convnextv2_huge_1k_224_ema.pt
Code: https://github.com/facebookresearch/ConvNeXt-V2
- Name: convnext-v2-huge_fcmae-in21k-pre_3rdparty_in1k-384px
Metadata:
Training Data:
- ImageNet-21k
- ImageNet-1k
FLOPs: 337955157760
Parameters: 660289640
In Collection: ConvNeXt V2
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 88.68
Top 5 Accuracy: 98.73
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-huge_fcmae-in21k-pre_3rdparty_in1k-384px_20230104-02a4eb35.pth
Config: configs/convnext_v2/convnext-v2-huge_32xb32_in1k-384px.py
Converted From:
Weights: https://dl.fbaipublicfiles.com/convnext/convnextv2/im22k/convnextv2_huge_22k_384_ema.pt
Code: https://github.com/facebookresearch/ConvNeXt-V2
- Name: convnext-v2-huge_fcmae-in21k-pre_3rdparty_in1k-512px
Metadata:
Training Data:
- ImageNet-21k
- ImageNet-1k
FLOPs: 600809158400
Parameters: 660289640
In Collection: ConvNeXt V2
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 88.86
Top 5 Accuracy: 98.74
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/convnext-v2/convnext-v2-huge_fcmae-in21k-pre_3rdparty_in1k-512px_20230104-ce32e63c.pth
Config: configs/convnext_v2/convnext-v2-huge_32xb32_in1k-512px.py
Converted From:
Weights: https://dl.fbaipublicfiles.com/convnext/convnextv2/im22k/convnextv2_huge_22k_512_ema.pt
Code: https://github.com/facebookresearch/ConvNeXt-V2


@ -270,7 +270,7 @@ param_scheduler = [
You can create a folder named `mmcls/engine/optimizer` and implement the custom optimizer in a file under that directory, for example `mmcls/engine/optimizer/my_optimizer.py`:
```python
from mmengine.registry import OPTIMIZERS
from mmcls.registry import OPTIMIZERS
from torch.optim import Optimizer
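# A hedged sketch, not part of this diff: with OPTIMIZERS imported from
# mmcls.registry as above, register the custom optimizer so configs can refer
# to it by its type name, e.g. dict(type='MyOptimizer', lr=0.01).
@OPTIMIZERS.register_module()
class MyOptimizer(Optimizer):

    def __init__(self, params, lr=0.01):
        super().__init__(params, defaults=dict(lr=lr))

    def step(self, closure=None):
        # Placeholder update rule; a real optimizer modifies parameters here.
        pass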


@ -5,48 +5,15 @@ from typing import Sequence
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.checkpoint as cp
from mmcv.cnn.bricks import DropPath, build_activation_layer, build_norm_layer
from mmcv.cnn.bricks import DropPath
from mmengine.model import BaseModule, ModuleList, Sequential
from mmengine.registry import MODELS
from mmcls.registry import MODELS
from ..utils import GRN, build_norm_layer
from .base_backbone import BaseBackbone
@MODELS.register_module('LN2d')
class LayerNorm2d(nn.LayerNorm):
"""LayerNorm on channels for 2d images.
Args:
num_channels (int): The number of channels of the input tensor.
eps (float): a value added to the denominator for numerical stability.
Defaults to 1e-5.
elementwise_affine (bool): a boolean value that when set to ``True``,
this module has learnable per-element affine parameters initialized
to ones (for weights) and zeros (for biases). Defaults to True.
"""
def __init__(self, num_channels: int, **kwargs) -> None:
super().__init__(num_channels, **kwargs)
self.num_channels = self.normalized_shape[0]
def forward(self, x, data_format='channel_first'):
assert x.dim() == 4, 'LayerNorm2d only supports inputs with shape ' \
f'(N, C, H, W), but got tensor with shape {x.shape}'
if data_format == 'channel_last':
x = F.layer_norm(x, self.normalized_shape, self.weight, self.bias,
self.eps)
elif data_format == 'channel_first':
x = x.permute(0, 2, 3, 1)
x = F.layer_norm(x, self.normalized_shape, self.weight, self.bias,
self.eps)
# If the output is discontiguous, it may cause some unexpected
# problem in the downstream tasks
x = x.permute(0, 3, 1, 2).contiguous()
return x
class ConvNeXtBlock(BaseModule):
"""ConvNeXt Block.
@ -88,6 +55,7 @@ class ConvNeXtBlock(BaseModule):
linear_pw_conv=True,
drop_path_rate=0.,
layer_scale_init_value=1e-6,
use_grn=False,
with_cp=False):
super().__init__()
self.with_cp = with_cp
@ -96,7 +64,7 @@ class ConvNeXtBlock(BaseModule):
in_channels, in_channels, groups=in_channels, **dw_conv_cfg)
self.linear_pw_conv = linear_pw_conv
self.norm = build_norm_layer(norm_cfg, in_channels)[1]
self.norm = build_norm_layer(norm_cfg, in_channels)
mid_channels = int(mlp_ratio * in_channels)
if self.linear_pw_conv:
@ -106,9 +74,14 @@ class ConvNeXtBlock(BaseModule):
pw_conv = partial(nn.Conv2d, kernel_size=1)
self.pointwise_conv1 = pw_conv(in_channels, mid_channels)
self.act = build_activation_layer(act_cfg)
self.act = MODELS.build(act_cfg)
self.pointwise_conv2 = pw_conv(mid_channels, in_channels)
if use_grn:
self.grn = GRN(mid_channels)
else:
self.grn = None
self.gamma = nn.Parameter(
layer_scale_init_value * torch.ones((in_channels)),
requires_grad=True) if layer_scale_init_value > 0 else None
@ -124,14 +97,21 @@ class ConvNeXtBlock(BaseModule):
if self.linear_pw_conv:
x = x.permute(0, 2, 3, 1) # (N, C, H, W) -> (N, H, W, C)
x = self.norm(x, data_format='channel_last')
x = self.norm(x, data_format='channel_last')
x = self.pointwise_conv1(x)
x = self.act(x)
if self.grn is not None:
x = self.grn(x, data_format='channel_last')
x = self.pointwise_conv2(x)
x = x.permute(0, 3, 1, 2) # (N, H, W, C) -> (N, C, H, W)
else:
x = self.norm(x, data_format='channel_first')
x = self.pointwise_conv1(x)
x = self.act(x)
x = self.pointwise_conv1(x)
x = self.act(x)
x = self.pointwise_conv2(x)
if self.linear_pw_conv:
x = x.permute(0, 3, 1, 2) # permute back
if self.grn is not None:
x = self.grn(x, data_format='channel_first')
x = self.pointwise_conv2(x)
if self.gamma is not None:
x = x.mul(self.gamma.view(1, -1, 1, 1))
@ -148,16 +128,20 @@ class ConvNeXtBlock(BaseModule):
@MODELS.register_module()
class ConvNeXt(BaseBackbone):
"""ConvNeXt.
"""ConvNeXt v1&v2 backbone.
A PyTorch implementation of : `A ConvNet for the 2020s
<https://arxiv.org/pdf/2201.03545.pdf>`_
A PyTorch implementation of `A ConvNet for the 2020s
<https://arxiv.org/abs/2201.03545>`_ and
`ConvNeXt V2: Co-designing and Scaling ConvNets with Masked Autoencoders
<http://arxiv.org/abs/2301.00808>`_
Modified from the `official repo
<https://github.com/facebookresearch/ConvNeXt/blob/main/models/convnext.py>`_
and `timm
<https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/convnext.py>`_.
To use ConvNeXt v2, please set ``use_grn=True`` and ``layer_scale_init_value=0.``.
Args:
arch (str | dict): The model's architecture. If string, it should be
one of architecture in ``ConvNeXt.arch_settings``. And if dict, it
@ -176,6 +160,8 @@ class ConvNeXt(BaseBackbone):
convolution. Defaults to ``dict(type='GELU')``.
linear_pw_conv (bool): Whether to use linear layer to do pointwise
convolution. Defaults to True.
use_grn (bool): Whether to add Global Response Normalization in the
blocks. Defaults to False.
drop_path_rate (float): Stochastic depth rate. Defaults to 0.
layer_scale_init_value (float): Init value for Layer Scale.
Defaults to 1e-6.
@ -191,6 +177,22 @@ class ConvNeXt(BaseBackbone):
init_cfg (dict, optional): Initialization config dict
""" # noqa: E501
arch_settings = {
'atto': {
'depths': [2, 2, 6, 2],
'channels': [40, 80, 160, 320]
},
'femto': {
'depths': [2, 2, 6, 2],
'channels': [48, 96, 192, 384]
},
'pico': {
'depths': [2, 2, 6, 2],
'channels': [64, 128, 256, 512]
},
'nano': {
'depths': [2, 2, 8, 2],
'channels': [80, 160, 320, 640]
},
'tiny': {
'depths': [3, 3, 9, 3],
'channels': [96, 192, 384, 768]
@ -211,6 +213,10 @@ class ConvNeXt(BaseBackbone):
'depths': [3, 3, 27, 3],
'channels': [256, 512, 1024, 2048]
},
'huge': {
'depths': [3, 3, 27, 3],
'channels': [352, 704, 1408, 2816]
}
}
def __init__(self,
@ -220,13 +226,23 @@ class ConvNeXt(BaseBackbone):
norm_cfg=dict(type='LN2d', eps=1e-6),
act_cfg=dict(type='GELU'),
linear_pw_conv=True,
use_grn=False,
drop_path_rate=0.,
layer_scale_init_value=1e-6,
out_indices=-1,
frozen_stages=0,
gap_before_final_norm=True,
with_cp=False,
init_cfg=None):
init_cfg=[
dict(
type='TruncNormal',
layer=['Conv2d', 'Linear'],
std=.02,
bias=0.),
dict(
type='Constant', layer=['LayerNorm'], val=1.,
bias=0.),
]):
super().__init__(init_cfg=init_cfg)
if isinstance(arch, str):
@ -278,7 +294,7 @@ class ConvNeXt(BaseBackbone):
self.channels[0],
kernel_size=stem_patch_size,
stride=stem_patch_size),
build_norm_layer(norm_cfg, self.channels[0])[1],
build_norm_layer(norm_cfg, self.channels[0]),
)
self.downsample_layers.append(stem)
@ -292,7 +308,7 @@ class ConvNeXt(BaseBackbone):
if i >= 1:
downsample_layer = nn.Sequential(
build_norm_layer(norm_cfg, self.channels[i - 1])[1],
build_norm_layer(norm_cfg, self.channels[i - 1]),
nn.Conv2d(
self.channels[i - 1],
channels,
@ -309,6 +325,7 @@ class ConvNeXt(BaseBackbone):
act_cfg=act_cfg,
linear_pw_conv=linear_pw_conv,
layer_scale_init_value=layer_scale_init_value,
use_grn=use_grn,
with_cp=with_cp) for j in range(depth)
])
block_idx += depth
@ -316,7 +333,7 @@ class ConvNeXt(BaseBackbone):
self.stages.append(stage)
if i in self.out_indices:
norm_layer = build_norm_layer(norm_cfg, channels)[1]
norm_layer = build_norm_layer(norm_cfg, channels)
self.add_module(f'norm{i}', norm_layer)
self._freeze_stages()
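As the updated docstring notes, the V2 behaviour is enabled with `use_grn=True` and `layer_scale_init_value=0.`. A minimal sketch of exercising the backbone directly (shapes follow the new `atto` entry in `arch_settings`; with the defaults `out_indices=-1` and `gap_before_final_norm=True` the backbone returns a single pooled feature):

```python
import torch

from mmcls.models import ConvNeXt

# ConvNeXt V2 'atto': GRN on, layer scale off, as recommended in the docstring.
backbone = ConvNeXt(arch='atto', use_grn=True, layer_scale_init_value=0.)
backbone.init_weights()
backbone.eval()

with torch.no_grad():
    feats = backbone(torch.randn(1, 3, 224, 224))

print(feats[0].shape)  # torch.Size([1, 320]) -- last-stage channels of 'atto'
```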


@ -5,11 +5,12 @@ from typing import Sequence
import torch
import torch.nn as nn
from mmcv.cnn.bricks import DropPath, build_activation_layer, build_norm_layer
from mmcv.cnn.bricks import DropPath
from mmengine.model import BaseModule, ModuleList, Sequential
from mmengine.registry import MODELS
from ..utils import ChannelMultiheadAttention, PositionEncodingFourier
from mmcls.registry import MODELS
from ..utils import (ChannelMultiheadAttention, PositionEncodingFourier,
build_norm_layer)
from .base_backbone import BaseBackbone
from .convnext import ConvNeXtBlock
@ -81,7 +82,7 @@ class SDTAEncoder(BaseModule):
self.pos_embed = PositionEncodingFourier(
embed_dims=in_channel) if use_pos_emb else None
self.norm_csa = build_norm_layer(norm_cfg, in_channel)[1]
self.norm_csa = build_norm_layer(norm_cfg, in_channel)
self.gamma_csa = nn.Parameter(
layer_scale_init_value * torch.ones(in_channel),
requires_grad=True) if layer_scale_init_value > 0 else None
@ -92,9 +93,9 @@ class SDTAEncoder(BaseModule):
attn_drop=attn_drop,
proj_drop=proj_drop)
self.norm = build_norm_layer(norm_cfg, in_channel)[1]
self.norm = build_norm_layer(norm_cfg, in_channel)
self.pointwise_conv1 = nn.Linear(in_channel, mlp_ratio * in_channel)
self.act = build_activation_layer(act_cfg)
self.act = MODELS.build(act_cfg)
self.pointwise_conv2 = nn.Linear(mlp_ratio * in_channel, in_channel)
self.gamma = nn.Parameter(
layer_scale_init_value * torch.ones(in_channel),
@ -298,7 +299,7 @@ class EdgeNeXt(BaseBackbone):
self.downsample_layers = ModuleList()
stem = nn.Sequential(
nn.Conv2d(in_channels, self.channels[0], kernel_size=4, stride=4),
build_norm_layer(norm_cfg, self.channels[0])[1],
build_norm_layer(norm_cfg, self.channels[0]),
)
self.downsample_layers.append(stem)
@ -310,7 +311,7 @@ class EdgeNeXt(BaseBackbone):
if i >= 1:
downsample_layer = nn.Sequential(
build_norm_layer(norm_cfg, self.channels[i - 1])[1],
build_norm_layer(norm_cfg, self.channels[i - 1]),
nn.Conv2d(
self.channels[i - 1],
channels,
@ -354,7 +355,7 @@ class EdgeNeXt(BaseBackbone):
if i in self.out_indices:
out_norm_cfg = dict(type='LN') if self.gap_before_final_norm \
else norm_cfg
norm_layer = build_norm_layer(out_norm_cfg, channels)[1]
norm_layer = build_norm_layer(out_norm_cfg, channels)
self.add_module(f'norm{i}', norm_layer)
def init_weights(self) -> None:


@ -5,9 +5,9 @@ from typing import Callable, Optional, Sequence
import torch
import torch.nn.functional as F
from mmcv.cnn import ConvModule, build_norm_layer
from mmengine.registry import MODELS
from torch import nn
from mmcls.registry import MODELS
from .base_backbone import BaseBackbone
from .mobilenet_v2 import InvertedResidual
from .vision_transformer import TransformerEncoderLayer


@ -6,9 +6,9 @@ import torch.nn as nn
import torch.utils.checkpoint as checkpoint
from mmcv.cnn.bricks import DropPath, build_activation_layer, build_norm_layer
from mmengine.model import BaseModule, ModuleList, Sequential
from mmengine.registry import MODELS
from torch.nn import functional as F
from mmcls.registry import MODELS
from ..utils import LeAttention
from .base_backbone import BaseBackbone


@ -11,6 +11,7 @@ from .helpers import is_tracing, to_2tuple, to_3tuple, to_4tuple, to_ntuple
from .inverted_residual import InvertedResidual
from .layer_scale import LayerScale
from .make_divisible import make_divisible
from .norm import GRN, LayerNorm2d, build_norm_layer
from .position_encoding import (ConditionalPositionEncoding,
PositionEncodingFourier)
from .se_layer import SELayer
@ -23,5 +24,6 @@ __all__ = [
'resize_pos_embed', 'resize_relative_position_bias_table',
'ClsDataPreprocessor', 'Mixup', 'CutMix', 'ResizeMix', 'BEiTAttention',
'LayerScale', 'WindowMSA', 'WindowMSAV2', 'ChannelMultiheadAttention',
'PositionEncodingFourier', 'LeAttention'
'PositionEncodingFourier', 'LeAttention', 'GRN', 'LayerNorm2d',
'build_norm_layer'
]


@ -0,0 +1,123 @@
# Copyright (c) OpenMMLab. All rights reserved.
import torch
import torch.nn as nn
import torch.nn.functional as F
from mmcls.registry import MODELS
@MODELS.register_module()
class GRN(nn.Module):
"""Global Response Normalization Module.
Comes from `ConvNeXt V2: Co-designing and Scaling ConvNets with Masked
Autoencoders <http://arxiv.org/abs/2301.00808>`_
Args:
in_channels (int): The number of channels of the input tensor.
eps (float): a value added to the denominator for numerical stability.
Defaults to 1e-6.
"""
def __init__(self, in_channels, eps=1e-6):
super().__init__()
self.in_channels = in_channels
self.gamma = nn.Parameter(torch.zeros(in_channels))
self.beta = nn.Parameter(torch.zeros(in_channels))
self.eps = eps
def forward(self, x: torch.Tensor, data_format='channel_first'):
"""Forward method.
Args:
x (torch.Tensor): The input tensor.
data_format (str): The format of the input tensor. If
``"channel_first"``, the shape of the input tensor should be
(B, C, H, W). If ``"channel_last"``, the shape of the input
tensor should be (B, H, W, C). Defaults to "channel_first".
"""
if data_format == 'channel_last':
gx = torch.norm(x, p=2, dim=(1, 2), keepdim=True)
nx = gx / (gx.mean(dim=-1, keepdim=True) + self.eps)
x = self.gamma * (x * nx) + self.beta + x
elif data_format == 'channel_first':
gx = torch.norm(x, p=2, dim=(2, 3), keepdim=True)
nx = gx / (gx.mean(dim=1, keepdim=True) + self.eps)
x = self.gamma.view(1, -1, 1, 1) * (x * nx) + self.beta.view(
1, -1, 1, 1) + x
return x
@MODELS.register_module('LN2d')
class LayerNorm2d(nn.LayerNorm):
"""LayerNorm on channels for 2d images.
Args:
num_channels (int): The number of channels of the input tensor.
eps (float): a value added to the denominator for numerical stability.
Defaults to 1e-5.
elementwise_affine (bool): a boolean value that when set to ``True``,
this module has learnable per-element affine parameters initialized
to ones (for weights) and zeros (for biases). Defaults to True.
"""
def __init__(self, num_channels: int, **kwargs) -> None:
super().__init__(num_channels, **kwargs)
self.num_channels = self.normalized_shape[0]
def forward(self, x, data_format='channel_first'):
"""Forward method.
Args:
x (torch.Tensor): The input tensor.
data_format (str): The format of the input tensor. If
``"channel_first"``, the shape of the input tensor should be
(B, C, H, W). If ``"channel_last"``, the shape of the input
tensor should be (B, H, W, C). Defaults to "channel_first".
"""
assert x.dim() == 4, 'LayerNorm2d only supports inputs with shape ' \
f'(N, C, H, W), but got tensor with shape {x.shape}'
if data_format == 'channel_last':
x = F.layer_norm(x, self.normalized_shape, self.weight, self.bias,
self.eps)
elif data_format == 'channel_first':
x = x.permute(0, 2, 3, 1)
x = F.layer_norm(x, self.normalized_shape, self.weight, self.bias,
self.eps)
# If the output is non-contiguous, it may cause unexpected
# problems in downstream tasks
x = x.permute(0, 3, 1, 2).contiguous()
return x
def build_norm_layer(cfg: dict, num_features: int) -> nn.Module:
"""Build normalization layer.
Args:
cfg (dict): The norm layer config, which should contain:
- type (str): Layer type.
- layer args: Args needed to instantiate a norm layer.
num_features (int): Number of input channels.
Returns:
nn.Module: The created norm layer.
"""
if not isinstance(cfg, dict):
raise TypeError('cfg must be a dict')
if 'type' not in cfg:
raise KeyError('the cfg dict must contain the key "type"')
cfg_ = cfg.copy()
layer_type = cfg_.pop('type')
norm_layer = MODELS.get(layer_type)
if norm_layer is None:
raise KeyError(f'Cannot find {layer_type} in registry under scope '
f'name {MODELS.scope}')
layer = norm_layer(num_features, **cfg_)
if layer_type == 'SyncBN' and hasattr(layer, '_specify_ddp_gpu_num'):
layer._specify_ddp_gpu_num(1)
return layer
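A short usage sketch (an illustration, not part of the diff) of the helpers added here, imported via the `mmcls.models.utils` exports updated above:

```python
import torch

from mmcls.models.utils import GRN, LayerNorm2d, build_norm_layer

x = torch.randn(2, 64, 56, 56)  # (N, C, H, W)

# GRN rescales each channel by its spatial L2 norm, normalized across channels.
grn = GRN(in_channels=64)
assert grn(x, data_format='channel_first').shape == x.shape

# LayerNorm2d applies LayerNorm over the channel dim of (N, C, H, W) inputs.
ln2d = LayerNorm2d(num_channels=64)
assert ln2d(x).shape == x.shape

# build_norm_layer looks the type up in the mmcls MODELS registry, so the
# 'LN2d' alias registered above resolves to LayerNorm2d.
norm = build_norm_layer(dict(type='LN2d', eps=1e-6), num_features=64)
assert isinstance(norm, LayerNorm2d)
```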


@ -48,3 +48,4 @@ Import:
- configs/clip/metafile.yml
- configs/mixmim/metafile.yml
- configs/efficientnet_v2/metafile.yml
- configs/convnext_v2/metafile.yml


@ -9,9 +9,8 @@ from unittest.mock import MagicMock, call, patch
import numpy as np
from mmengine.logging import MMLogger
from mmengine.registry import TRANSFORMS
from mmcls.registry import DATASETS
from mmcls.registry import DATASETS, TRANSFORMS
from mmcls.utils import register_all_modules
register_all_modules()


@ -94,3 +94,13 @@ def test_convnext():
feat = model(imgs)
assert len(feat) == 1
assert feat[0].shape == torch.Size([1, 768])
# Test linear_pw_conv=False
model = ConvNeXt(arch='tiny', out_indices=-1, linear_pw_conv=False)
model.init_weights()
model.train()
imgs = torch.randn(1, 3, 224, 224)
feat = model(imgs)
assert len(feat) == 1
assert feat[0].shape == torch.Size([1, 768])


@ -0,0 +1,60 @@
# Copyright (c) OpenMMLab. All rights reserved.
from unittest import TestCase
import torch
import torch.nn.functional as F
from mmcls.models.utils import GRN, LayerNorm2d
class TestGRN(TestCase):
def test_init(self):
module = GRN(in_channels=32, eps=1e-3)
self.assertEqual(module.in_channels, 32)
self.assertEqual(module.eps, 1e-3)
self.assertTrue(module.gamma.requires_grad)
self.assertTrue(module.beta.requires_grad)
self.assertEqual(module.gamma.shape, (32, ))
self.assertEqual(module.beta.shape, (32, ))
def test_forward(self):
module = GRN(in_channels=32, eps=1e-3)
input_ = torch.rand(1, 28, 28, 32)
gx = torch.norm(input_, p=2, dim=(1, 2), keepdim=True)
nx = gx / (gx.mean(dim=3, keepdim=True) + 1e-3)
expected_out = module.gamma * input_ * nx + module.beta + input_
torch.testing.assert_allclose(
module(input_, data_format='channel_last'), expected_out)
input_ = input_.permute([0, 3, 1, 2])
expected_out = expected_out.permute([0, 3, 1, 2])
torch.testing.assert_allclose(
module(input_, data_format='channel_first'), expected_out)
class TestLayerNorm2d(TestCase):
def test_init(self):
module = LayerNorm2d(num_channels=32, eps=1e-3)
self.assertEqual(module.num_channels, 32)
self.assertEqual(module.eps, 1e-3)
self.assertTrue(module.weight.requires_grad)
self.assertTrue(module.bias.requires_grad)
self.assertEqual(module.weight.shape, (32, ))
self.assertEqual(module.bias.shape, (32, ))
def test_forward(self):
module = LayerNorm2d(num_channels=32, eps=1e-3)
input_ = torch.rand(1, 28, 28, 32)
expected_out = F.layer_norm(input_, module.normalized_shape,
module.weight, module.bias, 1e-3)
torch.testing.assert_allclose(
module(input_, data_format='channel_last'), expected_out)
input_ = input_.permute([0, 3, 1, 2])
expected_out = expected_out.permute([0, 3, 1, 2])
torch.testing.assert_allclose(
module(input_, data_format='channel_first'), expected_out)