[Feature] [CodeCamp #68] Add EfficientNetV2 Backbone. (#1253)

* add efficientnet_v2.py

* add efficientnet_v2 in __init__.py

* add efficientnet_v2_s base config file

* add efficientnet_v2 config file

* add efficientnet_v2 config file

* update tuple output

* update config file

* update model file

* update model file

* update model file

* update config file

* update model file

* update config file

* update model file

* update model file

* update model file

* update model file

* update model file

* update config file

* update config file

* update model file

* update model file

* update model file

* update model file

* update model config file

* Update efficientnet_v2.py

* add config file and modify arch

* add config file and modify arch

* add the file about convert_pth from timm to mmcls

* update efficientnetv2 model file with mmcls style

* add the file about convert_pth from timm to mmcls

* add the file about convert_pth from timm to mmcls

* update convert file

* update model file

* update convert file

* update model file

* update model file

* update model file

* add metafile and README

* Update tools/model_converters/efficientnetv2-timm_to_mmcls.py

Co-authored-by: Ezra-Yu <18586273+Ezra-Yu@users.noreply.github.com>

* update model file and convert file

* update model file and convert file

* update model file and convert file

* update model file and convert file

* update model file

* update model file

* update model file

* update config file and README file

* update metafile

* Update efficientnetv2_to_mmcls.py

* update model-index.yml

* update metafile.yml

* update b0 and s train pipeline

* update b0 and s train pipeline

* update b0 and s train pipeline

* add test_efficientnet_v2

* update test_efficientnet_v2

* update model file docs

* update test_efficientnet_v2

* update test_efficientnet_v2

* pass pre-commit hook

* refactor efficientnetv2

* refactor efficientnetv2

* update readme, metafile and weight links

* update model-index.yml

* fix lint

* fix typo

* Update efficientnetv2-b1_8xb32_in1k.py

* Update efficientnetv2-b2_8xb32_in1k.py

* Update efficientnetv2-b3_8xb32_in1k.py

* update two modules and model file

* update module file

* update accuracies

* update accuracies

* update metafile

* fix build docs

* update links

* update README.md

Co-authored-by: qingtian <459291290@qq.com>
Co-authored-by: Ezra-Yu <18586273+Ezra-Yu@users.noreply.github.com>
QINGTIAN 2022-12-30 15:18:39 +08:00 committed by GitHub
commit 74743ef588 (parent 9038c1c255)
49 changed files with 1831 additions and 3 deletions

@@ -155,6 +155,7 @@ Results and models are available in the [model zoo](https://mmclassification.rea
- [x] [BEiT](https://github.com/open-mmlab/mmclassification/tree/1.x/configs/beit) / [BEiT v2](https://github.com/open-mmlab/mmclassification/tree/1.x/configs/beitv2)
- [x] [EVA](https://github.com/open-mmlab/mmclassification/tree/1.x/configs/eva)
- [x] [MixMIM](https://github.com/open-mmlab/mmclassification/tree/1.x/configs/mixmim)
- [x] [EfficientNetV2](https://github.com/open-mmlab/mmclassification/tree/1.x/configs/efficientnet_v2)
</details>

@@ -0,0 +1,12 @@
# model settings
model = dict(
type='ImageClassifier',
backbone=dict(type='EfficientNetV2', arch='b0'),
neck=dict(type='GlobalAveragePooling'),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=1280,
loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
topk=(1, 5),
))

@@ -0,0 +1,12 @@
# model settings
model = dict(
type='ImageClassifier',
backbone=dict(type='EfficientNetV2', arch='b1'),
neck=dict(type='GlobalAveragePooling'),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=1280,
loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
topk=(1, 5),
))

@@ -0,0 +1,12 @@
# model settings
model = dict(
type='ImageClassifier',
backbone=dict(type='EfficientNetV2', arch='b2'),
neck=dict(type='GlobalAveragePooling'),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=1408,
loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
topk=(1, 5),
))

@@ -0,0 +1,12 @@
# model settings
model = dict(
type='ImageClassifier',
backbone=dict(type='EfficientNetV2', arch='b3'),
neck=dict(type='GlobalAveragePooling'),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=1536,
loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
topk=(1, 5),
))

@@ -0,0 +1,12 @@
# model settings
model = dict(
type='ImageClassifier',
backbone=dict(type='EfficientNetV2', arch='l'),
neck=dict(type='GlobalAveragePooling'),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=1280,
loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
topk=(1, 5),
))

@@ -0,0 +1,12 @@
# model settings
model = dict(
type='ImageClassifier',
backbone=dict(type='EfficientNetV2', arch='m'),
neck=dict(type='GlobalAveragePooling'),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=1280,
loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
topk=(1, 5),
))

@@ -0,0 +1,12 @@
# model settings
model = dict(
type='ImageClassifier',
backbone=dict(type='EfficientNetV2', arch='s'),
neck=dict(type='GlobalAveragePooling'),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=1280,
loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
topk=(1, 5),
))

@@ -0,0 +1,12 @@
# model settings
model = dict(
type='ImageClassifier',
backbone=dict(type='EfficientNetV2', arch='xl'),
neck=dict(type='GlobalAveragePooling'),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=1280,
loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
topk=(1, 5),
))

@@ -0,0 +1,12 @@
# model settings
model = dict(
type='ImageClassifier',
backbone=dict(type='EfficientNetV2', arch='b0'),
neck=dict(type='GlobalAveragePooling'),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=1280,
loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
topk=(1, 5),
))

@@ -0,0 +1,12 @@
# model settings
model = dict(
type='ImageClassifier',
backbone=dict(type='EfficientNetV2', arch='b1'),
neck=dict(type='GlobalAveragePooling'),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=1280,
loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
topk=(1, 5),
))

@@ -0,0 +1,12 @@
# model settings
model = dict(
type='ImageClassifier',
backbone=dict(type='EfficientNetV2', arch='b2'),
neck=dict(type='GlobalAveragePooling'),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=1408,
loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
topk=(1, 5),
))

@@ -0,0 +1,12 @@
# model settings
model = dict(
type='ImageClassifier',
backbone=dict(type='EfficientNetV2', arch='b3'),
neck=dict(type='GlobalAveragePooling'),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=1536,
loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
topk=(1, 5),
))

@@ -0,0 +1,12 @@
# model settings
model = dict(
type='ImageClassifier',
backbone=dict(type='EfficientNetV2', arch='l'),
neck=dict(type='GlobalAveragePooling'),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=1280,
loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
topk=(1, 5),
))

@@ -0,0 +1,12 @@
# model settings
model = dict(
type='ImageClassifier',
backbone=dict(type='EfficientNetV2', arch='m'),
neck=dict(type='GlobalAveragePooling'),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=1280,
loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
topk=(1, 5),
))

@@ -0,0 +1,12 @@
# model settings
model = dict(
type='ImageClassifier',
backbone=dict(type='EfficientNetV2', arch='s'),
neck=dict(type='GlobalAveragePooling'),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=1280,
loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
topk=(1, 5),
))

@@ -0,0 +1,12 @@
# model settings
model = dict(
type='ImageClassifier',
backbone=dict(type='EfficientNetV2', arch='xl'),
neck=dict(type='GlobalAveragePooling'),
head=dict(
type='LinearClsHead',
num_classes=1000,
in_channels=1280,
loss=dict(type='CrossEntropyLoss', loss_weight=1.0),
topk=(1, 5),
))

@@ -0,0 +1,116 @@
# EfficientNetV2
> [EfficientNetV2: Smaller Models and Faster Training](https://arxiv.org/abs/2104.00298)
<!-- [ALGORITHM] -->
## Abstract
This paper introduces EfficientNetV2, a new family of convolutional networks that have faster training speed and better parameter efficiency than previous models. To develop this family of models, we use a combination of training-aware neural architecture search and scaling, to jointly optimize training speed and parameter efficiency. The models were searched from the search space enriched with new ops such as Fused-MBConv. Our experiments show that EfficientNetV2 models train much faster than state-of-the-art models while being up to 6.8x smaller. Our training can be further sped up by progressively increasing the image size during training, but it often causes a drop in accuracy. To compensate for this accuracy drop, we propose to adaptively adjust regularization (e.g., dropout and data augmentation) as well, such that we can achieve both fast training and good accuracy. With progressive learning, our EfficientNetV2 significantly outperforms previous models on ImageNet and CIFAR/Cars/Flowers datasets. By pretraining on the same ImageNet21k, our EfficientNetV2 achieves 87.3% top-1 accuracy on ImageNet ILSVRC2012, outperforming the recent ViT by 2.0% accuracy while training 5x-11x faster using the same computing resources. Code will be available at https://github.com/google/automl/tree/master/efficientnetv2.
<div align=center>
<img src="https://user-images.githubusercontent.com/18586273/208616931-0c5107f1-f08c-48d3-8694-7a6eaf227dc2.png" width="50%"/>
</div>
## How to use it?
<!-- [TABS-BEGIN] -->
**Predict image**
```python
>>> import torch
>>> from mmcls.apis import init_model, inference_model
>>>
>>> model = init_model('configs/efficientnet_v2/efficientnetv2-b0_8xb32_in1k.py', "https://download.openmmlab.com/mmclassification/v0/efficientnetv2/efficientnetv2-b0_8xb32_in1k_20221219-9689f21f.pth")
>>> predict = inference_model(model, 'demo/demo.JPEG')
>>> print(predict['pred_class'])
sea snake
>>> print(predict['pred_score'])
0.3147328197956085
```
**Use the model**
```python
>>> import torch
>>> from mmcls import get_model
>>>
>>> model = get_model("efficientnetv2-b0_3rdparty_in1k", pretrained=True)
>>> model.eval()
>>> inputs = torch.rand(1, 3, 224, 224).to(model.data_preprocessor.device)
>>> # To get classification scores.
>>> out = model(inputs)
>>> print(out.shape)
torch.Size([1, 1000])
>>> # To extract features.
>>> outs = model.extract_feat(inputs)
>>> print(outs[0].shape)
torch.Size([1, 1280])
```
**Train/Test Command**
Place the ImageNet dataset in the `data/imagenet/` directory, or prepare datasets according to the [docs](https://mmclassification.readthedocs.io/en/1.x/user_guides/dataset_prepare.html#prepare-dataset).
Train:
```shell
python tools/train.py configs/efficientnet_v2/efficientnetv2-b0_8xb32_in1k.py
```
Test:
```shell
python tools/test.py configs/efficientnet_v2/efficientnetv2-b0_8xb32_in1k.py https://download.openmmlab.com/mmclassification/v0/efficientnetv2/efficientnetv2-b0_8xb32_in1k_20221219-9689f21f.pth
```
<!-- [TABS-END] -->
For more configurable parameters, please refer to the [API](https://mmclassification.readthedocs.io/en/1.x/api/generated/mmcls.models.backbones.EfficientNetV2.html#mmcls.models.backbones.EfficientNetV2).
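The backbone can also be built on its own. Below is a minimal sketch (the `arch` and `out_indices` values are illustrative; `EfficientNetV2` is the backbone class added by this PR):

```python
import torch
from mmcls.models import EfficientNetV2

# Build the bare backbone and request several intermediate stages.
# For 'b0' there are 8 layers in total (the stem conv, six stages and the
# final conv), so valid out_indices are 0-7; negatives count from the end.
backbone = EfficientNetV2(arch='b0', out_indices=(2, 4, 7))
backbone.eval()

with torch.no_grad():
    feats = backbone(torch.rand(1, 3, 224, 224))
for feat in feats:
    print(feat.shape)  # one feature map per requested stage
```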
## Results and models
### ImageNet-1k
| Model | Pretrain | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download |
| :------------------------------------------------: | :----------: | :-------: | :------: | :-------: | :-------: | :-----------------------------------------------: | :----------------------------------------------------: |
| EfficientNetV2-b0\* (`efficientnetv2-b0_3rdparty_in1k`) | From scratch | 7.14 | 0.92 | 78.52 | 94.44 | [config](./efficientnetv2-b0_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/efficientnetv2/efficientnetv2-b0_3rdparty_in1k_20221221-9ef6e736.pth) |
| EfficientNetV2-b1\* (`efficientnetv2-b1_3rdparty_in1k`) | From scratch | 8.14 | 1.44 | 79.80 | 94.89 | [config](./efficientnetv2-b1_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/efficientnetv2/efficientnetv2-b1_3rdparty_in1k_20221221-6955d9ce.pth) |
| EfficientNetV2-b2\* (`efficientnetv2-b2_3rdparty_in1k`) | From scratch | 10.10 | 1.99 | 80.63 | 95.30 | [config](./efficientnetv2-b2_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/efficientnetv2/efficientnetv2-b2_3rdparty_in1k_20221221-74f7d493.pth) |
| EfficientNetV2-b3\* (`efficientnetv2-b3_3rdparty_in1k`) | From scratch | 14.36 | 3.50 | 82.03 | 95.88 | [config](./efficientnetv2-b3_8xb32_in1k.py) | [model](https://download.openmmlab.com/mmclassification/v0/efficientnetv2/efficientnetv2-b3_3rdparty_in1k_20221221-b6f07a36.pth) |
| EfficientNetV2-s\* (`efficientnetv2-s_3rdparty_in1k`) | From scratch | 21.46 | 9.72 | 83.82 | 96.67 | [config](./efficientnetv2-s_8xb32_in1k-384px.py) | [model](https://download.openmmlab.com/mmclassification/v0/efficientnetv2/efficientnetv2-s_3rdparty_in1k_20221220-f0eaff9d.pth) |
| EfficientNetV2-m\* (`efficientnetv2-m_3rdparty_in1k`) | From scratch | 54.14 | 26.88 | 85.01 | 97.26 | [config](./efficientnetv2-m_8xb32_in1k-480px.py) | [model](https://download.openmmlab.com/mmclassification/v0/efficientnetv2/efficientnetv2-m_3rdparty_in1k_20221220-9dc0c729.pth) |
| EfficientNetV2-l\* (`efficientnetv2-l_3rdparty_in1k`) | From scratch | 118.52 | 60.14 | 85.43 | 97.31 | [config](./efficientnetv2-l_8xb32_in1k-480px.py) | [model](https://download.openmmlab.com/mmclassification/v0/efficientnetv2/efficientnetv2-l_3rdparty_in1k_20221220-5c3bac0f.pth) |
| EfficientNetV2-s\* (`efficientnetv2-s_in21k-pre_3rdparty_in1k`) | ImageNet 21k | 21.46 | 9.72 | 84.29 | 97.26 | [config](./efficientnetv2-s_8xb32_in1k-384px.py) | [model](https://download.openmmlab.com/mmclassification/v0/efficientnetv2/efficientnetv2-s_in21k-pre-3rdparty_in1k_20221220-7a7c8475.pth) |
| EfficientNetV2-m\* (`efficientnetv2-m_in21k-pre_3rdparty_in1k`) | ImageNet 21k | 54.14 | 26.88 | 85.47 | 97.76 | [config](./efficientnetv2-m_8xb32_in1k-480px.py) | [model](https://download.openmmlab.com/mmclassification/v0/efficientnetv2/efficientnetv2-m_in21k-pre-3rdparty_in1k_20221220-a1013a04.pth) |
| EfficientNetV2-l\* (`efficientnetv2-l_in21k-pre_3rdparty_in1k`) | ImageNet 21k | 118.52 | 60.14 | 86.31 | 97.99 | [config](./efficientnetv2-l_8xb32_in1k-480px.py) | [model](https://download.openmmlab.com/mmclassification/v0/efficientnetv2/efficientnetv2-l_in21k-pre-3rdparty_in1k_20221220-63df0efd.pth) |
| EfficientNetV2-xl\* (`efficientnetv2-xl_in21k-pre_3rdparty_in1k`) | ImageNet 21k | 208.12 | 98.34 | 86.39 | 97.83 | [config](./efficientnetv2-xl_8xb32_in1k-512px.py) | [model](https://download.openmmlab.com/mmclassification/v0/efficientnetv2/efficientnetv2-xl_in21k-pre-3rdparty_in1k_20221220-583ac18b.pth) |
*Models with * are converted from the [official repo](https://github.com/rwightman/pytorch-image-models/blob/main/timm/models/efficientnet.py). The config files of these models are only for inference. We have not verified the training accuracy of these configs, and welcome you to contribute your reproduction results.*
### Pre-trained Models on ImageNet-21k
The pre-trained models below are provided for fine-tuning only; they are not meant to be trained with these configs and therefore have no evaluation results.
| Model | Params(M) | Flops(G) | Config | Download |
| :------------------------------------------------------: | :-------: | :------: | :-----------------------------------------------: | :----------------------------------------------------------------------------: |
| EfficientNetV2-s\* (`efficientnetv2-s_3rdparty_in21k`) | 48.16 | 3.31 | [config](./efficientnetv2-s_8xb32_in21k.py) | [model](https://download.openmmlab.com/mmclassification/v0/efficientnetv2/efficientnetv2-s_3rdparty_in21k_20221220-c0572b56.pth) |
| EfficientNetV2-m\* (`efficientnetv2-m_3rdparty_in21k`) | 80.84 | 5.86 | [config](./efficientnetv2-m_8xb32_in21k.py) | [model](https://download.openmmlab.com/mmclassification/v0/efficientnetv2/efficientnetv2-m_3rdparty_in21k_20221220-073e944c.pth) |
| EfficientNetV2-l\* (`efficientnetv2-l_3rdparty_in21k`) | 145.22 | 13.11 | [config](./efficientnetv2-l_8xb32_in21k.py) | [model](https://download.openmmlab.com/mmclassification/v0/efficientnetv2/efficientnetv2-l_3rdparty_in21k_20221220-f28f91e1.pth) |
| EfficientNetV2-xl\* (`efficientnetv2-xl_3rdparty_in21k`) | 234.82 | 18.86 | [config](./efficientnetv2-xl_8xb32_in21k.py) | [model](https://download.openmmlab.com/mmclassification/v0/efficientnetv2/efficientnetv2-xl_3rdparty_in21k_20221220-b2c9329c.pth) |
*Models with * are converted from the [official repo](https://github.com/rwightman/pytorch-image-models/blob/main/timm/models/efficientnet.py).*
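To fine-tune from one of these ImageNet-21k checkpoints, a config sketch is shown below (the file name and `num_classes` are placeholders; the `init_cfg`/`Pretrained` pattern follows the MMClassification fine-tuning tutorial and loads only the backbone weights):

```python
# my_efficientnetv2_finetune.py (hypothetical config name)
_base_ = ['./efficientnetv2-s_8xb32_in1k-384px.py']

model = dict(
    backbone=dict(
        init_cfg=dict(
            type='Pretrained',
            checkpoint='https://download.openmmlab.com/mmclassification/v0/efficientnetv2/efficientnetv2-s_3rdparty_in21k_20221220-c0572b56.pth',
            prefix='backbone',
        )),
    head=dict(num_classes=100),  # match your target dataset
)
```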
## Citation
```bibtex
@inproceedings{tan2021efficientnetv2,
title={Efficientnetv2: Smaller models and faster training},
author={Tan, Mingxing and Le, Quoc},
booktitle={International Conference on Machine Learning},
pages={10096--10106},
year={2021},
organization={PMLR}
}
```

@@ -0,0 +1,58 @@
_base_ = [
'../_base_/models/efficientnet_v2/efficientnet_v2_b0.py',
'../_base_/datasets/imagenet_bs32.py',
'../_base_/schedules/imagenet_bs256.py',
'../_base_/default_runtime.py',
]
# dataset settings
dataset_type = 'ImageNet'
data_preprocessor = dict(
num_classes=1000,
# RGB format normalization parameters
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
# convert image from BGR to RGB
to_rgb=True,
)
bgr_mean = data_preprocessor['mean'][::-1]
bgr_std = data_preprocessor['std'][::-1]
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='RandomResizedCrop',
scale=192,
backend='pillow',
interpolation='bicubic'),
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
dict(
type='RandAugment',
policies='timm_increasing',
num_policies=2,
total_level=10,
magnitude_level=9,
magnitude_std=0.5,
hparams=dict(
pad_val=[round(x) for x in bgr_mean], interpolation='bicubic')),
dict(
type='RandomErasing',
erase_prob=0.25,
mode='rand',
min_area_ratio=0.02,
max_area_ratio=1 / 3,
fill_color=bgr_mean,
fill_std=bgr_std),
dict(type='PackClsInputs'),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='EfficientNetCenterCrop', crop_size=224, crop_padding=0),
dict(type='PackClsInputs'),
]
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
test_dataloader = dict(dataset=dict(pipeline=test_pipeline))

@@ -0,0 +1,23 @@
_base_ = [
'../_base_/models/efficientnet_v2/efficientnet_v2_b1.py',
'../_base_/datasets/imagenet_bs32.py',
'../_base_/schedules/imagenet_bs256.py',
'../_base_/default_runtime.py',
]
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='EfficientNetRandomCrop', scale=192),
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
dict(type='PackClsInputs'),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='EfficientNetCenterCrop', crop_size=240),
dict(type='PackClsInputs'),
]
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
test_dataloader = dict(dataset=dict(pipeline=test_pipeline))

@@ -0,0 +1,23 @@
_base_ = [
'../_base_/models/efficientnet_v2/efficientnet_v2_b2.py',
'../_base_/datasets/imagenet_bs32.py',
'../_base_/schedules/imagenet_bs256.py',
'../_base_/default_runtime.py',
]
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='EfficientNetRandomCrop', scale=208),
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
dict(type='PackClsInputs'),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='EfficientNetCenterCrop', crop_size=260),
dict(type='PackClsInputs'),
]
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
test_dataloader = dict(dataset=dict(pipeline=test_pipeline))

@@ -0,0 +1,23 @@
_base_ = [
'../_base_/models/efficientnet_v2/efficientnet_v2_b3.py',
'../_base_/datasets/imagenet_bs32.py',
'../_base_/schedules/imagenet_bs256.py',
'../_base_/default_runtime.py',
]
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='EfficientNetRandomCrop', scale=240),
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
dict(type='PackClsInputs'),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='EfficientNetCenterCrop', crop_size=300),
dict(type='PackClsInputs'),
]
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
test_dataloader = dict(dataset=dict(pipeline=test_pipeline))

@@ -0,0 +1,34 @@
_base_ = [
'../_base_/models/efficientnet_v2/efficientnet_v2_l.py',
'../_base_/datasets/imagenet_bs32.py',
'../_base_/schedules/imagenet_bs256.py',
'../_base_/default_runtime.py',
]
# dataset settings
dataset_type = 'ImageNet'
data_preprocessor = dict(
num_classes=1000,
# RGB format normalization parameters
mean=[127.5, 127.5, 127.5],
std=[127.5, 127.5, 127.5],
# convert image from BGR to RGB
to_rgb=True,
)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='EfficientNetRandomCrop', scale=384, crop_padding=0),
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
dict(type='PackClsInputs'),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='EfficientNetCenterCrop', crop_size=480, crop_padding=0),
dict(type='PackClsInputs'),
]
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
test_dataloader = dict(dataset=dict(pipeline=test_pipeline))

@@ -0,0 +1,34 @@
_base_ = [
'../_base_/models/efficientnet_v2/efficientnet_v2_l.py',
'../_base_/datasets/imagenet_bs32.py',
'../_base_/schedules/imagenet_bs256.py',
'../_base_/default_runtime.py',
]
# dataset settings
dataset_type = 'ImageNet'
data_preprocessor = dict(
num_classes=1000,
# RGB format normalization parameters
mean=[127.5, 127.5, 127.5],
std=[127.5, 127.5, 127.5],
# convert image from BGR to RGB
to_rgb=True,
)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='EfficientNetRandomCrop', scale=384, crop_padding=0),
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
dict(type='PackClsInputs'),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='EfficientNetCenterCrop', crop_size=480, crop_padding=0),
dict(type='PackClsInputs'),
]
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
test_dataloader = dict(dataset=dict(pipeline=test_pipeline))

@@ -0,0 +1,34 @@
_base_ = [
'../_base_/models/efficientnet_v2/efficientnet_v2_m.py',
'../_base_/datasets/imagenet_bs32.py',
'../_base_/schedules/imagenet_bs256.py',
'../_base_/default_runtime.py',
]
# dataset settings
dataset_type = 'ImageNet'
data_preprocessor = dict(
num_classes=1000,
# RGB format normalization parameters
mean=[127.5, 127.5, 127.5],
std=[127.5, 127.5, 127.5],
# convert image from BGR to RGB
to_rgb=True,
)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='EfficientNetRandomCrop', scale=384, crop_padding=0),
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
dict(type='PackClsInputs'),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='EfficientNetCenterCrop', crop_size=480, crop_padding=0),
dict(type='PackClsInputs'),
]
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
test_dataloader = dict(dataset=dict(pipeline=test_pipeline))

@@ -0,0 +1,34 @@
_base_ = [
'../_base_/models/efficientnet_v2/efficientnet_v2_m.py',
'../_base_/datasets/imagenet_bs32.py',
'../_base_/schedules/imagenet_bs256.py',
'../_base_/default_runtime.py',
]
# dataset settings
dataset_type = 'ImageNet'
data_preprocessor = dict(
num_classes=1000,
# RGB format normalization parameters
mean=[127.5, 127.5, 127.5],
std=[127.5, 127.5, 127.5],
# convert image from BGR to RGB
to_rgb=True,
)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='EfficientNetRandomCrop', scale=384, crop_padding=0),
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
dict(type='PackClsInputs'),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='EfficientNetCenterCrop', crop_size=480, crop_padding=0),
dict(type='PackClsInputs'),
]
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
test_dataloader = dict(dataset=dict(pipeline=test_pipeline))

@@ -0,0 +1,58 @@
_base_ = [
'../_base_/models/efficientnet_v2/efficientnet_v2_s.py',
'../_base_/datasets/imagenet_bs32.py',
'../_base_/schedules/imagenet_bs256.py',
'../_base_/default_runtime.py',
]
# dataset settings
dataset_type = 'ImageNet'
data_preprocessor = dict(
num_classes=1000,
# RGB format normalization parameters
mean=[127.5, 127.5, 127.5],
std=[127.5, 127.5, 127.5],
# convert image from BGR to RGB
to_rgb=True,
)
bgr_mean = data_preprocessor['mean'][::-1]
bgr_std = data_preprocessor['std'][::-1]
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='RandomResizedCrop',
scale=300,
backend='pillow',
interpolation='bicubic'),
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
dict(
type='RandAugment',
policies='timm_increasing',
num_policies=2,
total_level=10,
magnitude_level=9,
magnitude_std=0.5,
hparams=dict(
pad_val=[round(x) for x in bgr_mean], interpolation='bicubic')),
dict(
type='RandomErasing',
erase_prob=0.25,
mode='rand',
min_area_ratio=0.02,
max_area_ratio=1 / 3,
fill_color=bgr_mean,
fill_std=bgr_std),
dict(type='PackClsInputs'),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='EfficientNetCenterCrop', crop_size=384, crop_padding=0),
dict(type='PackClsInputs'),
]
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
test_dataloader = dict(dataset=dict(pipeline=test_pipeline))

@@ -0,0 +1,34 @@
_base_ = [
'../_base_/models/efficientnet_v2/efficientnet_v2_s.py',
'../_base_/datasets/imagenet_bs32.py',
'../_base_/schedules/imagenet_bs256.py',
'../_base_/default_runtime.py',
]
# dataset settings
dataset_type = 'ImageNet'
data_preprocessor = dict(
num_classes=1000,
# RGB format normalization parameters
mean=[127.5, 127.5, 127.5],
std=[127.5, 127.5, 127.5],
# convert image from BGR to RGB
to_rgb=True,
)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='EfficientNetRandomCrop', scale=300, crop_padding=0),
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
dict(type='PackClsInputs'),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='EfficientNetCenterCrop', crop_size=384, crop_padding=0),
dict(type='PackClsInputs'),
]
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
test_dataloader = dict(dataset=dict(pipeline=test_pipeline))

@@ -0,0 +1,34 @@
_base_ = [
'../_base_/models/efficientnet_v2/efficientnet_v2_xl.py',
'../_base_/datasets/imagenet_bs32.py',
'../_base_/schedules/imagenet_bs256.py',
'../_base_/default_runtime.py',
]
# dataset settings
dataset_type = 'ImageNet'
data_preprocessor = dict(
num_classes=1000,
# RGB format normalization parameters
mean=[127.5, 127.5, 127.5],
std=[127.5, 127.5, 127.5],
# convert image from BGR to RGB
to_rgb=True,
)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='EfficientNetRandomCrop', scale=384, crop_padding=0),
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
dict(type='PackClsInputs'),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='EfficientNetCenterCrop', crop_size=512, crop_padding=0),
dict(type='PackClsInputs'),
]
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
test_dataloader = dict(dataset=dict(pipeline=test_pipeline))

@@ -0,0 +1,58 @@
_base_ = [
'../_base_/models/efficientnet_v2/efficientnetv2_b0.py',
'../_base_/datasets/imagenet_bs32.py',
'../_base_/schedules/imagenet_bs256.py',
'../_base_/default_runtime.py',
]
# dataset settings
dataset_type = 'ImageNet'
data_preprocessor = dict(
num_classes=1000,
# RGB format normalization parameters
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
# convert image from BGR to RGB
to_rgb=True,
)
bgr_mean = data_preprocessor['mean'][::-1]
bgr_std = data_preprocessor['std'][::-1]
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='RandomResizedCrop',
scale=192,
backend='pillow',
interpolation='bicubic'),
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
dict(
type='RandAugment',
policies='timm_increasing',
num_policies=2,
total_level=10,
magnitude_level=9,
magnitude_std=0.5,
hparams=dict(
pad_val=[round(x) for x in bgr_mean], interpolation='bicubic')),
dict(
type='RandomErasing',
erase_prob=0.25,
mode='rand',
min_area_ratio=0.02,
max_area_ratio=1 / 3,
fill_color=bgr_mean,
fill_std=bgr_std),
dict(type='PackClsInputs'),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='EfficientNetCenterCrop', crop_size=224, crop_padding=0),
dict(type='PackClsInputs'),
]
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
test_dataloader = dict(dataset=dict(pipeline=test_pipeline))

@@ -0,0 +1,21 @@
_base_ = ['./efficientnetv2-b0_8xb32_in1k.py']
# model setting
model = dict(backbone=dict(arch='b1'), head=dict(in_channels=1280, ))
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='EfficientNetRandomCrop', scale=192),
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
dict(type='PackClsInputs'),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='EfficientNetCenterCrop', crop_size=240, crop_padding=0),
dict(type='PackClsInputs'),
]
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
test_dataloader = dict(dataset=dict(pipeline=test_pipeline))

@@ -0,0 +1,21 @@
_base_ = ['./efficientnetv2-b0_8xb32_in1k.py']
# model setting
model = dict(backbone=dict(arch='b2'), head=dict(in_channels=1408, ))
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='EfficientNetRandomCrop', scale=208),
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
dict(type='PackClsInputs'),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='EfficientNetCenterCrop', crop_size=260, crop_padding=0),
dict(type='PackClsInputs'),
]
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
test_dataloader = dict(dataset=dict(pipeline=test_pipeline))

@@ -0,0 +1,21 @@
_base_ = ['./efficientnetv2-b0_8xb32_in1k.py']
# model setting
model = dict(backbone=dict(arch='b3'), head=dict(in_channels=1536, ))
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='EfficientNetRandomCrop', scale=240),
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
dict(type='PackClsInputs'),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='EfficientNetCenterCrop', crop_size=300, crop_padding=0),
dict(type='PackClsInputs'),
]
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
test_dataloader = dict(dataset=dict(pipeline=test_pipeline))

@@ -0,0 +1,23 @@
_base_ = [
'efficientnetv2-s_8xb32_in1k-384px.py',
]
# model setting
model = dict(backbone=dict(arch='l'), )
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='EfficientNetRandomCrop', scale=384, crop_padding=0),
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
dict(type='PackClsInputs'),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='EfficientNetCenterCrop', crop_size=480, crop_padding=0),
dict(type='PackClsInputs'),
]
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
test_dataloader = dict(dataset=dict(pipeline=test_pipeline))

@@ -0,0 +1,4 @@
_base_ = ['./efficientnetv2-s_8xb32_in21k.py']
# model setting
model = dict(backbone=dict(arch='l'), )

@@ -0,0 +1,23 @@
_base_ = [
'efficientnetv2-s_8xb32_in1k-384px.py',
]
# model setting
model = dict(backbone=dict(arch='m'), )
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='EfficientNetRandomCrop', scale=384, crop_padding=0),
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
dict(type='PackClsInputs'),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='EfficientNetCenterCrop', crop_size=480, crop_padding=0),
dict(type='PackClsInputs'),
]
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
test_dataloader = dict(dataset=dict(pipeline=test_pipeline))

@@ -0,0 +1,4 @@
_base_ = ['./efficientnetv2-s_8xb32_in21k.py']
# model setting
model = dict(backbone=dict(arch='m'), )

@@ -0,0 +1,34 @@
_base_ = [
'../_base_/models/efficientnet_v2/efficientnetv2_s.py',
'../_base_/datasets/imagenet_bs32.py',
'../_base_/schedules/imagenet_bs256.py',
'../_base_/default_runtime.py',
]
# dataset settings
dataset_type = 'ImageNet'
data_preprocessor = dict(
num_classes=1000,
# RGB format normalization parameters
mean=[127.5, 127.5, 127.5],
std=[127.5, 127.5, 127.5],
# convert image from BGR to RGB
to_rgb=True,
)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='EfficientNetRandomCrop', scale=300, crop_padding=0),
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
dict(type='PackClsInputs'),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='EfficientNetCenterCrop', crop_size=384, crop_padding=0),
dict(type='PackClsInputs'),
]
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
test_dataloader = dict(dataset=dict(pipeline=test_pipeline))

@@ -0,0 +1,43 @@
_base_ = [
'../_base_/models/efficientnet_v2/efficientnetv2_s.py',
'../_base_/datasets/imagenet_bs32.py',
'../_base_/schedules/imagenet_bs256.py',
'../_base_/default_runtime.py',
]
# model setting
model = dict(head=dict(num_classes=21843))
# dataset settings
dataset_type = 'ImageNet21k'
data_preprocessor = dict(
num_classes=21843,
# RGB format normalization parameters
mean=[127.5, 127.5, 127.5],
std=[127.5, 127.5, 127.5],
# convert image from BGR to RGB
to_rgb=True,
)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='EfficientNetRandomCrop', scale=224),
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
dict(type='PackClsInputs'),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='EfficientNetCenterCrop', crop_size=224, crop_padding=0),
dict(type='PackClsInputs'),
]
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
test_dataloader = dict(dataset=dict(pipeline=test_pipeline))
# schedule setting
optim_wrapper = dict(
optimizer=dict(lr=4e-3),
clip_grad=dict(max_norm=5.0),
)

@@ -0,0 +1,23 @@
_base_ = [
'efficientnetv2-s_8xb32_in1k-384px.py',
]
# model setting
model = dict(backbone=dict(arch='xl'), )
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='EfficientNetRandomCrop', scale=384, crop_padding=0),
dict(type='RandomFlip', prob=0.5, direction='horizontal'),
dict(type='PackClsInputs'),
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='EfficientNetCenterCrop', crop_size=512, crop_padding=0),
dict(type='PackClsInputs'),
]
train_dataloader = dict(dataset=dict(pipeline=train_pipeline))
val_dataloader = dict(dataset=dict(pipeline=test_pipeline))
test_dataloader = dict(dataset=dict(pipeline=test_pipeline))

@@ -0,0 +1,4 @@
_base_ = ['./efficientnetv2-s_8xb32_in21k.py']
# model setting
model = dict(backbone=dict(arch='xl'), )

@@ -0,0 +1,255 @@
Collections:
- Name: EfficientNetV2
Metadata:
Training Data: ImageNet-1k
Architecture:
- 1x1 Convolution
- Average Pooling
- Convolution
- Dense Connections
- Dropout
- Inverted Residual Block
- RMSProp
- Squeeze-and-Excitation Block
- Swish
Paper:
URL: https://arxiv.org/abs/2104.00298
Title: "EfficientNetV2: Smaller Models and Faster Training"
README: configs/efficientnet_v2/README.md
Code:
    URL: https://github.com/open-mmlab/mmclassification/blob/dev-1.x/mmcls/models/backbones/efficientnet_v2.py
Version: v1.0.0rc4
Models:
- Name: efficientnetv2-b0_3rdparty_in1k
Metadata:
FLOPs: 919843360
Parameters: 7139704
In Collection: EfficientNetV2
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 78.52
Top 5 Accuracy: 94.44
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/efficientnetv2/efficientnetv2-b0_3rdparty_in1k_20221221-9ef6e736.pth
Config: configs/efficientnet_v2/efficientnetv2-b0_8xb32_in1k.py
Converted From:
Weights: https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-effv2-weights/tf_efficientnetv2_b0-c7cc451f.pth
Code: https://github.com/rwightman/pytorch-image-models/blob/main/timm/models/efficientnet.py
- Name: efficientnetv2-b1_3rdparty_in1k
Metadata:
FLOPs: 1438287552
Parameters: 8141052
In Collection: EfficientNetV2
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 79.80
Top 5 Accuracy: 94.89
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/efficientnetv2/efficientnetv2-b1_3rdparty_in1k_20221221-6955d9ce.pth
Config: configs/efficientnet_v2/efficientnetv2-b1_8xb32_in1k.py
Converted From:
Weights: https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-effv2-weights/tf_efficientnetv2_b1-be6e41b0.pth
Code: https://github.com/rwightman/pytorch-image-models/blob/main/timm/models/efficientnet.py
- Name: efficientnetv2-b2_3rdparty_in1k
Metadata:
FLOPs: 1986433080
Parameters: 10096086
In Collection: EfficientNetV2
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 80.63
Top 5 Accuracy: 95.30
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/efficientnetv2/efficientnetv2-b2_3rdparty_in1k_20221221-74f7d493.pth
Config: configs/efficientnet_v2/efficientnetv2-b2_8xb32_in1k.py
Converted From:
Weights: https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-effv2-weights/tf_efficientnetv2_b2-847de54e.pth
Code: https://github.com/rwightman/pytorch-image-models/blob/main/timm/models/efficientnet.py
- Name: efficientnetv2-b3_3rdparty_in1k
Metadata:
FLOPs: 3498068400
Parameters: 14358406
In Collection: EfficientNetV2
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 82.03
Top 5 Accuracy: 95.88
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/efficientnetv2/efficientnetv2-b3_3rdparty_in1k_20221221-b6f07a36.pth
Config: configs/efficientnet_v2/efficientnetv2-b3_8xb32_in1k.py
Converted From:
Weights: https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-effv2-weights/tf_efficientnetv2_b3-57773f13.pth
Code: https://github.com/rwightman/pytorch-image-models/blob/main/timm/models/efficientnet.py
- Name: efficientnetv2-s_3rdparty_in1k
Metadata:
FLOPs: 9719420928
Parameters: 21458488
In Collection: EfficientNetV2
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 83.82
Top 5 Accuracy: 96.67
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/efficientnetv2/efficientnetv2-s_3rdparty_in1k_20221220-f0eaff9d.pth
Config: configs/efficientnet_v2/efficientnetv2-s_8xb32_in1k-384px.py
Converted From:
Weights: https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-effv2-weights/tf_efficientnetv2_s-eb54923e.pth
Code: https://github.com/rwightman/pytorch-image-models/blob/main/timm/models/efficientnet.py
- Name: efficientnetv2-m_3rdparty_in1k
Metadata:
FLOPs: 26880363584
Parameters: 54139356
In Collection: EfficientNetV2
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 85.01
Top 5 Accuracy: 97.26
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/efficientnetv2/efficientnetv2-m_3rdparty_in1k_20221220-9dc0c729.pth
Config: configs/efficientnet_v2/efficientnetv2-m_8xb32_in1k-480px.py
Converted From:
Weights: https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-effv2-weights/tf_efficientnetv2_m-cc09e0cd.pth
Code: https://github.com/rwightman/pytorch-image-models/blob/main/timm/models/efficientnet.py
- Name: efficientnetv2-l_3rdparty_in1k
Metadata:
FLOPs: 60142387008
Parameters: 118515272
In Collection: EfficientNetV2
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 85.43
Top 5 Accuracy: 97.31
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/efficientnetv2/efficientnetv2-l_3rdparty_in1k_20221220-5c3bac0f.pth
Config: configs/efficientnet_v2/efficientnetv2-l_8xb32_in1k-480px.py
Converted From:
Weights: https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-effv2-weights/tf_efficientnetv2_l-d664b728.pth
Code: https://github.com/rwightman/pytorch-image-models/blob/main/timm/models/efficientnet.py
- Name: efficientnetv2-s_in21k-pre_3rdparty_in1k
Metadata:
Training Data:
- ImageNet-21k
- ImageNet-1k
FLOPs: 9719420928
Parameters: 21458488
In Collection: EfficientNetV2
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 84.29
Top 5 Accuracy: 97.26
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/efficientnetv2/efficientnetv2-s_in21k-pre-3rdparty_in1k_20221220-7a7c8475.pth
Config: configs/efficientnet_v2/efficientnetv2-s_8xb32_in1k-384px.py
Converted From:
Weights: https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-effv2-weights/tf_efficientnetv2_s_21ft1k-d7dafa41.pth
Code: https://github.com/rwightman/pytorch-image-models/blob/main/timm/models/efficientnet.py
- Name: efficientnetv2-m_in21k-pre_3rdparty_in1k
Metadata:
Training Data:
- ImageNet-21k
- ImageNet-1k
FLOPs: 26880363584
Parameters: 54139356
In Collection: EfficientNetV2
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 85.47
Top 5 Accuracy: 97.76
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/efficientnetv2/efficientnetv2-m_in21k-pre-3rdparty_in1k_20221220-a1013a04.pth
Config: configs/efficientnet_v2/efficientnetv2-m_8xb32_in1k-480px.py
Converted From:
Weights: https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-effv2-weights/tf_efficientnetv2_m_21ft1k-bf41664a.pth
Code: https://github.com/rwightman/pytorch-image-models/blob/main/timm/models/efficientnet.py
- Name: efficientnetv2-l_in21k-pre_3rdparty_in1k
Metadata:
Training Data:
- ImageNet-21k
- ImageNet-1k
FLOPs: 60142387008
Parameters: 118515272
In Collection: EfficientNetV2
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 86.31
Top 5 Accuracy: 97.99
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/efficientnetv2/efficientnetv2-l_in21k-pre-3rdparty_in1k_20221220-63df0efd.pth
Config: configs/efficientnet_v2/efficientnetv2-l_8xb32_in1k-480px.py
Converted From:
Weights: https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-effv2-weights/tf_efficientnetv2_l_21ft1k-60127a9d.pth
Code: https://github.com/rwightman/pytorch-image-models/blob/main/timm/models/efficientnet.py
- Name: efficientnetv2-xl_in21k-pre_3rdparty_in1k
Metadata:
Training Data:
- ImageNet-21k
- ImageNet-1k
FLOPs: 98341230592
Parameters: 208119808
In Collection: EfficientNetV2
Results:
- Dataset: ImageNet-1k
Metrics:
Top 1 Accuracy: 86.39
Top 5 Accuracy: 97.83
Task: Image Classification
Weights: https://download.openmmlab.com/mmclassification/v0/efficientnetv2/efficientnetv2-xl_in21k-pre-3rdparty_in1k_20221220-583ac18b.pth
Config: configs/efficientnet_v2/efficientnetv2-xl_8xb32_in1k-512px.py
Converted From:
Weights: https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-effv2-weights/tf_efficientnetv2_xl_in21ft1k-06c35c48.pth
Code: https://github.com/rwightman/pytorch-image-models/blob/main/timm/models/efficientnet.py
- Name: efficientnetv2-s_3rdparty_in21k
Metadata:
FLOPs: 3309720768
Parameters: 48158371
In Collection: EfficientNetV2
Results: null
Weights: https://download.openmmlab.com/mmclassification/v0/efficientnetv2/efficientnetv2-s_3rdparty_in21k_20221220-c0572b56.pth
Config: configs/efficientnet_v2/efficientnetv2-s_8xb32_in21k.py
Converted From:
Weights: https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-effv2-weights/tf_efficientnetv2_s_21k-6337ad01.pth
Code: https://github.com/rwightman/pytorch-image-models/blob/main/timm/models/efficientnet.py
- Name: efficientnetv2-m_3rdparty_in21k
Metadata:
FLOPs: 5861638208
Parameters: 80839239
In Collection: EfficientNetV2
Results: null
Weights: https://download.openmmlab.com/mmclassification/v0/efficientnetv2/efficientnetv2-m_3rdparty_in21k_20221220-073e944c.pth
Config: configs/efficientnet_v2/efficientnetv2-m_8xb32_in21k.py
Converted From:
Weights: https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-effv2-weights/tf_efficientnetv2_m_21k-361418a2.pth
Code: https://github.com/rwightman/pytorch-image-models/blob/main/timm/models/efficientnet.py
- Name: efficientnetv2-l_3rdparty_in21k
Metadata:
FLOPs: 13114950464
Parameters: 145215155
In Collection: EfficientNetV2
Results: null
Weights: https://download.openmmlab.com/mmclassification/v0/efficientnetv2/efficientnetv2-l_3rdparty_in21k_20221220-f28f91e1.pth
Config: configs/efficientnet_v2/efficientnetv2-l_8xb32_in21k.py
Converted From:
Weights: https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-effv2-weights/tf_efficientnetv2_l_21k-91a19ec9.pth
Code: https://github.com/rwightman/pytorch-image-models/blob/main/timm/models/efficientnet.py
- Name: efficientnetv2-xl_3rdparty_in21k
Metadata:
FLOPs: 18855244288
Parameters: 234819691
In Collection: EfficientNetV2
Results: null
Weights: https://download.openmmlab.com/mmclassification/v0/efficientnetv2/efficientnetv2-xl_3rdparty_in21k_20221220-b2c9329c.pth
Config: configs/efficientnet_v2/efficientnetv2-xl_8xb32_in21k.py
Converted From:
Weights: https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-effv2-weights/tf_efficientnetv2_xl_in21k-fd7e8abf.pth
Code: https://github.com/rwightman/pytorch-image-models/blob/main/timm/models/efficientnet.py

@@ -73,6 +73,7 @@ Backbones
EdgeNeXt
EfficientFormer
EfficientNet
EfficientNetV2
HRNet
HorNet
InceptionV3

@@ -12,6 +12,7 @@ from .densenet import DenseNet
from .edgenext import EdgeNeXt
from .efficientformer import EfficientFormer
from .efficientnet import EfficientNet
from .efficientnet_v2 import EfficientNetV2
from .hornet import HorNet
from .hrnet import HRNet
from .inception_v3 import InceptionV3
@@ -78,6 +79,7 @@ __all__ = [
'PCPVT',
'SVT',
'EfficientNet',
'EfficientNetV2',
'ConvNeXt',
'HRNet',
'ResNetV1c',

@@ -69,7 +69,7 @@ class EdgeResidual(BaseModule):
in_channels=in_channels,
out_channels=mid_channels,
kernel_size=kernel_size,
stride=1,
stride=stride,
padding=kernel_size // 2,
conv_cfg=conv_cfg,
norm_cfg=norm_cfg,
@@ -82,9 +82,9 @@ class EdgeResidual(BaseModule):
in_channels=mid_channels,
out_channels=out_channels,
kernel_size=1,
stride=stride,
stride=1,
padding=0,
conv_cfg=conv_cfg,
conv_cfg=None,
norm_cfg=norm_cfg,
act_cfg=None)

@@ -0,0 +1,343 @@
# Copyright (c) OpenMMLab. All rights reserved.
from typing import Sequence, Tuple
import torch
import torch.nn as nn
from mmcv.cnn.bricks import ConvModule, DropPath
from mmengine.model import Sequential
from torch import Tensor
from mmcls.models.backbones.base_backbone import BaseBackbone
from mmcls.models.backbones.efficientnet import EdgeResidual as FusedMBConv
from mmcls.models.utils import InvertedResidual as MBConv
from mmcls.registry import MODELS
class EnhancedConvModule(ConvModule):
"""ConvModule with short-cut and droppath.
Args:
in_channels (int): Number of channels in the input feature map.
Same as that in ``nn._ConvNd``.
out_channels (int): Number of channels produced by the convolution.
Same as that in ``nn._ConvNd``.
kernel_size (int | tuple[int]): Size of the convolving kernel.
Same as that in ``nn._ConvNd``.
stride (int | tuple[int]): Stride of the convolution.
Same as that in ``nn._ConvNd``.
has_skip (bool): Whether there is short-cut. Defaults to False.
drop_path_rate (float): Stochastic depth rate. Default 0.0.
padding (int | tuple[int]): Zero-padding added to both sides of
the input. Same as that in ``nn._ConvNd``.
dilation (int | tuple[int]): Spacing between kernel elements.
Same as that in ``nn._ConvNd``.
groups (int): Number of blocked connections from input channels to
output channels. Same as that in ``nn._ConvNd``.
bias (bool | str): If specified as `auto`, it will be decided by the
norm_cfg. Bias will be set as True if `norm_cfg` is None, otherwise
False. Default: "auto".
conv_cfg (dict): Config dict for convolution layer. Default: None,
which means using conv2d.
norm_cfg (dict): Config dict for normalization layer. Default: None.
act_cfg (dict): Config dict for activation layer.
Default: dict(type='ReLU').
inplace (bool): Whether to use inplace mode for activation.
Default: True.
with_spectral_norm (bool): Whether use spectral norm in conv module.
Default: False.
padding_mode (str): If the `padding_mode` has not been supported by
current `Conv2d` in PyTorch, we will use our own padding layer
instead. Currently, we support ['zeros', 'circular'] with official
implementation and ['reflect'] with our own implementation.
Default: 'zeros'.
order (tuple[str]): The order of conv/norm/activation layers. It is a
sequence of "conv", "norm" and "act". Common examples are
("conv", "norm", "act") and ("act", "conv", "norm").
Default: ('conv', 'norm', 'act').
"""
def __init__(self, *args, has_skip=False, drop_path_rate=0, **kwargs):
super().__init__(*args, **kwargs)
self.has_skip = has_skip
if self.has_skip and (self.in_channels != self.out_channels
or self.stride != (1, 1)):
            raise ValueError('the stride must be 1 and the `in_channels` and'
                             ' `out_channels` must be the same when '
                             '`has_skip` is True in `EnhancedConvModule`.')
self.drop_path = DropPath(
drop_path_rate) if drop_path_rate else nn.Identity()
def forward(self, x: torch.Tensor, **kwargs) -> torch.Tensor:
short_cut = x
x = super().forward(x, **kwargs)
if self.has_skip:
x = self.drop_path(x) + short_cut
return x
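# Usage sketch (illustrative values, not part of the committed file): with
# matching channels and stride 1 the block adds a stochastic-depth residual
# around the conv -> norm -> act stack:
#   block = EnhancedConvModule(
#       24, 24, 3, stride=1, padding=1, has_skip=True, drop_path_rate=0.1,
#       norm_cfg=dict(type='BN'), act_cfg=dict(type='Swish'))
#   block(torch.rand(2, 24, 56, 56))  # same shape as the input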
@MODELS.register_module()
class EfficientNetV2(BaseBackbone):
"""EfficientNetV2 backbone.
A PyTorch implementation of EfficientNetV2 introduced by:
`EfficientNetV2: Smaller Models and Faster Training
<https://arxiv.org/abs/2104.00298>`_
Args:
arch (str): Architecture of efficientnetv2. Defaults to s.
in_channels (int): Number of input image channels. Defaults to 3.
drop_path_rate (float): The ratio of the stochastic depth.
Defaults to 0.0.
out_indices (Sequence[int]): Output from which stages.
Defaults to (-1, ).
frozen_stages (int): Stages to be frozen (all param fixed).
Defaults to 0, which means not freezing any parameters.
conv_cfg (dict): Config dict for convolution layer.
Defaults to None, which means using conv2d.
norm_cfg (dict): Config dict for normalization layer.
Defaults to dict(type='BN').
act_cfg (dict): Config dict for activation layer.
Defaults to dict(type='Swish').
norm_eval (bool): Whether to set norm layers to eval mode, namely,
freeze running stats (mean and var). Note: Effect on Batch Norm
and its variants only. Defaults to False.
with_cp (bool): Use checkpoint or not. Using checkpoint will save some
memory while slowing down the training speed. Defaults to False.
"""
# Parameters to build layers. From left to right:
# - repeat (int): The repeat number of the block in the layer
# - kernel_size (int): The kernel size of the layer
# - stride (int): The stride of the first block of the layer
# - expand_ratio (int, float): The expand_ratio of the mid_channels
# - in_channel (int): The number of in_channels of the layer
# - out_channel (int): The number of out_channels of the layer
    # - se_ratio (float): The squeeze ratio of SELayer.
# - block_type (int): -2: ConvModule, -1: EnhancedConvModule,
# 0: FusedMBConv, 1: MBConv
arch_settings = {
**dict.fromkeys(['small', 's'], [[2, 3, 1, 1, 24, 24, 0.0, -1],
[4, 3, 2, 4, 24, 48, 0.0, 0],
[4, 3, 2, 4, 48, 64, 0.0, 0],
[6, 3, 2, 4, 64, 128, 0.25, 1],
[9, 3, 1, 6, 128, 160, 0.25, 1],
[15, 3, 2, 6, 160, 256, 0.25, 1],
[1, 1, 1, 1, 256, 1280, 0.0, -2]]),
**dict.fromkeys(['m', 'medium'], [[3, 3, 1, 1, 24, 24, 0.0, -1],
[5, 3, 2, 4, 24, 48, 0.0, 0],
[5, 3, 2, 4, 48, 80, 0.0, 0],
[7, 3, 2, 4, 80, 160, 0.25, 1],
[14, 3, 1, 6, 160, 176, 0.25, 1],
[18, 3, 2, 6, 176, 304, 0.25, 1],
[5, 3, 1, 6, 304, 512, 0.25, 1],
[1, 1, 1, 1, 512, 1280, 0.0, -2]]),
**dict.fromkeys(['l', 'large'], [[4, 3, 1, 1, 32, 32, 0.0, -1],
[7, 3, 2, 4, 32, 64, 0.0, 0],
[7, 3, 2, 4, 64, 96, 0.0, 0],
[10, 3, 2, 4, 96, 192, 0.25, 1],
[19, 3, 1, 6, 192, 224, 0.25, 1],
[25, 3, 2, 6, 224, 384, 0.25, 1],
[7, 3, 1, 6, 384, 640, 0.25, 1],
[1, 1, 1, 1, 640, 1280, 0.0, -2]]),
**dict.fromkeys(['xl'], [[4, 3, 1, 1, 32, 32, 0.0, -1],
[8, 3, 2, 4, 32, 64, 0.0, 0],
[8, 3, 2, 4, 64, 96, 0.0, 0],
[16, 3, 2, 4, 96, 192, 0.25, 1],
[24, 3, 1, 6, 192, 256, 0.25, 1],
[32, 3, 2, 6, 256, 512, 0.25, 1],
[8, 3, 1, 6, 512, 640, 0.25, 1],
[1, 1, 1, 1, 640, 1280, 0.0, -2]]),
**dict.fromkeys(['b0'], [[1, 3, 1, 1, 32, 16, 0.0, -1],
[2, 3, 2, 4, 16, 32, 0.0, 0],
[2, 3, 2, 4, 32, 48, 0.0, 0],
[3, 3, 2, 4, 48, 96, 0.25, 1],
[5, 3, 1, 6, 96, 112, 0.25, 1],
[8, 3, 2, 6, 112, 192, 0.25, 1],
[1, 1, 1, 1, 192, 1280, 0.0, -2]]),
**dict.fromkeys(['b1'], [[2, 3, 1, 1, 32, 16, 0.0, -1],
[3, 3, 2, 4, 16, 32, 0.0, 0],
[3, 3, 2, 4, 32, 48, 0.0, 0],
[4, 3, 2, 4, 48, 96, 0.25, 1],
[6, 3, 1, 6, 96, 112, 0.25, 1],
[9, 3, 2, 6, 112, 192, 0.25, 1],
[1, 1, 1, 1, 192, 1280, 0.0, -2]]),
**dict.fromkeys(['b2'], [[2, 3, 1, 1, 32, 16, 0.0, -1],
[3, 3, 2, 4, 16, 32, 0.0, 0],
[3, 3, 2, 4, 32, 56, 0.0, 0],
[4, 3, 2, 4, 56, 104, 0.25, 1],
[6, 3, 1, 6, 104, 120, 0.25, 1],
[10, 3, 2, 6, 120, 208, 0.25, 1],
[1, 1, 1, 1, 208, 1408, 0.0, -2]]),
**dict.fromkeys(['b3'], [[2, 3, 1, 1, 40, 16, 0.0, -1],
[3, 3, 2, 4, 16, 40, 0.0, 0],
[3, 3, 2, 4, 40, 56, 0.0, 0],
[5, 3, 2, 4, 56, 112, 0.25, 1],
[7, 3, 1, 6, 112, 136, 0.25, 1],
[12, 3, 2, 6, 136, 232, 0.25, 1],
[1, 1, 1, 1, 232, 1536, 0.0, -2]])
}
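    # Example: the first 's' row, [2, 3, 1, 1, 24, 24, 0.0, -1], builds a
    # stage of two 3x3 EnhancedConvModule blocks with stride 1, expand
    # ratio 1, 24 -> 24 channels and no SE layer (block_type -1 selects the
    # plain-conv block defined above).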
def __init__(self,
arch: str = 's',
in_channels: int = 3,
drop_path_rate: float = 0.,
out_indices: Sequence[int] = (-1, ),
frozen_stages: int = 0,
conv_cfg=dict(type='Conv2dAdaptivePadding'),
norm_cfg=dict(type='BN', eps=1e-3, momentum=0.1),
act_cfg=dict(type='Swish'),
norm_eval: bool = False,
with_cp: bool = False,
init_cfg=[
dict(type='Kaiming', layer='Conv2d'),
dict(
type='Constant',
layer=['_BatchNorm', 'GroupNorm'],
val=1)
]):
super(EfficientNetV2, self).__init__(init_cfg)
assert arch in self.arch_settings, \
f'"{arch}" is not one of the arch_settings ' \
f'({", ".join(self.arch_settings.keys())})'
self.arch = self.arch_settings[arch]
if frozen_stages not in range(len(self.arch) + 1):
raise ValueError('frozen_stages must be in range(0, '
f'{len(self.arch) + 1}), but got {frozen_stages}')
self.drop_path_rate = drop_path_rate
self.frozen_stages = frozen_stages
self.norm_eval = norm_eval
self.with_cp = with_cp
self.layers = nn.ModuleList()
assert self.arch[-1][-1] == -2, \
f'the last block_type of `arch_setting` must be -2, ' \
f'but got `{self.arch[-1][-1]}`'
self.in_channels = in_channels
self.out_channels = self.arch[-1][5]
self.conv_cfg = conv_cfg
self.norm_cfg = norm_cfg
self.act_cfg = act_cfg
self.make_layers()
# there are len(self.arch) + 2 layers in the backbone,
# including the first conv layer, len(self.arch) stages and the last layer
if isinstance(out_indices, int):
out_indices = [out_indices]
assert isinstance(out_indices, Sequence), \
f'"out_indices" must by a sequence or int, ' \
f'get {type(out_indices)} instead.'
out_indices = list(out_indices)
for i, index in enumerate(out_indices):
if index < 0:
out_indices[i] = len(self.layers) + index
assert 0 <= out_indices[i] < len(self.layers), \
f'Invalid out_indices {index}.'
self.out_indices = out_indices
def make_layers(self):
# make the first layer
self.layers.append(
ConvModule(
in_channels=self.in_channels,
out_channels=self.arch[0][4],
kernel_size=3,
stride=2,
conv_cfg=self.conv_cfg,
norm_cfg=self.norm_cfg,
act_cfg=self.act_cfg))
in_channels = self.arch[0][4]
layer_setting = self.arch[:-1]
total_num_blocks = sum([x[0] for x in layer_setting])
block_idx = 0
dpr = [
x.item()
for x in torch.linspace(0, self.drop_path_rate, total_num_blocks)
] # stochastic depth decay rule
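# e.g. drop_path_rate=0.1 with 10 blocks yields per-block rates growing
# linearly from 0.0 for the first block to 0.1 for the last one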
for layer_cfg in layer_setting:
layer = []
(repeat, kernel_size, stride, expand_ratio, _, out_channels,
se_ratio, block_type) = layer_cfg
for i in range(repeat):
stride = stride if i == 0 else 1
if block_type == -1:
has_skip = stride == 1 and in_channels == out_channels
droppath_rate = dpr[block_idx] if has_skip else 0.0
layer.append(
EnhancedConvModule(
in_channels=in_channels,
out_channels=out_channels,
kernel_size=kernel_size,
has_skip=has_skip,
drop_path_rate=droppath_rate,
stride=stride,
padding=1,
conv_cfg=None,
norm_cfg=self.norm_cfg,
act_cfg=self.act_cfg))
in_channels = out_channels
else:
mid_channels = int(in_channels * expand_ratio)
se_cfg = None
if block_type != 0 and se_ratio > 0:
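# the SE ratio is rescaled by expand_ratio so that the squeezed width
# equals se_ratio * in_channels, e.g. expand_ratio=4 and se_ratio=0.25
# give ratio=16, i.e. mid_channels / 16 channels inside the SELayer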
se_cfg = dict(
channels=mid_channels,
ratio=expand_ratio * (1.0 / se_ratio),
divisor=1,
act_cfg=(self.act_cfg, dict(type='Sigmoid')))
block = FusedMBConv if block_type == 0 else MBConv
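# 'Conv2dAdaptivePadding' (TF-style 'same' padding) is only applied to
# the stride-2 blocks; stride-1 blocks use the default Conv2d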
conv_cfg = self.conv_cfg if stride == 2 else None
layer.append(
block(
in_channels=in_channels,
out_channels=out_channels,
mid_channels=mid_channels,
kernel_size=kernel_size,
stride=stride,
se_cfg=se_cfg,
conv_cfg=conv_cfg,
norm_cfg=self.norm_cfg,
act_cfg=self.act_cfg,
drop_path_rate=dpr[block_idx],
with_cp=self.with_cp))
in_channels = out_channels
block_idx += 1
self.layers.append(Sequential(*layer))
# make the last layer
self.layers.append(
ConvModule(
in_channels=in_channels,
out_channels=self.out_channels,
kernel_size=self.arch[-1][1],
stride=self.arch[-1][2],
conv_cfg=self.conv_cfg,
norm_cfg=self.norm_cfg,
act_cfg=self.act_cfg))
def forward(self, x: Tensor) -> Tuple[Tensor, ...]:
outs = []
for i, layer in enumerate(self.layers):
x = layer(x)
if i in self.out_indices:
outs.append(x)
return tuple(outs)
def _freeze_stages(self):
for i in range(self.frozen_stages):
m = self.layers[i]
m.eval()
for param in m.parameters():
param.requires_grad = False
def train(self, mode=True):
super(EfficientNetV2, self).train(mode)
self._freeze_stages()
if mode and self.norm_eval:
for m in self.modules():
if isinstance(m, nn.BatchNorm2d):
m.eval()
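
For quick reference, a minimal usage sketch of the new backbone, mirroring the
unit test below (the expected shape assumes a 224x224 input, arch='b0' and the
default out_indices=(-1, ), which returns only the final feature map):

import torch
from mmcls.models.backbones import EfficientNetV2

model = EfficientNetV2(arch='b0')
feats = model(torch.randn(1, 3, 224, 224))
assert feats[-1].shape == (1, 1280, 7, 7)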

View File

@ -47,3 +47,4 @@ Import:
- configs/revvit/metafile.yml
- configs/clip/metafile.yml
- configs/mixmim/metafile.yml
- configs/efficientnet_v2/metafile.yml

View File

@ -0,0 +1,150 @@
# Copyright (c) OpenMMLab. All rights reserved.
import pytest
import torch
from torch.nn.modules import GroupNorm
from torch.nn.modules.batchnorm import _BatchNorm
from mmcls.models.backbones import EfficientNetV2
def is_norm(modules):
"""Check if is one of the norms."""
if isinstance(modules, (GroupNorm, _BatchNorm)):
return True
return False
def check_norm_state(modules, train_state):
"""Check if norm layer is in correct train state."""
for mod in modules:
if isinstance(mod, _BatchNorm):
if mod.training != train_state:
return False
return True
def test_efficientnet_v2_backbone():
with pytest.raises(TypeError):
# pretrained must be a string path
model = EfficientNetV2()
model.init_weights(pretrained=0)
with pytest.raises(AssertionError):
# arch must be in arch_settings
EfficientNetV2(arch='others')
with pytest.raises(ValueError):
# frozen_stages must be less than 8
EfficientNetV2(arch='b1', frozen_stages=12)
# Test EfficientNetV2
model = EfficientNetV2()
model.init_weights()
model.train()
x = torch.rand((1, 3, 224, 224))
model(x)
# Test EfficientNetV2 with first stage frozen
frozen_stages = 7
model = EfficientNetV2(arch='b0', frozen_stages=frozen_stages)
model.init_weights()
model.train()
for i in range(frozen_stages):
layer = model.layers[i]
for mod in layer.modules():
if isinstance(mod, _BatchNorm):
assert mod.training is False
for param in layer.parameters():
assert param.requires_grad is False
# Test EfficientNetV2 with norm eval
model = EfficientNetV2(norm_eval=True)
model.init_weights()
model.train()
assert check_norm_state(model.modules(), False)
# Test EfficientNetV2 forward with 'b0' arch
out_channels = [32, 16, 32, 48, 96, 112, 192, 1280]
model = EfficientNetV2(arch='b0', out_indices=(0, 1, 2, 3, 4, 5, 6, 7))
model.init_weights()
model.train()
imgs = torch.randn(1, 3, 224, 224)
feat = model(imgs)
assert len(feat) == 8
assert feat[0].shape == torch.Size([1, out_channels[0], 112, 112])
assert feat[1].shape == torch.Size([1, out_channels[1], 112, 112])
assert feat[2].shape == torch.Size([1, out_channels[2], 56, 56])
assert feat[3].shape == torch.Size([1, out_channels[3], 28, 28])
assert feat[4].shape == torch.Size([1, out_channels[4], 14, 14])
assert feat[5].shape == torch.Size([1, out_channels[5], 14, 14])
assert feat[6].shape == torch.Size([1, out_channels[6], 7, 7])
assert feat[7].shape == torch.Size([1, out_channels[7], 7, 7])
# Test EfficientNetV2 forward with 'b0' arch and GroupNorm
out_channels = [32, 16, 32, 48, 96, 112, 192, 1280]
model = EfficientNetV2(
arch='b0',
out_indices=(0, 1, 2, 3, 4, 5, 6, 7),
norm_cfg=dict(type='GN', num_groups=2, requires_grad=True))
for m in model.modules():
if is_norm(m):
assert isinstance(m, GroupNorm)
model.init_weights()
model.train()
imgs = torch.randn(1, 3, 64, 64)
feat = model(imgs)
assert len(feat) == 8
assert feat[0].shape == torch.Size([1, out_channels[0], 32, 32])
assert feat[1].shape == torch.Size([1, out_channels[1], 32, 32])
assert feat[2].shape == torch.Size([1, out_channels[2], 16, 16])
assert feat[3].shape == torch.Size([1, out_channels[3], 8, 8])
assert feat[4].shape == torch.Size([1, out_channels[4], 4, 4])
assert feat[5].shape == torch.Size([1, out_channels[5], 4, 4])
assert feat[6].shape == torch.Size([1, out_channels[6], 2, 2])
assert feat[7].shape == torch.Size([1, out_channels[7], 2, 2])
# Test EfficientNetV2 forward with 'm' arch
out_channels = [24, 24, 48, 80, 160, 176, 304, 512, 1280]
model = EfficientNetV2(arch='m', out_indices=(0, 1, 2, 3, 4, 5, 6, 7, 8))
model.init_weights()
model.train()
imgs = torch.randn(1, 3, 64, 64)
feat = model(imgs)
assert len(feat) == 9
assert feat[0].shape == torch.Size([1, out_channels[0], 32, 32])
assert feat[1].shape == torch.Size([1, out_channels[1], 32, 32])
assert feat[2].shape == torch.Size([1, out_channels[2], 16, 16])
assert feat[3].shape == torch.Size([1, out_channels[3], 8, 8])
assert feat[4].shape == torch.Size([1, out_channels[4], 4, 4])
assert feat[5].shape == torch.Size([1, out_channels[5], 4, 4])
assert feat[6].shape == torch.Size([1, out_channels[6], 2, 2])
assert feat[7].shape == torch.Size([1, out_channels[7], 2, 2])
assert feat[8].shape == torch.Size([1, out_channels[8], 2, 2])
# Test EfficientNetV2 forward with 'm' arch and GroupNorm
out_channels = [24, 24, 48, 80, 160, 176, 304, 512, 1280]
model = EfficientNetV2(
arch='m',
out_indices=(0, 1, 2, 3, 4, 5, 6, 7, 8),
norm_cfg=dict(type='GN', num_groups=2, requires_grad=True))
for m in model.modules():
if is_norm(m):
assert isinstance(m, GroupNorm)
model.init_weights()
model.train()
imgs = torch.randn(1, 3, 64, 64)
feat = model(imgs)
assert len(feat) == 9
assert feat[0].shape == torch.Size([1, out_channels[0], 32, 32])
assert feat[1].shape == torch.Size([1, out_channels[1], 32, 32])
assert feat[2].shape == torch.Size([1, out_channels[2], 16, 16])
assert feat[3].shape == torch.Size([1, out_channels[3], 8, 8])
assert feat[4].shape == torch.Size([1, out_channels[4], 4, 4])
assert feat[5].shape == torch.Size([1, out_channels[5], 4, 4])
assert feat[6].shape == torch.Size([1, out_channels[6], 2, 2])
assert feat[7].shape == torch.Size([1, out_channels[7], 2, 2])
assert feat[8].shape == torch.Size([1, out_channels[8], 2, 2])

View File

@ -0,0 +1,99 @@
# Copyright (c) OpenMMLab. All rights reserved.
"""convert the weights of efficientnetv2 in
timm(https://github.com/rwightman/pytorch-image-models) to mmcls format."""
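# Example usage (the script and checkpoint names below are illustrative):
#   python efficientnetv2_to_mmcls.py tf_efficientnetv2_b0.pth \
#       efficientnetv2-b0_converted.pth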
import argparse
import os.path as osp
import mmengine
import torch
from mmengine.runner import CheckpointLoader
def convert_from_efficientnetv2_timm(param):
# main change_key
param_lst = list(param.keys())
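# param_lst[-9] is expected to be the last 'blocks.<stage>...' key in the
# timm checkpoint (the 8 conv_head/bn2/classifier keys follow it); its
# character at index 7 is the last stage digit, and +2 accounts for the
# stem layer plus the one-stage shift, giving the index of the final conv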
op = str(int(param_lst[-9][7]) + 2)
new_key = dict()
for name in param_lst:
data = param[name]
if 'blocks' not in name:
if 'conv_stem' in name:
name = name.replace('conv_stem', 'backbone.layers.0.conv')
if 'bn1' in name:
name = name.replace('bn1', 'backbone.layers.0.bn')
if 'conv_head' in name:
# for efficientnetv2 s/b0/b1/b2/b3, op = 7
# for m/l/xl, op = 8
name = name.replace('conv_head', f'backbone.layers.{op}.conv')
if 'bn2' in name:
name = name.replace('bn2', f'backbone.layers.{op}.bn')
if 'classifier' in name:
name = name.replace('classifier', 'head.fc')
else:
operator = int(name[7])
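# character 7 of a 'blocks.<stage>...' key is the stage digit; every
# stage index shifts by one in mmcls because layers.0 is the stem conv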
if operator == 0:
name = name[:7] + str(operator + 1) + name[8:]
name = name.replace('blocks', 'backbone.layers')
if 'bn1' in name:
name = name.replace('bn1', 'bn')
elif operator < 3:
name = name[:7] + str(operator + 1) + name[8:]
name = name.replace('blocks', 'backbone.layers')
if 'conv_exp' in name:
name = name.replace('conv_exp', 'conv1.conv')
if 'conv_pwl' in name:
name = name.replace('conv_pwl', 'conv2.conv')
if 'bn1' in name:
name = name.replace('bn1', 'conv1.bn')
if 'bn2' in name:
name = name.replace('bn2', 'conv2.bn')
else:
name = name[:7] + str(operator + 1) + name[8:]
name = name.replace('blocks', 'backbone.layers')
if 'conv_pwl' in name:
name = name.replace('conv_pwl', 'linear_conv.conv')
if 'conv_pw' in name:
name = name.replace('conv_pw', 'expand_conv.conv')
if 'conv_dw' in name:
name = name.replace('conv_dw', 'depthwise_conv.conv')
if 'bn1' in name:
name = name.replace('bn1', 'expand_conv.bn')
if 'bn2' in name:
name = name.replace('bn2', 'depthwise_conv.bn')
if 'bn3' in name:
name = name.replace('bn3', 'linear_conv.bn')
if 'se.conv_reduce' in name:
name = name.replace('se.conv_reduce', 'se.conv1.conv')
if 'se.conv_expand' in name:
name = name.replace('se.conv_expand', 'se.conv2.conv')
new_key[name] = data
return new_key
def main():
parser = argparse.ArgumentParser(
description='Convert pretrained efficientnetv2 '
'models in timm to mmcls style.')
parser.add_argument('src', help='src model path or url')
# The dst path must be a full path of the new checkpoint.
parser.add_argument('dst', help='save path')
args = parser.parse_args()
checkpoint = CheckpointLoader.load_checkpoint(args.src, map_location='cpu')
if 'state_dict' in checkpoint:
state_dict = checkpoint['state_dict']
else:
state_dict = checkpoint
weight = convert_from_efficientnetv2_timm(state_dict)
mmengine.mkdir_or_exist(osp.dirname(args.dst))
torch.save(weight, args.dst)
print('Done!!')
if __name__ == '__main__':
main()