diff --git a/configs/_base_/datasets/imagenet_bs128_swin_224.py b/configs/_base_/datasets/imagenet_bs64_swin_224.py
similarity index 99%
rename from configs/_base_/datasets/imagenet_bs128_swin_224.py
rename to configs/_base_/datasets/imagenet_bs64_swin_224.py
index 7209cce7..d8c9c856 100644
--- a/configs/_base_/datasets/imagenet_bs128_swin_224.py
+++ b/configs/_base_/datasets/imagenet_bs64_swin_224.py
@@ -101,7 +101,7 @@ test_pipeline = [
     dict(type='Collect', keys=['img'])
 ]
 data = dict(
-    samples_per_gpu=128,
+    samples_per_gpu=64,
     workers_per_gpu=8,
     train=dict(
         type=dataset_type,
diff --git a/configs/_base_/datasets/imagenet_bs128_swin_384.py b/configs/_base_/datasets/imagenet_bs64_swin_384.py
similarity index 98%
rename from configs/_base_/datasets/imagenet_bs128_swin_384.py
rename to configs/_base_/datasets/imagenet_bs64_swin_384.py
index 5ac70cb0..d2639399 100644
--- a/configs/_base_/datasets/imagenet_bs128_swin_384.py
+++ b/configs/_base_/datasets/imagenet_bs64_swin_384.py
@@ -23,7 +23,7 @@ test_pipeline = [
     dict(type='Collect', keys=['img'])
 ]
 data = dict(
-    samples_per_gpu=128,
+    samples_per_gpu=64,
     workers_per_gpu=8,
     train=dict(
         type=dataset_type,
diff --git a/configs/_base_/models/swin_transformer/base_384.py b/configs/_base_/models/swin_transformer/base_384.py
index 9fb42893..7d125d54 100644
--- a/configs/_base_/models/swin_transformer/base_384.py
+++ b/configs/_base_/models/swin_transformer/base_384.py
@@ -6,7 +6,7 @@ model = dict(
         type='SwinTransformer',
         arch='base',
         img_size=384,
-        stage_cfg=dict(block_cfg=dict(window_size=12))),
+        stage_cfgs=dict(block_cfgs=dict(window_size=12))),
     neck=dict(type='GlobalAveragePooling', dim=1),
     head=dict(
         type='LinearClsHead',
diff --git a/configs/_base_/models/swin_transformer/large_384.py b/configs/_base_/models/swin_transformer/large_384.py
index a38b182c..bbdf0611 100644
--- a/configs/_base_/models/swin_transformer/large_384.py
+++ b/configs/_base_/models/swin_transformer/large_384.py
@@ -6,7 +6,7 @@ model = dict(
         type='SwinTransformer',
         arch='large',
         img_size=384,
-        stage_cfg=dict(block_cfg=dict(window_size=12))),
+        stage_cfgs=dict(block_cfgs=dict(window_size=12))),
     neck=dict(type='GlobalAveragePooling', dim=1),
     head=dict(
         type='LinearClsHead',
diff --git a/configs/swin_transformer/README.md b/configs/swin_transformer/README.md
index fa880227..1bb590d3 100644
--- a/configs/swin_transformer/README.md
+++ b/configs/swin_transformer/README.md
@@ -36,6 +36,6 @@ The pre-trained modles are converted from [model zoo of Swin Transformer](https:
 ### ImageNet
 | Model | Pretrain | resolution | Params(M) | Flops(G) | Top-1 (%) | Top-5 (%) | Config | Download |
 |:---------:|:------------:|:-----------:|:---------:|:---------:|:---------:|:---------:|:----------:|:--------:|
-| Swin-T | ImageNet-1k | 224x224 | 28.29 | 4.36 | 81.18 | 95.61 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/swin_transformer/swin_tiny_224_imagenet.py) |[model](https://download.openmmlab.com/mmclassification/v0/swin-transformer/swin_tiny_224_imagenet-66df6be6.pth) | [log](https://download.openmmlab.com/mmclassification/v0/swin-transformer/swin_tiny_224_imagenet-66df6be6.log.json)|
-| Swin-S | ImageNet-1k | 224x224 | 49.61 | 8.52 | 83.02 | 96.29 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/swin_transformer/swin_small_224_imagenet.py) | [model](https://download.openmmlab.com/mmclassification/v0/swin-transformer/swin_small_224_imagenet-7f9d988b.pth) | [log](https://download.openmmlab.com/mmclassification/v0/swin-transformer/swin_small_224_imagenet-7f9d988b.log.json)|
-| Swin-B | ImageNet-1k | 224x224 | 87.77 | 15.14 | 83.36 | 96.44 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/swin_transformer/swin_base_224_imagenet.py) | [model](https://download.openmmlab.com/mmclassification/v0/swin-transformer/swin_base_224_imagenet-93230b0d.pth) | [log](https://download.openmmlab.com/mmclassification/v0/swin-transformer/swin_base_224_imagenet-93230b0d.log.json)|
+| Swin-T | ImageNet-1k | 224x224 | 28.29 | 4.36 | 81.18 | 95.61 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/swin_transformer/swin_tiny_224_b16x64_300e_imagenet.py) | [model](https://download.openmmlab.com/mmclassification/v0/swin-transformer/swin_tiny_224_b16x64_300e_imagenet_20210616_090925-66df6be6.pth) | [log](https://download.openmmlab.com/mmclassification/v0/swin-transformer/swin_tiny_224_b16x64_300e_imagenet_20210616_090925.log.json)|
+| Swin-S | ImageNet-1k | 224x224 | 49.61 | 8.52 | 83.02 | 96.29 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/swin_transformer/swin_small_224_b16x64_300e_imagenet.py) | [model](https://download.openmmlab.com/mmclassification/v0/swin-transformer/swin_small_224_b16x64_300e_imagenet_20210615_110219-7f9d988b.pth) | [log](https://download.openmmlab.com/mmclassification/v0/swin-transformer/swin_small_224_b16x64_300e_imagenet_20210615_110219.log.json)|
+| Swin-B | ImageNet-1k | 224x224 | 87.77 | 15.14 | 83.36 | 96.44 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/swin_transformer/swin_base_224_b16x64_300e_imagenet.py) | [model](https://download.openmmlab.com/mmclassification/v0/swin-transformer/swin_base_224_b16x64_300e_imagenet_20210616_190742-93230b0d.pth) | [log](https://download.openmmlab.com/mmclassification/v0/swin-transformer/swin_base_224_b16x64_300e_imagenet_20210616_190742.log.json)|
diff --git a/configs/swin_transformer/metafile.yml b/configs/swin_transformer/metafile.yml
index fdd34591..e8a12635 100644
--- a/configs/swin_transformer/metafile.yml
+++ b/configs/swin_transformer/metafile.yml
@@ -14,7 +14,7 @@ Collections:
   README: configs/swin_transformer/README.md
 
 Models:
-- Config: configs/swin_transformer/swin_tiny_224_imagenet.py
+- Config: configs/swin_transformer/swin_tiny_224_b16x64_300e_imagenet.py
   In Collection: Swin-Transformer
   Metadata:
     FLOPs: 4360000000
@@ -30,8 +30,8 @@ Models:
       Top 1 Accuracy: 81.18
       Top 5 Accuracy: 95.61
     Task: Image Classification
-  Weights: https://download.openmmlab.com/mmclassification/v0/swin-transformer/swin_tiny_224_imagenet-66df6be6.pth
-- Config: configs/swin_transformer/swin_small_224_imagenet.py
+  Weights: https://download.openmmlab.com/mmclassification/v0/swin-transformer/swin_tiny_224_b16x64_300e_imagenet_20210616_090925-66df6be6.pth
+- Config: configs/swin_transformer/swin_small_224_b16x64_300e_imagenet.py
   In Collection: Swin-Transformer
   Metadata:
     FLOPs: 8520000000
@@ -47,8 +47,8 @@ Models:
       Top 1 Accuracy: 83.02
       Top 5 Accuracy: 96.29
     Task: Image Classification
-  Weights: https://download.openmmlab.com/mmclassification/v0/swin-transformer/swin_small_224_imagenet-7f9d988b.pth
-- Config: configs/swin_transformer/swin_base_224_imagenet.py
+  Weights: https://download.openmmlab.com/mmclassification/v0/swin-transformer/swin_small_224_b16x64_300e_imagenet_20210615_110219-7f9d988b.pth
+- Config: configs/swin_transformer/swin_base_224_b16x64_300e_imagenet.py
   In Collection: Swin-Transformer
   Metadata:
     FLOPs: 15140000000
@@ -64,4 +64,4 @@ Models:
       Top 1 Accuracy: 83.36
       Top 5 Accuracy: 96.44
     Task: Image Classification
-  Weights: https://download.openmmlab.com/mmclassification/v0/swin-transformer/swin_base_224_imagenet-93230b0d.pth
+  Weights: https://download.openmmlab.com/mmclassification/v0/swin-transformer/swin_base_224_b16x64_300e_imagenet_20210616_190742-93230b0d.pth
diff --git a/configs/swin_transformer/swin_base_224_imagenet.py b/configs/swin_transformer/swin_base_224_b16x64_300e_imagenet.py
similarity index 74%
rename from configs/swin_transformer/swin_base_224_imagenet.py
rename to configs/swin_transformer/swin_base_224_b16x64_300e_imagenet.py
index e1ce91d9..2a4548af 100644
--- a/configs/swin_transformer/swin_base_224_imagenet.py
+++ b/configs/swin_transformer/swin_base_224_b16x64_300e_imagenet.py
@@ -1,6 +1,6 @@
 _base_ = [
     '../_base_/models/swin_transformer/base_224.py',
-    '../_base_/datasets/imagenet_bs128_swin_224.py',
+    '../_base_/datasets/imagenet_bs64_swin_224.py',
     '../_base_/schedules/imagenet_bs1024_adamw_swin.py',
     '../_base_/default_runtime.py'
 ]
diff --git a/configs/swin_transformer/swin_base_384_imagenet.py b/configs/swin_transformer/swin_base_384_evalonly_imagenet.py
similarity index 77%
rename from configs/swin_transformer/swin_base_384_imagenet.py
rename to configs/swin_transformer/swin_base_384_evalonly_imagenet.py
index 194de565..711a0d6d 100644
--- a/configs/swin_transformer/swin_base_384_imagenet.py
+++ b/configs/swin_transformer/swin_base_384_evalonly_imagenet.py
@@ -1,7 +1,7 @@
 # Only for evaluation
 _base_ = [
     '../_base_/models/swin_transformer/base_384.py',
-    '../_base_/datasets/imagenet_bs128_swin_384.py',
+    '../_base_/datasets/imagenet_bs64_swin_384.py',
     '../_base_/schedules/imagenet_bs1024_adamw_swin.py',
     '../_base_/default_runtime.py'
 ]
diff --git a/configs/swin_transformer/swin_large_224_imagenet.py b/configs/swin_transformer/swin_large_224_evalonly_imagenet.py
similarity index 77%
rename from configs/swin_transformer/swin_large_224_imagenet.py
rename to configs/swin_transformer/swin_large_224_evalonly_imagenet.py
index 39c896bf..4e875c59 100644
--- a/configs/swin_transformer/swin_large_224_imagenet.py
+++ b/configs/swin_transformer/swin_large_224_evalonly_imagenet.py
@@ -1,7 +1,7 @@
 # Only for evaluation
 _base_ = [
     '../_base_/models/swin_transformer/large_224.py',
-    '../_base_/datasets/imagenet_bs128_swin_224.py',
+    '../_base_/datasets/imagenet_bs64_swin_224.py',
     '../_base_/schedules/imagenet_bs1024_adamw_swin.py',
     '../_base_/default_runtime.py'
 ]
diff --git a/configs/swin_transformer/swin_large_384_imagenet.py b/configs/swin_transformer/swin_large_384_evalonly_imagenet.py
similarity index 77%
rename from configs/swin_transformer/swin_large_384_imagenet.py
rename to configs/swin_transformer/swin_large_384_evalonly_imagenet.py
index 6f4b7b7a..a7f0ad27 100644
--- a/configs/swin_transformer/swin_large_384_imagenet.py
+++ b/configs/swin_transformer/swin_large_384_evalonly_imagenet.py
@@ -1,7 +1,7 @@
 # Only for evaluation
 _base_ = [
     '../_base_/models/swin_transformer/large_384.py',
-    '../_base_/datasets/imagenet_bs128_swin_384.py',
+    '../_base_/datasets/imagenet_bs64_swin_384.py',
     '../_base_/schedules/imagenet_bs1024_adamw_swin.py',
     '../_base_/default_runtime.py'
 ]
diff --git a/configs/swin_transformer/swin_small_224_imagenet.py b/configs/swin_transformer/swin_small_224_b16x64_300e_imagenet.py
similarity index 75%
rename from configs/swin_transformer/swin_small_224_imagenet.py
rename to configs/swin_transformer/swin_small_224_b16x64_300e_imagenet.py
index 1bf08afa..aa1fa21b 100644
--- a/configs/swin_transformer/swin_small_224_imagenet.py
+++ b/configs/swin_transformer/swin_small_224_b16x64_300e_imagenet.py
@@ -1,6 +1,6 @@
 _base_ = [
     '../_base_/models/swin_transformer/small_224.py',
-    '../_base_/datasets/imagenet_bs128_swin_224.py',
+    '../_base_/datasets/imagenet_bs64_swin_224.py',
     '../_base_/schedules/imagenet_bs1024_adamw_swin.py',
     '../_base_/default_runtime.py'
 ]
diff --git a/configs/swin_transformer/swin_tiny_224_imagenet.py b/configs/swin_transformer/swin_tiny_224_b16x64_300e_imagenet.py
similarity index 74%
rename from configs/swin_transformer/swin_tiny_224_imagenet.py
rename to configs/swin_transformer/swin_tiny_224_b16x64_300e_imagenet.py
index 7f537864..e1ed022a 100644
--- a/configs/swin_transformer/swin_tiny_224_imagenet.py
+++ b/configs/swin_transformer/swin_tiny_224_b16x64_300e_imagenet.py
@@ -1,6 +1,6 @@
 _base_ = [
     '../_base_/models/swin_transformer/tiny_224.py',
-    '../_base_/datasets/imagenet_bs128_swin_224.py',
+    '../_base_/datasets/imagenet_bs64_swin_224.py',
     '../_base_/schedules/imagenet_bs1024_adamw_swin.py',
     '../_base_/default_runtime.py'
 ]
diff --git a/docs/model_zoo.md b/docs/model_zoo.md
index 1c1baa41..8c9fb208 100644
--- a/docs/model_zoo.md
+++ b/docs/model_zoo.md
@@ -40,9 +40,9 @@ The ResNet family models below are trained by standard data augmentations, i.e.,
 | ViT-B/32* | 88.3 | 8.56 | 81.73 | 96.13 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/vision_transformer/vit_base_patch32_384_finetune_imagenet.py) | [model](https://download.openmmlab.com/mmclassification/v0/vit/vit_base_patch32_384.pth) | [log]() |
 | ViT-L/16* | 304.72 | 116.68 | 85.08 | 97.38 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/vision_transformer/vit_large_patch16_384_finetune_imagenet.py) | [model](https://download.openmmlab.com/mmclassification/v0/vit/vit_large_patch16_384.pth) | [log]() |
 | ViT-L/32* | 306.63 | 29.66 | 81.52 | 96.06 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/vision_transformer/vit_large_patch32_384_finetune_imagenet.py) | [model](https://download.openmmlab.com/mmclassification/v0/vit/vit_large_patch32_384.pth) | [log]() |
-| Swin-Transformer tiny | 28.29 | 4.36 | 81.18 | 95.61 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/swin_transformer/swin_tiny_224_imagenet.py) | [model](https://download.openmmlab.com/mmclassification/v0/swin-transformer/swin_tiny_224_imagenet-66df6be6.pth) | [log](https://download.openmmlab.com/mmclassification/v0/swin-transformer/swin_tiny_224_imagenet-66df6be6.log.json)|
-| Swin-Transformer small| 49.61 | 8.52 | 83.02 | 96.29 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/swin_transformer/swin_small_224_imagenet.py) | [model](https://download.openmmlab.com/mmclassification/v0/swin-transformer/swin_small_224_imagenet-7f9d988b.pth) | [log](https://download.openmmlab.com/mmclassification/v0/swin-transformer/swin_small_224_imagenet-7f9d988b.log.json)|
-| Swin-Transformer base | 87.77 | 15.14 | 83.36 | 96.44 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/swin_transformer/swin_base_224_imagenet.py) | [model](https://download.openmmlab.com/mmclassification/v0/swin-transformer/swin_base_224_imagenet-93230b0d.pth) | [log](https://download.openmmlab.com/mmclassification/v0/swin-transformer/swin_base_224_imagenet-93230b0d.log.json)|
+| Swin-Transformer tiny | 28.29 | 4.36 | 81.18 | 95.61 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/swin_transformer/swin_tiny_224_b16x64_300e_imagenet.py) | [model](https://download.openmmlab.com/mmclassification/v0/swin-transformer/swin_tiny_224_b16x64_300e_imagenet_20210616_090925-66df6be6.pth) | [log](https://download.openmmlab.com/mmclassification/v0/swin-transformer/swin_tiny_224_b16x64_300e_imagenet_20210616_090925.log.json)|
+| Swin-Transformer small| 49.61 | 8.52 | 83.02 | 96.29 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/swin_transformer/swin_small_224_b16x64_300e_imagenet.py) | [model](https://download.openmmlab.com/mmclassification/v0/swin-transformer/swin_small_224_b16x64_300e_imagenet_20210615_110219-7f9d988b.pth) | [log](https://download.openmmlab.com/mmclassification/v0/swin-transformer/swin_small_224_b16x64_300e_imagenet_20210615_110219.log.json)|
+| Swin-Transformer base | 87.77 | 15.14 | 83.36 | 96.44 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/swin_transformer/swin_base_224_b16x64_300e_imagenet.py) | [model](https://download.openmmlab.com/mmclassification/v0/swin-transformer/swin_base_224_b16x64_300e_imagenet_20210616_190742-93230b0d.pth) | [log](https://download.openmmlab.com/mmclassification/v0/swin-transformer/swin_base_224_b16x64_300e_imagenet_20210616_190742.log.json)|
 
 Models with * are converted from other repos, others are trained by ourselves.
 
diff --git a/mmcls/models/backbones/swin_transformer.py b/mmcls/models/backbones/swin_transformer.py
index 1551f452..b55ff829 100644
--- a/mmcls/models/backbones/swin_transformer.py
+++ b/mmcls/models/backbones/swin_transformer.py
@@ -134,7 +134,7 @@ class SwinBlockSequence(BaseModule):
             drop_paths = [drop_paths] * depth
 
         if not isinstance(block_cfgs, Sequence):
-            block_cfg = [deepcopy(block_cfgs) for _ in range(depth)]
+            block_cfgs = [deepcopy(block_cfgs) for _ in range(depth)]
 
         self.blocks = ModuleList()
         for i in range(depth):
@@ -145,7 +145,7 @@ class SwinBlockSequence(BaseModule):
                 'shift': False if i % 2 == 0 else True,
                 'drop_path': drop_paths[i],
                 'auto_pad': auto_pad,
-                **block_cfg[i]
+                **block_cfgs[i]
             }
             block = SwinBlock(**_block_cfg)
             self.blocks.append(block)
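Note on the backbone hunks above: in `SwinBlockSequence.__init__`, the broadcast list was previously bound to a new name (`block_cfg`) while the per-block unpacking read `block_cfg[i]`, so whenever a sequence of per-block configs was passed the name was never assigned and construction failed with a NameError; reusing `block_cfgs` on both sides keeps the broadcast-or-pass-through behaviour in one variable. The sketch below is illustrative only (the helper name `normalize_block_cfgs` is invented for this note, not part of mmcls) and shows that pattern, plus how the corrected `stage_cfgs=dict(block_cfgs=dict(window_size=12))` keys from the 384-px model configs end up as per-block overrides.

# Minimal, self-contained sketch of the broadcast-or-pass-through pattern the
# fix restores -- not the mmcls source; `normalize_block_cfgs` is hypothetical.
from copy import deepcopy
from collections.abc import Sequence


def normalize_block_cfgs(block_cfgs, depth):
    """Return one config dict per block.

    A single dict (e.g. dict(window_size=12)) is broadcast to every block via
    deepcopy so blocks do not share a mutable config; a sequence is assumed to
    already hold one dict per block and is used as-is.
    """
    if not isinstance(block_cfgs, Sequence):
        block_cfgs = [deepcopy(block_cfgs) for _ in range(depth)]
    assert len(block_cfgs) == depth, 'need one block config per block'
    return block_cfgs


# Dict form, as written in base_384.py / large_384.py after the key rename.
cfgs = normalize_block_cfgs(dict(window_size=12), depth=2)
per_block = [{'shift': i % 2 == 1, **cfgs[i]} for i in range(2)]
print(per_block)
# [{'shift': False, 'window_size': 12}, {'shift': True, 'window_size': 12}]

# Sequence form, which raised NameError before the fix because only the
# non-sequence branch assigned the (differently named) variable.
cfgs = normalize_block_cfgs([dict(window_size=7), dict(window_size=12)], depth=2)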