diff --git a/configs/_base_/default_runtime.py b/configs/_base_/default_runtime.py
index 90240e49..ba965a45 100644
--- a/configs/_base_/default_runtime.py
+++ b/configs/_base_/default_runtime.py
@@ -8,8 +8,6 @@ log_config = dict(
         # dict(type='TensorboardLoggerHook')
     ])
 # yapf:enable
-# You can register your own hooks like this
-# custom_hooks=[dict(type='EMAHook')]
 
 dist_params = dict(backend='nccl')
 log_level = 'INFO'
diff --git a/configs/_base_/schedules/imagenet_bs4096_AdamW.py b/configs/_base_/schedules/imagenet_bs4096_AdamW.py
new file mode 100644
index 00000000..859cf4b2
--- /dev/null
+++ b/configs/_base_/schedules/imagenet_bs4096_AdamW.py
@@ -0,0 +1,18 @@
+# optimizer
+optimizer = dict(type='AdamW', lr=0.003, weight_decay=0.3)
+optimizer_config = dict(grad_clip=dict(max_norm=1.0))
+
+# specific to vit pretrain
+paramwise_cfg = dict(
+    custom_keys={
+        '.backbone.cls_token': dict(decay_mult=0.0),
+        '.backbone.pos_embed': dict(decay_mult=0.0)
+    })
+# learning policy
+lr_config = dict(
+    policy='CosineAnnealing',
+    min_lr=0,
+    warmup='linear',
+    warmup_iters=10000,
+    warmup_ratio=1e-4)
+runner = dict(type='EpochBasedRunner', max_epochs=300)
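For context (not part of the patch): a minimal sketch of how a `paramwise_cfg` like the one above is typically consumed. In mmcv, `DefaultOptimizerConstructor` matches each `custom_keys` entry against parameter names and scales that parameter's weight decay by `decay_mult`, so `cls_token` and `pos_embed` would train with zero weight decay while everything else keeps 0.3. The `nn.Conv2d` model below is a hypothetical stand-in; how the top-level `paramwise_cfg` variable gets wired into the optimizer is left to the configs that inherit this schedule.

```python
# Sketch only: how mmcv's optimizer builder applies custom_keys.
import torch.nn as nn
from mmcv.runner import build_optimizer

model = nn.Conv2d(3, 8, 3)  # hypothetical stand-in for a ViT classifier

optimizer_cfg = dict(
    type='AdamW',
    lr=0.003,
    weight_decay=0.3,
    # Parameters whose names contain these keys get
    # weight_decay = 0.3 * decay_mult = 0.
    paramwise_cfg=dict(
        custom_keys={
            '.backbone.cls_token': dict(decay_mult=0.0),
            '.backbone.pos_embed': dict(decay_mult=0.0)
        }))

optimizer = build_optimizer(model, optimizer_cfg)
print(optimizer)  # AdamW with per-parameter-group settings
```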
diff --git a/configs/vision_transformer/vit_base_patch16_224_pretrain_imagenet.py b/configs/vision_transformer/vit_base_patch16_224_pretrain_imagenet.py
index 8513938c..55f02496 100644
--- a/configs/vision_transformer/vit_base_patch16_224_pretrain_imagenet.py
+++ b/configs/vision_transformer/vit_base_patch16_224_pretrain_imagenet.py
@@ -1,7 +1,7 @@
 _base_ = [
     '../_base_/models/vit_base_patch16_224_pretrain.py',
-    '../_base_/datasets/imagenet_bs32_pil_resize.py',
-    '../_base_/schedules/imagenet_bs2048_AdamW.py',
+    '../_base_/datasets/imagenet_bs64_pil_resize.py',
+    '../_base_/schedules/imagenet_bs4096_AdamW.py',
     '../_base_/default_runtime.py'
 ]
 
@@ -123,7 +123,7 @@ test_pipeline = [
     dict(type='Collect', keys=['img'])
 ]
 data = dict(
-    samples_per_gpu=32,
+    samples_per_gpu=64,
     workers_per_gpu=2,
     train=dict(
         type=dataset_type,
diff --git a/configs/vision_transformer/vit_large_patch16_224_finetune_imagenet.py b/configs/vision_transformer/vit_large_patch16_224_finetune_imagenet.py
index 7809d26b..a7410b77 100644
--- a/configs/vision_transformer/vit_large_patch16_224_finetune_imagenet.py
+++ b/configs/vision_transformer/vit_large_patch16_224_finetune_imagenet.py
@@ -8,14 +8,3 @@ _base_ = [
 
 img_norm_cfg = dict(
     mean=[127.5, 127.5, 127.5], std=[127.5, 127.5, 127.5], to_rgb=True)
-
-test_pipeline = [
-    dict(type='LoadImageFromFile'),
-    dict(type='Resize', size=(384, -1), backend='pillow'),
-    dict(type='CenterCrop', crop_size=384),
-    dict(type='Normalize', **img_norm_cfg),
-    dict(type='ImageToTensor', keys=['img']),
-    dict(type='Collect', keys=['img'])
-]
-
-data = dict(test=dict(pipeline=test_pipeline))
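Because the finetune config above no longer overrides `test_pipeline`, it falls back to whatever its `_base_` files define. A quick way to verify the merged result is a sketch like the following, assuming it is run from the repository root:

```python
# Sketch only: inspect how mmcv.Config merges the _base_ chain.
from mmcv import Config

cfg = Config.fromfile(
    'configs/vision_transformer/vit_large_patch16_224_finetune_imagenet.py')
# The test pipeline now comes from the inherited _base_ files,
# not from a local override in this config.
print(cfg.data.test.pipeline)
```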
diff --git a/docs/model_zoo.md b/docs/model_zoo.md
index 53160654..5ae3af1f 100644
--- a/docs/model_zoo.md
+++ b/docs/model_zoo.md
@@ -20,10 +20,10 @@ The ResNet family models below are trained by standard data augmentations, i.e.,
 | ResNet-50 | 25.56 | 4.12 | 76.55 | 93.15 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnet/resnet50_b32x8_imagenet.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_batch256_imagenet_20200708-cfb998bf.pth) | [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnet50_batch256_imagenet_20200708-cfb998bf.log.json) |
 | ResNet-101 | 44.55 | 7.85 | 78.18 | 94.03 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnet/resnet101_b32x8_imagenet.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnet101_batch256_imagenet_20200708-753f3608.pth) | [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnet101_batch256_imagenet_20200708-753f3608.log.json) |
 | ResNet-152 | 60.19 | 11.58 | 78.63 | 94.16 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnet/resnet152_b32x8_imagenet.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnet152_batch256_imagenet_20200708-ec25b1f9.pth) | [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnet152_batch256_imagenet_20200708-ec25b1f9.log.json) |
-| ResNeSt-50 | 27.48 | 5.41 | 81.13 | 95.59 | | [model](https://download.openmmlab.com/mmclassification/v0/resnest/resnest50_imagenet_converted-1ebf0afe.pth) | [log]() |
-| ResNeSt-101 | 48.28 | 10.27 | 82.32 | 96.24 | | [model](https://download.openmmlab.com/mmclassification/v0/resnest/resnest101_imagenet_converted-032caa52.pth) | [log]() |
-| ResNeSt-200 | 70.2 | 17.53 | 82.41 | 96.22 | | [model](https://download.openmmlab.com/mmclassification/v0/resnest/resnest200_imagenet_converted-581a60f2.pth) | [log]() |
-| ResNeSt-269 | 110.93 | 22.58 | 82.70 | 96.28 | | [model](https://download.openmmlab.com/mmclassification/v0/resnest/resnest269_imagenet_converted-59930960.pth) | [log]() |
+| ResNeSt-50* | 27.48 | 5.41 | 81.13 | 95.59 | | [model](https://download.openmmlab.com/mmclassification/v0/resnest/resnest50_imagenet_converted-1ebf0afe.pth) | [log]() |
+| ResNeSt-101* | 48.28 | 10.27 | 82.32 | 96.24 | | [model](https://download.openmmlab.com/mmclassification/v0/resnest/resnest101_imagenet_converted-032caa52.pth) | [log]() |
+| ResNeSt-200* | 70.2 | 17.53 | 82.41 | 96.22 | | [model](https://download.openmmlab.com/mmclassification/v0/resnest/resnest200_imagenet_converted-581a60f2.pth) | [log]() |
+| ResNeSt-269* | 110.93 | 22.58 | 82.70 | 96.28 | | [model](https://download.openmmlab.com/mmclassification/v0/resnest/resnest269_imagenet_converted-59930960.pth) | [log]() |
 | ResNetV1D-50 | 25.58 | 4.36 | 77.4 | 93.66 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnet/resnetv1d50_b32x8_imagenet.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnetv1d50_batch256_imagenet_20200708-1ad0ce94.pth) | [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnetv1d50_batch256_imagenet_20200708-1ad0ce94.log.json) |
 | ResNetV1D-101 | 44.57 | 8.09 | 78.85 | 94.38 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnet/resnetv1d101_b32x8_imagenet.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnetv1d101_batch256_imagenet_20200708-9cb302ef.pth) | [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnetv1d101_batch256_imagenet_20200708-9cb302ef.log.json) |
 | ResNetV1D-152 | 60.21 | 11.82 | 79.35 | 94.61 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/resnet/resnetv1d152_b32x8_imagenet.py) | [model](https://download.openmmlab.com/mmclassification/v0/resnet/resnetv1d152_batch256_imagenet_20200708-e79cb6a2.pth) | [log](https://download.openmmlab.com/mmclassification/v0/resnet/resnetv1d152_batch256_imagenet_20200708-e79cb6a2.log.json) |
@@ -36,6 +36,10 @@ The ResNet family models below are trained by standard data augmentations, i.e.,
 | ShuffleNetV1 1.0x (group=3) | 1.87 | 0.146 | 68.13 | 87.81 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/shufflenet_v1/shufflenet_v1_1x_b64x16_linearlr_bn_nowd_imagenet.py) | [model](https://download.openmmlab.com/mmclassification/v0/shufflenet_v1/shufflenet_v1_batch1024_imagenet_20200804-5d6cec73.pth) | [log](https://download.openmmlab.com/mmclassification/v0/shufflenet_v1/shufflenet_v1_batch1024_imagenet_20200804-5d6cec73.log.json) |
 | ShuffleNetV2 1.0x | 2.28 | 0.149 | 69.55 | 88.92 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/shufflenet_v2/shufflenet_v2_1x_b64x16_linearlr_bn_nowd_imagenet.py) | [model](https://download.openmmlab.com/mmclassification/v0/shufflenet_v2/shufflenet_v2_batch1024_imagenet_20200812-5bf4721e.pth) | [log](https://download.openmmlab.com/mmclassification/v0/shufflenet_v2/shufflenet_v2_batch1024_imagenet_20200804-8860eec9.log.json) |
 | MobileNet V2 | 3.5 | 0.319 | 71.86 | 90.42 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/mobilenet_v2/mobilenet_v2_b32x8_imagenet.py) | [model](https://download.openmmlab.com/mmclassification/v0/mobilenet_v2/mobilenet_v2_batch256_imagenet_20200708-3b2dc3af.pth) | [log](https://download.openmmlab.com/mmclassification/v0/mobilenet_v2/mobilenet_v2_batch256_imagenet_20200708-3b2dc3af.log.json) |
+| ViT-B/16* | 86.86 | 33.03 | 84.20 | 97.18 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/vision_transformer/vit_base_patch16_384_finetune_imagenet.py) | [model](https://download.openmmlab.com/mmclassification/v0/vit/vit_base_patch16_384.pth) | [log]() |
+| ViT-B/32* | 88.3 | 8.56 | 81.73 | 96.13 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/vision_transformer/vit_base_patch32_384_finetune_imagenet.py) | [model](https://download.openmmlab.com/mmclassification/v0/vit/vit_base_patch32_384.pth) | [log]() |
+| ViT-L/16* | 304.72 | 116.68 | 85.08 | 97.38 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/vision_transformer/vit_large_patch16_384_finetune_imagenet.py) | [model](https://download.openmmlab.com/mmclassification/v0/vit/vit_large_patch16_384.pth) | [log]() |
+| ViT-L/32* | 306.63 | 29.66 | 81.52 | 96.06 | [config](https://github.com/open-mmlab/mmclassification/blob/master/configs/vision_transformer/vit_large_patch32_384_finetune_imagenet.py) | [model](https://download.openmmlab.com/mmclassification/v0/vit/vit_large_patch32_384.pth) | [log]() |
 
 Models with * are converted from other repos, others are trained by ourselves.
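As a usage note (not part of the patch), the converted ViT checkpoints added to the table can be exercised with mmcls's high-level inference API. A sketch, where 'demo.jpg' is a placeholder image path:

```python
# Sketch only: run one of the new ViT model-zoo entries end to end.
from mmcls.apis import inference_model, init_model

config_file = ('configs/vision_transformer/'
               'vit_base_patch16_384_finetune_imagenet.py')
checkpoint = ('https://download.openmmlab.com/mmclassification/'
              'v0/vit/vit_base_patch16_384.pth')

model = init_model(config_file, checkpoint, device='cpu')
result = inference_model(model, 'demo.jpg')
print(result)  # e.g. {'pred_label': ..., 'pred_score': ..., 'pred_class': ...}
```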