diff --git a/mmpretrain/configs/_base_/datasets/imagenet_bs256_beitv2.py b/mmpretrain/configs/_base_/datasets/imagenet_bs256_beitv2.py
new file mode 100644
index 00000000..f89eb17b
--- /dev/null
+++ b/mmpretrain/configs/_base_/datasets/imagenet_bs256_beitv2.py
@@ -0,0 +1,55 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+# This is a BETA new format config file, and the usage may change soon.
+from mmengine.dataset import DefaultSampler, default_collate
+
+from mmpretrain.datasets import (BEiTMaskGenerator, ColorJitter, ImageNet,
+                                 LoadImageFromFile, PackInputs, RandomFlip,
+                                 RandomResizedCropAndInterpolationWithTwoPic)
+from mmpretrain.models import TwoNormDataPreprocessor
+
+dataset_type = ImageNet
+data_root = 'data/imagenet/'
+
+data_preprocessor = dict(
+    type=TwoNormDataPreprocessor,
+    mean=[123.675, 116.28, 103.53],
+    std=[58.395, 57.12, 57.375],
+    second_mean=[127.5, 127.5, 127.5],
+    second_std=[127.5, 127.5, 127.5],
+    to_rgb=True)
+
+train_pipeline = [
+    dict(type=LoadImageFromFile),
+    dict(
+        type=ColorJitter, brightness=0.4, contrast=0.4, saturation=0.4,
+        hue=0.),
+    dict(type=RandomFlip, prob=0.5, direction='horizontal'),
+    dict(
+        type=RandomResizedCropAndInterpolationWithTwoPic,
+        size=224,
+        second_size=224,
+        interpolation='bicubic',
+        second_interpolation='bicubic',
+        scale=(0.2, 1.0)),
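+    # With 224x224 inputs and 16x16 patches, the token grid is 14x14 (196
+    # patches); masking 75 of them gives a mask ratio of roughly 40%.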
+    dict(
+        type=BEiTMaskGenerator,
+        input_size=(14, 14),
+        num_masking_patches=75,
+        max_num_patches=75,
+        min_num_patches=16),
+    dict(type=PackInputs)
+]
+
+train_dataloader = dict(
+    batch_size=256,
+    num_workers=8,
+    persistent_workers=True,
+    sampler=dict(type=DefaultSampler, shuffle=True),
+    collate_fn=dict(type=default_collate),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        split='train',
+        pipeline=train_pipeline))
diff --git a/mmpretrain/configs/_base_/datasets/imagenet_bs64_swin_224.py b/mmpretrain/configs/_base_/datasets/imagenet_bs64_swin_224.py
new file mode 100644
index 00000000..f687a602
--- /dev/null
+++ b/mmpretrain/configs/_base_/datasets/imagenet_bs64_swin_224.py
@@ -0,0 +1,92 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+# This is a BETA new format config file, and the usage may change soon.
+from mmengine.dataset import DefaultSampler, default_collate
+
+from mmpretrain.datasets import (CenterCrop, ImageNet, LoadImageFromFile,
+                                 PackInputs, RandAugment, RandomErasing,
+                                 RandomFlip, RandomResizedCrop, Resize,
+                                 ResizeEdge)
+from mmpretrain.evaluation import Accuracy
+
+# dataset settings
+dataset_type = ImageNet
+data_preprocessor = dict(
+    num_classes=1000,
+    # RGB format normalization parameters
+    mean=[123.675, 116.28, 103.53],
+    std=[58.395, 57.12, 57.375],
+    # convert image from BGR to RGB
+    to_rgb=True,
+)
+
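+# The transforms below run before the preprocessor's BGR-to-RGB conversion,
+# so pad/fill values are given in BGR channel order: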
+bgr_mean = data_preprocessor['mean'][::-1]
+bgr_std = data_preprocessor['std'][::-1]
+
+train_pipeline = [
+    dict(type=LoadImageFromFile),
+    dict(
+        type=RandomResizedCrop,
+        scale=224,
+        backend='pillow',
+        interpolation='bicubic'),
+    dict(type=RandomFlip, prob=0.5, direction='horizontal'),
+    dict(
+        type=RandAugment,
+        policies='timm_increasing',
+        num_policies=2,
+        total_level=10,
+        magnitude_level=9,
+        magnitude_std=0.5,
+        hparams=dict(
+            pad_val=[round(x) for x in bgr_mean], interpolation='bicubic')),
+    dict(
+        type=RandomErasing,
+        erase_prob=0.25,
+        mode='rand',
+        min_area_ratio=0.02,
+        max_area_ratio=1 / 3,
+        fill_color=bgr_mean,
+        fill_std=bgr_std),
+    dict(type=PackInputs),
+]
+
+test_pipeline = [
+    dict(type=LoadImageFromFile),
+    dict(
+        type=ResizeEdge,
+        scale=256,
+        edge='short',
+        backend='pillow',
+        interpolation='bicubic'),
+    dict(type=CenterCrop, crop_size=224),
+    dict(type=PackInputs),
+]
+
+train_dataloader = dict(
+    batch_size=64,
+    num_workers=5,
+    dataset=dict(
+        type=dataset_type,
+        data_root='data/imagenet',
+        split='train',
+        pipeline=train_pipeline),
+    sampler=dict(type=DefaultSampler, shuffle=True),
+)
+
+val_dataloader = dict(
+    batch_size=64,
+    num_workers=5,
+    dataset=dict(
+        type=dataset_type,
+        data_root='data/imagenet',
+        split='val',
+        pipeline=test_pipeline),
+    sampler=dict(type=DefaultSampler, shuffle=False),
+)
+val_evaluator = dict(type=Accuracy, topk=(1, 5))
+
+# For the standard test, configure the test dataset manually.
+test_dataloader = val_dataloader
+test_evaluator = val_evaluator
diff --git a/mmpretrain/configs/beit/benchmarks/beit-base-p16_8xb128-coslr-100e_in1k.py b/mmpretrain/configs/beit/benchmarks/beit-base-p16_8xb128-coslr-100e_in1k.py
new file mode 100644
index 00000000..00a76b75
--- /dev/null
+++ b/mmpretrain/configs/beit/benchmarks/beit-base-p16_8xb128-coslr-100e_in1k.py
@@ -0,0 +1,142 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+# This is a BETA new format config file, and the usage may change soon.
+from mmengine.config import read_base
+
+with read_base():
+    from ..._base_.datasets.imagenet_bs64_swin_224 import *
+    from ..._base_.schedules.imagenet_bs1024_adamw_swin import *
+    from ..._base_.default_runtime import *
+
+from mmengine.hooks import CheckpointHook
+from mmengine.model import PretrainedInit, TruncNormalInit
+from mmengine.optim import CosineAnnealingLR, LinearLR
+from torch.optim import AdamW
+
+from mmpretrain.datasets import LoadImageFromFile, PackInputs, RandomFlip
+from mmpretrain.engine.optimizers import \
+    LearningRateDecayOptimWrapperConstructor
+from mmpretrain.models import (BEiTViT, ImageClassifier, LabelSmoothLoss,
+                               LinearClsHead)
+from mmpretrain.models.utils.batch_augments import CutMix, Mixup
+
+data_preprocessor = dict(
+    num_classes=1000,
+    mean=[127.5, 127.5, 127.5],
+    std=[127.5, 127.5, 127.5],
+    to_rgb=True,
+)
+
+# model settings
+model = dict(
+    type=ImageClassifier,
+    backbone=dict(
+        type=BEiTViT,
+        arch='base',
+        img_size=224,
+        patch_size=16,
+        drop_path_rate=0.1,
+        out_type='avg_featmap',
+        use_abs_pos_emb=False,
+        use_rel_pos_bias=True,
+        use_shared_rel_pos_bias=False,
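+        # checkpoint='' is left empty on purpose; point it at a pretrained
+        # BEiT checkpoint (e.g. via
+        # --cfg-options model.backbone.init_cfg.checkpoint=CKPT_PATH).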
+        init_cfg=dict(type=PretrainedInit, checkpoint='', prefix='backbone.')),
+    neck=None,
+    head=dict(
+        type=LinearClsHead,
+        num_classes=1000,
+        in_channels=768,
+        loss=dict(type=LabelSmoothLoss, label_smooth_val=0.1, mode='original'),
+        init_cfg=[dict(type=TruncNormalInit, layer='Linear', std=0.02)]),
+    train_cfg=dict(
+        augments=[dict(type=Mixup, alpha=0.8),
+                  dict(type=CutMix, alpha=1.0)]))
+
+train_pipeline = [
+    dict(type=LoadImageFromFile),
+    dict(
+        type=RandomResizedCrop,
+        scale=224,
+        backend='pillow',
+        interpolation='bicubic'),
+    dict(type=RandomFlip, prob=0.5, direction='horizontal'),
+    dict(
+        type=RandAugment,
+        policies='timm_increasing',
+        num_policies=2,
+        total_level=10,
+        magnitude_level=9,
+        magnitude_std=0.5,
+        hparams=dict(pad_val=[104, 116, 124], interpolation='bicubic')),
+    dict(
+        type=RandomErasing,
+        erase_prob=0.25,
+        mode='rand',
+        min_area_ratio=0.02,
+        max_area_ratio=1 / 3,
+        fill_color=[103.53, 116.28, 123.675],
+        fill_std=[57.375, 57.12, 58.395]),
+    dict(type=PackInputs)
+]
+test_pipeline = [
+    dict(type=LoadImageFromFile),
+    dict(
+        type=ResizeEdge,
+        scale=256,
+        edge='short',
+        backend='pillow',
+        interpolation='bicubic'),
+    dict(type=CenterCrop, crop_size=224),
+    dict(type=PackInputs)
+]
+
+train_dataloader = dict(batch_size=128, dataset=dict(pipeline=train_pipeline))
+val_dataloader = dict(batch_size=128, dataset=dict(pipeline=test_pipeline))
+test_dataloader = val_dataloader
+
+# optimizer wrapper
+optim_wrapper = dict(
+    optimizer=dict(type=AdamW, lr=4e-3, weight_decay=0.05, betas=(0.9, 0.999)),
+    constructor=LearningRateDecayOptimWrapperConstructor,
+    paramwise_cfg=dict(
+        _delete_=True,
+        layer_decay_rate=0.65,
+        custom_keys={
+            # the following configurations are designed for BEiT
+            '.ln': dict(decay_mult=0.0),
+            '.bias': dict(decay_mult=0.0),
+            'q_bias': dict(decay_mult=0.0),
+            'v_bias': dict(decay_mult=0.0),
+            '.cls_token': dict(decay_mult=0.0),
+            '.pos_embed': dict(decay_mult=0.0),
+            '.gamma': dict(decay_mult=0.0),
+        }))
+
+# learning rate scheduler
+param_scheduler = [
+    dict(
+        type=LinearLR,
+        start_factor=1e-4,
+        by_epoch=True,
+        begin=0,
+        end=20,
+        convert_to_iter_based=True),
+    dict(
+        type=CosineAnnealingLR,
+        by_epoch=True,
+        begin=20,
+        end=100,
+        eta_min=1e-6,
+        convert_to_iter_based=True)
+]
+
+# runtime settings
+default_hooks = dict(
+    # save a checkpoint every epoch.
+    checkpoint=dict(type=CheckpointHook, interval=1, max_keep_ckpts=2))
+
+train_cfg = dict(by_epoch=True, max_epochs=100)
+
+randomness = dict(seed=0)
diff --git a/mmpretrain/configs/beit/benchmarks/beit-base-p16_8xb64_in1k.py b/mmpretrain/configs/beit/benchmarks/beit-base-p16_8xb64_in1k.py
new file mode 100644
index 00000000..b4718afb
--- /dev/null
+++ b/mmpretrain/configs/beit/benchmarks/beit-base-p16_8xb64_in1k.py
@@ -0,0 +1,50 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+# This is a BETA new format config file, and the usage may change soon.
+from mmengine.config import read_base
+
+with read_base():
+    from ..._base_.datasets.imagenet_bs64_swin_224 import *
+    from ..._base_.schedules.imagenet_bs1024_adamw_swin import *
+    from ..._base_.default_runtime import *
+
+from mmengine.model import ConstantInit, TruncNormalInit
+
+from mmpretrain.models import (BEiTViT, ImageClassifier, LabelSmoothLoss,
+                               LinearClsHead)
+from mmpretrain.models.utils.batch_augments import CutMix, Mixup
+
+data_preprocessor = dict(
+    num_classes=1000,
+    # RGB format normalization parameters
+    mean=[127.5, 127.5, 127.5],
+    std=[127.5, 127.5, 127.5],
+    # convert image from BGR to RGB
+    to_rgb=True,
+)
+
+model = dict(
+    type=ImageClassifier,
+    backbone=dict(
+        type=BEiTViT,
+        arch='base',
+        img_size=224,
+        patch_size=16,
+        out_type='avg_featmap',
+        use_abs_pos_emb=False,
+        use_rel_pos_bias=True,
+        use_shared_rel_pos_bias=False,
+    ),
+    neck=None,
+    head=dict(
+        type=LinearClsHead,
+        num_classes=1000,
+        in_channels=768,
+        loss=dict(type=LabelSmoothLoss, label_smooth_val=0.1, mode='original'),
+    ),
+    init_cfg=[
+        dict(type=TruncNormalInit, layer='Linear', std=.02),
+        dict(type=ConstantInit, layer='LayerNorm', val=1., bias=0.),
+    ],
+    train_cfg=dict(
+        augments=[dict(type=Mixup, alpha=0.8),
+                  dict(type=CutMix, alpha=1.0)]))
diff --git a/mmpretrain/configs/beitv2/beitv2_beit-base-p16_8xb256-amp-coslr-1600e_in1k.py b/mmpretrain/configs/beitv2/beitv2_beit-base-p16_8xb256-amp-coslr-1600e_in1k.py
new file mode 100644
index 00000000..6bec16b3
--- /dev/null
+++ b/mmpretrain/configs/beitv2/beitv2_beit-base-p16_8xb256-amp-coslr-1600e_in1k.py
@@ -0,0 +1,137 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+# This is a BETA new format config file, and the usage may change soon.
+from mmengine.config import read_base
+
+with read_base():
+    from .._base_.datasets.imagenet_bs256_beitv2 import *
+    from .._base_.default_runtime import *
+
+from mmengine.hooks import CheckpointHook
+from mmengine.model import ConstantInit, PretrainedInit, TruncNormalInit
+from mmengine.optim import AmpOptimWrapper, CosineAnnealingLR, LinearLR
+from mmengine.runner import EpochBasedTrainLoop
+from torch.optim import AdamW
+
+from mmpretrain.models import (VQKD, BEiT, BEiTPretrainViT, BEiTV2Head,
+                               BEiTV2Neck, CrossEntropyLoss)
+
+vqkd_encoder = dict(
+    arch='base',
+    img_size=224,
+    patch_size=16,
+    in_channels=3,
+    out_indices=-1,
+    drop_rate=0.,
+    drop_path_rate=0.,
+    norm_cfg=dict(type='LN', eps=1e-6),
+    final_norm=True,
+    out_type='featmap',
+    with_cls_token=True,
+    frozen_stages=-1,
+    use_abs_pos_emb=True,
+    use_rel_pos_bias=False,
+    use_shared_rel_pos_bias=False,
+    layer_scale_init_value=0.,
+    interpolate_mode='bicubic',
+    patch_cfg=dict(),
+    layer_cfgs=dict(),
+    init_cfg=None)
+
+layer_scale_init_value = 0.1
+drop_path_rate = 0.1  # 0. for 300 epochs and 0.1 for 1600 epochs.
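+# `vqkd_encoder` above configures the frozen VQ-KD visual tokenizer used as
+# the target generator: the backbone learns to predict the codebook index
+# VQ-KD assigns to each masked patch (num_embed=8192 below is its codebook).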
+
+model = dict(
+    type=BEiT,
+    backbone=dict(
+        type=BEiTPretrainViT,
+        arch='base',
+        patch_size=16,
+        out_indices=[-4, -1],
+        drop_path_rate=drop_path_rate,
+        final_norm=False,
+        out_type='raw',
+        layer_scale_init_value=layer_scale_init_value,
+        init_cfg=[
+            dict(type=TruncNormalInit, std=0.02, layer='Linear'),
+            dict(type=TruncNormalInit, std=0.02, layer='Conv2d'),
+            dict(type=ConstantInit, layer='LayerNorm', val=1.0, bias=0.0)
+        ]),
+    neck=dict(
+        type=BEiTV2Neck,
+        num_layers=2,
+        early_layers=9,
+        backbone_arch='base',
+        drop_path_rate=drop_path_rate,
+        layer_scale_init_value=layer_scale_init_value,
+    ),
+    head=dict(
+        type=BEiTV2Head,
+        embed_dims=768,
+        num_embed=8192,
+        loss=dict(type=CrossEntropyLoss)),
+    target_generator=dict(
+        type=VQKD,
+        encoder_config=vqkd_encoder,
+        init_cfg=dict(
+            type=PretrainedInit,
+            checkpoint=  # noqa
+            'https://download.openmmlab.com/mmselfsup/1.x/target_generator_ckpt/vqkd_encoder.pth'  # noqa
+        )))
+
+# optimizer wrapper
+optim_wrapper = dict(
+    type=AmpOptimWrapper,
+    loss_scale='dynamic',
+    # betas: (0.9, 0.98) for 300 epochs and (0.9, 0.999) for 1600 epochs.
+    optimizer=dict(
+        type=AdamW, lr=1.5e-3, betas=(0.9, 0.999), weight_decay=0.05),
+    clip_grad=dict(max_norm=3.0),
+    paramwise_cfg=dict(
+        custom_keys={
+            # the following configurations are designed for BEiT
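+            # decay_mult=0.0 disables weight decay for norm layers, biases,
+            # the cls/pos tokens and the layer-scale gammas; keys are matched
+            # as substrings of parameter names.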
+            '.ln': dict(decay_mult=0.0),
+            '.bias': dict(decay_mult=0.0),
+            'q_bias': dict(decay_mult=0.0),
+            'v_bias': dict(decay_mult=0.0),
+            '.cls_token': dict(decay_mult=0.0),
+            '.pos_embed': dict(decay_mult=0.0),
+            '.gamma': dict(decay_mult=0.0),
+        }))
+
+# learning rate scheduler
+param_scheduler = [
+    dict(
+        type=LinearLR,
+        start_factor=1e-4,
+        by_epoch=True,
+        begin=0,
+        end=10,
+        convert_to_iter_based=True),
+    dict(
+        type=CosineAnnealingLR,
+        eta_min=1e-5,
+        by_epoch=True,
+        begin=10,
+        end=1600,
+        convert_to_iter_based=True)
+]
+
+# runtime settings
+train_cfg = dict(type=EpochBasedTrainLoop, max_epochs=1600)
+default_hooks = dict(
+    # keep only the latest 3 checkpoints
+    checkpoint=dict(type=CheckpointHook, interval=1, max_keep_ckpts=3))
+
+randomness = dict(seed=0, diff_rank_seed=True)
+
+find_unused_parameters = True
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR
+# based on the actual training batch size.
+auto_scale_lr = dict(base_batch_size=2048)
diff --git a/mmpretrain/configs/beitv2/beitv2_beit-base-p16_8xb256-amp-coslr-300e_in1k.py b/mmpretrain/configs/beitv2/beitv2_beit-base-p16_8xb256-amp-coslr-300e_in1k.py
new file mode 100644
index 00000000..3fe9b503
--- /dev/null
+++ b/mmpretrain/configs/beitv2/beitv2_beit-base-p16_8xb256-amp-coslr-300e_in1k.py
@@ -0,0 +1,134 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+# This is a BETA new format config file, and the usage may change soon.
+from mmengine.config import read_base
+
+with read_base():
+    from .._base_.datasets.imagenet_bs256_beitv2 import *
+    from .._base_.default_runtime import *
+
+from mmengine.hooks import CheckpointHook
+from mmengine.model import ConstantInit, PretrainedInit, TruncNormalInit
+from mmengine.optim import AmpOptimWrapper, CosineAnnealingLR, LinearLR
+from mmengine.runner import EpochBasedTrainLoop
+from torch.optim import AdamW
+
+from mmpretrain.models import (VQKD, BEiT, BEiTPretrainViT, BEiTV2Head,
+                               BEiTV2Neck, CrossEntropyLoss)
+
+# model settings
+vqkd_encoder = dict(
+    arch='base',
+    img_size=224,
+    patch_size=16,
+    in_channels=3,
+    out_indices=-1,
+    drop_rate=0.,
+    drop_path_rate=0.,
+    norm_cfg=dict(type='LN', eps=1e-6),
+    final_norm=True,
+    out_type='featmap',
+    with_cls_token=True,
+    frozen_stages=-1,
+    use_abs_pos_emb=True,
+    use_rel_pos_bias=False,
+    use_shared_rel_pos_bias=False,
+    layer_scale_init_value=0.,
+    interpolate_mode='bicubic',
+    patch_cfg=dict(),
+    layer_cfgs=dict(),
+    init_cfg=None)
+
+layer_scale_init_value = 0.1
+drop_path_rate = 0.  # 0. for 300 epochs and 0.1 for 1600 epochs.
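+# Apart from drop_path_rate, this 300-epoch config differs from the
+# 1600-epoch one only in the AdamW betas ((0.9, 0.98) here) and the
+# schedule length (CosineAnnealingLR end=300, max_epochs=300).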
+model = dict(
+    type=BEiT,
+    backbone=dict(
+        type=BEiTPretrainViT,
+        arch='base',
+        patch_size=16,
+        out_indices=[-4, -1],
+        drop_path_rate=drop_path_rate,
+        final_norm=False,
+        out_type='raw',
+        layer_scale_init_value=layer_scale_init_value,
+        init_cfg=[
+            dict(type=TruncNormalInit, std=0.02, layer='Linear'),
+            dict(type=TruncNormalInit, std=0.02, layer='Conv2d'),
+            dict(type=ConstantInit, layer='LayerNorm', val=1.0, bias=0.0)
+        ]),
+    neck=dict(
+        type=BEiTV2Neck,
+        num_layers=2,
+        early_layers=9,
+        backbone_arch='base',
+        drop_path_rate=drop_path_rate,
+        layer_scale_init_value=layer_scale_init_value,
+    ),
+    head=dict(
+        type=BEiTV2Head,
+        embed_dims=768,
+        num_embed=8192,
+        loss=dict(type=CrossEntropyLoss)),
+    target_generator=dict(
+        type=VQKD,
+        encoder_config=vqkd_encoder,
+        init_cfg=dict(
+            type=PretrainedInit,
+            checkpoint=  # noqa
+            'https://download.openmmlab.com/mmselfsup/1.x/target_generator_ckpt/vqkd_encoder.pth'  # noqa
+        )))
+
+# optimizer wrapper
+optim_wrapper = dict(
+    type=AmpOptimWrapper,
+    loss_scale='dynamic',
+    # betas: (0.9, 0.98) for 300 epochs and (0.9, 0.999) for 1600 epochs.
+    optimizer=dict(
+        type=AdamW, lr=1.5e-3, betas=(0.9, 0.98), weight_decay=0.05),
+    clip_grad=dict(max_norm=3.0),
+    paramwise_cfg=dict(
+        custom_keys={
+            # the following configurations are designed for BEiT
+            '.ln': dict(decay_mult=0.0),
+            '.bias': dict(decay_mult=0.0),
+            'q_bias': dict(decay_mult=0.0),
+            'v_bias': dict(decay_mult=0.0),
+            '.cls_token': dict(decay_mult=0.0),
+            '.pos_embed': dict(decay_mult=0.0),
+            '.gamma': dict(decay_mult=0.0),
+        }))
+
+# learning rate scheduler
+param_scheduler = [
+    dict(
+        type=LinearLR,
+        start_factor=1e-4,
+        by_epoch=True,
+        begin=0,
+        end=10,
+        convert_to_iter_based=True),
+    dict(
+        type=CosineAnnealingLR,
+        eta_min=1e-5,
+        by_epoch=True,
+        begin=10,
+        end=300,
+        convert_to_iter_based=True)
+]
+
+# runtime settings
+train_cfg = dict(type=EpochBasedTrainLoop, max_epochs=300)
+default_hooks = dict(
+    # keep only the latest 3 checkpoints
+    checkpoint=dict(type=CheckpointHook, interval=1, max_keep_ckpts=3))
+
+randomness = dict(seed=0, diff_rank_seed=True)
+
+find_unused_parameters = True
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR
+# based on the actual training batch size.
+auto_scale_lr = dict(base_batch_size=2048)
diff --git a/mmpretrain/configs/beitv2/benchmarks/beit-base-p16_8xb128-coslr-100e_in1k.py b/mmpretrain/configs/beitv2/benchmarks/beit-base-p16_8xb128-coslr-100e_in1k.py
new file mode 100644
index 00000000..ee32d3a9
--- /dev/null
+++ b/mmpretrain/configs/beitv2/benchmarks/beit-base-p16_8xb128-coslr-100e_in1k.py
@@ -0,0 +1,136 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+# This is a BETA new format config file, and the usage may change soon.
+from mmengine.config import read_base
+
+with read_base():
+    from ..._base_.datasets.imagenet_bs64_swin_224 import *
+    from ..._base_.schedules.imagenet_bs1024_adamw_swin import *
+    from ..._base_.default_runtime import *
+
+from mmengine.hooks import CheckpointHook
+from mmengine.model import PretrainedInit, TruncNormalInit
+from mmengine.optim import CosineAnnealingLR, LinearLR
+from torch.optim import AdamW
+
+from mmpretrain.engine.optimizers import \
+    LearningRateDecayOptimWrapperConstructor
+from mmpretrain.models import (BEiTViT, ImageClassifier, LabelSmoothLoss,
+                               LinearClsHead)
+from mmpretrain.models.utils.batch_augments import CutMix, Mixup
+
+# model settings
+model = dict(
+    type=ImageClassifier,
+    backbone=dict(
+        type=BEiTViT,
+        arch='base',
+        img_size=224,
+        patch_size=16,
+        # 0.2 for models pretrained for 1600 epochs, 0.1 for 300 epochs.
+        drop_path_rate=0.1,
+        out_type='avg_featmap',
+        use_abs_pos_emb=False,
+        use_rel_pos_bias=True,
+        use_shared_rel_pos_bias=False,
+        init_cfg=dict(type=PretrainedInit, checkpoint='', prefix='backbone.')),
+    neck=None,
+    head=dict(
+        type=LinearClsHead,
+        num_classes=1000,
+        in_channels=768,
+        loss=dict(type=LabelSmoothLoss, label_smooth_val=0.1, mode='original'),
+        init_cfg=[dict(type=TruncNormalInit, layer='Linear', std=0.02)]),
+    train_cfg=dict(
+        augments=[dict(type=Mixup, alpha=0.8),
+                  dict(type=CutMix, alpha=1.0)]))
+
+train_pipeline = [
+    dict(type=LoadImageFromFile),
+    dict(
+        type=RandomResizedCrop,
+        scale=224,
+        backend='pillow',
+        interpolation='bicubic'),
+    dict(type=RandomFlip, prob=0.5, direction='horizontal'),
+    dict(
+        type=RandAugment,
+        policies='timm_increasing',
+        num_policies=2,
+        total_level=10,
+        magnitude_level=9,
+        magnitude_std=0.5,
+        hparams=dict(pad_val=[104, 116, 124], interpolation='bicubic')),
+    dict(
+        type=RandomErasing,
+        erase_prob=0.25,
+        mode='rand',
+        min_area_ratio=0.02,
+        max_area_ratio=1 / 3,
+        fill_color=[103.53, 116.28, 123.675],
+        fill_std=[57.375, 57.12, 58.395]),
+    dict(type=PackInputs)
+]
+test_pipeline = [
+    dict(type=LoadImageFromFile),
+    dict(
+        type=ResizeEdge,
+        scale=256,
+        edge='short',
+        backend='pillow',
+        interpolation='bicubic'),
+    dict(type=CenterCrop, crop_size=224),
+    dict(type=PackInputs)
+]
+
+train_dataloader = dict(batch_size=128, dataset=dict(pipeline=train_pipeline))
+val_dataloader = dict(batch_size=128, dataset=dict(pipeline=test_pipeline))
+test_dataloader = val_dataloader
+
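+# The constructor below applies layer-wise lr decay: roughly, each
+# transformer layer receives the base lr scaled by a power of
+# layer_decay_rate that grows toward the input, so early layers change less.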
+# optimizer wrapper
+optim_wrapper = dict(
+    optimizer=dict(type=AdamW, lr=5e-4, weight_decay=0.05, betas=(0.9, 0.999)),
+    constructor=LearningRateDecayOptimWrapperConstructor,
+    paramwise_cfg=dict(
+        _delete_=True,
+        # 0.6 for models pretrained for 1600 epochs, 0.65 for 300 epochs.
+        layer_decay_rate=0.65,
+        custom_keys={
+            # the following configurations are designed for BEiT
+            '.ln': dict(decay_mult=0.0),
+            '.bias': dict(decay_mult=0.0),
+            'q_bias': dict(decay_mult=0.0),
+            'v_bias': dict(decay_mult=0.0),
+            '.cls_token': dict(decay_mult=0.0),
+            '.pos_embed': dict(decay_mult=0.0),
+            '.gamma': dict(decay_mult=0.0),
+        }))
+
+# learning rate scheduler
+param_scheduler = [
+    dict(
+        type=LinearLR,
+        start_factor=1e-4,
+        by_epoch=True,
+        begin=0,
+        end=20,
+        convert_to_iter_based=True),
+    dict(
+        type=CosineAnnealingLR,
+        by_epoch=True,
+        begin=20,
+        end=100,
+        eta_min=1e-6,
+        convert_to_iter_based=True)
+]
+
+# runtime settings
+default_hooks = dict(
+    # save a checkpoint every epoch.
+    checkpoint=dict(type=CheckpointHook, interval=1, max_keep_ckpts=2))
+
+train_cfg = dict(by_epoch=True, max_epochs=100)
+
+randomness = dict(seed=0)
diff --git a/mmpretrain/configs/beitv2/benchmarks/beit-base-p16_8xb64_in1k.py b/mmpretrain/configs/beitv2/benchmarks/beit-base-p16_8xb64_in1k.py
new file mode 100644
index 00000000..ec20ba95
--- /dev/null
+++ b/mmpretrain/configs/beitv2/benchmarks/beit-base-p16_8xb64_in1k.py
@@ -0,0 +1,45 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+# This is a BETA new format config file, and the usage may change soon.
+from mmengine.config import read_base
+
+with read_base():
+    from ..._base_.datasets.imagenet_bs64_swin_224 import *
+    from ..._base_.schedules.imagenet_bs1024_adamw_swin import *
+    from ..._base_.default_runtime import *
+
+from mmengine.model import ConstantInit, TruncNormalInit
+
+from mmpretrain.models import (BEiTViT, ImageClassifier, LabelSmoothLoss,
+                               LinearClsHead)
+from mmpretrain.models.utils.batch_augments import CutMix, Mixup
+
+model = dict(
+    type=ImageClassifier,
+    backbone=dict(
+        type=BEiTViT,
+        arch='base',
+        img_size=224,
+        patch_size=16,
+        out_type='avg_featmap',
+        use_abs_pos_emb=False,
+        use_rel_pos_bias=True,
+        use_shared_rel_pos_bias=False,
+    ),
+    neck=None,
+    head=dict(
+        type=LinearClsHead,
+        num_classes=1000,
+        in_channels=768,
+        loss=dict(type=LabelSmoothLoss, label_smooth_val=0.1, mode='original'),
+    ),
+    init_cfg=[
+        dict(type=TruncNormalInit, layer='Linear', std=.02),
+        dict(type=ConstantInit, layer='LayerNorm', val=1., bias=0.),
+    ],
+    train_cfg=dict(
+        augments=[dict(type=Mixup, alpha=0.8),
+                  dict(type=CutMix, alpha=1.0)]))
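+
+# Usage sketch (assumes the standard OpenMMLab repo layout):
+#   python tools/train.py \
+#       mmpretrain/configs/beitv2/benchmarks/beit-base-p16_8xb64_in1k.py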