diff --git a/mmpretrain/configs/_base_/datasets/imagenet_bs32.py b/mmpretrain/configs/_base_/datasets/imagenet_bs32.py
new file mode 100644
index 00000000..7d074008
--- /dev/null
+++ b/mmpretrain/configs/_base_/datasets/imagenet_bs32.py
@@ -0,0 +1,62 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+# This is a BETA new format config file, and the usage may change in the future.
+from mmengine.dataset import DefaultSampler
+
+from mmpretrain.datasets import (CenterCrop, ImageNet, LoadImageFromFile,
+                                 PackInputs, RandomFlip, RandomResizedCrop,
+                                 ResizeEdge)
+from mmpretrain.evaluation import Accuracy
+
+# dataset settings
+dataset_type = ImageNet
+data_preprocessor = dict(
+    num_classes=1000,
+    # RGB format normalization parameters
+    mean=[123.675, 116.28, 103.53],
+    std=[58.395, 57.12, 57.375],
+    # convert image from BGR to RGB
+    to_rgb=True,
+)
+
+train_pipeline = [
+    dict(type=LoadImageFromFile),
+    dict(type=RandomResizedCrop, scale=224),
+    dict(type=RandomFlip, prob=0.5, direction='horizontal'),
+    dict(type=PackInputs),
+]
+
+test_pipeline = [
+    dict(type=LoadImageFromFile),
+    dict(type=ResizeEdge, scale=256, edge='short'),
+    dict(type=CenterCrop, crop_size=224),
+    dict(type=PackInputs),
+]
+
+train_dataloader = dict(
+    batch_size=32,
+    num_workers=5,
+    dataset=dict(
+        type=dataset_type,
+        data_root='data/imagenet',
+        ann_file='meta/train.txt',
+        data_prefix='train',
+        pipeline=train_pipeline),
+    sampler=dict(type=DefaultSampler, shuffle=True),
+)
+
+val_dataloader = dict(
+    batch_size=32,
+    num_workers=5,
+    dataset=dict(
+        type=dataset_type,
+        data_root='data/imagenet',
+        ann_file='meta/val.txt',
+        data_prefix='val',
+        pipeline=test_pipeline),
+    sampler=dict(type=DefaultSampler, shuffle=False),
+)
+val_evaluator = dict(type=Accuracy, topk=(1, 5))
+
+# If you want a standard test, please manually configure the test dataset
+test_dataloader = val_dataloader
+test_evaluator = val_evaluator
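For reference, the `mean`/`std` values used throughout these dataset configs are the standard ImageNet statistics expressed in the 0-255 pixel range rather than torchvision's 0-1 convention. A quick check of the arithmetic:

```python
# ImageNet statistics in the 0-1 convention, scaled by 255 to match the
# 0-255 range used by the data_preprocessor above.
imagenet_mean_01 = [0.485, 0.456, 0.406]
imagenet_std_01 = [0.229, 0.224, 0.225]

print([round(m * 255, 3) for m in imagenet_mean_01])  # [123.675, 116.28, 103.53]
print([round(s * 255, 3) for s in imagenet_std_01])   # [58.395, 57.12, 57.375]
```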
diff --git a/mmpretrain/configs/_base_/datasets/imagenet_bs32_simclr.py b/mmpretrain/configs/_base_/datasets/imagenet_bs32_simclr.py
new file mode 100644
index 00000000..b687a06f
--- /dev/null
+++ b/mmpretrain/configs/_base_/datasets/imagenet_bs32_simclr.py
@@ -0,0 +1,63 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+# This is a BETA new format config file, and the usage may change in the future.
+from mmcv.transforms import (LoadImageFromFile, RandomApply, RandomFlip,
+                             RandomGrayscale)
+from mmengine.dataset import DefaultSampler, default_collate
+
+from mmpretrain.datasets import (ColorJitter, GaussianBlur, ImageNet,
+                                 MultiView, PackInputs, RandomResizedCrop)
+from mmpretrain.models import SelfSupDataPreprocessor
+
+# dataset settings
+dataset_type = 'ImageNet'
+data_root = 'data/imagenet/'
+data_preprocessor = dict(
+    type=SelfSupDataPreprocessor,
+    mean=[123.675, 116.28, 103.53],
+    std=[58.395, 57.12, 57.375],
+    to_rgb=True)
+
+view_pipeline = [
+    dict(type=RandomResizedCrop, scale=224, backend='pillow'),
+    dict(type=RandomFlip, prob=0.5),
+    dict(
+        type=RandomApply,
+        transforms=[
+            dict(
+                type=ColorJitter,
+                brightness=0.8,
+                contrast=0.8,
+                saturation=0.8,
+                hue=0.2)
+        ],
+        prob=0.8),
+    dict(
+        type=RandomGrayscale,
+        prob=0.2,
+        keep_channels=True,
+        channel_weights=(0.114, 0.587, 0.2989)),
+    dict(
+        type=GaussianBlur,
+        magnitude_range=(0.1, 2.0),
+        magnitude_std='inf',
+        prob=0.5),
+]
+
+train_pipeline = [
+    dict(type=LoadImageFromFile),
+    dict(type=MultiView, num_views=2, transforms=[view_pipeline]),
+    dict(type=PackInputs)
+]
+
+train_dataloader = dict(
+    batch_size=32,
+    num_workers=4,
+    persistent_workers=True,
+    sampler=dict(type=DefaultSampler, shuffle=True),
+    collate_fn=dict(type=default_collate),
+    dataset=dict(
+        type=ImageNet,
+        data_root=data_root,
+        ann_file='meta/train.txt',
+        data_prefix=dict(img_path='train/'),
+        pipeline=train_pipeline))
diff --git a/mmpretrain/configs/_base_/datasets/imagenet_bs512_mae.py b/mmpretrain/configs/_base_/datasets/imagenet_bs512_mae.py
new file mode 100644
index 00000000..f64d5eac
--- /dev/null
+++ b/mmpretrain/configs/_base_/datasets/imagenet_bs512_mae.py
@@ -0,0 +1,41 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+# This is a BETA new format config file, and the usage may change in the future.
+from mmcv.transforms import LoadImageFromFile, RandomFlip
+from mmengine.dataset.sampler import DefaultSampler
+
+from mmpretrain.datasets import ImageNet, PackInputs, RandomResizedCrop
+from mmpretrain.models import SelfSupDataPreprocessor
+
+# dataset settings
+dataset_type = 'ImageNet'
+data_root = 'data/imagenet/'
+data_preprocessor = dict(
+    type=SelfSupDataPreprocessor,
+    mean=[123.675, 116.28, 103.53],
+    std=[58.395, 57.12, 57.375],
+    to_rgb=True)
+
+train_pipeline = [
+    dict(type=LoadImageFromFile),
+    dict(
+        type=RandomResizedCrop,
+        scale=224,
+        crop_ratio_range=(0.2, 1.0),
+        backend='pillow',
+        interpolation='bicubic'),
+    dict(type=RandomFlip, prob=0.5),
+    dict(type=PackInputs)
+]
+
+train_dataloader = dict(
+    batch_size=512,
+    num_workers=8,
+    persistent_workers=True,
+    sampler=dict(type=DefaultSampler, shuffle=True),
+    collate_fn=dict(type='default_collate'),
+    dataset=dict(
+        type=ImageNet,
+        data_root=data_root,
+        ann_file='meta/train.txt',
+        data_prefix=dict(img_path='train/'),
+        pipeline=train_pipeline))
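The SimCLR dataset config relies on `MultiView` to produce two independently augmented views of each image. A minimal sketch of how the wrapper behaves, assuming it follows the usual mmpretrain contract of turning `results['img']` into a list of views (the sample image here is synthetic and the pipeline deliberately tiny):

```python
import numpy as np

from mmpretrain.datasets import MultiView, RandomResizedCrop

# Two views, each produced by the same single-transform pipeline.
multi_view = MultiView(
    transforms=[[dict(type=RandomResizedCrop, scale=96)]], num_views=2)

results = dict(img=np.random.randint(0, 256, (224, 224, 3), dtype=np.uint8))
results = multi_view(results)
print(len(results['img']))       # 2 -- one entry per view
print(results['img'][0].shape)   # (96, 96, 3), assuming HWC output
```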
diff --git a/mmpretrain/configs/_base_/datasets/imagenet_bs64_swin_384.py b/mmpretrain/configs/_base_/datasets/imagenet_bs64_swin_384.py
new file mode 100644
index 00000000..85aeb1e2
--- /dev/null
+++ b/mmpretrain/configs/_base_/datasets/imagenet_bs64_swin_384.py
@@ -0,0 +1,64 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+# This is a BETA new format config file, and the usage may change in the future.
+from mmengine.dataset import DefaultSampler
+
+from mmpretrain.datasets import (ImageNet, LoadImageFromFile, PackInputs,
+                                 RandomFlip, RandomResizedCrop, Resize)
+from mmpretrain.evaluation import Accuracy
+
+# dataset settings
+dataset_type = ImageNet
+data_preprocessor = dict(
+    num_classes=1000,
+    # RGB format normalization parameters
+    mean=[123.675, 116.28, 103.53],
+    std=[58.395, 57.12, 57.375],
+    # convert image from BGR to RGB
+    to_rgb=True,
+)
+
+train_pipeline = [
+    dict(type=LoadImageFromFile),
+    dict(
+        type=RandomResizedCrop,
+        scale=384,
+        backend='pillow',
+        interpolation='bicubic'),
+    dict(type=RandomFlip, prob=0.5, direction='horizontal'),
+    dict(type=PackInputs),
+]
+
+test_pipeline = [
+    dict(type=LoadImageFromFile),
+    dict(type=Resize, scale=384, backend='pillow', interpolation='bicubic'),
+    dict(type=PackInputs),
+]
+
+train_dataloader = dict(
+    batch_size=64,
+    num_workers=5,
+    dataset=dict(
+        type=dataset_type,
+        data_root='data/imagenet',
+        ann_file='meta/train.txt',
+        data_prefix='train',
+        pipeline=train_pipeline),
+    sampler=dict(type=DefaultSampler, shuffle=True),
+)
+
+val_dataloader = dict(
+    batch_size=64,
+    num_workers=5,
+    dataset=dict(
+        type=dataset_type,
+        data_root='data/imagenet',
+        ann_file='meta/val.txt',
+        data_prefix='val',
+        pipeline=test_pipeline),
+    sampler=dict(type=DefaultSampler, shuffle=False),
+)
+val_evaluator = dict(type=Accuracy, topk=(1, 5))
+
+# If you want a standard test, please manually configure the test dataset
+test_dataloader = val_dataloader
+test_evaluator = val_evaluator
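These pure-Python base files are still consumed through mmengine's config system. A hedged sketch of loading one directly (`Config.fromfile` also understands the new format, though the exact beta behavior may differ across mmengine versions):

```python
from mmengine.config import Config

cfg = Config.fromfile(
    'mmpretrain/configs/_base_/datasets/imagenet_bs64_swin_384.py')

# Fields behave like attributes on a nested dict.
print(cfg.train_dataloader.batch_size)  # 64
print(cfg.val_evaluator)                # Accuracy with topk=(1, 5)
```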
diff --git a/mmpretrain/configs/_base_/default_runtime.py b/mmpretrain/configs/_base_/default_runtime.py
new file mode 100644
index 00000000..b5c748eb
--- /dev/null
+++ b/mmpretrain/configs/_base_/default_runtime.py
@@ -0,0 +1,61 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+# This is a BETA new format config file, and the usage may change in the future.
+from mmengine.hooks import (CheckpointHook, DistSamplerSeedHook, IterTimerHook,
+                            LoggerHook, ParamSchedulerHook)
+from mmengine.visualization import LocalVisBackend
+
+from mmpretrain.engine.hooks import VisualizationHook
+from mmpretrain.visualization import UniversalVisualizer
+
+# configure default hooks
+default_hooks = dict(
+    # record the time of every iteration.
+    timer=dict(type=IterTimerHook),
+
+    # print log every 100 iterations.
+    logger=dict(type=LoggerHook, interval=100),
+
+    # enable the parameter scheduler.
+    param_scheduler=dict(type=ParamSchedulerHook),
+
+    # save checkpoint per epoch.
+    checkpoint=dict(type=CheckpointHook, interval=1),
+
+    # set sampler seed in distributed environment.
+    sampler_seed=dict(type=DistSamplerSeedHook),
+
+    # validation results visualization; set `enable=True` to enable it.
+    visualization=dict(type=VisualizationHook, enable=False),
+)
+
+# configure environment
+env_cfg = dict(
+    # whether to enable cudnn benchmark
+    cudnn_benchmark=False,
+
+    # set multi process parameters
+    mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0),
+
+    # set distributed parameters
+    dist_cfg=dict(backend='nccl'),
+)
+
+# set visualizer
+vis_backends = [dict(type=LocalVisBackend)]
+visualizer = dict(type=UniversalVisualizer, vis_backends=vis_backends)
+
+# set log level
+log_level = 'INFO'
+
+# load from which checkpoint
+load_from = None
+
+# whether to resume training from the loaded checkpoint
+resume = False
+
+# Default to a random seed with `deterministic` disabled
+randomness = dict(seed=None, deterministic=False)
+
+# There is no need to specify default_scope with the new config,
+# so set it to None to avoid BC-breaking.
+default_scope = None
diff --git a/mmpretrain/configs/_base_/models/convnext_base.py b/mmpretrain/configs/_base_/models/convnext_base.py
new file mode 100644
index 00000000..6315b2f1
--- /dev/null
+++ b/mmpretrain/configs/_base_/models/convnext_base.py
@@ -0,0 +1,25 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+# This is a BETA new format config file, and the usage may change in the future.
+from mmengine.model import TruncNormalInit
+
+from mmpretrain.models import (ConvNeXt, CutMix, ImageClassifier,
+                               LabelSmoothLoss, LinearClsHead, Mixup)
+
+# Model settings
+model = dict(
+    type=ImageClassifier,
+    backbone=dict(type=ConvNeXt, arch='base', drop_path_rate=0.5),
+    head=dict(
+        type=LinearClsHead,
+        num_classes=1000,
+        in_channels=1024,
+        loss=dict(type=LabelSmoothLoss, label_smooth_val=0.1, mode='original'),
+        init_cfg=None,
+    ),
+    init_cfg=dict(
+        type=TruncNormalInit, layer=['Conv2d', 'Linear'], std=.02, bias=0.),
+    train_cfg=dict(augments=[
+        dict(type=Mixup, alpha=0.8),
+        dict(type=CutMix, alpha=1.0),
+    ]),
+)
diff --git a/mmpretrain/configs/_base_/models/mae_vit_base_p16.py b/mmpretrain/configs/_base_/models/mae_vit_base_p16.py
new file mode 100644
index 00000000..9347d1e8
--- /dev/null
+++ b/mmpretrain/configs/_base_/models/mae_vit_base_p16.py
@@ -0,0 +1,28 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+# This is a BETA new format config file, and the usage may change in the future.
+from mmpretrain.models import (MAE, MAEPretrainDecoder, MAEPretrainHead,
+                               MAEViT, PixelReconstructionLoss)
+
+# model settings
+model = dict(
+    type=MAE,
+    backbone=dict(type=MAEViT, arch='b', patch_size=16, mask_ratio=0.75),
+    neck=dict(
+        type=MAEPretrainDecoder,
+        patch_size=16,
+        in_chans=3,
+        embed_dim=768,
+        decoder_embed_dim=512,
+        decoder_depth=8,
+        decoder_num_heads=16,
+        mlp_ratio=4.,
+    ),
+    head=dict(
+        type=MAEPretrainHead,
+        norm_pix=True,
+        patch_size=16,
+        loss=dict(type=PixelReconstructionLoss, criterion='L2')),
+    init_cfg=[
+        dict(type='Xavier', layer='Linear', distribution='uniform'),
+        dict(type='Constant', layer='LayerNorm', val=1.0, bias=0.0)
+    ])
diff --git a/mmpretrain/configs/_base_/models/resnet18.py b/mmpretrain/configs/_base_/models/resnet18.py
new file mode 100644
index 00000000..30b8f651
--- /dev/null
+++ b/mmpretrain/configs/_base_/models/resnet18.py
@@ -0,0 +1,22 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+# This is a BETA new format config file, and the usage may change in the future.
+from mmpretrain.models import (CrossEntropyLoss, GlobalAveragePooling,
+                               ImageClassifier, LinearClsHead, ResNet)
+
+# model settings
+model = dict(
+    type=ImageClassifier,
+    backbone=dict(
+        type=ResNet,
+        depth=18,
+        num_stages=4,
+        out_indices=(3, ),
+        style='pytorch'),
+    neck=dict(type=GlobalAveragePooling),
+    head=dict(
+        type=LinearClsHead,
+        num_classes=1000,
+        in_channels=512,
+        loss=dict(type=CrossEntropyLoss, loss_weight=1.0),
+        topk=(1, 5),
+    ))
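One practical difference in the new format: `type` holds the class object itself instead of a registry string. Building still goes through the registry, which accepts both. A minimal sketch, assuming the usual `MODELS.build` entry point:

```python
from mmengine.config import Config

from mmpretrain.registry import MODELS

cfg = Config.fromfile('mmpretrain/configs/_base_/models/resnet18.py')
model = MODELS.build(cfg.model)  # type=ImageClassifier is a class, not a str
print(type(model).__name__)      # ImageClassifier
```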
diff --git a/mmpretrain/configs/_base_/schedules/imagenet_bs1024_adamw_swin.py b/mmpretrain/configs/_base_/schedules/imagenet_bs1024_adamw_swin.py
new file mode 100644
index 00000000..60ccaa0e
--- /dev/null
+++ b/mmpretrain/configs/_base_/schedules/imagenet_bs1024_adamw_swin.py
@@ -0,0 +1,46 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+# This is a BETA new format config file, and the usage may change in the future.
+from mmengine.optim import CosineAnnealingLR, LinearLR
+from torch.optim import AdamW
+
+# the batch size per GPU is 128, with 8 GPUs in total:
+# lr = 5e-4 * 128 * 8 / 512 = 0.001
+optim_wrapper = dict(
+    optimizer=dict(
+        type=AdamW,
+        lr=5e-4 * 1024 / 512,
+        weight_decay=0.05,
+        eps=1e-8,
+        betas=(0.9, 0.999)),
+    paramwise_cfg=dict(
+        norm_decay_mult=0.0,
+        bias_decay_mult=0.0,
+        flat_decay_mult=0.0,
+        custom_keys={
+            '.absolute_pos_embed': dict(decay_mult=0.0),
+            '.relative_position_bias_table': dict(decay_mult=0.0)
+        }),
+)
+
+# learning policy
+param_scheduler = [
+    # warm up learning rate scheduler
+    dict(
+        type=LinearLR,
+        start_factor=1e-3,
+        by_epoch=True,
+        end=20,
+        # update by iter
+        convert_to_iter_based=True),
+    # main learning rate scheduler
+    dict(type=CosineAnnealingLR, eta_min=1e-5, by_epoch=True, begin=20)
+]
+
+# train, val, test setting
+train_cfg = dict(by_epoch=True, max_epochs=300, val_interval=1)
+val_cfg = dict()
+test_cfg = dict()
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# based on the actual training batch size.
+auto_scale_lr = dict(base_batch_size=1024)
diff --git a/mmpretrain/configs/_base_/schedules/imagenet_bs256.py b/mmpretrain/configs/_base_/schedules/imagenet_bs256.py
new file mode 100644
index 00000000..95afa2ad
--- /dev/null
+++ b/mmpretrain/configs/_base_/schedules/imagenet_bs256.py
@@ -0,0 +1,21 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+# This is a BETA new format config file, and the usage may change in the future.
+from mmengine.optim import MultiStepLR
+from torch.optim import SGD
+
+# optimizer
+optim_wrapper = dict(
+    optimizer=dict(type=SGD, lr=0.1, momentum=0.9, weight_decay=0.0001))
+
+# learning policy
+param_scheduler = dict(
+    type=MultiStepLR, by_epoch=True, milestones=[30, 60, 90], gamma=0.1)
+
+# train, val, test setting
+train_cfg = dict(by_epoch=True, max_epochs=100, val_interval=1)
+val_cfg = dict()
+test_cfg = dict()
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR,
+# based on the actual training batch size.
+auto_scale_lr = dict(base_batch_size=256)
diff --git a/mmpretrain/configs/_base_/schedules/imagenet_lars_coslr_200e.py b/mmpretrain/configs/_base_/schedules/imagenet_lars_coslr_200e.py
new file mode 100644
index 00000000..0c7e6171
--- /dev/null
+++ b/mmpretrain/configs/_base_/schedules/imagenet_lars_coslr_200e.py
@@ -0,0 +1,27 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+# This is a BETA new format config file, and the usage may change in the future.
+from mmengine.optim.optimizer.optimizer_wrapper import OptimWrapper
+from mmengine.optim.scheduler.lr_scheduler import CosineAnnealingLR, LinearLR
+from mmengine.runner.loops import EpochBasedTrainLoop
+
+from mmpretrain.engine.optimizers.lars import LARS
+
+# optimizer wrapper
+optim_wrapper = dict(
+    type=OptimWrapper,
+    optimizer=dict(type=LARS, lr=4.8, weight_decay=1e-6, momentum=0.9))
+
+# learning rate scheduler
+param_scheduler = [
+    dict(
+        type=LinearLR,
+        start_factor=1e-4,
+        by_epoch=True,
+        begin=0,
+        end=10,
+        convert_to_iter_based=True),
+    dict(type=CosineAnnealingLR, T_max=190, by_epoch=True, begin=10, end=200)
+]
+
+# runtime settings
+train_cfg = dict(type=EpochBasedTrainLoop, max_epochs=200)
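All three schedules declare `auto_scale_lr` with a `base_batch_size` so the runner can apply the linear scaling rule when the launched batch size differs. The arithmetic it is expected to perform (a sketch of the rule, not the runner's actual code):

```python
def scaled_lr(base_lr: float, base_batch_size: int,
              num_gpus: int, samples_per_gpu: int) -> float:
    """Linear LR scaling: lr grows proportionally with total batch size."""
    return base_lr * (num_gpus * samples_per_gpu) / base_batch_size

# imagenet_bs1024_adamw_swin: base lr 1e-3 at base_batch_size=1024.
print(scaled_lr(1e-3, 1024, num_gpus=8, samples_per_gpu=128))  # 0.001
print(scaled_lr(1e-3, 1024, num_gpus=4, samples_per_gpu=128))  # 0.0005
```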
diff --git a/mmpretrain/configs/beit/beit_beit_base_p16_8xb256_amp_coslr_300e_in1k.py b/mmpretrain/configs/beit/beit_beit_base_p16_8xb256_amp_coslr_300e_in1k.py
new file mode 100644
index 00000000..a69ffde8
--- /dev/null
+++ b/mmpretrain/configs/beit/beit_beit_base_p16_8xb256_amp_coslr_300e_in1k.py
@@ -0,0 +1,144 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+# This is a BETA new format config file, and the usage may change in the future.
+if '_base_':
+    from .._base_.default_runtime import *
+
+from mmengine.dataset import DefaultSampler, default_collate
+from mmengine.hooks import CheckpointHook
+from mmengine.model import ConstantInit, PretrainedInit, TruncNormalInit
+from mmengine.optim import AmpOptimWrapper, CosineAnnealingLR, LinearLR
+from mmengine.runner import EpochBasedTrainLoop
+from torch.optim import AdamW
+
+from mmpretrain.datasets import (BEiTMaskGenerator, ColorJitter, ImageNet,
+                                 LoadImageFromFile, PackInputs, RandomFlip,
+                                 RandomResizedCropAndInterpolationWithTwoPic)
+from mmpretrain.models import (BEiT, BEiTPretrainViT, BEiTV1Head,
+                               CrossEntropyLoss, DALLEEncoder,
+                               TwoNormDataPreprocessor)
+
+# dataset settings
+dataset_type = ImageNet
+data_root = 'data/imagenet/'
+data_preprocessor = dict(
+    type=TwoNormDataPreprocessor,
+    mean=[123.675, 116.28, 103.53],
+    std=[58.395, 57.12, 57.375],
+    second_mean=[-31.875, -31.875, -31.875],
+    second_std=[318.75, 318.75, 318.75],
+    to_rgb=True)
+
+train_pipeline = [
+    dict(type=LoadImageFromFile),
+    dict(
+        type=ColorJitter, brightness=0.4, contrast=0.4, saturation=0.4,
+        hue=0.),
+    dict(type=RandomFlip, prob=0.5, direction='horizontal'),
+    dict(
+        type=RandomResizedCropAndInterpolationWithTwoPic,
+        size=224,
+        second_size=112,
+        interpolation='bicubic',
+        second_interpolation='lanczos',
+        scale=(0.08, 1.0)),
+    dict(
+        type=BEiTMaskGenerator,
+        input_size=(14, 14),
+        num_masking_patches=75,
+        max_num_patches=None,
+        min_num_patches=16),
+    dict(type=PackInputs)
+]
+train_dataloader = dict(
+    batch_size=256,
+    num_workers=8,
+    persistent_workers=True,
+    sampler=dict(type=DefaultSampler, shuffle=True),
+    collate_fn=dict(type=default_collate),
+    dataset=dict(
+        type=dataset_type,
+        data_root=data_root,
+        ann_file='meta/train.txt',
+        data_prefix=dict(img_path='train/'),
+        pipeline=train_pipeline))
+
+# model settings
+model = dict(
+    type=BEiT,
+    backbone=dict(
+        type=BEiTPretrainViT,
+        arch='base',
+        patch_size=16,
+        drop_path_rate=0.1,
+        final_norm=True,
+        out_type='raw',
+        layer_scale_init_value=0.1,
+        init_cfg=[
+            dict(type=TruncNormalInit, std=0.02, layer='Linear'),
+            dict(type=TruncNormalInit, std=0.02, layer='Conv2d'),
+            dict(type=ConstantInit, layer='LayerNorm', val=1.0, bias=0.0)
+        ]),
+    neck=None,
+    head=dict(
+        type=BEiTV1Head,
+        embed_dims=768,
+        num_embed=8192,
+        loss=dict(type=CrossEntropyLoss)),
+    target_generator=dict(
+        type=DALLEEncoder,
+        init_cfg=dict(
+            type=PretrainedInit,
+            checkpoint=  # noqa: E251
+            'https://download.openmmlab.com/mmselfsup/1.x/target_generator_ckpt/dalle_encoder.pth',  # noqa: E501
+        )))
+
+# optimizer wrapper
+optim_wrapper = dict(
+    type=AmpOptimWrapper,
+    loss_scale='dynamic',
+    optimizer=dict(
+        type=AdamW, lr=1.5e-3, betas=(0.9, 0.999), weight_decay=0.05),
+    clip_grad=dict(max_norm=3.0),
+    paramwise_cfg=dict(
+        custom_keys={
+            # the following configurations are designed for BEiT
+            '.ln': dict(decay_mult=0.0),
+            '.bias': dict(decay_mult=0.0),
+            'q_bias': dict(decay_mult=0.0),
+            'v_bias': dict(decay_mult=0.0),
+            '.cls_token': dict(decay_mult=0.0),
+            '.pos_embed': dict(decay_mult=0.0),
+            '.gamma': dict(decay_mult=0.0),
+        }))
+
+# learning rate scheduler
+param_scheduler = [
+    dict(
+        type=LinearLR,
+        start_factor=1e-4,
+        by_epoch=True,
+        begin=0,
+        end=10,
+        convert_to_iter_based=True),
+    dict(
+        type=CosineAnnealingLR,
+        eta_min=1e-5,
+        by_epoch=True,
+        begin=10,
+        end=300,
+        convert_to_iter_based=True)
+]
+
+# runtime settings
+train_cfg = dict(type=EpochBasedTrainLoop, max_epochs=300)
+default_hooks.update(
+    # only keep the latest 3 checkpoints
+    checkpoint=dict(type=CheckpointHook, interval=1, max_keep_ckpts=3))
+
+randomness.update(seed=0, diff_rank_seed=True)
+
+find_unused_parameters = True
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR
+# based on the actual training batch size.
+auto_scale_lr = dict(base_batch_size=2048)
diff --git a/mmpretrain/configs/convnext/convnext_base_32xb128_in1k_384px.py b/mmpretrain/configs/convnext/convnext_base_32xb128_in1k_384px.py
new file mode 100644
index 00000000..31cf5edf
--- /dev/null
+++ b/mmpretrain/configs/convnext/convnext_base_32xb128_in1k_384px.py
@@ -0,0 +1,26 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+# This is a BETA new format config file, and the usage may change in the future.
+if '_base_':
+    from .._base_.datasets.imagenet_bs64_swin_384 import *
+    from .._base_.default_runtime import *
+    from .._base_.models.convnext_base import *
+    from .._base_.schedules.imagenet_bs1024_adamw_swin import *
+
+from mmpretrain.engine import EMAHook
+
+# dataset setting
+train_dataloader.update(batch_size=128)
+
+# schedule setting
+optim_wrapper.update(
+    optimizer=dict(lr=4e-3),
+    clip_grad=dict(max_norm=5.0),
+)
+
+# runtime setting
+custom_hooks = [dict(type=EMAHook, momentum=4e-5, priority='ABOVE_NORMAL')]
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR
+# based on the actual training batch size.
+# base_batch_size = (32 GPUs) x (128 samples per GPU)
+auto_scale_lr = dict(base_batch_size=4096)
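The ConvNeXt recipe adds an `EMAHook` with a very small momentum. Under mmengine's convention the averaged weights are updated as `ema = (1 - m) * ema + m * param`; a toy sketch of how slowly `momentum=4e-5` moves the average (illustrative math only, not the hook's implementation):

```python
def ema_update(ema: float, param: float, momentum: float = 4e-5) -> float:
    # mmengine-style update: the averaged value drifts toward the live
    # parameter by a factor of `momentum` per step.
    return (1.0 - momentum) * ema + momentum * param

ema = 0.0
for _ in range(1000):
    ema = ema_update(ema, param=1.0)
print(round(ema, 4))  # ~0.0392 after 1000 steps -- a deliberately slow tracker
```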
diff --git a/mmpretrain/configs/eva/eva_mae_style_vit_base_p16_16xb256_coslr_400e_in1k.py b/mmpretrain/configs/eva/eva_mae_style_vit_base_p16_16xb256_coslr_400e_in1k.py
new file mode 100644
index 00000000..3c3a7401
--- /dev/null
+++ b/mmpretrain/configs/eva/eva_mae_style_vit_base_p16_16xb256_coslr_400e_in1k.py
@@ -0,0 +1,90 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+# This is a BETA new format config file, and the usage may change in the future.
+if '_base_':
+    from .._base_.models.mae_vit_base_p16 import *
+    from .._base_.datasets.imagenet_bs512_mae import *
+    from .._base_.default_runtime import *
+
+from mmengine.hooks import CheckpointHook
+from mmengine.optim import CosineAnnealingLR, LinearLR, OptimWrapper
+from mmengine.runner import EpochBasedTrainLoop
+from torch.optim import AdamW
+
+from mmpretrain.models import (EVA, CLIPGenerator, CosineSimilarityLoss,
+                               MAEPretrainDecoder, MIMHead)
+
+# dataset settings
+train_dataloader.batch_size = 256
+
+# model settings
+model.type = EVA
+model.init_cfg = None
+model.backbone.update(init_cfg=[
+    dict(type='Xavier', distribution='uniform', layer='Linear'),
+    dict(type='Constant', layer='LayerNorm', val=1.0, bias=0.0)
+])
+model.neck.update(
+    type=MAEPretrainDecoder,
+    predict_feature_dim=512,
+    init_cfg=[
+        dict(type='Xavier', distribution='uniform', layer='Linear'),
+        dict(type='Constant', layer='LayerNorm', val=1.0, bias=0.0)
+    ])
+model.head = dict(
+    type=MIMHead,
+    loss=dict(type=CosineSimilarityLoss, shift_factor=2.0, scale_factor=2.0))
+model.target_generator = dict(
+    type=CLIPGenerator,
+    tokenizer_path=  # noqa
+    'https://download.openmmlab.com/mmselfsup/1.x/target_generator_ckpt/clip_vit_base_16.pth.tar'  # noqa
+)
+
+# optimizer wrapper
+optim_wrapper = dict(
+    type=OptimWrapper,
+    optimizer=dict(
+        type=AdamW,
+        lr=1.5e-4 * 4096 / 256,
+        betas=(0.9, 0.95),
+        weight_decay=0.05),
+    paramwise_cfg=dict(
+        custom_keys={
+            'ln': dict(decay_mult=0.0),
+            'bias': dict(decay_mult=0.0),
+            'pos_embed': dict(decay_mult=0.),
+            'mask_token': dict(decay_mult=0.),
+            'cls_token': dict(decay_mult=0.)
+        }))
+find_unused_parameters = True
+
+# learning rate scheduler
+param_scheduler = [
+    dict(
+        type=LinearLR,
+        start_factor=1e-4,
+        by_epoch=True,
+        begin=0,
+        end=40,
+        convert_to_iter_based=True),
+    dict(
+        type=CosineAnnealingLR,
+        T_max=360,
+        by_epoch=True,
+        begin=40,
+        end=400,
+        convert_to_iter_based=True)
+]
+
+# runtime settings
+train_cfg = dict(type=EpochBasedTrainLoop, max_epochs=400)
+default_hooks.checkpoint = dict(
+    type=CheckpointHook, interval=1, max_keep_ckpts=3)
+
+randomness.update(dict(seed=0, diff_rank_seed=True))
+
+# auto resume
+resume = True
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR
+# based on the actual training batch size.
+auto_scale_lr = dict(base_batch_size=4096)
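Because the `if '_base_':` imports put the base file's `model` into scope, the EVA config can override it with ordinary attribute access and `dict.update`. A hedged sketch of the same pattern on plain dicts (config dicts additionally allow attribute-style access, so `model.type = EVA` works in the config file itself):

```python
# Stand-in for the `model` imported from mae_vit_base_p16; names abbreviated.
model = dict(type='MAE', backbone=dict(arch='b'),
             neck=dict(type='MAEPretrainDecoder'))

# Equivalent of `model.type = EVA` and `model.neck.update(...)` above.
model['type'] = 'EVA'
model['neck'].update(predict_feature_dim=512)
print(model['neck'])
# {'type': 'MAEPretrainDecoder', 'predict_feature_dim': 512}
```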
diff --git a/mmpretrain/configs/mae/mae_vit_base_p16_8xb512_amp_coslr_300e_in1k.py b/mmpretrain/configs/mae/mae_vit_base_p16_8xb512_amp_coslr_300e_in1k.py
new file mode 100644
index 00000000..11a88f94
--- /dev/null
+++ b/mmpretrain/configs/mae/mae_vit_base_p16_8xb512_amp_coslr_300e_in1k.py
@@ -0,0 +1,63 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+# This is a BETA new format config file, and the usage may change in the future.
+if '_base_':
+    from .._base_.models.mae_vit_base_p16 import *
+    from .._base_.datasets.imagenet_bs512_mae import *
+    from .._base_.default_runtime import *
+
+from mmengine.hooks.checkpoint_hook import CheckpointHook
+from mmengine.optim.optimizer.amp_optimizer_wrapper import AmpOptimWrapper
+from mmengine.optim.scheduler.lr_scheduler import CosineAnnealingLR, LinearLR
+from mmengine.runner.loops import EpochBasedTrainLoop
+from torch.optim.adamw import AdamW
+
+# optimizer wrapper
+optim_wrapper = dict(
+    type=AmpOptimWrapper,
+    loss_scale='dynamic',
+    optimizer=dict(
+        type=AdamW,
+        lr=1.5e-4 * 4096 / 256,
+        betas=(0.9, 0.95),
+        weight_decay=0.05),
+    paramwise_cfg=dict(
+        custom_keys={
+            'ln': dict(decay_mult=0.0),
+            'bias': dict(decay_mult=0.0),
+            'pos_embed': dict(decay_mult=0.),
+            'mask_token': dict(decay_mult=0.),
+            'cls_token': dict(decay_mult=0.)
+        }))
+
+# learning rate scheduler
+param_scheduler = [
+    dict(
+        type=LinearLR,
+        start_factor=0.0001,
+        by_epoch=True,
+        begin=0,
+        end=40,
+        convert_to_iter_based=True),
+    dict(
+        type=CosineAnnealingLR,
+        T_max=260,
+        by_epoch=True,
+        begin=40,
+        end=300,
+        convert_to_iter_based=True)
+]
+
+# runtime settings
+train_cfg = dict(type=EpochBasedTrainLoop, max_epochs=300)
+# only keep the latest 3 checkpoints
+default_hooks.checkpoint = dict(
+    type=CheckpointHook, interval=1, max_keep_ckpts=3)
+
+randomness.update(seed=0, diff_rank_seed=True)
+
+# auto resume
+resume = True
+
+# NOTE: `auto_scale_lr` is for automatically scaling LR
+# based on the actual training batch size.
+auto_scale_lr = dict(base_batch_size=4096)
diff --git a/mmpretrain/configs/resnet/resnet18_8xb32_in1k.py b/mmpretrain/configs/resnet/resnet18_8xb32_in1k.py
new file mode 100644
index 00000000..461b9c62
--- /dev/null
+++ b/mmpretrain/configs/resnet/resnet18_8xb32_in1k.py
@@ -0,0 +1,7 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+# This is a BETA new format config file, and the usage may change in the future.
+if '_base_':
+    from .._base_.datasets.imagenet_bs32 import *
+    from .._base_.default_runtime import *
+    from .._base_.models.resnet18 import *
+    from .._base_.schedules.imagenet_bs256 import *
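The MAE schedule above composes a 40-epoch linear warmup with a 260-epoch cosine decay. The resulting curve can be reproduced with plain math (a sketch of the intent, not mmengine's scheduler code; `eta_min` defaults to 0 here):

```python
import math

BASE_LR = 1.5e-4 * 4096 / 256  # 2.4e-3, as in the optimizer above

def lr_at(epoch: float, warmup: int = 40, total: int = 300,
          start_factor: float = 1e-4) -> float:
    if epoch < warmup:  # LinearLR: ramp from start_factor * lr up to lr
        return BASE_LR * (start_factor + (1 - start_factor) * epoch / warmup)
    t = (epoch - warmup) / (total - warmup)  # CosineAnnealingLR, T_max=260
    return 0.5 * BASE_LR * (1 + math.cos(math.pi * t))

for e in (0, 40, 170, 300):
    print(e, f'{lr_at(e):.2e}')  # 2.40e-07, 2.40e-03, 1.20e-03, 0.00e+00
```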
diff --git a/mmpretrain/configs/simclr/simclr_resnet50_16xb256_coslr_200e_in1k.py b/mmpretrain/configs/simclr/simclr_resnet50_16xb256_coslr_200e_in1k.py
new file mode 100644
index 00000000..fe31b2ee
--- /dev/null
+++ b/mmpretrain/configs/simclr/simclr_resnet50_16xb256_coslr_200e_in1k.py
@@ -0,0 +1,56 @@
+# Copyright (c) OpenMMLab. All rights reserved.
+# This is a BETA new format config file, and the usage may change in the future.
+if '_base_':
+    from .._base_.datasets.imagenet_bs32_simclr import *
+    from .._base_.schedules.imagenet_lars_coslr_200e import *
+    from .._base_.default_runtime import *
+
+from mmengine.hooks.checkpoint_hook import CheckpointHook
+from mmengine.optim.optimizer.optimizer_wrapper import OptimWrapper
+
+from mmpretrain.engine.optimizers.lars import LARS
+from mmpretrain.models.backbones.resnet import ResNet
+from mmpretrain.models.heads.contrastive_head import ContrastiveHead
+from mmpretrain.models.losses.cross_entropy_loss import CrossEntropyLoss
+from mmpretrain.models.necks.nonlinear_neck import NonLinearNeck
+from mmpretrain.models.selfsup.simclr import SimCLR
+
+# dataset settings
+train_dataloader.merge(dict(batch_size=256))
+
+# model settings
+model = dict(
+    type=SimCLR,
+    backbone=dict(
+        type=ResNet,
+        depth=50,
+        norm_cfg=dict(type='SyncBN'),
+        zero_init_residual=True),
+    neck=dict(
+        type=NonLinearNeck,  # SimCLR non-linear neck
+        in_channels=2048,
+        hid_channels=2048,
+        out_channels=128,
+        num_layers=2,
+        with_avg_pool=True),
+    head=dict(
+        type=ContrastiveHead,
+        loss=dict(type=CrossEntropyLoss),
+        temperature=0.1),
+)
+
+# optimizer
+optim_wrapper = dict(
+    type=OptimWrapper,
+    optimizer=dict(type=LARS, lr=4.8, momentum=0.9, weight_decay=1e-6),
+    paramwise_cfg=dict(
+        custom_keys={
+            'bn': dict(decay_mult=0, lars_exclude=True),
+            'bias': dict(decay_mult=0, lars_exclude=True),
+            # bn layer in ResNet block downsample module
+            'downsample.1': dict(decay_mult=0, lars_exclude=True)
+        }))
+
+# runtime settings
+default_hooks.checkpoint = dict(
+    type=CheckpointHook, interval=10, max_keep_ckpts=3)
diff --git a/mmpretrain/datasets/transforms/__init__.py b/mmpretrain/datasets/transforms/__init__.py
index 88c72ca1..efaec3af 100644
--- a/mmpretrain/datasets/transforms/__init__.py
+++ b/mmpretrain/datasets/transforms/__init__.py
@@ -13,8 +13,9 @@ from .formatting import (Collect, NumpyToPIL, PackInputs, PackMultiTaskInputs,
 from .processing import (Albumentations, BEiTMaskGenerator, CleanCaption,
                          ColorJitter, EfficientNetCenterCrop,
                          EfficientNetRandomCrop, Lighting, RandomCrop,
-                         RandomErasing, RandomResizedCrop, RandomTranslatePad,
-                         ResizeEdge, SimMIMMaskGenerator)
+                         RandomErasing, RandomResizedCrop,
+                         RandomResizedCropAndInterpolationWithTwoPic,
+                         RandomTranslatePad, ResizeEdge, SimMIMMaskGenerator)
 from .wrappers import ApplyToList, MultiView
 
 for t in (CenterCrop, LoadImageFromFile, Normalize, RandomFlip,
@@ -32,5 +33,6 @@ __all__ = [
     'PackMultiTaskInputs', 'GaussianBlur', 'BEiTMaskGenerator',
     'SimMIMMaskGenerator', 'CenterCrop', 'LoadImageFromFile', 'Normalize',
     'RandomFlip', 'RandomGrayscale', 'RandomResize', 'Resize', 'MultiView',
-    'ApplyToList', 'CleanCaption', 'RandomTranslatePad'
+    'ApplyToList', 'CleanCaption', 'RandomTranslatePad',
+    'RandomResizedCropAndInterpolationWithTwoPic'
 ]
diff --git a/setup.cfg b/setup.cfg
index fe9c158b..06455344 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -30,3 +30,4 @@ extend-ignore = E251
 # in `__init__.py`
 per-file-ignores =
     */__init__.py: F401
+    mmpretrain/configs/*: F401,F403,F405
diff --git a/tools/dist_train.sh b/tools/dist_train.sh
old mode 100644
new mode 100755
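Finally, the `custom_keys` in the SimCLR optimizer rely on substring matching against parameter names to exempt BN and bias parameters from weight decay and LARS adaptation. A sketch of that matching logic (the real version lives in mmengine's optimizer constructor; this is illustrative only):

```python
custom_keys = {
    'bn': dict(decay_mult=0, lars_exclude=True),
    'bias': dict(decay_mult=0, lars_exclude=True),
    'downsample.1': dict(decay_mult=0, lars_exclude=True),
}

def overrides_for(param_name: str) -> dict:
    """Collect every override whose key appears in the parameter name."""
    merged = {}
    for key, opts in custom_keys.items():
        if key in param_name:
            merged.update(opts)
    return merged

print(overrides_for('backbone.layer1.0.downsample.1.weight'))
# {'decay_mult': 0, 'lars_exclude': True}
print(overrides_for('backbone.conv1.weight'))  # {} -- full decay + LARS
```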